{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9997324056729997, "eval_steps": 500, "global_step": 11210, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001783962180001784, "grad_norm": 7.837475776672363, "learning_rate": 0.0004999999901825538, "loss": 6.5872, "step": 1 }, { "epoch": 0.0003567924360003568, "grad_norm": 32.637916564941406, "learning_rate": 0.000499999960730216, "loss": 7.833, "step": 2 }, { "epoch": 0.0005351886540005352, "grad_norm": 19.442291259765625, "learning_rate": 0.0004999999116429887, "loss": 6.3418, "step": 3 }, { "epoch": 0.0007135848720007136, "grad_norm": 7.7614946365356445, "learning_rate": 0.0004999998429208761, "loss": 5.3582, "step": 4 }, { "epoch": 0.000891981090000892, "grad_norm": 5.833192348480225, "learning_rate": 0.0004999997545638834, "loss": 4.4165, "step": 5 }, { "epoch": 0.0010703773080010704, "grad_norm": 2.9772305488586426, "learning_rate": 0.0004999996465720175, "loss": 4.1561, "step": 6 }, { "epoch": 0.0012487735260012487, "grad_norm": 3.942150354385376, "learning_rate": 0.0004999995189452869, "loss": 4.0736, "step": 7 }, { "epoch": 0.0014271697440014271, "grad_norm": 2.2194981575012207, "learning_rate": 0.0004999993716837017, "loss": 3.9339, "step": 8 }, { "epoch": 0.0016055659620016055, "grad_norm": 2.6826322078704834, "learning_rate": 0.0004999992047872735, "loss": 4.0066, "step": 9 }, { "epoch": 0.001783962180001784, "grad_norm": 2.3299083709716797, "learning_rate": 0.0004999990182560153, "loss": 3.9545, "step": 10 }, { "epoch": 0.0019623583980019625, "grad_norm": 2.3098955154418945, "learning_rate": 0.0004999988120899418, "loss": 3.8027, "step": 11 }, { "epoch": 0.0021407546160021407, "grad_norm": 1.8366882801055908, "learning_rate": 0.0004999985862890691, "loss": 3.8252, "step": 12 }, { "epoch": 0.0023191508340023193, "grad_norm": 1.689456820487976, "learning_rate": 0.0004999983408534151, "loss": 3.6765, "step": 13 }, { "epoch": 0.0024975470520024975, "grad_norm": 2.1124043464660645, "learning_rate": 0.000499998075782999, "loss": 3.7322, "step": 14 }, { "epoch": 0.002675943270002676, "grad_norm": 1.5068916082382202, "learning_rate": 0.0004999977910778417, "loss": 3.7449, "step": 15 }, { "epoch": 0.0028543394880028543, "grad_norm": 1.2359281778335571, "learning_rate": 0.0004999974867379652, "loss": 3.6367, "step": 16 }, { "epoch": 0.003032735706003033, "grad_norm": 1.5040236711502075, "learning_rate": 0.0004999971627633939, "loss": 3.5622, "step": 17 }, { "epoch": 0.003211131924003211, "grad_norm": 1.7240302562713623, "learning_rate": 0.000499996819154153, "loss": 3.4124, "step": 18 }, { "epoch": 0.0033895281420033897, "grad_norm": 1.6077510118484497, "learning_rate": 0.0004999964559102693, "loss": 3.6737, "step": 19 }, { "epoch": 0.003567924360003568, "grad_norm": 1.3428990840911865, "learning_rate": 0.0004999960730317718, "loss": 3.4728, "step": 20 }, { "epoch": 0.0037463205780037465, "grad_norm": 1.5660542249679565, "learning_rate": 0.0004999956705186902, "loss": 3.2715, "step": 21 }, { "epoch": 0.003924716796003925, "grad_norm": 1.5012935400009155, "learning_rate": 0.0004999952483710562, "loss": 3.6257, "step": 22 }, { "epoch": 0.004103113014004103, "grad_norm": 1.3174484968185425, "learning_rate": 0.0004999948065889029, "loss": 3.5645, "step": 23 }, { "epoch": 0.004281509232004281, "grad_norm": 1.1299705505371094, "learning_rate": 0.0004999943451722653, "loss": 3.2416, "step": 24 }, { "epoch": 0.00445990545000446, "grad_norm": 0.8905799984931946, "learning_rate": 0.0004999938641211792, "loss": 3.2397, "step": 25 }, { "epoch": 0.004638301668004639, "grad_norm": 1.1655199527740479, "learning_rate": 0.0004999933634356826, "loss": 3.1717, "step": 26 }, { "epoch": 0.004816697886004816, "grad_norm": 1.191267967224121, "learning_rate": 0.0004999928431158149, "loss": 3.3958, "step": 27 }, { "epoch": 0.004995094104004995, "grad_norm": 1.0441665649414062, "learning_rate": 0.0004999923031616169, "loss": 3.3732, "step": 28 }, { "epoch": 0.005173490322005174, "grad_norm": 1.2923051118850708, "learning_rate": 0.0004999917435731309, "loss": 3.3872, "step": 29 }, { "epoch": 0.005351886540005352, "grad_norm": 1.1339762210845947, "learning_rate": 0.000499991164350401, "loss": 2.9439, "step": 30 }, { "epoch": 0.00553028275800553, "grad_norm": 0.8818377256393433, "learning_rate": 0.0004999905654934726, "loss": 3.1922, "step": 31 }, { "epoch": 0.0057086789760057086, "grad_norm": 0.7961766123771667, "learning_rate": 0.0004999899470023929, "loss": 3.2558, "step": 32 }, { "epoch": 0.005887075194005887, "grad_norm": 1.180748701095581, "learning_rate": 0.0004999893088772102, "loss": 3.2286, "step": 33 }, { "epoch": 0.006065471412006066, "grad_norm": 1.035274624824524, "learning_rate": 0.0004999886511179748, "loss": 3.4693, "step": 34 }, { "epoch": 0.0062438676300062435, "grad_norm": 1.15408194065094, "learning_rate": 0.0004999879737247382, "loss": 3.2316, "step": 35 }, { "epoch": 0.006422263848006422, "grad_norm": 0.8637914657592773, "learning_rate": 0.0004999872766975539, "loss": 2.9908, "step": 36 }, { "epoch": 0.006600660066006601, "grad_norm": 1.0279821157455444, "learning_rate": 0.0004999865600364764, "loss": 3.1329, "step": 37 }, { "epoch": 0.006779056284006779, "grad_norm": 0.9054232239723206, "learning_rate": 0.000499985823741562, "loss": 3.2613, "step": 38 }, { "epoch": 0.006957452502006957, "grad_norm": 0.883553683757782, "learning_rate": 0.0004999850678128687, "loss": 3.0738, "step": 39 }, { "epoch": 0.007135848720007136, "grad_norm": 0.9399454593658447, "learning_rate": 0.0004999842922504556, "loss": 3.2061, "step": 40 }, { "epoch": 0.007314244938007314, "grad_norm": 1.0862022638320923, "learning_rate": 0.0004999834970543839, "loss": 3.0835, "step": 41 }, { "epoch": 0.007492641156007493, "grad_norm": 0.9416713118553162, "learning_rate": 0.0004999826822247159, "loss": 3.0841, "step": 42 }, { "epoch": 0.007671037374007671, "grad_norm": 1.0028291940689087, "learning_rate": 0.0004999818477615155, "loss": 3.0349, "step": 43 }, { "epoch": 0.00784943359200785, "grad_norm": 0.8068011403083801, "learning_rate": 0.0004999809936648484, "loss": 3.0634, "step": 44 }, { "epoch": 0.008027829810008028, "grad_norm": 2.0067076683044434, "learning_rate": 0.0004999801199347817, "loss": 3.2768, "step": 45 }, { "epoch": 0.008206226028008206, "grad_norm": 0.8227775692939758, "learning_rate": 0.000499979226571384, "loss": 3.2637, "step": 46 }, { "epoch": 0.008384622246008385, "grad_norm": 0.9798381924629211, "learning_rate": 0.0004999783135747252, "loss": 2.9619, "step": 47 }, { "epoch": 0.008563018464008563, "grad_norm": 0.9678353071212769, "learning_rate": 0.0004999773809448774, "loss": 2.7488, "step": 48 }, { "epoch": 0.00874141468200874, "grad_norm": 1.023210048675537, "learning_rate": 0.0004999764286819137, "loss": 3.1175, "step": 49 }, { "epoch": 0.00891981090000892, "grad_norm": 0.892423689365387, "learning_rate": 0.0004999754567859087, "loss": 2.7036, "step": 50 }, { "epoch": 0.009098207118009098, "grad_norm": 1.0287269353866577, "learning_rate": 0.000499974465256939, "loss": 2.892, "step": 51 }, { "epoch": 0.009276603336009277, "grad_norm": 1.1491647958755493, "learning_rate": 0.0004999734540950824, "loss": 2.9976, "step": 52 }, { "epoch": 0.009454999554009455, "grad_norm": 1.1946396827697754, "learning_rate": 0.0004999724233004183, "loss": 2.8391, "step": 53 }, { "epoch": 0.009633395772009633, "grad_norm": 1.0518914461135864, "learning_rate": 0.0004999713728730276, "loss": 2.7981, "step": 54 }, { "epoch": 0.009811791990009812, "grad_norm": 0.9127726554870605, "learning_rate": 0.0004999703028129929, "loss": 3.0045, "step": 55 }, { "epoch": 0.00999018820800999, "grad_norm": 1.4292141199111938, "learning_rate": 0.000499969213120398, "loss": 2.8518, "step": 56 }, { "epoch": 0.010168584426010168, "grad_norm": 1.256000280380249, "learning_rate": 0.0004999681037953288, "loss": 2.8525, "step": 57 }, { "epoch": 0.010346980644010347, "grad_norm": 1.166292428970337, "learning_rate": 0.0004999669748378723, "loss": 2.9721, "step": 58 }, { "epoch": 0.010525376862010525, "grad_norm": 1.197026014328003, "learning_rate": 0.0004999658262481172, "loss": 2.6609, "step": 59 }, { "epoch": 0.010703773080010704, "grad_norm": 1.027642846107483, "learning_rate": 0.0004999646580261537, "loss": 2.7145, "step": 60 }, { "epoch": 0.010882169298010882, "grad_norm": 1.0411521196365356, "learning_rate": 0.0004999634701720734, "loss": 2.895, "step": 61 }, { "epoch": 0.01106056551601106, "grad_norm": 1.0868782997131348, "learning_rate": 0.0004999622626859699, "loss": 2.8614, "step": 62 }, { "epoch": 0.01123896173401124, "grad_norm": 1.0062867403030396, "learning_rate": 0.0004999610355679377, "loss": 2.4979, "step": 63 }, { "epoch": 0.011417357952011417, "grad_norm": 1.0268216133117676, "learning_rate": 0.0004999597888180734, "loss": 2.9, "step": 64 }, { "epoch": 0.011595754170011597, "grad_norm": 0.9433121085166931, "learning_rate": 0.0004999585224364748, "loss": 3.0019, "step": 65 }, { "epoch": 0.011774150388011774, "grad_norm": 0.9315336346626282, "learning_rate": 0.0004999572364232414, "loss": 2.7774, "step": 66 }, { "epoch": 0.011952546606011952, "grad_norm": 0.8882467746734619, "learning_rate": 0.0004999559307784743, "loss": 2.7762, "step": 67 }, { "epoch": 0.012130942824012132, "grad_norm": 0.9650992751121521, "learning_rate": 0.000499954605502276, "loss": 2.5736, "step": 68 }, { "epoch": 0.01230933904201231, "grad_norm": 1.0080440044403076, "learning_rate": 0.0004999532605947505, "loss": 2.8355, "step": 69 }, { "epoch": 0.012487735260012487, "grad_norm": 1.0923867225646973, "learning_rate": 0.0004999518960560034, "loss": 2.5415, "step": 70 }, { "epoch": 0.012666131478012667, "grad_norm": 0.8062555193901062, "learning_rate": 0.000499950511886142, "loss": 2.1503, "step": 71 }, { "epoch": 0.012844527696012844, "grad_norm": 1.1151552200317383, "learning_rate": 0.000499949108085275, "loss": 2.9247, "step": 72 }, { "epoch": 0.013022923914013024, "grad_norm": 0.7492188811302185, "learning_rate": 0.0004999476846535125, "loss": 2.577, "step": 73 }, { "epoch": 0.013201320132013201, "grad_norm": 1.0736455917358398, "learning_rate": 0.0004999462415909664, "loss": 2.7738, "step": 74 }, { "epoch": 0.01337971635001338, "grad_norm": 0.9508681297302246, "learning_rate": 0.0004999447788977502, "loss": 2.4073, "step": 75 }, { "epoch": 0.013558112568013559, "grad_norm": 0.8389919996261597, "learning_rate": 0.0004999432965739786, "loss": 2.7485, "step": 76 }, { "epoch": 0.013736508786013736, "grad_norm": 1.055034875869751, "learning_rate": 0.0004999417946197679, "loss": 2.5765, "step": 77 }, { "epoch": 0.013914905004013914, "grad_norm": 1.1897584199905396, "learning_rate": 0.0004999402730352363, "loss": 2.7678, "step": 78 }, { "epoch": 0.014093301222014094, "grad_norm": 0.9991359710693359, "learning_rate": 0.0004999387318205032, "loss": 2.7551, "step": 79 }, { "epoch": 0.014271697440014271, "grad_norm": 0.9848081469535828, "learning_rate": 0.0004999371709756897, "loss": 2.5171, "step": 80 }, { "epoch": 0.014450093658014451, "grad_norm": 1.1168460845947266, "learning_rate": 0.0004999355905009183, "loss": 2.3899, "step": 81 }, { "epoch": 0.014628489876014629, "grad_norm": 0.9936979413032532, "learning_rate": 0.0004999339903963133, "loss": 2.6265, "step": 82 }, { "epoch": 0.014806886094014806, "grad_norm": 1.0733612775802612, "learning_rate": 0.0004999323706620001, "loss": 2.5981, "step": 83 }, { "epoch": 0.014985282312014986, "grad_norm": 1.0748244524002075, "learning_rate": 0.000499930731298106, "loss": 2.5156, "step": 84 }, { "epoch": 0.015163678530015164, "grad_norm": 0.8287347555160522, "learning_rate": 0.00049992907230476, "loss": 2.0732, "step": 85 }, { "epoch": 0.015342074748015341, "grad_norm": 0.8893202543258667, "learning_rate": 0.0004999273936820922, "loss": 2.4538, "step": 86 }, { "epoch": 0.01552047096601552, "grad_norm": 0.8320645689964294, "learning_rate": 0.0004999256954302344, "loss": 2.5106, "step": 87 }, { "epoch": 0.0156988671840157, "grad_norm": 0.9351370930671692, "learning_rate": 0.0004999239775493199, "loss": 2.529, "step": 88 }, { "epoch": 0.015877263402015878, "grad_norm": 0.8308570981025696, "learning_rate": 0.0004999222400394839, "loss": 2.234, "step": 89 }, { "epoch": 0.016055659620016056, "grad_norm": 1.0600675344467163, "learning_rate": 0.0004999204829008628, "loss": 2.5008, "step": 90 }, { "epoch": 0.016234055838016234, "grad_norm": 0.9695082902908325, "learning_rate": 0.0004999187061335943, "loss": 2.3691, "step": 91 }, { "epoch": 0.01641245205601641, "grad_norm": 0.9149223566055298, "learning_rate": 0.0004999169097378184, "loss": 2.5968, "step": 92 }, { "epoch": 0.016590848274016592, "grad_norm": 0.9797092080116272, "learning_rate": 0.0004999150937136758, "loss": 2.535, "step": 93 }, { "epoch": 0.01676924449201677, "grad_norm": 0.7779546976089478, "learning_rate": 0.0004999132580613094, "loss": 2.0829, "step": 94 }, { "epoch": 0.016947640710016948, "grad_norm": 0.8772634863853455, "learning_rate": 0.0004999114027808632, "loss": 2.2751, "step": 95 }, { "epoch": 0.017126036928017126, "grad_norm": 0.828109860420227, "learning_rate": 0.0004999095278724829, "loss": 2.5915, "step": 96 }, { "epoch": 0.017304433146017303, "grad_norm": 1.0454559326171875, "learning_rate": 0.0004999076333363159, "loss": 2.3756, "step": 97 }, { "epoch": 0.01748282936401748, "grad_norm": 1.0165445804595947, "learning_rate": 0.000499905719172511, "loss": 2.3156, "step": 98 }, { "epoch": 0.017661225582017662, "grad_norm": 0.9280771017074585, "learning_rate": 0.0004999037853812183, "loss": 2.4135, "step": 99 }, { "epoch": 0.01783962180001784, "grad_norm": 0.9677728414535522, "learning_rate": 0.0004999018319625898, "loss": 2.2452, "step": 100 }, { "epoch": 0.018018018018018018, "grad_norm": 0.9852823615074158, "learning_rate": 0.0004998998589167791, "loss": 2.1909, "step": 101 }, { "epoch": 0.018196414236018196, "grad_norm": 1.07912278175354, "learning_rate": 0.0004998978662439411, "loss": 2.1278, "step": 102 }, { "epoch": 0.018374810454018373, "grad_norm": 0.9431922435760498, "learning_rate": 0.000499895853944232, "loss": 2.2322, "step": 103 }, { "epoch": 0.018553206672018555, "grad_norm": 0.8767147064208984, "learning_rate": 0.0004998938220178102, "loss": 2.1025, "step": 104 }, { "epoch": 0.018731602890018732, "grad_norm": 1.079720377922058, "learning_rate": 0.0004998917704648352, "loss": 2.5964, "step": 105 }, { "epoch": 0.01890999910801891, "grad_norm": 1.181444764137268, "learning_rate": 0.000499889699285468, "loss": 2.1149, "step": 106 }, { "epoch": 0.019088395326019088, "grad_norm": 1.055641770362854, "learning_rate": 0.0004998876084798714, "loss": 2.3814, "step": 107 }, { "epoch": 0.019266791544019266, "grad_norm": 0.9991637468338013, "learning_rate": 0.0004998854980482095, "loss": 2.4501, "step": 108 }, { "epoch": 0.019445187762019447, "grad_norm": 0.878653883934021, "learning_rate": 0.0004998833679906482, "loss": 2.2617, "step": 109 }, { "epoch": 0.019623583980019624, "grad_norm": 0.9334123134613037, "learning_rate": 0.0004998812183073547, "loss": 2.325, "step": 110 }, { "epoch": 0.019801980198019802, "grad_norm": 0.7886718511581421, "learning_rate": 0.0004998790489984978, "loss": 2.1459, "step": 111 }, { "epoch": 0.01998037641601998, "grad_norm": 0.8349006175994873, "learning_rate": 0.0004998768600642479, "loss": 2.3084, "step": 112 }, { "epoch": 0.020158772634020158, "grad_norm": 0.9214372038841248, "learning_rate": 0.000499874651504777, "loss": 2.3816, "step": 113 }, { "epoch": 0.020337168852020335, "grad_norm": 0.8497713804244995, "learning_rate": 0.0004998724233202585, "loss": 2.2302, "step": 114 }, { "epoch": 0.020515565070020517, "grad_norm": 0.7643554210662842, "learning_rate": 0.0004998701755108674, "loss": 2.3578, "step": 115 }, { "epoch": 0.020693961288020694, "grad_norm": 1.057411551475525, "learning_rate": 0.0004998679080767802, "loss": 2.1724, "step": 116 }, { "epoch": 0.020872357506020872, "grad_norm": 1.0113954544067383, "learning_rate": 0.000499865621018175, "loss": 2.375, "step": 117 }, { "epoch": 0.02105075372402105, "grad_norm": 0.8105971813201904, "learning_rate": 0.0004998633143352315, "loss": 2.0368, "step": 118 }, { "epoch": 0.021229149942021228, "grad_norm": 1.2691274881362915, "learning_rate": 0.0004998609880281309, "loss": 2.2663, "step": 119 }, { "epoch": 0.02140754616002141, "grad_norm": 0.8678051829338074, "learning_rate": 0.0004998586420970557, "loss": 2.0612, "step": 120 }, { "epoch": 0.021585942378021587, "grad_norm": 0.7809403538703918, "learning_rate": 0.0004998562765421903, "loss": 2.0222, "step": 121 }, { "epoch": 0.021764338596021764, "grad_norm": 0.8958665728569031, "learning_rate": 0.0004998538913637205, "loss": 2.2086, "step": 122 }, { "epoch": 0.021942734814021942, "grad_norm": 0.8660983443260193, "learning_rate": 0.0004998514865618335, "loss": 2.0885, "step": 123 }, { "epoch": 0.02212113103202212, "grad_norm": 0.8884083032608032, "learning_rate": 0.0004998490621367184, "loss": 2.0391, "step": 124 }, { "epoch": 0.0222995272500223, "grad_norm": 0.9233642220497131, "learning_rate": 0.0004998466180885653, "loss": 2.3625, "step": 125 }, { "epoch": 0.02247792346802248, "grad_norm": 0.9836124181747437, "learning_rate": 0.0004998441544175666, "loss": 2.2373, "step": 126 }, { "epoch": 0.022656319686022657, "grad_norm": 0.8701616525650024, "learning_rate": 0.0004998416711239153, "loss": 2.2678, "step": 127 }, { "epoch": 0.022834715904022834, "grad_norm": 0.8650346994400024, "learning_rate": 0.0004998391682078067, "loss": 2.1102, "step": 128 }, { "epoch": 0.023013112122023012, "grad_norm": 1.1175918579101562, "learning_rate": 0.0004998366456694374, "loss": 2.1723, "step": 129 }, { "epoch": 0.023191508340023193, "grad_norm": 0.8660110235214233, "learning_rate": 0.0004998341035090055, "loss": 2.0303, "step": 130 }, { "epoch": 0.02336990455802337, "grad_norm": 0.7795286774635315, "learning_rate": 0.0004998315417267105, "loss": 2.2806, "step": 131 }, { "epoch": 0.02354830077602355, "grad_norm": 0.8152791857719421, "learning_rate": 0.0004998289603227538, "loss": 2.0361, "step": 132 }, { "epoch": 0.023726696994023726, "grad_norm": 0.8521414995193481, "learning_rate": 0.0004998263592973381, "loss": 2.3676, "step": 133 }, { "epoch": 0.023905093212023904, "grad_norm": 0.8271788954734802, "learning_rate": 0.0004998237386506676, "loss": 2.2346, "step": 134 }, { "epoch": 0.024083489430024082, "grad_norm": 0.8122360706329346, "learning_rate": 0.0004998210983829482, "loss": 2.2109, "step": 135 }, { "epoch": 0.024261885648024263, "grad_norm": 0.8585101962089539, "learning_rate": 0.0004998184384943871, "loss": 2.2372, "step": 136 }, { "epoch": 0.02444028186602444, "grad_norm": 1.338280200958252, "learning_rate": 0.0004998157589851935, "loss": 2.3498, "step": 137 }, { "epoch": 0.02461867808402462, "grad_norm": 0.8073638677597046, "learning_rate": 0.0004998130598555776, "loss": 1.9356, "step": 138 }, { "epoch": 0.024797074302024796, "grad_norm": 1.4015569686889648, "learning_rate": 0.0004998103411057517, "loss": 2.12, "step": 139 }, { "epoch": 0.024975470520024974, "grad_norm": 3.684471845626831, "learning_rate": 0.0004998076027359289, "loss": 2.1707, "step": 140 }, { "epoch": 0.025153866738025155, "grad_norm": 1.0060230493545532, "learning_rate": 0.0004998048447463245, "loss": 2.1753, "step": 141 }, { "epoch": 0.025332262956025333, "grad_norm": 0.8520100116729736, "learning_rate": 0.0004998020671371551, "loss": 2.2941, "step": 142 }, { "epoch": 0.02551065917402551, "grad_norm": 0.8406434059143066, "learning_rate": 0.0004997992699086389, "loss": 1.9893, "step": 143 }, { "epoch": 0.02568905539202569, "grad_norm": 1.1668790578842163, "learning_rate": 0.0004997964530609956, "loss": 2.0301, "step": 144 }, { "epoch": 0.025867451610025866, "grad_norm": 0.8116411566734314, "learning_rate": 0.0004997936165944462, "loss": 1.976, "step": 145 }, { "epoch": 0.026045847828026047, "grad_norm": 1.0072658061981201, "learning_rate": 0.0004997907605092138, "loss": 2.0916, "step": 146 }, { "epoch": 0.026224244046026225, "grad_norm": 1.1760748624801636, "learning_rate": 0.0004997878848055225, "loss": 1.9554, "step": 147 }, { "epoch": 0.026402640264026403, "grad_norm": 0.9567267894744873, "learning_rate": 0.0004997849894835982, "loss": 2.1948, "step": 148 }, { "epoch": 0.02658103648202658, "grad_norm": 1.109018325805664, "learning_rate": 0.0004997820745436683, "loss": 1.9971, "step": 149 }, { "epoch": 0.02675943270002676, "grad_norm": 0.9176613092422485, "learning_rate": 0.0004997791399859618, "loss": 2.4487, "step": 150 }, { "epoch": 0.026937828918026936, "grad_norm": 0.8620409369468689, "learning_rate": 0.0004997761858107091, "loss": 2.1291, "step": 151 }, { "epoch": 0.027116225136027117, "grad_norm": 0.7816916108131409, "learning_rate": 0.0004997732120181423, "loss": 2.1157, "step": 152 }, { "epoch": 0.027294621354027295, "grad_norm": 0.8684608340263367, "learning_rate": 0.0004997702186084949, "loss": 2.0322, "step": 153 }, { "epoch": 0.027473017572027473, "grad_norm": 0.9221858382225037, "learning_rate": 0.000499767205582002, "loss": 1.9962, "step": 154 }, { "epoch": 0.02765141379002765, "grad_norm": 0.9276618361473083, "learning_rate": 0.0004997641729389002, "loss": 2.2969, "step": 155 }, { "epoch": 0.02782981000802783, "grad_norm": 0.9027115106582642, "learning_rate": 0.0004997611206794278, "loss": 2.1261, "step": 156 }, { "epoch": 0.02800820622602801, "grad_norm": 0.7453658580780029, "learning_rate": 0.0004997580488038245, "loss": 1.9345, "step": 157 }, { "epoch": 0.028186602444028187, "grad_norm": 0.8788852095603943, "learning_rate": 0.0004997549573123314, "loss": 2.096, "step": 158 }, { "epoch": 0.028364998662028365, "grad_norm": 0.8055654764175415, "learning_rate": 0.0004997518462051916, "loss": 1.8397, "step": 159 }, { "epoch": 0.028543394880028543, "grad_norm": 0.9946992993354797, "learning_rate": 0.0004997487154826492, "loss": 1.8003, "step": 160 }, { "epoch": 0.02872179109802872, "grad_norm": 0.9577097296714783, "learning_rate": 0.0004997455651449502, "loss": 2.1386, "step": 161 }, { "epoch": 0.028900187316028902, "grad_norm": 1.0292500257492065, "learning_rate": 0.0004997423951923419, "loss": 2.0328, "step": 162 }, { "epoch": 0.02907858353402908, "grad_norm": 0.8872460126876831, "learning_rate": 0.0004997392056250733, "loss": 1.9567, "step": 163 }, { "epoch": 0.029256979752029257, "grad_norm": 4.496621608734131, "learning_rate": 0.0004997359964433952, "loss": 2.2306, "step": 164 }, { "epoch": 0.029435375970029435, "grad_norm": 1.3478816747665405, "learning_rate": 0.0004997327676475593, "loss": 1.9389, "step": 165 }, { "epoch": 0.029613772188029613, "grad_norm": 1.18978750705719, "learning_rate": 0.0004997295192378192, "loss": 1.9183, "step": 166 }, { "epoch": 0.02979216840602979, "grad_norm": 0.8998153805732727, "learning_rate": 0.0004997262512144302, "loss": 2.0974, "step": 167 }, { "epoch": 0.02997056462402997, "grad_norm": 0.9464337229728699, "learning_rate": 0.0004997229635776488, "loss": 1.8474, "step": 168 }, { "epoch": 0.03014896084203015, "grad_norm": 0.8955067992210388, "learning_rate": 0.0004997196563277334, "loss": 2.0262, "step": 169 }, { "epoch": 0.030327357060030327, "grad_norm": 0.727933943271637, "learning_rate": 0.0004997163294649437, "loss": 1.7326, "step": 170 }, { "epoch": 0.030505753278030505, "grad_norm": 1.0948885679244995, "learning_rate": 0.0004997129829895409, "loss": 2.1483, "step": 171 }, { "epoch": 0.030684149496030683, "grad_norm": 0.9044740200042725, "learning_rate": 0.0004997096169017879, "loss": 1.8832, "step": 172 }, { "epoch": 0.030862545714030864, "grad_norm": 1.0726819038391113, "learning_rate": 0.0004997062312019489, "loss": 1.9131, "step": 173 }, { "epoch": 0.03104094193203104, "grad_norm": 0.9715486764907837, "learning_rate": 0.0004997028258902902, "loss": 2.0829, "step": 174 }, { "epoch": 0.03121933815003122, "grad_norm": 0.8400108218193054, "learning_rate": 0.0004996994009670788, "loss": 1.9984, "step": 175 }, { "epoch": 0.0313977343680314, "grad_norm": 0.9738950729370117, "learning_rate": 0.000499695956432584, "loss": 1.9813, "step": 176 }, { "epoch": 0.03157613058603158, "grad_norm": 1.125439167022705, "learning_rate": 0.0004996924922870762, "loss": 1.8655, "step": 177 }, { "epoch": 0.031754526804031756, "grad_norm": 1.0050427913665771, "learning_rate": 0.0004996890085308275, "loss": 2.1677, "step": 178 }, { "epoch": 0.031932923022031934, "grad_norm": 0.7742742300033569, "learning_rate": 0.0004996855051641116, "loss": 2.0985, "step": 179 }, { "epoch": 0.03211131924003211, "grad_norm": 1.3308488130569458, "learning_rate": 0.0004996819821872035, "loss": 1.7866, "step": 180 }, { "epoch": 0.03228971545803229, "grad_norm": 0.776884913444519, "learning_rate": 0.00049967843960038, "loss": 1.8104, "step": 181 }, { "epoch": 0.03246811167603247, "grad_norm": 0.8655677437782288, "learning_rate": 0.0004996748774039192, "loss": 2.0098, "step": 182 }, { "epoch": 0.032646507894032645, "grad_norm": 0.7239806652069092, "learning_rate": 0.000499671295598101, "loss": 1.6073, "step": 183 }, { "epoch": 0.03282490411203282, "grad_norm": 0.8207644820213318, "learning_rate": 0.0004996676941832069, "loss": 1.8347, "step": 184 }, { "epoch": 0.033003300330033, "grad_norm": 0.7517510652542114, "learning_rate": 0.0004996640731595194, "loss": 2.0775, "step": 185 }, { "epoch": 0.033181696548033185, "grad_norm": 0.7683122158050537, "learning_rate": 0.000499660432527323, "loss": 1.9587, "step": 186 }, { "epoch": 0.03336009276603336, "grad_norm": 0.7496309280395508, "learning_rate": 0.0004996567722869036, "loss": 1.9149, "step": 187 }, { "epoch": 0.03353848898403354, "grad_norm": 0.7267098426818848, "learning_rate": 0.0004996530924385489, "loss": 2.1301, "step": 188 }, { "epoch": 0.03371688520203372, "grad_norm": 1.1069225072860718, "learning_rate": 0.0004996493929825477, "loss": 1.74, "step": 189 }, { "epoch": 0.033895281420033896, "grad_norm": 0.795403778553009, "learning_rate": 0.0004996456739191905, "loss": 2.1685, "step": 190 }, { "epoch": 0.034073677638034074, "grad_norm": 0.7450090050697327, "learning_rate": 0.0004996419352487696, "loss": 2.1011, "step": 191 }, { "epoch": 0.03425207385603425, "grad_norm": 0.7762404084205627, "learning_rate": 0.0004996381769715785, "loss": 2.0786, "step": 192 }, { "epoch": 0.03443047007403443, "grad_norm": 0.7745615839958191, "learning_rate": 0.0004996343990879125, "loss": 1.6322, "step": 193 }, { "epoch": 0.03460886629203461, "grad_norm": 0.8149433732032776, "learning_rate": 0.0004996306015980681, "loss": 1.646, "step": 194 }, { "epoch": 0.034787262510034785, "grad_norm": 0.8039779663085938, "learning_rate": 0.0004996267845023437, "loss": 1.6153, "step": 195 }, { "epoch": 0.03496565872803496, "grad_norm": 0.7436607480049133, "learning_rate": 0.0004996229478010392, "loss": 1.9986, "step": 196 }, { "epoch": 0.03514405494603515, "grad_norm": 1.0629464387893677, "learning_rate": 0.0004996190914944556, "loss": 1.7696, "step": 197 }, { "epoch": 0.035322451164035325, "grad_norm": 0.7059940099716187, "learning_rate": 0.0004996152155828961, "loss": 1.75, "step": 198 }, { "epoch": 0.0355008473820355, "grad_norm": 0.7760869860649109, "learning_rate": 0.0004996113200666649, "loss": 1.6804, "step": 199 }, { "epoch": 0.03567924360003568, "grad_norm": 0.723596453666687, "learning_rate": 0.000499607404946068, "loss": 1.7709, "step": 200 }, { "epoch": 0.03585763981803586, "grad_norm": 0.7342411279678345, "learning_rate": 0.0004996034702214131, "loss": 2.1074, "step": 201 }, { "epoch": 0.036036036036036036, "grad_norm": 1.2572263479232788, "learning_rate": 0.000499599515893009, "loss": 1.8228, "step": 202 }, { "epoch": 0.03621443225403621, "grad_norm": 0.9054358005523682, "learning_rate": 0.0004995955419611663, "loss": 2.0835, "step": 203 }, { "epoch": 0.03639282847203639, "grad_norm": 0.8486645817756653, "learning_rate": 0.0004995915484261971, "loss": 2.1984, "step": 204 }, { "epoch": 0.03657122469003657, "grad_norm": 0.7532051801681519, "learning_rate": 0.0004995875352884152, "loss": 1.9638, "step": 205 }, { "epoch": 0.03674962090803675, "grad_norm": 0.9060899615287781, "learning_rate": 0.0004995835025481357, "loss": 1.8939, "step": 206 }, { "epoch": 0.03692801712603693, "grad_norm": 0.7113338112831116, "learning_rate": 0.0004995794502056751, "loss": 1.6417, "step": 207 }, { "epoch": 0.03710641334403711, "grad_norm": 3.6220555305480957, "learning_rate": 0.0004995753782613521, "loss": 1.7458, "step": 208 }, { "epoch": 0.03728480956203729, "grad_norm": 1.1314635276794434, "learning_rate": 0.0004995712867154863, "loss": 1.9954, "step": 209 }, { "epoch": 0.037463205780037465, "grad_norm": 0.8598477244377136, "learning_rate": 0.0004995671755683989, "loss": 1.7875, "step": 210 }, { "epoch": 0.03764160199803764, "grad_norm": 0.725925087928772, "learning_rate": 0.0004995630448204131, "loss": 1.9205, "step": 211 }, { "epoch": 0.03781999821603782, "grad_norm": 0.9679766297340393, "learning_rate": 0.000499558894471853, "loss": 2.1183, "step": 212 }, { "epoch": 0.037998394434038, "grad_norm": 0.7008562684059143, "learning_rate": 0.0004995547245230448, "loss": 1.8477, "step": 213 }, { "epoch": 0.038176790652038176, "grad_norm": 0.7993139624595642, "learning_rate": 0.0004995505349743158, "loss": 1.6427, "step": 214 }, { "epoch": 0.03835518687003835, "grad_norm": 1.6259403228759766, "learning_rate": 0.0004995463258259953, "loss": 1.7742, "step": 215 }, { "epoch": 0.03853358308803853, "grad_norm": 0.8765113353729248, "learning_rate": 0.0004995420970784137, "loss": 1.6492, "step": 216 }, { "epoch": 0.03871197930603871, "grad_norm": 0.6928495168685913, "learning_rate": 0.0004995378487319032, "loss": 1.7392, "step": 217 }, { "epoch": 0.038890375524038893, "grad_norm": 2.987717390060425, "learning_rate": 0.0004995335807867975, "loss": 1.8262, "step": 218 }, { "epoch": 0.03906877174203907, "grad_norm": 1.99364173412323, "learning_rate": 0.0004995292932434317, "loss": 2.138, "step": 219 }, { "epoch": 0.03924716796003925, "grad_norm": 1.4841055870056152, "learning_rate": 0.0004995249861021425, "loss": 1.7652, "step": 220 }, { "epoch": 0.03942556417803943, "grad_norm": 0.9593233466148376, "learning_rate": 0.0004995206593632685, "loss": 2.2273, "step": 221 }, { "epoch": 0.039603960396039604, "grad_norm": 0.7397097945213318, "learning_rate": 0.0004995163130271491, "loss": 1.6452, "step": 222 }, { "epoch": 0.03978235661403978, "grad_norm": 0.7248395085334778, "learning_rate": 0.0004995119470941259, "loss": 1.7841, "step": 223 }, { "epoch": 0.03996075283203996, "grad_norm": 0.8945801854133606, "learning_rate": 0.0004995075615645418, "loss": 1.8039, "step": 224 }, { "epoch": 0.04013914905004014, "grad_norm": 1.0206605195999146, "learning_rate": 0.0004995031564387411, "loss": 1.8187, "step": 225 }, { "epoch": 0.040317545268040315, "grad_norm": 0.802527129650116, "learning_rate": 0.00049949873171707, "loss": 1.7456, "step": 226 }, { "epoch": 0.04049594148604049, "grad_norm": 0.8061395883560181, "learning_rate": 0.0004994942873998757, "loss": 1.7857, "step": 227 }, { "epoch": 0.04067433770404067, "grad_norm": 0.8295297026634216, "learning_rate": 0.0004994898234875074, "loss": 1.6685, "step": 228 }, { "epoch": 0.040852733922040856, "grad_norm": 0.9258911609649658, "learning_rate": 0.000499485339980316, "loss": 2.0146, "step": 229 }, { "epoch": 0.04103113014004103, "grad_norm": 0.7650581002235413, "learning_rate": 0.0004994808368786531, "loss": 1.7862, "step": 230 }, { "epoch": 0.04120952635804121, "grad_norm": 0.8616735339164734, "learning_rate": 0.0004994763141828728, "loss": 1.7706, "step": 231 }, { "epoch": 0.04138792257604139, "grad_norm": 0.8802286386489868, "learning_rate": 0.00049947177189333, "loss": 1.7055, "step": 232 }, { "epoch": 0.04156631879404157, "grad_norm": 0.6563844680786133, "learning_rate": 0.0004994672100103818, "loss": 1.7655, "step": 233 }, { "epoch": 0.041744715012041744, "grad_norm": 0.8318186402320862, "learning_rate": 0.0004994626285343861, "loss": 1.7316, "step": 234 }, { "epoch": 0.04192311123004192, "grad_norm": 0.8289990425109863, "learning_rate": 0.0004994580274657029, "loss": 1.9734, "step": 235 }, { "epoch": 0.0421015074480421, "grad_norm": 0.8461620807647705, "learning_rate": 0.0004994534068046936, "loss": 1.8266, "step": 236 }, { "epoch": 0.04227990366604228, "grad_norm": 0.8519775867462158, "learning_rate": 0.0004994487665517212, "loss": 1.8579, "step": 237 }, { "epoch": 0.042458299884042455, "grad_norm": 0.707134485244751, "learning_rate": 0.0004994441067071499, "loss": 1.845, "step": 238 }, { "epoch": 0.04263669610204264, "grad_norm": 1.2753993272781372, "learning_rate": 0.0004994394272713459, "loss": 1.8513, "step": 239 }, { "epoch": 0.04281509232004282, "grad_norm": 0.9627553224563599, "learning_rate": 0.0004994347282446765, "loss": 2.2214, "step": 240 }, { "epoch": 0.042993488538042995, "grad_norm": 0.8274680972099304, "learning_rate": 0.0004994300096275108, "loss": 1.8324, "step": 241 }, { "epoch": 0.04317188475604317, "grad_norm": 1.0614216327667236, "learning_rate": 0.0004994252714202198, "loss": 1.8447, "step": 242 }, { "epoch": 0.04335028097404335, "grad_norm": 0.853103518486023, "learning_rate": 0.0004994205136231751, "loss": 1.8614, "step": 243 }, { "epoch": 0.04352867719204353, "grad_norm": 0.6934187412261963, "learning_rate": 0.0004994157362367506, "loss": 1.6686, "step": 244 }, { "epoch": 0.043707073410043706, "grad_norm": 0.856378972530365, "learning_rate": 0.0004994109392613215, "loss": 1.7998, "step": 245 }, { "epoch": 0.043885469628043884, "grad_norm": 0.8739482760429382, "learning_rate": 0.0004994061226972647, "loss": 1.7582, "step": 246 }, { "epoch": 0.04406386584604406, "grad_norm": 0.904869794845581, "learning_rate": 0.0004994012865449582, "loss": 1.966, "step": 247 }, { "epoch": 0.04424226206404424, "grad_norm": 0.8391656279563904, "learning_rate": 0.0004993964308047821, "loss": 1.7177, "step": 248 }, { "epoch": 0.04442065828204442, "grad_norm": 0.9165405035018921, "learning_rate": 0.0004993915554771175, "loss": 1.8742, "step": 249 }, { "epoch": 0.0445990545000446, "grad_norm": 0.7904878854751587, "learning_rate": 0.0004993866605623475, "loss": 1.8811, "step": 250 }, { "epoch": 0.04477745071804478, "grad_norm": 0.7866482734680176, "learning_rate": 0.0004993817460608566, "loss": 1.6096, "step": 251 }, { "epoch": 0.04495584693604496, "grad_norm": 0.8852795362472534, "learning_rate": 0.0004993768119730306, "loss": 1.8237, "step": 252 }, { "epoch": 0.045134243154045135, "grad_norm": 0.9104618430137634, "learning_rate": 0.0004993718582992572, "loss": 1.8859, "step": 253 }, { "epoch": 0.04531263937204531, "grad_norm": 0.7363934516906738, "learning_rate": 0.0004993668850399252, "loss": 1.3366, "step": 254 }, { "epoch": 0.04549103559004549, "grad_norm": 0.7602558732032776, "learning_rate": 0.0004993618921954254, "loss": 2.0173, "step": 255 }, { "epoch": 0.04566943180804567, "grad_norm": 0.8223975896835327, "learning_rate": 0.00049935687976615, "loss": 1.6692, "step": 256 }, { "epoch": 0.045847828026045846, "grad_norm": 0.7281943559646606, "learning_rate": 0.0004993518477524924, "loss": 1.6527, "step": 257 }, { "epoch": 0.046026224244046024, "grad_norm": 0.7140815854072571, "learning_rate": 0.0004993467961548482, "loss": 1.895, "step": 258 }, { "epoch": 0.0462046204620462, "grad_norm": 1.0137954950332642, "learning_rate": 0.0004993417249736138, "loss": 2.0086, "step": 259 }, { "epoch": 0.046383016680046386, "grad_norm": 0.7407935261726379, "learning_rate": 0.0004993366342091876, "loss": 1.6048, "step": 260 }, { "epoch": 0.046561412898046564, "grad_norm": 0.7034161686897278, "learning_rate": 0.0004993315238619695, "loss": 1.8503, "step": 261 }, { "epoch": 0.04673980911604674, "grad_norm": 0.6958416104316711, "learning_rate": 0.0004993263939323608, "loss": 1.6034, "step": 262 }, { "epoch": 0.04691820533404692, "grad_norm": 0.7423539161682129, "learning_rate": 0.0004993212444207644, "loss": 1.598, "step": 263 }, { "epoch": 0.0470966015520471, "grad_norm": 0.7731009125709534, "learning_rate": 0.0004993160753275849, "loss": 1.9353, "step": 264 }, { "epoch": 0.047274997770047275, "grad_norm": 0.7424893379211426, "learning_rate": 0.0004993108866532279, "loss": 1.712, "step": 265 }, { "epoch": 0.04745339398804745, "grad_norm": 0.8562301993370056, "learning_rate": 0.0004993056783981013, "loss": 1.8745, "step": 266 }, { "epoch": 0.04763179020604763, "grad_norm": 1.6230698823928833, "learning_rate": 0.000499300450562614, "loss": 1.7503, "step": 267 }, { "epoch": 0.04781018642404781, "grad_norm": 0.938933253288269, "learning_rate": 0.0004992952031471765, "loss": 1.683, "step": 268 }, { "epoch": 0.047988582642047986, "grad_norm": 0.8198557496070862, "learning_rate": 0.0004992899361522011, "loss": 1.9077, "step": 269 }, { "epoch": 0.048166978860048164, "grad_norm": 0.9028448462486267, "learning_rate": 0.0004992846495781013, "loss": 1.9227, "step": 270 }, { "epoch": 0.04834537507804835, "grad_norm": 0.7884741425514221, "learning_rate": 0.0004992793434252925, "loss": 1.58, "step": 271 }, { "epoch": 0.048523771296048526, "grad_norm": 1.0796072483062744, "learning_rate": 0.0004992740176941912, "loss": 1.6761, "step": 272 }, { "epoch": 0.048702167514048704, "grad_norm": 0.9148755073547363, "learning_rate": 0.0004992686723852161, "loss": 1.7409, "step": 273 }, { "epoch": 0.04888056373204888, "grad_norm": 0.7027503252029419, "learning_rate": 0.0004992633074987864, "loss": 1.4971, "step": 274 }, { "epoch": 0.04905895995004906, "grad_norm": 0.8004087209701538, "learning_rate": 0.000499257923035324, "loss": 1.7677, "step": 275 }, { "epoch": 0.04923735616804924, "grad_norm": 0.7200493216514587, "learning_rate": 0.0004992525189952516, "loss": 1.7596, "step": 276 }, { "epoch": 0.049415752386049415, "grad_norm": 0.7689985036849976, "learning_rate": 0.0004992470953789936, "loss": 1.7032, "step": 277 }, { "epoch": 0.04959414860404959, "grad_norm": 0.7748706936836243, "learning_rate": 0.000499241652186976, "loss": 1.972, "step": 278 }, { "epoch": 0.04977254482204977, "grad_norm": 1.406790018081665, "learning_rate": 0.0004992361894196263, "loss": 1.5758, "step": 279 }, { "epoch": 0.04995094104004995, "grad_norm": 0.6651303172111511, "learning_rate": 0.0004992307070773734, "loss": 1.6668, "step": 280 }, { "epoch": 0.050129337258050126, "grad_norm": 0.7641019821166992, "learning_rate": 0.0004992252051606481, "loss": 1.8233, "step": 281 }, { "epoch": 0.05030773347605031, "grad_norm": 0.6978675127029419, "learning_rate": 0.0004992196836698825, "loss": 1.6394, "step": 282 }, { "epoch": 0.05048612969405049, "grad_norm": 0.7156147956848145, "learning_rate": 0.0004992141426055101, "loss": 1.6568, "step": 283 }, { "epoch": 0.050664525912050666, "grad_norm": 0.791042149066925, "learning_rate": 0.0004992085819679662, "loss": 1.8138, "step": 284 }, { "epoch": 0.050842922130050844, "grad_norm": 0.7295787334442139, "learning_rate": 0.0004992030017576876, "loss": 1.3886, "step": 285 }, { "epoch": 0.05102131834805102, "grad_norm": 1.0952125787734985, "learning_rate": 0.0004991974019751124, "loss": 1.6662, "step": 286 }, { "epoch": 0.0511997145660512, "grad_norm": 1.1296299695968628, "learning_rate": 0.0004991917826206805, "loss": 1.5014, "step": 287 }, { "epoch": 0.05137811078405138, "grad_norm": 0.7258694767951965, "learning_rate": 0.0004991861436948333, "loss": 1.5864, "step": 288 }, { "epoch": 0.051556507002051555, "grad_norm": 0.9456144571304321, "learning_rate": 0.0004991804851980135, "loss": 1.9251, "step": 289 }, { "epoch": 0.05173490322005173, "grad_norm": 0.7830838561058044, "learning_rate": 0.0004991748071306657, "loss": 1.6641, "step": 290 }, { "epoch": 0.05191329943805191, "grad_norm": 0.7046554684638977, "learning_rate": 0.0004991691094932357, "loss": 1.7749, "step": 291 }, { "epoch": 0.052091695656052095, "grad_norm": 1.1261860132217407, "learning_rate": 0.000499163392286171, "loss": 1.7228, "step": 292 }, { "epoch": 0.05227009187405227, "grad_norm": 0.7298058867454529, "learning_rate": 0.0004991576555099208, "loss": 1.666, "step": 293 }, { "epoch": 0.05244848809205245, "grad_norm": 0.7975763082504272, "learning_rate": 0.0004991518991649356, "loss": 1.803, "step": 294 }, { "epoch": 0.05262688431005263, "grad_norm": 1.0635559558868408, "learning_rate": 0.0004991461232516675, "loss": 1.4519, "step": 295 }, { "epoch": 0.052805280528052806, "grad_norm": 0.9030710458755493, "learning_rate": 0.00049914032777057, "loss": 1.607, "step": 296 }, { "epoch": 0.052983676746052984, "grad_norm": 1.0126208066940308, "learning_rate": 0.0004991345127220983, "loss": 1.6796, "step": 297 }, { "epoch": 0.05316207296405316, "grad_norm": 0.8473824858665466, "learning_rate": 0.0004991286781067094, "loss": 1.6065, "step": 298 }, { "epoch": 0.05334046918205334, "grad_norm": 0.7614424228668213, "learning_rate": 0.0004991228239248611, "loss": 1.4427, "step": 299 }, { "epoch": 0.05351886540005352, "grad_norm": 0.8425862193107605, "learning_rate": 0.0004991169501770135, "loss": 1.6097, "step": 300 }, { "epoch": 0.053697261618053695, "grad_norm": 0.7978277802467346, "learning_rate": 0.0004991110568636278, "loss": 1.6084, "step": 301 }, { "epoch": 0.05387565783605387, "grad_norm": 0.9924404621124268, "learning_rate": 0.000499105143985167, "loss": 1.6886, "step": 302 }, { "epoch": 0.05405405405405406, "grad_norm": 1.2324185371398926, "learning_rate": 0.0004990992115420954, "loss": 2.0878, "step": 303 }, { "epoch": 0.054232450272054235, "grad_norm": 0.8193588256835938, "learning_rate": 0.0004990932595348789, "loss": 1.3275, "step": 304 }, { "epoch": 0.05441084649005441, "grad_norm": 0.9794548153877258, "learning_rate": 0.0004990872879639849, "loss": 1.9818, "step": 305 }, { "epoch": 0.05458924270805459, "grad_norm": 0.9556685090065002, "learning_rate": 0.0004990812968298825, "loss": 1.7266, "step": 306 }, { "epoch": 0.05476763892605477, "grad_norm": 0.8109720945358276, "learning_rate": 0.0004990752861330424, "loss": 1.626, "step": 307 }, { "epoch": 0.054946035144054946, "grad_norm": 0.7442929744720459, "learning_rate": 0.0004990692558739363, "loss": 1.6846, "step": 308 }, { "epoch": 0.055124431362055124, "grad_norm": 0.8138058185577393, "learning_rate": 0.0004990632060530381, "loss": 1.4798, "step": 309 }, { "epoch": 0.0553028275800553, "grad_norm": 1.426184892654419, "learning_rate": 0.0004990571366708229, "loss": 1.4324, "step": 310 }, { "epoch": 0.05548122379805548, "grad_norm": 0.9690107703208923, "learning_rate": 0.0004990510477277673, "loss": 1.7228, "step": 311 }, { "epoch": 0.05565962001605566, "grad_norm": 0.8152466416358948, "learning_rate": 0.0004990449392243497, "loss": 1.5993, "step": 312 }, { "epoch": 0.05583801623405584, "grad_norm": 0.7555820345878601, "learning_rate": 0.0004990388111610496, "loss": 1.5512, "step": 313 }, { "epoch": 0.05601641245205602, "grad_norm": 0.8185862302780151, "learning_rate": 0.0004990326635383486, "loss": 1.8048, "step": 314 }, { "epoch": 0.0561948086700562, "grad_norm": 1.3000257015228271, "learning_rate": 0.0004990264963567292, "loss": 1.9454, "step": 315 }, { "epoch": 0.056373204888056375, "grad_norm": 1.501250982284546, "learning_rate": 0.0004990203096166761, "loss": 1.7511, "step": 316 }, { "epoch": 0.05655160110605655, "grad_norm": 0.7413790822029114, "learning_rate": 0.000499014103318675, "loss": 1.6659, "step": 317 }, { "epoch": 0.05672999732405673, "grad_norm": 0.6846387982368469, "learning_rate": 0.0004990078774632134, "loss": 1.5829, "step": 318 }, { "epoch": 0.05690839354205691, "grad_norm": 0.8253748416900635, "learning_rate": 0.0004990016320507802, "loss": 1.6716, "step": 319 }, { "epoch": 0.057086789760057086, "grad_norm": 0.7216707468032837, "learning_rate": 0.000498995367081866, "loss": 1.6294, "step": 320 }, { "epoch": 0.05726518597805726, "grad_norm": 0.8067865371704102, "learning_rate": 0.0004989890825569628, "loss": 1.7566, "step": 321 }, { "epoch": 0.05744358219605744, "grad_norm": 0.867411732673645, "learning_rate": 0.0004989827784765643, "loss": 1.5055, "step": 322 }, { "epoch": 0.05762197841405762, "grad_norm": 0.7605225443840027, "learning_rate": 0.0004989764548411654, "loss": 1.6925, "step": 323 }, { "epoch": 0.057800374632057804, "grad_norm": 0.7990824580192566, "learning_rate": 0.0004989701116512629, "loss": 1.8807, "step": 324 }, { "epoch": 0.05797877085005798, "grad_norm": 0.7364970445632935, "learning_rate": 0.000498963748907355, "loss": 1.6086, "step": 325 }, { "epoch": 0.05815716706805816, "grad_norm": 0.7894652485847473, "learning_rate": 0.0004989573666099415, "loss": 1.9226, "step": 326 }, { "epoch": 0.05833556328605834, "grad_norm": 0.8756299018859863, "learning_rate": 0.0004989509647595234, "loss": 1.7023, "step": 327 }, { "epoch": 0.058513959504058514, "grad_norm": 1.0760093927383423, "learning_rate": 0.0004989445433566037, "loss": 1.4588, "step": 328 }, { "epoch": 0.05869235572205869, "grad_norm": 0.8876066207885742, "learning_rate": 0.0004989381024016867, "loss": 1.7266, "step": 329 }, { "epoch": 0.05887075194005887, "grad_norm": 4.222714900970459, "learning_rate": 0.0004989316418952782, "loss": 1.6749, "step": 330 }, { "epoch": 0.05904914815805905, "grad_norm": 1.0172480344772339, "learning_rate": 0.0004989251618378859, "loss": 1.6138, "step": 331 }, { "epoch": 0.059227544376059225, "grad_norm": 1.1439893245697021, "learning_rate": 0.0004989186622300183, "loss": 1.6511, "step": 332 }, { "epoch": 0.0594059405940594, "grad_norm": 0.7901877760887146, "learning_rate": 0.0004989121430721862, "loss": 1.5239, "step": 333 }, { "epoch": 0.05958433681205958, "grad_norm": 0.8215880393981934, "learning_rate": 0.0004989056043649014, "loss": 1.6242, "step": 334 }, { "epoch": 0.059762733030059766, "grad_norm": 0.7913690805435181, "learning_rate": 0.0004988990461086777, "loss": 1.9562, "step": 335 }, { "epoch": 0.05994112924805994, "grad_norm": 0.7676241397857666, "learning_rate": 0.0004988924683040298, "loss": 1.6554, "step": 336 }, { "epoch": 0.06011952546606012, "grad_norm": 0.7700140476226807, "learning_rate": 0.0004988858709514747, "loss": 1.5174, "step": 337 }, { "epoch": 0.0602979216840603, "grad_norm": 0.7194883227348328, "learning_rate": 0.0004988792540515304, "loss": 1.711, "step": 338 }, { "epoch": 0.06047631790206048, "grad_norm": 0.737006425857544, "learning_rate": 0.0004988726176047164, "loss": 1.7161, "step": 339 }, { "epoch": 0.060654714120060654, "grad_norm": 0.8978502154350281, "learning_rate": 0.0004988659616115544, "loss": 1.4597, "step": 340 }, { "epoch": 0.06083311033806083, "grad_norm": 0.850236177444458, "learning_rate": 0.0004988592860725667, "loss": 1.7958, "step": 341 }, { "epoch": 0.06101150655606101, "grad_norm": 0.6929709911346436, "learning_rate": 0.0004988525909882779, "loss": 1.4684, "step": 342 }, { "epoch": 0.06118990277406119, "grad_norm": 0.8175438642501831, "learning_rate": 0.0004988458763592135, "loss": 1.5871, "step": 343 }, { "epoch": 0.061368298992061365, "grad_norm": 1.5031802654266357, "learning_rate": 0.0004988391421859011, "loss": 1.8358, "step": 344 }, { "epoch": 0.06154669521006155, "grad_norm": 0.8213221430778503, "learning_rate": 0.0004988323884688696, "loss": 1.6868, "step": 345 }, { "epoch": 0.06172509142806173, "grad_norm": 0.855012059211731, "learning_rate": 0.0004988256152086495, "loss": 1.7024, "step": 346 }, { "epoch": 0.061903487646061905, "grad_norm": 0.9920960664749146, "learning_rate": 0.0004988188224057724, "loss": 1.641, "step": 347 }, { "epoch": 0.06208188386406208, "grad_norm": 0.7211305499076843, "learning_rate": 0.0004988120100607723, "loss": 1.3817, "step": 348 }, { "epoch": 0.06226028008206226, "grad_norm": 0.8306823968887329, "learning_rate": 0.0004988051781741839, "loss": 1.6479, "step": 349 }, { "epoch": 0.06243867630006244, "grad_norm": 0.6879592537879944, "learning_rate": 0.0004987983267465439, "loss": 1.7006, "step": 350 }, { "epoch": 0.06261707251806262, "grad_norm": 0.7928709387779236, "learning_rate": 0.0004987914557783905, "loss": 1.7222, "step": 351 }, { "epoch": 0.0627954687360628, "grad_norm": 0.8470203876495361, "learning_rate": 0.000498784565270263, "loss": 1.7225, "step": 352 }, { "epoch": 0.06297386495406297, "grad_norm": 0.7351035475730896, "learning_rate": 0.0004987776552227029, "loss": 1.574, "step": 353 }, { "epoch": 0.06315226117206316, "grad_norm": 0.8006918430328369, "learning_rate": 0.0004987707256362529, "loss": 1.9217, "step": 354 }, { "epoch": 0.06333065739006333, "grad_norm": 0.7959868907928467, "learning_rate": 0.0004987637765114571, "loss": 1.6191, "step": 355 }, { "epoch": 0.06350905360806351, "grad_norm": 0.6624523401260376, "learning_rate": 0.0004987568078488613, "loss": 1.4849, "step": 356 }, { "epoch": 0.06368744982606368, "grad_norm": 0.665393054485321, "learning_rate": 0.0004987498196490129, "loss": 1.67, "step": 357 }, { "epoch": 0.06386584604406387, "grad_norm": 1.2895721197128296, "learning_rate": 0.0004987428119124607, "loss": 1.6514, "step": 358 }, { "epoch": 0.06404424226206404, "grad_norm": 0.7853053212165833, "learning_rate": 0.0004987357846397551, "loss": 1.528, "step": 359 }, { "epoch": 0.06422263848006422, "grad_norm": 0.7855213284492493, "learning_rate": 0.000498728737831448, "loss": 1.3609, "step": 360 }, { "epoch": 0.06440103469806441, "grad_norm": 0.9577522277832031, "learning_rate": 0.0004987216714880929, "loss": 1.6681, "step": 361 }, { "epoch": 0.06457943091606458, "grad_norm": 0.7544964551925659, "learning_rate": 0.0004987145856102448, "loss": 1.5707, "step": 362 }, { "epoch": 0.06475782713406476, "grad_norm": 0.9541155695915222, "learning_rate": 0.00049870748019846, "loss": 1.5559, "step": 363 }, { "epoch": 0.06493622335206493, "grad_norm": 0.8656521439552307, "learning_rate": 0.0004987003552532969, "loss": 1.5096, "step": 364 }, { "epoch": 0.06511461957006512, "grad_norm": 0.9872167706489563, "learning_rate": 0.0004986932107753148, "loss": 1.575, "step": 365 }, { "epoch": 0.06529301578806529, "grad_norm": 0.7449053525924683, "learning_rate": 0.000498686046765075, "loss": 1.3222, "step": 366 }, { "epoch": 0.06547141200606547, "grad_norm": 0.7896831631660461, "learning_rate": 0.0004986788632231401, "loss": 1.6182, "step": 367 }, { "epoch": 0.06564980822406564, "grad_norm": 0.8574457168579102, "learning_rate": 0.0004986716601500744, "loss": 1.6993, "step": 368 }, { "epoch": 0.06582820444206583, "grad_norm": 0.8115587830543518, "learning_rate": 0.0004986644375464434, "loss": 1.6287, "step": 369 }, { "epoch": 0.066006600660066, "grad_norm": 1.8400505781173706, "learning_rate": 0.0004986571954128145, "loss": 1.6949, "step": 370 }, { "epoch": 0.06618499687806619, "grad_norm": 0.8843457698822021, "learning_rate": 0.0004986499337497565, "loss": 1.3939, "step": 371 }, { "epoch": 0.06636339309606637, "grad_norm": 0.9792248010635376, "learning_rate": 0.0004986426525578398, "loss": 1.6735, "step": 372 }, { "epoch": 0.06654178931406654, "grad_norm": 0.8991268277168274, "learning_rate": 0.000498635351837636, "loss": 1.6638, "step": 373 }, { "epoch": 0.06672018553206673, "grad_norm": 1.0878491401672363, "learning_rate": 0.0004986280315897188, "loss": 1.3675, "step": 374 }, { "epoch": 0.0668985817500669, "grad_norm": 0.9121967554092407, "learning_rate": 0.0004986206918146629, "loss": 1.4673, "step": 375 }, { "epoch": 0.06707697796806708, "grad_norm": 0.9130116105079651, "learning_rate": 0.0004986133325130448, "loss": 1.8798, "step": 376 }, { "epoch": 0.06725537418606725, "grad_norm": 2.4044368267059326, "learning_rate": 0.0004986059536854427, "loss": 1.396, "step": 377 }, { "epoch": 0.06743377040406744, "grad_norm": 1.2057360410690308, "learning_rate": 0.0004985985553324359, "loss": 1.6874, "step": 378 }, { "epoch": 0.06761216662206761, "grad_norm": 0.9697250723838806, "learning_rate": 0.0004985911374546056, "loss": 1.6581, "step": 379 }, { "epoch": 0.06779056284006779, "grad_norm": 8.421307563781738, "learning_rate": 0.0004985837000525343, "loss": 1.4384, "step": 380 }, { "epoch": 0.06796895905806796, "grad_norm": 1.0877493619918823, "learning_rate": 0.0004985762431268062, "loss": 1.6454, "step": 381 }, { "epoch": 0.06814735527606815, "grad_norm": 0.8904185891151428, "learning_rate": 0.0004985687666780069, "loss": 1.611, "step": 382 }, { "epoch": 0.06832575149406833, "grad_norm": 0.9544228911399841, "learning_rate": 0.0004985612707067237, "loss": 1.3702, "step": 383 }, { "epoch": 0.0685041477120685, "grad_norm": 0.9072893261909485, "learning_rate": 0.0004985537552135451, "loss": 1.6441, "step": 384 }, { "epoch": 0.06868254393006869, "grad_norm": 0.8297262191772461, "learning_rate": 0.0004985462201990617, "loss": 1.7376, "step": 385 }, { "epoch": 0.06886094014806886, "grad_norm": 1.0085870027542114, "learning_rate": 0.000498538665663865, "loss": 1.5545, "step": 386 }, { "epoch": 0.06903933636606904, "grad_norm": 0.7521477341651917, "learning_rate": 0.0004985310916085485, "loss": 1.77, "step": 387 }, { "epoch": 0.06921773258406921, "grad_norm": 0.8971685767173767, "learning_rate": 0.000498523498033707, "loss": 1.6057, "step": 388 }, { "epoch": 0.0693961288020694, "grad_norm": 0.7471177577972412, "learning_rate": 0.000498515884939937, "loss": 1.6315, "step": 389 }, { "epoch": 0.06957452502006957, "grad_norm": 1.5577987432479858, "learning_rate": 0.0004985082523278363, "loss": 1.7456, "step": 390 }, { "epoch": 0.06975292123806975, "grad_norm": 0.7326628565788269, "learning_rate": 0.0004985006001980044, "loss": 1.8009, "step": 391 }, { "epoch": 0.06993131745606992, "grad_norm": 0.6636003851890564, "learning_rate": 0.0004984929285510423, "loss": 1.6494, "step": 392 }, { "epoch": 0.07010971367407011, "grad_norm": 0.7074577808380127, "learning_rate": 0.0004984852373875524, "loss": 1.5621, "step": 393 }, { "epoch": 0.0702881098920703, "grad_norm": 0.682092547416687, "learning_rate": 0.000498477526708139, "loss": 1.3208, "step": 394 }, { "epoch": 0.07046650611007046, "grad_norm": 0.692871630191803, "learning_rate": 0.0004984697965134076, "loss": 1.2526, "step": 395 }, { "epoch": 0.07064490232807065, "grad_norm": 0.6794890761375427, "learning_rate": 0.0004984620468039653, "loss": 1.772, "step": 396 }, { "epoch": 0.07082329854607082, "grad_norm": 0.9123995304107666, "learning_rate": 0.0004984542775804207, "loss": 1.6829, "step": 397 }, { "epoch": 0.071001694764071, "grad_norm": 0.9195268154144287, "learning_rate": 0.0004984464888433842, "loss": 1.5221, "step": 398 }, { "epoch": 0.07118009098207118, "grad_norm": 0.6694832444190979, "learning_rate": 0.0004984386805934672, "loss": 1.5026, "step": 399 }, { "epoch": 0.07135848720007136, "grad_norm": 0.7524397969245911, "learning_rate": 0.0004984308528312833, "loss": 1.6038, "step": 400 }, { "epoch": 0.07153688341807153, "grad_norm": 1.0407260656356812, "learning_rate": 0.000498423005557447, "loss": 1.9059, "step": 401 }, { "epoch": 0.07171527963607172, "grad_norm": 0.9182829260826111, "learning_rate": 0.0004984151387725748, "loss": 1.8141, "step": 402 }, { "epoch": 0.07189367585407189, "grad_norm": 0.67998206615448, "learning_rate": 0.0004984072524772845, "loss": 1.607, "step": 403 }, { "epoch": 0.07207207207207207, "grad_norm": 0.6888518333435059, "learning_rate": 0.0004983993466721955, "loss": 1.8157, "step": 404 }, { "epoch": 0.07225046829007226, "grad_norm": 0.9161537289619446, "learning_rate": 0.0004983914213579287, "loss": 1.28, "step": 405 }, { "epoch": 0.07242886450807243, "grad_norm": 0.8046262860298157, "learning_rate": 0.0004983834765351065, "loss": 1.7245, "step": 406 }, { "epoch": 0.07260726072607261, "grad_norm": 0.7060288786888123, "learning_rate": 0.000498375512204353, "loss": 1.3604, "step": 407 }, { "epoch": 0.07278565694407278, "grad_norm": 0.6991462707519531, "learning_rate": 0.0004983675283662936, "loss": 1.3504, "step": 408 }, { "epoch": 0.07296405316207297, "grad_norm": 0.7093139886856079, "learning_rate": 0.0004983595250215556, "loss": 1.3739, "step": 409 }, { "epoch": 0.07314244938007314, "grad_norm": 0.7139406800270081, "learning_rate": 0.0004983515021707672, "loss": 1.6803, "step": 410 }, { "epoch": 0.07332084559807332, "grad_norm": 0.6523078680038452, "learning_rate": 0.0004983434598145587, "loss": 1.5356, "step": 411 }, { "epoch": 0.0734992418160735, "grad_norm": 0.7209059596061707, "learning_rate": 0.0004983353979535617, "loss": 1.5146, "step": 412 }, { "epoch": 0.07367763803407368, "grad_norm": 0.8449965715408325, "learning_rate": 0.0004983273165884096, "loss": 1.5411, "step": 413 }, { "epoch": 0.07385603425207386, "grad_norm": 0.8013091683387756, "learning_rate": 0.0004983192157197368, "loss": 1.6204, "step": 414 }, { "epoch": 0.07403443047007403, "grad_norm": 0.7177372574806213, "learning_rate": 0.0004983110953481796, "loss": 1.3559, "step": 415 }, { "epoch": 0.07421282668807422, "grad_norm": 1.297348976135254, "learning_rate": 0.0004983029554743759, "loss": 1.3917, "step": 416 }, { "epoch": 0.07439122290607439, "grad_norm": 0.7415265440940857, "learning_rate": 0.0004982947960989649, "loss": 1.4611, "step": 417 }, { "epoch": 0.07456961912407457, "grad_norm": 0.7333908677101135, "learning_rate": 0.0004982866172225875, "loss": 1.5049, "step": 418 }, { "epoch": 0.07474801534207474, "grad_norm": 0.6998400092124939, "learning_rate": 0.000498278418845886, "loss": 1.5281, "step": 419 }, { "epoch": 0.07492641156007493, "grad_norm": 0.8220930099487305, "learning_rate": 0.0004982702009695044, "loss": 1.6659, "step": 420 }, { "epoch": 0.0751048077780751, "grad_norm": 0.6637287139892578, "learning_rate": 0.0004982619635940879, "loss": 1.5122, "step": 421 }, { "epoch": 0.07528320399607528, "grad_norm": 0.6980449557304382, "learning_rate": 0.0004982537067202837, "loss": 1.4508, "step": 422 }, { "epoch": 0.07546160021407546, "grad_norm": 0.7847539782524109, "learning_rate": 0.0004982454303487403, "loss": 1.5511, "step": 423 }, { "epoch": 0.07563999643207564, "grad_norm": 0.7317553758621216, "learning_rate": 0.0004982371344801074, "loss": 1.6737, "step": 424 }, { "epoch": 0.07581839265007582, "grad_norm": 0.816692054271698, "learning_rate": 0.000498228819115037, "loss": 1.6792, "step": 425 }, { "epoch": 0.075996788868076, "grad_norm": 0.6910188794136047, "learning_rate": 0.0004982204842541818, "loss": 1.6721, "step": 426 }, { "epoch": 0.07617518508607618, "grad_norm": 0.6814787983894348, "learning_rate": 0.0004982121298981967, "loss": 1.3176, "step": 427 }, { "epoch": 0.07635358130407635, "grad_norm": 1.1283695697784424, "learning_rate": 0.0004982037560477377, "loss": 1.6094, "step": 428 }, { "epoch": 0.07653197752207654, "grad_norm": 0.7835364937782288, "learning_rate": 0.0004981953627034625, "loss": 1.6079, "step": 429 }, { "epoch": 0.0767103737400767, "grad_norm": 0.9054422974586487, "learning_rate": 0.0004981869498660304, "loss": 1.5844, "step": 430 }, { "epoch": 0.07688876995807689, "grad_norm": 1.4797576665878296, "learning_rate": 0.000498178517536102, "loss": 1.6547, "step": 431 }, { "epoch": 0.07706716617607706, "grad_norm": 0.9180254340171814, "learning_rate": 0.0004981700657143396, "loss": 1.6352, "step": 432 }, { "epoch": 0.07724556239407725, "grad_norm": 0.703322172164917, "learning_rate": 0.0004981615944014071, "loss": 1.3254, "step": 433 }, { "epoch": 0.07742395861207742, "grad_norm": 0.794771134853363, "learning_rate": 0.0004981531035979697, "loss": 1.6643, "step": 434 }, { "epoch": 0.0776023548300776, "grad_norm": 0.7554542422294617, "learning_rate": 0.0004981445933046944, "loss": 1.393, "step": 435 }, { "epoch": 0.07778075104807779, "grad_norm": 0.8355424404144287, "learning_rate": 0.0004981360635222495, "loss": 1.7901, "step": 436 }, { "epoch": 0.07795914726607796, "grad_norm": 0.6845025420188904, "learning_rate": 0.0004981275142513049, "loss": 1.6268, "step": 437 }, { "epoch": 0.07813754348407814, "grad_norm": 0.7474300265312195, "learning_rate": 0.0004981189454925322, "loss": 1.2496, "step": 438 }, { "epoch": 0.07831593970207831, "grad_norm": 0.6945734620094299, "learning_rate": 0.0004981103572466042, "loss": 1.7841, "step": 439 }, { "epoch": 0.0784943359200785, "grad_norm": 0.6879515051841736, "learning_rate": 0.0004981017495141955, "loss": 1.4973, "step": 440 }, { "epoch": 0.07867273213807867, "grad_norm": 0.9907708168029785, "learning_rate": 0.0004980931222959823, "loss": 1.9778, "step": 441 }, { "epoch": 0.07885112835607885, "grad_norm": 1.6716070175170898, "learning_rate": 0.000498084475592642, "loss": 1.8797, "step": 442 }, { "epoch": 0.07902952457407902, "grad_norm": 0.8019148707389832, "learning_rate": 0.0004980758094048536, "loss": 1.4991, "step": 443 }, { "epoch": 0.07920792079207921, "grad_norm": 0.7047806978225708, "learning_rate": 0.000498067123733298, "loss": 1.4349, "step": 444 }, { "epoch": 0.07938631701007938, "grad_norm": 0.7296415567398071, "learning_rate": 0.0004980584185786573, "loss": 1.504, "step": 445 }, { "epoch": 0.07956471322807956, "grad_norm": 0.8090648651123047, "learning_rate": 0.0004980496939416151, "loss": 1.5402, "step": 446 }, { "epoch": 0.07974310944607975, "grad_norm": 0.7694994807243347, "learning_rate": 0.0004980409498228566, "loss": 1.6298, "step": 447 }, { "epoch": 0.07992150566407992, "grad_norm": 1.529492735862732, "learning_rate": 0.0004980321862230688, "loss": 1.4919, "step": 448 }, { "epoch": 0.0800999018820801, "grad_norm": 0.7055628299713135, "learning_rate": 0.0004980234031429397, "loss": 1.3756, "step": 449 }, { "epoch": 0.08027829810008028, "grad_norm": 0.7243545055389404, "learning_rate": 0.0004980146005831592, "loss": 1.4947, "step": 450 }, { "epoch": 0.08045669431808046, "grad_norm": 0.7006012797355652, "learning_rate": 0.0004980057785444189, "loss": 1.6987, "step": 451 }, { "epoch": 0.08063509053608063, "grad_norm": 0.7193490862846375, "learning_rate": 0.0004979969370274113, "loss": 1.6076, "step": 452 }, { "epoch": 0.08081348675408082, "grad_norm": 0.7039264440536499, "learning_rate": 0.0004979880760328312, "loss": 1.708, "step": 453 }, { "epoch": 0.08099188297208099, "grad_norm": 0.7600789666175842, "learning_rate": 0.0004979791955613741, "loss": 1.9332, "step": 454 }, { "epoch": 0.08117027919008117, "grad_norm": 4.260866165161133, "learning_rate": 0.0004979702956137378, "loss": 1.5614, "step": 455 }, { "epoch": 0.08134867540808134, "grad_norm": 0.7909694314002991, "learning_rate": 0.0004979613761906212, "loss": 1.5609, "step": 456 }, { "epoch": 0.08152707162608153, "grad_norm": 1.7039103507995605, "learning_rate": 0.0004979524372927248, "loss": 1.6149, "step": 457 }, { "epoch": 0.08170546784408171, "grad_norm": 0.7937911152839661, "learning_rate": 0.0004979434789207506, "loss": 1.4368, "step": 458 }, { "epoch": 0.08188386406208188, "grad_norm": 1.0153762102127075, "learning_rate": 0.0004979345010754024, "loss": 1.4232, "step": 459 }, { "epoch": 0.08206226028008207, "grad_norm": 0.7721362709999084, "learning_rate": 0.0004979255037573851, "loss": 1.6384, "step": 460 }, { "epoch": 0.08224065649808224, "grad_norm": 0.6385205388069153, "learning_rate": 0.0004979164869674055, "loss": 1.2598, "step": 461 }, { "epoch": 0.08241905271608242, "grad_norm": 0.7460764646530151, "learning_rate": 0.0004979074507061716, "loss": 1.4009, "step": 462 }, { "epoch": 0.08259744893408259, "grad_norm": 0.6863163113594055, "learning_rate": 0.0004978983949743932, "loss": 1.156, "step": 463 }, { "epoch": 0.08277584515208278, "grad_norm": 0.9043185710906982, "learning_rate": 0.0004978893197727817, "loss": 1.4024, "step": 464 }, { "epoch": 0.08295424137008295, "grad_norm": 0.8068817853927612, "learning_rate": 0.0004978802251020494, "loss": 1.5392, "step": 465 }, { "epoch": 0.08313263758808313, "grad_norm": 0.8445210456848145, "learning_rate": 0.0004978711109629112, "loss": 1.3355, "step": 466 }, { "epoch": 0.08331103380608332, "grad_norm": 0.9372073411941528, "learning_rate": 0.0004978619773560824, "loss": 1.5414, "step": 467 }, { "epoch": 0.08348943002408349, "grad_norm": 0.7277926206588745, "learning_rate": 0.0004978528242822807, "loss": 1.3025, "step": 468 }, { "epoch": 0.08366782624208367, "grad_norm": 0.8729844093322754, "learning_rate": 0.0004978436517422248, "loss": 1.5157, "step": 469 }, { "epoch": 0.08384622246008384, "grad_norm": 0.6946988105773926, "learning_rate": 0.0004978344597366351, "loss": 1.5723, "step": 470 }, { "epoch": 0.08402461867808403, "grad_norm": 0.6131045818328857, "learning_rate": 0.0004978252482662337, "loss": 1.3419, "step": 471 }, { "epoch": 0.0842030148960842, "grad_norm": 0.8643115758895874, "learning_rate": 0.0004978160173317438, "loss": 1.609, "step": 472 }, { "epoch": 0.08438141111408438, "grad_norm": 0.8250529170036316, "learning_rate": 0.0004978067669338906, "loss": 1.7085, "step": 473 }, { "epoch": 0.08455980733208456, "grad_norm": 0.8330903649330139, "learning_rate": 0.0004977974970734006, "loss": 1.5074, "step": 474 }, { "epoch": 0.08473820355008474, "grad_norm": 0.7635675072669983, "learning_rate": 0.0004977882077510018, "loss": 1.4812, "step": 475 }, { "epoch": 0.08491659976808491, "grad_norm": 1.1247259378433228, "learning_rate": 0.0004977788989674238, "loss": 1.4147, "step": 476 }, { "epoch": 0.0850949959860851, "grad_norm": 0.6841705441474915, "learning_rate": 0.0004977695707233977, "loss": 1.3934, "step": 477 }, { "epoch": 0.08527339220408528, "grad_norm": 0.7214035987854004, "learning_rate": 0.0004977602230196561, "loss": 1.3389, "step": 478 }, { "epoch": 0.08545178842208545, "grad_norm": 0.7651268243789673, "learning_rate": 0.0004977508558569332, "loss": 1.5814, "step": 479 }, { "epoch": 0.08563018464008564, "grad_norm": 0.7980949282646179, "learning_rate": 0.0004977414692359648, "loss": 1.5766, "step": 480 }, { "epoch": 0.0858085808580858, "grad_norm": 2.5435357093811035, "learning_rate": 0.0004977320631574879, "loss": 1.4437, "step": 481 }, { "epoch": 0.08598697707608599, "grad_norm": 0.8370219469070435, "learning_rate": 0.0004977226376222415, "loss": 1.3928, "step": 482 }, { "epoch": 0.08616537329408616, "grad_norm": 0.6488362550735474, "learning_rate": 0.0004977131926309656, "loss": 1.6902, "step": 483 }, { "epoch": 0.08634376951208635, "grad_norm": 0.7219479084014893, "learning_rate": 0.0004977037281844023, "loss": 1.5692, "step": 484 }, { "epoch": 0.08652216573008652, "grad_norm": 1.9494524002075195, "learning_rate": 0.0004976942442832946, "loss": 1.5357, "step": 485 }, { "epoch": 0.0867005619480867, "grad_norm": 0.8252092003822327, "learning_rate": 0.0004976847409283876, "loss": 1.566, "step": 486 }, { "epoch": 0.08687895816608687, "grad_norm": 0.624910295009613, "learning_rate": 0.0004976752181204277, "loss": 1.5799, "step": 487 }, { "epoch": 0.08705735438408706, "grad_norm": 0.8229494094848633, "learning_rate": 0.0004976656758601628, "loss": 1.3698, "step": 488 }, { "epoch": 0.08723575060208724, "grad_norm": 0.8363699316978455, "learning_rate": 0.0004976561141483421, "loss": 1.3137, "step": 489 }, { "epoch": 0.08741414682008741, "grad_norm": 0.665203869342804, "learning_rate": 0.0004976465329857169, "loss": 1.579, "step": 490 }, { "epoch": 0.0875925430380876, "grad_norm": 0.7317224144935608, "learning_rate": 0.0004976369323730396, "loss": 1.6404, "step": 491 }, { "epoch": 0.08777093925608777, "grad_norm": 0.7230332493782043, "learning_rate": 0.0004976273123110642, "loss": 1.5054, "step": 492 }, { "epoch": 0.08794933547408795, "grad_norm": 0.9114980697631836, "learning_rate": 0.0004976176728005462, "loss": 1.5787, "step": 493 }, { "epoch": 0.08812773169208812, "grad_norm": 0.7472654581069946, "learning_rate": 0.0004976080138422428, "loss": 1.6348, "step": 494 }, { "epoch": 0.08830612791008831, "grad_norm": 0.7824138402938843, "learning_rate": 0.0004975983354369124, "loss": 1.5032, "step": 495 }, { "epoch": 0.08848452412808848, "grad_norm": 0.6334506869316101, "learning_rate": 0.0004975886375853155, "loss": 1.3295, "step": 496 }, { "epoch": 0.08866292034608866, "grad_norm": 1.3684911727905273, "learning_rate": 0.0004975789202882133, "loss": 1.6436, "step": 497 }, { "epoch": 0.08884131656408883, "grad_norm": 0.7262062430381775, "learning_rate": 0.0004975691835463694, "loss": 1.4747, "step": 498 }, { "epoch": 0.08901971278208902, "grad_norm": 0.6823199987411499, "learning_rate": 0.0004975594273605484, "loss": 1.4783, "step": 499 }, { "epoch": 0.0891981090000892, "grad_norm": 0.5289890766143799, "learning_rate": 0.0004975496517315164, "loss": 1.1087, "step": 500 }, { "epoch": 0.08937650521808937, "grad_norm": 0.8217448592185974, "learning_rate": 0.0004975398566600413, "loss": 1.9442, "step": 501 }, { "epoch": 0.08955490143608956, "grad_norm": 0.6704925894737244, "learning_rate": 0.0004975300421468925, "loss": 1.6343, "step": 502 }, { "epoch": 0.08973329765408973, "grad_norm": 0.8133907914161682, "learning_rate": 0.0004975202081928405, "loss": 1.7413, "step": 503 }, { "epoch": 0.08991169387208992, "grad_norm": 0.6570431590080261, "learning_rate": 0.0004975103547986581, "loss": 1.4866, "step": 504 }, { "epoch": 0.09009009009009009, "grad_norm": 0.7188069820404053, "learning_rate": 0.0004975004819651188, "loss": 1.6916, "step": 505 }, { "epoch": 0.09026848630809027, "grad_norm": 1.0071742534637451, "learning_rate": 0.0004974905896929981, "loss": 1.458, "step": 506 }, { "epoch": 0.09044688252609044, "grad_norm": 0.7110443711280823, "learning_rate": 0.0004974806779830731, "loss": 1.4134, "step": 507 }, { "epoch": 0.09062527874409063, "grad_norm": 0.6911363005638123, "learning_rate": 0.0004974707468361221, "loss": 1.5558, "step": 508 }, { "epoch": 0.0908036749620908, "grad_norm": 0.632495641708374, "learning_rate": 0.0004974607962529252, "loss": 1.2942, "step": 509 }, { "epoch": 0.09098207118009098, "grad_norm": 0.7343394160270691, "learning_rate": 0.0004974508262342638, "loss": 1.5239, "step": 510 }, { "epoch": 0.09116046739809117, "grad_norm": 1.4106143712997437, "learning_rate": 0.000497440836780921, "loss": 1.3772, "step": 511 }, { "epoch": 0.09133886361609134, "grad_norm": 0.7864501476287842, "learning_rate": 0.0004974308278936813, "loss": 1.672, "step": 512 }, { "epoch": 0.09151725983409152, "grad_norm": 0.7116146087646484, "learning_rate": 0.000497420799573331, "loss": 1.3471, "step": 513 }, { "epoch": 0.09169565605209169, "grad_norm": 0.6454455256462097, "learning_rate": 0.0004974107518206575, "loss": 1.5374, "step": 514 }, { "epoch": 0.09187405227009188, "grad_norm": 1.5066065788269043, "learning_rate": 0.00049740068463645, "loss": 1.1575, "step": 515 }, { "epoch": 0.09205244848809205, "grad_norm": 0.8750366568565369, "learning_rate": 0.0004973905980214992, "loss": 1.342, "step": 516 }, { "epoch": 0.09223084470609223, "grad_norm": 1.1781761646270752, "learning_rate": 0.0004973804919765973, "loss": 1.5306, "step": 517 }, { "epoch": 0.0924092409240924, "grad_norm": 0.7154638767242432, "learning_rate": 0.0004973703665025381, "loss": 1.3626, "step": 518 }, { "epoch": 0.09258763714209259, "grad_norm": 0.8861932158470154, "learning_rate": 0.0004973602216001166, "loss": 1.4045, "step": 519 }, { "epoch": 0.09276603336009277, "grad_norm": 0.8264048099517822, "learning_rate": 0.0004973500572701299, "loss": 1.5786, "step": 520 }, { "epoch": 0.09294442957809294, "grad_norm": 0.6503618955612183, "learning_rate": 0.0004973398735133761, "loss": 1.2953, "step": 521 }, { "epoch": 0.09312282579609313, "grad_norm": 1.5494425296783447, "learning_rate": 0.000497329670330655, "loss": 1.7067, "step": 522 }, { "epoch": 0.0933012220140933, "grad_norm": 0.7740722298622131, "learning_rate": 0.0004973194477227681, "loss": 1.7424, "step": 523 }, { "epoch": 0.09347961823209348, "grad_norm": 0.6886745095252991, "learning_rate": 0.0004973092056905181, "loss": 1.4517, "step": 524 }, { "epoch": 0.09365801445009365, "grad_norm": 0.9685157537460327, "learning_rate": 0.0004972989442347097, "loss": 1.3515, "step": 525 }, { "epoch": 0.09383641066809384, "grad_norm": 0.6641685962677002, "learning_rate": 0.0004972886633561486, "loss": 1.4647, "step": 526 }, { "epoch": 0.09401480688609401, "grad_norm": 0.705767035484314, "learning_rate": 0.0004972783630556421, "loss": 1.6306, "step": 527 }, { "epoch": 0.0941932031040942, "grad_norm": 0.6863012909889221, "learning_rate": 0.0004972680433339995, "loss": 1.5197, "step": 528 }, { "epoch": 0.09437159932209437, "grad_norm": 3.830077886581421, "learning_rate": 0.0004972577041920312, "loss": 1.2183, "step": 529 }, { "epoch": 0.09454999554009455, "grad_norm": 0.8609939217567444, "learning_rate": 0.0004972473456305493, "loss": 1.5419, "step": 530 }, { "epoch": 0.09472839175809473, "grad_norm": 1.5111016035079956, "learning_rate": 0.0004972369676503671, "loss": 1.4507, "step": 531 }, { "epoch": 0.0949067879760949, "grad_norm": 0.8439610600471497, "learning_rate": 0.0004972265702523, "loss": 1.5155, "step": 532 }, { "epoch": 0.09508518419409509, "grad_norm": 0.9149817228317261, "learning_rate": 0.0004972161534371643, "loss": 1.727, "step": 533 }, { "epoch": 0.09526358041209526, "grad_norm": 1.0200440883636475, "learning_rate": 0.0004972057172057784, "loss": 1.6166, "step": 534 }, { "epoch": 0.09544197663009545, "grad_norm": 0.9794648885726929, "learning_rate": 0.0004971952615589619, "loss": 1.5486, "step": 535 }, { "epoch": 0.09562037284809562, "grad_norm": 1.2553775310516357, "learning_rate": 0.0004971847864975358, "loss": 1.5297, "step": 536 }, { "epoch": 0.0957987690660958, "grad_norm": 0.9116058349609375, "learning_rate": 0.0004971742920223231, "loss": 1.7309, "step": 537 }, { "epoch": 0.09597716528409597, "grad_norm": 1.0907950401306152, "learning_rate": 0.0004971637781341478, "loss": 1.4598, "step": 538 }, { "epoch": 0.09615556150209616, "grad_norm": 1.32576322555542, "learning_rate": 0.0004971532448338357, "loss": 1.3408, "step": 539 }, { "epoch": 0.09633395772009633, "grad_norm": 0.7169795036315918, "learning_rate": 0.0004971426921222141, "loss": 1.3837, "step": 540 }, { "epoch": 0.09651235393809651, "grad_norm": 1.4852778911590576, "learning_rate": 0.0004971321200001119, "loss": 1.3068, "step": 541 }, { "epoch": 0.0966907501560967, "grad_norm": 5.810755729675293, "learning_rate": 0.0004971215284683592, "loss": 1.3056, "step": 542 }, { "epoch": 0.09686914637409687, "grad_norm": 1.1541168689727783, "learning_rate": 0.0004971109175277882, "loss": 1.3768, "step": 543 }, { "epoch": 0.09704754259209705, "grad_norm": 0.8170933127403259, "learning_rate": 0.000497100287179232, "loss": 1.6132, "step": 544 }, { "epoch": 0.09722593881009722, "grad_norm": 0.8202213644981384, "learning_rate": 0.0004970896374235255, "loss": 1.7538, "step": 545 }, { "epoch": 0.09740433502809741, "grad_norm": 0.7768821120262146, "learning_rate": 0.0004970789682615052, "loss": 1.5705, "step": 546 }, { "epoch": 0.09758273124609758, "grad_norm": 1.4286799430847168, "learning_rate": 0.0004970682796940091, "loss": 1.5751, "step": 547 }, { "epoch": 0.09776112746409776, "grad_norm": 2.075110912322998, "learning_rate": 0.0004970575717218767, "loss": 1.5745, "step": 548 }, { "epoch": 0.09793952368209793, "grad_norm": 0.8903990387916565, "learning_rate": 0.0004970468443459489, "loss": 1.4809, "step": 549 }, { "epoch": 0.09811791990009812, "grad_norm": 0.8855850696563721, "learning_rate": 0.0004970360975670682, "loss": 1.7035, "step": 550 }, { "epoch": 0.09829631611809829, "grad_norm": 0.8159774541854858, "learning_rate": 0.0004970253313860787, "loss": 1.2233, "step": 551 }, { "epoch": 0.09847471233609847, "grad_norm": 2.709984302520752, "learning_rate": 0.0004970145458038261, "loss": 1.497, "step": 552 }, { "epoch": 0.09865310855409866, "grad_norm": 1.0269032716751099, "learning_rate": 0.0004970037408211573, "loss": 1.5952, "step": 553 }, { "epoch": 0.09883150477209883, "grad_norm": 1.1201132535934448, "learning_rate": 0.000496992916438921, "loss": 1.3275, "step": 554 }, { "epoch": 0.09900990099009901, "grad_norm": 0.8947777152061462, "learning_rate": 0.0004969820726579673, "loss": 1.4586, "step": 555 }, { "epoch": 0.09918829720809919, "grad_norm": 0.9649630784988403, "learning_rate": 0.0004969712094791479, "loss": 1.4422, "step": 556 }, { "epoch": 0.09936669342609937, "grad_norm": 0.9738620519638062, "learning_rate": 0.0004969603269033159, "loss": 1.4171, "step": 557 }, { "epoch": 0.09954508964409954, "grad_norm": 0.7529584169387817, "learning_rate": 0.0004969494249313262, "loss": 1.2664, "step": 558 }, { "epoch": 0.09972348586209973, "grad_norm": 0.8182567358016968, "learning_rate": 0.000496938503564035, "loss": 1.5923, "step": 559 }, { "epoch": 0.0999018820800999, "grad_norm": 0.7630960941314697, "learning_rate": 0.0004969275628023, "loss": 1.3102, "step": 560 }, { "epoch": 0.10008027829810008, "grad_norm": 0.7095026969909668, "learning_rate": 0.0004969166026469803, "loss": 1.3446, "step": 561 }, { "epoch": 0.10025867451610025, "grad_norm": 0.7493692636489868, "learning_rate": 0.0004969056230989371, "loss": 1.5735, "step": 562 }, { "epoch": 0.10043707073410044, "grad_norm": 0.6589444279670715, "learning_rate": 0.0004968946241590324, "loss": 1.2374, "step": 563 }, { "epoch": 0.10061546695210062, "grad_norm": 1.1761233806610107, "learning_rate": 0.0004968836058281301, "loss": 1.4974, "step": 564 }, { "epoch": 0.10079386317010079, "grad_norm": 0.6165998578071594, "learning_rate": 0.0004968725681070957, "loss": 1.1654, "step": 565 }, { "epoch": 0.10097225938810098, "grad_norm": 0.7280291318893433, "learning_rate": 0.0004968615109967961, "loss": 1.4225, "step": 566 }, { "epoch": 0.10115065560610115, "grad_norm": 1.2140183448791504, "learning_rate": 0.0004968504344980997, "loss": 1.6689, "step": 567 }, { "epoch": 0.10132905182410133, "grad_norm": 0.7183876037597656, "learning_rate": 0.0004968393386118763, "loss": 1.5669, "step": 568 }, { "epoch": 0.1015074480421015, "grad_norm": 0.6638524532318115, "learning_rate": 0.0004968282233389974, "loss": 1.1266, "step": 569 }, { "epoch": 0.10168584426010169, "grad_norm": 0.7190263271331787, "learning_rate": 0.0004968170886803362, "loss": 1.363, "step": 570 }, { "epoch": 0.10186424047810186, "grad_norm": 0.8314970135688782, "learning_rate": 0.000496805934636767, "loss": 1.6896, "step": 571 }, { "epoch": 0.10204263669610204, "grad_norm": 0.6682543158531189, "learning_rate": 0.0004967947612091659, "loss": 1.4487, "step": 572 }, { "epoch": 0.10222103291410223, "grad_norm": 0.806235134601593, "learning_rate": 0.0004967835683984105, "loss": 1.3844, "step": 573 }, { "epoch": 0.1023994291321024, "grad_norm": 0.7934510707855225, "learning_rate": 0.0004967723562053798, "loss": 1.5275, "step": 574 }, { "epoch": 0.10257782535010258, "grad_norm": 0.6725155115127563, "learning_rate": 0.0004967611246309544, "loss": 1.3457, "step": 575 }, { "epoch": 0.10275622156810275, "grad_norm": 1.1390475034713745, "learning_rate": 0.0004967498736760165, "loss": 1.5195, "step": 576 }, { "epoch": 0.10293461778610294, "grad_norm": 0.7463436126708984, "learning_rate": 0.0004967386033414498, "loss": 1.4778, "step": 577 }, { "epoch": 0.10311301400410311, "grad_norm": 0.6401159763336182, "learning_rate": 0.0004967273136281392, "loss": 1.0325, "step": 578 }, { "epoch": 0.1032914102221033, "grad_norm": 0.7498372793197632, "learning_rate": 0.0004967160045369716, "loss": 1.6773, "step": 579 }, { "epoch": 0.10346980644010347, "grad_norm": 0.6850476861000061, "learning_rate": 0.0004967046760688353, "loss": 1.4975, "step": 580 }, { "epoch": 0.10364820265810365, "grad_norm": 0.6645762324333191, "learning_rate": 0.0004966933282246198, "loss": 1.5224, "step": 581 }, { "epoch": 0.10382659887610382, "grad_norm": 0.9928523898124695, "learning_rate": 0.0004966819610052165, "loss": 1.2436, "step": 582 }, { "epoch": 0.104004995094104, "grad_norm": 2.335838794708252, "learning_rate": 0.0004966705744115182, "loss": 1.4003, "step": 583 }, { "epoch": 0.10418339131210419, "grad_norm": 0.7920882701873779, "learning_rate": 0.0004966591684444191, "loss": 1.3903, "step": 584 }, { "epoch": 0.10436178753010436, "grad_norm": 1.7378654479980469, "learning_rate": 0.000496647743104815, "loss": 1.5465, "step": 585 }, { "epoch": 0.10454018374810455, "grad_norm": 1.0265392065048218, "learning_rate": 0.0004966362983936033, "loss": 1.639, "step": 586 }, { "epoch": 0.10471857996610472, "grad_norm": 25.1031436920166, "learning_rate": 0.0004966248343116828, "loss": 1.8437, "step": 587 }, { "epoch": 0.1048969761841049, "grad_norm": 2.059586524963379, "learning_rate": 0.0004966133508599541, "loss": 1.4263, "step": 588 }, { "epoch": 0.10507537240210507, "grad_norm": 1.6019502878189087, "learning_rate": 0.0004966018480393188, "loss": 1.2302, "step": 589 }, { "epoch": 0.10525376862010526, "grad_norm": 7.920602798461914, "learning_rate": 0.0004965903258506806, "loss": 1.5145, "step": 590 }, { "epoch": 0.10543216483810543, "grad_norm": 8.575254440307617, "learning_rate": 0.0004965787842949443, "loss": 1.138, "step": 591 }, { "epoch": 0.10561056105610561, "grad_norm": 9.764083862304688, "learning_rate": 0.0004965672233730163, "loss": 1.8386, "step": 592 }, { "epoch": 0.10578895727410578, "grad_norm": 2.423666000366211, "learning_rate": 0.0004965556430858049, "loss": 1.2881, "step": 593 }, { "epoch": 0.10596735349210597, "grad_norm": 1.2473442554473877, "learning_rate": 0.0004965440434342191, "loss": 1.1922, "step": 594 }, { "epoch": 0.10614574971010615, "grad_norm": 0.9045082330703735, "learning_rate": 0.0004965324244191704, "loss": 1.3325, "step": 595 }, { "epoch": 0.10632414592810632, "grad_norm": 1.219525694847107, "learning_rate": 0.0004965207860415711, "loss": 1.6335, "step": 596 }, { "epoch": 0.10650254214610651, "grad_norm": 5.38252592086792, "learning_rate": 0.0004965091283023355, "loss": 1.425, "step": 597 }, { "epoch": 0.10668093836410668, "grad_norm": 2.2056992053985596, "learning_rate": 0.0004964974512023789, "loss": 1.5319, "step": 598 }, { "epoch": 0.10685933458210686, "grad_norm": 0.9872840642929077, "learning_rate": 0.0004964857547426186, "loss": 1.1613, "step": 599 }, { "epoch": 0.10703773080010703, "grad_norm": 1.287277102470398, "learning_rate": 0.0004964740389239732, "loss": 1.6958, "step": 600 }, { "epoch": 0.10721612701810722, "grad_norm": 1.0453811883926392, "learning_rate": 0.0004964623037473628, "loss": 1.7414, "step": 601 }, { "epoch": 0.10739452323610739, "grad_norm": 1.040404200553894, "learning_rate": 0.0004964505492137092, "loss": 1.482, "step": 602 }, { "epoch": 0.10757291945410757, "grad_norm": 0.5612449645996094, "learning_rate": 0.0004964387753239355, "loss": 1.3086, "step": 603 }, { "epoch": 0.10775131567210774, "grad_norm": 42.32016372680664, "learning_rate": 0.0004964269820789664, "loss": 1.3425, "step": 604 }, { "epoch": 0.10792971189010793, "grad_norm": 51.25515365600586, "learning_rate": 0.0004964151694797282, "loss": 1.6645, "step": 605 }, { "epoch": 0.10810810810810811, "grad_norm": 1.5014013051986694, "learning_rate": 0.0004964033375271486, "loss": 1.5083, "step": 606 }, { "epoch": 0.10828650432610828, "grad_norm": 2.11055588722229, "learning_rate": 0.0004963914862221569, "loss": 1.2624, "step": 607 }, { "epoch": 0.10846490054410847, "grad_norm": 1.610378623008728, "learning_rate": 0.000496379615565684, "loss": 1.5693, "step": 608 }, { "epoch": 0.10864329676210864, "grad_norm": 0.88837069272995, "learning_rate": 0.000496367725558662, "loss": 1.5379, "step": 609 }, { "epoch": 0.10882169298010883, "grad_norm": 1.831912875175476, "learning_rate": 0.000496355816202025, "loss": 1.3983, "step": 610 }, { "epoch": 0.109000089198109, "grad_norm": 0.7277733087539673, "learning_rate": 0.0004963438874967081, "loss": 1.5717, "step": 611 }, { "epoch": 0.10917848541610918, "grad_norm": 0.7317838072776794, "learning_rate": 0.0004963319394436483, "loss": 1.2432, "step": 612 }, { "epoch": 0.10935688163410935, "grad_norm": 5.225924491882324, "learning_rate": 0.000496319972043784, "loss": 1.3503, "step": 613 }, { "epoch": 0.10953527785210954, "grad_norm": 1.8268285989761353, "learning_rate": 0.0004963079852980551, "loss": 1.5332, "step": 614 }, { "epoch": 0.1097136740701097, "grad_norm": 0.9872994422912598, "learning_rate": 0.000496295979207403, "loss": 1.5261, "step": 615 }, { "epoch": 0.10989207028810989, "grad_norm": 8.904617309570312, "learning_rate": 0.0004962839537727706, "loss": 1.4517, "step": 616 }, { "epoch": 0.11007046650611008, "grad_norm": 1.2641127109527588, "learning_rate": 0.0004962719089951027, "loss": 1.6554, "step": 617 }, { "epoch": 0.11024886272411025, "grad_norm": 0.7095901370048523, "learning_rate": 0.0004962598448753448, "loss": 1.5814, "step": 618 }, { "epoch": 0.11042725894211043, "grad_norm": 0.8702186346054077, "learning_rate": 0.0004962477614144448, "loss": 1.2384, "step": 619 }, { "epoch": 0.1106056551601106, "grad_norm": 0.9688485860824585, "learning_rate": 0.0004962356586133515, "loss": 1.6793, "step": 620 }, { "epoch": 0.11078405137811079, "grad_norm": 0.9615238308906555, "learning_rate": 0.0004962235364730157, "loss": 1.4395, "step": 621 }, { "epoch": 0.11096244759611096, "grad_norm": 1.0574663877487183, "learning_rate": 0.0004962113949943891, "loss": 1.6388, "step": 622 }, { "epoch": 0.11114084381411114, "grad_norm": 0.8646270036697388, "learning_rate": 0.0004961992341784256, "loss": 1.4026, "step": 623 }, { "epoch": 0.11131924003211131, "grad_norm": 0.6639731526374817, "learning_rate": 0.0004961870540260801, "loss": 1.2448, "step": 624 }, { "epoch": 0.1114976362501115, "grad_norm": 0.8900001049041748, "learning_rate": 0.0004961748545383094, "loss": 1.66, "step": 625 }, { "epoch": 0.11167603246811168, "grad_norm": 1.1087528467178345, "learning_rate": 0.0004961626357160716, "loss": 1.3272, "step": 626 }, { "epoch": 0.11185442868611185, "grad_norm": 0.6758600473403931, "learning_rate": 0.0004961503975603262, "loss": 1.1703, "step": 627 }, { "epoch": 0.11203282490411204, "grad_norm": 0.7017180323600769, "learning_rate": 0.0004961381400720346, "loss": 1.6468, "step": 628 }, { "epoch": 0.11221122112211221, "grad_norm": 1.1828702688217163, "learning_rate": 0.0004961258632521595, "loss": 1.2218, "step": 629 }, { "epoch": 0.1123896173401124, "grad_norm": 0.7270997762680054, "learning_rate": 0.0004961135671016647, "loss": 1.435, "step": 630 }, { "epoch": 0.11256801355811256, "grad_norm": 0.6998748183250427, "learning_rate": 0.0004961012516215166, "loss": 1.4037, "step": 631 }, { "epoch": 0.11274640977611275, "grad_norm": 0.6797101497650146, "learning_rate": 0.0004960889168126819, "loss": 1.3327, "step": 632 }, { "epoch": 0.11292480599411292, "grad_norm": 0.7535882592201233, "learning_rate": 0.0004960765626761296, "loss": 1.2727, "step": 633 }, { "epoch": 0.1131032022121131, "grad_norm": 0.7744285464286804, "learning_rate": 0.00049606418921283, "loss": 1.3627, "step": 634 }, { "epoch": 0.11328159843011328, "grad_norm": 1.608994483947754, "learning_rate": 0.0004960517964237548, "loss": 1.4621, "step": 635 }, { "epoch": 0.11345999464811346, "grad_norm": 5.228842258453369, "learning_rate": 0.0004960393843098775, "loss": 1.2311, "step": 636 }, { "epoch": 0.11363839086611364, "grad_norm": 0.7488652467727661, "learning_rate": 0.0004960269528721728, "loss": 1.3578, "step": 637 }, { "epoch": 0.11381678708411382, "grad_norm": 0.6869979500770569, "learning_rate": 0.0004960145021116171, "loss": 1.4778, "step": 638 }, { "epoch": 0.113995183302114, "grad_norm": 0.8673625588417053, "learning_rate": 0.0004960020320291882, "loss": 1.5803, "step": 639 }, { "epoch": 0.11417357952011417, "grad_norm": 1.1181416511535645, "learning_rate": 0.0004959895426258656, "loss": 1.2287, "step": 640 }, { "epoch": 0.11435197573811436, "grad_norm": 1.0992931127548218, "learning_rate": 0.0004959770339026301, "loss": 1.5307, "step": 641 }, { "epoch": 0.11453037195611453, "grad_norm": 0.8505348563194275, "learning_rate": 0.0004959645058604644, "loss": 1.2554, "step": 642 }, { "epoch": 0.11470876817411471, "grad_norm": 1.2507725954055786, "learning_rate": 0.0004959519585003521, "loss": 1.3439, "step": 643 }, { "epoch": 0.11488716439211488, "grad_norm": 0.9388852119445801, "learning_rate": 0.0004959393918232789, "loss": 1.6679, "step": 644 }, { "epoch": 0.11506556061011507, "grad_norm": 0.9033030271530151, "learning_rate": 0.0004959268058302318, "loss": 1.5723, "step": 645 }, { "epoch": 0.11524395682811524, "grad_norm": 0.7030625939369202, "learning_rate": 0.0004959142005221991, "loss": 1.4426, "step": 646 }, { "epoch": 0.11542235304611542, "grad_norm": 0.7073930501937866, "learning_rate": 0.0004959015759001708, "loss": 1.3419, "step": 647 }, { "epoch": 0.11560074926411561, "grad_norm": 0.7212799191474915, "learning_rate": 0.0004958889319651386, "loss": 1.4213, "step": 648 }, { "epoch": 0.11577914548211578, "grad_norm": 0.7988331317901611, "learning_rate": 0.0004958762687180956, "loss": 1.5477, "step": 649 }, { "epoch": 0.11595754170011596, "grad_norm": 0.6533039212226868, "learning_rate": 0.0004958635861600362, "loss": 1.3518, "step": 650 }, { "epoch": 0.11613593791811613, "grad_norm": 1.167569637298584, "learning_rate": 0.0004958508842919565, "loss": 1.5068, "step": 651 }, { "epoch": 0.11631433413611632, "grad_norm": 0.8685398101806641, "learning_rate": 0.0004958381631148543, "loss": 1.5344, "step": 652 }, { "epoch": 0.11649273035411649, "grad_norm": 0.6582388281822205, "learning_rate": 0.0004958254226297284, "loss": 1.4228, "step": 653 }, { "epoch": 0.11667112657211667, "grad_norm": 0.6874244213104248, "learning_rate": 0.0004958126628375797, "loss": 1.3626, "step": 654 }, { "epoch": 0.11684952279011684, "grad_norm": 0.6385990977287292, "learning_rate": 0.0004957998837394102, "loss": 1.4008, "step": 655 }, { "epoch": 0.11702791900811703, "grad_norm": 0.7000042796134949, "learning_rate": 0.0004957870853362237, "loss": 1.4344, "step": 656 }, { "epoch": 0.1172063152261172, "grad_norm": 0.6392539143562317, "learning_rate": 0.0004957742676290251, "loss": 1.2199, "step": 657 }, { "epoch": 0.11738471144411738, "grad_norm": 0.6661501526832581, "learning_rate": 0.0004957614306188214, "loss": 1.5327, "step": 658 }, { "epoch": 0.11756310766211757, "grad_norm": 0.6941922903060913, "learning_rate": 0.0004957485743066207, "loss": 1.157, "step": 659 }, { "epoch": 0.11774150388011774, "grad_norm": 0.8714244365692139, "learning_rate": 0.0004957356986934326, "loss": 1.4848, "step": 660 }, { "epoch": 0.11791990009811792, "grad_norm": 0.5838067531585693, "learning_rate": 0.0004957228037802687, "loss": 1.3737, "step": 661 }, { "epoch": 0.1180982963161181, "grad_norm": 0.9917201399803162, "learning_rate": 0.0004957098895681414, "loss": 1.5201, "step": 662 }, { "epoch": 0.11827669253411828, "grad_norm": 5.74738883972168, "learning_rate": 0.0004956969560580651, "loss": 1.3064, "step": 663 }, { "epoch": 0.11845508875211845, "grad_norm": 0.6317707896232605, "learning_rate": 0.0004956840032510556, "loss": 1.1942, "step": 664 }, { "epoch": 0.11863348497011864, "grad_norm": 0.7513093948364258, "learning_rate": 0.0004956710311481302, "loss": 1.4782, "step": 665 }, { "epoch": 0.1188118811881188, "grad_norm": 0.8310014605522156, "learning_rate": 0.0004956580397503078, "loss": 1.5899, "step": 666 }, { "epoch": 0.11899027740611899, "grad_norm": 1.0249019861221313, "learning_rate": 0.0004956450290586087, "loss": 1.1852, "step": 667 }, { "epoch": 0.11916867362411916, "grad_norm": 1.868385910987854, "learning_rate": 0.0004956319990740547, "loss": 1.4115, "step": 668 }, { "epoch": 0.11934706984211935, "grad_norm": 0.7085257768630981, "learning_rate": 0.0004956189497976691, "loss": 1.578, "step": 669 }, { "epoch": 0.11952546606011953, "grad_norm": 0.607820451259613, "learning_rate": 0.0004956058812304769, "loss": 1.4083, "step": 670 }, { "epoch": 0.1197038622781197, "grad_norm": 0.6980863213539124, "learning_rate": 0.0004955927933735046, "loss": 1.2609, "step": 671 }, { "epoch": 0.11988225849611989, "grad_norm": 0.5971283316612244, "learning_rate": 0.0004955796862277799, "loss": 1.2083, "step": 672 }, { "epoch": 0.12006065471412006, "grad_norm": 0.641476571559906, "learning_rate": 0.0004955665597943323, "loss": 1.2241, "step": 673 }, { "epoch": 0.12023905093212024, "grad_norm": 0.7836436629295349, "learning_rate": 0.0004955534140741928, "loss": 1.511, "step": 674 }, { "epoch": 0.12041744715012041, "grad_norm": 0.716119110584259, "learning_rate": 0.0004955402490683939, "loss": 1.7792, "step": 675 }, { "epoch": 0.1205958433681206, "grad_norm": 0.7199398279190063, "learning_rate": 0.0004955270647779695, "loss": 1.3504, "step": 676 }, { "epoch": 0.12077423958612077, "grad_norm": 1.583276629447937, "learning_rate": 0.000495513861203955, "loss": 1.4377, "step": 677 }, { "epoch": 0.12095263580412095, "grad_norm": 0.5903739333152771, "learning_rate": 0.0004955006383473876, "loss": 1.2851, "step": 678 }, { "epoch": 0.12113103202212114, "grad_norm": 0.85763019323349, "learning_rate": 0.0004954873962093056, "loss": 1.6958, "step": 679 }, { "epoch": 0.12130942824012131, "grad_norm": 0.5933085083961487, "learning_rate": 0.0004954741347907492, "loss": 1.4475, "step": 680 }, { "epoch": 0.1214878244581215, "grad_norm": 0.6299702525138855, "learning_rate": 0.0004954608540927599, "loss": 1.5114, "step": 681 }, { "epoch": 0.12166622067612166, "grad_norm": 0.588097333908081, "learning_rate": 0.0004954475541163807, "loss": 1.2504, "step": 682 }, { "epoch": 0.12184461689412185, "grad_norm": 0.7014090418815613, "learning_rate": 0.0004954342348626562, "loss": 1.7175, "step": 683 }, { "epoch": 0.12202301311212202, "grad_norm": 0.6995732188224792, "learning_rate": 0.0004954208963326327, "loss": 1.4363, "step": 684 }, { "epoch": 0.1222014093301222, "grad_norm": 0.5883218050003052, "learning_rate": 0.0004954075385273574, "loss": 1.385, "step": 685 }, { "epoch": 0.12237980554812238, "grad_norm": 0.6858941912651062, "learning_rate": 0.0004953941614478797, "loss": 1.5209, "step": 686 }, { "epoch": 0.12255820176612256, "grad_norm": 2.024874687194824, "learning_rate": 0.0004953807650952502, "loss": 1.4826, "step": 687 }, { "epoch": 0.12273659798412273, "grad_norm": 0.6671762466430664, "learning_rate": 0.000495367349470521, "loss": 1.3409, "step": 688 }, { "epoch": 0.12291499420212292, "grad_norm": 0.6432123184204102, "learning_rate": 0.0004953539145747457, "loss": 1.3691, "step": 689 }, { "epoch": 0.1230933904201231, "grad_norm": 0.6529083251953125, "learning_rate": 0.0004953404604089796, "loss": 1.4347, "step": 690 }, { "epoch": 0.12327178663812327, "grad_norm": 0.6190119981765747, "learning_rate": 0.0004953269869742792, "loss": 1.2487, "step": 691 }, { "epoch": 0.12345018285612346, "grad_norm": 0.6484703421592712, "learning_rate": 0.000495313494271703, "loss": 1.5288, "step": 692 }, { "epoch": 0.12362857907412363, "grad_norm": 0.6164755821228027, "learning_rate": 0.0004952999823023104, "loss": 1.2645, "step": 693 }, { "epoch": 0.12380697529212381, "grad_norm": 0.6002447009086609, "learning_rate": 0.0004952864510671628, "loss": 1.2468, "step": 694 }, { "epoch": 0.12398537151012398, "grad_norm": 0.6057468056678772, "learning_rate": 0.0004952729005673229, "loss": 1.5349, "step": 695 }, { "epoch": 0.12416376772812417, "grad_norm": 0.5792039036750793, "learning_rate": 0.0004952593308038549, "loss": 1.1507, "step": 696 }, { "epoch": 0.12434216394612434, "grad_norm": 0.7286936044692993, "learning_rate": 0.0004952457417778247, "loss": 1.4709, "step": 697 }, { "epoch": 0.12452056016412452, "grad_norm": 0.6991254687309265, "learning_rate": 0.0004952321334902993, "loss": 1.4715, "step": 698 }, { "epoch": 0.12469895638212469, "grad_norm": 2.0999631881713867, "learning_rate": 0.0004952185059423478, "loss": 1.4724, "step": 699 }, { "epoch": 0.12487735260012488, "grad_norm": 0.7571597099304199, "learning_rate": 0.0004952048591350403, "loss": 1.4842, "step": 700 }, { "epoch": 0.12505574881812506, "grad_norm": 0.7109989523887634, "learning_rate": 0.0004951911930694487, "loss": 1.2872, "step": 701 }, { "epoch": 0.12523414503612523, "grad_norm": 3.125427484512329, "learning_rate": 0.0004951775077466463, "loss": 1.1666, "step": 702 }, { "epoch": 0.1254125412541254, "grad_norm": 0.9019857048988342, "learning_rate": 0.0004951638031677081, "loss": 1.3661, "step": 703 }, { "epoch": 0.1255909374721256, "grad_norm": 0.7541314363479614, "learning_rate": 0.00049515007933371, "loss": 1.2676, "step": 704 }, { "epoch": 0.12576933369012577, "grad_norm": 0.6455008387565613, "learning_rate": 0.0004951363362457304, "loss": 1.2095, "step": 705 }, { "epoch": 0.12594772990812594, "grad_norm": 0.7301307916641235, "learning_rate": 0.0004951225739048484, "loss": 1.5632, "step": 706 }, { "epoch": 0.12612612612612611, "grad_norm": 0.6640974879264832, "learning_rate": 0.0004951087923121449, "loss": 1.4562, "step": 707 }, { "epoch": 0.1263045223441263, "grad_norm": 0.6812661290168762, "learning_rate": 0.0004950949914687023, "loss": 1.2051, "step": 708 }, { "epoch": 0.12648291856212648, "grad_norm": 1.516214370727539, "learning_rate": 0.0004950811713756047, "loss": 1.3933, "step": 709 }, { "epoch": 0.12666131478012665, "grad_norm": 0.8335700631141663, "learning_rate": 0.0004950673320339372, "loss": 1.6518, "step": 710 }, { "epoch": 0.12683971099812685, "grad_norm": 0.6542342901229858, "learning_rate": 0.0004950534734447869, "loss": 1.3769, "step": 711 }, { "epoch": 0.12701810721612702, "grad_norm": 0.8510260581970215, "learning_rate": 0.0004950395956092423, "loss": 1.3511, "step": 712 }, { "epoch": 0.1271965034341272, "grad_norm": 2.36539626121521, "learning_rate": 0.0004950256985283934, "loss": 1.35, "step": 713 }, { "epoch": 0.12737489965212737, "grad_norm": 0.6594715118408203, "learning_rate": 0.0004950117822033315, "loss": 1.2426, "step": 714 }, { "epoch": 0.12755329587012756, "grad_norm": 0.619835615158081, "learning_rate": 0.0004949978466351495, "loss": 1.256, "step": 715 }, { "epoch": 0.12773169208812774, "grad_norm": 0.792833685874939, "learning_rate": 0.0004949838918249423, "loss": 1.4526, "step": 716 }, { "epoch": 0.1279100883061279, "grad_norm": 0.7402137517929077, "learning_rate": 0.0004949699177738056, "loss": 1.4895, "step": 717 }, { "epoch": 0.12808848452412808, "grad_norm": 0.7301040291786194, "learning_rate": 0.0004949559244828369, "loss": 1.435, "step": 718 }, { "epoch": 0.12826688074212828, "grad_norm": 0.6888275742530823, "learning_rate": 0.0004949419119531354, "loss": 1.4007, "step": 719 }, { "epoch": 0.12844527696012845, "grad_norm": 13.487852096557617, "learning_rate": 0.0004949278801858015, "loss": 1.2581, "step": 720 }, { "epoch": 0.12862367317812862, "grad_norm": 0.8741459846496582, "learning_rate": 0.0004949138291819372, "loss": 1.3751, "step": 721 }, { "epoch": 0.12880206939612882, "grad_norm": 14.084300994873047, "learning_rate": 0.0004948997589426463, "loss": 1.6692, "step": 722 }, { "epoch": 0.128980465614129, "grad_norm": 1.240329623222351, "learning_rate": 0.0004948856694690337, "loss": 1.6749, "step": 723 }, { "epoch": 0.12915886183212916, "grad_norm": 0.7830163240432739, "learning_rate": 0.000494871560762206, "loss": 1.5147, "step": 724 }, { "epoch": 0.12933725805012933, "grad_norm": 0.8917336463928223, "learning_rate": 0.0004948574328232713, "loss": 1.4268, "step": 725 }, { "epoch": 0.12951565426812953, "grad_norm": 0.8025479912757874, "learning_rate": 0.000494843285653339, "loss": 1.4808, "step": 726 }, { "epoch": 0.1296940504861297, "grad_norm": 0.8113172650337219, "learning_rate": 0.0004948291192535206, "loss": 1.6472, "step": 727 }, { "epoch": 0.12987244670412987, "grad_norm": 0.6620337963104248, "learning_rate": 0.0004948149336249286, "loss": 1.4085, "step": 728 }, { "epoch": 0.13005084292213004, "grad_norm": 1.0298174619674683, "learning_rate": 0.0004948007287686769, "loss": 1.2783, "step": 729 }, { "epoch": 0.13022923914013024, "grad_norm": 0.6767082810401917, "learning_rate": 0.0004947865046858814, "loss": 1.2708, "step": 730 }, { "epoch": 0.1304076353581304, "grad_norm": 0.7776159644126892, "learning_rate": 0.0004947722613776591, "loss": 1.1688, "step": 731 }, { "epoch": 0.13058603157613058, "grad_norm": 0.7061336636543274, "learning_rate": 0.0004947579988451288, "loss": 1.4495, "step": 732 }, { "epoch": 0.13076442779413078, "grad_norm": 0.5810830593109131, "learning_rate": 0.0004947437170894105, "loss": 1.0836, "step": 733 }, { "epoch": 0.13094282401213095, "grad_norm": 0.6294886469841003, "learning_rate": 0.000494729416111626, "loss": 1.3136, "step": 734 }, { "epoch": 0.13112122023013112, "grad_norm": 0.6610152721405029, "learning_rate": 0.0004947150959128986, "loss": 1.4622, "step": 735 }, { "epoch": 0.1312996164481313, "grad_norm": 0.6016314625740051, "learning_rate": 0.0004947007564943527, "loss": 1.1905, "step": 736 }, { "epoch": 0.1314780126661315, "grad_norm": 0.6377681493759155, "learning_rate": 0.0004946863978571148, "loss": 1.2777, "step": 737 }, { "epoch": 0.13165640888413166, "grad_norm": 0.6914573907852173, "learning_rate": 0.0004946720200023125, "loss": 1.4642, "step": 738 }, { "epoch": 0.13183480510213183, "grad_norm": 0.6614610552787781, "learning_rate": 0.000494657622931075, "loss": 1.4415, "step": 739 }, { "epoch": 0.132013201320132, "grad_norm": 1.029931902885437, "learning_rate": 0.0004946432066445331, "loss": 1.2224, "step": 740 }, { "epoch": 0.1321915975381322, "grad_norm": 0.7482788562774658, "learning_rate": 0.000494628771143819, "loss": 1.4412, "step": 741 }, { "epoch": 0.13236999375613237, "grad_norm": 0.9311326146125793, "learning_rate": 0.0004946143164300665, "loss": 1.5545, "step": 742 }, { "epoch": 0.13254838997413254, "grad_norm": 0.6425170302391052, "learning_rate": 0.0004945998425044109, "loss": 1.3134, "step": 743 }, { "epoch": 0.13272678619213274, "grad_norm": 0.674411952495575, "learning_rate": 0.0004945853493679889, "loss": 1.4689, "step": 744 }, { "epoch": 0.1329051824101329, "grad_norm": 0.6528111696243286, "learning_rate": 0.0004945708370219388, "loss": 1.3335, "step": 745 }, { "epoch": 0.13308357862813308, "grad_norm": 0.663161039352417, "learning_rate": 0.0004945563054674005, "loss": 1.6369, "step": 746 }, { "epoch": 0.13326197484613325, "grad_norm": 1.808029294013977, "learning_rate": 0.0004945417547055151, "loss": 1.2105, "step": 747 }, { "epoch": 0.13344037106413345, "grad_norm": 0.683954656124115, "learning_rate": 0.0004945271847374255, "loss": 1.4248, "step": 748 }, { "epoch": 0.13361876728213362, "grad_norm": 0.5650470852851868, "learning_rate": 0.0004945125955642761, "loss": 1.2412, "step": 749 }, { "epoch": 0.1337971635001338, "grad_norm": 0.7181941866874695, "learning_rate": 0.0004944979871872126, "loss": 1.5134, "step": 750 }, { "epoch": 0.13397555971813396, "grad_norm": 0.6759237051010132, "learning_rate": 0.0004944833596073825, "loss": 1.4109, "step": 751 }, { "epoch": 0.13415395593613416, "grad_norm": 1.112088918685913, "learning_rate": 0.0004944687128259345, "loss": 1.4521, "step": 752 }, { "epoch": 0.13433235215413433, "grad_norm": 0.8726287484169006, "learning_rate": 0.000494454046844019, "loss": 1.4233, "step": 753 }, { "epoch": 0.1345107483721345, "grad_norm": 0.61822509765625, "learning_rate": 0.0004944393616627879, "loss": 1.1642, "step": 754 }, { "epoch": 0.1346891445901347, "grad_norm": 0.7049110531806946, "learning_rate": 0.0004944246572833945, "loss": 1.3176, "step": 755 }, { "epoch": 0.13486754080813487, "grad_norm": 0.7006151676177979, "learning_rate": 0.0004944099337069936, "loss": 1.4621, "step": 756 }, { "epoch": 0.13504593702613504, "grad_norm": 0.907748818397522, "learning_rate": 0.0004943951909347419, "loss": 1.4472, "step": 757 }, { "epoch": 0.13522433324413521, "grad_norm": 1.7227107286453247, "learning_rate": 0.0004943804289677969, "loss": 1.381, "step": 758 }, { "epoch": 0.1354027294621354, "grad_norm": 0.8136081695556641, "learning_rate": 0.0004943656478073182, "loss": 1.2206, "step": 759 }, { "epoch": 0.13558112568013558, "grad_norm": 1.1220283508300781, "learning_rate": 0.0004943508474544667, "loss": 1.5002, "step": 760 }, { "epoch": 0.13575952189813575, "grad_norm": 2.818793296813965, "learning_rate": 0.0004943360279104047, "loss": 1.4188, "step": 761 }, { "epoch": 0.13593791811613593, "grad_norm": 7.106777191162109, "learning_rate": 0.0004943211891762964, "loss": 1.6358, "step": 762 }, { "epoch": 0.13611631433413612, "grad_norm": 1.419669270515442, "learning_rate": 0.0004943063312533069, "loss": 1.4433, "step": 763 }, { "epoch": 0.1362947105521363, "grad_norm": 1.2436542510986328, "learning_rate": 0.0004942914541426033, "loss": 1.5007, "step": 764 }, { "epoch": 0.13647310677013647, "grad_norm": 1.268811821937561, "learning_rate": 0.0004942765578453541, "loss": 1.6141, "step": 765 }, { "epoch": 0.13665150298813666, "grad_norm": 1.0248541831970215, "learning_rate": 0.000494261642362729, "loss": 1.2407, "step": 766 }, { "epoch": 0.13682989920613683, "grad_norm": 0.8062700629234314, "learning_rate": 0.0004942467076958999, "loss": 1.2817, "step": 767 }, { "epoch": 0.137008295424137, "grad_norm": 1.0193575620651245, "learning_rate": 0.0004942317538460392, "loss": 1.2323, "step": 768 }, { "epoch": 0.13718669164213718, "grad_norm": 1.2625796794891357, "learning_rate": 0.0004942167808143218, "loss": 1.6804, "step": 769 }, { "epoch": 0.13736508786013737, "grad_norm": 0.7931640148162842, "learning_rate": 0.0004942017886019236, "loss": 1.2993, "step": 770 }, { "epoch": 0.13754348407813755, "grad_norm": 1.35048508644104, "learning_rate": 0.0004941867772100218, "loss": 1.4314, "step": 771 }, { "epoch": 0.13772188029613772, "grad_norm": 1.2043832540512085, "learning_rate": 0.0004941717466397957, "loss": 1.4401, "step": 772 }, { "epoch": 0.1379002765141379, "grad_norm": 0.8232206106185913, "learning_rate": 0.0004941566968924258, "loss": 1.4218, "step": 773 }, { "epoch": 0.13807867273213809, "grad_norm": 0.7783534526824951, "learning_rate": 0.0004941416279690939, "loss": 1.5267, "step": 774 }, { "epoch": 0.13825706895013826, "grad_norm": 0.6729874014854431, "learning_rate": 0.0004941265398709835, "loss": 1.368, "step": 775 }, { "epoch": 0.13843546516813843, "grad_norm": 0.7547962069511414, "learning_rate": 0.0004941114325992798, "loss": 1.3377, "step": 776 }, { "epoch": 0.13861386138613863, "grad_norm": 0.7145437002182007, "learning_rate": 0.0004940963061551693, "loss": 1.3902, "step": 777 }, { "epoch": 0.1387922576041388, "grad_norm": 0.6485962271690369, "learning_rate": 0.0004940811605398399, "loss": 1.0085, "step": 778 }, { "epoch": 0.13897065382213897, "grad_norm": 0.630803644657135, "learning_rate": 0.0004940659957544813, "loss": 1.4345, "step": 779 }, { "epoch": 0.13914905004013914, "grad_norm": 1.3144874572753906, "learning_rate": 0.0004940508118002842, "loss": 1.2102, "step": 780 }, { "epoch": 0.13932744625813934, "grad_norm": 0.6318569779396057, "learning_rate": 0.0004940356086784415, "loss": 1.1991, "step": 781 }, { "epoch": 0.1395058424761395, "grad_norm": 0.9692159295082092, "learning_rate": 0.0004940203863901472, "loss": 1.1838, "step": 782 }, { "epoch": 0.13968423869413968, "grad_norm": 0.6397761106491089, "learning_rate": 0.0004940051449365966, "loss": 1.301, "step": 783 }, { "epoch": 0.13986263491213985, "grad_norm": 0.7027068734169006, "learning_rate": 0.000493989884318987, "loss": 1.1906, "step": 784 }, { "epoch": 0.14004103113014005, "grad_norm": 0.5374085307121277, "learning_rate": 0.0004939746045385168, "loss": 1.2157, "step": 785 }, { "epoch": 0.14021942734814022, "grad_norm": 0.7548506855964661, "learning_rate": 0.0004939593055963863, "loss": 1.2354, "step": 786 }, { "epoch": 0.1403978235661404, "grad_norm": 0.8988766670227051, "learning_rate": 0.0004939439874937967, "loss": 1.2367, "step": 787 }, { "epoch": 0.1405762197841406, "grad_norm": 0.6512690186500549, "learning_rate": 0.0004939286502319515, "loss": 1.1598, "step": 788 }, { "epoch": 0.14075461600214076, "grad_norm": 0.664986789226532, "learning_rate": 0.0004939132938120551, "loss": 1.4823, "step": 789 }, { "epoch": 0.14093301222014093, "grad_norm": 0.6778481006622314, "learning_rate": 0.0004938979182353134, "loss": 1.2677, "step": 790 }, { "epoch": 0.1411114084381411, "grad_norm": 0.570472240447998, "learning_rate": 0.0004938825235029343, "loss": 0.8828, "step": 791 }, { "epoch": 0.1412898046561413, "grad_norm": 0.9883807897567749, "learning_rate": 0.0004938671096161267, "loss": 1.2965, "step": 792 }, { "epoch": 0.14146820087414147, "grad_norm": 0.7180954217910767, "learning_rate": 0.0004938516765761011, "loss": 1.4603, "step": 793 }, { "epoch": 0.14164659709214164, "grad_norm": 0.6821011900901794, "learning_rate": 0.00049383622438407, "loss": 1.6537, "step": 794 }, { "epoch": 0.1418249933101418, "grad_norm": 0.6514670848846436, "learning_rate": 0.0004938207530412467, "loss": 1.1712, "step": 795 }, { "epoch": 0.142003389528142, "grad_norm": 0.5831206440925598, "learning_rate": 0.0004938052625488464, "loss": 1.2547, "step": 796 }, { "epoch": 0.14218178574614218, "grad_norm": 0.6372058987617493, "learning_rate": 0.0004937897529080856, "loss": 1.3056, "step": 797 }, { "epoch": 0.14236018196414235, "grad_norm": 0.6786195635795593, "learning_rate": 0.0004937742241201826, "loss": 1.5176, "step": 798 }, { "epoch": 0.14253857818214255, "grad_norm": 0.6339353322982788, "learning_rate": 0.000493758676186357, "loss": 1.2932, "step": 799 }, { "epoch": 0.14271697440014272, "grad_norm": 0.6407662630081177, "learning_rate": 0.0004937431091078297, "loss": 1.2913, "step": 800 }, { "epoch": 0.1428953706181429, "grad_norm": 0.7081295251846313, "learning_rate": 0.0004937275228858235, "loss": 1.2895, "step": 801 }, { "epoch": 0.14307376683614306, "grad_norm": 0.5862438678741455, "learning_rate": 0.0004937119175215627, "loss": 1.2476, "step": 802 }, { "epoch": 0.14325216305414326, "grad_norm": 0.6276576519012451, "learning_rate": 0.0004936962930162728, "loss": 1.2236, "step": 803 }, { "epoch": 0.14343055927214343, "grad_norm": 0.6015689969062805, "learning_rate": 0.0004936806493711808, "loss": 1.2797, "step": 804 }, { "epoch": 0.1436089554901436, "grad_norm": 1.0078672170639038, "learning_rate": 0.0004936649865875155, "loss": 1.5178, "step": 805 }, { "epoch": 0.14378735170814377, "grad_norm": 0.6235846281051636, "learning_rate": 0.000493649304666507, "loss": 1.2727, "step": 806 }, { "epoch": 0.14396574792614397, "grad_norm": 0.6958335638046265, "learning_rate": 0.0004936336036093869, "loss": 1.4622, "step": 807 }, { "epoch": 0.14414414414414414, "grad_norm": 0.7218196392059326, "learning_rate": 0.0004936178834173884, "loss": 1.3002, "step": 808 }, { "epoch": 0.1443225403621443, "grad_norm": 0.5912010669708252, "learning_rate": 0.0004936021440917462, "loss": 1.406, "step": 809 }, { "epoch": 0.1445009365801445, "grad_norm": 1.7334743738174438, "learning_rate": 0.0004935863856336965, "loss": 1.2606, "step": 810 }, { "epoch": 0.14467933279814468, "grad_norm": 0.7477133274078369, "learning_rate": 0.000493570608044477, "loss": 1.3254, "step": 811 }, { "epoch": 0.14485772901614485, "grad_norm": 0.6125087141990662, "learning_rate": 0.0004935548113253266, "loss": 1.448, "step": 812 }, { "epoch": 0.14503612523414502, "grad_norm": 0.6391841173171997, "learning_rate": 0.0004935389954774861, "loss": 1.0645, "step": 813 }, { "epoch": 0.14521452145214522, "grad_norm": 0.6377097368240356, "learning_rate": 0.0004935231605021977, "loss": 1.2738, "step": 814 }, { "epoch": 0.1453929176701454, "grad_norm": 0.6253454685211182, "learning_rate": 0.0004935073064007052, "loss": 1.6608, "step": 815 }, { "epoch": 0.14557131388814556, "grad_norm": 0.7210360169410706, "learning_rate": 0.0004934914331742536, "loss": 1.5487, "step": 816 }, { "epoch": 0.14574971010614576, "grad_norm": 0.589590311050415, "learning_rate": 0.0004934755408240896, "loss": 1.1513, "step": 817 }, { "epoch": 0.14592810632414593, "grad_norm": 0.6044402718544006, "learning_rate": 0.0004934596293514614, "loss": 1.2131, "step": 818 }, { "epoch": 0.1461065025421461, "grad_norm": 0.6587312817573547, "learning_rate": 0.0004934436987576186, "loss": 1.5939, "step": 819 }, { "epoch": 0.14628489876014628, "grad_norm": 0.5897753238677979, "learning_rate": 0.0004934277490438126, "loss": 1.2628, "step": 820 }, { "epoch": 0.14646329497814647, "grad_norm": 0.5983254909515381, "learning_rate": 0.0004934117802112959, "loss": 1.1709, "step": 821 }, { "epoch": 0.14664169119614665, "grad_norm": 0.6504422426223755, "learning_rate": 0.0004933957922613227, "loss": 1.4531, "step": 822 }, { "epoch": 0.14682008741414682, "grad_norm": 0.5873280763626099, "learning_rate": 0.0004933797851951487, "loss": 1.1531, "step": 823 }, { "epoch": 0.146998483632147, "grad_norm": 2.0532479286193848, "learning_rate": 0.0004933637590140311, "loss": 1.4533, "step": 824 }, { "epoch": 0.14717687985014719, "grad_norm": 0.6756530404090881, "learning_rate": 0.0004933477137192287, "loss": 1.4504, "step": 825 }, { "epoch": 0.14735527606814736, "grad_norm": 0.5908591747283936, "learning_rate": 0.0004933316493120015, "loss": 1.1947, "step": 826 }, { "epoch": 0.14753367228614753, "grad_norm": 0.5556735396385193, "learning_rate": 0.0004933155657936112, "loss": 1.0962, "step": 827 }, { "epoch": 0.14771206850414773, "grad_norm": 0.5515411496162415, "learning_rate": 0.0004932994631653212, "loss": 1.1774, "step": 828 }, { "epoch": 0.1478904647221479, "grad_norm": 0.8183857202529907, "learning_rate": 0.000493283341428396, "loss": 1.3823, "step": 829 }, { "epoch": 0.14806886094014807, "grad_norm": 0.6181221008300781, "learning_rate": 0.0004932672005841019, "loss": 1.4347, "step": 830 }, { "epoch": 0.14824725715814824, "grad_norm": 0.632533848285675, "learning_rate": 0.0004932510406337065, "loss": 1.6746, "step": 831 }, { "epoch": 0.14842565337614844, "grad_norm": 0.679986834526062, "learning_rate": 0.0004932348615784791, "loss": 1.2888, "step": 832 }, { "epoch": 0.1486040495941486, "grad_norm": 0.6554014086723328, "learning_rate": 0.0004932186634196903, "loss": 1.4259, "step": 833 }, { "epoch": 0.14878244581214878, "grad_norm": 0.6169226169586182, "learning_rate": 0.0004932024461586124, "loss": 1.206, "step": 834 }, { "epoch": 0.14896084203014895, "grad_norm": 0.6929764747619629, "learning_rate": 0.000493186209796519, "loss": 1.2828, "step": 835 }, { "epoch": 0.14913923824814915, "grad_norm": 0.5675246119499207, "learning_rate": 0.0004931699543346854, "loss": 1.4443, "step": 836 }, { "epoch": 0.14931763446614932, "grad_norm": 0.6860451698303223, "learning_rate": 0.0004931536797743881, "loss": 1.3001, "step": 837 }, { "epoch": 0.1494960306841495, "grad_norm": 0.6203287243843079, "learning_rate": 0.0004931373861169055, "loss": 1.6086, "step": 838 }, { "epoch": 0.1496744269021497, "grad_norm": 0.5558333992958069, "learning_rate": 0.0004931210733635172, "loss": 1.2781, "step": 839 }, { "epoch": 0.14985282312014986, "grad_norm": 0.6401587128639221, "learning_rate": 0.0004931047415155044, "loss": 1.278, "step": 840 }, { "epoch": 0.15003121933815003, "grad_norm": 0.6404081583023071, "learning_rate": 0.00049308839057415, "loss": 1.2592, "step": 841 }, { "epoch": 0.1502096155561502, "grad_norm": 0.646500289440155, "learning_rate": 0.0004930720205407378, "loss": 1.4263, "step": 842 }, { "epoch": 0.1503880117741504, "grad_norm": 0.6502192616462708, "learning_rate": 0.0004930556314165538, "loss": 1.3815, "step": 843 }, { "epoch": 0.15056640799215057, "grad_norm": 0.7043633460998535, "learning_rate": 0.0004930392232028851, "loss": 1.3289, "step": 844 }, { "epoch": 0.15074480421015074, "grad_norm": 0.566808819770813, "learning_rate": 0.0004930227959010204, "loss": 1.226, "step": 845 }, { "epoch": 0.1509232004281509, "grad_norm": 0.5885351896286011, "learning_rate": 0.0004930063495122498, "loss": 1.0517, "step": 846 }, { "epoch": 0.1511015966461511, "grad_norm": 0.696162760257721, "learning_rate": 0.0004929898840378651, "loss": 1.4365, "step": 847 }, { "epoch": 0.15127999286415128, "grad_norm": 0.676964282989502, "learning_rate": 0.0004929733994791597, "loss": 1.5412, "step": 848 }, { "epoch": 0.15145838908215145, "grad_norm": 0.5642474293708801, "learning_rate": 0.0004929568958374278, "loss": 1.155, "step": 849 }, { "epoch": 0.15163678530015165, "grad_norm": 0.9656538367271423, "learning_rate": 0.0004929403731139659, "loss": 1.7266, "step": 850 }, { "epoch": 0.15181518151815182, "grad_norm": 0.6227328777313232, "learning_rate": 0.0004929238313100717, "loss": 1.4751, "step": 851 }, { "epoch": 0.151993577736152, "grad_norm": 0.6424437761306763, "learning_rate": 0.0004929072704270444, "loss": 1.4661, "step": 852 }, { "epoch": 0.15217197395415216, "grad_norm": 0.5338221192359924, "learning_rate": 0.0004928906904661845, "loss": 1.2984, "step": 853 }, { "epoch": 0.15235037017215236, "grad_norm": 0.594628632068634, "learning_rate": 0.0004928740914287944, "loss": 1.1771, "step": 854 }, { "epoch": 0.15252876639015253, "grad_norm": 1.8328267335891724, "learning_rate": 0.0004928574733161775, "loss": 1.241, "step": 855 }, { "epoch": 0.1527071626081527, "grad_norm": 4.153714656829834, "learning_rate": 0.0004928408361296393, "loss": 1.2908, "step": 856 }, { "epoch": 0.15288555882615287, "grad_norm": 0.6561400294303894, "learning_rate": 0.0004928241798704862, "loss": 1.1795, "step": 857 }, { "epoch": 0.15306395504415307, "grad_norm": 0.9632255434989929, "learning_rate": 0.0004928075045400267, "loss": 1.3548, "step": 858 }, { "epoch": 0.15324235126215324, "grad_norm": 0.6676768064498901, "learning_rate": 0.0004927908101395701, "loss": 1.3005, "step": 859 }, { "epoch": 0.1534207474801534, "grad_norm": 0.6931350827217102, "learning_rate": 0.0004927740966704278, "loss": 1.4234, "step": 860 }, { "epoch": 0.1535991436981536, "grad_norm": 0.582984983921051, "learning_rate": 0.0004927573641339125, "loss": 1.286, "step": 861 }, { "epoch": 0.15377753991615378, "grad_norm": 1.9343520402908325, "learning_rate": 0.0004927406125313382, "loss": 1.1929, "step": 862 }, { "epoch": 0.15395593613415395, "grad_norm": 0.8297525644302368, "learning_rate": 0.0004927238418640208, "loss": 1.3183, "step": 863 }, { "epoch": 0.15413433235215412, "grad_norm": 0.6805528998374939, "learning_rate": 0.0004927070521332772, "loss": 1.3282, "step": 864 }, { "epoch": 0.15431272857015432, "grad_norm": 0.8657769560813904, "learning_rate": 0.0004926902433404261, "loss": 1.253, "step": 865 }, { "epoch": 0.1544911247881545, "grad_norm": 0.7054359912872314, "learning_rate": 0.0004926734154867878, "loss": 1.2187, "step": 866 }, { "epoch": 0.15466952100615466, "grad_norm": 0.6124011874198914, "learning_rate": 0.0004926565685736839, "loss": 1.1525, "step": 867 }, { "epoch": 0.15484791722415484, "grad_norm": 0.6714728474617004, "learning_rate": 0.0004926397026024375, "loss": 1.1008, "step": 868 }, { "epoch": 0.15502631344215503, "grad_norm": 0.7281408905982971, "learning_rate": 0.0004926228175743733, "loss": 1.4181, "step": 869 }, { "epoch": 0.1552047096601552, "grad_norm": 0.6333009004592896, "learning_rate": 0.0004926059134908173, "loss": 1.3329, "step": 870 }, { "epoch": 0.15538310587815538, "grad_norm": 0.6089525818824768, "learning_rate": 0.0004925889903530973, "loss": 1.257, "step": 871 }, { "epoch": 0.15556150209615557, "grad_norm": 0.6068894267082214, "learning_rate": 0.0004925720481625426, "loss": 1.126, "step": 872 }, { "epoch": 0.15573989831415574, "grad_norm": 0.594664990901947, "learning_rate": 0.0004925550869204835, "loss": 1.3666, "step": 873 }, { "epoch": 0.15591829453215592, "grad_norm": 0.6410947442054749, "learning_rate": 0.0004925381066282522, "loss": 1.6397, "step": 874 }, { "epoch": 0.1560966907501561, "grad_norm": 0.6146419644355774, "learning_rate": 0.0004925211072871824, "loss": 1.4024, "step": 875 }, { "epoch": 0.15627508696815628, "grad_norm": 0.6187763810157776, "learning_rate": 0.0004925040888986091, "loss": 1.4961, "step": 876 }, { "epoch": 0.15645348318615646, "grad_norm": 0.6881486773490906, "learning_rate": 0.0004924870514638691, "loss": 1.5297, "step": 877 }, { "epoch": 0.15663187940415663, "grad_norm": 0.69078129529953, "learning_rate": 0.0004924699949843004, "loss": 1.2651, "step": 878 }, { "epoch": 0.1568102756221568, "grad_norm": 0.5774492621421814, "learning_rate": 0.0004924529194612428, "loss": 1.4284, "step": 879 }, { "epoch": 0.156988671840157, "grad_norm": 0.5534959435462952, "learning_rate": 0.0004924358248960372, "loss": 1.4157, "step": 880 }, { "epoch": 0.15716706805815717, "grad_norm": 0.5735335350036621, "learning_rate": 0.000492418711290026, "loss": 1.1534, "step": 881 }, { "epoch": 0.15734546427615734, "grad_norm": 0.5932490229606628, "learning_rate": 0.0004924015786445537, "loss": 1.3835, "step": 882 }, { "epoch": 0.15752386049415754, "grad_norm": 0.6592042446136475, "learning_rate": 0.0004923844269609657, "loss": 1.2475, "step": 883 }, { "epoch": 0.1577022567121577, "grad_norm": 0.6756531000137329, "learning_rate": 0.0004923672562406092, "loss": 1.5197, "step": 884 }, { "epoch": 0.15788065293015788, "grad_norm": 0.6795757412910461, "learning_rate": 0.0004923500664848326, "loss": 1.466, "step": 885 }, { "epoch": 0.15805904914815805, "grad_norm": 0.6750994920730591, "learning_rate": 0.0004923328576949862, "loss": 1.494, "step": 886 }, { "epoch": 0.15823744536615825, "grad_norm": 0.5709784030914307, "learning_rate": 0.0004923156298724213, "loss": 1.2837, "step": 887 }, { "epoch": 0.15841584158415842, "grad_norm": 0.6380211114883423, "learning_rate": 0.000492298383018491, "loss": 1.4147, "step": 888 }, { "epoch": 0.1585942378021586, "grad_norm": 0.6981920599937439, "learning_rate": 0.0004922811171345502, "loss": 1.297, "step": 889 }, { "epoch": 0.15877263402015876, "grad_norm": 0.6991307139396667, "learning_rate": 0.0004922638322219546, "loss": 1.445, "step": 890 }, { "epoch": 0.15895103023815896, "grad_norm": 0.6050926446914673, "learning_rate": 0.000492246528282062, "loss": 1.3701, "step": 891 }, { "epoch": 0.15912942645615913, "grad_norm": 0.6252057552337646, "learning_rate": 0.0004922292053162312, "loss": 1.1218, "step": 892 }, { "epoch": 0.1593078226741593, "grad_norm": 0.6167547702789307, "learning_rate": 0.0004922118633258228, "loss": 1.2547, "step": 893 }, { "epoch": 0.1594862188921595, "grad_norm": 0.5581344366073608, "learning_rate": 0.0004921945023121989, "loss": 1.2666, "step": 894 }, { "epoch": 0.15966461511015967, "grad_norm": 0.6345050930976868, "learning_rate": 0.0004921771222767231, "loss": 1.3042, "step": 895 }, { "epoch": 0.15984301132815984, "grad_norm": 0.6487632393836975, "learning_rate": 0.0004921597232207604, "loss": 1.3426, "step": 896 }, { "epoch": 0.16002140754616, "grad_norm": 0.6063089966773987, "learning_rate": 0.0004921423051456772, "loss": 1.2359, "step": 897 }, { "epoch": 0.1601998037641602, "grad_norm": 0.6432583928108215, "learning_rate": 0.0004921248680528414, "loss": 1.2015, "step": 898 }, { "epoch": 0.16037819998216038, "grad_norm": 0.6074740290641785, "learning_rate": 0.0004921074119436229, "loss": 1.0048, "step": 899 }, { "epoch": 0.16055659620016055, "grad_norm": 0.6494312882423401, "learning_rate": 0.0004920899368193923, "loss": 1.2296, "step": 900 }, { "epoch": 0.16073499241816072, "grad_norm": 0.6664918661117554, "learning_rate": 0.0004920724426815222, "loss": 1.4211, "step": 901 }, { "epoch": 0.16091338863616092, "grad_norm": 0.6054812073707581, "learning_rate": 0.0004920549295313867, "loss": 1.1764, "step": 902 }, { "epoch": 0.1610917848541611, "grad_norm": 0.5707181096076965, "learning_rate": 0.0004920373973703612, "loss": 1.3884, "step": 903 }, { "epoch": 0.16127018107216126, "grad_norm": 0.5909110307693481, "learning_rate": 0.0004920198461998227, "loss": 1.2596, "step": 904 }, { "epoch": 0.16144857729016146, "grad_norm": 0.592135488986969, "learning_rate": 0.0004920022760211496, "loss": 1.2927, "step": 905 }, { "epoch": 0.16162697350816163, "grad_norm": 0.6415541172027588, "learning_rate": 0.0004919846868357218, "loss": 1.3682, "step": 906 }, { "epoch": 0.1618053697261618, "grad_norm": 0.5409737229347229, "learning_rate": 0.0004919670786449208, "loss": 1.2196, "step": 907 }, { "epoch": 0.16198376594416197, "grad_norm": 25.429161071777344, "learning_rate": 0.0004919494514501298, "loss": 1.6991, "step": 908 }, { "epoch": 0.16216216216216217, "grad_norm": 4.827995777130127, "learning_rate": 0.0004919318052527328, "loss": 1.4752, "step": 909 }, { "epoch": 0.16234055838016234, "grad_norm": 0.8540498614311218, "learning_rate": 0.0004919141400541161, "loss": 1.6279, "step": 910 }, { "epoch": 0.1625189545981625, "grad_norm": 0.6418678760528564, "learning_rate": 0.0004918964558556668, "loss": 1.4733, "step": 911 }, { "epoch": 0.16269735081616268, "grad_norm": 0.7732519507408142, "learning_rate": 0.0004918787526587739, "loss": 1.2503, "step": 912 }, { "epoch": 0.16287574703416288, "grad_norm": 0.678031325340271, "learning_rate": 0.000491861030464828, "loss": 1.208, "step": 913 }, { "epoch": 0.16305414325216305, "grad_norm": 0.6401492357254028, "learning_rate": 0.0004918432892752208, "loss": 1.0655, "step": 914 }, { "epoch": 0.16323253947016322, "grad_norm": 0.904754102230072, "learning_rate": 0.0004918255290913457, "loss": 1.3065, "step": 915 }, { "epoch": 0.16341093568816342, "grad_norm": 0.6582340002059937, "learning_rate": 0.0004918077499145977, "loss": 1.3543, "step": 916 }, { "epoch": 0.1635893319061636, "grad_norm": 1.050347089767456, "learning_rate": 0.000491789951746373, "loss": 1.3799, "step": 917 }, { "epoch": 0.16376772812416376, "grad_norm": 6.393340587615967, "learning_rate": 0.0004917721345880696, "loss": 1.2498, "step": 918 }, { "epoch": 0.16394612434216393, "grad_norm": 4.43874454498291, "learning_rate": 0.0004917542984410867, "loss": 1.3987, "step": 919 }, { "epoch": 0.16412452056016413, "grad_norm": 2.1143059730529785, "learning_rate": 0.0004917364433068253, "loss": 1.4354, "step": 920 }, { "epoch": 0.1643029167781643, "grad_norm": 2.551389455795288, "learning_rate": 0.0004917185691866876, "loss": 1.4108, "step": 921 }, { "epoch": 0.16448131299616447, "grad_norm": 4.1894426345825195, "learning_rate": 0.0004917006760820776, "loss": 1.4468, "step": 922 }, { "epoch": 0.16465970921416467, "grad_norm": 0.6970653533935547, "learning_rate": 0.0004916827639944005, "loss": 1.3746, "step": 923 }, { "epoch": 0.16483810543216484, "grad_norm": 0.559752345085144, "learning_rate": 0.0004916648329250631, "loss": 1.1099, "step": 924 }, { "epoch": 0.16501650165016502, "grad_norm": 0.6053521037101746, "learning_rate": 0.0004916468828754737, "loss": 1.3007, "step": 925 }, { "epoch": 0.16519489786816519, "grad_norm": 0.5920356512069702, "learning_rate": 0.000491628913847042, "loss": 1.5636, "step": 926 }, { "epoch": 0.16537329408616538, "grad_norm": 0.5610060095787048, "learning_rate": 0.0004916109258411795, "loss": 1.0346, "step": 927 }, { "epoch": 0.16555169030416556, "grad_norm": 0.6758180260658264, "learning_rate": 0.0004915929188592989, "loss": 1.0975, "step": 928 }, { "epoch": 0.16573008652216573, "grad_norm": 0.6995829343795776, "learning_rate": 0.0004915748929028145, "loss": 1.574, "step": 929 }, { "epoch": 0.1659084827401659, "grad_norm": 0.6252689957618713, "learning_rate": 0.0004915568479731417, "loss": 1.3219, "step": 930 }, { "epoch": 0.1660868789581661, "grad_norm": 0.7008538842201233, "learning_rate": 0.0004915387840716982, "loss": 1.3542, "step": 931 }, { "epoch": 0.16626527517616627, "grad_norm": 0.6104601621627808, "learning_rate": 0.0004915207011999025, "loss": 1.2171, "step": 932 }, { "epoch": 0.16644367139416644, "grad_norm": 0.5563907623291016, "learning_rate": 0.0004915025993591748, "loss": 1.2237, "step": 933 }, { "epoch": 0.16662206761216664, "grad_norm": 0.7092035412788391, "learning_rate": 0.000491484478550937, "loss": 1.5328, "step": 934 }, { "epoch": 0.1668004638301668, "grad_norm": 0.5745419263839722, "learning_rate": 0.0004914663387766121, "loss": 1.2546, "step": 935 }, { "epoch": 0.16697886004816698, "grad_norm": 0.6714827418327332, "learning_rate": 0.0004914481800376248, "loss": 1.2575, "step": 936 }, { "epoch": 0.16715725626616715, "grad_norm": 0.6761769652366638, "learning_rate": 0.0004914300023354015, "loss": 1.4926, "step": 937 }, { "epoch": 0.16733565248416735, "grad_norm": 0.5255780220031738, "learning_rate": 0.0004914118056713695, "loss": 1.0914, "step": 938 }, { "epoch": 0.16751404870216752, "grad_norm": 0.5681695938110352, "learning_rate": 0.0004913935900469584, "loss": 1.1869, "step": 939 }, { "epoch": 0.1676924449201677, "grad_norm": 0.6561183929443359, "learning_rate": 0.0004913753554635985, "loss": 1.6089, "step": 940 }, { "epoch": 0.16787084113816786, "grad_norm": 0.6278382539749146, "learning_rate": 0.0004913571019227221, "loss": 1.3546, "step": 941 }, { "epoch": 0.16804923735616806, "grad_norm": 0.6552897095680237, "learning_rate": 0.0004913388294257627, "loss": 1.1408, "step": 942 }, { "epoch": 0.16822763357416823, "grad_norm": 0.7079983353614807, "learning_rate": 0.0004913205379741555, "loss": 1.704, "step": 943 }, { "epoch": 0.1684060297921684, "grad_norm": 0.8556866645812988, "learning_rate": 0.0004913022275693372, "loss": 1.3536, "step": 944 }, { "epoch": 0.1685844260101686, "grad_norm": 0.6923747062683105, "learning_rate": 0.0004912838982127456, "loss": 1.2432, "step": 945 }, { "epoch": 0.16876282222816877, "grad_norm": 0.6427940726280212, "learning_rate": 0.0004912655499058207, "loss": 1.0336, "step": 946 }, { "epoch": 0.16894121844616894, "grad_norm": 0.7308516502380371, "learning_rate": 0.0004912471826500032, "loss": 1.5323, "step": 947 }, { "epoch": 0.1691196146641691, "grad_norm": 0.6658211946487427, "learning_rate": 0.0004912287964467358, "loss": 1.3749, "step": 948 }, { "epoch": 0.1692980108821693, "grad_norm": 0.5936264395713806, "learning_rate": 0.0004912103912974626, "loss": 1.1564, "step": 949 }, { "epoch": 0.16947640710016948, "grad_norm": 0.6166514754295349, "learning_rate": 0.000491191967203629, "loss": 1.5516, "step": 950 }, { "epoch": 0.16965480331816965, "grad_norm": 0.6150327920913696, "learning_rate": 0.0004911735241666821, "loss": 1.4399, "step": 951 }, { "epoch": 0.16983319953616982, "grad_norm": 0.6488068699836731, "learning_rate": 0.0004911550621880704, "loss": 1.4561, "step": 952 }, { "epoch": 0.17001159575417002, "grad_norm": 0.565139889717102, "learning_rate": 0.0004911365812692439, "loss": 1.3605, "step": 953 }, { "epoch": 0.1701899919721702, "grad_norm": 0.5767154693603516, "learning_rate": 0.0004911180814116541, "loss": 1.2484, "step": 954 }, { "epoch": 0.17036838819017036, "grad_norm": 0.6060677766799927, "learning_rate": 0.0004910995626167539, "loss": 1.1449, "step": 955 }, { "epoch": 0.17054678440817056, "grad_norm": 0.5569210648536682, "learning_rate": 0.0004910810248859979, "loss": 1.2146, "step": 956 }, { "epoch": 0.17072518062617073, "grad_norm": 1.7405076026916504, "learning_rate": 0.0004910624682208418, "loss": 1.1172, "step": 957 }, { "epoch": 0.1709035768441709, "grad_norm": 0.6181856393814087, "learning_rate": 0.0004910438926227433, "loss": 1.3699, "step": 958 }, { "epoch": 0.17108197306217107, "grad_norm": 0.6462327837944031, "learning_rate": 0.0004910252980931611, "loss": 1.3112, "step": 959 }, { "epoch": 0.17126036928017127, "grad_norm": 0.5451391935348511, "learning_rate": 0.0004910066846335558, "loss": 1.1995, "step": 960 }, { "epoch": 0.17143876549817144, "grad_norm": 0.5842812061309814, "learning_rate": 0.0004909880522453891, "loss": 1.2324, "step": 961 }, { "epoch": 0.1716171617161716, "grad_norm": 0.56785649061203, "learning_rate": 0.0004909694009301247, "loss": 1.0771, "step": 962 }, { "epoch": 0.17179555793417178, "grad_norm": 0.6656644344329834, "learning_rate": 0.000490950730689227, "loss": 1.3911, "step": 963 }, { "epoch": 0.17197395415217198, "grad_norm": 0.6386826038360596, "learning_rate": 0.0004909320415241627, "loss": 1.1475, "step": 964 }, { "epoch": 0.17215235037017215, "grad_norm": 0.6195803284645081, "learning_rate": 0.0004909133334363996, "loss": 1.3983, "step": 965 }, { "epoch": 0.17233074658817232, "grad_norm": 0.6595250368118286, "learning_rate": 0.000490894606427407, "loss": 1.1298, "step": 966 }, { "epoch": 0.17250914280617252, "grad_norm": 0.5828666090965271, "learning_rate": 0.0004908758604986555, "loss": 1.0375, "step": 967 }, { "epoch": 0.1726875390241727, "grad_norm": 0.5903612375259399, "learning_rate": 0.0004908570956516176, "loss": 1.3681, "step": 968 }, { "epoch": 0.17286593524217286, "grad_norm": 0.587887167930603, "learning_rate": 0.0004908383118877672, "loss": 1.1854, "step": 969 }, { "epoch": 0.17304433146017303, "grad_norm": 0.5458610653877258, "learning_rate": 0.0004908195092085794, "loss": 1.3502, "step": 970 }, { "epoch": 0.17322272767817323, "grad_norm": 0.5689892768859863, "learning_rate": 0.0004908006876155309, "loss": 1.072, "step": 971 }, { "epoch": 0.1734011238961734, "grad_norm": 0.5631921887397766, "learning_rate": 0.0004907818471101, "loss": 1.1866, "step": 972 }, { "epoch": 0.17357952011417357, "grad_norm": 2.34973406791687, "learning_rate": 0.0004907629876937665, "loss": 1.3553, "step": 973 }, { "epoch": 0.17375791633217375, "grad_norm": 0.5664592385292053, "learning_rate": 0.0004907441093680115, "loss": 1.2058, "step": 974 }, { "epoch": 0.17393631255017394, "grad_norm": 0.6143642663955688, "learning_rate": 0.0004907252121343178, "loss": 1.4795, "step": 975 }, { "epoch": 0.17411470876817411, "grad_norm": 0.5856114625930786, "learning_rate": 0.0004907062959941695, "loss": 1.2277, "step": 976 }, { "epoch": 0.17429310498617429, "grad_norm": 0.5994637608528137, "learning_rate": 0.0004906873609490523, "loss": 1.3448, "step": 977 }, { "epoch": 0.17447150120417448, "grad_norm": 0.5852928161621094, "learning_rate": 0.0004906684070004534, "loss": 1.1422, "step": 978 }, { "epoch": 0.17464989742217465, "grad_norm": 1.4769107103347778, "learning_rate": 0.0004906494341498614, "loss": 1.351, "step": 979 }, { "epoch": 0.17482829364017483, "grad_norm": 0.6843876838684082, "learning_rate": 0.0004906304423987663, "loss": 1.1986, "step": 980 }, { "epoch": 0.175006689858175, "grad_norm": 0.7055404782295227, "learning_rate": 0.0004906114317486599, "loss": 1.3418, "step": 981 }, { "epoch": 0.1751850860761752, "grad_norm": 0.6515609622001648, "learning_rate": 0.000490592402201035, "loss": 1.011, "step": 982 }, { "epoch": 0.17536348229417537, "grad_norm": 0.6670236587524414, "learning_rate": 0.0004905733537573867, "loss": 1.2945, "step": 983 }, { "epoch": 0.17554187851217554, "grad_norm": 0.6488009691238403, "learning_rate": 0.0004905542864192104, "loss": 1.3415, "step": 984 }, { "epoch": 0.1757202747301757, "grad_norm": 1.1373000144958496, "learning_rate": 0.0004905352001880041, "loss": 1.562, "step": 985 }, { "epoch": 0.1758986709481759, "grad_norm": 0.5701956748962402, "learning_rate": 0.0004905160950652667, "loss": 1.0377, "step": 986 }, { "epoch": 0.17607706716617608, "grad_norm": 2.7552855014801025, "learning_rate": 0.0004904969710524987, "loss": 1.2074, "step": 987 }, { "epoch": 0.17625546338417625, "grad_norm": 0.6393254995346069, "learning_rate": 0.0004904778281512021, "loss": 1.1729, "step": 988 }, { "epoch": 0.17643385960217645, "grad_norm": 0.6372584104537964, "learning_rate": 0.0004904586663628804, "loss": 1.2313, "step": 989 }, { "epoch": 0.17661225582017662, "grad_norm": 0.6509175896644592, "learning_rate": 0.0004904394856890384, "loss": 1.3779, "step": 990 }, { "epoch": 0.1767906520381768, "grad_norm": 0.668499231338501, "learning_rate": 0.0004904202861311827, "loss": 1.3286, "step": 991 }, { "epoch": 0.17696904825617696, "grad_norm": 0.6670610308647156, "learning_rate": 0.0004904010676908213, "loss": 1.3467, "step": 992 }, { "epoch": 0.17714744447417716, "grad_norm": 0.6278952956199646, "learning_rate": 0.0004903818303694633, "loss": 1.2545, "step": 993 }, { "epoch": 0.17732584069217733, "grad_norm": 0.6193589568138123, "learning_rate": 0.0004903625741686199, "loss": 1.2678, "step": 994 }, { "epoch": 0.1775042369101775, "grad_norm": 0.6413539052009583, "learning_rate": 0.0004903432990898033, "loss": 1.5118, "step": 995 }, { "epoch": 0.17768263312817767, "grad_norm": 1.0503791570663452, "learning_rate": 0.0004903240051345276, "loss": 1.3905, "step": 996 }, { "epoch": 0.17786102934617787, "grad_norm": 0.5560838580131531, "learning_rate": 0.0004903046923043077, "loss": 0.9987, "step": 997 }, { "epoch": 0.17803942556417804, "grad_norm": 0.7070960402488708, "learning_rate": 0.0004902853606006609, "loss": 1.3521, "step": 998 }, { "epoch": 0.1782178217821782, "grad_norm": 0.7161464691162109, "learning_rate": 0.0004902660100251051, "loss": 1.1022, "step": 999 }, { "epoch": 0.1783962180001784, "grad_norm": 0.6511627435684204, "learning_rate": 0.0004902466405791604, "loss": 1.284, "step": 1000 }, { "epoch": 0.17857461421817858, "grad_norm": 0.6245794892311096, "learning_rate": 0.0004902272522643478, "loss": 1.1996, "step": 1001 }, { "epoch": 0.17875301043617875, "grad_norm": 0.6342217922210693, "learning_rate": 0.0004902078450821904, "loss": 1.2702, "step": 1002 }, { "epoch": 0.17893140665417892, "grad_norm": 0.5756664872169495, "learning_rate": 0.0004901884190342121, "loss": 1.2978, "step": 1003 }, { "epoch": 0.17910980287217912, "grad_norm": 0.653351902961731, "learning_rate": 0.0004901689741219386, "loss": 1.4452, "step": 1004 }, { "epoch": 0.1792881990901793, "grad_norm": 0.6309232115745544, "learning_rate": 0.0004901495103468974, "loss": 1.4723, "step": 1005 }, { "epoch": 0.17946659530817946, "grad_norm": 0.617276668548584, "learning_rate": 0.000490130027710617, "loss": 1.2532, "step": 1006 }, { "epoch": 0.17964499152617963, "grad_norm": 0.599210798740387, "learning_rate": 0.0004901105262146275, "loss": 1.2724, "step": 1007 }, { "epoch": 0.17982338774417983, "grad_norm": 0.9026014804840088, "learning_rate": 0.0004900910058604606, "loss": 1.2691, "step": 1008 }, { "epoch": 0.18000178396218, "grad_norm": 0.7611701488494873, "learning_rate": 0.0004900714666496494, "loss": 1.2221, "step": 1009 }, { "epoch": 0.18018018018018017, "grad_norm": 0.6050606966018677, "learning_rate": 0.0004900519085837285, "loss": 1.2423, "step": 1010 }, { "epoch": 0.18035857639818037, "grad_norm": 0.6327321529388428, "learning_rate": 0.0004900323316642341, "loss": 1.5898, "step": 1011 }, { "epoch": 0.18053697261618054, "grad_norm": 3.784043788909912, "learning_rate": 0.0004900127358927036, "loss": 1.3516, "step": 1012 }, { "epoch": 0.1807153688341807, "grad_norm": 1.5190318822860718, "learning_rate": 0.0004899931212706761, "loss": 1.2567, "step": 1013 }, { "epoch": 0.18089376505218088, "grad_norm": 0.7018091082572937, "learning_rate": 0.0004899734877996922, "loss": 1.3094, "step": 1014 }, { "epoch": 0.18107216127018108, "grad_norm": 1.1731371879577637, "learning_rate": 0.0004899538354812937, "loss": 1.3155, "step": 1015 }, { "epoch": 0.18125055748818125, "grad_norm": 0.6494358777999878, "learning_rate": 0.0004899341643170243, "loss": 1.2901, "step": 1016 }, { "epoch": 0.18142895370618142, "grad_norm": 0.6760775446891785, "learning_rate": 0.0004899144743084289, "loss": 1.3969, "step": 1017 }, { "epoch": 0.1816073499241816, "grad_norm": 0.6156429648399353, "learning_rate": 0.000489894765457054, "loss": 1.1901, "step": 1018 }, { "epoch": 0.1817857461421818, "grad_norm": 0.9414156079292297, "learning_rate": 0.0004898750377644474, "loss": 1.1387, "step": 1019 }, { "epoch": 0.18196414236018196, "grad_norm": 0.6034333109855652, "learning_rate": 0.0004898552912321586, "loss": 1.3314, "step": 1020 }, { "epoch": 0.18214253857818213, "grad_norm": 0.5967845916748047, "learning_rate": 0.0004898355258617383, "loss": 1.3435, "step": 1021 }, { "epoch": 0.18232093479618233, "grad_norm": 0.6214697957038879, "learning_rate": 0.000489815741654739, "loss": 1.1333, "step": 1022 }, { "epoch": 0.1824993310141825, "grad_norm": 0.7541596293449402, "learning_rate": 0.0004897959386127148, "loss": 1.1722, "step": 1023 }, { "epoch": 0.18267772723218267, "grad_norm": 0.6246985793113708, "learning_rate": 0.0004897761167372205, "loss": 1.6439, "step": 1024 }, { "epoch": 0.18285612345018284, "grad_norm": 2.4204161167144775, "learning_rate": 0.0004897562760298134, "loss": 1.442, "step": 1025 }, { "epoch": 0.18303451966818304, "grad_norm": 8.560677528381348, "learning_rate": 0.0004897364164920514, "loss": 1.6211, "step": 1026 }, { "epoch": 0.18321291588618321, "grad_norm": 0.8490382432937622, "learning_rate": 0.0004897165381254945, "loss": 1.2842, "step": 1027 }, { "epoch": 0.18339131210418338, "grad_norm": 0.6359208226203918, "learning_rate": 0.0004896966409317038, "loss": 1.1782, "step": 1028 }, { "epoch": 0.18356970832218358, "grad_norm": 0.6399941444396973, "learning_rate": 0.0004896767249122421, "loss": 1.1316, "step": 1029 }, { "epoch": 0.18374810454018375, "grad_norm": 0.680789589881897, "learning_rate": 0.0004896567900686736, "loss": 1.2123, "step": 1030 }, { "epoch": 0.18392650075818393, "grad_norm": 0.7316074371337891, "learning_rate": 0.0004896368364025639, "loss": 1.4407, "step": 1031 }, { "epoch": 0.1841048969761841, "grad_norm": 1.80794095993042, "learning_rate": 0.0004896168639154802, "loss": 1.3116, "step": 1032 }, { "epoch": 0.1842832931941843, "grad_norm": 1.869382619857788, "learning_rate": 0.000489596872608991, "loss": 1.3759, "step": 1033 }, { "epoch": 0.18446168941218447, "grad_norm": 0.9936087727546692, "learning_rate": 0.0004895768624846667, "loss": 1.4019, "step": 1034 }, { "epoch": 0.18464008563018464, "grad_norm": 1.2543047666549683, "learning_rate": 0.0004895568335440786, "loss": 1.2554, "step": 1035 }, { "epoch": 0.1848184818481848, "grad_norm": 1.1524609327316284, "learning_rate": 0.0004895367857888, "loss": 1.1582, "step": 1036 }, { "epoch": 0.184996878066185, "grad_norm": 0.8680905103683472, "learning_rate": 0.0004895167192204053, "loss": 1.4434, "step": 1037 }, { "epoch": 0.18517527428418518, "grad_norm": 0.7361879348754883, "learning_rate": 0.0004894966338404705, "loss": 1.3415, "step": 1038 }, { "epoch": 0.18535367050218535, "grad_norm": 0.6387485861778259, "learning_rate": 0.0004894765296505732, "loss": 1.2213, "step": 1039 }, { "epoch": 0.18553206672018555, "grad_norm": 2.19881534576416, "learning_rate": 0.0004894564066522923, "loss": 1.1374, "step": 1040 }, { "epoch": 0.18571046293818572, "grad_norm": 0.7040894627571106, "learning_rate": 0.0004894362648472082, "loss": 1.3711, "step": 1041 }, { "epoch": 0.1858888591561859, "grad_norm": 0.6495254039764404, "learning_rate": 0.000489416104236903, "loss": 1.106, "step": 1042 }, { "epoch": 0.18606725537418606, "grad_norm": 0.832444429397583, "learning_rate": 0.00048939592482296, "loss": 1.3915, "step": 1043 }, { "epoch": 0.18624565159218626, "grad_norm": 0.5508606433868408, "learning_rate": 0.000489375726606964, "loss": 0.9048, "step": 1044 }, { "epoch": 0.18642404781018643, "grad_norm": 0.6425449848175049, "learning_rate": 0.0004893555095905013, "loss": 1.3206, "step": 1045 }, { "epoch": 0.1866024440281866, "grad_norm": 0.5754274129867554, "learning_rate": 0.0004893352737751601, "loss": 1.2648, "step": 1046 }, { "epoch": 0.18678084024618677, "grad_norm": 0.5647794604301453, "learning_rate": 0.0004893150191625295, "loss": 1.171, "step": 1047 }, { "epoch": 0.18695923646418697, "grad_norm": 0.6045464277267456, "learning_rate": 0.0004892947457542002, "loss": 0.9138, "step": 1048 }, { "epoch": 0.18713763268218714, "grad_norm": 0.7438944578170776, "learning_rate": 0.0004892744535517646, "loss": 1.4838, "step": 1049 }, { "epoch": 0.1873160289001873, "grad_norm": 0.5550808310508728, "learning_rate": 0.0004892541425568163, "loss": 1.2546, "step": 1050 }, { "epoch": 0.1874944251181875, "grad_norm": 0.574200451374054, "learning_rate": 0.0004892338127709507, "loss": 1.2506, "step": 1051 }, { "epoch": 0.18767282133618768, "grad_norm": 1.666972041130066, "learning_rate": 0.0004892134641957642, "loss": 1.389, "step": 1052 }, { "epoch": 0.18785121755418785, "grad_norm": 0.6545609831809998, "learning_rate": 0.0004891930968328554, "loss": 1.2801, "step": 1053 }, { "epoch": 0.18802961377218802, "grad_norm": 1.3686062097549438, "learning_rate": 0.0004891727106838236, "loss": 1.2763, "step": 1054 }, { "epoch": 0.18820800999018822, "grad_norm": 1.248139500617981, "learning_rate": 0.0004891523057502701, "loss": 1.4209, "step": 1055 }, { "epoch": 0.1883864062081884, "grad_norm": 0.7310791611671448, "learning_rate": 0.0004891318820337974, "loss": 1.4907, "step": 1056 }, { "epoch": 0.18856480242618856, "grad_norm": 0.6431208848953247, "learning_rate": 0.0004891114395360096, "loss": 1.5441, "step": 1057 }, { "epoch": 0.18874319864418873, "grad_norm": 2.24438738822937, "learning_rate": 0.0004890909782585121, "loss": 1.2141, "step": 1058 }, { "epoch": 0.18892159486218893, "grad_norm": 4.204579830169678, "learning_rate": 0.0004890704982029122, "loss": 1.4053, "step": 1059 }, { "epoch": 0.1890999910801891, "grad_norm": 0.8106665015220642, "learning_rate": 0.0004890499993708182, "loss": 1.1461, "step": 1060 }, { "epoch": 0.18927838729818927, "grad_norm": 0.7268997430801392, "learning_rate": 0.0004890294817638401, "loss": 1.1508, "step": 1061 }, { "epoch": 0.18945678351618947, "grad_norm": 0.9513046741485596, "learning_rate": 0.0004890089453835894, "loss": 1.2753, "step": 1062 }, { "epoch": 0.18963517973418964, "grad_norm": 0.7538788318634033, "learning_rate": 0.000488988390231679, "loss": 1.088, "step": 1063 }, { "epoch": 0.1898135759521898, "grad_norm": 0.6763918399810791, "learning_rate": 0.0004889678163097233, "loss": 1.1825, "step": 1064 }, { "epoch": 0.18999197217018998, "grad_norm": 0.7881626486778259, "learning_rate": 0.0004889472236193381, "loss": 1.3298, "step": 1065 }, { "epoch": 0.19017036838819018, "grad_norm": 0.6589952111244202, "learning_rate": 0.0004889266121621407, "loss": 1.2218, "step": 1066 }, { "epoch": 0.19034876460619035, "grad_norm": 0.7166324853897095, "learning_rate": 0.0004889059819397501, "loss": 1.5962, "step": 1067 }, { "epoch": 0.19052716082419052, "grad_norm": 0.6386669874191284, "learning_rate": 0.0004888853329537865, "loss": 1.2342, "step": 1068 }, { "epoch": 0.1907055570421907, "grad_norm": 0.7325186729431152, "learning_rate": 0.0004888646652058716, "loss": 1.2617, "step": 1069 }, { "epoch": 0.1908839532601909, "grad_norm": 0.5728015899658203, "learning_rate": 0.0004888439786976287, "loss": 1.3414, "step": 1070 }, { "epoch": 0.19106234947819106, "grad_norm": 0.6659836769104004, "learning_rate": 0.0004888232734306825, "loss": 1.2854, "step": 1071 }, { "epoch": 0.19124074569619123, "grad_norm": 0.7242840528488159, "learning_rate": 0.0004888025494066592, "loss": 1.4503, "step": 1072 }, { "epoch": 0.19141914191419143, "grad_norm": 0.5824533700942993, "learning_rate": 0.0004887818066271864, "loss": 1.3502, "step": 1073 }, { "epoch": 0.1915975381321916, "grad_norm": 0.6332762241363525, "learning_rate": 0.0004887610450938932, "loss": 1.0815, "step": 1074 }, { "epoch": 0.19177593435019177, "grad_norm": 1.1359984874725342, "learning_rate": 0.0004887402648084104, "loss": 1.2975, "step": 1075 }, { "epoch": 0.19195433056819194, "grad_norm": 0.8233804106712341, "learning_rate": 0.0004887194657723699, "loss": 1.1015, "step": 1076 }, { "epoch": 0.19213272678619214, "grad_norm": 0.6143079996109009, "learning_rate": 0.0004886986479874052, "loss": 1.2998, "step": 1077 }, { "epoch": 0.1923111230041923, "grad_norm": 1.4513376951217651, "learning_rate": 0.0004886778114551515, "loss": 1.3469, "step": 1078 }, { "epoch": 0.19248951922219248, "grad_norm": 0.6782541275024414, "learning_rate": 0.0004886569561772452, "loss": 1.1655, "step": 1079 }, { "epoch": 0.19266791544019266, "grad_norm": 0.6781834959983826, "learning_rate": 0.0004886360821553242, "loss": 1.4877, "step": 1080 }, { "epoch": 0.19284631165819285, "grad_norm": 0.6629863977432251, "learning_rate": 0.000488615189391028, "loss": 1.5273, "step": 1081 }, { "epoch": 0.19302470787619302, "grad_norm": 0.5616167783737183, "learning_rate": 0.0004885942778859976, "loss": 1.1904, "step": 1082 }, { "epoch": 0.1932031040941932, "grad_norm": 0.5528833270072937, "learning_rate": 0.0004885733476418752, "loss": 0.9634, "step": 1083 }, { "epoch": 0.1933815003121934, "grad_norm": 0.7818634510040283, "learning_rate": 0.0004885523986603048, "loss": 1.3033, "step": 1084 }, { "epoch": 0.19355989653019356, "grad_norm": 0.6581936478614807, "learning_rate": 0.0004885314309429316, "loss": 1.4896, "step": 1085 }, { "epoch": 0.19373829274819374, "grad_norm": 0.652714192867279, "learning_rate": 0.0004885104444914026, "loss": 1.2453, "step": 1086 }, { "epoch": 0.1939166889661939, "grad_norm": 3.307530641555786, "learning_rate": 0.0004884894393073658, "loss": 1.173, "step": 1087 }, { "epoch": 0.1940950851841941, "grad_norm": 0.685991644859314, "learning_rate": 0.0004884684153924711, "loss": 0.9765, "step": 1088 }, { "epoch": 0.19427348140219428, "grad_norm": 0.678576648235321, "learning_rate": 0.0004884473727483697, "loss": 1.1766, "step": 1089 }, { "epoch": 0.19445187762019445, "grad_norm": 0.7492129802703857, "learning_rate": 0.0004884263113767143, "loss": 1.4711, "step": 1090 }, { "epoch": 0.19463027383819462, "grad_norm": 0.6048595309257507, "learning_rate": 0.0004884052312791588, "loss": 1.0355, "step": 1091 }, { "epoch": 0.19480867005619482, "grad_norm": 0.6212369203567505, "learning_rate": 0.0004883841324573592, "loss": 1.2895, "step": 1092 }, { "epoch": 0.194987066274195, "grad_norm": 0.5634220838546753, "learning_rate": 0.0004883630149129725, "loss": 0.9151, "step": 1093 }, { "epoch": 0.19516546249219516, "grad_norm": 0.6526708602905273, "learning_rate": 0.000488341878647657, "loss": 1.3422, "step": 1094 }, { "epoch": 0.19534385871019536, "grad_norm": 0.5777380466461182, "learning_rate": 0.000488320723663073, "loss": 1.3257, "step": 1095 }, { "epoch": 0.19552225492819553, "grad_norm": 0.9771785736083984, "learning_rate": 0.0004882995499608819, "loss": 1.4458, "step": 1096 }, { "epoch": 0.1957006511461957, "grad_norm": 0.942666232585907, "learning_rate": 0.00048827835754274674, "loss": 1.3359, "step": 1097 }, { "epoch": 0.19587904736419587, "grad_norm": 0.6071109175682068, "learning_rate": 0.00048825714641033185, "loss": 1.2979, "step": 1098 }, { "epoch": 0.19605744358219607, "grad_norm": 0.9285879135131836, "learning_rate": 0.0004882359165653033, "loss": 1.2042, "step": 1099 }, { "epoch": 0.19623583980019624, "grad_norm": 0.6125981211662292, "learning_rate": 0.00048821466800932835, "loss": 1.2767, "step": 1100 }, { "epoch": 0.1964142360181964, "grad_norm": 0.549712061882019, "learning_rate": 0.00048819340074407583, "loss": 1.0559, "step": 1101 }, { "epoch": 0.19659263223619658, "grad_norm": 0.6521591544151306, "learning_rate": 0.00048817211477121617, "loss": 1.0833, "step": 1102 }, { "epoch": 0.19677102845419678, "grad_norm": 0.6663747429847717, "learning_rate": 0.0004881508100924211, "loss": 1.4768, "step": 1103 }, { "epoch": 0.19694942467219695, "grad_norm": 1.3679718971252441, "learning_rate": 0.00048812948670936385, "loss": 1.43, "step": 1104 }, { "epoch": 0.19712782089019712, "grad_norm": 0.7124596238136292, "learning_rate": 0.0004881081446237192, "loss": 1.0491, "step": 1105 }, { "epoch": 0.19730621710819732, "grad_norm": 0.6204227209091187, "learning_rate": 0.0004880867838371634, "loss": 1.2674, "step": 1106 }, { "epoch": 0.1974846133261975, "grad_norm": 1.057226300239563, "learning_rate": 0.00048806540435137404, "loss": 1.2598, "step": 1107 }, { "epoch": 0.19766300954419766, "grad_norm": 0.7484321594238281, "learning_rate": 0.00048804400616803026, "loss": 1.2401, "step": 1108 }, { "epoch": 0.19784140576219783, "grad_norm": 12.229165077209473, "learning_rate": 0.0004880225892888126, "loss": 1.2016, "step": 1109 }, { "epoch": 0.19801980198019803, "grad_norm": 0.7973271608352661, "learning_rate": 0.0004880011537154033, "loss": 1.4108, "step": 1110 }, { "epoch": 0.1981981981981982, "grad_norm": 1.3274434804916382, "learning_rate": 0.0004879796994494858, "loss": 1.013, "step": 1111 }, { "epoch": 0.19837659441619837, "grad_norm": 0.8325060606002808, "learning_rate": 0.00048795822649274506, "loss": 1.3261, "step": 1112 }, { "epoch": 0.19855499063419854, "grad_norm": 0.9182140231132507, "learning_rate": 0.0004879367348468676, "loss": 1.1414, "step": 1113 }, { "epoch": 0.19873338685219874, "grad_norm": 0.7945131063461304, "learning_rate": 0.0004879152245135415, "loss": 1.3566, "step": 1114 }, { "epoch": 0.1989117830701989, "grad_norm": 0.7313281297683716, "learning_rate": 0.00048789369549445596, "loss": 1.2533, "step": 1115 }, { "epoch": 0.19909017928819908, "grad_norm": 0.7198520302772522, "learning_rate": 0.00048787214779130196, "loss": 1.4449, "step": 1116 }, { "epoch": 0.19926857550619928, "grad_norm": 0.6107659935951233, "learning_rate": 0.00048785058140577185, "loss": 1.1473, "step": 1117 }, { "epoch": 0.19944697172419945, "grad_norm": 0.6234009861946106, "learning_rate": 0.0004878289963395594, "loss": 1.0506, "step": 1118 }, { "epoch": 0.19962536794219962, "grad_norm": 0.6401305198669434, "learning_rate": 0.0004878073925943599, "loss": 1.2247, "step": 1119 }, { "epoch": 0.1998037641601998, "grad_norm": 0.6568447947502136, "learning_rate": 0.0004877857701718702, "loss": 1.2032, "step": 1120 }, { "epoch": 0.1999821603782, "grad_norm": 0.6193853616714478, "learning_rate": 0.0004877641290737884, "loss": 1.2807, "step": 1121 }, { "epoch": 0.20016055659620016, "grad_norm": 0.605709969997406, "learning_rate": 0.0004877424693018142, "loss": 1.3687, "step": 1122 }, { "epoch": 0.20033895281420033, "grad_norm": 0.5979405045509338, "learning_rate": 0.0004877207908576488, "loss": 1.1466, "step": 1123 }, { "epoch": 0.2005173490322005, "grad_norm": 0.5929616093635559, "learning_rate": 0.00048769909374299483, "loss": 1.3955, "step": 1124 }, { "epoch": 0.2006957452502007, "grad_norm": 0.6005091071128845, "learning_rate": 0.00048767737795955623, "loss": 1.468, "step": 1125 }, { "epoch": 0.20087414146820087, "grad_norm": 0.5641143918037415, "learning_rate": 0.0004876556435090387, "loss": 1.495, "step": 1126 }, { "epoch": 0.20105253768620104, "grad_norm": 0.6153658628463745, "learning_rate": 0.0004876338903931492, "loss": 1.1535, "step": 1127 }, { "epoch": 0.20123093390420124, "grad_norm": 0.5413753390312195, "learning_rate": 0.0004876121186135962, "loss": 1.002, "step": 1128 }, { "epoch": 0.2014093301222014, "grad_norm": 0.573294460773468, "learning_rate": 0.00048759032817208964, "loss": 1.3631, "step": 1129 }, { "epoch": 0.20158772634020158, "grad_norm": 0.569232702255249, "learning_rate": 0.00048756851907034093, "loss": 1.105, "step": 1130 }, { "epoch": 0.20176612255820175, "grad_norm": 0.6847321391105652, "learning_rate": 0.000487546691310063, "loss": 1.1282, "step": 1131 }, { "epoch": 0.20194451877620195, "grad_norm": 0.7410063743591309, "learning_rate": 0.00048752484489297023, "loss": 1.2903, "step": 1132 }, { "epoch": 0.20212291499420212, "grad_norm": 0.6502756476402283, "learning_rate": 0.0004875029798207783, "loss": 1.4029, "step": 1133 }, { "epoch": 0.2023013112122023, "grad_norm": 0.569385826587677, "learning_rate": 0.0004874810960952045, "loss": 1.2896, "step": 1134 }, { "epoch": 0.2024797074302025, "grad_norm": 0.5342479348182678, "learning_rate": 0.00048745919371796765, "loss": 1.1572, "step": 1135 }, { "epoch": 0.20265810364820266, "grad_norm": 0.750543475151062, "learning_rate": 0.0004874372726907879, "loss": 1.3087, "step": 1136 }, { "epoch": 0.20283649986620284, "grad_norm": 0.6126813292503357, "learning_rate": 0.00048741533301538685, "loss": 1.4071, "step": 1137 }, { "epoch": 0.203014896084203, "grad_norm": 0.648357093334198, "learning_rate": 0.00048739337469348785, "loss": 1.3212, "step": 1138 }, { "epoch": 0.2031932923022032, "grad_norm": 0.6305087208747864, "learning_rate": 0.00048737139772681525, "loss": 1.1886, "step": 1139 }, { "epoch": 0.20337168852020338, "grad_norm": 0.5908694863319397, "learning_rate": 0.0004873494021170953, "loss": 1.1184, "step": 1140 }, { "epoch": 0.20355008473820355, "grad_norm": 0.6326526999473572, "learning_rate": 0.0004873273878660555, "loss": 1.358, "step": 1141 }, { "epoch": 0.20372848095620372, "grad_norm": 0.9167863130569458, "learning_rate": 0.00048730535497542465, "loss": 1.3994, "step": 1142 }, { "epoch": 0.20390687717420392, "grad_norm": 0.5850123167037964, "learning_rate": 0.0004872833034469334, "loss": 1.4304, "step": 1143 }, { "epoch": 0.2040852733922041, "grad_norm": 0.5937606692314148, "learning_rate": 0.00048726123328231367, "loss": 1.1403, "step": 1144 }, { "epoch": 0.20426366961020426, "grad_norm": 0.6057407855987549, "learning_rate": 0.00048723914448329863, "loss": 1.5015, "step": 1145 }, { "epoch": 0.20444206582820446, "grad_norm": 0.5308231711387634, "learning_rate": 0.0004872170370516234, "loss": 1.2105, "step": 1146 }, { "epoch": 0.20462046204620463, "grad_norm": 1.8873573541641235, "learning_rate": 0.0004871949109890241, "loss": 1.3811, "step": 1147 }, { "epoch": 0.2047988582642048, "grad_norm": 0.6975631713867188, "learning_rate": 0.0004871727662972386, "loss": 1.331, "step": 1148 }, { "epoch": 0.20497725448220497, "grad_norm": 4.755294322967529, "learning_rate": 0.00048715060297800606, "loss": 1.3582, "step": 1149 }, { "epoch": 0.20515565070020517, "grad_norm": 5.158052921295166, "learning_rate": 0.0004871284210330673, "loss": 1.445, "step": 1150 }, { "epoch": 0.20533404691820534, "grad_norm": 9.641525268554688, "learning_rate": 0.0004871062204641643, "loss": 1.25, "step": 1151 }, { "epoch": 0.2055124431362055, "grad_norm": 1.7820740938186646, "learning_rate": 0.00048708400127304085, "loss": 1.4857, "step": 1152 }, { "epoch": 0.20569083935420568, "grad_norm": 1.1601697206497192, "learning_rate": 0.0004870617634614419, "loss": 1.3956, "step": 1153 }, { "epoch": 0.20586923557220588, "grad_norm": 0.9471563100814819, "learning_rate": 0.0004870395070311141, "loss": 1.2032, "step": 1154 }, { "epoch": 0.20604763179020605, "grad_norm": 1.270116925239563, "learning_rate": 0.00048701723198380545, "loss": 1.5996, "step": 1155 }, { "epoch": 0.20622602800820622, "grad_norm": 1.2946140766143799, "learning_rate": 0.00048699493832126534, "loss": 1.2046, "step": 1156 }, { "epoch": 0.20640442422620642, "grad_norm": 0.6727069020271301, "learning_rate": 0.00048697262604524485, "loss": 1.1865, "step": 1157 }, { "epoch": 0.2065828204442066, "grad_norm": 0.7931843400001526, "learning_rate": 0.00048695029515749615, "loss": 1.2591, "step": 1158 }, { "epoch": 0.20676121666220676, "grad_norm": 0.7511464953422546, "learning_rate": 0.0004869279456597733, "loss": 1.2946, "step": 1159 }, { "epoch": 0.20693961288020693, "grad_norm": 0.6185610294342041, "learning_rate": 0.00048690557755383157, "loss": 1.0467, "step": 1160 }, { "epoch": 0.20711800909820713, "grad_norm": 1.2515671253204346, "learning_rate": 0.00048688319084142775, "loss": 1.3274, "step": 1161 }, { "epoch": 0.2072964053162073, "grad_norm": 66.89749908447266, "learning_rate": 0.00048686078552432, "loss": 1.4026, "step": 1162 }, { "epoch": 0.20747480153420747, "grad_norm": 1.6944903135299683, "learning_rate": 0.0004868383616042682, "loss": 1.2216, "step": 1163 }, { "epoch": 0.20765319775220764, "grad_norm": 0.8577490448951721, "learning_rate": 0.0004868159190830333, "loss": 1.1645, "step": 1164 }, { "epoch": 0.20783159397020784, "grad_norm": 0.8082415461540222, "learning_rate": 0.00048679345796237805, "loss": 1.3516, "step": 1165 }, { "epoch": 0.208009990188208, "grad_norm": 0.6959142088890076, "learning_rate": 0.00048677097824406646, "loss": 1.4433, "step": 1166 }, { "epoch": 0.20818838640620818, "grad_norm": 0.6914120316505432, "learning_rate": 0.0004867484799298642, "loss": 1.2154, "step": 1167 }, { "epoch": 0.20836678262420838, "grad_norm": 0.6086413860321045, "learning_rate": 0.00048672596302153814, "loss": 1.3274, "step": 1168 }, { "epoch": 0.20854517884220855, "grad_norm": 0.6582692861557007, "learning_rate": 0.0004867034275208569, "loss": 1.0341, "step": 1169 }, { "epoch": 0.20872357506020872, "grad_norm": 0.9762216210365295, "learning_rate": 0.0004866808734295903, "loss": 1.4215, "step": 1170 }, { "epoch": 0.2089019712782089, "grad_norm": 45.90314483642578, "learning_rate": 0.00048665830074950966, "loss": 1.5744, "step": 1171 }, { "epoch": 0.2090803674962091, "grad_norm": 2.054628372192383, "learning_rate": 0.00048663570948238806, "loss": 1.3446, "step": 1172 }, { "epoch": 0.20925876371420926, "grad_norm": 0.6909850239753723, "learning_rate": 0.00048661309962999956, "loss": 1.378, "step": 1173 }, { "epoch": 0.20943715993220943, "grad_norm": 0.8046495318412781, "learning_rate": 0.00048659047119412014, "loss": 1.2083, "step": 1174 }, { "epoch": 0.2096155561502096, "grad_norm": 0.6526099443435669, "learning_rate": 0.00048656782417652686, "loss": 1.3512, "step": 1175 }, { "epoch": 0.2097939523682098, "grad_norm": 0.6259192824363708, "learning_rate": 0.0004865451585789985, "loss": 1.0709, "step": 1176 }, { "epoch": 0.20997234858620997, "grad_norm": 0.5453342795372009, "learning_rate": 0.0004865224744033151, "loss": 1.0345, "step": 1177 }, { "epoch": 0.21015074480421014, "grad_norm": 0.6736587285995483, "learning_rate": 0.0004864997716512584, "loss": 1.1594, "step": 1178 }, { "epoch": 0.21032914102221034, "grad_norm": 0.6084941625595093, "learning_rate": 0.0004864770503246114, "loss": 1.1488, "step": 1179 }, { "epoch": 0.2105075372402105, "grad_norm": 0.6036438941955566, "learning_rate": 0.00048645431042515866, "loss": 1.2402, "step": 1180 }, { "epoch": 0.21068593345821068, "grad_norm": 0.6231617331504822, "learning_rate": 0.0004864315519546861, "loss": 0.8676, "step": 1181 }, { "epoch": 0.21086432967621085, "grad_norm": 0.6669137477874756, "learning_rate": 0.00048640877491498127, "loss": 1.3004, "step": 1182 }, { "epoch": 0.21104272589421105, "grad_norm": 0.6060863137245178, "learning_rate": 0.0004863859793078329, "loss": 1.2152, "step": 1183 }, { "epoch": 0.21122112211221122, "grad_norm": 0.5332959294319153, "learning_rate": 0.0004863631651350315, "loss": 1.0145, "step": 1184 }, { "epoch": 0.2113995183302114, "grad_norm": 0.5904478430747986, "learning_rate": 0.0004863403323983688, "loss": 1.2652, "step": 1185 }, { "epoch": 0.21157791454821157, "grad_norm": 0.6089318990707397, "learning_rate": 0.0004863174810996381, "loss": 1.2047, "step": 1186 }, { "epoch": 0.21175631076621176, "grad_norm": 0.5423917770385742, "learning_rate": 0.00048629461124063413, "loss": 1.0264, "step": 1187 }, { "epoch": 0.21193470698421193, "grad_norm": 0.5629234910011292, "learning_rate": 0.00048627172282315304, "loss": 1.1713, "step": 1188 }, { "epoch": 0.2121131032022121, "grad_norm": 0.7865850925445557, "learning_rate": 0.00048624881584899253, "loss": 1.3266, "step": 1189 }, { "epoch": 0.2122914994202123, "grad_norm": 0.5741007328033447, "learning_rate": 0.00048622589031995173, "loss": 1.0702, "step": 1190 }, { "epoch": 0.21246989563821247, "grad_norm": 0.7023731470108032, "learning_rate": 0.0004862029462378311, "loss": 1.2425, "step": 1191 }, { "epoch": 0.21264829185621265, "grad_norm": 0.6102539300918579, "learning_rate": 0.0004861799836044328, "loss": 1.3116, "step": 1192 }, { "epoch": 0.21282668807421282, "grad_norm": 0.6883063316345215, "learning_rate": 0.0004861570024215602, "loss": 1.4091, "step": 1193 }, { "epoch": 0.21300508429221301, "grad_norm": 0.5917825698852539, "learning_rate": 0.00048613400269101824, "loss": 1.3487, "step": 1194 }, { "epoch": 0.21318348051021319, "grad_norm": 0.5880262851715088, "learning_rate": 0.00048611098441461335, "loss": 1.2805, "step": 1195 }, { "epoch": 0.21336187672821336, "grad_norm": 0.633368730545044, "learning_rate": 0.00048608794759415333, "loss": 1.0161, "step": 1196 }, { "epoch": 0.21354027294621353, "grad_norm": 0.659824550151825, "learning_rate": 0.00048606489223144744, "loss": 1.2684, "step": 1197 }, { "epoch": 0.21371866916421373, "grad_norm": 5.748763561248779, "learning_rate": 0.0004860418183283066, "loss": 1.2505, "step": 1198 }, { "epoch": 0.2138970653822139, "grad_norm": 0.6809664368629456, "learning_rate": 0.00048601872588654283, "loss": 1.2108, "step": 1199 }, { "epoch": 0.21407546160021407, "grad_norm": 0.6129047870635986, "learning_rate": 0.00048599561490796995, "loss": 1.0522, "step": 1200 }, { "epoch": 0.21425385781821427, "grad_norm": 0.6308250427246094, "learning_rate": 0.0004859724853944031, "loss": 1.2155, "step": 1201 }, { "epoch": 0.21443225403621444, "grad_norm": 0.5974701642990112, "learning_rate": 0.00048594933734765866, "loss": 1.2953, "step": 1202 }, { "epoch": 0.2146106502542146, "grad_norm": 0.6131543517112732, "learning_rate": 0.00048592617076955493, "loss": 1.3646, "step": 1203 }, { "epoch": 0.21478904647221478, "grad_norm": 0.6104893684387207, "learning_rate": 0.00048590298566191116, "loss": 1.629, "step": 1204 }, { "epoch": 0.21496744269021498, "grad_norm": 0.8043147921562195, "learning_rate": 0.0004858797820265485, "loss": 1.2002, "step": 1205 }, { "epoch": 0.21514583890821515, "grad_norm": 0.61087965965271, "learning_rate": 0.0004858565598652892, "loss": 1.3669, "step": 1206 }, { "epoch": 0.21532423512621532, "grad_norm": 0.5664741396903992, "learning_rate": 0.0004858333191799572, "loss": 1.5041, "step": 1207 }, { "epoch": 0.2155026313442155, "grad_norm": 0.6045664548873901, "learning_rate": 0.0004858100599723778, "loss": 1.3528, "step": 1208 }, { "epoch": 0.2156810275622157, "grad_norm": 0.8556034564971924, "learning_rate": 0.00048578678224437777, "loss": 1.3253, "step": 1209 }, { "epoch": 0.21585942378021586, "grad_norm": 0.7069472670555115, "learning_rate": 0.0004857634859977854, "loss": 1.3242, "step": 1210 }, { "epoch": 0.21603781999821603, "grad_norm": 0.5635353922843933, "learning_rate": 0.00048574017123443025, "loss": 1.1976, "step": 1211 }, { "epoch": 0.21621621621621623, "grad_norm": 0.6107478141784668, "learning_rate": 0.00048571683795614346, "loss": 1.2678, "step": 1212 }, { "epoch": 0.2163946124342164, "grad_norm": 0.6950213313102722, "learning_rate": 0.0004856934861647577, "loss": 1.1423, "step": 1213 }, { "epoch": 0.21657300865221657, "grad_norm": 0.7915967106819153, "learning_rate": 0.00048567011586210697, "loss": 1.5536, "step": 1214 }, { "epoch": 0.21675140487021674, "grad_norm": 0.5662855505943298, "learning_rate": 0.00048564672705002663, "loss": 1.081, "step": 1215 }, { "epoch": 0.21692980108821694, "grad_norm": 0.7298009991645813, "learning_rate": 0.0004856233197303539, "loss": 1.328, "step": 1216 }, { "epoch": 0.2171081973062171, "grad_norm": 0.5519408583641052, "learning_rate": 0.000485599893904927, "loss": 1.2547, "step": 1217 }, { "epoch": 0.21728659352421728, "grad_norm": 0.6197732090950012, "learning_rate": 0.0004855764495755858, "loss": 1.2905, "step": 1218 }, { "epoch": 0.21746498974221745, "grad_norm": 0.6033538579940796, "learning_rate": 0.00048555298674417166, "loss": 1.1542, "step": 1219 }, { "epoch": 0.21764338596021765, "grad_norm": 0.7289498448371887, "learning_rate": 0.00048552950541252727, "loss": 1.4689, "step": 1220 }, { "epoch": 0.21782178217821782, "grad_norm": 0.5700170397758484, "learning_rate": 0.0004855060055824969, "loss": 0.9664, "step": 1221 }, { "epoch": 0.218000178396218, "grad_norm": 0.6593589782714844, "learning_rate": 0.00048548248725592617, "loss": 1.2289, "step": 1222 }, { "epoch": 0.2181785746142182, "grad_norm": 0.7251504063606262, "learning_rate": 0.00048545895043466226, "loss": 1.4661, "step": 1223 }, { "epoch": 0.21835697083221836, "grad_norm": 0.6131605505943298, "learning_rate": 0.00048543539512055367, "loss": 1.182, "step": 1224 }, { "epoch": 0.21853536705021853, "grad_norm": 0.6947142481803894, "learning_rate": 0.00048541182131545054, "loss": 1.5384, "step": 1225 }, { "epoch": 0.2187137632682187, "grad_norm": 0.6361053586006165, "learning_rate": 0.00048538822902120424, "loss": 1.4648, "step": 1226 }, { "epoch": 0.2188921594862189, "grad_norm": 0.5528272390365601, "learning_rate": 0.0004853646182396677, "loss": 1.1138, "step": 1227 }, { "epoch": 0.21907055570421907, "grad_norm": 0.6252143979072571, "learning_rate": 0.0004853409889726953, "loss": 1.402, "step": 1228 }, { "epoch": 0.21924895192221924, "grad_norm": 0.6612663865089417, "learning_rate": 0.000485317341222143, "loss": 1.3349, "step": 1229 }, { "epoch": 0.2194273481402194, "grad_norm": 0.5366940498352051, "learning_rate": 0.00048529367498986785, "loss": 1.0664, "step": 1230 }, { "epoch": 0.2196057443582196, "grad_norm": 1.6709811687469482, "learning_rate": 0.00048526999027772883, "loss": 1.0385, "step": 1231 }, { "epoch": 0.21978414057621978, "grad_norm": 0.737723708152771, "learning_rate": 0.000485246287087586, "loss": 1.3627, "step": 1232 }, { "epoch": 0.21996253679421995, "grad_norm": 0.6255607008934021, "learning_rate": 0.0004852225654213009, "loss": 1.1994, "step": 1233 }, { "epoch": 0.22014093301222015, "grad_norm": 0.6627714037895203, "learning_rate": 0.00048519882528073687, "loss": 1.3014, "step": 1234 }, { "epoch": 0.22031932923022032, "grad_norm": 0.5637165307998657, "learning_rate": 0.00048517506666775835, "loss": 1.1434, "step": 1235 }, { "epoch": 0.2204977254482205, "grad_norm": 0.8759331703186035, "learning_rate": 0.00048515128958423123, "loss": 1.3106, "step": 1236 }, { "epoch": 0.22067612166622066, "grad_norm": 0.5403386950492859, "learning_rate": 0.000485127494032023, "loss": 1.0078, "step": 1237 }, { "epoch": 0.22085451788422086, "grad_norm": 0.628709614276886, "learning_rate": 0.0004851036800130026, "loss": 1.1152, "step": 1238 }, { "epoch": 0.22103291410222103, "grad_norm": 0.7338658571243286, "learning_rate": 0.00048507984752904035, "loss": 1.4054, "step": 1239 }, { "epoch": 0.2212113103202212, "grad_norm": 0.6589036583900452, "learning_rate": 0.000485055996582008, "loss": 1.252, "step": 1240 }, { "epoch": 0.2213897065382214, "grad_norm": 0.672472357749939, "learning_rate": 0.0004850321271737789, "loss": 1.4335, "step": 1241 }, { "epoch": 0.22156810275622157, "grad_norm": 0.6786160469055176, "learning_rate": 0.0004850082393062276, "loss": 1.3884, "step": 1242 }, { "epoch": 0.22174649897422175, "grad_norm": 1.999408483505249, "learning_rate": 0.00048498433298123036, "loss": 1.2055, "step": 1243 }, { "epoch": 0.22192489519222192, "grad_norm": 0.7847235202789307, "learning_rate": 0.00048496040820066467, "loss": 1.1009, "step": 1244 }, { "epoch": 0.22210329141022211, "grad_norm": 0.6475226879119873, "learning_rate": 0.0004849364649664097, "loss": 1.3426, "step": 1245 }, { "epoch": 0.22228168762822229, "grad_norm": 0.56676185131073, "learning_rate": 0.0004849125032803459, "loss": 1.0316, "step": 1246 }, { "epoch": 0.22246008384622246, "grad_norm": 0.6990684270858765, "learning_rate": 0.00048488852314435503, "loss": 1.2661, "step": 1247 }, { "epoch": 0.22263848006422263, "grad_norm": 0.6938690543174744, "learning_rate": 0.0004848645245603208, "loss": 1.3882, "step": 1248 }, { "epoch": 0.22281687628222283, "grad_norm": 2.238471508026123, "learning_rate": 0.00048484050753012784, "loss": 1.3287, "step": 1249 }, { "epoch": 0.222995272500223, "grad_norm": 0.8358233571052551, "learning_rate": 0.0004848164720556624, "loss": 1.1643, "step": 1250 }, { "epoch": 0.22317366871822317, "grad_norm": 2.564124822616577, "learning_rate": 0.00048479241813881237, "loss": 1.2204, "step": 1251 }, { "epoch": 0.22335206493622337, "grad_norm": 1.5827924013137817, "learning_rate": 0.00048476834578146683, "loss": 1.4587, "step": 1252 }, { "epoch": 0.22353046115422354, "grad_norm": 1.2166929244995117, "learning_rate": 0.00048474425498551643, "loss": 1.6144, "step": 1253 }, { "epoch": 0.2237088573722237, "grad_norm": 8.74429702758789, "learning_rate": 0.00048472014575285326, "loss": 2.0011, "step": 1254 }, { "epoch": 0.22388725359022388, "grad_norm": 11.300551414489746, "learning_rate": 0.0004846960180853709, "loss": 2.7429, "step": 1255 }, { "epoch": 0.22406564980822408, "grad_norm": 1.6926195621490479, "learning_rate": 0.00048467187198496426, "loss": 1.1161, "step": 1256 }, { "epoch": 0.22424404602622425, "grad_norm": 0.9503287672996521, "learning_rate": 0.00048464770745352984, "loss": 1.4126, "step": 1257 }, { "epoch": 0.22442244224422442, "grad_norm": 1.7149012088775635, "learning_rate": 0.00048462352449296533, "loss": 1.318, "step": 1258 }, { "epoch": 0.2246008384622246, "grad_norm": 9.479455947875977, "learning_rate": 0.00048459932310517017, "loss": 1.2574, "step": 1259 }, { "epoch": 0.2247792346802248, "grad_norm": 12.350288391113281, "learning_rate": 0.0004845751032920452, "loss": 1.4817, "step": 1260 }, { "epoch": 0.22495763089822496, "grad_norm": 16.48604393005371, "learning_rate": 0.0004845508650554926, "loss": 1.3518, "step": 1261 }, { "epoch": 0.22513602711622513, "grad_norm": 6.238397598266602, "learning_rate": 0.00048452660839741593, "loss": 0.9875, "step": 1262 }, { "epoch": 0.22531442333422533, "grad_norm": 1.4720149040222168, "learning_rate": 0.0004845023333197204, "loss": 1.3612, "step": 1263 }, { "epoch": 0.2254928195522255, "grad_norm": 2.780336856842041, "learning_rate": 0.0004844780398243125, "loss": 1.3331, "step": 1264 }, { "epoch": 0.22567121577022567, "grad_norm": 1.0210349559783936, "learning_rate": 0.0004844537279131002, "loss": 1.5075, "step": 1265 }, { "epoch": 0.22584961198822584, "grad_norm": 1.9614717960357666, "learning_rate": 0.0004844293975879931, "loss": 1.1595, "step": 1266 }, { "epoch": 0.22602800820622604, "grad_norm": 0.7749541997909546, "learning_rate": 0.000484405048850902, "loss": 1.3533, "step": 1267 }, { "epoch": 0.2262064044242262, "grad_norm": 1.7619514465332031, "learning_rate": 0.00048438068170373916, "loss": 1.0186, "step": 1268 }, { "epoch": 0.22638480064222638, "grad_norm": 0.9682341814041138, "learning_rate": 0.0004843562961484185, "loss": 1.18, "step": 1269 }, { "epoch": 0.22656319686022655, "grad_norm": 0.7163513898849487, "learning_rate": 0.00048433189218685516, "loss": 1.2096, "step": 1270 }, { "epoch": 0.22674159307822675, "grad_norm": 0.5259701013565063, "learning_rate": 0.0004843074698209658, "loss": 1.1834, "step": 1271 }, { "epoch": 0.22691998929622692, "grad_norm": 0.8285301923751831, "learning_rate": 0.00048428302905266865, "loss": 1.1552, "step": 1272 }, { "epoch": 0.2270983855142271, "grad_norm": 0.6478269100189209, "learning_rate": 0.0004842585698838832, "loss": 1.2377, "step": 1273 }, { "epoch": 0.2272767817322273, "grad_norm": 0.7141563296318054, "learning_rate": 0.0004842340923165305, "loss": 1.0266, "step": 1274 }, { "epoch": 0.22745517795022746, "grad_norm": 1.3157190084457397, "learning_rate": 0.000484209596352533, "loss": 1.139, "step": 1275 }, { "epoch": 0.22763357416822763, "grad_norm": 1.4093172550201416, "learning_rate": 0.0004841850819938145, "loss": 1.0584, "step": 1276 }, { "epoch": 0.2278119703862278, "grad_norm": 0.805065929889679, "learning_rate": 0.0004841605492423006, "loss": 1.3242, "step": 1277 }, { "epoch": 0.227990366604228, "grad_norm": 0.5310962796211243, "learning_rate": 0.00048413599809991783, "loss": 1.0605, "step": 1278 }, { "epoch": 0.22816876282222817, "grad_norm": 0.5967015027999878, "learning_rate": 0.00048411142856859447, "loss": 1.1277, "step": 1279 }, { "epoch": 0.22834715904022834, "grad_norm": 0.6595317721366882, "learning_rate": 0.00048408684065026034, "loss": 1.3209, "step": 1280 }, { "epoch": 0.2285255552582285, "grad_norm": 0.583308219909668, "learning_rate": 0.0004840622343468465, "loss": 1.1515, "step": 1281 }, { "epoch": 0.2287039514762287, "grad_norm": 0.9470168948173523, "learning_rate": 0.0004840376096602854, "loss": 1.1452, "step": 1282 }, { "epoch": 0.22888234769422888, "grad_norm": 0.6106230616569519, "learning_rate": 0.00048401296659251125, "loss": 1.5261, "step": 1283 }, { "epoch": 0.22906074391222905, "grad_norm": 0.656694233417511, "learning_rate": 0.00048398830514545933, "loss": 1.4598, "step": 1284 }, { "epoch": 0.22923914013022925, "grad_norm": 0.7199791669845581, "learning_rate": 0.0004839636253210667, "loss": 1.0677, "step": 1285 }, { "epoch": 0.22941753634822942, "grad_norm": 0.748662531375885, "learning_rate": 0.0004839389271212715, "loss": 1.1233, "step": 1286 }, { "epoch": 0.2295959325662296, "grad_norm": 0.8158852458000183, "learning_rate": 0.00048391421054801376, "loss": 1.2534, "step": 1287 }, { "epoch": 0.22977432878422976, "grad_norm": 1.9527506828308105, "learning_rate": 0.0004838894756032345, "loss": 1.2561, "step": 1288 }, { "epoch": 0.22995272500222996, "grad_norm": 1.1054450273513794, "learning_rate": 0.0004838647222888766, "loss": 1.0608, "step": 1289 }, { "epoch": 0.23013112122023013, "grad_norm": 0.5444337725639343, "learning_rate": 0.0004838399506068839, "loss": 1.193, "step": 1290 }, { "epoch": 0.2303095174382303, "grad_norm": 0.6436626315116882, "learning_rate": 0.0004838151605592022, "loss": 1.4263, "step": 1291 }, { "epoch": 0.23048791365623048, "grad_norm": 0.7222276926040649, "learning_rate": 0.00048379035214777833, "loss": 1.482, "step": 1292 }, { "epoch": 0.23066630987423067, "grad_norm": 0.7238490581512451, "learning_rate": 0.00048376552537456085, "loss": 1.1443, "step": 1293 }, { "epoch": 0.23084470609223084, "grad_norm": 0.9711647033691406, "learning_rate": 0.00048374068024149966, "loss": 1.2471, "step": 1294 }, { "epoch": 0.23102310231023102, "grad_norm": 0.54137122631073, "learning_rate": 0.000483715816750546, "loss": 1.1579, "step": 1295 }, { "epoch": 0.23120149852823121, "grad_norm": 0.8173871636390686, "learning_rate": 0.0004836909349036527, "loss": 1.4205, "step": 1296 }, { "epoch": 0.23137989474623138, "grad_norm": 8.518444061279297, "learning_rate": 0.0004836660347027738, "loss": 1.0929, "step": 1297 }, { "epoch": 0.23155829096423156, "grad_norm": 0.603255569934845, "learning_rate": 0.0004836411161498652, "loss": 1.2047, "step": 1298 }, { "epoch": 0.23173668718223173, "grad_norm": 0.6280937790870667, "learning_rate": 0.0004836161792468839, "loss": 1.2314, "step": 1299 }, { "epoch": 0.23191508340023193, "grad_norm": 0.5769376158714294, "learning_rate": 0.00048359122399578835, "loss": 1.077, "step": 1300 }, { "epoch": 0.2320934796182321, "grad_norm": 0.6672999858856201, "learning_rate": 0.00048356625039853865, "loss": 1.1322, "step": 1301 }, { "epoch": 0.23227187583623227, "grad_norm": 1.1087464094161987, "learning_rate": 0.00048354125845709604, "loss": 1.5064, "step": 1302 }, { "epoch": 0.23245027205423244, "grad_norm": 0.6089938879013062, "learning_rate": 0.0004835162481734237, "loss": 0.9383, "step": 1303 }, { "epoch": 0.23262866827223264, "grad_norm": 0.6506130695343018, "learning_rate": 0.0004834912195494855, "loss": 1.2445, "step": 1304 }, { "epoch": 0.2328070644902328, "grad_norm": 0.6899453401565552, "learning_rate": 0.0004834661725872475, "loss": 1.2801, "step": 1305 }, { "epoch": 0.23298546070823298, "grad_norm": 0.9193609952926636, "learning_rate": 0.00048344110728867673, "loss": 1.3748, "step": 1306 }, { "epoch": 0.23316385692623318, "grad_norm": 0.905149519443512, "learning_rate": 0.0004834160236557419, "loss": 1.2297, "step": 1307 }, { "epoch": 0.23334225314423335, "grad_norm": 0.5802024602890015, "learning_rate": 0.00048339092169041306, "loss": 1.0281, "step": 1308 }, { "epoch": 0.23352064936223352, "grad_norm": 0.644010603427887, "learning_rate": 0.0004833658013946616, "loss": 1.4227, "step": 1309 }, { "epoch": 0.2336990455802337, "grad_norm": 0.5992226600646973, "learning_rate": 0.0004833406627704605, "loss": 1.229, "step": 1310 }, { "epoch": 0.2338774417982339, "grad_norm": 0.5435013771057129, "learning_rate": 0.00048331550581978423, "loss": 1.2623, "step": 1311 }, { "epoch": 0.23405583801623406, "grad_norm": 0.5842602252960205, "learning_rate": 0.0004832903305446085, "loss": 1.3458, "step": 1312 }, { "epoch": 0.23423423423423423, "grad_norm": 0.6166499257087708, "learning_rate": 0.00048326513694691055, "loss": 1.1492, "step": 1313 }, { "epoch": 0.2344126304522344, "grad_norm": 0.586531400680542, "learning_rate": 0.0004832399250286692, "loss": 1.179, "step": 1314 }, { "epoch": 0.2345910266702346, "grad_norm": 1.1618958711624146, "learning_rate": 0.0004832146947918645, "loss": 1.0842, "step": 1315 }, { "epoch": 0.23476942288823477, "grad_norm": 0.5545247197151184, "learning_rate": 0.000483189446238478, "loss": 1.0436, "step": 1316 }, { "epoch": 0.23494781910623494, "grad_norm": 0.5850834846496582, "learning_rate": 0.00048316417937049275, "loss": 1.1775, "step": 1317 }, { "epoch": 0.23512621532423514, "grad_norm": 0.654733419418335, "learning_rate": 0.0004831388941898932, "loss": 1.2601, "step": 1318 }, { "epoch": 0.2353046115422353, "grad_norm": 0.5766509175300598, "learning_rate": 0.00048311359069866524, "loss": 1.2645, "step": 1319 }, { "epoch": 0.23548300776023548, "grad_norm": 0.577642560005188, "learning_rate": 0.00048308826889879614, "loss": 1.1026, "step": 1320 }, { "epoch": 0.23566140397823565, "grad_norm": 0.6444463729858398, "learning_rate": 0.0004830629287922748, "loss": 1.2969, "step": 1321 }, { "epoch": 0.23583980019623585, "grad_norm": 0.7246226072311401, "learning_rate": 0.00048303757038109117, "loss": 1.1957, "step": 1322 }, { "epoch": 0.23601819641423602, "grad_norm": 0.7728081345558167, "learning_rate": 0.00048301219366723714, "loss": 1.1193, "step": 1323 }, { "epoch": 0.2361965926322362, "grad_norm": 0.6015149354934692, "learning_rate": 0.00048298679865270574, "loss": 1.3462, "step": 1324 }, { "epoch": 0.23637498885023636, "grad_norm": 0.5546750426292419, "learning_rate": 0.0004829613853394914, "loss": 1.2633, "step": 1325 }, { "epoch": 0.23655338506823656, "grad_norm": 0.7235816717147827, "learning_rate": 0.0004829359537295901, "loss": 1.5118, "step": 1326 }, { "epoch": 0.23673178128623673, "grad_norm": 0.6748101711273193, "learning_rate": 0.0004829105038249992, "loss": 1.1664, "step": 1327 }, { "epoch": 0.2369101775042369, "grad_norm": 0.7603464722633362, "learning_rate": 0.0004828850356277176, "loss": 1.2698, "step": 1328 }, { "epoch": 0.2370885737222371, "grad_norm": 0.6377245187759399, "learning_rate": 0.0004828595491397455, "loss": 0.9762, "step": 1329 }, { "epoch": 0.23726696994023727, "grad_norm": 0.6112083792686462, "learning_rate": 0.0004828340443630846, "loss": 0.9882, "step": 1330 }, { "epoch": 0.23744536615823744, "grad_norm": 0.5406150817871094, "learning_rate": 0.00048280852129973807, "loss": 0.9828, "step": 1331 }, { "epoch": 0.2376237623762376, "grad_norm": 0.592891275882721, "learning_rate": 0.0004827829799517105, "loss": 1.0252, "step": 1332 }, { "epoch": 0.2378021585942378, "grad_norm": 0.564150869846344, "learning_rate": 0.0004827574203210078, "loss": 1.2145, "step": 1333 }, { "epoch": 0.23798055481223798, "grad_norm": 0.8308655619621277, "learning_rate": 0.0004827318424096375, "loss": 0.9988, "step": 1334 }, { "epoch": 0.23815895103023815, "grad_norm": 0.6468952894210815, "learning_rate": 0.00048270624621960846, "loss": 1.0637, "step": 1335 }, { "epoch": 0.23833734724823832, "grad_norm": 0.5933250784873962, "learning_rate": 0.000482680631752931, "loss": 1.2847, "step": 1336 }, { "epoch": 0.23851574346623852, "grad_norm": 0.8203790187835693, "learning_rate": 0.0004826549990116168, "loss": 1.3192, "step": 1337 }, { "epoch": 0.2386941396842387, "grad_norm": 0.8273621201515198, "learning_rate": 0.0004826293479976791, "loss": 1.2088, "step": 1338 }, { "epoch": 0.23887253590223886, "grad_norm": 0.6572198271751404, "learning_rate": 0.0004826036787131326, "loss": 1.3699, "step": 1339 }, { "epoch": 0.23905093212023906, "grad_norm": 0.6077407002449036, "learning_rate": 0.0004825779911599932, "loss": 1.2602, "step": 1340 }, { "epoch": 0.23922932833823923, "grad_norm": 0.58214271068573, "learning_rate": 0.00048255228534027845, "loss": 0.9227, "step": 1341 }, { "epoch": 0.2394077245562394, "grad_norm": 0.5967569947242737, "learning_rate": 0.0004825265612560073, "loss": 1.1136, "step": 1342 }, { "epoch": 0.23958612077423957, "grad_norm": 0.5940139889717102, "learning_rate": 0.0004825008189092001, "loss": 1.1137, "step": 1343 }, { "epoch": 0.23976451699223977, "grad_norm": 0.5328962802886963, "learning_rate": 0.00048247505830187863, "loss": 1.0354, "step": 1344 }, { "epoch": 0.23994291321023994, "grad_norm": 0.548477828502655, "learning_rate": 0.00048244927943606617, "loss": 0.9724, "step": 1345 }, { "epoch": 0.24012130942824012, "grad_norm": 0.6590238809585571, "learning_rate": 0.0004824234823137873, "loss": 1.2702, "step": 1346 }, { "epoch": 0.24029970564624029, "grad_norm": 0.5709405541419983, "learning_rate": 0.0004823976669370681, "loss": 1.3085, "step": 1347 }, { "epoch": 0.24047810186424048, "grad_norm": 0.6574723720550537, "learning_rate": 0.0004823718333079362, "loss": 1.5324, "step": 1348 }, { "epoch": 0.24065649808224066, "grad_norm": 0.5990821123123169, "learning_rate": 0.00048234598142842043, "loss": 1.1941, "step": 1349 }, { "epoch": 0.24083489430024083, "grad_norm": 0.6324416995048523, "learning_rate": 0.0004823201113005514, "loss": 1.3861, "step": 1350 }, { "epoch": 0.24101329051824102, "grad_norm": 0.5532766580581665, "learning_rate": 0.0004822942229263607, "loss": 1.1808, "step": 1351 }, { "epoch": 0.2411916867362412, "grad_norm": 0.5371944904327393, "learning_rate": 0.00048226831630788174, "loss": 0.9721, "step": 1352 }, { "epoch": 0.24137008295424137, "grad_norm": 0.6007786393165588, "learning_rate": 0.0004822423914471492, "loss": 1.2506, "step": 1353 }, { "epoch": 0.24154847917224154, "grad_norm": 0.5308194756507874, "learning_rate": 0.0004822164483461991, "loss": 1.1275, "step": 1354 }, { "epoch": 0.24172687539024174, "grad_norm": 0.5771823525428772, "learning_rate": 0.00048219048700706913, "loss": 1.138, "step": 1355 }, { "epoch": 0.2419052716082419, "grad_norm": 0.6079913377761841, "learning_rate": 0.00048216450743179817, "loss": 1.4897, "step": 1356 }, { "epoch": 0.24208366782624208, "grad_norm": 0.5161705613136292, "learning_rate": 0.0004821385096224268, "loss": 1.0735, "step": 1357 }, { "epoch": 0.24226206404424228, "grad_norm": 0.6866333484649658, "learning_rate": 0.00048211249358099675, "loss": 1.316, "step": 1358 }, { "epoch": 0.24244046026224245, "grad_norm": 0.5263490080833435, "learning_rate": 0.0004820864593095513, "loss": 1.1486, "step": 1359 }, { "epoch": 0.24261885648024262, "grad_norm": 0.5457320213317871, "learning_rate": 0.0004820604068101352, "loss": 1.076, "step": 1360 }, { "epoch": 0.2427972526982428, "grad_norm": 0.5485474467277527, "learning_rate": 0.00048203433608479465, "loss": 1.4718, "step": 1361 }, { "epoch": 0.242975648916243, "grad_norm": 0.5592158436775208, "learning_rate": 0.0004820082471355772, "loss": 1.123, "step": 1362 }, { "epoch": 0.24315404513424316, "grad_norm": 0.6344873905181885, "learning_rate": 0.0004819821399645319, "loss": 1.2592, "step": 1363 }, { "epoch": 0.24333244135224333, "grad_norm": 0.5310791730880737, "learning_rate": 0.00048195601457370907, "loss": 1.1477, "step": 1364 }, { "epoch": 0.2435108375702435, "grad_norm": 0.5795305371284485, "learning_rate": 0.0004819298709651607, "loss": 1.3579, "step": 1365 }, { "epoch": 0.2436892337882437, "grad_norm": 0.6433924436569214, "learning_rate": 0.0004819037091409401, "loss": 1.3058, "step": 1366 }, { "epoch": 0.24386763000624387, "grad_norm": 0.6262935996055603, "learning_rate": 0.00048187752910310196, "loss": 1.4104, "step": 1367 }, { "epoch": 0.24404602622424404, "grad_norm": 0.6437758207321167, "learning_rate": 0.0004818513308537025, "loss": 1.4279, "step": 1368 }, { "epoch": 0.24422442244224424, "grad_norm": 0.5018840432167053, "learning_rate": 0.00048182511439479926, "loss": 1.0561, "step": 1369 }, { "epoch": 0.2444028186602444, "grad_norm": 0.5912278890609741, "learning_rate": 0.00048179887972845124, "loss": 1.2844, "step": 1370 }, { "epoch": 0.24458121487824458, "grad_norm": 0.5428308844566345, "learning_rate": 0.0004817726268567191, "loss": 1.1105, "step": 1371 }, { "epoch": 0.24475961109624475, "grad_norm": 0.5821102857589722, "learning_rate": 0.0004817463557816644, "loss": 1.3421, "step": 1372 }, { "epoch": 0.24493800731424495, "grad_norm": 0.5811814665794373, "learning_rate": 0.0004817200665053508, "loss": 1.1854, "step": 1373 }, { "epoch": 0.24511640353224512, "grad_norm": 0.5271221399307251, "learning_rate": 0.00048169375902984283, "loss": 1.1517, "step": 1374 }, { "epoch": 0.2452947997502453, "grad_norm": 0.592476487159729, "learning_rate": 0.00048166743335720675, "loss": 1.459, "step": 1375 }, { "epoch": 0.24547319596824546, "grad_norm": 0.5430518984794617, "learning_rate": 0.00048164108948951014, "loss": 1.0736, "step": 1376 }, { "epoch": 0.24565159218624566, "grad_norm": 0.601703405380249, "learning_rate": 0.00048161472742882204, "loss": 1.358, "step": 1377 }, { "epoch": 0.24582998840424583, "grad_norm": 0.7517264485359192, "learning_rate": 0.0004815883471772129, "loss": 1.1916, "step": 1378 }, { "epoch": 0.246008384622246, "grad_norm": 0.6537923216819763, "learning_rate": 0.00048156194873675466, "loss": 1.3555, "step": 1379 }, { "epoch": 0.2461867808402462, "grad_norm": 0.5317115187644958, "learning_rate": 0.0004815355321095206, "loss": 1.1173, "step": 1380 }, { "epoch": 0.24636517705824637, "grad_norm": 0.5854525566101074, "learning_rate": 0.00048150909729758554, "loss": 1.2466, "step": 1381 }, { "epoch": 0.24654357327624654, "grad_norm": 0.5315707325935364, "learning_rate": 0.0004814826443030256, "loss": 1.0387, "step": 1382 }, { "epoch": 0.2467219694942467, "grad_norm": 0.5110334157943726, "learning_rate": 0.00048145617312791837, "loss": 1.0514, "step": 1383 }, { "epoch": 0.2469003657122469, "grad_norm": 0.5571600198745728, "learning_rate": 0.0004814296837743428, "loss": 1.1521, "step": 1384 }, { "epoch": 0.24707876193024708, "grad_norm": 0.5861849188804626, "learning_rate": 0.0004814031762443796, "loss": 1.26, "step": 1385 }, { "epoch": 0.24725715814824725, "grad_norm": 0.5479332208633423, "learning_rate": 0.00048137665054011044, "loss": 1.1034, "step": 1386 }, { "epoch": 0.24743555436624742, "grad_norm": 0.5551263689994812, "learning_rate": 0.0004813501066636187, "loss": 1.0253, "step": 1387 }, { "epoch": 0.24761395058424762, "grad_norm": 0.6580809354782104, "learning_rate": 0.00048132354461698923, "loss": 1.2498, "step": 1388 }, { "epoch": 0.2477923468022478, "grad_norm": 0.5350015759468079, "learning_rate": 0.0004812969644023081, "loss": 1.1355, "step": 1389 }, { "epoch": 0.24797074302024796, "grad_norm": 0.5560418367385864, "learning_rate": 0.00048127036602166285, "loss": 1.0944, "step": 1390 }, { "epoch": 0.24814913923824816, "grad_norm": 0.5966114401817322, "learning_rate": 0.00048124374947714263, "loss": 1.0267, "step": 1391 }, { "epoch": 0.24832753545624833, "grad_norm": 0.5361310243606567, "learning_rate": 0.0004812171147708378, "loss": 1.1974, "step": 1392 }, { "epoch": 0.2485059316742485, "grad_norm": 0.5971125960350037, "learning_rate": 0.00048119046190484027, "loss": 1.2728, "step": 1393 }, { "epoch": 0.24868432789224867, "grad_norm": 0.5017391443252563, "learning_rate": 0.0004811637908812434, "loss": 1.0279, "step": 1394 }, { "epoch": 0.24886272411024887, "grad_norm": 0.5720884203910828, "learning_rate": 0.00048113710170214185, "loss": 1.3989, "step": 1395 }, { "epoch": 0.24904112032824904, "grad_norm": 0.5782856345176697, "learning_rate": 0.00048111039436963177, "loss": 1.2528, "step": 1396 }, { "epoch": 0.24921951654624921, "grad_norm": 0.5723220109939575, "learning_rate": 0.00048108366888581077, "loss": 1.0068, "step": 1397 }, { "epoch": 0.24939791276424939, "grad_norm": 0.5289225578308105, "learning_rate": 0.00048105692525277793, "loss": 0.9357, "step": 1398 }, { "epoch": 0.24957630898224958, "grad_norm": 0.5223532319068909, "learning_rate": 0.00048103016347263356, "loss": 1.0046, "step": 1399 }, { "epoch": 0.24975470520024975, "grad_norm": 0.6456426978111267, "learning_rate": 0.0004810033835474796, "loss": 1.5626, "step": 1400 }, { "epoch": 0.24993310141824993, "grad_norm": 0.5884763598442078, "learning_rate": 0.00048097658547941927, "loss": 1.4618, "step": 1401 }, { "epoch": 0.2501114976362501, "grad_norm": 0.5833611488342285, "learning_rate": 0.00048094976927055735, "loss": 1.396, "step": 1402 }, { "epoch": 0.25028989385425027, "grad_norm": 0.5198600888252258, "learning_rate": 0.00048092293492299986, "loss": 1.1263, "step": 1403 }, { "epoch": 0.25046829007225047, "grad_norm": 0.5795050859451294, "learning_rate": 0.00048089608243885454, "loss": 1.2508, "step": 1404 }, { "epoch": 0.25064668629025066, "grad_norm": 0.7242421507835388, "learning_rate": 0.0004808692118202302, "loss": 1.2361, "step": 1405 }, { "epoch": 0.2508250825082508, "grad_norm": 0.6138631105422974, "learning_rate": 0.00048084232306923737, "loss": 1.3318, "step": 1406 }, { "epoch": 0.251003478726251, "grad_norm": 0.5212423801422119, "learning_rate": 0.0004808154161879877, "loss": 1.2418, "step": 1407 }, { "epoch": 0.2511818749442512, "grad_norm": 0.5970634818077087, "learning_rate": 0.0004807884911785947, "loss": 1.1968, "step": 1408 }, { "epoch": 0.25136027116225135, "grad_norm": 0.6800307035446167, "learning_rate": 0.0004807615480431729, "loss": 1.2915, "step": 1409 }, { "epoch": 0.25153866738025155, "grad_norm": 0.5408998727798462, "learning_rate": 0.0004807345867838384, "loss": 0.9737, "step": 1410 }, { "epoch": 0.25171706359825174, "grad_norm": 0.6598271727561951, "learning_rate": 0.00048070760740270873, "loss": 1.2425, "step": 1411 }, { "epoch": 0.2518954598162519, "grad_norm": 0.6781688332557678, "learning_rate": 0.0004806806099019029, "loss": 1.0545, "step": 1412 }, { "epoch": 0.2520738560342521, "grad_norm": 0.6171199679374695, "learning_rate": 0.00048065359428354115, "loss": 1.1862, "step": 1413 }, { "epoch": 0.25225225225225223, "grad_norm": 0.5618746876716614, "learning_rate": 0.00048062656054974546, "loss": 1.3709, "step": 1414 }, { "epoch": 0.25243064847025243, "grad_norm": 2.3955955505371094, "learning_rate": 0.0004805995087026389, "loss": 1.2006, "step": 1415 }, { "epoch": 0.2526090446882526, "grad_norm": 0.6166795492172241, "learning_rate": 0.0004805724387443462, "loss": 1.3305, "step": 1416 }, { "epoch": 0.25278744090625277, "grad_norm": 0.5519673228263855, "learning_rate": 0.00048054535067699333, "loss": 1.1071, "step": 1417 }, { "epoch": 0.25296583712425297, "grad_norm": 0.6297189593315125, "learning_rate": 0.0004805182445027079, "loss": 1.4462, "step": 1418 }, { "epoch": 0.25314423334225317, "grad_norm": 0.5049257278442383, "learning_rate": 0.0004804911202236187, "loss": 0.9542, "step": 1419 }, { "epoch": 0.2533226295602533, "grad_norm": 0.5491045713424683, "learning_rate": 0.0004804639778418561, "loss": 1.1949, "step": 1420 }, { "epoch": 0.2535010257782535, "grad_norm": 0.6792704463005066, "learning_rate": 0.00048043681735955183, "loss": 1.4088, "step": 1421 }, { "epoch": 0.2536794219962537, "grad_norm": 0.543935239315033, "learning_rate": 0.0004804096387788391, "loss": 1.3297, "step": 1422 }, { "epoch": 0.25385781821425385, "grad_norm": 0.6691182851791382, "learning_rate": 0.00048038244210185253, "loss": 1.1259, "step": 1423 }, { "epoch": 0.25403621443225405, "grad_norm": 0.6011500954627991, "learning_rate": 0.0004803552273307281, "loss": 1.2238, "step": 1424 }, { "epoch": 0.2542146106502542, "grad_norm": 0.667797863483429, "learning_rate": 0.0004803279944676032, "loss": 1.206, "step": 1425 }, { "epoch": 0.2543930068682544, "grad_norm": 0.676861047744751, "learning_rate": 0.0004803007435146168, "loss": 1.0519, "step": 1426 }, { "epoch": 0.2545714030862546, "grad_norm": 0.5236627459526062, "learning_rate": 0.00048027347447390914, "loss": 1.0689, "step": 1427 }, { "epoch": 0.25474979930425473, "grad_norm": 0.6431361436843872, "learning_rate": 0.00048024618734762183, "loss": 1.0586, "step": 1428 }, { "epoch": 0.25492819552225493, "grad_norm": 0.6550641655921936, "learning_rate": 0.00048021888213789797, "loss": 1.2463, "step": 1429 }, { "epoch": 0.25510659174025513, "grad_norm": 0.5982055068016052, "learning_rate": 0.0004801915588468823, "loss": 1.1063, "step": 1430 }, { "epoch": 0.25528498795825527, "grad_norm": 0.5872926115989685, "learning_rate": 0.00048016421747672054, "loss": 1.2272, "step": 1431 }, { "epoch": 0.25546338417625547, "grad_norm": 0.6466538310050964, "learning_rate": 0.0004801368580295603, "loss": 1.1145, "step": 1432 }, { "epoch": 0.25564178039425567, "grad_norm": 0.5384798645973206, "learning_rate": 0.0004801094805075502, "loss": 1.1214, "step": 1433 }, { "epoch": 0.2558201766122558, "grad_norm": 0.5813401937484741, "learning_rate": 0.00048008208491284054, "loss": 1.2142, "step": 1434 }, { "epoch": 0.255998572830256, "grad_norm": 0.5265066623687744, "learning_rate": 0.00048005467124758296, "loss": 1.0926, "step": 1435 }, { "epoch": 0.25617696904825615, "grad_norm": 0.5532288551330566, "learning_rate": 0.0004800272395139305, "loss": 1.2191, "step": 1436 }, { "epoch": 0.25635536526625635, "grad_norm": 0.5451021194458008, "learning_rate": 0.00047999978971403765, "loss": 1.1107, "step": 1437 }, { "epoch": 0.25653376148425655, "grad_norm": 0.5395956039428711, "learning_rate": 0.0004799723218500602, "loss": 1.2165, "step": 1438 }, { "epoch": 0.2567121577022567, "grad_norm": 0.5500022172927856, "learning_rate": 0.00047994483592415574, "loss": 1.3835, "step": 1439 }, { "epoch": 0.2568905539202569, "grad_norm": 0.5290274024009705, "learning_rate": 0.0004799173319384826, "loss": 0.966, "step": 1440 }, { "epoch": 0.2570689501382571, "grad_norm": 0.6000217795372009, "learning_rate": 0.0004798898098952013, "loss": 1.3913, "step": 1441 }, { "epoch": 0.25724734635625723, "grad_norm": 0.5701634883880615, "learning_rate": 0.0004798622697964732, "loss": 1.0587, "step": 1442 }, { "epoch": 0.25742574257425743, "grad_norm": 0.5983652472496033, "learning_rate": 0.00047983471164446135, "loss": 1.2844, "step": 1443 }, { "epoch": 0.25760413879225763, "grad_norm": 0.5445163249969482, "learning_rate": 0.0004798071354413302, "loss": 1.1172, "step": 1444 }, { "epoch": 0.2577825350102578, "grad_norm": 0.5581110715866089, "learning_rate": 0.0004797795411892455, "loss": 1.1949, "step": 1445 }, { "epoch": 0.257960931228258, "grad_norm": 0.585059642791748, "learning_rate": 0.0004797519288903745, "loss": 1.187, "step": 1446 }, { "epoch": 0.2581393274462581, "grad_norm": 0.6527815461158752, "learning_rate": 0.00047972429854688595, "loss": 1.1534, "step": 1447 }, { "epoch": 0.2583177236642583, "grad_norm": 0.7034025192260742, "learning_rate": 0.00047969665016094976, "loss": 1.0801, "step": 1448 }, { "epoch": 0.2584961198822585, "grad_norm": 1.5441768169403076, "learning_rate": 0.00047966898373473754, "loss": 1.3137, "step": 1449 }, { "epoch": 0.25867451610025866, "grad_norm": 0.5990326404571533, "learning_rate": 0.0004796412992704221, "loss": 1.3426, "step": 1450 }, { "epoch": 0.25885291231825885, "grad_norm": 0.5593619346618652, "learning_rate": 0.0004796135967701779, "loss": 1.2633, "step": 1451 }, { "epoch": 0.25903130853625905, "grad_norm": 0.5932374000549316, "learning_rate": 0.00047958587623618066, "loss": 1.111, "step": 1452 }, { "epoch": 0.2592097047542592, "grad_norm": 0.9718675017356873, "learning_rate": 0.0004795581376706075, "loss": 0.9386, "step": 1453 }, { "epoch": 0.2593881009722594, "grad_norm": 0.5909976363182068, "learning_rate": 0.00047953038107563696, "loss": 1.0972, "step": 1454 }, { "epoch": 0.2595664971902596, "grad_norm": 0.550532877445221, "learning_rate": 0.000479502606453449, "loss": 0.9393, "step": 1455 }, { "epoch": 0.25974489340825974, "grad_norm": 0.5949730277061462, "learning_rate": 0.00047947481380622523, "loss": 1.1803, "step": 1456 }, { "epoch": 0.25992328962625993, "grad_norm": 4.032388210296631, "learning_rate": 0.0004794470031361482, "loss": 1.5666, "step": 1457 }, { "epoch": 0.2601016858442601, "grad_norm": 0.5784013271331787, "learning_rate": 0.0004794191744454024, "loss": 1.3467, "step": 1458 }, { "epoch": 0.2602800820622603, "grad_norm": 0.6242582201957703, "learning_rate": 0.00047939132773617334, "loss": 1.1208, "step": 1459 }, { "epoch": 0.2604584782802605, "grad_norm": 0.7698078155517578, "learning_rate": 0.00047936346301064806, "loss": 1.0162, "step": 1460 }, { "epoch": 0.2606368744982606, "grad_norm": 0.8937914967536926, "learning_rate": 0.0004793355802710151, "loss": 1.3314, "step": 1461 }, { "epoch": 0.2608152707162608, "grad_norm": 0.8126354217529297, "learning_rate": 0.0004793076795194644, "loss": 1.1186, "step": 1462 }, { "epoch": 0.260993666934261, "grad_norm": 0.6322470307350159, "learning_rate": 0.0004792797607581872, "loss": 1.1986, "step": 1463 }, { "epoch": 0.26117206315226116, "grad_norm": 0.6327349543571472, "learning_rate": 0.0004792518239893763, "loss": 1.1737, "step": 1464 }, { "epoch": 0.26135045937026136, "grad_norm": 0.5458834767341614, "learning_rate": 0.00047922386921522576, "loss": 1.0218, "step": 1465 }, { "epoch": 0.26152885558826156, "grad_norm": 0.685711681842804, "learning_rate": 0.0004791958964379312, "loss": 1.2088, "step": 1466 }, { "epoch": 0.2617072518062617, "grad_norm": 0.7403172850608826, "learning_rate": 0.0004791679056596895, "loss": 1.4212, "step": 1467 }, { "epoch": 0.2618856480242619, "grad_norm": 0.6377992033958435, "learning_rate": 0.0004791398968826991, "loss": 1.2633, "step": 1468 }, { "epoch": 0.26206404424226204, "grad_norm": 0.6038281321525574, "learning_rate": 0.0004791118701091599, "loss": 1.2117, "step": 1469 }, { "epoch": 0.26224244046026224, "grad_norm": 2.225592613220215, "learning_rate": 0.0004790838253412729, "loss": 1.2892, "step": 1470 }, { "epoch": 0.26242083667826244, "grad_norm": 0.5641833543777466, "learning_rate": 0.0004790557625812409, "loss": 1.1663, "step": 1471 }, { "epoch": 0.2625992328962626, "grad_norm": 0.5233538746833801, "learning_rate": 0.00047902768183126797, "loss": 1.1302, "step": 1472 }, { "epoch": 0.2627776291142628, "grad_norm": 0.5378060340881348, "learning_rate": 0.0004789995830935594, "loss": 1.0252, "step": 1473 }, { "epoch": 0.262956025332263, "grad_norm": 0.5588990449905396, "learning_rate": 0.0004789714663703221, "loss": 1.2761, "step": 1474 }, { "epoch": 0.2631344215502631, "grad_norm": 0.5507195591926575, "learning_rate": 0.00047894333166376434, "loss": 1.2709, "step": 1475 }, { "epoch": 0.2633128177682633, "grad_norm": 0.6437628865242004, "learning_rate": 0.00047891517897609594, "loss": 1.1084, "step": 1476 }, { "epoch": 0.2634912139862635, "grad_norm": 0.5252813100814819, "learning_rate": 0.0004788870083095278, "loss": 1.1753, "step": 1477 }, { "epoch": 0.26366961020426366, "grad_norm": 0.721181333065033, "learning_rate": 0.00047885881966627255, "loss": 1.3439, "step": 1478 }, { "epoch": 0.26384800642226386, "grad_norm": 0.6929256916046143, "learning_rate": 0.000478830613048544, "loss": 1.15, "step": 1479 }, { "epoch": 0.264026402640264, "grad_norm": 0.5794677734375, "learning_rate": 0.0004788023884585577, "loss": 0.991, "step": 1480 }, { "epoch": 0.2642047988582642, "grad_norm": 0.5833317041397095, "learning_rate": 0.00047877414589853024, "loss": 1.23, "step": 1481 }, { "epoch": 0.2643831950762644, "grad_norm": 0.5699701309204102, "learning_rate": 0.0004787458853706798, "loss": 1.0603, "step": 1482 }, { "epoch": 0.26456159129426454, "grad_norm": 0.6602557897567749, "learning_rate": 0.00047871760687722597, "loss": 1.2557, "step": 1483 }, { "epoch": 0.26473998751226474, "grad_norm": 0.5460817813873291, "learning_rate": 0.0004786893104203897, "loss": 1.0461, "step": 1484 }, { "epoch": 0.26491838373026494, "grad_norm": 0.5744298100471497, "learning_rate": 0.0004786609960023934, "loss": 1.3121, "step": 1485 }, { "epoch": 0.2650967799482651, "grad_norm": 0.6171600222587585, "learning_rate": 0.00047863266362546095, "loss": 1.264, "step": 1486 }, { "epoch": 0.2652751761662653, "grad_norm": 0.5980626344680786, "learning_rate": 0.00047860431329181744, "loss": 1.4093, "step": 1487 }, { "epoch": 0.2654535723842655, "grad_norm": 0.674322247505188, "learning_rate": 0.0004785759450036895, "loss": 1.2626, "step": 1488 }, { "epoch": 0.2656319686022656, "grad_norm": 0.7270940542221069, "learning_rate": 0.0004785475587633052, "loss": 1.0291, "step": 1489 }, { "epoch": 0.2658103648202658, "grad_norm": 0.5281042456626892, "learning_rate": 0.00047851915457289404, "loss": 1.0533, "step": 1490 }, { "epoch": 0.26598876103826596, "grad_norm": 0.498197466135025, "learning_rate": 0.0004784907324346868, "loss": 0.8067, "step": 1491 }, { "epoch": 0.26616715725626616, "grad_norm": 0.583159863948822, "learning_rate": 0.00047846229235091575, "loss": 0.9819, "step": 1492 }, { "epoch": 0.26634555347426636, "grad_norm": 0.5671996474266052, "learning_rate": 0.0004784338343238146, "loss": 1.2338, "step": 1493 }, { "epoch": 0.2665239496922665, "grad_norm": 0.5304687023162842, "learning_rate": 0.0004784053583556184, "loss": 1.2323, "step": 1494 }, { "epoch": 0.2667023459102667, "grad_norm": 0.8755894303321838, "learning_rate": 0.0004783768644485636, "loss": 1.0927, "step": 1495 }, { "epoch": 0.2668807421282669, "grad_norm": 0.5539624691009521, "learning_rate": 0.0004783483526048882, "loss": 1.1342, "step": 1496 }, { "epoch": 0.26705913834626704, "grad_norm": 0.5056388974189758, "learning_rate": 0.0004783198228268314, "loss": 1.0296, "step": 1497 }, { "epoch": 0.26723753456426724, "grad_norm": 0.5661635994911194, "learning_rate": 0.00047829127511663395, "loss": 1.0945, "step": 1498 }, { "epoch": 0.26741593078226744, "grad_norm": 0.5565497875213623, "learning_rate": 0.00047826270947653803, "loss": 1.0299, "step": 1499 }, { "epoch": 0.2675943270002676, "grad_norm": 0.5293715596199036, "learning_rate": 0.0004782341259087872, "loss": 1.1213, "step": 1500 }, { "epoch": 0.2677727232182678, "grad_norm": 0.6879062056541443, "learning_rate": 0.00047820552441562626, "loss": 1.1906, "step": 1501 }, { "epoch": 0.2679511194362679, "grad_norm": 0.6181793212890625, "learning_rate": 0.00047817690499930165, "loss": 1.2696, "step": 1502 }, { "epoch": 0.2681295156542681, "grad_norm": 0.8244830369949341, "learning_rate": 0.00047814826766206115, "loss": 1.1163, "step": 1503 }, { "epoch": 0.2683079118722683, "grad_norm": 0.6630935072898865, "learning_rate": 0.0004781196124061539, "loss": 1.2271, "step": 1504 }, { "epoch": 0.26848630809026847, "grad_norm": 0.6512210369110107, "learning_rate": 0.0004780909392338304, "loss": 1.3788, "step": 1505 }, { "epoch": 0.26866470430826866, "grad_norm": 0.900384247303009, "learning_rate": 0.00047806224814734275, "loss": 1.1045, "step": 1506 }, { "epoch": 0.26884310052626886, "grad_norm": 1.1172149181365967, "learning_rate": 0.0004780335391489442, "loss": 1.116, "step": 1507 }, { "epoch": 0.269021496744269, "grad_norm": 0.6222420930862427, "learning_rate": 0.00047800481224088965, "loss": 1.0094, "step": 1508 }, { "epoch": 0.2691998929622692, "grad_norm": 0.7224119901657104, "learning_rate": 0.00047797606742543526, "loss": 1.0397, "step": 1509 }, { "epoch": 0.2693782891802694, "grad_norm": 0.5364158749580383, "learning_rate": 0.00047794730470483867, "loss": 1.0348, "step": 1510 }, { "epoch": 0.26955668539826955, "grad_norm": 0.5481920838356018, "learning_rate": 0.00047791852408135885, "loss": 0.8935, "step": 1511 }, { "epoch": 0.26973508161626975, "grad_norm": 4.288261890411377, "learning_rate": 0.0004778897255572562, "loss": 0.963, "step": 1512 }, { "epoch": 0.2699134778342699, "grad_norm": 0.6022629141807556, "learning_rate": 0.00047786090913479255, "loss": 1.0822, "step": 1513 }, { "epoch": 0.2700918740522701, "grad_norm": 0.609437882900238, "learning_rate": 0.00047783207481623126, "loss": 0.8158, "step": 1514 }, { "epoch": 0.2702702702702703, "grad_norm": 0.5600314736366272, "learning_rate": 0.00047780322260383674, "loss": 1.1993, "step": 1515 }, { "epoch": 0.27044866648827043, "grad_norm": 0.6165355443954468, "learning_rate": 0.00047777435249987525, "loss": 1.1175, "step": 1516 }, { "epoch": 0.2706270627062706, "grad_norm": 0.5497164726257324, "learning_rate": 0.00047774546450661407, "loss": 0.9579, "step": 1517 }, { "epoch": 0.2708054589242708, "grad_norm": 0.5620499849319458, "learning_rate": 0.0004777165586263221, "loss": 1.162, "step": 1518 }, { "epoch": 0.27098385514227097, "grad_norm": 0.6000719666481018, "learning_rate": 0.00047768763486126964, "loss": 1.0962, "step": 1519 }, { "epoch": 0.27116225136027117, "grad_norm": 0.6456661224365234, "learning_rate": 0.00047765869321372834, "loss": 1.2405, "step": 1520 }, { "epoch": 0.27134064757827137, "grad_norm": 0.6640433073043823, "learning_rate": 0.00047762973368597117, "loss": 1.2917, "step": 1521 }, { "epoch": 0.2715190437962715, "grad_norm": 0.5571081042289734, "learning_rate": 0.0004776007562802728, "loss": 1.1319, "step": 1522 }, { "epoch": 0.2716974400142717, "grad_norm": 0.6709672212600708, "learning_rate": 0.0004775717609989089, "loss": 1.1581, "step": 1523 }, { "epoch": 0.27187583623227185, "grad_norm": 0.6140976548194885, "learning_rate": 0.00047754274784415673, "loss": 1.0881, "step": 1524 }, { "epoch": 0.27205423245027205, "grad_norm": 0.5703726410865784, "learning_rate": 0.0004775137168182952, "loss": 1.1018, "step": 1525 }, { "epoch": 0.27223262866827225, "grad_norm": 1.8433053493499756, "learning_rate": 0.00047748466792360425, "loss": 1.0196, "step": 1526 }, { "epoch": 0.2724110248862724, "grad_norm": 0.5568527579307556, "learning_rate": 0.0004774556011623653, "loss": 0.9479, "step": 1527 }, { "epoch": 0.2725894211042726, "grad_norm": 0.6627964377403259, "learning_rate": 0.00047742651653686133, "loss": 0.8791, "step": 1528 }, { "epoch": 0.2727678173222728, "grad_norm": 0.655723512172699, "learning_rate": 0.00047739741404937666, "loss": 1.5567, "step": 1529 }, { "epoch": 0.27294621354027293, "grad_norm": 1.177575707435608, "learning_rate": 0.00047736829370219694, "loss": 1.1893, "step": 1530 }, { "epoch": 0.27312460975827313, "grad_norm": 0.5675112009048462, "learning_rate": 0.0004773391554976093, "loss": 1.2078, "step": 1531 }, { "epoch": 0.2733030059762733, "grad_norm": 0.6315929889678955, "learning_rate": 0.00047730999943790216, "loss": 1.0456, "step": 1532 }, { "epoch": 0.27348140219427347, "grad_norm": 1.2165286540985107, "learning_rate": 0.0004772808255253655, "loss": 1.3296, "step": 1533 }, { "epoch": 0.27365979841227367, "grad_norm": 0.6034713983535767, "learning_rate": 0.00047725163376229063, "loss": 1.2784, "step": 1534 }, { "epoch": 0.2738381946302738, "grad_norm": 0.5982022285461426, "learning_rate": 0.0004772224241509702, "loss": 1.2338, "step": 1535 }, { "epoch": 0.274016590848274, "grad_norm": 0.797183096408844, "learning_rate": 0.00047719319669369843, "loss": 1.2911, "step": 1536 }, { "epoch": 0.2741949870662742, "grad_norm": 0.5509144067764282, "learning_rate": 0.0004771639513927707, "loss": 1.3141, "step": 1537 }, { "epoch": 0.27437338328427435, "grad_norm": 0.6552438735961914, "learning_rate": 0.000477134688250484, "loss": 1.3726, "step": 1538 }, { "epoch": 0.27455177950227455, "grad_norm": 0.5664661526679993, "learning_rate": 0.0004771054072691367, "loss": 1.484, "step": 1539 }, { "epoch": 0.27473017572027475, "grad_norm": 0.657057523727417, "learning_rate": 0.0004770761084510283, "loss": 1.5254, "step": 1540 }, { "epoch": 0.2749085719382749, "grad_norm": 0.5660455822944641, "learning_rate": 0.00047704679179846014, "loss": 1.1427, "step": 1541 }, { "epoch": 0.2750869681562751, "grad_norm": 0.5524396300315857, "learning_rate": 0.00047701745731373467, "loss": 1.1335, "step": 1542 }, { "epoch": 0.2752653643742753, "grad_norm": 0.5262104272842407, "learning_rate": 0.00047698810499915577, "loss": 1.0167, "step": 1543 }, { "epoch": 0.27544376059227543, "grad_norm": 0.5536545515060425, "learning_rate": 0.0004769587348570288, "loss": 1.0548, "step": 1544 }, { "epoch": 0.27562215681027563, "grad_norm": 0.5155540108680725, "learning_rate": 0.0004769293468896605, "loss": 1.1564, "step": 1545 }, { "epoch": 0.2758005530282758, "grad_norm": 0.5515694618225098, "learning_rate": 0.00047689994109935884, "loss": 1.1428, "step": 1546 }, { "epoch": 0.275978949246276, "grad_norm": 0.5768205523490906, "learning_rate": 0.00047687051748843357, "loss": 1.371, "step": 1547 }, { "epoch": 0.27615734546427617, "grad_norm": 0.6669476628303528, "learning_rate": 0.0004768410760591955, "loss": 1.3972, "step": 1548 }, { "epoch": 0.2763357416822763, "grad_norm": 1.1227506399154663, "learning_rate": 0.0004768116168139568, "loss": 0.9764, "step": 1549 }, { "epoch": 0.2765141379002765, "grad_norm": 0.5911063551902771, "learning_rate": 0.00047678213975503136, "loss": 1.2668, "step": 1550 }, { "epoch": 0.2766925341182767, "grad_norm": 0.47411879897117615, "learning_rate": 0.00047675264488473436, "loss": 0.9734, "step": 1551 }, { "epoch": 0.27687093033627685, "grad_norm": 0.5374860763549805, "learning_rate": 0.0004767231322053821, "loss": 1.1893, "step": 1552 }, { "epoch": 0.27704932655427705, "grad_norm": 0.5706403851509094, "learning_rate": 0.00047669360171929265, "loss": 1.1484, "step": 1553 }, { "epoch": 0.27722772277227725, "grad_norm": 0.6070848703384399, "learning_rate": 0.0004766640534287853, "loss": 1.3872, "step": 1554 }, { "epoch": 0.2774061189902774, "grad_norm": 0.5827165246009827, "learning_rate": 0.00047663448733618066, "loss": 1.3347, "step": 1555 }, { "epoch": 0.2775845152082776, "grad_norm": 3.5079944133758545, "learning_rate": 0.00047660490344380094, "loss": 1.1723, "step": 1556 }, { "epoch": 0.27776291142627774, "grad_norm": 0.6186500787734985, "learning_rate": 0.00047657530175396955, "loss": 1.5012, "step": 1557 }, { "epoch": 0.27794130764427794, "grad_norm": 0.6020243763923645, "learning_rate": 0.0004765456822690116, "loss": 1.0751, "step": 1558 }, { "epoch": 0.27811970386227813, "grad_norm": 0.5379880666732788, "learning_rate": 0.00047651604499125325, "loss": 0.9193, "step": 1559 }, { "epoch": 0.2782981000802783, "grad_norm": 0.6437864899635315, "learning_rate": 0.0004764863899230221, "loss": 1.2344, "step": 1560 }, { "epoch": 0.2784764962982785, "grad_norm": 0.5693362951278687, "learning_rate": 0.00047645671706664737, "loss": 1.1764, "step": 1561 }, { "epoch": 0.2786548925162787, "grad_norm": 0.5577165484428406, "learning_rate": 0.00047642702642445954, "loss": 1.2728, "step": 1562 }, { "epoch": 0.2788332887342788, "grad_norm": 0.5637865662574768, "learning_rate": 0.0004763973179987906, "loss": 1.3064, "step": 1563 }, { "epoch": 0.279011684952279, "grad_norm": 0.5422176122665405, "learning_rate": 0.00047636759179197366, "loss": 0.9754, "step": 1564 }, { "epoch": 0.2791900811702792, "grad_norm": 0.5627367496490479, "learning_rate": 0.00047633784780634343, "loss": 1.0802, "step": 1565 }, { "epoch": 0.27936847738827936, "grad_norm": 0.6842982769012451, "learning_rate": 0.0004763080860442361, "loss": 1.0098, "step": 1566 }, { "epoch": 0.27954687360627956, "grad_norm": 0.6131782531738281, "learning_rate": 0.00047627830650798903, "loss": 1.4362, "step": 1567 }, { "epoch": 0.2797252698242797, "grad_norm": 0.6176859736442566, "learning_rate": 0.00047624850919994113, "loss": 1.3148, "step": 1568 }, { "epoch": 0.2799036660422799, "grad_norm": 0.6015002727508545, "learning_rate": 0.00047621869412243275, "loss": 1.1703, "step": 1569 }, { "epoch": 0.2800820622602801, "grad_norm": 0.750379204750061, "learning_rate": 0.0004761888612778054, "loss": 1.2353, "step": 1570 }, { "epoch": 0.28026045847828024, "grad_norm": 0.567866325378418, "learning_rate": 0.0004761590106684023, "loss": 1.2543, "step": 1571 }, { "epoch": 0.28043885469628044, "grad_norm": 0.5942226052284241, "learning_rate": 0.0004761291422965678, "loss": 1.2128, "step": 1572 }, { "epoch": 0.28061725091428064, "grad_norm": 0.4971591830253601, "learning_rate": 0.00047609925616464777, "loss": 1.0342, "step": 1573 }, { "epoch": 0.2807956471322808, "grad_norm": 0.5671881437301636, "learning_rate": 0.0004760693522749894, "loss": 1.0644, "step": 1574 }, { "epoch": 0.280974043350281, "grad_norm": 0.887310266494751, "learning_rate": 0.00047603943062994147, "loss": 1.0307, "step": 1575 }, { "epoch": 0.2811524395682812, "grad_norm": 0.5171691179275513, "learning_rate": 0.00047600949123185386, "loss": 0.9699, "step": 1576 }, { "epoch": 0.2813308357862813, "grad_norm": 0.799001932144165, "learning_rate": 0.00047597953408307813, "loss": 1.2391, "step": 1577 }, { "epoch": 0.2815092320042815, "grad_norm": 0.5518134832382202, "learning_rate": 0.00047594955918596704, "loss": 0.9825, "step": 1578 }, { "epoch": 0.28168762822228166, "grad_norm": 0.512997567653656, "learning_rate": 0.00047591956654287484, "loss": 1.1596, "step": 1579 }, { "epoch": 0.28186602444028186, "grad_norm": 0.5603839755058289, "learning_rate": 0.00047588955615615705, "loss": 1.1735, "step": 1580 }, { "epoch": 0.28204442065828206, "grad_norm": 0.5397359728813171, "learning_rate": 0.0004758595280281707, "loss": 1.1707, "step": 1581 }, { "epoch": 0.2822228168762822, "grad_norm": 0.5633308291435242, "learning_rate": 0.0004758294821612742, "loss": 1.1632, "step": 1582 }, { "epoch": 0.2824012130942824, "grad_norm": 1.6372519731521606, "learning_rate": 0.00047579941855782745, "loss": 0.9778, "step": 1583 }, { "epoch": 0.2825796093122826, "grad_norm": 0.790706217288971, "learning_rate": 0.00047576933722019146, "loss": 1.2109, "step": 1584 }, { "epoch": 0.28275800553028274, "grad_norm": 0.5796968340873718, "learning_rate": 0.0004757392381507289, "loss": 1.3131, "step": 1585 }, { "epoch": 0.28293640174828294, "grad_norm": 0.7055728435516357, "learning_rate": 0.0004757091213518037, "loss": 1.1037, "step": 1586 }, { "epoch": 0.28311479796628314, "grad_norm": 1.4809973239898682, "learning_rate": 0.00047567898682578124, "loss": 1.1257, "step": 1587 }, { "epoch": 0.2832931941842833, "grad_norm": 0.6054453253746033, "learning_rate": 0.00047564883457502835, "loss": 1.0544, "step": 1588 }, { "epoch": 0.2834715904022835, "grad_norm": 6.443731307983398, "learning_rate": 0.0004756186646019131, "loss": 0.8428, "step": 1589 }, { "epoch": 0.2836499866202836, "grad_norm": 9.057873725891113, "learning_rate": 0.0004755884769088049, "loss": 1.5444, "step": 1590 }, { "epoch": 0.2838283828382838, "grad_norm": 0.7593368291854858, "learning_rate": 0.00047555827149807484, "loss": 1.1721, "step": 1591 }, { "epoch": 0.284006779056284, "grad_norm": 0.6235383749008179, "learning_rate": 0.00047552804837209525, "loss": 1.3072, "step": 1592 }, { "epoch": 0.28418517527428416, "grad_norm": 0.9655506610870361, "learning_rate": 0.0004754978075332398, "loss": 1.1129, "step": 1593 }, { "epoch": 0.28436357149228436, "grad_norm": 0.7398672699928284, "learning_rate": 0.0004754675489838835, "loss": 0.8643, "step": 1594 }, { "epoch": 0.28454196771028456, "grad_norm": 0.6276271343231201, "learning_rate": 0.000475437272726403, "loss": 1.1541, "step": 1595 }, { "epoch": 0.2847203639282847, "grad_norm": 0.6548689603805542, "learning_rate": 0.0004754069787631761, "loss": 1.024, "step": 1596 }, { "epoch": 0.2848987601462849, "grad_norm": 0.7249738574028015, "learning_rate": 0.0004753766670965821, "loss": 1.0359, "step": 1597 }, { "epoch": 0.2850771563642851, "grad_norm": 0.6566236615180969, "learning_rate": 0.0004753463377290016, "loss": 0.8981, "step": 1598 }, { "epoch": 0.28525555258228524, "grad_norm": 0.7142555117607117, "learning_rate": 0.0004753159906628167, "loss": 1.2243, "step": 1599 }, { "epoch": 0.28543394880028544, "grad_norm": 0.5189533233642578, "learning_rate": 0.00047528562590041097, "loss": 1.1284, "step": 1600 }, { "epoch": 0.2856123450182856, "grad_norm": 0.619817316532135, "learning_rate": 0.000475255243444169, "loss": 1.0063, "step": 1601 }, { "epoch": 0.2857907412362858, "grad_norm": 3.838261127471924, "learning_rate": 0.00047522484329647725, "loss": 1.0014, "step": 1602 }, { "epoch": 0.285969137454286, "grad_norm": 0.6709682941436768, "learning_rate": 0.0004751944254597232, "loss": 1.1519, "step": 1603 }, { "epoch": 0.2861475336722861, "grad_norm": 0.6216676235198975, "learning_rate": 0.0004751639899362958, "loss": 1.17, "step": 1604 }, { "epoch": 0.2863259298902863, "grad_norm": 2.6110217571258545, "learning_rate": 0.00047513353672858565, "loss": 1.3296, "step": 1605 }, { "epoch": 0.2865043261082865, "grad_norm": 0.6154805421829224, "learning_rate": 0.0004751030658389843, "loss": 1.4798, "step": 1606 }, { "epoch": 0.28668272232628667, "grad_norm": 0.6929340958595276, "learning_rate": 0.00047507257726988515, "loss": 0.8626, "step": 1607 }, { "epoch": 0.28686111854428686, "grad_norm": 0.5480323433876038, "learning_rate": 0.00047504207102368255, "loss": 1.0958, "step": 1608 }, { "epoch": 0.28703951476228706, "grad_norm": 0.6278098821640015, "learning_rate": 0.00047501154710277255, "loss": 1.4003, "step": 1609 }, { "epoch": 0.2872179109802872, "grad_norm": 0.5721479058265686, "learning_rate": 0.0004749810055095525, "loss": 0.9552, "step": 1610 }, { "epoch": 0.2873963071982874, "grad_norm": 0.5661513209342957, "learning_rate": 0.000474950446246421, "loss": 1.3078, "step": 1611 }, { "epoch": 0.28757470341628755, "grad_norm": 0.508573055267334, "learning_rate": 0.00047491986931577835, "loss": 1.1201, "step": 1612 }, { "epoch": 0.28775309963428775, "grad_norm": 0.5927449464797974, "learning_rate": 0.00047488927472002596, "loss": 1.26, "step": 1613 }, { "epoch": 0.28793149585228794, "grad_norm": 0.5334576964378357, "learning_rate": 0.00047485866246156665, "loss": 1.313, "step": 1614 }, { "epoch": 0.2881098920702881, "grad_norm": 0.5566635131835938, "learning_rate": 0.0004748280325428048, "loss": 1.4478, "step": 1615 }, { "epoch": 0.2882882882882883, "grad_norm": 0.47653868794441223, "learning_rate": 0.000474797384966146, "loss": 1.0392, "step": 1616 }, { "epoch": 0.2884666845062885, "grad_norm": 0.6128924489021301, "learning_rate": 0.0004747667197339974, "loss": 1.2961, "step": 1617 }, { "epoch": 0.2886450807242886, "grad_norm": 0.4769141376018524, "learning_rate": 0.0004747360368487672, "loss": 0.9468, "step": 1618 }, { "epoch": 0.2888234769422888, "grad_norm": 0.5372063517570496, "learning_rate": 0.0004747053363128655, "loss": 1.1281, "step": 1619 }, { "epoch": 0.289001873160289, "grad_norm": 0.499072402715683, "learning_rate": 0.0004746746181287034, "loss": 1.094, "step": 1620 }, { "epoch": 0.28918026937828917, "grad_norm": 0.6395072937011719, "learning_rate": 0.0004746438822986934, "loss": 1.3721, "step": 1621 }, { "epoch": 0.28935866559628937, "grad_norm": 0.5611110925674438, "learning_rate": 0.00047461312882524954, "loss": 1.1692, "step": 1622 }, { "epoch": 0.28953706181428956, "grad_norm": 0.6090511679649353, "learning_rate": 0.0004745823577107873, "loss": 1.175, "step": 1623 }, { "epoch": 0.2897154580322897, "grad_norm": 0.5791161060333252, "learning_rate": 0.0004745515689577233, "loss": 1.2759, "step": 1624 }, { "epoch": 0.2898938542502899, "grad_norm": 0.5539647936820984, "learning_rate": 0.0004745207625684756, "loss": 1.1852, "step": 1625 }, { "epoch": 0.29007225046829005, "grad_norm": 1.770546317100525, "learning_rate": 0.000474489938545464, "loss": 1.0354, "step": 1626 }, { "epoch": 0.29025064668629025, "grad_norm": 0.640678882598877, "learning_rate": 0.0004744590968911091, "loss": 1.2128, "step": 1627 }, { "epoch": 0.29042904290429045, "grad_norm": 0.6019294261932373, "learning_rate": 0.00047442823760783336, "loss": 1.3319, "step": 1628 }, { "epoch": 0.2906074391222906, "grad_norm": 0.6891437768936157, "learning_rate": 0.0004743973606980604, "loss": 0.8761, "step": 1629 }, { "epoch": 0.2907858353402908, "grad_norm": 0.5156970620155334, "learning_rate": 0.0004743664661642153, "loss": 1.139, "step": 1630 }, { "epoch": 0.290964231558291, "grad_norm": 0.5733403563499451, "learning_rate": 0.0004743355540087245, "loss": 1.3032, "step": 1631 }, { "epoch": 0.29114262777629113, "grad_norm": 0.7720729112625122, "learning_rate": 0.00047430462423401587, "loss": 1.1981, "step": 1632 }, { "epoch": 0.29132102399429133, "grad_norm": 0.4814129173755646, "learning_rate": 0.00047427367684251855, "loss": 0.7758, "step": 1633 }, { "epoch": 0.2914994202122915, "grad_norm": 0.6132440567016602, "learning_rate": 0.0004742427118366632, "loss": 1.4416, "step": 1634 }, { "epoch": 0.29167781643029167, "grad_norm": 0.5137991309165955, "learning_rate": 0.0004742117292188817, "loss": 0.913, "step": 1635 }, { "epoch": 0.29185621264829187, "grad_norm": 0.561093270778656, "learning_rate": 0.0004741807289916075, "loss": 1.0205, "step": 1636 }, { "epoch": 0.292034608866292, "grad_norm": 0.5749049782752991, "learning_rate": 0.00047414971115727536, "loss": 1.1973, "step": 1637 }, { "epoch": 0.2922130050842922, "grad_norm": 0.5146419405937195, "learning_rate": 0.00047411867571832135, "loss": 1.0004, "step": 1638 }, { "epoch": 0.2923914013022924, "grad_norm": 0.6099346280097961, "learning_rate": 0.00047408762267718297, "loss": 1.1748, "step": 1639 }, { "epoch": 0.29256979752029255, "grad_norm": 0.5745267271995544, "learning_rate": 0.0004740565520362991, "loss": 1.1932, "step": 1640 }, { "epoch": 0.29274819373829275, "grad_norm": 0.5441005229949951, "learning_rate": 0.0004740254637981101, "loss": 1.2205, "step": 1641 }, { "epoch": 0.29292658995629295, "grad_norm": 0.5015900135040283, "learning_rate": 0.00047399435796505754, "loss": 0.8114, "step": 1642 }, { "epoch": 0.2931049861742931, "grad_norm": 0.6179179549217224, "learning_rate": 0.0004739632345395846, "loss": 1.1337, "step": 1643 }, { "epoch": 0.2932833823922933, "grad_norm": 0.5103684663772583, "learning_rate": 0.0004739320935241355, "loss": 0.8642, "step": 1644 }, { "epoch": 0.2934617786102935, "grad_norm": 0.68010014295578, "learning_rate": 0.0004739009349211561, "loss": 1.284, "step": 1645 }, { "epoch": 0.29364017482829363, "grad_norm": 0.5158190131187439, "learning_rate": 0.0004738697587330937, "loss": 1.1043, "step": 1646 }, { "epoch": 0.29381857104629383, "grad_norm": 0.5020380616188049, "learning_rate": 0.00047383856496239677, "loss": 0.9218, "step": 1647 }, { "epoch": 0.293996967264294, "grad_norm": 0.5497924089431763, "learning_rate": 0.00047380735361151526, "loss": 1.3251, "step": 1648 }, { "epoch": 0.29417536348229417, "grad_norm": 0.5309639573097229, "learning_rate": 0.00047377612468290053, "loss": 1.0265, "step": 1649 }, { "epoch": 0.29435375970029437, "grad_norm": 0.528611958026886, "learning_rate": 0.0004737448781790052, "loss": 1.1084, "step": 1650 }, { "epoch": 0.2945321559182945, "grad_norm": 0.7254324555397034, "learning_rate": 0.0004737136141022836, "loss": 1.2578, "step": 1651 }, { "epoch": 0.2947105521362947, "grad_norm": 0.6030469536781311, "learning_rate": 0.0004736823324551909, "loss": 1.0314, "step": 1652 }, { "epoch": 0.2948889483542949, "grad_norm": 0.5361880660057068, "learning_rate": 0.00047365103324018405, "loss": 1.1275, "step": 1653 }, { "epoch": 0.29506734457229505, "grad_norm": 0.5910381078720093, "learning_rate": 0.00047361971645972135, "loss": 0.9726, "step": 1654 }, { "epoch": 0.29524574079029525, "grad_norm": 0.6346861124038696, "learning_rate": 0.00047358838211626234, "loss": 1.0678, "step": 1655 }, { "epoch": 0.29542413700829545, "grad_norm": 2.6803693771362305, "learning_rate": 0.0004735570302122679, "loss": 1.2372, "step": 1656 }, { "epoch": 0.2956025332262956, "grad_norm": 0.571451723575592, "learning_rate": 0.0004735256607502006, "loss": 1.1094, "step": 1657 }, { "epoch": 0.2957809294442958, "grad_norm": 0.5796542167663574, "learning_rate": 0.0004734942737325242, "loss": 1.05, "step": 1658 }, { "epoch": 0.29595932566229594, "grad_norm": 0.6353934407234192, "learning_rate": 0.00047346286916170356, "loss": 1.1633, "step": 1659 }, { "epoch": 0.29613772188029613, "grad_norm": 0.7311161160469055, "learning_rate": 0.00047343144704020543, "loss": 1.1351, "step": 1660 }, { "epoch": 0.29631611809829633, "grad_norm": 0.5946211218833923, "learning_rate": 0.00047340000737049756, "loss": 1.0721, "step": 1661 }, { "epoch": 0.2964945143162965, "grad_norm": 0.6070974469184875, "learning_rate": 0.00047336855015504923, "loss": 1.0058, "step": 1662 }, { "epoch": 0.2966729105342967, "grad_norm": 0.5165389180183411, "learning_rate": 0.0004733370753963311, "loss": 0.9556, "step": 1663 }, { "epoch": 0.2968513067522969, "grad_norm": 0.7445652484893799, "learning_rate": 0.0004733055830968152, "loss": 0.9277, "step": 1664 }, { "epoch": 0.297029702970297, "grad_norm": 0.520530641078949, "learning_rate": 0.0004732740732589749, "loss": 1.0856, "step": 1665 }, { "epoch": 0.2972080991882972, "grad_norm": 0.6134181618690491, "learning_rate": 0.00047324254588528497, "loss": 1.3027, "step": 1666 }, { "epoch": 0.2973864954062974, "grad_norm": 0.5679133534431458, "learning_rate": 0.00047321100097822154, "loss": 1.1749, "step": 1667 }, { "epoch": 0.29756489162429756, "grad_norm": 0.5943863391876221, "learning_rate": 0.0004731794385402621, "loss": 1.2488, "step": 1668 }, { "epoch": 0.29774328784229775, "grad_norm": 0.5449469089508057, "learning_rate": 0.00047314785857388575, "loss": 1.1212, "step": 1669 }, { "epoch": 0.2979216840602979, "grad_norm": 0.882116973400116, "learning_rate": 0.0004731162610815725, "loss": 1.2176, "step": 1670 }, { "epoch": 0.2981000802782981, "grad_norm": 0.6479673385620117, "learning_rate": 0.0004730846460658041, "loss": 1.3039, "step": 1671 }, { "epoch": 0.2982784764962983, "grad_norm": 0.5808786749839783, "learning_rate": 0.0004730530135290637, "loss": 1.0677, "step": 1672 }, { "epoch": 0.29845687271429844, "grad_norm": 0.527851402759552, "learning_rate": 0.0004730213634738355, "loss": 1.0218, "step": 1673 }, { "epoch": 0.29863526893229864, "grad_norm": 0.8989919424057007, "learning_rate": 0.00047298969590260545, "loss": 1.0192, "step": 1674 }, { "epoch": 0.29881366515029884, "grad_norm": 0.5305653214454651, "learning_rate": 0.0004729580108178606, "loss": 1.0954, "step": 1675 }, { "epoch": 0.298992061368299, "grad_norm": 0.6758263111114502, "learning_rate": 0.0004729263082220896, "loss": 1.1321, "step": 1676 }, { "epoch": 0.2991704575862992, "grad_norm": 0.7814352512359619, "learning_rate": 0.0004728945881177822, "loss": 0.8791, "step": 1677 }, { "epoch": 0.2993488538042994, "grad_norm": 1.4971033334732056, "learning_rate": 0.00047286285050742984, "loss": 1.2261, "step": 1678 }, { "epoch": 0.2995272500222995, "grad_norm": 0.6250385046005249, "learning_rate": 0.0004728310953935251, "loss": 1.0965, "step": 1679 }, { "epoch": 0.2997056462402997, "grad_norm": 0.5605043172836304, "learning_rate": 0.00047279932277856195, "loss": 1.2065, "step": 1680 }, { "epoch": 0.29988404245829986, "grad_norm": 0.5820797085762024, "learning_rate": 0.0004727675326650359, "loss": 0.892, "step": 1681 }, { "epoch": 0.30006243867630006, "grad_norm": 0.626677930355072, "learning_rate": 0.0004727357250554437, "loss": 1.2638, "step": 1682 }, { "epoch": 0.30024083489430026, "grad_norm": 0.5350278615951538, "learning_rate": 0.00047270389995228353, "loss": 1.2105, "step": 1683 }, { "epoch": 0.3004192311123004, "grad_norm": 0.6105818748474121, "learning_rate": 0.0004726720573580549, "loss": 1.0251, "step": 1684 }, { "epoch": 0.3005976273303006, "grad_norm": 0.6602348685264587, "learning_rate": 0.00047264019727525866, "loss": 1.3229, "step": 1685 }, { "epoch": 0.3007760235483008, "grad_norm": 0.5458388924598694, "learning_rate": 0.00047260831970639716, "loss": 1.2786, "step": 1686 }, { "epoch": 0.30095441976630094, "grad_norm": 0.6178166270256042, "learning_rate": 0.000472576424653974, "loss": 1.1324, "step": 1687 }, { "epoch": 0.30113281598430114, "grad_norm": 0.6096773147583008, "learning_rate": 0.0004725445121204943, "loss": 1.0911, "step": 1688 }, { "epoch": 0.30131121220230134, "grad_norm": 0.6458967924118042, "learning_rate": 0.0004725125821084643, "loss": 1.0113, "step": 1689 }, { "epoch": 0.3014896084203015, "grad_norm": 0.6352248191833496, "learning_rate": 0.00047248063462039194, "loss": 1.206, "step": 1690 }, { "epoch": 0.3016680046383017, "grad_norm": 0.7942283749580383, "learning_rate": 0.0004724486696587862, "loss": 1.3854, "step": 1691 }, { "epoch": 0.3018464008563018, "grad_norm": 0.6200905442237854, "learning_rate": 0.00047241668722615773, "loss": 1.0581, "step": 1692 }, { "epoch": 0.302024797074302, "grad_norm": 0.6159539222717285, "learning_rate": 0.0004723846873250183, "loss": 1.15, "step": 1693 }, { "epoch": 0.3022031932923022, "grad_norm": 0.5503310561180115, "learning_rate": 0.00047235266995788127, "loss": 1.0242, "step": 1694 }, { "epoch": 0.30238158951030236, "grad_norm": 0.8836873173713684, "learning_rate": 0.00047232063512726125, "loss": 1.2961, "step": 1695 }, { "epoch": 0.30255998572830256, "grad_norm": 0.6245954036712646, "learning_rate": 0.0004722885828356742, "loss": 1.2354, "step": 1696 }, { "epoch": 0.30273838194630276, "grad_norm": 1.8814623355865479, "learning_rate": 0.0004722565130856375, "loss": 0.9584, "step": 1697 }, { "epoch": 0.3029167781643029, "grad_norm": 0.6291859149932861, "learning_rate": 0.0004722244258796699, "loss": 1.3523, "step": 1698 }, { "epoch": 0.3030951743823031, "grad_norm": 0.52773517370224, "learning_rate": 0.00047219232122029154, "loss": 1.1431, "step": 1699 }, { "epoch": 0.3032735706003033, "grad_norm": 0.6015790104866028, "learning_rate": 0.0004721601991100239, "loss": 1.2411, "step": 1700 }, { "epoch": 0.30345196681830344, "grad_norm": 0.5504578351974487, "learning_rate": 0.0004721280595513898, "loss": 1.1042, "step": 1701 }, { "epoch": 0.30363036303630364, "grad_norm": 0.5806344747543335, "learning_rate": 0.00047209590254691347, "loss": 1.4875, "step": 1702 }, { "epoch": 0.3038087592543038, "grad_norm": 0.6146537065505981, "learning_rate": 0.0004720637280991206, "loss": 1.3344, "step": 1703 }, { "epoch": 0.303987155472304, "grad_norm": 0.5845167636871338, "learning_rate": 0.00047203153621053803, "loss": 1.3061, "step": 1704 }, { "epoch": 0.3041655516903042, "grad_norm": 0.555408775806427, "learning_rate": 0.0004719993268836942, "loss": 0.9783, "step": 1705 }, { "epoch": 0.3043439479083043, "grad_norm": 0.7523461580276489, "learning_rate": 0.00047196710012111865, "loss": 1.0337, "step": 1706 }, { "epoch": 0.3045223441263045, "grad_norm": 0.5478354096412659, "learning_rate": 0.00047193485592534267, "loss": 1.1427, "step": 1707 }, { "epoch": 0.3047007403443047, "grad_norm": 0.5235744118690491, "learning_rate": 0.0004719025942988986, "loss": 1.1299, "step": 1708 }, { "epoch": 0.30487913656230486, "grad_norm": 0.5737703442573547, "learning_rate": 0.00047187031524432033, "loss": 1.259, "step": 1709 }, { "epoch": 0.30505753278030506, "grad_norm": 0.515281617641449, "learning_rate": 0.0004718380187641429, "loss": 1.0624, "step": 1710 }, { "epoch": 0.30523592899830526, "grad_norm": 0.4825386703014374, "learning_rate": 0.000471805704860903, "loss": 0.9467, "step": 1711 }, { "epoch": 0.3054143252163054, "grad_norm": 0.73399418592453, "learning_rate": 0.00047177337353713843, "loss": 1.2483, "step": 1712 }, { "epoch": 0.3055927214343056, "grad_norm": 0.5785844922065735, "learning_rate": 0.00047174102479538853, "loss": 1.1172, "step": 1713 }, { "epoch": 0.30577111765230575, "grad_norm": 0.559877336025238, "learning_rate": 0.000471708658638194, "loss": 1.1775, "step": 1714 }, { "epoch": 0.30594951387030594, "grad_norm": 0.5484994649887085, "learning_rate": 0.00047167627506809686, "loss": 1.0763, "step": 1715 }, { "epoch": 0.30612791008830614, "grad_norm": 0.6489263772964478, "learning_rate": 0.0004716438740876404, "loss": 1.0646, "step": 1716 }, { "epoch": 0.3063063063063063, "grad_norm": 0.5778820514678955, "learning_rate": 0.0004716114556993695, "loss": 1.0459, "step": 1717 }, { "epoch": 0.3064847025243065, "grad_norm": 2.8568458557128906, "learning_rate": 0.00047157901990583026, "loss": 0.9765, "step": 1718 }, { "epoch": 0.3066630987423067, "grad_norm": 0.6178241968154907, "learning_rate": 0.0004715465667095701, "loss": 1.2187, "step": 1719 }, { "epoch": 0.3068414949603068, "grad_norm": 0.5349613428115845, "learning_rate": 0.0004715140961131379, "loss": 1.063, "step": 1720 }, { "epoch": 0.307019891178307, "grad_norm": 0.556450366973877, "learning_rate": 0.00047148160811908395, "loss": 1.0956, "step": 1721 }, { "epoch": 0.3071982873963072, "grad_norm": 0.4912685453891754, "learning_rate": 0.0004714491027299599, "loss": 0.8442, "step": 1722 }, { "epoch": 0.30737668361430737, "grad_norm": 0.5774837136268616, "learning_rate": 0.0004714165799483185, "loss": 1.2867, "step": 1723 }, { "epoch": 0.30755507983230757, "grad_norm": 0.5329927206039429, "learning_rate": 0.0004713840397767142, "loss": 0.8711, "step": 1724 }, { "epoch": 0.3077334760503077, "grad_norm": 0.5508580803871155, "learning_rate": 0.00047135148221770273, "loss": 1.1184, "step": 1725 }, { "epoch": 0.3079118722683079, "grad_norm": 0.6747270822525024, "learning_rate": 0.0004713189072738411, "loss": 1.3457, "step": 1726 }, { "epoch": 0.3080902684863081, "grad_norm": 0.5561022162437439, "learning_rate": 0.0004712863149476877, "loss": 1.0277, "step": 1727 }, { "epoch": 0.30826866470430825, "grad_norm": 0.6054466962814331, "learning_rate": 0.0004712537052418024, "loss": 1.3957, "step": 1728 }, { "epoch": 0.30844706092230845, "grad_norm": 84.19719696044922, "learning_rate": 0.00047122107815874626, "loss": 1.2228, "step": 1729 }, { "epoch": 0.30862545714030865, "grad_norm": 0.6073416471481323, "learning_rate": 0.00047118843370108187, "loss": 1.0421, "step": 1730 }, { "epoch": 0.3088038533583088, "grad_norm": 0.7347621917724609, "learning_rate": 0.00047115577187137304, "loss": 1.2824, "step": 1731 }, { "epoch": 0.308982249576309, "grad_norm": 0.5585038661956787, "learning_rate": 0.00047112309267218513, "loss": 1.0725, "step": 1732 }, { "epoch": 0.3091606457943092, "grad_norm": 0.5505072474479675, "learning_rate": 0.00047109039610608464, "loss": 1.1923, "step": 1733 }, { "epoch": 0.30933904201230933, "grad_norm": 2.2346675395965576, "learning_rate": 0.0004710576821756395, "loss": 1.3058, "step": 1734 }, { "epoch": 0.3095174382303095, "grad_norm": 0.5852141976356506, "learning_rate": 0.0004710249508834192, "loss": 1.3095, "step": 1735 }, { "epoch": 0.30969583444830967, "grad_norm": 3.471550464630127, "learning_rate": 0.00047099220223199444, "loss": 1.2149, "step": 1736 }, { "epoch": 0.30987423066630987, "grad_norm": 4.984081268310547, "learning_rate": 0.0004709594362239371, "loss": 1.286, "step": 1737 }, { "epoch": 0.31005262688431007, "grad_norm": 0.8435764908790588, "learning_rate": 0.0004709266528618208, "loss": 0.8668, "step": 1738 }, { "epoch": 0.3102310231023102, "grad_norm": 0.6224226355552673, "learning_rate": 0.0004708938521482202, "loss": 1.2869, "step": 1739 }, { "epoch": 0.3104094193203104, "grad_norm": 0.569222092628479, "learning_rate": 0.00047086103408571157, "loss": 1.2534, "step": 1740 }, { "epoch": 0.3105878155383106, "grad_norm": 5.230443000793457, "learning_rate": 0.0004708281986768724, "loss": 1.1547, "step": 1741 }, { "epoch": 0.31076621175631075, "grad_norm": 0.726719319820404, "learning_rate": 0.00047079534592428144, "loss": 1.1936, "step": 1742 }, { "epoch": 0.31094460797431095, "grad_norm": 0.794158399105072, "learning_rate": 0.0004707624758305191, "loss": 1.0309, "step": 1743 }, { "epoch": 0.31112300419231115, "grad_norm": 0.580272376537323, "learning_rate": 0.0004707295883981668, "loss": 1.0352, "step": 1744 }, { "epoch": 0.3113014004103113, "grad_norm": 0.6194749474525452, "learning_rate": 0.00047069668362980776, "loss": 1.2419, "step": 1745 }, { "epoch": 0.3114797966283115, "grad_norm": 0.6249500513076782, "learning_rate": 0.0004706637615280261, "loss": 1.0573, "step": 1746 }, { "epoch": 0.31165819284631163, "grad_norm": 0.6039573550224304, "learning_rate": 0.0004706308220954075, "loss": 1.0936, "step": 1747 }, { "epoch": 0.31183658906431183, "grad_norm": 0.570054829120636, "learning_rate": 0.00047059786533453917, "loss": 0.9585, "step": 1748 }, { "epoch": 0.31201498528231203, "grad_norm": 0.5929118394851685, "learning_rate": 0.0004705648912480094, "loss": 0.9652, "step": 1749 }, { "epoch": 0.3121933815003122, "grad_norm": 0.6323100328445435, "learning_rate": 0.000470531899838408, "loss": 1.3066, "step": 1750 }, { "epoch": 0.31237177771831237, "grad_norm": 0.5521016120910645, "learning_rate": 0.00047049889110832604, "loss": 1.082, "step": 1751 }, { "epoch": 0.31255017393631257, "grad_norm": 0.5617358684539795, "learning_rate": 0.00047046586506035613, "loss": 1.2556, "step": 1752 }, { "epoch": 0.3127285701543127, "grad_norm": 1.5406019687652588, "learning_rate": 0.000470432821697092, "loss": 1.303, "step": 1753 }, { "epoch": 0.3129069663723129, "grad_norm": 0.5704224109649658, "learning_rate": 0.0004703997610211289, "loss": 1.0045, "step": 1754 }, { "epoch": 0.3130853625903131, "grad_norm": 0.5109691023826599, "learning_rate": 0.00047036668303506346, "loss": 0.9764, "step": 1755 }, { "epoch": 0.31326375880831325, "grad_norm": 0.5933413505554199, "learning_rate": 0.0004703335877414936, "loss": 1.1017, "step": 1756 }, { "epoch": 0.31344215502631345, "grad_norm": 0.6716773509979248, "learning_rate": 0.0004703004751430185, "loss": 1.3598, "step": 1757 }, { "epoch": 0.3136205512443136, "grad_norm": 0.5351828932762146, "learning_rate": 0.0004702673452422389, "loss": 0.9856, "step": 1758 }, { "epoch": 0.3137989474623138, "grad_norm": 0.6115684509277344, "learning_rate": 0.0004702341980417568, "loss": 1.1048, "step": 1759 }, { "epoch": 0.313977343680314, "grad_norm": 0.5649376511573792, "learning_rate": 0.00047020103354417553, "loss": 1.3311, "step": 1760 }, { "epoch": 0.31415573989831413, "grad_norm": 0.5363413095474243, "learning_rate": 0.0004701678517520999, "loss": 1.1215, "step": 1761 }, { "epoch": 0.31433413611631433, "grad_norm": 0.5385435223579407, "learning_rate": 0.00047013465266813594, "loss": 1.0061, "step": 1762 }, { "epoch": 0.31451253233431453, "grad_norm": 0.6086006760597229, "learning_rate": 0.0004701014362948911, "loss": 1.2379, "step": 1763 }, { "epoch": 0.3146909285523147, "grad_norm": 0.49953147768974304, "learning_rate": 0.0004700682026349741, "loss": 1.0193, "step": 1764 }, { "epoch": 0.3148693247703149, "grad_norm": 0.5452679395675659, "learning_rate": 0.0004700349516909952, "loss": 1.2415, "step": 1765 }, { "epoch": 0.31504772098831507, "grad_norm": 0.5797132253646851, "learning_rate": 0.00047000168346556594, "loss": 1.3095, "step": 1766 }, { "epoch": 0.3152261172063152, "grad_norm": 0.586613655090332, "learning_rate": 0.00046996839796129903, "loss": 1.0955, "step": 1767 }, { "epoch": 0.3154045134243154, "grad_norm": 0.5819451808929443, "learning_rate": 0.0004699350951808089, "loss": 1.2054, "step": 1768 }, { "epoch": 0.31558290964231556, "grad_norm": 0.6394988298416138, "learning_rate": 0.00046990177512671097, "loss": 1.3011, "step": 1769 }, { "epoch": 0.31576130586031576, "grad_norm": 0.5528479218482971, "learning_rate": 0.00046986843780162223, "loss": 1.3098, "step": 1770 }, { "epoch": 0.31593970207831595, "grad_norm": 0.5443571209907532, "learning_rate": 0.0004698350832081611, "loss": 1.3117, "step": 1771 }, { "epoch": 0.3161180982963161, "grad_norm": 0.4671507477760315, "learning_rate": 0.00046980171134894714, "loss": 1.0019, "step": 1772 }, { "epoch": 0.3162964945143163, "grad_norm": 0.5098404884338379, "learning_rate": 0.00046976832222660127, "loss": 0.9793, "step": 1773 }, { "epoch": 0.3164748907323165, "grad_norm": 0.7493408918380737, "learning_rate": 0.000469734915843746, "loss": 1.1057, "step": 1774 }, { "epoch": 0.31665328695031664, "grad_norm": 0.5108188986778259, "learning_rate": 0.00046970149220300496, "loss": 1.0821, "step": 1775 }, { "epoch": 0.31683168316831684, "grad_norm": 0.4858883321285248, "learning_rate": 0.0004696680513070033, "loss": 0.9252, "step": 1776 }, { "epoch": 0.31701007938631703, "grad_norm": 0.5160908102989197, "learning_rate": 0.00046963459315836744, "loss": 1.2089, "step": 1777 }, { "epoch": 0.3171884756043172, "grad_norm": 0.5474086403846741, "learning_rate": 0.0004696011177597251, "loss": 0.9082, "step": 1778 }, { "epoch": 0.3173668718223174, "grad_norm": 0.5507884621620178, "learning_rate": 0.0004695676251137055, "loss": 1.2827, "step": 1779 }, { "epoch": 0.3175452680403175, "grad_norm": 0.5097134113311768, "learning_rate": 0.0004695341152229391, "loss": 0.9908, "step": 1780 }, { "epoch": 0.3177236642583177, "grad_norm": 0.5379379391670227, "learning_rate": 0.00046950058809005775, "loss": 1.1212, "step": 1781 }, { "epoch": 0.3179020604763179, "grad_norm": 0.5245591998100281, "learning_rate": 0.00046946704371769466, "loss": 1.0469, "step": 1782 }, { "epoch": 0.31808045669431806, "grad_norm": 0.5721558928489685, "learning_rate": 0.0004694334821084845, "loss": 1.5169, "step": 1783 }, { "epoch": 0.31825885291231826, "grad_norm": 0.4801085889339447, "learning_rate": 0.00046939990326506296, "loss": 0.9409, "step": 1784 }, { "epoch": 0.31843724913031846, "grad_norm": 0.5646408796310425, "learning_rate": 0.0004693663071900675, "loss": 1.1511, "step": 1785 }, { "epoch": 0.3186156453483186, "grad_norm": 0.6158486008644104, "learning_rate": 0.00046933269388613663, "loss": 1.3416, "step": 1786 }, { "epoch": 0.3187940415663188, "grad_norm": 0.5548638701438904, "learning_rate": 0.0004692990633559104, "loss": 1.0945, "step": 1787 }, { "epoch": 0.318972437784319, "grad_norm": 0.5320894718170166, "learning_rate": 0.0004692654156020302, "loss": 1.1061, "step": 1788 }, { "epoch": 0.31915083400231914, "grad_norm": 0.5542237162590027, "learning_rate": 0.0004692317506271385, "loss": 1.2178, "step": 1789 }, { "epoch": 0.31932923022031934, "grad_norm": 0.49565958976745605, "learning_rate": 0.0004691980684338795, "loss": 0.9038, "step": 1790 }, { "epoch": 0.3195076264383195, "grad_norm": 0.4972078502178192, "learning_rate": 0.00046916436902489847, "loss": 1.0761, "step": 1791 }, { "epoch": 0.3196860226563197, "grad_norm": 0.5097312927246094, "learning_rate": 0.00046913065240284226, "loss": 0.9506, "step": 1792 }, { "epoch": 0.3198644188743199, "grad_norm": 0.5239170789718628, "learning_rate": 0.00046909691857035884, "loss": 1.1015, "step": 1793 }, { "epoch": 0.32004281509232, "grad_norm": 0.5465912818908691, "learning_rate": 0.0004690631675300978, "loss": 1.04, "step": 1794 }, { "epoch": 0.3202212113103202, "grad_norm": 0.5346068143844604, "learning_rate": 0.00046902939928470977, "loss": 1.0701, "step": 1795 }, { "epoch": 0.3203996075283204, "grad_norm": 0.5574382543563843, "learning_rate": 0.00046899561383684707, "loss": 1.3175, "step": 1796 }, { "epoch": 0.32057800374632056, "grad_norm": 0.5112574100494385, "learning_rate": 0.00046896181118916304, "loss": 0.9372, "step": 1797 }, { "epoch": 0.32075639996432076, "grad_norm": 0.5132248997688293, "learning_rate": 0.00046892799134431263, "loss": 1.0925, "step": 1798 }, { "epoch": 0.32093479618232096, "grad_norm": 0.5472792983055115, "learning_rate": 0.000468894154304952, "loss": 0.8669, "step": 1799 }, { "epoch": 0.3211131924003211, "grad_norm": 0.5460713505744934, "learning_rate": 0.0004688603000737386, "loss": 0.9067, "step": 1800 }, { "epoch": 0.3212915886183213, "grad_norm": 0.5631927847862244, "learning_rate": 0.00046882642865333146, "loss": 1.0512, "step": 1801 }, { "epoch": 0.32146998483632144, "grad_norm": 0.5665701627731323, "learning_rate": 0.00046879254004639085, "loss": 1.1049, "step": 1802 }, { "epoch": 0.32164838105432164, "grad_norm": 0.6018973588943481, "learning_rate": 0.00046875863425557823, "loss": 1.1779, "step": 1803 }, { "epoch": 0.32182677727232184, "grad_norm": 0.5643223524093628, "learning_rate": 0.00046872471128355664, "loss": 1.3245, "step": 1804 }, { "epoch": 0.322005173490322, "grad_norm": 0.5305078625679016, "learning_rate": 0.0004686907711329903, "loss": 1.1447, "step": 1805 }, { "epoch": 0.3221835697083222, "grad_norm": 0.5465999245643616, "learning_rate": 0.000468656813806545, "loss": 1.1083, "step": 1806 }, { "epoch": 0.3223619659263224, "grad_norm": 0.5518834590911865, "learning_rate": 0.0004686228393068875, "loss": 1.4034, "step": 1807 }, { "epoch": 0.3225403621443225, "grad_norm": 0.5887888669967651, "learning_rate": 0.0004685888476366864, "loss": 1.2044, "step": 1808 }, { "epoch": 0.3227187583623227, "grad_norm": 0.5048173069953918, "learning_rate": 0.0004685548387986113, "loss": 1.0136, "step": 1809 }, { "epoch": 0.3228971545803229, "grad_norm": 0.5357009172439575, "learning_rate": 0.0004685208127953331, "loss": 1.1239, "step": 1810 }, { "epoch": 0.32307555079832306, "grad_norm": 0.4944056272506714, "learning_rate": 0.00046848676962952434, "loss": 1.1098, "step": 1811 }, { "epoch": 0.32325394701632326, "grad_norm": 0.5399764776229858, "learning_rate": 0.00046845270930385876, "loss": 1.2677, "step": 1812 }, { "epoch": 0.3234323432343234, "grad_norm": 0.541485607624054, "learning_rate": 0.0004684186318210114, "loss": 0.9894, "step": 1813 }, { "epoch": 0.3236107394523236, "grad_norm": 0.4978035092353821, "learning_rate": 0.0004683845371836586, "loss": 1.0137, "step": 1814 }, { "epoch": 0.3237891356703238, "grad_norm": 0.560565710067749, "learning_rate": 0.0004683504253944783, "loss": 1.1744, "step": 1815 }, { "epoch": 0.32396753188832395, "grad_norm": 0.6290241479873657, "learning_rate": 0.00046831629645614954, "loss": 1.3022, "step": 1816 }, { "epoch": 0.32414592810632414, "grad_norm": 0.5639137029647827, "learning_rate": 0.0004682821503713528, "loss": 1.3467, "step": 1817 }, { "epoch": 0.32432432432432434, "grad_norm": 0.9618589282035828, "learning_rate": 0.0004682479871427699, "loss": 1.1614, "step": 1818 }, { "epoch": 0.3245027205423245, "grad_norm": 0.5054482817649841, "learning_rate": 0.000468213806773084, "loss": 1.0439, "step": 1819 }, { "epoch": 0.3246811167603247, "grad_norm": 0.604265570640564, "learning_rate": 0.00046817960926497966, "loss": 1.2124, "step": 1820 }, { "epoch": 0.3248595129783249, "grad_norm": 0.6163999438285828, "learning_rate": 0.00046814539462114267, "loss": 1.3959, "step": 1821 }, { "epoch": 0.325037909196325, "grad_norm": 0.5077233910560608, "learning_rate": 0.00046811116284426027, "loss": 0.9646, "step": 1822 }, { "epoch": 0.3252163054143252, "grad_norm": 1.0014747381210327, "learning_rate": 0.000468076913937021, "loss": 1.2883, "step": 1823 }, { "epoch": 0.32539470163232537, "grad_norm": 0.5301506519317627, "learning_rate": 0.0004680426479021147, "loss": 1.2094, "step": 1824 }, { "epoch": 0.32557309785032557, "grad_norm": 0.4948747456073761, "learning_rate": 0.00046800836474223274, "loss": 0.9282, "step": 1825 }, { "epoch": 0.32575149406832576, "grad_norm": 0.5325037837028503, "learning_rate": 0.0004679740644600676, "loss": 1.316, "step": 1826 }, { "epoch": 0.3259298902863259, "grad_norm": 1.2007477283477783, "learning_rate": 0.0004679397470583133, "loss": 1.1872, "step": 1827 }, { "epoch": 0.3261082865043261, "grad_norm": 0.5436804294586182, "learning_rate": 0.00046790541253966493, "loss": 1.32, "step": 1828 }, { "epoch": 0.3262866827223263, "grad_norm": 1.843799114227295, "learning_rate": 0.0004678710609068193, "loss": 1.0474, "step": 1829 }, { "epoch": 0.32646507894032645, "grad_norm": 0.5451673269271851, "learning_rate": 0.0004678366921624743, "loss": 0.938, "step": 1830 }, { "epoch": 0.32664347515832665, "grad_norm": 0.6391348242759705, "learning_rate": 0.0004678023063093293, "loss": 1.0066, "step": 1831 }, { "epoch": 0.32682187137632684, "grad_norm": 0.8246572613716125, "learning_rate": 0.0004677679033500848, "loss": 1.2115, "step": 1832 }, { "epoch": 0.327000267594327, "grad_norm": 0.5913776159286499, "learning_rate": 0.0004677334832874429, "loss": 1.0055, "step": 1833 }, { "epoch": 0.3271786638123272, "grad_norm": 0.7291193008422852, "learning_rate": 0.00046769904612410694, "loss": 1.2235, "step": 1834 }, { "epoch": 0.3273570600303274, "grad_norm": 0.5839000940322876, "learning_rate": 0.0004676645918627816, "loss": 1.0966, "step": 1835 }, { "epoch": 0.32753545624832753, "grad_norm": 0.6921905875205994, "learning_rate": 0.0004676301205061728, "loss": 1.0023, "step": 1836 }, { "epoch": 0.3277138524663277, "grad_norm": 0.5414957404136658, "learning_rate": 0.0004675956320569881, "loss": 1.0105, "step": 1837 }, { "epoch": 0.32789224868432787, "grad_norm": 0.6554968357086182, "learning_rate": 0.000467561126517936, "loss": 1.3459, "step": 1838 }, { "epoch": 0.32807064490232807, "grad_norm": 0.6246145367622375, "learning_rate": 0.00046752660389172673, "loss": 1.2114, "step": 1839 }, { "epoch": 0.32824904112032827, "grad_norm": 0.5389582514762878, "learning_rate": 0.0004674920641810716, "loss": 0.8482, "step": 1840 }, { "epoch": 0.3284274373383284, "grad_norm": 0.5214718580245972, "learning_rate": 0.0004674575073886833, "loss": 0.8899, "step": 1841 }, { "epoch": 0.3286058335563286, "grad_norm": 0.6277864575386047, "learning_rate": 0.00046742293351727596, "loss": 1.1186, "step": 1842 }, { "epoch": 0.3287842297743288, "grad_norm": 0.4919874668121338, "learning_rate": 0.00046738834256956495, "loss": 0.8872, "step": 1843 }, { "epoch": 0.32896262599232895, "grad_norm": 0.545361340045929, "learning_rate": 0.00046735373454826715, "loss": 1.0465, "step": 1844 }, { "epoch": 0.32914102221032915, "grad_norm": 1.7328273057937622, "learning_rate": 0.00046731910945610044, "loss": 1.1124, "step": 1845 }, { "epoch": 0.32931941842832935, "grad_norm": 0.6040815711021423, "learning_rate": 0.0004672844672957846, "loss": 1.1342, "step": 1846 }, { "epoch": 0.3294978146463295, "grad_norm": 0.4832199513912201, "learning_rate": 0.00046724980807004, "loss": 0.825, "step": 1847 }, { "epoch": 0.3296762108643297, "grad_norm": 0.7503164410591125, "learning_rate": 0.00046721513178158905, "loss": 0.9456, "step": 1848 }, { "epoch": 0.32985460708232983, "grad_norm": 0.5634304881095886, "learning_rate": 0.0004671804384331551, "loss": 1.1604, "step": 1849 }, { "epoch": 0.33003300330033003, "grad_norm": 0.5407414436340332, "learning_rate": 0.00046714572802746305, "loss": 1.0941, "step": 1850 }, { "epoch": 0.33021139951833023, "grad_norm": 0.5838576555252075, "learning_rate": 0.0004671110005672389, "loss": 1.3654, "step": 1851 }, { "epoch": 0.33038979573633037, "grad_norm": 0.5957556962966919, "learning_rate": 0.00046707625605521016, "loss": 1.153, "step": 1852 }, { "epoch": 0.33056819195433057, "grad_norm": 0.5474072098731995, "learning_rate": 0.0004670414944941057, "loss": 1.1032, "step": 1853 }, { "epoch": 0.33074658817233077, "grad_norm": 0.48801934719085693, "learning_rate": 0.00046700671588665574, "loss": 0.8788, "step": 1854 }, { "epoch": 0.3309249843903309, "grad_norm": 0.6753059029579163, "learning_rate": 0.00046697192023559167, "loss": 1.2628, "step": 1855 }, { "epoch": 0.3311033806083311, "grad_norm": 0.5072078108787537, "learning_rate": 0.00046693710754364625, "loss": 0.9892, "step": 1856 }, { "epoch": 0.3312817768263313, "grad_norm": 0.6533268690109253, "learning_rate": 0.0004669022778135539, "loss": 1.1148, "step": 1857 }, { "epoch": 0.33146017304433145, "grad_norm": 0.5230735540390015, "learning_rate": 0.0004668674310480499, "loss": 0.8552, "step": 1858 }, { "epoch": 0.33163856926233165, "grad_norm": 0.5872694253921509, "learning_rate": 0.00046683256724987124, "loss": 1.1711, "step": 1859 }, { "epoch": 0.3318169654803318, "grad_norm": 0.6011821627616882, "learning_rate": 0.000466797686421756, "loss": 1.1871, "step": 1860 }, { "epoch": 0.331995361698332, "grad_norm": 0.643999457359314, "learning_rate": 0.0004667627885664438, "loss": 1.0446, "step": 1861 }, { "epoch": 0.3321737579163322, "grad_norm": 0.5912252068519592, "learning_rate": 0.00046672787368667553, "loss": 1.1304, "step": 1862 }, { "epoch": 0.33235215413433233, "grad_norm": 0.7412044405937195, "learning_rate": 0.0004666929417851933, "loss": 1.3677, "step": 1863 }, { "epoch": 0.33253055035233253, "grad_norm": 0.5437285900115967, "learning_rate": 0.0004666579928647406, "loss": 1.039, "step": 1864 }, { "epoch": 0.33270894657033273, "grad_norm": 0.5716599822044373, "learning_rate": 0.0004666230269280625, "loss": 1.1614, "step": 1865 }, { "epoch": 0.3328873427883329, "grad_norm": 0.48691585659980774, "learning_rate": 0.00046658804397790496, "loss": 0.9387, "step": 1866 }, { "epoch": 0.3330657390063331, "grad_norm": 0.5562944412231445, "learning_rate": 0.00046655304401701565, "loss": 1.1568, "step": 1867 }, { "epoch": 0.33324413522433327, "grad_norm": 0.5540555119514465, "learning_rate": 0.0004665180270481435, "loss": 1.283, "step": 1868 }, { "epoch": 0.3334225314423334, "grad_norm": 0.44283947348594666, "learning_rate": 0.0004664829930740387, "loss": 0.9378, "step": 1869 }, { "epoch": 0.3336009276603336, "grad_norm": 0.7194570899009705, "learning_rate": 0.00046644794209745277, "loss": 1.3644, "step": 1870 }, { "epoch": 0.33377932387833376, "grad_norm": 0.5517498254776001, "learning_rate": 0.00046641287412113857, "loss": 1.2408, "step": 1871 }, { "epoch": 0.33395772009633395, "grad_norm": 0.504384458065033, "learning_rate": 0.00046637778914785044, "loss": 0.9018, "step": 1872 }, { "epoch": 0.33413611631433415, "grad_norm": 1.1955726146697998, "learning_rate": 0.0004663426871803438, "loss": 0.9939, "step": 1873 }, { "epoch": 0.3343145125323343, "grad_norm": 0.5495529174804688, "learning_rate": 0.00046630756822137567, "loss": 1.2006, "step": 1874 }, { "epoch": 0.3344929087503345, "grad_norm": 0.5532088875770569, "learning_rate": 0.00046627243227370413, "loss": 1.0194, "step": 1875 }, { "epoch": 0.3346713049683347, "grad_norm": 0.4987606406211853, "learning_rate": 0.00046623727934008886, "loss": 0.9675, "step": 1876 }, { "epoch": 0.33484970118633484, "grad_norm": 0.6175581216812134, "learning_rate": 0.0004662021094232908, "loss": 1.1119, "step": 1877 }, { "epoch": 0.33502809740433503, "grad_norm": 0.5381768345832825, "learning_rate": 0.00046616692252607204, "loss": 1.0028, "step": 1878 }, { "epoch": 0.33520649362233523, "grad_norm": 0.5099619030952454, "learning_rate": 0.00046613171865119617, "loss": 0.9536, "step": 1879 }, { "epoch": 0.3353848898403354, "grad_norm": 0.5081475377082825, "learning_rate": 0.0004660964978014282, "loss": 1.0749, "step": 1880 }, { "epoch": 0.3355632860583356, "grad_norm": 0.5307644009590149, "learning_rate": 0.00046606125997953425, "loss": 1.3082, "step": 1881 }, { "epoch": 0.3357416822763357, "grad_norm": 0.5692281126976013, "learning_rate": 0.0004660260051882819, "loss": 1.2991, "step": 1882 }, { "epoch": 0.3359200784943359, "grad_norm": 0.5353856682777405, "learning_rate": 0.00046599073343044014, "loss": 1.0892, "step": 1883 }, { "epoch": 0.3360984747123361, "grad_norm": 0.5255775451660156, "learning_rate": 0.000465955444708779, "loss": 1.2572, "step": 1884 }, { "epoch": 0.33627687093033626, "grad_norm": 0.5284912586212158, "learning_rate": 0.0004659201390260703, "loss": 1.1411, "step": 1885 }, { "epoch": 0.33645526714833646, "grad_norm": 0.5372970700263977, "learning_rate": 0.0004658848163850867, "loss": 0.9713, "step": 1886 }, { "epoch": 0.33663366336633666, "grad_norm": 0.4853118062019348, "learning_rate": 0.00046584947678860264, "loss": 0.839, "step": 1887 }, { "epoch": 0.3368120595843368, "grad_norm": 0.543500542640686, "learning_rate": 0.0004658141202393935, "loss": 1.2338, "step": 1888 }, { "epoch": 0.336990455802337, "grad_norm": 0.5506289005279541, "learning_rate": 0.00046577874674023624, "loss": 0.9966, "step": 1889 }, { "epoch": 0.3371688520203372, "grad_norm": 0.81171053647995, "learning_rate": 0.00046574335629390894, "loss": 0.9698, "step": 1890 }, { "epoch": 0.33734724823833734, "grad_norm": 0.5851067304611206, "learning_rate": 0.0004657079489031915, "loss": 1.2608, "step": 1891 }, { "epoch": 0.33752564445633754, "grad_norm": 0.5415493845939636, "learning_rate": 0.0004656725245708644, "loss": 1.2209, "step": 1892 }, { "epoch": 0.3377040406743377, "grad_norm": 1.5110948085784912, "learning_rate": 0.0004656370832997101, "loss": 1.1676, "step": 1893 }, { "epoch": 0.3378824368923379, "grad_norm": 0.4858531951904297, "learning_rate": 0.00046560162509251204, "loss": 0.8358, "step": 1894 }, { "epoch": 0.3380608331103381, "grad_norm": 0.5556557774543762, "learning_rate": 0.00046556614995205516, "loss": 1.1761, "step": 1895 }, { "epoch": 0.3382392293283382, "grad_norm": 0.5492879748344421, "learning_rate": 0.00046553065788112563, "loss": 1.0874, "step": 1896 }, { "epoch": 0.3384176255463384, "grad_norm": 0.6402779817581177, "learning_rate": 0.000465495148882511, "loss": 1.186, "step": 1897 }, { "epoch": 0.3385960217643386, "grad_norm": 0.5240576863288879, "learning_rate": 0.0004654596229590001, "loss": 1.0237, "step": 1898 }, { "epoch": 0.33877441798233876, "grad_norm": 0.5123072862625122, "learning_rate": 0.0004654240801133831, "loss": 1.2273, "step": 1899 }, { "epoch": 0.33895281420033896, "grad_norm": 0.5299893617630005, "learning_rate": 0.0004653885203484515, "loss": 1.009, "step": 1900 }, { "epoch": 0.33913121041833916, "grad_norm": 0.548248291015625, "learning_rate": 0.0004653529436669983, "loss": 1.0276, "step": 1901 }, { "epoch": 0.3393096066363393, "grad_norm": 0.5219201445579529, "learning_rate": 0.00046531735007181754, "loss": 1.1695, "step": 1902 }, { "epoch": 0.3394880028543395, "grad_norm": 0.4921947419643402, "learning_rate": 0.00046528173956570474, "loss": 0.9397, "step": 1903 }, { "epoch": 0.33966639907233964, "grad_norm": 0.6388809680938721, "learning_rate": 0.00046524611215145685, "loss": 1.3252, "step": 1904 }, { "epoch": 0.33984479529033984, "grad_norm": 0.5055054426193237, "learning_rate": 0.0004652104678318718, "loss": 1.0557, "step": 1905 }, { "epoch": 0.34002319150834004, "grad_norm": 0.5897945761680603, "learning_rate": 0.0004651748066097493, "loss": 1.3774, "step": 1906 }, { "epoch": 0.3402015877263402, "grad_norm": 0.4601035714149475, "learning_rate": 0.00046513912848789013, "loss": 0.8346, "step": 1907 }, { "epoch": 0.3403799839443404, "grad_norm": 0.515330970287323, "learning_rate": 0.00046510343346909636, "loss": 1.0378, "step": 1908 }, { "epoch": 0.3405583801623406, "grad_norm": 0.5221341252326965, "learning_rate": 0.0004650677215561714, "loss": 1.2198, "step": 1909 }, { "epoch": 0.3407367763803407, "grad_norm": 0.6407569050788879, "learning_rate": 0.00046503199275192025, "loss": 1.2767, "step": 1910 }, { "epoch": 0.3409151725983409, "grad_norm": 0.5762472748756409, "learning_rate": 0.0004649962470591489, "loss": 1.2465, "step": 1911 }, { "epoch": 0.3410935688163411, "grad_norm": 0.514964759349823, "learning_rate": 0.00046496048448066484, "loss": 1.1349, "step": 1912 }, { "epoch": 0.34127196503434126, "grad_norm": 0.482735276222229, "learning_rate": 0.00046492470501927676, "loss": 0.9251, "step": 1913 }, { "epoch": 0.34145036125234146, "grad_norm": 0.49272486567497253, "learning_rate": 0.0004648889086777949, "loss": 0.8477, "step": 1914 }, { "epoch": 0.3416287574703416, "grad_norm": 0.5680924654006958, "learning_rate": 0.00046485309545903064, "loss": 1.2822, "step": 1915 }, { "epoch": 0.3418071536883418, "grad_norm": 0.7367702722549438, "learning_rate": 0.00046481726536579674, "loss": 1.1015, "step": 1916 }, { "epoch": 0.341985549906342, "grad_norm": 0.8924338817596436, "learning_rate": 0.0004647814184009072, "loss": 1.113, "step": 1917 }, { "epoch": 0.34216394612434214, "grad_norm": 0.48184382915496826, "learning_rate": 0.00046474555456717747, "loss": 0.9971, "step": 1918 }, { "epoch": 0.34234234234234234, "grad_norm": 0.6294741034507751, "learning_rate": 0.0004647096738674243, "loss": 1.1922, "step": 1919 }, { "epoch": 0.34252073856034254, "grad_norm": 0.5504223108291626, "learning_rate": 0.0004646737763044658, "loss": 1.0714, "step": 1920 }, { "epoch": 0.3426991347783427, "grad_norm": 0.6702556610107422, "learning_rate": 0.0004646378618811212, "loss": 1.2097, "step": 1921 }, { "epoch": 0.3428775309963429, "grad_norm": 0.5340100526809692, "learning_rate": 0.0004646019306002114, "loss": 1.2057, "step": 1922 }, { "epoch": 0.3430559272143431, "grad_norm": 0.49807506799697876, "learning_rate": 0.0004645659824645583, "loss": 0.8643, "step": 1923 }, { "epoch": 0.3432343234323432, "grad_norm": 0.5424205660820007, "learning_rate": 0.0004645300174769852, "loss": 1.257, "step": 1924 }, { "epoch": 0.3434127196503434, "grad_norm": 0.9090192317962646, "learning_rate": 0.0004644940356403169, "loss": 1.1699, "step": 1925 }, { "epoch": 0.34359111586834357, "grad_norm": 0.6018331050872803, "learning_rate": 0.0004644580369573793, "loss": 1.1373, "step": 1926 }, { "epoch": 0.34376951208634376, "grad_norm": 0.5179253220558167, "learning_rate": 0.00046442202143099986, "loss": 0.8974, "step": 1927 }, { "epoch": 0.34394790830434396, "grad_norm": 0.5565230846405029, "learning_rate": 0.00046438598906400707, "loss": 1.0234, "step": 1928 }, { "epoch": 0.3441263045223441, "grad_norm": 0.6058740019798279, "learning_rate": 0.0004643499398592309, "loss": 1.2254, "step": 1929 }, { "epoch": 0.3443047007403443, "grad_norm": 0.6367807984352112, "learning_rate": 0.0004643138738195028, "loss": 0.9239, "step": 1930 }, { "epoch": 0.3444830969583445, "grad_norm": 0.49235963821411133, "learning_rate": 0.0004642777909476552, "loss": 1.0013, "step": 1931 }, { "epoch": 0.34466149317634465, "grad_norm": 0.5881189107894897, "learning_rate": 0.00046424169124652216, "loss": 1.2899, "step": 1932 }, { "epoch": 0.34483988939434485, "grad_norm": 0.5489835143089294, "learning_rate": 0.00046420557471893887, "loss": 0.8001, "step": 1933 }, { "epoch": 0.34501828561234504, "grad_norm": 0.5183538198471069, "learning_rate": 0.0004641694413677419, "loss": 0.9889, "step": 1934 }, { "epoch": 0.3451966818303452, "grad_norm": 0.493448942899704, "learning_rate": 0.0004641332911957692, "loss": 0.9633, "step": 1935 }, { "epoch": 0.3453750780483454, "grad_norm": 0.6520765423774719, "learning_rate": 0.0004640971242058599, "loss": 1.3168, "step": 1936 }, { "epoch": 0.34555347426634553, "grad_norm": 0.5311633944511414, "learning_rate": 0.00046406094040085465, "loss": 1.1453, "step": 1937 }, { "epoch": 0.3457318704843457, "grad_norm": 0.6454037427902222, "learning_rate": 0.0004640247397835953, "loss": 1.2844, "step": 1938 }, { "epoch": 0.3459102667023459, "grad_norm": 0.5690452456474304, "learning_rate": 0.00046398852235692494, "loss": 1.0391, "step": 1939 }, { "epoch": 0.34608866292034607, "grad_norm": 0.555842936038971, "learning_rate": 0.0004639522881236881, "loss": 1.3138, "step": 1940 }, { "epoch": 0.34626705913834627, "grad_norm": 0.5336434841156006, "learning_rate": 0.0004639160370867307, "loss": 1.127, "step": 1941 }, { "epoch": 0.34644545535634647, "grad_norm": 0.6537164449691772, "learning_rate": 0.00046387976924889976, "loss": 1.3808, "step": 1942 }, { "epoch": 0.3466238515743466, "grad_norm": 0.5720651149749756, "learning_rate": 0.00046384348461304373, "loss": 1.4115, "step": 1943 }, { "epoch": 0.3468022477923468, "grad_norm": 0.5567306280136108, "learning_rate": 0.00046380718318201247, "loss": 1.0522, "step": 1944 }, { "epoch": 0.346980644010347, "grad_norm": 0.658480167388916, "learning_rate": 0.00046377086495865707, "loss": 1.1822, "step": 1945 }, { "epoch": 0.34715904022834715, "grad_norm": 0.5562303066253662, "learning_rate": 0.00046373452994582994, "loss": 1.2199, "step": 1946 }, { "epoch": 0.34733743644634735, "grad_norm": 0.585731029510498, "learning_rate": 0.00046369817814638476, "loss": 1.0057, "step": 1947 }, { "epoch": 0.3475158326643475, "grad_norm": 0.5658949017524719, "learning_rate": 0.0004636618095631767, "loss": 1.1701, "step": 1948 }, { "epoch": 0.3476942288823477, "grad_norm": 1.204848051071167, "learning_rate": 0.00046362542419906195, "loss": 1.1882, "step": 1949 }, { "epoch": 0.3478726251003479, "grad_norm": 0.5047358274459839, "learning_rate": 0.0004635890220568984, "loss": 1.0479, "step": 1950 }, { "epoch": 0.34805102131834803, "grad_norm": 0.6019084453582764, "learning_rate": 0.0004635526031395449, "loss": 1.2855, "step": 1951 }, { "epoch": 0.34822941753634823, "grad_norm": 0.5449895858764648, "learning_rate": 0.00046351616744986194, "loss": 1.2736, "step": 1952 }, { "epoch": 0.3484078137543484, "grad_norm": 0.5767678022384644, "learning_rate": 0.00046347971499071104, "loss": 1.0872, "step": 1953 }, { "epoch": 0.34858620997234857, "grad_norm": 0.589439868927002, "learning_rate": 0.0004634432457649552, "loss": 1.1026, "step": 1954 }, { "epoch": 0.34876460619034877, "grad_norm": 0.534095823764801, "learning_rate": 0.00046340675977545867, "loss": 1.1419, "step": 1955 }, { "epoch": 0.34894300240834897, "grad_norm": 0.5593207478523254, "learning_rate": 0.00046337025702508704, "loss": 1.1605, "step": 1956 }, { "epoch": 0.3491213986263491, "grad_norm": 0.5366277694702148, "learning_rate": 0.00046333373751670734, "loss": 1.039, "step": 1957 }, { "epoch": 0.3492997948443493, "grad_norm": 0.5536123514175415, "learning_rate": 0.00046329720125318766, "loss": 1.1518, "step": 1958 }, { "epoch": 0.34947819106234945, "grad_norm": 0.4309159219264984, "learning_rate": 0.0004632606482373976, "loss": 0.8568, "step": 1959 }, { "epoch": 0.34965658728034965, "grad_norm": 0.7334818243980408, "learning_rate": 0.0004632240784722079, "loss": 1.2216, "step": 1960 }, { "epoch": 0.34983498349834985, "grad_norm": 0.5555823445320129, "learning_rate": 0.0004631874919604909, "loss": 1.1513, "step": 1961 }, { "epoch": 0.35001337971635, "grad_norm": 0.5582665205001831, "learning_rate": 0.00046315088870512003, "loss": 1.096, "step": 1962 }, { "epoch": 0.3501917759343502, "grad_norm": 0.5533860921859741, "learning_rate": 0.00046311426870897, "loss": 1.1806, "step": 1963 }, { "epoch": 0.3503701721523504, "grad_norm": 0.6270779371261597, "learning_rate": 0.00046307763197491717, "loss": 1.1347, "step": 1964 }, { "epoch": 0.35054856837035053, "grad_norm": 0.5797653794288635, "learning_rate": 0.0004630409785058387, "loss": 1.176, "step": 1965 }, { "epoch": 0.35072696458835073, "grad_norm": 0.5614036917686462, "learning_rate": 0.0004630043083046135, "loss": 1.2459, "step": 1966 }, { "epoch": 0.35090536080635093, "grad_norm": 0.5455766916275024, "learning_rate": 0.0004629676213741216, "loss": 1.2499, "step": 1967 }, { "epoch": 0.3510837570243511, "grad_norm": 0.5755292177200317, "learning_rate": 0.00046293091771724433, "loss": 0.9262, "step": 1968 }, { "epoch": 0.35126215324235127, "grad_norm": 0.5601824522018433, "learning_rate": 0.00046289419733686445, "loss": 1.2802, "step": 1969 }, { "epoch": 0.3514405494603514, "grad_norm": 0.5256120562553406, "learning_rate": 0.00046285746023586586, "loss": 1.0818, "step": 1970 }, { "epoch": 0.3516189456783516, "grad_norm": 0.5001375675201416, "learning_rate": 0.000462820706417134, "loss": 0.8263, "step": 1971 }, { "epoch": 0.3517973418963518, "grad_norm": 0.6029314994812012, "learning_rate": 0.0004627839358835554, "loss": 1.1909, "step": 1972 }, { "epoch": 0.35197573811435195, "grad_norm": 0.5109052658081055, "learning_rate": 0.00046274714863801813, "loss": 0.9954, "step": 1973 }, { "epoch": 0.35215413433235215, "grad_norm": 0.6349180936813354, "learning_rate": 0.0004627103446834113, "loss": 1.1535, "step": 1974 }, { "epoch": 0.35233253055035235, "grad_norm": 0.5841493606567383, "learning_rate": 0.0004626735240226255, "loss": 0.9702, "step": 1975 }, { "epoch": 0.3525109267683525, "grad_norm": 0.5837200880050659, "learning_rate": 0.00046263668665855276, "loss": 1.2831, "step": 1976 }, { "epoch": 0.3526893229863527, "grad_norm": 0.5340337157249451, "learning_rate": 0.000462599832594086, "loss": 1.0643, "step": 1977 }, { "epoch": 0.3528677192043529, "grad_norm": 0.5065892338752747, "learning_rate": 0.00046256296183212, "loss": 0.9666, "step": 1978 }, { "epoch": 0.35304611542235304, "grad_norm": 0.5460461378097534, "learning_rate": 0.00046252607437555037, "loss": 1.0391, "step": 1979 }, { "epoch": 0.35322451164035323, "grad_norm": 0.5758165717124939, "learning_rate": 0.0004624891702272744, "loss": 1.0593, "step": 1980 }, { "epoch": 0.3534029078583534, "grad_norm": 0.739166259765625, "learning_rate": 0.0004624522493901904, "loss": 1.0089, "step": 1981 }, { "epoch": 0.3535813040763536, "grad_norm": 0.5259382724761963, "learning_rate": 0.0004624153118671981, "loss": 0.8894, "step": 1982 }, { "epoch": 0.3537597002943538, "grad_norm": 0.47911664843559265, "learning_rate": 0.00046237835766119867, "loss": 0.8527, "step": 1983 }, { "epoch": 0.3539380965123539, "grad_norm": 0.5415805578231812, "learning_rate": 0.0004623413867750944, "loss": 1.1951, "step": 1984 }, { "epoch": 0.3541164927303541, "grad_norm": 0.5259605050086975, "learning_rate": 0.00046230439921178903, "loss": 1.1803, "step": 1985 }, { "epoch": 0.3542948889483543, "grad_norm": 0.5450373291969299, "learning_rate": 0.00046226739497418745, "loss": 1.0821, "step": 1986 }, { "epoch": 0.35447328516635446, "grad_norm": 0.5040337443351746, "learning_rate": 0.0004622303740651961, "loss": 1.0438, "step": 1987 }, { "epoch": 0.35465168138435466, "grad_norm": 0.531369686126709, "learning_rate": 0.00046219333648772246, "loss": 1.0472, "step": 1988 }, { "epoch": 0.35483007760235485, "grad_norm": 0.5246288180351257, "learning_rate": 0.00046215628224467546, "loss": 1.0166, "step": 1989 }, { "epoch": 0.355008473820355, "grad_norm": 0.5832285284996033, "learning_rate": 0.0004621192113389654, "loss": 1.3904, "step": 1990 }, { "epoch": 0.3551868700383552, "grad_norm": 0.5046032667160034, "learning_rate": 0.0004620821237735037, "loss": 0.9671, "step": 1991 }, { "epoch": 0.35536526625635534, "grad_norm": 0.505187451839447, "learning_rate": 0.00046204501955120333, "loss": 1.1395, "step": 1992 }, { "epoch": 0.35554366247435554, "grad_norm": 0.5264232158660889, "learning_rate": 0.00046200789867497836, "loss": 1.1737, "step": 1993 }, { "epoch": 0.35572205869235574, "grad_norm": 0.6432974338531494, "learning_rate": 0.00046197076114774425, "loss": 1.4899, "step": 1994 }, { "epoch": 0.3559004549103559, "grad_norm": 0.5268731713294983, "learning_rate": 0.00046193360697241773, "loss": 1.3145, "step": 1995 }, { "epoch": 0.3560788511283561, "grad_norm": 0.5824031829833984, "learning_rate": 0.000461896436151917, "loss": 1.0268, "step": 1996 }, { "epoch": 0.3562572473463563, "grad_norm": 0.5190456509590149, "learning_rate": 0.0004618592486891613, "loss": 0.9869, "step": 1997 }, { "epoch": 0.3564356435643564, "grad_norm": 0.5111797451972961, "learning_rate": 0.0004618220445870714, "loss": 0.8515, "step": 1998 }, { "epoch": 0.3566140397823566, "grad_norm": 0.6227758526802063, "learning_rate": 0.0004617848238485693, "loss": 1.2615, "step": 1999 }, { "epoch": 0.3567924360003568, "grad_norm": 0.5635523200035095, "learning_rate": 0.0004617475864765782, "loss": 1.1813, "step": 2000 }, { "epoch": 0.35697083221835696, "grad_norm": 0.6283045411109924, "learning_rate": 0.0004617103324740228, "loss": 1.0593, "step": 2001 }, { "epoch": 0.35714922843635716, "grad_norm": 0.5376402735710144, "learning_rate": 0.00046167306184382906, "loss": 0.9929, "step": 2002 }, { "epoch": 0.3573276246543573, "grad_norm": 0.5043478608131409, "learning_rate": 0.00046163577458892415, "loss": 1.1796, "step": 2003 }, { "epoch": 0.3575060208723575, "grad_norm": 0.5074126720428467, "learning_rate": 0.00046159847071223646, "loss": 0.8408, "step": 2004 }, { "epoch": 0.3576844170903577, "grad_norm": 0.6142756938934326, "learning_rate": 0.00046156115021669605, "loss": 1.159, "step": 2005 }, { "epoch": 0.35786281330835784, "grad_norm": 0.4879589378833771, "learning_rate": 0.00046152381310523384, "loss": 0.8117, "step": 2006 }, { "epoch": 0.35804120952635804, "grad_norm": 0.5088807940483093, "learning_rate": 0.0004614864593807825, "loss": 1.0544, "step": 2007 }, { "epoch": 0.35821960574435824, "grad_norm": 0.5416921377182007, "learning_rate": 0.00046144908904627557, "loss": 1.1541, "step": 2008 }, { "epoch": 0.3583980019623584, "grad_norm": 0.5295444130897522, "learning_rate": 0.0004614117021046482, "loss": 1.1063, "step": 2009 }, { "epoch": 0.3585763981803586, "grad_norm": 0.5223750472068787, "learning_rate": 0.0004613742985588367, "loss": 1.0807, "step": 2010 }, { "epoch": 0.3587547943983588, "grad_norm": 0.5627544522285461, "learning_rate": 0.0004613368784117787, "loss": 1.0165, "step": 2011 }, { "epoch": 0.3589331906163589, "grad_norm": 0.5210590958595276, "learning_rate": 0.0004612994416664134, "loss": 1.0561, "step": 2012 }, { "epoch": 0.3591115868343591, "grad_norm": 0.5011810660362244, "learning_rate": 0.00046126198832568065, "loss": 1.175, "step": 2013 }, { "epoch": 0.35928998305235926, "grad_norm": 0.47756415605545044, "learning_rate": 0.0004612245183925224, "loss": 1.052, "step": 2014 }, { "epoch": 0.35946837927035946, "grad_norm": 0.5775620937347412, "learning_rate": 0.0004611870318698813, "loss": 1.0646, "step": 2015 }, { "epoch": 0.35964677548835966, "grad_norm": 0.5540309548377991, "learning_rate": 0.00046114952876070167, "loss": 1.2858, "step": 2016 }, { "epoch": 0.3598251717063598, "grad_norm": 0.5066989660263062, "learning_rate": 0.0004611120090679289, "loss": 0.9978, "step": 2017 }, { "epoch": 0.36000356792436, "grad_norm": 1.06368887424469, "learning_rate": 0.00046107447279450977, "loss": 1.2269, "step": 2018 }, { "epoch": 0.3601819641423602, "grad_norm": 0.5219504833221436, "learning_rate": 0.00046103691994339236, "loss": 0.9725, "step": 2019 }, { "epoch": 0.36036036036036034, "grad_norm": 0.5700012445449829, "learning_rate": 0.00046099935051752605, "loss": 1.2358, "step": 2020 }, { "epoch": 0.36053875657836054, "grad_norm": 0.514971911907196, "learning_rate": 0.0004609617645198616, "loss": 1.0931, "step": 2021 }, { "epoch": 0.36071715279636074, "grad_norm": 0.5299590229988098, "learning_rate": 0.00046092416195335094, "loss": 1.1742, "step": 2022 }, { "epoch": 0.3608955490143609, "grad_norm": 0.5127617120742798, "learning_rate": 0.00046088654282094735, "loss": 1.0115, "step": 2023 }, { "epoch": 0.3610739452323611, "grad_norm": 0.5403224229812622, "learning_rate": 0.00046084890712560546, "loss": 1.2271, "step": 2024 }, { "epoch": 0.3612523414503612, "grad_norm": 0.5359017252922058, "learning_rate": 0.0004608112548702811, "loss": 1.1065, "step": 2025 }, { "epoch": 0.3614307376683614, "grad_norm": 0.5404418110847473, "learning_rate": 0.00046077358605793156, "loss": 1.0741, "step": 2026 }, { "epoch": 0.3616091338863616, "grad_norm": 0.541814386844635, "learning_rate": 0.0004607359006915153, "loss": 1.2468, "step": 2027 }, { "epoch": 0.36178753010436177, "grad_norm": 0.5587732791900635, "learning_rate": 0.000460698198773992, "loss": 1.0027, "step": 2028 }, { "epoch": 0.36196592632236196, "grad_norm": 0.5569438338279724, "learning_rate": 0.00046066048030832284, "loss": 1.045, "step": 2029 }, { "epoch": 0.36214432254036216, "grad_norm": 0.5397847294807434, "learning_rate": 0.0004606227452974703, "loss": 1.1198, "step": 2030 }, { "epoch": 0.3623227187583623, "grad_norm": 0.4841654598712921, "learning_rate": 0.0004605849937443979, "loss": 1.0022, "step": 2031 }, { "epoch": 0.3625011149763625, "grad_norm": 0.5069999694824219, "learning_rate": 0.00046054722565207077, "loss": 1.1917, "step": 2032 }, { "epoch": 0.3626795111943627, "grad_norm": 0.5341327786445618, "learning_rate": 0.0004605094410234551, "loss": 0.9297, "step": 2033 }, { "epoch": 0.36285790741236285, "grad_norm": 0.4698069095611572, "learning_rate": 0.00046047163986151854, "loss": 0.7909, "step": 2034 }, { "epoch": 0.36303630363036304, "grad_norm": 0.6396573185920715, "learning_rate": 0.0004604338221692299, "loss": 1.26, "step": 2035 }, { "epoch": 0.3632146998483632, "grad_norm": 0.5331511497497559, "learning_rate": 0.0004603959879495595, "loss": 1.0438, "step": 2036 }, { "epoch": 0.3633930960663634, "grad_norm": 0.6419988870620728, "learning_rate": 0.00046035813720547876, "loss": 1.232, "step": 2037 }, { "epoch": 0.3635714922843636, "grad_norm": 0.5257839560508728, "learning_rate": 0.0004603202699399604, "loss": 0.8447, "step": 2038 }, { "epoch": 0.3637498885023637, "grad_norm": 0.49433434009552, "learning_rate": 0.0004602823861559785, "loss": 0.9731, "step": 2039 }, { "epoch": 0.3639282847203639, "grad_norm": 0.5541132688522339, "learning_rate": 0.00046024448585650857, "loss": 1.0478, "step": 2040 }, { "epoch": 0.3641066809383641, "grad_norm": 0.5009471774101257, "learning_rate": 0.00046020656904452716, "loss": 1.0364, "step": 2041 }, { "epoch": 0.36428507715636427, "grad_norm": 0.5421225428581238, "learning_rate": 0.0004601686357230123, "loss": 1.0979, "step": 2042 }, { "epoch": 0.36446347337436447, "grad_norm": 0.49714601039886475, "learning_rate": 0.0004601306858949432, "loss": 0.9815, "step": 2043 }, { "epoch": 0.36464186959236466, "grad_norm": 0.5659156441688538, "learning_rate": 0.0004600927195633005, "loss": 1.0267, "step": 2044 }, { "epoch": 0.3648202658103648, "grad_norm": 0.5777513980865479, "learning_rate": 0.000460054736731066, "loss": 1.1066, "step": 2045 }, { "epoch": 0.364998662028365, "grad_norm": 0.4904979169368744, "learning_rate": 0.00046001673740122287, "loss": 1.0805, "step": 2046 }, { "epoch": 0.36517705824636515, "grad_norm": 0.5406635403633118, "learning_rate": 0.0004599787215767556, "loss": 0.9333, "step": 2047 }, { "epoch": 0.36535545446436535, "grad_norm": 0.5242641568183899, "learning_rate": 0.0004599406892606498, "loss": 0.9977, "step": 2048 }, { "epoch": 0.36553385068236555, "grad_norm": 0.538411021232605, "learning_rate": 0.00045990264045589263, "loss": 1.0446, "step": 2049 }, { "epoch": 0.3657122469003657, "grad_norm": 0.5017095804214478, "learning_rate": 0.0004598645751654724, "loss": 0.9025, "step": 2050 }, { "epoch": 0.3658906431183659, "grad_norm": 0.5771719813346863, "learning_rate": 0.0004598264933923788, "loss": 1.1323, "step": 2051 }, { "epoch": 0.3660690393363661, "grad_norm": 0.5927982926368713, "learning_rate": 0.0004597883951396027, "loss": 1.4047, "step": 2052 }, { "epoch": 0.36624743555436623, "grad_norm": 0.5014179944992065, "learning_rate": 0.00045975028041013633, "loss": 0.9932, "step": 2053 }, { "epoch": 0.36642583177236643, "grad_norm": 0.5538795590400696, "learning_rate": 0.00045971214920697325, "loss": 1.128, "step": 2054 }, { "epoch": 0.3666042279903666, "grad_norm": 0.5837451219558716, "learning_rate": 0.00045967400153310814, "loss": 1.1844, "step": 2055 }, { "epoch": 0.36678262420836677, "grad_norm": 0.4952576458454132, "learning_rate": 0.00045963583739153715, "loss": 0.9711, "step": 2056 }, { "epoch": 0.36696102042636697, "grad_norm": 0.4757169187068939, "learning_rate": 0.00045959765678525776, "loss": 1.0064, "step": 2057 }, { "epoch": 0.36713941664436717, "grad_norm": 0.5288332104682922, "learning_rate": 0.00045955945971726855, "loss": 1.4253, "step": 2058 }, { "epoch": 0.3673178128623673, "grad_norm": 0.5419899225234985, "learning_rate": 0.00045952124619056957, "loss": 1.2097, "step": 2059 }, { "epoch": 0.3674962090803675, "grad_norm": 0.5925987958908081, "learning_rate": 0.00045948301620816215, "loss": 1.1952, "step": 2060 }, { "epoch": 0.36767460529836765, "grad_norm": 0.5184533596038818, "learning_rate": 0.00045944476977304873, "loss": 1.0255, "step": 2061 }, { "epoch": 0.36785300151636785, "grad_norm": 0.5200556516647339, "learning_rate": 0.0004594065068882332, "loss": 1.1104, "step": 2062 }, { "epoch": 0.36803139773436805, "grad_norm": 0.5943669080734253, "learning_rate": 0.0004593682275567208, "loss": 1.1162, "step": 2063 }, { "epoch": 0.3682097939523682, "grad_norm": 1.1349503993988037, "learning_rate": 0.00045932993178151785, "loss": 1.0188, "step": 2064 }, { "epoch": 0.3683881901703684, "grad_norm": 0.5693401098251343, "learning_rate": 0.00045929161956563216, "loss": 1.0564, "step": 2065 }, { "epoch": 0.3685665863883686, "grad_norm": 0.5493849515914917, "learning_rate": 0.00045925329091207266, "loss": 0.9755, "step": 2066 }, { "epoch": 0.36874498260636873, "grad_norm": 0.5377547740936279, "learning_rate": 0.00045921494582384985, "loss": 1.0967, "step": 2067 }, { "epoch": 0.36892337882436893, "grad_norm": 0.5204565525054932, "learning_rate": 0.00045917658430397526, "loss": 0.9365, "step": 2068 }, { "epoch": 0.36910177504236913, "grad_norm": 0.5643702149391174, "learning_rate": 0.0004591382063554617, "loss": 1.2203, "step": 2069 }, { "epoch": 0.36928017126036927, "grad_norm": 0.49709245562553406, "learning_rate": 0.00045909981198132336, "loss": 0.9819, "step": 2070 }, { "epoch": 0.36945856747836947, "grad_norm": 0.5403639674186707, "learning_rate": 0.00045906140118457577, "loss": 1.0955, "step": 2071 }, { "epoch": 0.3696369636963696, "grad_norm": 0.643334686756134, "learning_rate": 0.00045902297396823577, "loss": 1.3619, "step": 2072 }, { "epoch": 0.3698153599143698, "grad_norm": 0.5306541919708252, "learning_rate": 0.0004589845303353213, "loss": 0.9215, "step": 2073 }, { "epoch": 0.36999375613237, "grad_norm": 0.5806733965873718, "learning_rate": 0.00045894607028885173, "loss": 1.0277, "step": 2074 }, { "epoch": 0.37017215235037015, "grad_norm": 0.5571582317352295, "learning_rate": 0.0004589075938318478, "loss": 0.845, "step": 2075 }, { "epoch": 0.37035054856837035, "grad_norm": 0.56407231092453, "learning_rate": 0.00045886910096733127, "loss": 1.0461, "step": 2076 }, { "epoch": 0.37052894478637055, "grad_norm": 0.5526430010795593, "learning_rate": 0.0004588305916983254, "loss": 0.915, "step": 2077 }, { "epoch": 0.3707073410043707, "grad_norm": 0.5798369646072388, "learning_rate": 0.00045879206602785486, "loss": 1.0021, "step": 2078 }, { "epoch": 0.3708857372223709, "grad_norm": 0.5774779915809631, "learning_rate": 0.0004587535239589452, "loss": 1.1147, "step": 2079 }, { "epoch": 0.3710641334403711, "grad_norm": 0.5789695978164673, "learning_rate": 0.00045871496549462364, "loss": 1.1118, "step": 2080 }, { "epoch": 0.37124252965837123, "grad_norm": 0.4506717622280121, "learning_rate": 0.0004586763906379184, "loss": 0.8033, "step": 2081 }, { "epoch": 0.37142092587637143, "grad_norm": 0.4841929078102112, "learning_rate": 0.0004586377993918594, "loss": 1.1138, "step": 2082 }, { "epoch": 0.3715993220943716, "grad_norm": 0.5651434659957886, "learning_rate": 0.0004585991917594774, "loss": 1.1166, "step": 2083 }, { "epoch": 0.3717777183123718, "grad_norm": 0.5341058969497681, "learning_rate": 0.0004585605677438046, "loss": 1.315, "step": 2084 }, { "epoch": 0.371956114530372, "grad_norm": 1.9402390718460083, "learning_rate": 0.0004585219273478745, "loss": 1.2177, "step": 2085 }, { "epoch": 0.3721345107483721, "grad_norm": 0.5437238812446594, "learning_rate": 0.00045848327057472206, "loss": 0.9626, "step": 2086 }, { "epoch": 0.3723129069663723, "grad_norm": 0.5549441576004028, "learning_rate": 0.0004584445974273832, "loss": 1.0923, "step": 2087 }, { "epoch": 0.3724913031843725, "grad_norm": 0.5701561570167542, "learning_rate": 0.0004584059079088954, "loss": 1.1516, "step": 2088 }, { "epoch": 0.37266969940237266, "grad_norm": 0.5430149435997009, "learning_rate": 0.00045836720202229727, "loss": 1.2877, "step": 2089 }, { "epoch": 0.37284809562037285, "grad_norm": 0.555594801902771, "learning_rate": 0.00045832847977062875, "loss": 0.9959, "step": 2090 }, { "epoch": 0.37302649183837305, "grad_norm": 0.5235658288002014, "learning_rate": 0.0004582897411569311, "loss": 1.0502, "step": 2091 }, { "epoch": 0.3732048880563732, "grad_norm": 0.6449779868125916, "learning_rate": 0.0004582509861842468, "loss": 1.1183, "step": 2092 }, { "epoch": 0.3733832842743734, "grad_norm": 0.592603325843811, "learning_rate": 0.0004582122148556196, "loss": 1.1536, "step": 2093 }, { "epoch": 0.37356168049237354, "grad_norm": 0.4949917197227478, "learning_rate": 0.0004581734271740948, "loss": 1.0221, "step": 2094 }, { "epoch": 0.37374007671037374, "grad_norm": 0.6672910451889038, "learning_rate": 0.0004581346231427185, "loss": 1.064, "step": 2095 }, { "epoch": 0.37391847292837394, "grad_norm": 0.4738200604915619, "learning_rate": 0.00045809580276453843, "loss": 1.0283, "step": 2096 }, { "epoch": 0.3740968691463741, "grad_norm": 1.385906457901001, "learning_rate": 0.00045805696604260364, "loss": 0.9435, "step": 2097 }, { "epoch": 0.3742752653643743, "grad_norm": 0.6020336747169495, "learning_rate": 0.0004580181129799643, "loss": 1.0175, "step": 2098 }, { "epoch": 0.3744536615823745, "grad_norm": 0.7528032660484314, "learning_rate": 0.0004579792435796718, "loss": 1.2405, "step": 2099 }, { "epoch": 0.3746320578003746, "grad_norm": 0.5541552305221558, "learning_rate": 0.00045794035784477903, "loss": 1.0247, "step": 2100 }, { "epoch": 0.3748104540183748, "grad_norm": 0.5061083436012268, "learning_rate": 0.00045790145577834007, "loss": 0.8521, "step": 2101 }, { "epoch": 0.374988850236375, "grad_norm": 1.0418980121612549, "learning_rate": 0.0004578625373834102, "loss": 1.3496, "step": 2102 }, { "epoch": 0.37516724645437516, "grad_norm": 0.5358394384384155, "learning_rate": 0.00045782360266304615, "loss": 0.9427, "step": 2103 }, { "epoch": 0.37534564267237536, "grad_norm": 0.5550572276115417, "learning_rate": 0.0004577846516203057, "loss": 1.29, "step": 2104 }, { "epoch": 0.3755240388903755, "grad_norm": 0.5666816830635071, "learning_rate": 0.0004577456842582482, "loss": 0.9784, "step": 2105 }, { "epoch": 0.3757024351083757, "grad_norm": 0.6263391375541687, "learning_rate": 0.000457706700579934, "loss": 1.3776, "step": 2106 }, { "epoch": 0.3758808313263759, "grad_norm": 0.5329514145851135, "learning_rate": 0.000457667700588425, "loss": 0.9403, "step": 2107 }, { "epoch": 0.37605922754437604, "grad_norm": 0.6623142957687378, "learning_rate": 0.00045762868428678405, "loss": 1.1171, "step": 2108 }, { "epoch": 0.37623762376237624, "grad_norm": 0.5798327326774597, "learning_rate": 0.00045758965167807565, "loss": 1.1791, "step": 2109 }, { "epoch": 0.37641601998037644, "grad_norm": 0.5603600144386292, "learning_rate": 0.00045755060276536543, "loss": 1.0435, "step": 2110 }, { "epoch": 0.3765944161983766, "grad_norm": 0.5544630885124207, "learning_rate": 0.00045751153755172006, "loss": 1.1783, "step": 2111 }, { "epoch": 0.3767728124163768, "grad_norm": 0.633995771408081, "learning_rate": 0.0004574724560402078, "loss": 1.1487, "step": 2112 }, { "epoch": 0.376951208634377, "grad_norm": 0.6015441417694092, "learning_rate": 0.00045743335823389826, "loss": 1.0372, "step": 2113 }, { "epoch": 0.3771296048523771, "grad_norm": 0.5980955958366394, "learning_rate": 0.00045739424413586194, "loss": 1.2749, "step": 2114 }, { "epoch": 0.3773080010703773, "grad_norm": 0.5361245274543762, "learning_rate": 0.00045735511374917095, "loss": 1.0022, "step": 2115 }, { "epoch": 0.37748639728837746, "grad_norm": 0.5869659781455994, "learning_rate": 0.0004573159670768986, "loss": 1.214, "step": 2116 }, { "epoch": 0.37766479350637766, "grad_norm": 0.6182494759559631, "learning_rate": 0.00045727680412211937, "loss": 0.9911, "step": 2117 }, { "epoch": 0.37784318972437786, "grad_norm": 0.5352988243103027, "learning_rate": 0.0004572376248879091, "loss": 1.0801, "step": 2118 }, { "epoch": 0.378021585942378, "grad_norm": 0.5704354047775269, "learning_rate": 0.000457198429377345, "loss": 0.9513, "step": 2119 }, { "epoch": 0.3781999821603782, "grad_norm": 0.5778263807296753, "learning_rate": 0.00045715921759350545, "loss": 1.1593, "step": 2120 }, { "epoch": 0.3783783783783784, "grad_norm": 0.5286616683006287, "learning_rate": 0.0004571199895394701, "loss": 1.2545, "step": 2121 }, { "epoch": 0.37855677459637854, "grad_norm": 0.5010552406311035, "learning_rate": 0.00045708074521831984, "loss": 0.8689, "step": 2122 }, { "epoch": 0.37873517081437874, "grad_norm": 0.8215789198875427, "learning_rate": 0.000457041484633137, "loss": 1.1025, "step": 2123 }, { "epoch": 0.37891356703237894, "grad_norm": 0.8685697317123413, "learning_rate": 0.000457002207787005, "loss": 1.0335, "step": 2124 }, { "epoch": 0.3790919632503791, "grad_norm": 0.5928163528442383, "learning_rate": 0.00045696291468300874, "loss": 1.1333, "step": 2125 }, { "epoch": 0.3792703594683793, "grad_norm": 0.5363757014274597, "learning_rate": 0.00045692360532423423, "loss": 1.3034, "step": 2126 }, { "epoch": 0.3794487556863794, "grad_norm": 0.6252255439758301, "learning_rate": 0.00045688427971376876, "loss": 1.1585, "step": 2127 }, { "epoch": 0.3796271519043796, "grad_norm": 0.6210330128669739, "learning_rate": 0.00045684493785470105, "loss": 0.9278, "step": 2128 }, { "epoch": 0.3798055481223798, "grad_norm": 0.8249781131744385, "learning_rate": 0.00045680557975012086, "loss": 1.4528, "step": 2129 }, { "epoch": 0.37998394434037996, "grad_norm": 0.5406894683837891, "learning_rate": 0.00045676620540311953, "loss": 0.9968, "step": 2130 }, { "epoch": 0.38016234055838016, "grad_norm": 0.631753146648407, "learning_rate": 0.00045672681481678936, "loss": 1.05, "step": 2131 }, { "epoch": 0.38034073677638036, "grad_norm": 0.5768477916717529, "learning_rate": 0.000456687407994224, "loss": 1.1387, "step": 2132 }, { "epoch": 0.3805191329943805, "grad_norm": 0.5343456268310547, "learning_rate": 0.00045664798493851873, "loss": 1.0884, "step": 2133 }, { "epoch": 0.3806975292123807, "grad_norm": 0.5370036363601685, "learning_rate": 0.00045660854565276955, "loss": 1.0014, "step": 2134 }, { "epoch": 0.3808759254303809, "grad_norm": 0.5855540633201599, "learning_rate": 0.0004565690901400742, "loss": 1.2283, "step": 2135 }, { "epoch": 0.38105432164838104, "grad_norm": 0.5994966626167297, "learning_rate": 0.00045652961840353135, "loss": 0.801, "step": 2136 }, { "epoch": 0.38123271786638124, "grad_norm": 0.7402109503746033, "learning_rate": 0.0004564901304462411, "loss": 0.9229, "step": 2137 }, { "epoch": 0.3814111140843814, "grad_norm": 0.8662267327308655, "learning_rate": 0.0004564506262713049, "loss": 1.0499, "step": 2138 }, { "epoch": 0.3815895103023816, "grad_norm": 0.5050637125968933, "learning_rate": 0.0004564111058818254, "loss": 0.9188, "step": 2139 }, { "epoch": 0.3817679065203818, "grad_norm": 0.5128204822540283, "learning_rate": 0.0004563715692809064, "loss": 0.9959, "step": 2140 }, { "epoch": 0.3819463027383819, "grad_norm": 0.5767316818237305, "learning_rate": 0.00045633201647165325, "loss": 1.1759, "step": 2141 }, { "epoch": 0.3821246989563821, "grad_norm": 0.5560585856437683, "learning_rate": 0.0004562924474571722, "loss": 1.2575, "step": 2142 }, { "epoch": 0.3823030951743823, "grad_norm": 0.5354772210121155, "learning_rate": 0.0004562528622405712, "loss": 1.0975, "step": 2143 }, { "epoch": 0.38248149139238247, "grad_norm": 0.5775118470191956, "learning_rate": 0.0004562132608249591, "loss": 1.0457, "step": 2144 }, { "epoch": 0.38265988761038267, "grad_norm": 0.5361456274986267, "learning_rate": 0.00045617364321344625, "loss": 1.0162, "step": 2145 }, { "epoch": 0.38283828382838286, "grad_norm": 0.6208562254905701, "learning_rate": 0.00045613400940914417, "loss": 1.0852, "step": 2146 }, { "epoch": 0.383016680046383, "grad_norm": 0.5321409106254578, "learning_rate": 0.0004560943594151657, "loss": 1.0978, "step": 2147 }, { "epoch": 0.3831950762643832, "grad_norm": 0.4865065813064575, "learning_rate": 0.00045605469323462493, "loss": 0.8982, "step": 2148 }, { "epoch": 0.38337347248238335, "grad_norm": 0.5791817903518677, "learning_rate": 0.0004560150108706372, "loss": 1.0734, "step": 2149 }, { "epoch": 0.38355186870038355, "grad_norm": 0.5767939686775208, "learning_rate": 0.0004559753123263193, "loss": 1.112, "step": 2150 }, { "epoch": 0.38373026491838375, "grad_norm": 0.5261261463165283, "learning_rate": 0.0004559355976047889, "loss": 1.0849, "step": 2151 }, { "epoch": 0.3839086611363839, "grad_norm": 0.5370740294456482, "learning_rate": 0.00045589586670916526, "loss": 0.8849, "step": 2152 }, { "epoch": 0.3840870573543841, "grad_norm": 0.4868060350418091, "learning_rate": 0.0004558561196425689, "loss": 0.9771, "step": 2153 }, { "epoch": 0.3842654535723843, "grad_norm": 0.5003844499588013, "learning_rate": 0.0004558163564081215, "loss": 0.9858, "step": 2154 }, { "epoch": 0.38444384979038443, "grad_norm": 0.4873872399330139, "learning_rate": 0.000455776577008946, "loss": 1.082, "step": 2155 }, { "epoch": 0.3846222460083846, "grad_norm": 0.5657439231872559, "learning_rate": 0.0004557367814481668, "loss": 1.1766, "step": 2156 }, { "epoch": 0.3848006422263848, "grad_norm": 0.5301082134246826, "learning_rate": 0.00045569696972890916, "loss": 0.9749, "step": 2157 }, { "epoch": 0.38497903844438497, "grad_norm": 0.5159010291099548, "learning_rate": 0.0004556571418543001, "loss": 1.0766, "step": 2158 }, { "epoch": 0.38515743466238517, "grad_norm": 0.5375173687934875, "learning_rate": 0.00045561729782746767, "loss": 1.0563, "step": 2159 }, { "epoch": 0.3853358308803853, "grad_norm": 0.5973328351974487, "learning_rate": 0.0004555774376515411, "loss": 1.3044, "step": 2160 }, { "epoch": 0.3855142270983855, "grad_norm": 0.4792105555534363, "learning_rate": 0.00045553756132965105, "loss": 0.8944, "step": 2161 }, { "epoch": 0.3856926233163857, "grad_norm": 0.5174294114112854, "learning_rate": 0.00045549766886492945, "loss": 1.0259, "step": 2162 }, { "epoch": 0.38587101953438585, "grad_norm": 0.48922815918922424, "learning_rate": 0.0004554577602605093, "loss": 0.8496, "step": 2163 }, { "epoch": 0.38604941575238605, "grad_norm": 0.49794623255729675, "learning_rate": 0.0004554178355195251, "loss": 0.8427, "step": 2164 }, { "epoch": 0.38622781197038625, "grad_norm": 0.5163681507110596, "learning_rate": 0.00045537789464511247, "loss": 1.0703, "step": 2165 }, { "epoch": 0.3864062081883864, "grad_norm": 0.5273317694664001, "learning_rate": 0.00045533793764040845, "loss": 1.0474, "step": 2166 }, { "epoch": 0.3865846044063866, "grad_norm": 0.563606858253479, "learning_rate": 0.0004552979645085511, "loss": 1.3796, "step": 2167 }, { "epoch": 0.3867630006243868, "grad_norm": 0.5003736615180969, "learning_rate": 0.0004552579752526799, "loss": 0.8776, "step": 2168 }, { "epoch": 0.38694139684238693, "grad_norm": 0.5471118688583374, "learning_rate": 0.0004552179698759358, "loss": 1.2153, "step": 2169 }, { "epoch": 0.38711979306038713, "grad_norm": 0.5452420115470886, "learning_rate": 0.0004551779483814605, "loss": 1.1194, "step": 2170 }, { "epoch": 0.3872981892783873, "grad_norm": 0.5115416049957275, "learning_rate": 0.0004551379107723976, "loss": 1.2556, "step": 2171 }, { "epoch": 0.38747658549638747, "grad_norm": 0.5057587027549744, "learning_rate": 0.0004550978570518913, "loss": 0.9426, "step": 2172 }, { "epoch": 0.38765498171438767, "grad_norm": 0.5084266662597656, "learning_rate": 0.00045505778722308764, "loss": 0.9323, "step": 2173 }, { "epoch": 0.3878333779323878, "grad_norm": 0.5315088629722595, "learning_rate": 0.00045501770128913364, "loss": 1.1016, "step": 2174 }, { "epoch": 0.388011774150388, "grad_norm": 0.6192378401756287, "learning_rate": 0.0004549775992531776, "loss": 1.3612, "step": 2175 }, { "epoch": 0.3881901703683882, "grad_norm": 0.49113729596138, "learning_rate": 0.0004549374811183692, "loss": 0.984, "step": 2176 }, { "epoch": 0.38836856658638835, "grad_norm": 0.5000951886177063, "learning_rate": 0.0004548973468878591, "loss": 1.0827, "step": 2177 }, { "epoch": 0.38854696280438855, "grad_norm": 0.5333259701728821, "learning_rate": 0.00045485719656479957, "loss": 1.0745, "step": 2178 }, { "epoch": 0.38872535902238875, "grad_norm": 0.5190644860267639, "learning_rate": 0.000454817030152344, "loss": 1.1067, "step": 2179 }, { "epoch": 0.3889037552403889, "grad_norm": 0.5464503765106201, "learning_rate": 0.00045477684765364703, "loss": 1.1484, "step": 2180 }, { "epoch": 0.3890821514583891, "grad_norm": 0.5542097091674805, "learning_rate": 0.0004547366490718645, "loss": 1.1321, "step": 2181 }, { "epoch": 0.38926054767638923, "grad_norm": 0.5173625349998474, "learning_rate": 0.0004546964344101537, "loss": 1.2251, "step": 2182 }, { "epoch": 0.38943894389438943, "grad_norm": 0.5054041743278503, "learning_rate": 0.0004546562036716732, "loss": 0.9788, "step": 2183 }, { "epoch": 0.38961734011238963, "grad_norm": 0.48386964201927185, "learning_rate": 0.0004546159568595823, "loss": 1.0653, "step": 2184 }, { "epoch": 0.3897957363303898, "grad_norm": 0.5080329775810242, "learning_rate": 0.00045457569397704226, "loss": 1.1174, "step": 2185 }, { "epoch": 0.38997413254839, "grad_norm": 1.7605923414230347, "learning_rate": 0.0004545354150272153, "loss": 1.0394, "step": 2186 }, { "epoch": 0.39015252876639017, "grad_norm": 0.45817258954048157, "learning_rate": 0.0004544951200132648, "loss": 0.8228, "step": 2187 }, { "epoch": 0.3903309249843903, "grad_norm": 0.5302280783653259, "learning_rate": 0.00045445480893835567, "loss": 1.1017, "step": 2188 }, { "epoch": 0.3905093212023905, "grad_norm": 0.5459249019622803, "learning_rate": 0.0004544144818056537, "loss": 1.2306, "step": 2189 }, { "epoch": 0.3906877174203907, "grad_norm": 0.6084957122802734, "learning_rate": 0.0004543741386183264, "loss": 1.0647, "step": 2190 }, { "epoch": 0.39086611363839086, "grad_norm": 0.6476156711578369, "learning_rate": 0.00045433377937954215, "loss": 1.2749, "step": 2191 }, { "epoch": 0.39104450985639105, "grad_norm": 0.5217106938362122, "learning_rate": 0.00045429340409247084, "loss": 0.9928, "step": 2192 }, { "epoch": 0.3912229060743912, "grad_norm": 0.5622875690460205, "learning_rate": 0.0004542530127602834, "loss": 1.0067, "step": 2193 }, { "epoch": 0.3914013022923914, "grad_norm": 1.472040057182312, "learning_rate": 0.00045421260538615235, "loss": 1.1993, "step": 2194 }, { "epoch": 0.3915796985103916, "grad_norm": 0.5135729908943176, "learning_rate": 0.00045417218197325106, "loss": 0.9331, "step": 2195 }, { "epoch": 0.39175809472839174, "grad_norm": 0.662834644317627, "learning_rate": 0.00045413174252475455, "loss": 1.1606, "step": 2196 }, { "epoch": 0.39193649094639194, "grad_norm": 0.9087008237838745, "learning_rate": 0.00045409128704383873, "loss": 1.2032, "step": 2197 }, { "epoch": 0.39211488716439213, "grad_norm": 0.5726129412651062, "learning_rate": 0.0004540508155336811, "loss": 1.1915, "step": 2198 }, { "epoch": 0.3922932833823923, "grad_norm": 0.6241844296455383, "learning_rate": 0.0004540103279974602, "loss": 1.2713, "step": 2199 }, { "epoch": 0.3924716796003925, "grad_norm": 0.5253695249557495, "learning_rate": 0.000453969824438356, "loss": 0.9503, "step": 2200 }, { "epoch": 0.3926500758183927, "grad_norm": 0.4767884612083435, "learning_rate": 0.00045392930485954955, "loss": 0.9004, "step": 2201 }, { "epoch": 0.3928284720363928, "grad_norm": 0.5421171188354492, "learning_rate": 0.0004538887692642232, "loss": 1.057, "step": 2202 }, { "epoch": 0.393006868254393, "grad_norm": 0.6273959279060364, "learning_rate": 0.0004538482176555607, "loss": 1.0529, "step": 2203 }, { "epoch": 0.39318526447239316, "grad_norm": 0.5183041095733643, "learning_rate": 0.00045380765003674684, "loss": 1.0245, "step": 2204 }, { "epoch": 0.39336366069039336, "grad_norm": 0.5289639830589294, "learning_rate": 0.00045376706641096786, "loss": 1.1824, "step": 2205 }, { "epoch": 0.39354205690839356, "grad_norm": 0.520853579044342, "learning_rate": 0.00045372646678141127, "loss": 1.0964, "step": 2206 }, { "epoch": 0.3937204531263937, "grad_norm": 0.6077725291252136, "learning_rate": 0.0004536858511512656, "loss": 1.155, "step": 2207 }, { "epoch": 0.3938988493443939, "grad_norm": 0.5906806588172913, "learning_rate": 0.0004536452195237208, "loss": 1.3433, "step": 2208 }, { "epoch": 0.3940772455623941, "grad_norm": 1.7862430810928345, "learning_rate": 0.00045360457190196814, "loss": 1.2213, "step": 2209 }, { "epoch": 0.39425564178039424, "grad_norm": 0.530116856098175, "learning_rate": 0.0004535639082892, "loss": 1.0639, "step": 2210 }, { "epoch": 0.39443403799839444, "grad_norm": 0.5256516933441162, "learning_rate": 0.00045352322868861004, "loss": 1.1793, "step": 2211 }, { "epoch": 0.39461243421639464, "grad_norm": 0.46268409490585327, "learning_rate": 0.00045348253310339336, "loss": 0.9103, "step": 2212 }, { "epoch": 0.3947908304343948, "grad_norm": 0.5721630454063416, "learning_rate": 0.0004534418215367461, "loss": 1.3037, "step": 2213 }, { "epoch": 0.394969226652395, "grad_norm": 0.550315797328949, "learning_rate": 0.0004534010939918657, "loss": 1.0648, "step": 2214 }, { "epoch": 0.3951476228703951, "grad_norm": 0.48966914415359497, "learning_rate": 0.0004533603504719509, "loss": 0.9247, "step": 2215 }, { "epoch": 0.3953260190883953, "grad_norm": 0.5156149864196777, "learning_rate": 0.0004533195909802017, "loss": 1.1591, "step": 2216 }, { "epoch": 0.3955044153063955, "grad_norm": 0.5864170789718628, "learning_rate": 0.0004532788155198193, "loss": 0.967, "step": 2217 }, { "epoch": 0.39568281152439566, "grad_norm": 0.4873606562614441, "learning_rate": 0.00045323802409400626, "loss": 0.9788, "step": 2218 }, { "epoch": 0.39586120774239586, "grad_norm": 0.5299291014671326, "learning_rate": 0.00045319721670596623, "loss": 1.16, "step": 2219 }, { "epoch": 0.39603960396039606, "grad_norm": 0.5208775997161865, "learning_rate": 0.00045315639335890423, "loss": 1.1583, "step": 2220 }, { "epoch": 0.3962180001783962, "grad_norm": 0.5344860553741455, "learning_rate": 0.00045311555405602656, "loss": 1.0034, "step": 2221 }, { "epoch": 0.3963963963963964, "grad_norm": 0.5340254306793213, "learning_rate": 0.00045307469880054063, "loss": 1.0769, "step": 2222 }, { "epoch": 0.3965747926143966, "grad_norm": 0.48012998700141907, "learning_rate": 0.00045303382759565524, "loss": 0.9031, "step": 2223 }, { "epoch": 0.39675318883239674, "grad_norm": 0.511830747127533, "learning_rate": 0.0004529929404445805, "loss": 0.9729, "step": 2224 }, { "epoch": 0.39693158505039694, "grad_norm": 0.5484462976455688, "learning_rate": 0.00045295203735052747, "loss": 0.9895, "step": 2225 }, { "epoch": 0.3971099812683971, "grad_norm": 0.5423020720481873, "learning_rate": 0.0004529111183167088, "loss": 1.1713, "step": 2226 }, { "epoch": 0.3972883774863973, "grad_norm": 0.5270297527313232, "learning_rate": 0.00045287018334633824, "loss": 1.0229, "step": 2227 }, { "epoch": 0.3974667737043975, "grad_norm": 0.5546413064002991, "learning_rate": 0.00045282923244263076, "loss": 1.2354, "step": 2228 }, { "epoch": 0.3976451699223976, "grad_norm": 0.5324910283088684, "learning_rate": 0.00045278826560880267, "loss": 1.225, "step": 2229 }, { "epoch": 0.3978235661403978, "grad_norm": 0.48066118359565735, "learning_rate": 0.00045274728284807144, "loss": 0.8824, "step": 2230 }, { "epoch": 0.398001962358398, "grad_norm": 0.5693602561950684, "learning_rate": 0.00045270628416365586, "loss": 1.0692, "step": 2231 }, { "epoch": 0.39818035857639816, "grad_norm": 0.5462480783462524, "learning_rate": 0.00045266526955877595, "loss": 1.0537, "step": 2232 }, { "epoch": 0.39835875479439836, "grad_norm": 0.510565459728241, "learning_rate": 0.000452624239036653, "loss": 1.0072, "step": 2233 }, { "epoch": 0.39853715101239856, "grad_norm": 0.4951585829257965, "learning_rate": 0.0004525831926005095, "loss": 0.9697, "step": 2234 }, { "epoch": 0.3987155472303987, "grad_norm": 0.49053049087524414, "learning_rate": 0.0004525421302535693, "loss": 0.9513, "step": 2235 }, { "epoch": 0.3988939434483989, "grad_norm": 1.259932041168213, "learning_rate": 0.0004525010519990572, "loss": 0.9762, "step": 2236 }, { "epoch": 0.39907233966639905, "grad_norm": 0.5876604914665222, "learning_rate": 0.00045245995784019973, "loss": 1.1267, "step": 2237 }, { "epoch": 0.39925073588439924, "grad_norm": 2.0633914470672607, "learning_rate": 0.00045241884778022423, "loss": 1.0779, "step": 2238 }, { "epoch": 0.39942913210239944, "grad_norm": 0.4787190854549408, "learning_rate": 0.0004523777218223596, "loss": 0.9998, "step": 2239 }, { "epoch": 0.3996075283203996, "grad_norm": 2.782982110977173, "learning_rate": 0.0004523365799698358, "loss": 1.0978, "step": 2240 }, { "epoch": 0.3997859245383998, "grad_norm": 0.5323127508163452, "learning_rate": 0.00045229542222588405, "loss": 1.0, "step": 2241 }, { "epoch": 0.3999643207564, "grad_norm": 0.49209773540496826, "learning_rate": 0.0004522542485937369, "loss": 0.9856, "step": 2242 }, { "epoch": 0.4001427169744001, "grad_norm": 0.9652454257011414, "learning_rate": 0.000452213059076628, "loss": 1.0638, "step": 2243 }, { "epoch": 0.4003211131924003, "grad_norm": 0.5381683111190796, "learning_rate": 0.00045217185367779265, "loss": 1.0115, "step": 2244 }, { "epoch": 0.4004995094104005, "grad_norm": 0.48560935258865356, "learning_rate": 0.0004521306324004668, "loss": 1.0621, "step": 2245 }, { "epoch": 0.40067790562840067, "grad_norm": 0.5368553996086121, "learning_rate": 0.00045208939524788805, "loss": 0.9944, "step": 2246 }, { "epoch": 0.40085630184640086, "grad_norm": 0.5911176204681396, "learning_rate": 0.00045204814222329513, "loss": 1.1529, "step": 2247 }, { "epoch": 0.401034698064401, "grad_norm": 0.4951234459877014, "learning_rate": 0.0004520068733299282, "loss": 1.0468, "step": 2248 }, { "epoch": 0.4012130942824012, "grad_norm": 0.5037215948104858, "learning_rate": 0.0004519655885710283, "loss": 0.9121, "step": 2249 }, { "epoch": 0.4013914905004014, "grad_norm": 0.6309484243392944, "learning_rate": 0.000451924287949838, "loss": 1.2108, "step": 2250 }, { "epoch": 0.40156988671840155, "grad_norm": 0.5557146072387695, "learning_rate": 0.000451882971469601, "loss": 1.1713, "step": 2251 }, { "epoch": 0.40174828293640175, "grad_norm": 0.8298324346542358, "learning_rate": 0.0004518416391335623, "loss": 1.1566, "step": 2252 }, { "epoch": 0.40192667915440194, "grad_norm": 4.0279459953308105, "learning_rate": 0.00045180029094496813, "loss": 1.1309, "step": 2253 }, { "epoch": 0.4021050753724021, "grad_norm": 0.5673984885215759, "learning_rate": 0.0004517589269070659, "loss": 1.0474, "step": 2254 }, { "epoch": 0.4022834715904023, "grad_norm": 0.4788002669811249, "learning_rate": 0.0004517175470231044, "loss": 0.8327, "step": 2255 }, { "epoch": 0.4024618678084025, "grad_norm": 0.549028217792511, "learning_rate": 0.0004516761512963337, "loss": 1.1673, "step": 2256 }, { "epoch": 0.40264026402640263, "grad_norm": 0.7566819787025452, "learning_rate": 0.0004516347397300047, "loss": 1.103, "step": 2257 }, { "epoch": 0.4028186602444028, "grad_norm": 0.5571449995040894, "learning_rate": 0.00045159331232737, "loss": 1.4132, "step": 2258 }, { "epoch": 0.40299705646240297, "grad_norm": 0.47221845388412476, "learning_rate": 0.00045155186909168345, "loss": 0.8425, "step": 2259 }, { "epoch": 0.40317545268040317, "grad_norm": 0.956231415271759, "learning_rate": 0.0004515104100261997, "loss": 1.0242, "step": 2260 }, { "epoch": 0.40335384889840337, "grad_norm": 0.5245352387428284, "learning_rate": 0.0004514689351341751, "loss": 1.2424, "step": 2261 }, { "epoch": 0.4035322451164035, "grad_norm": 0.5289427638053894, "learning_rate": 0.00045142744441886706, "loss": 0.9463, "step": 2262 }, { "epoch": 0.4037106413344037, "grad_norm": 0.5312831997871399, "learning_rate": 0.00045138593788353424, "loss": 1.0078, "step": 2263 }, { "epoch": 0.4038890375524039, "grad_norm": 0.5654627680778503, "learning_rate": 0.00045134441553143647, "loss": 1.2575, "step": 2264 }, { "epoch": 0.40406743377040405, "grad_norm": 0.5695852637290955, "learning_rate": 0.00045130287736583493, "loss": 1.0575, "step": 2265 }, { "epoch": 0.40424582998840425, "grad_norm": 0.5789728164672852, "learning_rate": 0.00045126132338999203, "loss": 1.0646, "step": 2266 }, { "epoch": 0.40442422620640445, "grad_norm": 0.526038408279419, "learning_rate": 0.0004512197536071715, "loss": 1.035, "step": 2267 }, { "epoch": 0.4046026224244046, "grad_norm": 0.537186324596405, "learning_rate": 0.000451178168020638, "loss": 1.0141, "step": 2268 }, { "epoch": 0.4047810186424048, "grad_norm": 0.5389789342880249, "learning_rate": 0.0004511365666336578, "loss": 1.0923, "step": 2269 }, { "epoch": 0.404959414860405, "grad_norm": 0.5258745551109314, "learning_rate": 0.00045109494944949827, "loss": 1.277, "step": 2270 }, { "epoch": 0.40513781107840513, "grad_norm": 0.5116296410560608, "learning_rate": 0.0004510533164714278, "loss": 0.9776, "step": 2271 }, { "epoch": 0.40531620729640533, "grad_norm": 0.4564659297466278, "learning_rate": 0.0004510116677027165, "loss": 1.0638, "step": 2272 }, { "epoch": 0.40549460351440547, "grad_norm": 0.5655273199081421, "learning_rate": 0.00045097000314663527, "loss": 1.0859, "step": 2273 }, { "epoch": 0.40567299973240567, "grad_norm": 0.5012032389640808, "learning_rate": 0.0004509283228064565, "loss": 0.9221, "step": 2274 }, { "epoch": 0.40585139595040587, "grad_norm": 0.48415374755859375, "learning_rate": 0.00045088662668545375, "loss": 1.0236, "step": 2275 }, { "epoch": 0.406029792168406, "grad_norm": 0.47426506876945496, "learning_rate": 0.00045084491478690177, "loss": 1.0458, "step": 2276 }, { "epoch": 0.4062081883864062, "grad_norm": 0.4942657947540283, "learning_rate": 0.0004508031871140765, "loss": 1.1705, "step": 2277 }, { "epoch": 0.4063865846044064, "grad_norm": 0.5193724036216736, "learning_rate": 0.0004507614436702555, "loss": 1.0264, "step": 2278 }, { "epoch": 0.40656498082240655, "grad_norm": 0.4963577091693878, "learning_rate": 0.000450719684458717, "loss": 0.8334, "step": 2279 }, { "epoch": 0.40674337704040675, "grad_norm": 1.6158602237701416, "learning_rate": 0.0004506779094827409, "loss": 1.128, "step": 2280 }, { "epoch": 0.40692177325840695, "grad_norm": 1.3288167715072632, "learning_rate": 0.00045063611874560815, "loss": 1.006, "step": 2281 }, { "epoch": 0.4071001694764071, "grad_norm": 0.5889169573783875, "learning_rate": 0.000450594312250601, "loss": 1.0914, "step": 2282 }, { "epoch": 0.4072785656944073, "grad_norm": 0.562699019908905, "learning_rate": 0.00045055249000100283, "loss": 1.0634, "step": 2283 }, { "epoch": 0.40745696191240743, "grad_norm": 0.5045656561851501, "learning_rate": 0.00045051065200009844, "loss": 1.0903, "step": 2284 }, { "epoch": 0.40763535813040763, "grad_norm": 0.5693420767784119, "learning_rate": 0.0004504687982511737, "loss": 1.0265, "step": 2285 }, { "epoch": 0.40781375434840783, "grad_norm": 0.556315004825592, "learning_rate": 0.00045042692875751585, "loss": 1.0908, "step": 2286 }, { "epoch": 0.407992150566408, "grad_norm": 1.0736392736434937, "learning_rate": 0.00045038504352241324, "loss": 1.2133, "step": 2287 }, { "epoch": 0.4081705467844082, "grad_norm": 0.6816602349281311, "learning_rate": 0.00045034314254915555, "loss": 0.9846, "step": 2288 }, { "epoch": 0.40834894300240837, "grad_norm": 0.6575304865837097, "learning_rate": 0.0004503012258410336, "loss": 1.014, "step": 2289 }, { "epoch": 0.4085273392204085, "grad_norm": 4.394046306610107, "learning_rate": 0.00045025929340133963, "loss": 1.0483, "step": 2290 }, { "epoch": 0.4087057354384087, "grad_norm": 1.7720059156417847, "learning_rate": 0.000450217345233367, "loss": 0.9815, "step": 2291 }, { "epoch": 0.4088841316564089, "grad_norm": 0.8023939728736877, "learning_rate": 0.00045017538134041013, "loss": 0.9606, "step": 2292 }, { "epoch": 0.40906252787440905, "grad_norm": 0.7400693893432617, "learning_rate": 0.000450133401725765, "loss": 1.2293, "step": 2293 }, { "epoch": 0.40924092409240925, "grad_norm": 0.6982590556144714, "learning_rate": 0.0004500914063927286, "loss": 1.056, "step": 2294 }, { "epoch": 0.4094193203104094, "grad_norm": 0.7190932035446167, "learning_rate": 0.00045004939534459923, "loss": 1.333, "step": 2295 }, { "epoch": 0.4095977165284096, "grad_norm": 0.5682306885719299, "learning_rate": 0.0004500073685846765, "loss": 1.0389, "step": 2296 }, { "epoch": 0.4097761127464098, "grad_norm": 0.5392178297042847, "learning_rate": 0.0004499653261162611, "loss": 0.8848, "step": 2297 }, { "epoch": 0.40995450896440994, "grad_norm": 0.7155362963676453, "learning_rate": 0.0004499232679426549, "loss": 1.0928, "step": 2298 }, { "epoch": 0.41013290518241013, "grad_norm": 0.5469278693199158, "learning_rate": 0.00044988119406716144, "loss": 1.0405, "step": 2299 }, { "epoch": 0.41031130140041033, "grad_norm": 0.5402587056159973, "learning_rate": 0.000449839104493085, "loss": 0.8677, "step": 2300 }, { "epoch": 0.4104896976184105, "grad_norm": 0.5472204089164734, "learning_rate": 0.0004497969992237312, "loss": 0.8406, "step": 2301 }, { "epoch": 0.4106680938364107, "grad_norm": 0.529851496219635, "learning_rate": 0.00044975487826240715, "loss": 1.058, "step": 2302 }, { "epoch": 0.4108464900544109, "grad_norm": 0.5373932719230652, "learning_rate": 0.00044971274161242084, "loss": 1.0915, "step": 2303 }, { "epoch": 0.411024886272411, "grad_norm": 0.5543463826179504, "learning_rate": 0.0004496705892770818, "loss": 1.0177, "step": 2304 }, { "epoch": 0.4112032824904112, "grad_norm": 0.5836469531059265, "learning_rate": 0.0004496284212597006, "loss": 0.9446, "step": 2305 }, { "epoch": 0.41138167870841136, "grad_norm": 0.6079695820808411, "learning_rate": 0.00044958623756358905, "loss": 0.9742, "step": 2306 }, { "epoch": 0.41156007492641156, "grad_norm": 0.4826619625091553, "learning_rate": 0.00044954403819206037, "loss": 1.0292, "step": 2307 }, { "epoch": 0.41173847114441176, "grad_norm": 0.6502671241760254, "learning_rate": 0.00044950182314842875, "loss": 1.0641, "step": 2308 }, { "epoch": 0.4119168673624119, "grad_norm": 0.4860910475254059, "learning_rate": 0.0004494595924360098, "loss": 0.9264, "step": 2309 }, { "epoch": 0.4120952635804121, "grad_norm": 0.6258044838905334, "learning_rate": 0.00044941734605812033, "loss": 0.9347, "step": 2310 }, { "epoch": 0.4122736597984123, "grad_norm": 0.5750169157981873, "learning_rate": 0.00044937508401807826, "loss": 1.2065, "step": 2311 }, { "epoch": 0.41245205601641244, "grad_norm": 0.560260534286499, "learning_rate": 0.0004493328063192029, "loss": 1.2164, "step": 2312 }, { "epoch": 0.41263045223441264, "grad_norm": 0.5267819166183472, "learning_rate": 0.0004492905129648147, "loss": 1.1227, "step": 2313 }, { "epoch": 0.41280884845241284, "grad_norm": 0.5849962830543518, "learning_rate": 0.0004492482039582354, "loss": 1.2503, "step": 2314 }, { "epoch": 0.412987244670413, "grad_norm": 0.5180212259292603, "learning_rate": 0.00044920587930278796, "loss": 0.92, "step": 2315 }, { "epoch": 0.4131656408884132, "grad_norm": 0.4263732135295868, "learning_rate": 0.0004491635390017964, "loss": 0.8326, "step": 2316 }, { "epoch": 0.4133440371064133, "grad_norm": 0.5409104824066162, "learning_rate": 0.0004491211830585862, "loss": 1.1766, "step": 2317 }, { "epoch": 0.4135224333244135, "grad_norm": 0.5557611584663391, "learning_rate": 0.000449078811476484, "loss": 1.0369, "step": 2318 }, { "epoch": 0.4137008295424137, "grad_norm": 0.5291704535484314, "learning_rate": 0.0004490364242588176, "loss": 1.024, "step": 2319 }, { "epoch": 0.41387922576041386, "grad_norm": 0.4819130301475525, "learning_rate": 0.0004489940214089161, "loss": 1.0032, "step": 2320 }, { "epoch": 0.41405762197841406, "grad_norm": 0.5592676401138306, "learning_rate": 0.0004489516029301098, "loss": 1.1959, "step": 2321 }, { "epoch": 0.41423601819641426, "grad_norm": 0.612910270690918, "learning_rate": 0.0004489091688257303, "loss": 1.1389, "step": 2322 }, { "epoch": 0.4144144144144144, "grad_norm": 0.47932949662208557, "learning_rate": 0.00044886671909911014, "loss": 0.9523, "step": 2323 }, { "epoch": 0.4145928106324146, "grad_norm": 0.49534985423088074, "learning_rate": 0.0004488242537535835, "loss": 1.1314, "step": 2324 }, { "epoch": 0.4147712068504148, "grad_norm": 0.4879145920276642, "learning_rate": 0.00044878177279248553, "loss": 1.0072, "step": 2325 }, { "epoch": 0.41494960306841494, "grad_norm": 0.6173017621040344, "learning_rate": 0.00044873927621915267, "loss": 1.3328, "step": 2326 }, { "epoch": 0.41512799928641514, "grad_norm": 0.4791456162929535, "learning_rate": 0.00044869676403692254, "loss": 1.0026, "step": 2327 }, { "epoch": 0.4153063955044153, "grad_norm": 0.4833122491836548, "learning_rate": 0.0004486542362491341, "loss": 0.9301, "step": 2328 }, { "epoch": 0.4154847917224155, "grad_norm": 0.6367433667182922, "learning_rate": 0.00044861169285912746, "loss": 1.2782, "step": 2329 }, { "epoch": 0.4156631879404157, "grad_norm": 0.47810354828834534, "learning_rate": 0.0004485691338702439, "loss": 0.8764, "step": 2330 }, { "epoch": 0.4158415841584158, "grad_norm": 0.5335472226142883, "learning_rate": 0.000448526559285826, "loss": 1.033, "step": 2331 }, { "epoch": 0.416019980376416, "grad_norm": 0.5508213043212891, "learning_rate": 0.00044848396910921763, "loss": 1.2068, "step": 2332 }, { "epoch": 0.4161983765944162, "grad_norm": 0.5242919325828552, "learning_rate": 0.00044844136334376366, "loss": 1.1176, "step": 2333 }, { "epoch": 0.41637677281241636, "grad_norm": 0.49891209602355957, "learning_rate": 0.0004483987419928104, "loss": 0.8453, "step": 2334 }, { "epoch": 0.41655516903041656, "grad_norm": 0.4879932999610901, "learning_rate": 0.0004483561050597054, "loss": 0.8443, "step": 2335 }, { "epoch": 0.41673356524841676, "grad_norm": 0.49955081939697266, "learning_rate": 0.00044831345254779724, "loss": 0.9135, "step": 2336 }, { "epoch": 0.4169119614664169, "grad_norm": 0.5931764841079712, "learning_rate": 0.0004482707844604359, "loss": 0.8997, "step": 2337 }, { "epoch": 0.4170903576844171, "grad_norm": 0.882546603679657, "learning_rate": 0.00044822810080097245, "loss": 1.1035, "step": 2338 }, { "epoch": 0.41726875390241724, "grad_norm": 0.5271819233894348, "learning_rate": 0.00044818540157275924, "loss": 1.0151, "step": 2339 }, { "epoch": 0.41744715012041744, "grad_norm": 0.5230399370193481, "learning_rate": 0.00044814268677914983, "loss": 1.21, "step": 2340 }, { "epoch": 0.41762554633841764, "grad_norm": 0.5473104119300842, "learning_rate": 0.0004480999564234991, "loss": 1.2911, "step": 2341 }, { "epoch": 0.4178039425564178, "grad_norm": 0.5277518033981323, "learning_rate": 0.0004480572105091631, "loss": 1.1298, "step": 2342 }, { "epoch": 0.417982338774418, "grad_norm": 1.1111489534378052, "learning_rate": 0.00044801444903949894, "loss": 1.1337, "step": 2343 }, { "epoch": 0.4181607349924182, "grad_norm": 0.5263912677764893, "learning_rate": 0.00044797167201786526, "loss": 0.9498, "step": 2344 }, { "epoch": 0.4183391312104183, "grad_norm": 0.4941869378089905, "learning_rate": 0.00044792887944762155, "loss": 0.9439, "step": 2345 }, { "epoch": 0.4185175274284185, "grad_norm": 0.5406994223594666, "learning_rate": 0.0004478860713321289, "loss": 1.0799, "step": 2346 }, { "epoch": 0.4186959236464187, "grad_norm": 0.4971858263015747, "learning_rate": 0.0004478432476747493, "loss": 0.909, "step": 2347 }, { "epoch": 0.41887431986441886, "grad_norm": 0.5575050115585327, "learning_rate": 0.0004478004084788462, "loss": 0.8755, "step": 2348 }, { "epoch": 0.41905271608241906, "grad_norm": 0.6695868372917175, "learning_rate": 0.00044775755374778413, "loss": 1.1526, "step": 2349 }, { "epoch": 0.4192311123004192, "grad_norm": 0.5547947883605957, "learning_rate": 0.00044771468348492896, "loss": 1.1636, "step": 2350 }, { "epoch": 0.4194095085184194, "grad_norm": 1.2415190935134888, "learning_rate": 0.00044767179769364754, "loss": 0.9455, "step": 2351 }, { "epoch": 0.4195879047364196, "grad_norm": 0.5791499614715576, "learning_rate": 0.0004476288963773082, "loss": 0.9658, "step": 2352 }, { "epoch": 0.41976630095441975, "grad_norm": 0.576765239238739, "learning_rate": 0.00044758597953928044, "loss": 1.0575, "step": 2353 }, { "epoch": 0.41994469717241995, "grad_norm": 0.5667539238929749, "learning_rate": 0.0004475430471829348, "loss": 1.1187, "step": 2354 }, { "epoch": 0.42012309339042014, "grad_norm": 0.5511680245399475, "learning_rate": 0.00044750009931164336, "loss": 0.9857, "step": 2355 }, { "epoch": 0.4203014896084203, "grad_norm": 2.5863170623779297, "learning_rate": 0.00044745713592877904, "loss": 0.9495, "step": 2356 }, { "epoch": 0.4204798858264205, "grad_norm": 0.5761575102806091, "learning_rate": 0.00044741415703771615, "loss": 0.8793, "step": 2357 }, { "epoch": 0.4206582820444207, "grad_norm": 2.08255672454834, "learning_rate": 0.0004473711626418304, "loss": 1.1166, "step": 2358 }, { "epoch": 0.4208366782624208, "grad_norm": 1.4676316976547241, "learning_rate": 0.00044732815274449856, "loss": 1.1489, "step": 2359 }, { "epoch": 0.421015074480421, "grad_norm": 0.5451486706733704, "learning_rate": 0.00044728512734909845, "loss": 0.9492, "step": 2360 }, { "epoch": 0.42119347069842117, "grad_norm": 0.650875985622406, "learning_rate": 0.0004472420864590093, "loss": 1.0389, "step": 2361 }, { "epoch": 0.42137186691642137, "grad_norm": 0.5705146193504333, "learning_rate": 0.00044719903007761153, "loss": 0.9989, "step": 2362 }, { "epoch": 0.42155026313442157, "grad_norm": 0.4973379671573639, "learning_rate": 0.00044715595820828694, "loss": 1.0576, "step": 2363 }, { "epoch": 0.4217286593524217, "grad_norm": 0.5207223892211914, "learning_rate": 0.0004471128708544181, "loss": 1.2406, "step": 2364 }, { "epoch": 0.4219070555704219, "grad_norm": 0.5396470427513123, "learning_rate": 0.00044706976801938927, "loss": 1.1947, "step": 2365 }, { "epoch": 0.4220854517884221, "grad_norm": 0.5786265730857849, "learning_rate": 0.0004470266497065856, "loss": 1.1694, "step": 2366 }, { "epoch": 0.42226384800642225, "grad_norm": 0.5583718419075012, "learning_rate": 0.0004469835159193937, "loss": 1.1314, "step": 2367 }, { "epoch": 0.42244224422442245, "grad_norm": 0.4932156205177307, "learning_rate": 0.0004469403666612013, "loss": 0.8307, "step": 2368 }, { "epoch": 0.42262064044242265, "grad_norm": 0.48325487971305847, "learning_rate": 0.0004468972019353972, "loss": 1.0107, "step": 2369 }, { "epoch": 0.4227990366604228, "grad_norm": 0.583512008190155, "learning_rate": 0.0004468540217453715, "loss": 0.897, "step": 2370 }, { "epoch": 0.422977432878423, "grad_norm": 0.5151441097259521, "learning_rate": 0.0004468108260945157, "loss": 0.955, "step": 2371 }, { "epoch": 0.42315582909642313, "grad_norm": 0.5649638772010803, "learning_rate": 0.00044676761498622236, "loss": 1.0448, "step": 2372 }, { "epoch": 0.42333422531442333, "grad_norm": 0.7743201851844788, "learning_rate": 0.0004467243884238852, "loss": 1.0063, "step": 2373 }, { "epoch": 0.4235126215324235, "grad_norm": 0.535677433013916, "learning_rate": 0.0004466811464108992, "loss": 1.2873, "step": 2374 }, { "epoch": 0.42369101775042367, "grad_norm": 0.9543548822402954, "learning_rate": 0.00044663788895066064, "loss": 0.9269, "step": 2375 }, { "epoch": 0.42386941396842387, "grad_norm": 0.5472463965415955, "learning_rate": 0.00044659461604656687, "loss": 1.1469, "step": 2376 }, { "epoch": 0.42404781018642407, "grad_norm": 0.5283387303352356, "learning_rate": 0.00044655132770201657, "loss": 0.9037, "step": 2377 }, { "epoch": 0.4242262064044242, "grad_norm": 0.8139501810073853, "learning_rate": 0.00044650802392040957, "loss": 0.9288, "step": 2378 }, { "epoch": 0.4244046026224244, "grad_norm": 0.4796660542488098, "learning_rate": 0.0004464647047051469, "loss": 1.0693, "step": 2379 }, { "epoch": 0.4245829988404246, "grad_norm": 0.648838222026825, "learning_rate": 0.0004464213700596309, "loss": 1.3307, "step": 2380 }, { "epoch": 0.42476139505842475, "grad_norm": 0.5429944396018982, "learning_rate": 0.0004463780199872651, "loss": 1.1117, "step": 2381 }, { "epoch": 0.42493979127642495, "grad_norm": 0.47701987624168396, "learning_rate": 0.000446334654491454, "loss": 0.9715, "step": 2382 }, { "epoch": 0.4251181874944251, "grad_norm": 0.479390412569046, "learning_rate": 0.00044629127357560366, "loss": 0.7807, "step": 2383 }, { "epoch": 0.4252965837124253, "grad_norm": 0.5513942241668701, "learning_rate": 0.00044624787724312123, "loss": 0.945, "step": 2384 }, { "epoch": 0.4254749799304255, "grad_norm": 0.5833590626716614, "learning_rate": 0.0004462044654974149, "loss": 1.0181, "step": 2385 }, { "epoch": 0.42565337614842563, "grad_norm": 0.5582590699195862, "learning_rate": 0.00044616103834189426, "loss": 1.2099, "step": 2386 }, { "epoch": 0.42583177236642583, "grad_norm": 0.7029179930686951, "learning_rate": 0.0004461175957799701, "loss": 1.0159, "step": 2387 }, { "epoch": 0.42601016858442603, "grad_norm": 0.6166037321090698, "learning_rate": 0.0004460741378150544, "loss": 1.157, "step": 2388 }, { "epoch": 0.4261885648024262, "grad_norm": 0.4319639801979065, "learning_rate": 0.0004460306644505603, "loss": 1.0279, "step": 2389 }, { "epoch": 0.42636696102042637, "grad_norm": 0.5709377527236938, "learning_rate": 0.00044598717568990214, "loss": 1.1857, "step": 2390 }, { "epoch": 0.42654535723842657, "grad_norm": 0.5123762488365173, "learning_rate": 0.00044594367153649554, "loss": 0.8416, "step": 2391 }, { "epoch": 0.4267237534564267, "grad_norm": 0.5814945101737976, "learning_rate": 0.0004459001519937573, "loss": 0.8822, "step": 2392 }, { "epoch": 0.4269021496744269, "grad_norm": 0.5209176540374756, "learning_rate": 0.0004458566170651055, "loss": 0.8742, "step": 2393 }, { "epoch": 0.42708054589242705, "grad_norm": 0.5272184610366821, "learning_rate": 0.00044581306675395917, "loss": 0.9116, "step": 2394 }, { "epoch": 0.42725894211042725, "grad_norm": 0.6671530604362488, "learning_rate": 0.00044576950106373894, "loss": 1.5091, "step": 2395 }, { "epoch": 0.42743733832842745, "grad_norm": 0.5785043835639954, "learning_rate": 0.00044572591999786625, "loss": 0.9558, "step": 2396 }, { "epoch": 0.4276157345464276, "grad_norm": 0.5078967213630676, "learning_rate": 0.0004456823235597641, "loss": 0.9042, "step": 2397 }, { "epoch": 0.4277941307644278, "grad_norm": 0.5760866403579712, "learning_rate": 0.0004456387117528564, "loss": 1.1446, "step": 2398 }, { "epoch": 0.427972526982428, "grad_norm": 0.5377951264381409, "learning_rate": 0.00044559508458056853, "loss": 1.185, "step": 2399 }, { "epoch": 0.42815092320042814, "grad_norm": 0.5057874917984009, "learning_rate": 0.0004455514420463269, "loss": 1.0324, "step": 2400 }, { "epoch": 0.42832931941842833, "grad_norm": 0.45790576934814453, "learning_rate": 0.0004455077841535591, "loss": 1.1008, "step": 2401 }, { "epoch": 0.42850771563642853, "grad_norm": 0.5488193035125732, "learning_rate": 0.0004454641109056941, "loss": 1.0925, "step": 2402 }, { "epoch": 0.4286861118544287, "grad_norm": 0.5256093740463257, "learning_rate": 0.0004454204223061619, "loss": 1.1341, "step": 2403 }, { "epoch": 0.4288645080724289, "grad_norm": 0.4891919493675232, "learning_rate": 0.00044537671835839386, "loss": 1.0355, "step": 2404 }, { "epoch": 0.429042904290429, "grad_norm": 0.4800609052181244, "learning_rate": 0.0004453329990658225, "loss": 0.9841, "step": 2405 }, { "epoch": 0.4292213005084292, "grad_norm": 0.5242645740509033, "learning_rate": 0.00044528926443188137, "loss": 0.9167, "step": 2406 }, { "epoch": 0.4293996967264294, "grad_norm": 0.7704287767410278, "learning_rate": 0.0004452455144600055, "loss": 1.1246, "step": 2407 }, { "epoch": 0.42957809294442956, "grad_norm": 0.5126969814300537, "learning_rate": 0.0004452017491536309, "loss": 1.1712, "step": 2408 }, { "epoch": 0.42975648916242976, "grad_norm": 0.5316195487976074, "learning_rate": 0.00044515796851619493, "loss": 1.017, "step": 2409 }, { "epoch": 0.42993488538042995, "grad_norm": 0.5696223974227905, "learning_rate": 0.0004451141725511362, "loss": 1.0804, "step": 2410 }, { "epoch": 0.4301132815984301, "grad_norm": 0.45580875873565674, "learning_rate": 0.00044507036126189414, "loss": 0.9472, "step": 2411 }, { "epoch": 0.4302916778164303, "grad_norm": 0.579031765460968, "learning_rate": 0.00044502653465191, "loss": 1.0372, "step": 2412 }, { "epoch": 0.4304700740344305, "grad_norm": 0.5286597609519958, "learning_rate": 0.00044498269272462567, "loss": 1.2533, "step": 2413 }, { "epoch": 0.43064847025243064, "grad_norm": 0.6354557275772095, "learning_rate": 0.00044493883548348456, "loss": 1.0672, "step": 2414 }, { "epoch": 0.43082686647043084, "grad_norm": 0.6015318036079407, "learning_rate": 0.0004448949629319313, "loss": 1.1149, "step": 2415 }, { "epoch": 0.431005262688431, "grad_norm": 0.5161615014076233, "learning_rate": 0.0004448510750734114, "loss": 1.1315, "step": 2416 }, { "epoch": 0.4311836589064312, "grad_norm": 0.5163938403129578, "learning_rate": 0.000444807171911372, "loss": 1.0624, "step": 2417 }, { "epoch": 0.4313620551244314, "grad_norm": 0.558363676071167, "learning_rate": 0.00044476325344926113, "loss": 1.3461, "step": 2418 }, { "epoch": 0.4315404513424315, "grad_norm": 0.5142703652381897, "learning_rate": 0.00044471931969052817, "loss": 0.9696, "step": 2419 }, { "epoch": 0.4317188475604317, "grad_norm": 0.5983433127403259, "learning_rate": 0.00044467537063862353, "loss": 1.0887, "step": 2420 }, { "epoch": 0.4318972437784319, "grad_norm": 0.5289214253425598, "learning_rate": 0.00044463140629699916, "loss": 1.2276, "step": 2421 }, { "epoch": 0.43207563999643206, "grad_norm": 0.5035809874534607, "learning_rate": 0.00044458742666910785, "loss": 1.048, "step": 2422 }, { "epoch": 0.43225403621443226, "grad_norm": 0.5544365048408508, "learning_rate": 0.0004445434317584038, "loss": 1.0358, "step": 2423 }, { "epoch": 0.43243243243243246, "grad_norm": 0.4954850673675537, "learning_rate": 0.00044449942156834236, "loss": 0.9533, "step": 2424 }, { "epoch": 0.4326108286504326, "grad_norm": 0.5363656282424927, "learning_rate": 0.0004444553961023801, "loss": 1.0444, "step": 2425 }, { "epoch": 0.4327892248684328, "grad_norm": 0.4978641867637634, "learning_rate": 0.00044441135536397455, "loss": 1.0043, "step": 2426 }, { "epoch": 0.43296762108643294, "grad_norm": 0.536332905292511, "learning_rate": 0.0004443672993565849, "loss": 0.8316, "step": 2427 }, { "epoch": 0.43314601730443314, "grad_norm": 0.47319361567497253, "learning_rate": 0.0004443232280836712, "loss": 0.9772, "step": 2428 }, { "epoch": 0.43332441352243334, "grad_norm": 0.47262993454933167, "learning_rate": 0.0004442791415486948, "loss": 0.8884, "step": 2429 }, { "epoch": 0.4335028097404335, "grad_norm": 0.5313016176223755, "learning_rate": 0.0004442350397551182, "loss": 1.0347, "step": 2430 }, { "epoch": 0.4336812059584337, "grad_norm": 0.47843775153160095, "learning_rate": 0.0004441909227064052, "loss": 0.9594, "step": 2431 }, { "epoch": 0.4338596021764339, "grad_norm": 0.5179790258407593, "learning_rate": 0.00044414679040602066, "loss": 0.9443, "step": 2432 }, { "epoch": 0.434037998394434, "grad_norm": 0.5228581428527832, "learning_rate": 0.0004441026428574308, "loss": 0.8851, "step": 2433 }, { "epoch": 0.4342163946124342, "grad_norm": 0.5385475754737854, "learning_rate": 0.0004440584800641029, "loss": 1.1273, "step": 2434 }, { "epoch": 0.4343947908304344, "grad_norm": 0.5584160685539246, "learning_rate": 0.0004440143020295054, "loss": 1.1722, "step": 2435 }, { "epoch": 0.43457318704843456, "grad_norm": 0.5520399808883667, "learning_rate": 0.0004439701087571082, "loss": 1.1687, "step": 2436 }, { "epoch": 0.43475158326643476, "grad_norm": 0.5202842950820923, "learning_rate": 0.000443925900250382, "loss": 1.1505, "step": 2437 }, { "epoch": 0.4349299794844349, "grad_norm": 0.49009421467781067, "learning_rate": 0.0004438816765127992, "loss": 1.0714, "step": 2438 }, { "epoch": 0.4351083757024351, "grad_norm": 0.5388837456703186, "learning_rate": 0.0004438374375478329, "loss": 0.9845, "step": 2439 }, { "epoch": 0.4352867719204353, "grad_norm": 0.5120062232017517, "learning_rate": 0.0004437931833589577, "loss": 0.8921, "step": 2440 }, { "epoch": 0.43546516813843544, "grad_norm": 0.4710652530193329, "learning_rate": 0.00044374891394964925, "loss": 0.9668, "step": 2441 }, { "epoch": 0.43564356435643564, "grad_norm": 1.251098871231079, "learning_rate": 0.0004437046293233845, "loss": 1.2214, "step": 2442 }, { "epoch": 0.43582196057443584, "grad_norm": 0.5326117873191833, "learning_rate": 0.00044366032948364145, "loss": 1.2166, "step": 2443 }, { "epoch": 0.436000356792436, "grad_norm": 0.558892011642456, "learning_rate": 0.0004436160144338995, "loss": 1.0941, "step": 2444 }, { "epoch": 0.4361787530104362, "grad_norm": 0.5412200093269348, "learning_rate": 0.0004435716841776391, "loss": 0.9558, "step": 2445 }, { "epoch": 0.4363571492284364, "grad_norm": 0.50641268491745, "learning_rate": 0.00044352733871834193, "loss": 0.87, "step": 2446 }, { "epoch": 0.4365355454464365, "grad_norm": 0.5298489332199097, "learning_rate": 0.0004434829780594909, "loss": 1.0362, "step": 2447 }, { "epoch": 0.4367139416644367, "grad_norm": 0.5438408851623535, "learning_rate": 0.00044343860220456995, "loss": 0.8945, "step": 2448 }, { "epoch": 0.43689233788243687, "grad_norm": 0.5930485725402832, "learning_rate": 0.00044339421115706444, "loss": 1.1371, "step": 2449 }, { "epoch": 0.43707073410043706, "grad_norm": 0.5204629302024841, "learning_rate": 0.00044334980492046085, "loss": 1.047, "step": 2450 }, { "epoch": 0.43724913031843726, "grad_norm": 0.46508142352104187, "learning_rate": 0.0004433053834982468, "loss": 0.9044, "step": 2451 }, { "epoch": 0.4374275265364374, "grad_norm": 0.5729008913040161, "learning_rate": 0.00044326094689391105, "loss": 1.0573, "step": 2452 }, { "epoch": 0.4376059227544376, "grad_norm": 0.5557507276535034, "learning_rate": 0.00044321649511094375, "loss": 1.1433, "step": 2453 }, { "epoch": 0.4377843189724378, "grad_norm": 0.4726797044277191, "learning_rate": 0.00044317202815283605, "loss": 1.0822, "step": 2454 }, { "epoch": 0.43796271519043795, "grad_norm": 0.5183228254318237, "learning_rate": 0.00044312754602308035, "loss": 1.1219, "step": 2455 }, { "epoch": 0.43814111140843814, "grad_norm": 0.5738768577575684, "learning_rate": 0.00044308304872517037, "loss": 1.2338, "step": 2456 }, { "epoch": 0.43831950762643834, "grad_norm": 0.5397445559501648, "learning_rate": 0.0004430385362626008, "loss": 1.1709, "step": 2457 }, { "epoch": 0.4384979038444385, "grad_norm": 0.5338584780693054, "learning_rate": 0.0004429940086388676, "loss": 1.1662, "step": 2458 }, { "epoch": 0.4386763000624387, "grad_norm": 0.5330877304077148, "learning_rate": 0.00044294946585746815, "loss": 0.9068, "step": 2459 }, { "epoch": 0.4388546962804388, "grad_norm": 0.5154666304588318, "learning_rate": 0.0004429049079219006, "loss": 0.8542, "step": 2460 }, { "epoch": 0.439033092498439, "grad_norm": 0.5239487886428833, "learning_rate": 0.00044286033483566456, "loss": 1.1177, "step": 2461 }, { "epoch": 0.4392114887164392, "grad_norm": 1.6719294786453247, "learning_rate": 0.00044281574660226086, "loss": 0.7778, "step": 2462 }, { "epoch": 0.43938988493443937, "grad_norm": 0.515557587146759, "learning_rate": 0.0004427711432251914, "loss": 0.9718, "step": 2463 }, { "epoch": 0.43956828115243957, "grad_norm": 0.51800537109375, "learning_rate": 0.0004427265247079594, "loss": 0.8828, "step": 2464 }, { "epoch": 0.43974667737043976, "grad_norm": 0.7687791585922241, "learning_rate": 0.00044268189105406896, "loss": 1.1033, "step": 2465 }, { "epoch": 0.4399250735884399, "grad_norm": 0.6673977971076965, "learning_rate": 0.00044263724226702573, "loss": 1.1026, "step": 2466 }, { "epoch": 0.4401034698064401, "grad_norm": 1.0776582956314087, "learning_rate": 0.0004425925783503364, "loss": 1.1335, "step": 2467 }, { "epoch": 0.4402818660244403, "grad_norm": 0.5958316326141357, "learning_rate": 0.00044254789930750884, "loss": 0.968, "step": 2468 }, { "epoch": 0.44046026224244045, "grad_norm": 0.5925278663635254, "learning_rate": 0.0004425032051420522, "loss": 1.2383, "step": 2469 }, { "epoch": 0.44063865846044065, "grad_norm": 0.5400102138519287, "learning_rate": 0.00044245849585747656, "loss": 1.0723, "step": 2470 }, { "epoch": 0.4408170546784408, "grad_norm": 0.5745581388473511, "learning_rate": 0.00044241377145729356, "loss": 1.1578, "step": 2471 }, { "epoch": 0.440995450896441, "grad_norm": 0.514392077922821, "learning_rate": 0.00044236903194501566, "loss": 0.9727, "step": 2472 }, { "epoch": 0.4411738471144412, "grad_norm": 1.0300344228744507, "learning_rate": 0.00044232427732415687, "loss": 1.1997, "step": 2473 }, { "epoch": 0.44135224333244133, "grad_norm": 0.45754095911979675, "learning_rate": 0.0004422795075982321, "loss": 0.9891, "step": 2474 }, { "epoch": 0.44153063955044153, "grad_norm": 0.5444067120552063, "learning_rate": 0.0004422347227707575, "loss": 1.0424, "step": 2475 }, { "epoch": 0.4417090357684417, "grad_norm": 2.9430816173553467, "learning_rate": 0.0004421899228452505, "loss": 1.0375, "step": 2476 }, { "epoch": 0.44188743198644187, "grad_norm": 0.570728600025177, "learning_rate": 0.00044214510782522966, "loss": 1.257, "step": 2477 }, { "epoch": 0.44206582820444207, "grad_norm": 0.5782069563865662, "learning_rate": 0.0004421002777142148, "loss": 1.0603, "step": 2478 }, { "epoch": 0.44224422442244227, "grad_norm": 0.505310595035553, "learning_rate": 0.00044205543251572664, "loss": 0.9608, "step": 2479 }, { "epoch": 0.4424226206404424, "grad_norm": 0.5967997312545776, "learning_rate": 0.00044201057223328767, "loss": 1.2066, "step": 2480 }, { "epoch": 0.4426010168584426, "grad_norm": 0.5399075746536255, "learning_rate": 0.00044196569687042085, "loss": 1.0442, "step": 2481 }, { "epoch": 0.4427794130764428, "grad_norm": 0.5512840151786804, "learning_rate": 0.0004419208064306509, "loss": 1.1372, "step": 2482 }, { "epoch": 0.44295780929444295, "grad_norm": 0.540708601474762, "learning_rate": 0.0004418759009175033, "loss": 1.2523, "step": 2483 }, { "epoch": 0.44313620551244315, "grad_norm": 0.5288528800010681, "learning_rate": 0.0004418309803345051, "loss": 1.1503, "step": 2484 }, { "epoch": 0.4433146017304433, "grad_norm": 0.48563152551651, "learning_rate": 0.0004417860446851841, "loss": 0.9179, "step": 2485 }, { "epoch": 0.4434929979484435, "grad_norm": 0.4692551791667938, "learning_rate": 0.00044174109397306983, "loss": 0.9259, "step": 2486 }, { "epoch": 0.4436713941664437, "grad_norm": 0.5046235918998718, "learning_rate": 0.0004416961282016926, "loss": 0.8989, "step": 2487 }, { "epoch": 0.44384979038444383, "grad_norm": 0.5326201915740967, "learning_rate": 0.00044165114737458377, "loss": 1.1311, "step": 2488 }, { "epoch": 0.44402818660244403, "grad_norm": 0.4889252185821533, "learning_rate": 0.00044160615149527643, "loss": 0.9787, "step": 2489 }, { "epoch": 0.44420658282044423, "grad_norm": 0.5168465971946716, "learning_rate": 0.0004415611405673044, "loss": 1.1548, "step": 2490 }, { "epoch": 0.44438497903844437, "grad_norm": 0.49787530303001404, "learning_rate": 0.0004415161145942028, "loss": 1.1097, "step": 2491 }, { "epoch": 0.44456337525644457, "grad_norm": 0.47516435384750366, "learning_rate": 0.000441471073579508, "loss": 1.0759, "step": 2492 }, { "epoch": 0.44474177147444477, "grad_norm": 0.5186420679092407, "learning_rate": 0.0004414260175267574, "loss": 0.9333, "step": 2493 }, { "epoch": 0.4449201676924449, "grad_norm": 0.538212239742279, "learning_rate": 0.0004413809464394899, "loss": 1.0112, "step": 2494 }, { "epoch": 0.4450985639104451, "grad_norm": 0.6391366124153137, "learning_rate": 0.0004413358603212451, "loss": 1.0763, "step": 2495 }, { "epoch": 0.44527696012844525, "grad_norm": 0.49085238575935364, "learning_rate": 0.00044129075917556415, "loss": 0.9748, "step": 2496 }, { "epoch": 0.44545535634644545, "grad_norm": 3.5137956142425537, "learning_rate": 0.00044124564300598943, "loss": 0.9487, "step": 2497 }, { "epoch": 0.44563375256444565, "grad_norm": 0.5351426601409912, "learning_rate": 0.00044120051181606413, "loss": 1.0537, "step": 2498 }, { "epoch": 0.4458121487824458, "grad_norm": 0.5824923515319824, "learning_rate": 0.0004411553656093329, "loss": 0.9824, "step": 2499 }, { "epoch": 0.445990545000446, "grad_norm": 0.5528644323348999, "learning_rate": 0.00044111020438934153, "loss": 1.1647, "step": 2500 }, { "epoch": 0.4461689412184462, "grad_norm": 0.6510967016220093, "learning_rate": 0.0004410650281596369, "loss": 1.101, "step": 2501 }, { "epoch": 0.44634733743644633, "grad_norm": 0.5696078538894653, "learning_rate": 0.00044101983692376723, "loss": 1.0418, "step": 2502 }, { "epoch": 0.44652573365444653, "grad_norm": 0.5028705596923828, "learning_rate": 0.0004409746306852818, "loss": 0.8853, "step": 2503 }, { "epoch": 0.44670412987244673, "grad_norm": 0.5344279408454895, "learning_rate": 0.00044092940944773104, "loss": 1.0267, "step": 2504 }, { "epoch": 0.4468825260904469, "grad_norm": 0.5304036140441895, "learning_rate": 0.00044088417321466657, "loss": 1.0281, "step": 2505 }, { "epoch": 0.4470609223084471, "grad_norm": 0.5835813879966736, "learning_rate": 0.0004408389219896413, "loss": 1.1255, "step": 2506 }, { "epoch": 0.4472393185264472, "grad_norm": 0.48519715666770935, "learning_rate": 0.0004407936557762093, "loss": 1.0046, "step": 2507 }, { "epoch": 0.4474177147444474, "grad_norm": 0.5301933288574219, "learning_rate": 0.0004407483745779256, "loss": 1.0998, "step": 2508 }, { "epoch": 0.4475961109624476, "grad_norm": 0.4589798152446747, "learning_rate": 0.00044070307839834665, "loss": 0.7962, "step": 2509 }, { "epoch": 0.44777450718044776, "grad_norm": 0.496530681848526, "learning_rate": 0.00044065776724103, "loss": 0.7726, "step": 2510 }, { "epoch": 0.44795290339844795, "grad_norm": 0.4880942702293396, "learning_rate": 0.00044061244110953437, "loss": 0.8395, "step": 2511 }, { "epoch": 0.44813129961644815, "grad_norm": 0.5023302435874939, "learning_rate": 0.0004405671000074196, "loss": 0.9507, "step": 2512 }, { "epoch": 0.4483096958344483, "grad_norm": 0.561840832233429, "learning_rate": 0.00044052174393824687, "loss": 1.0694, "step": 2513 }, { "epoch": 0.4484880920524485, "grad_norm": 0.5529274344444275, "learning_rate": 0.00044047637290557835, "loss": 1.221, "step": 2514 }, { "epoch": 0.4486664882704487, "grad_norm": 0.44090861082077026, "learning_rate": 0.00044043098691297746, "loss": 1.0016, "step": 2515 }, { "epoch": 0.44884488448844884, "grad_norm": 0.5670448541641235, "learning_rate": 0.0004403855859640088, "loss": 0.9971, "step": 2516 }, { "epoch": 0.44902328070644904, "grad_norm": 0.45753970742225647, "learning_rate": 0.00044034017006223817, "loss": 0.7955, "step": 2517 }, { "epoch": 0.4492016769244492, "grad_norm": 0.7095730900764465, "learning_rate": 0.0004402947392112324, "loss": 0.7677, "step": 2518 }, { "epoch": 0.4493800731424494, "grad_norm": 0.5540285110473633, "learning_rate": 0.00044024929341455977, "loss": 1.34, "step": 2519 }, { "epoch": 0.4495584693604496, "grad_norm": 0.5628980994224548, "learning_rate": 0.00044020383267578955, "loss": 1.0199, "step": 2520 }, { "epoch": 0.4497368655784497, "grad_norm": 0.5465995073318481, "learning_rate": 0.0004401583569984921, "loss": 1.1564, "step": 2521 }, { "epoch": 0.4499152617964499, "grad_norm": 0.4945712089538574, "learning_rate": 0.00044011286638623916, "loss": 1.0976, "step": 2522 }, { "epoch": 0.4500936580144501, "grad_norm": 0.5624112486839294, "learning_rate": 0.00044006736084260345, "loss": 1.0677, "step": 2523 }, { "epoch": 0.45027205423245026, "grad_norm": 0.5418776869773865, "learning_rate": 0.0004400218403711591, "loss": 1.2596, "step": 2524 }, { "epoch": 0.45045045045045046, "grad_norm": 0.4920550584793091, "learning_rate": 0.0004399763049754811, "loss": 1.0434, "step": 2525 }, { "epoch": 0.45062884666845066, "grad_norm": 0.5869379639625549, "learning_rate": 0.0004399307546591459, "loss": 1.0799, "step": 2526 }, { "epoch": 0.4508072428864508, "grad_norm": 0.619750440120697, "learning_rate": 0.0004398851894257309, "loss": 1.1675, "step": 2527 }, { "epoch": 0.450985639104451, "grad_norm": 0.5530658960342407, "learning_rate": 0.00043983960927881493, "loss": 0.9814, "step": 2528 }, { "epoch": 0.45116403532245114, "grad_norm": 0.5448163151741028, "learning_rate": 0.0004397940142219776, "loss": 1.2051, "step": 2529 }, { "epoch": 0.45134243154045134, "grad_norm": 0.5526171922683716, "learning_rate": 0.00043974840425880027, "loss": 1.2632, "step": 2530 }, { "epoch": 0.45152082775845154, "grad_norm": 0.46508967876434326, "learning_rate": 0.0004397027793928648, "loss": 0.859, "step": 2531 }, { "epoch": 0.4516992239764517, "grad_norm": 0.9926976561546326, "learning_rate": 0.00043965713962775465, "loss": 0.924, "step": 2532 }, { "epoch": 0.4518776201944519, "grad_norm": 0.5205312967300415, "learning_rate": 0.00043961148496705443, "loss": 0.8696, "step": 2533 }, { "epoch": 0.4520560164124521, "grad_norm": 0.5645661354064941, "learning_rate": 0.0004395658154143498, "loss": 1.0412, "step": 2534 }, { "epoch": 0.4522344126304522, "grad_norm": 0.6434807181358337, "learning_rate": 0.00043952013097322754, "loss": 1.1406, "step": 2535 }, { "epoch": 0.4524128088484524, "grad_norm": 0.5284314751625061, "learning_rate": 0.00043947443164727573, "loss": 0.9631, "step": 2536 }, { "epoch": 0.4525912050664526, "grad_norm": 0.5045761466026306, "learning_rate": 0.00043942871744008375, "loss": 0.9317, "step": 2537 }, { "epoch": 0.45276960128445276, "grad_norm": 0.5030120015144348, "learning_rate": 0.00043938298835524166, "loss": 0.8189, "step": 2538 }, { "epoch": 0.45294799750245296, "grad_norm": 0.4574146270751953, "learning_rate": 0.0004393372443963412, "loss": 0.7983, "step": 2539 }, { "epoch": 0.4531263937204531, "grad_norm": 0.5439005494117737, "learning_rate": 0.00043929148556697505, "loss": 1.0398, "step": 2540 }, { "epoch": 0.4533047899384533, "grad_norm": 0.5667563080787659, "learning_rate": 0.0004392457118707371, "loss": 1.1755, "step": 2541 }, { "epoch": 0.4534831861564535, "grad_norm": 0.5080005526542664, "learning_rate": 0.0004391999233112224, "loss": 1.0836, "step": 2542 }, { "epoch": 0.45366158237445364, "grad_norm": 0.4875447750091553, "learning_rate": 0.00043915411989202706, "loss": 0.8928, "step": 2543 }, { "epoch": 0.45383997859245384, "grad_norm": 0.5923610925674438, "learning_rate": 0.0004391083016167486, "loss": 1.0488, "step": 2544 }, { "epoch": 0.45401837481045404, "grad_norm": 0.49262282252311707, "learning_rate": 0.00043906246848898544, "loss": 0.9689, "step": 2545 }, { "epoch": 0.4541967710284542, "grad_norm": 0.4940873086452484, "learning_rate": 0.0004390166205123375, "loss": 1.0168, "step": 2546 }, { "epoch": 0.4543751672464544, "grad_norm": 0.44292840361595154, "learning_rate": 0.00043897075769040543, "loss": 0.6536, "step": 2547 }, { "epoch": 0.4545535634644546, "grad_norm": 0.4958019256591797, "learning_rate": 0.00043892488002679144, "loss": 0.918, "step": 2548 }, { "epoch": 0.4547319596824547, "grad_norm": 8.831953048706055, "learning_rate": 0.0004388789875250986, "loss": 1.2642, "step": 2549 }, { "epoch": 0.4549103559004549, "grad_norm": 0.8993814587593079, "learning_rate": 0.0004388330801889314, "loss": 1.0951, "step": 2550 }, { "epoch": 0.45508875211845506, "grad_norm": 0.47165387868881226, "learning_rate": 0.00043878715802189526, "loss": 0.9744, "step": 2551 }, { "epoch": 0.45526714833645526, "grad_norm": 0.4588290750980377, "learning_rate": 0.0004387412210275971, "loss": 0.7959, "step": 2552 }, { "epoch": 0.45544554455445546, "grad_norm": 0.5076047778129578, "learning_rate": 0.00043869526920964466, "loss": 1.1043, "step": 2553 }, { "epoch": 0.4556239407724556, "grad_norm": 0.5313244462013245, "learning_rate": 0.0004386493025716469, "loss": 1.1507, "step": 2554 }, { "epoch": 0.4558023369904558, "grad_norm": 0.7201479077339172, "learning_rate": 0.00043860332111721407, "loss": 1.0751, "step": 2555 }, { "epoch": 0.455980733208456, "grad_norm": 0.503410816192627, "learning_rate": 0.0004385573248499576, "loss": 1.0373, "step": 2556 }, { "epoch": 0.45615912942645614, "grad_norm": 0.5491756200790405, "learning_rate": 0.00043851131377349004, "loss": 1.0991, "step": 2557 }, { "epoch": 0.45633752564445634, "grad_norm": 0.5328734517097473, "learning_rate": 0.0004384652878914249, "loss": 1.0833, "step": 2558 }, { "epoch": 0.45651592186245654, "grad_norm": 0.564434826374054, "learning_rate": 0.00043841924720737724, "loss": 1.1307, "step": 2559 }, { "epoch": 0.4566943180804567, "grad_norm": 0.5131104588508606, "learning_rate": 0.000438373191724963, "loss": 0.9291, "step": 2560 }, { "epoch": 0.4568727142984569, "grad_norm": 0.4851999282836914, "learning_rate": 0.0004383271214477993, "loss": 0.8158, "step": 2561 }, { "epoch": 0.457051110516457, "grad_norm": 0.5580843091011047, "learning_rate": 0.00043828103637950455, "loss": 1.1642, "step": 2562 }, { "epoch": 0.4572295067344572, "grad_norm": 0.5292948484420776, "learning_rate": 0.00043823493652369824, "loss": 1.0218, "step": 2563 }, { "epoch": 0.4574079029524574, "grad_norm": 0.4889855682849884, "learning_rate": 0.000438188821884001, "loss": 0.9337, "step": 2564 }, { "epoch": 0.45758629917045757, "grad_norm": 0.5168807506561279, "learning_rate": 0.0004381426924640346, "loss": 1.0195, "step": 2565 }, { "epoch": 0.45776469538845777, "grad_norm": 0.5251139998435974, "learning_rate": 0.0004380965482674222, "loss": 1.0455, "step": 2566 }, { "epoch": 0.45794309160645796, "grad_norm": 0.49821674823760986, "learning_rate": 0.00043805038929778785, "loss": 0.9241, "step": 2567 }, { "epoch": 0.4581214878244581, "grad_norm": 0.48444879055023193, "learning_rate": 0.0004380042155587568, "loss": 1.0078, "step": 2568 }, { "epoch": 0.4582998840424583, "grad_norm": 0.4438982903957367, "learning_rate": 0.00043795802705395555, "loss": 0.7366, "step": 2569 }, { "epoch": 0.4584782802604585, "grad_norm": 0.48436489701271057, "learning_rate": 0.0004379118237870118, "loss": 0.9042, "step": 2570 }, { "epoch": 0.45865667647845865, "grad_norm": 0.5146360397338867, "learning_rate": 0.0004378656057615542, "loss": 0.9988, "step": 2571 }, { "epoch": 0.45883507269645885, "grad_norm": 0.5639342665672302, "learning_rate": 0.00043781937298121275, "loss": 1.2857, "step": 2572 }, { "epoch": 0.459013468914459, "grad_norm": 0.5131158828735352, "learning_rate": 0.00043777312544961865, "loss": 1.1494, "step": 2573 }, { "epoch": 0.4591918651324592, "grad_norm": 0.5281994342803955, "learning_rate": 0.0004377268631704041, "loss": 1.1338, "step": 2574 }, { "epoch": 0.4593702613504594, "grad_norm": 0.6242533922195435, "learning_rate": 0.0004376805861472024, "loss": 1.1631, "step": 2575 }, { "epoch": 0.45954865756845953, "grad_norm": 0.48754405975341797, "learning_rate": 0.0004376342943836483, "loss": 1.016, "step": 2576 }, { "epoch": 0.4597270537864597, "grad_norm": 0.5026589632034302, "learning_rate": 0.0004375879878833775, "loss": 1.0022, "step": 2577 }, { "epoch": 0.4599054500044599, "grad_norm": 0.4625113606452942, "learning_rate": 0.0004375416666500268, "loss": 0.942, "step": 2578 }, { "epoch": 0.46008384622246007, "grad_norm": 0.552786111831665, "learning_rate": 0.00043749533068723436, "loss": 1.1234, "step": 2579 }, { "epoch": 0.46026224244046027, "grad_norm": 0.5415485501289368, "learning_rate": 0.0004374489799986393, "loss": 0.8123, "step": 2580 }, { "epoch": 0.46044063865846047, "grad_norm": 0.49887794256210327, "learning_rate": 0.000437402614587882, "loss": 1.0032, "step": 2581 }, { "epoch": 0.4606190348764606, "grad_norm": 0.5717129111289978, "learning_rate": 0.00043735623445860397, "loss": 1.2778, "step": 2582 }, { "epoch": 0.4607974310944608, "grad_norm": 0.4855397939682007, "learning_rate": 0.00043730983961444794, "loss": 1.0836, "step": 2583 }, { "epoch": 0.46097582731246095, "grad_norm": 0.46733468770980835, "learning_rate": 0.0004372634300590578, "loss": 0.9161, "step": 2584 }, { "epoch": 0.46115422353046115, "grad_norm": 0.5448064804077148, "learning_rate": 0.0004372170057960783, "loss": 1.0957, "step": 2585 }, { "epoch": 0.46133261974846135, "grad_norm": 0.5003434419631958, "learning_rate": 0.0004371705668291558, "loss": 1.1793, "step": 2586 }, { "epoch": 0.4615110159664615, "grad_norm": 0.5096003413200378, "learning_rate": 0.00043712411316193755, "loss": 1.29, "step": 2587 }, { "epoch": 0.4616894121844617, "grad_norm": 0.5056552290916443, "learning_rate": 0.00043707764479807194, "loss": 1.0382, "step": 2588 }, { "epoch": 0.4618678084024619, "grad_norm": 0.4839148223400116, "learning_rate": 0.0004370311617412086, "loss": 0.9115, "step": 2589 }, { "epoch": 0.46204620462046203, "grad_norm": 0.49606168270111084, "learning_rate": 0.0004369846639949984, "loss": 1.1419, "step": 2590 }, { "epoch": 0.46222460083846223, "grad_norm": 0.4850931763648987, "learning_rate": 0.000436938151563093, "loss": 1.0411, "step": 2591 }, { "epoch": 0.46240299705646243, "grad_norm": 0.5619013905525208, "learning_rate": 0.0004368916244491458, "loss": 1.0051, "step": 2592 }, { "epoch": 0.46258139327446257, "grad_norm": 0.5425794720649719, "learning_rate": 0.00043684508265681065, "loss": 1.3862, "step": 2593 }, { "epoch": 0.46275978949246277, "grad_norm": 0.5217241644859314, "learning_rate": 0.00043679852618974327, "loss": 1.2932, "step": 2594 }, { "epoch": 0.4629381857104629, "grad_norm": 0.5080055594444275, "learning_rate": 0.0004367519550515999, "loss": 1.1496, "step": 2595 }, { "epoch": 0.4631165819284631, "grad_norm": 0.5273397564888, "learning_rate": 0.0004367053692460385, "loss": 1.1263, "step": 2596 }, { "epoch": 0.4632949781464633, "grad_norm": 0.5620000958442688, "learning_rate": 0.0004366587687767176, "loss": 0.9767, "step": 2597 }, { "epoch": 0.46347337436446345, "grad_norm": 0.49073854088783264, "learning_rate": 0.0004366121536472974, "loss": 1.2086, "step": 2598 }, { "epoch": 0.46365177058246365, "grad_norm": 0.5440914034843445, "learning_rate": 0.000436565523861439, "loss": 1.248, "step": 2599 }, { "epoch": 0.46383016680046385, "grad_norm": 0.5432100892066956, "learning_rate": 0.00043651887942280454, "loss": 0.9665, "step": 2600 }, { "epoch": 0.464008563018464, "grad_norm": 0.5001043081283569, "learning_rate": 0.00043647222033505763, "loss": 0.8812, "step": 2601 }, { "epoch": 0.4641869592364642, "grad_norm": 0.4610103964805603, "learning_rate": 0.0004364255466018627, "loss": 0.9032, "step": 2602 }, { "epoch": 0.4643653554544644, "grad_norm": 0.5426071286201477, "learning_rate": 0.0004363788582268857, "loss": 1.1128, "step": 2603 }, { "epoch": 0.46454375167246453, "grad_norm": 0.5425922274589539, "learning_rate": 0.00043633215521379326, "loss": 1.134, "step": 2604 }, { "epoch": 0.46472214789046473, "grad_norm": 0.5755911469459534, "learning_rate": 0.0004362854375662536, "loss": 1.345, "step": 2605 }, { "epoch": 0.4649005441084649, "grad_norm": 0.5014548301696777, "learning_rate": 0.0004362387052879358, "loss": 1.0121, "step": 2606 }, { "epoch": 0.4650789403264651, "grad_norm": 0.5047395825386047, "learning_rate": 0.00043619195838251023, "loss": 1.0983, "step": 2607 }, { "epoch": 0.46525733654446527, "grad_norm": 0.5320248603820801, "learning_rate": 0.0004361451968536484, "loss": 0.9432, "step": 2608 }, { "epoch": 0.4654357327624654, "grad_norm": 0.5131525993347168, "learning_rate": 0.0004360984207050229, "loss": 0.9492, "step": 2609 }, { "epoch": 0.4656141289804656, "grad_norm": 0.57234787940979, "learning_rate": 0.0004360516299403075, "loss": 1.3292, "step": 2610 }, { "epoch": 0.4657925251984658, "grad_norm": 0.502707302570343, "learning_rate": 0.0004360048245631772, "loss": 1.0897, "step": 2611 }, { "epoch": 0.46597092141646596, "grad_norm": 0.5083783268928528, "learning_rate": 0.00043595800457730795, "loss": 0.9891, "step": 2612 }, { "epoch": 0.46614931763446615, "grad_norm": 0.5372927188873291, "learning_rate": 0.00043591116998637717, "loss": 1.1991, "step": 2613 }, { "epoch": 0.46632771385246635, "grad_norm": 0.46498778462409973, "learning_rate": 0.00043586432079406297, "loss": 0.8831, "step": 2614 }, { "epoch": 0.4665061100704665, "grad_norm": 0.5247387886047363, "learning_rate": 0.0004358174570040451, "loss": 1.2432, "step": 2615 }, { "epoch": 0.4666845062884667, "grad_norm": 0.5532581210136414, "learning_rate": 0.000435770578620004, "loss": 1.1609, "step": 2616 }, { "epoch": 0.46686290250646684, "grad_norm": 0.5167216062545776, "learning_rate": 0.0004357236856456217, "loss": 1.0657, "step": 2617 }, { "epoch": 0.46704129872446704, "grad_norm": 0.48156222701072693, "learning_rate": 0.000435676778084581, "loss": 1.1393, "step": 2618 }, { "epoch": 0.46721969494246723, "grad_norm": 0.5826752185821533, "learning_rate": 0.0004356298559405661, "loss": 1.1713, "step": 2619 }, { "epoch": 0.4673980911604674, "grad_norm": 0.5204340815544128, "learning_rate": 0.00043558291921726215, "loss": 1.1773, "step": 2620 }, { "epoch": 0.4675764873784676, "grad_norm": 0.5820581316947937, "learning_rate": 0.00043553596791835557, "loss": 0.9531, "step": 2621 }, { "epoch": 0.4677548835964678, "grad_norm": 0.535647988319397, "learning_rate": 0.000435489002047534, "loss": 0.8793, "step": 2622 }, { "epoch": 0.4679332798144679, "grad_norm": 0.4820907413959503, "learning_rate": 0.000435442021608486, "loss": 1.0139, "step": 2623 }, { "epoch": 0.4681116760324681, "grad_norm": 0.5509294867515564, "learning_rate": 0.0004353950266049014, "loss": 1.0987, "step": 2624 }, { "epoch": 0.4682900722504683, "grad_norm": 0.554036021232605, "learning_rate": 0.00043534801704047115, "loss": 1.0175, "step": 2625 }, { "epoch": 0.46846846846846846, "grad_norm": 0.5267329812049866, "learning_rate": 0.0004353009929188875, "loss": 1.0262, "step": 2626 }, { "epoch": 0.46864686468646866, "grad_norm": 0.5234902501106262, "learning_rate": 0.0004352539542438436, "loss": 0.8124, "step": 2627 }, { "epoch": 0.4688252609044688, "grad_norm": 0.6302539706230164, "learning_rate": 0.00043520690101903374, "loss": 1.3212, "step": 2628 }, { "epoch": 0.469003657122469, "grad_norm": 0.5413920879364014, "learning_rate": 0.00043515983324815365, "loss": 1.1461, "step": 2629 }, { "epoch": 0.4691820533404692, "grad_norm": 0.4936441481113434, "learning_rate": 0.0004351127509349, "loss": 0.9603, "step": 2630 }, { "epoch": 0.46936044955846934, "grad_norm": 0.4682817757129669, "learning_rate": 0.00043506565408297045, "loss": 0.8536, "step": 2631 }, { "epoch": 0.46953884577646954, "grad_norm": 0.6088268160820007, "learning_rate": 0.0004350185426960641, "loss": 1.1274, "step": 2632 }, { "epoch": 0.46971724199446974, "grad_norm": 0.5248781442642212, "learning_rate": 0.00043497141677788107, "loss": 1.1042, "step": 2633 }, { "epoch": 0.4698956382124699, "grad_norm": 0.48056891560554504, "learning_rate": 0.0004349242763321225, "loss": 1.0877, "step": 2634 }, { "epoch": 0.4700740344304701, "grad_norm": 0.48219484090805054, "learning_rate": 0.0004348771213624909, "loss": 1.0188, "step": 2635 }, { "epoch": 0.4702524306484703, "grad_norm": 0.5074059963226318, "learning_rate": 0.0004348299518726897, "loss": 0.8514, "step": 2636 }, { "epoch": 0.4704308268664704, "grad_norm": 0.48974233865737915, "learning_rate": 0.00043478276786642364, "loss": 1.0564, "step": 2637 }, { "epoch": 0.4706092230844706, "grad_norm": 0.45620694756507874, "learning_rate": 0.0004347355693473985, "loss": 0.8984, "step": 2638 }, { "epoch": 0.47078761930247076, "grad_norm": 0.5297985672950745, "learning_rate": 0.00043468835631932124, "loss": 0.9644, "step": 2639 }, { "epoch": 0.47096601552047096, "grad_norm": 0.5142080187797546, "learning_rate": 0.0004346411287859, "loss": 1.0379, "step": 2640 }, { "epoch": 0.47114441173847116, "grad_norm": 0.5011320114135742, "learning_rate": 0.00043459388675084386, "loss": 1.1448, "step": 2641 }, { "epoch": 0.4713228079564713, "grad_norm": 0.5459404587745667, "learning_rate": 0.00043454663021786337, "loss": 1.0494, "step": 2642 }, { "epoch": 0.4715012041744715, "grad_norm": 0.4870483875274658, "learning_rate": 0.00043449935919066997, "loss": 1.1025, "step": 2643 }, { "epoch": 0.4716796003924717, "grad_norm": 0.4572905898094177, "learning_rate": 0.00043445207367297624, "loss": 1.0561, "step": 2644 }, { "epoch": 0.47185799661047184, "grad_norm": 0.5108131766319275, "learning_rate": 0.00043440477366849607, "loss": 0.8723, "step": 2645 }, { "epoch": 0.47203639282847204, "grad_norm": 0.49937039613723755, "learning_rate": 0.00043435745918094437, "loss": 1.0159, "step": 2646 }, { "epoch": 0.47221478904647224, "grad_norm": 0.5099616050720215, "learning_rate": 0.00043431013021403707, "loss": 0.9302, "step": 2647 }, { "epoch": 0.4723931852644724, "grad_norm": 0.5088136792182922, "learning_rate": 0.0004342627867714915, "loss": 1.054, "step": 2648 }, { "epoch": 0.4725715814824726, "grad_norm": 0.48349374532699585, "learning_rate": 0.0004342154288570259, "loss": 1.0577, "step": 2649 }, { "epoch": 0.4727499777004727, "grad_norm": 0.528062105178833, "learning_rate": 0.0004341680564743599, "loss": 1.0324, "step": 2650 }, { "epoch": 0.4729283739184729, "grad_norm": 1.0809184312820435, "learning_rate": 0.0004341206696272139, "loss": 1.1805, "step": 2651 }, { "epoch": 0.4731067701364731, "grad_norm": 0.5047591328620911, "learning_rate": 0.00043407326831930985, "loss": 1.0002, "step": 2652 }, { "epoch": 0.47328516635447326, "grad_norm": 0.5000717639923096, "learning_rate": 0.00043402585255437035, "loss": 1.0015, "step": 2653 }, { "epoch": 0.47346356257247346, "grad_norm": 0.608333170413971, "learning_rate": 0.0004339784223361197, "loss": 0.9044, "step": 2654 }, { "epoch": 0.47364195879047366, "grad_norm": 0.5130841135978699, "learning_rate": 0.00043393097766828293, "loss": 1.1026, "step": 2655 }, { "epoch": 0.4738203550084738, "grad_norm": 0.5287538170814514, "learning_rate": 0.0004338835185545863, "loss": 1.0682, "step": 2656 }, { "epoch": 0.473998751226474, "grad_norm": 0.5100160837173462, "learning_rate": 0.00043383604499875727, "loss": 1.231, "step": 2657 }, { "epoch": 0.4741771474444742, "grad_norm": 0.4938991963863373, "learning_rate": 0.0004337885570045244, "loss": 1.0292, "step": 2658 }, { "epoch": 0.47435554366247434, "grad_norm": 0.45146873593330383, "learning_rate": 0.0004337410545756173, "loss": 0.8997, "step": 2659 }, { "epoch": 0.47453393988047454, "grad_norm": 0.45835426449775696, "learning_rate": 0.0004336935377157668, "loss": 0.773, "step": 2660 }, { "epoch": 0.4747123360984747, "grad_norm": 0.4573782980442047, "learning_rate": 0.000433646006428705, "loss": 0.8717, "step": 2661 }, { "epoch": 0.4748907323164749, "grad_norm": 0.4976518154144287, "learning_rate": 0.00043359846071816484, "loss": 0.862, "step": 2662 }, { "epoch": 0.4750691285344751, "grad_norm": 0.5328902006149292, "learning_rate": 0.0004335509005878806, "loss": 0.9372, "step": 2663 }, { "epoch": 0.4752475247524752, "grad_norm": 0.5134164690971375, "learning_rate": 0.0004335033260415876, "loss": 0.9136, "step": 2664 }, { "epoch": 0.4754259209704754, "grad_norm": 0.5302520990371704, "learning_rate": 0.00043345573708302235, "loss": 1.0568, "step": 2665 }, { "epoch": 0.4756043171884756, "grad_norm": 0.4945065379142761, "learning_rate": 0.0004334081337159225, "loss": 0.8435, "step": 2666 }, { "epoch": 0.47578271340647577, "grad_norm": 1.0435439348220825, "learning_rate": 0.0004333605159440266, "loss": 1.0573, "step": 2667 }, { "epoch": 0.47596110962447596, "grad_norm": 2.6927244663238525, "learning_rate": 0.0004333128837710748, "loss": 1.0532, "step": 2668 }, { "epoch": 0.47613950584247616, "grad_norm": 0.5720566511154175, "learning_rate": 0.000433265237200808, "loss": 1.0378, "step": 2669 }, { "epoch": 0.4763179020604763, "grad_norm": 0.49117255210876465, "learning_rate": 0.00043321757623696836, "loss": 1.0109, "step": 2670 }, { "epoch": 0.4764962982784765, "grad_norm": 0.5019311904907227, "learning_rate": 0.00043316990088329907, "loss": 1.0556, "step": 2671 }, { "epoch": 0.47667469449647665, "grad_norm": 1.1491711139678955, "learning_rate": 0.00043312221114354463, "loss": 1.0185, "step": 2672 }, { "epoch": 0.47685309071447685, "grad_norm": 1.0251749753952026, "learning_rate": 0.00043307450702145047, "loss": 1.1779, "step": 2673 }, { "epoch": 0.47703148693247704, "grad_norm": 0.5985758900642395, "learning_rate": 0.00043302678852076337, "loss": 1.1387, "step": 2674 }, { "epoch": 0.4772098831504772, "grad_norm": 0.6510484218597412, "learning_rate": 0.000432979055645231, "loss": 1.1599, "step": 2675 }, { "epoch": 0.4773882793684774, "grad_norm": 0.5804511904716492, "learning_rate": 0.0004329313083986024, "loss": 1.0075, "step": 2676 }, { "epoch": 0.4775666755864776, "grad_norm": 0.5480517745018005, "learning_rate": 0.00043288354678462757, "loss": 1.1907, "step": 2677 }, { "epoch": 0.47774507180447773, "grad_norm": 0.5310084223747253, "learning_rate": 0.0004328357708070576, "loss": 0.9193, "step": 2678 }, { "epoch": 0.4779234680224779, "grad_norm": 0.5491518974304199, "learning_rate": 0.00043278798046964494, "loss": 1.2297, "step": 2679 }, { "epoch": 0.4781018642404781, "grad_norm": 0.4863007664680481, "learning_rate": 0.0004327401757761429, "loss": 0.9668, "step": 2680 }, { "epoch": 0.47828026045847827, "grad_norm": 0.5505911111831665, "learning_rate": 0.00043269235673030614, "loss": 1.0962, "step": 2681 }, { "epoch": 0.47845865667647847, "grad_norm": 0.4990256428718567, "learning_rate": 0.00043264452333589034, "loss": 0.8941, "step": 2682 }, { "epoch": 0.4786370528944786, "grad_norm": 0.5696044564247131, "learning_rate": 0.0004325966755966522, "loss": 1.1817, "step": 2683 }, { "epoch": 0.4788154491124788, "grad_norm": 0.5234004259109497, "learning_rate": 0.00043254881351634976, "loss": 1.0851, "step": 2684 }, { "epoch": 0.478993845330479, "grad_norm": 0.5903022289276123, "learning_rate": 0.0004325009370987421, "loss": 1.2182, "step": 2685 }, { "epoch": 0.47917224154847915, "grad_norm": 0.5186327695846558, "learning_rate": 0.0004324530463475893, "loss": 0.9566, "step": 2686 }, { "epoch": 0.47935063776647935, "grad_norm": 0.5512902736663818, "learning_rate": 0.00043240514126665274, "loss": 0.9735, "step": 2687 }, { "epoch": 0.47952903398447955, "grad_norm": 0.5918879508972168, "learning_rate": 0.00043235722185969497, "loss": 1.1593, "step": 2688 }, { "epoch": 0.4797074302024797, "grad_norm": 0.5881208777427673, "learning_rate": 0.0004323092881304794, "loss": 1.0421, "step": 2689 }, { "epoch": 0.4798858264204799, "grad_norm": 0.5843618512153625, "learning_rate": 0.00043226134008277084, "loss": 1.199, "step": 2690 }, { "epoch": 0.4800642226384801, "grad_norm": 0.5014129877090454, "learning_rate": 0.0004322133777203351, "loss": 0.852, "step": 2691 }, { "epoch": 0.48024261885648023, "grad_norm": 0.5492891073226929, "learning_rate": 0.000432165401046939, "loss": 0.9382, "step": 2692 }, { "epoch": 0.48042101507448043, "grad_norm": 0.5708451271057129, "learning_rate": 0.00043211741006635076, "loss": 0.9182, "step": 2693 }, { "epoch": 0.48059941129248057, "grad_norm": 0.5316191911697388, "learning_rate": 0.00043206940478233947, "loss": 1.05, "step": 2694 }, { "epoch": 0.48077780751048077, "grad_norm": 0.5057091116905212, "learning_rate": 0.0004320213851986755, "loss": 0.8626, "step": 2695 }, { "epoch": 0.48095620372848097, "grad_norm": 0.5974101424217224, "learning_rate": 0.00043197335131913025, "loss": 1.1793, "step": 2696 }, { "epoch": 0.4811345999464811, "grad_norm": 0.5790625214576721, "learning_rate": 0.00043192530314747625, "loss": 1.0063, "step": 2697 }, { "epoch": 0.4813129961644813, "grad_norm": 0.4502911865711212, "learning_rate": 0.0004318772406874873, "loss": 0.8672, "step": 2698 }, { "epoch": 0.4814913923824815, "grad_norm": 0.5690745711326599, "learning_rate": 0.00043182916394293817, "loss": 0.9999, "step": 2699 }, { "epoch": 0.48166978860048165, "grad_norm": 0.584483802318573, "learning_rate": 0.00043178107291760463, "loss": 1.0083, "step": 2700 }, { "epoch": 0.48184818481848185, "grad_norm": 0.48927435278892517, "learning_rate": 0.00043173296761526395, "loss": 0.7844, "step": 2701 }, { "epoch": 0.48202658103648205, "grad_norm": 0.6203662157058716, "learning_rate": 0.0004316848480396941, "loss": 0.9332, "step": 2702 }, { "epoch": 0.4822049772544822, "grad_norm": 0.5448145866394043, "learning_rate": 0.00043163671419467454, "loss": 1.0309, "step": 2703 }, { "epoch": 0.4823833734724824, "grad_norm": 0.540557324886322, "learning_rate": 0.0004315885660839857, "loss": 0.9622, "step": 2704 }, { "epoch": 0.4825617696904826, "grad_norm": 0.4740718603134155, "learning_rate": 0.0004315404037114089, "loss": 1.003, "step": 2705 }, { "epoch": 0.48274016590848273, "grad_norm": 0.5453110933303833, "learning_rate": 0.0004314922270807269, "loss": 1.2003, "step": 2706 }, { "epoch": 0.48291856212648293, "grad_norm": 0.4645708501338959, "learning_rate": 0.0004314440361957235, "loss": 0.9038, "step": 2707 }, { "epoch": 0.4830969583444831, "grad_norm": 0.48761603236198425, "learning_rate": 0.00043139583106018367, "loss": 1.1861, "step": 2708 }, { "epoch": 0.4832753545624833, "grad_norm": 0.48210960626602173, "learning_rate": 0.0004313476116778933, "loss": 1.0031, "step": 2709 }, { "epoch": 0.48345375078048347, "grad_norm": 0.4890519380569458, "learning_rate": 0.00043129937805263944, "loss": 0.9203, "step": 2710 }, { "epoch": 0.4836321469984836, "grad_norm": 0.5152431726455688, "learning_rate": 0.00043125113018821054, "loss": 1.001, "step": 2711 }, { "epoch": 0.4838105432164838, "grad_norm": 0.5150320529937744, "learning_rate": 0.00043120286808839587, "loss": 0.9304, "step": 2712 }, { "epoch": 0.483988939434484, "grad_norm": 0.4525447487831116, "learning_rate": 0.0004311545917569859, "loss": 0.8478, "step": 2713 }, { "epoch": 0.48416733565248415, "grad_norm": 0.47689542174339294, "learning_rate": 0.0004311063011977723, "loss": 1.032, "step": 2714 }, { "epoch": 0.48434573187048435, "grad_norm": 0.5542166829109192, "learning_rate": 0.0004310579964145477, "loss": 1.2234, "step": 2715 }, { "epoch": 0.48452412808848455, "grad_norm": 0.513331949710846, "learning_rate": 0.00043100967741110593, "loss": 0.9973, "step": 2716 }, { "epoch": 0.4847025243064847, "grad_norm": 0.4602411389350891, "learning_rate": 0.000430961344191242, "loss": 0.8423, "step": 2717 }, { "epoch": 0.4848809205244849, "grad_norm": 0.5340333580970764, "learning_rate": 0.000430912996758752, "loss": 1.2688, "step": 2718 }, { "epoch": 0.48505931674248504, "grad_norm": 0.47053262591362, "learning_rate": 0.00043086463511743313, "loss": 0.8783, "step": 2719 }, { "epoch": 0.48523771296048523, "grad_norm": 0.5520641207695007, "learning_rate": 0.0004308162592710836, "loss": 1.1515, "step": 2720 }, { "epoch": 0.48541610917848543, "grad_norm": 0.48937782645225525, "learning_rate": 0.0004307678692235029, "loss": 1.0731, "step": 2721 }, { "epoch": 0.4855945053964856, "grad_norm": 0.5234431624412537, "learning_rate": 0.0004307194649784915, "loss": 1.081, "step": 2722 }, { "epoch": 0.4857729016144858, "grad_norm": 0.5052714943885803, "learning_rate": 0.0004306710465398511, "loss": 1.2472, "step": 2723 }, { "epoch": 0.485951297832486, "grad_norm": 0.466281920671463, "learning_rate": 0.00043062261391138454, "loss": 0.875, "step": 2724 }, { "epoch": 0.4861296940504861, "grad_norm": 0.47504422068595886, "learning_rate": 0.00043057416709689554, "loss": 1.0092, "step": 2725 }, { "epoch": 0.4863080902684863, "grad_norm": 0.46300631761550903, "learning_rate": 0.00043052570610018913, "loss": 0.9929, "step": 2726 }, { "epoch": 0.4864864864864865, "grad_norm": 0.4238641560077667, "learning_rate": 0.0004304772309250715, "loss": 0.8075, "step": 2727 }, { "epoch": 0.48666488270448666, "grad_norm": 0.509427011013031, "learning_rate": 0.00043042874157534985, "loss": 0.9091, "step": 2728 }, { "epoch": 0.48684327892248686, "grad_norm": 1.843088150024414, "learning_rate": 0.0004303802380548324, "loss": 1.6363, "step": 2729 }, { "epoch": 0.487021675140487, "grad_norm": 0.5678585767745972, "learning_rate": 0.0004303317203673287, "loss": 1.1911, "step": 2730 }, { "epoch": 0.4872000713584872, "grad_norm": 0.49592193961143494, "learning_rate": 0.00043028318851664934, "loss": 0.8901, "step": 2731 }, { "epoch": 0.4873784675764874, "grad_norm": 0.4731305241584778, "learning_rate": 0.0004302346425066059, "loss": 0.9217, "step": 2732 }, { "epoch": 0.48755686379448754, "grad_norm": 0.6168728470802307, "learning_rate": 0.0004301860823410112, "loss": 1.081, "step": 2733 }, { "epoch": 0.48773526001248774, "grad_norm": 0.48209506273269653, "learning_rate": 0.00043013750802367915, "loss": 0.7949, "step": 2734 }, { "epoch": 0.48791365623048794, "grad_norm": 0.7890462875366211, "learning_rate": 0.00043008891955842465, "loss": 1.1409, "step": 2735 }, { "epoch": 0.4880920524484881, "grad_norm": 2.6161890029907227, "learning_rate": 0.00043004031694906397, "loss": 1.1929, "step": 2736 }, { "epoch": 0.4882704486664883, "grad_norm": 0.7953986525535583, "learning_rate": 0.0004299917001994143, "loss": 0.956, "step": 2737 }, { "epoch": 0.4884488448844885, "grad_norm": 1.4645198583602905, "learning_rate": 0.00042994306931329394, "loss": 1.0847, "step": 2738 }, { "epoch": 0.4886272411024886, "grad_norm": 0.5251899361610413, "learning_rate": 0.00042989442429452237, "loss": 1.0796, "step": 2739 }, { "epoch": 0.4888056373204888, "grad_norm": 0.4881310760974884, "learning_rate": 0.0004298457651469201, "loss": 0.9101, "step": 2740 }, { "epoch": 0.48898403353848896, "grad_norm": 0.7399401068687439, "learning_rate": 0.0004297970918743088, "loss": 1.1056, "step": 2741 }, { "epoch": 0.48916242975648916, "grad_norm": 0.531498908996582, "learning_rate": 0.00042974840448051135, "loss": 1.104, "step": 2742 }, { "epoch": 0.48934082597448936, "grad_norm": 0.5090386867523193, "learning_rate": 0.00042969970296935153, "loss": 0.7713, "step": 2743 }, { "epoch": 0.4895192221924895, "grad_norm": 0.5436068177223206, "learning_rate": 0.00042965098734465434, "loss": 0.9884, "step": 2744 }, { "epoch": 0.4896976184104897, "grad_norm": 0.5477145314216614, "learning_rate": 0.00042960225761024597, "loss": 1.1816, "step": 2745 }, { "epoch": 0.4898760146284899, "grad_norm": 0.5642074942588806, "learning_rate": 0.00042955351376995355, "loss": 1.057, "step": 2746 }, { "epoch": 0.49005441084649004, "grad_norm": 0.5721654295921326, "learning_rate": 0.0004295047558276054, "loss": 0.8484, "step": 2747 }, { "epoch": 0.49023280706449024, "grad_norm": 0.4718813896179199, "learning_rate": 0.000429455983787031, "loss": 0.8456, "step": 2748 }, { "epoch": 0.49041120328249044, "grad_norm": 0.4769846498966217, "learning_rate": 0.0004294071976520608, "loss": 0.7766, "step": 2749 }, { "epoch": 0.4905895995004906, "grad_norm": 0.571636438369751, "learning_rate": 0.0004293583974265266, "loss": 0.921, "step": 2750 }, { "epoch": 0.4907679957184908, "grad_norm": 0.5464839935302734, "learning_rate": 0.000429309583114261, "loss": 1.1658, "step": 2751 }, { "epoch": 0.4909463919364909, "grad_norm": 0.5281213521957397, "learning_rate": 0.00042926075471909787, "loss": 1.1353, "step": 2752 }, { "epoch": 0.4911247881544911, "grad_norm": 0.5068462491035461, "learning_rate": 0.0004292119122448723, "loss": 0.9014, "step": 2753 }, { "epoch": 0.4913031843724913, "grad_norm": 0.532954752445221, "learning_rate": 0.0004291630556954202, "loss": 1.0867, "step": 2754 }, { "epoch": 0.49148158059049146, "grad_norm": 0.48287615180015564, "learning_rate": 0.0004291141850745788, "loss": 0.9296, "step": 2755 }, { "epoch": 0.49165997680849166, "grad_norm": 0.4885706603527069, "learning_rate": 0.0004290653003861864, "loss": 0.9466, "step": 2756 }, { "epoch": 0.49183837302649186, "grad_norm": 0.5004509091377258, "learning_rate": 0.0004290164016340824, "loss": 0.9552, "step": 2757 }, { "epoch": 0.492016769244492, "grad_norm": 0.5846733450889587, "learning_rate": 0.0004289674888221072, "loss": 1.3886, "step": 2758 }, { "epoch": 0.4921951654624922, "grad_norm": 0.4464746415615082, "learning_rate": 0.00042891856195410237, "loss": 0.7713, "step": 2759 }, { "epoch": 0.4923735616804924, "grad_norm": 0.48179852962493896, "learning_rate": 0.0004288696210339108, "loss": 0.9217, "step": 2760 }, { "epoch": 0.49255195789849254, "grad_norm": 0.5003469586372375, "learning_rate": 0.0004288206660653762, "loss": 0.8922, "step": 2761 }, { "epoch": 0.49273035411649274, "grad_norm": 0.5266236066818237, "learning_rate": 0.00042877169705234335, "loss": 0.9248, "step": 2762 }, { "epoch": 0.4929087503344929, "grad_norm": 0.5324473977088928, "learning_rate": 0.00042872271399865835, "loss": 1.0357, "step": 2763 }, { "epoch": 0.4930871465524931, "grad_norm": 0.7425055503845215, "learning_rate": 0.0004286737169081684, "loss": 1.026, "step": 2764 }, { "epoch": 0.4932655427704933, "grad_norm": 0.8448666334152222, "learning_rate": 0.0004286247057847215, "loss": 0.9976, "step": 2765 }, { "epoch": 0.4934439389884934, "grad_norm": 0.5740126371383667, "learning_rate": 0.0004285756806321671, "loss": 1.1577, "step": 2766 }, { "epoch": 0.4936223352064936, "grad_norm": 0.5506383180618286, "learning_rate": 0.0004285266414543556, "loss": 1.0981, "step": 2767 }, { "epoch": 0.4938007314244938, "grad_norm": 0.49879491329193115, "learning_rate": 0.00042847758825513847, "loss": 0.8611, "step": 2768 }, { "epoch": 0.49397912764249396, "grad_norm": 0.5577161908149719, "learning_rate": 0.0004284285210383685, "loss": 0.8755, "step": 2769 }, { "epoch": 0.49415752386049416, "grad_norm": 1.8733638525009155, "learning_rate": 0.00042837943980789914, "loss": 1.0863, "step": 2770 }, { "epoch": 0.49433592007849436, "grad_norm": 0.620151162147522, "learning_rate": 0.00042833034456758533, "loss": 1.0697, "step": 2771 }, { "epoch": 0.4945143162964945, "grad_norm": 0.6251631379127502, "learning_rate": 0.00042828123532128305, "loss": 1.163, "step": 2772 }, { "epoch": 0.4946927125144947, "grad_norm": 0.55173259973526, "learning_rate": 0.0004282321120728493, "loss": 1.0081, "step": 2773 }, { "epoch": 0.49487110873249485, "grad_norm": 0.5649335384368896, "learning_rate": 0.0004281829748261421, "loss": 1.2093, "step": 2774 }, { "epoch": 0.49504950495049505, "grad_norm": 0.5768599510192871, "learning_rate": 0.0004281338235850208, "loss": 1.1961, "step": 2775 }, { "epoch": 0.49522790116849524, "grad_norm": 0.4870094358921051, "learning_rate": 0.0004280846583533456, "loss": 1.0099, "step": 2776 }, { "epoch": 0.4954062973864954, "grad_norm": 0.5303916335105896, "learning_rate": 0.00042803547913497795, "loss": 0.934, "step": 2777 }, { "epoch": 0.4955846936044956, "grad_norm": 1.3991183042526245, "learning_rate": 0.00042798628593378044, "loss": 1.0462, "step": 2778 }, { "epoch": 0.4957630898224958, "grad_norm": 0.5671733617782593, "learning_rate": 0.00042793707875361667, "loss": 1.0136, "step": 2779 }, { "epoch": 0.4959414860404959, "grad_norm": 0.5110854506492615, "learning_rate": 0.00042788785759835117, "loss": 0.9371, "step": 2780 }, { "epoch": 0.4961198822584961, "grad_norm": 0.48012104630470276, "learning_rate": 0.00042783862247185, "loss": 1.0671, "step": 2781 }, { "epoch": 0.4962982784764963, "grad_norm": 1.609937310218811, "learning_rate": 0.0004277893733779798, "loss": 1.1734, "step": 2782 }, { "epoch": 0.49647667469449647, "grad_norm": 0.49931612610816956, "learning_rate": 0.0004277401103206089, "loss": 0.8801, "step": 2783 }, { "epoch": 0.49665507091249667, "grad_norm": 0.5576810240745544, "learning_rate": 0.0004276908333036061, "loss": 0.998, "step": 2784 }, { "epoch": 0.4968334671304968, "grad_norm": 0.5244370102882385, "learning_rate": 0.00042764154233084184, "loss": 1.1198, "step": 2785 }, { "epoch": 0.497011863348497, "grad_norm": 0.5110731720924377, "learning_rate": 0.00042759223740618723, "loss": 1.0418, "step": 2786 }, { "epoch": 0.4971902595664972, "grad_norm": 0.5166656374931335, "learning_rate": 0.0004275429185335147, "loss": 1.0333, "step": 2787 }, { "epoch": 0.49736865578449735, "grad_norm": 0.558217465877533, "learning_rate": 0.00042749358571669783, "loss": 1.0004, "step": 2788 }, { "epoch": 0.49754705200249755, "grad_norm": 0.5724664926528931, "learning_rate": 0.000427444238959611, "loss": 1.0876, "step": 2789 }, { "epoch": 0.49772544822049775, "grad_norm": 0.5535562038421631, "learning_rate": 0.00042739487826613006, "loss": 1.0584, "step": 2790 }, { "epoch": 0.4979038444384979, "grad_norm": 0.6063182950019836, "learning_rate": 0.0004273455036401317, "loss": 0.9144, "step": 2791 }, { "epoch": 0.4980822406564981, "grad_norm": 0.6180481314659119, "learning_rate": 0.00042729611508549384, "loss": 1.1755, "step": 2792 }, { "epoch": 0.4982606368744983, "grad_norm": 0.524019718170166, "learning_rate": 0.0004272467126060954, "loss": 1.0019, "step": 2793 }, { "epoch": 0.49843903309249843, "grad_norm": 0.49826425313949585, "learning_rate": 0.00042719729620581637, "loss": 0.9266, "step": 2794 }, { "epoch": 0.4986174293104986, "grad_norm": 0.5382309556007385, "learning_rate": 0.000427147865888538, "loss": 1.105, "step": 2795 }, { "epoch": 0.49879582552849877, "grad_norm": 0.4761876165866852, "learning_rate": 0.0004270984216581425, "loss": 0.8737, "step": 2796 }, { "epoch": 0.49897422174649897, "grad_norm": 0.4881121516227722, "learning_rate": 0.0004270489635185131, "loss": 0.9282, "step": 2797 }, { "epoch": 0.49915261796449917, "grad_norm": 0.6625795960426331, "learning_rate": 0.00042699949147353435, "loss": 1.1568, "step": 2798 }, { "epoch": 0.4993310141824993, "grad_norm": 0.5215954184532166, "learning_rate": 0.00042695000552709164, "loss": 1.0541, "step": 2799 }, { "epoch": 0.4995094104004995, "grad_norm": 0.4769946336746216, "learning_rate": 0.0004269005056830717, "loss": 0.9529, "step": 2800 }, { "epoch": 0.4996878066184997, "grad_norm": 0.46005573868751526, "learning_rate": 0.00042685099194536216, "loss": 1.0152, "step": 2801 }, { "epoch": 0.49986620283649985, "grad_norm": 0.8600802421569824, "learning_rate": 0.00042680146431785184, "loss": 0.891, "step": 2802 }, { "epoch": 0.5000445990545, "grad_norm": 0.47280198335647583, "learning_rate": 0.00042675192280443053, "loss": 1.0193, "step": 2803 }, { "epoch": 0.5002229952725002, "grad_norm": 0.44590240716934204, "learning_rate": 0.00042670236740898935, "loss": 0.8403, "step": 2804 }, { "epoch": 0.5004013914905004, "grad_norm": 0.5397332906723022, "learning_rate": 0.00042665279813542024, "loss": 1.0327, "step": 2805 }, { "epoch": 0.5005797877085005, "grad_norm": 0.49650824069976807, "learning_rate": 0.0004266032149876163, "loss": 0.8893, "step": 2806 }, { "epoch": 0.5007581839265007, "grad_norm": 0.5721169710159302, "learning_rate": 0.000426553617969472, "loss": 0.8712, "step": 2807 }, { "epoch": 0.5009365801445009, "grad_norm": 0.4694698750972748, "learning_rate": 0.00042650400708488245, "loss": 0.8884, "step": 2808 }, { "epoch": 0.5011149763625011, "grad_norm": 0.5037152767181396, "learning_rate": 0.00042645438233774414, "loss": 1.2473, "step": 2809 }, { "epoch": 0.5012933725805013, "grad_norm": 0.4780758321285248, "learning_rate": 0.00042640474373195457, "loss": 0.9399, "step": 2810 }, { "epoch": 0.5014717687985015, "grad_norm": 0.5098987817764282, "learning_rate": 0.00042635509127141236, "loss": 1.0123, "step": 2811 }, { "epoch": 0.5016501650165016, "grad_norm": 0.4265352487564087, "learning_rate": 0.0004263054249600172, "loss": 0.704, "step": 2812 }, { "epoch": 0.5018285612345018, "grad_norm": 0.45242688059806824, "learning_rate": 0.0004262557448016697, "loss": 0.8694, "step": 2813 }, { "epoch": 0.502006957452502, "grad_norm": 0.503936767578125, "learning_rate": 0.00042620605080027197, "loss": 0.9914, "step": 2814 }, { "epoch": 0.5021853536705022, "grad_norm": 0.45868226885795593, "learning_rate": 0.0004261563429597268, "loss": 0.9463, "step": 2815 }, { "epoch": 0.5023637498885024, "grad_norm": 0.5536309480667114, "learning_rate": 0.0004261066212839383, "loss": 1.2046, "step": 2816 }, { "epoch": 0.5025421461065025, "grad_norm": 0.43166589736938477, "learning_rate": 0.00042605688577681156, "loss": 0.793, "step": 2817 }, { "epoch": 0.5027205423245027, "grad_norm": 0.5023233294487, "learning_rate": 0.00042600713644225274, "loss": 1.0378, "step": 2818 }, { "epoch": 0.5028989385425029, "grad_norm": 0.5103394985198975, "learning_rate": 0.0004259573732841692, "loss": 1.0274, "step": 2819 }, { "epoch": 0.5030773347605031, "grad_norm": 0.4885903298854828, "learning_rate": 0.0004259075963064692, "loss": 0.9873, "step": 2820 }, { "epoch": 0.5032557309785033, "grad_norm": 0.46850115060806274, "learning_rate": 0.0004258578055130623, "loss": 1.0053, "step": 2821 }, { "epoch": 0.5034341271965035, "grad_norm": 1.0507224798202515, "learning_rate": 0.0004258080009078591, "loss": 0.9356, "step": 2822 }, { "epoch": 0.5036125234145036, "grad_norm": 0.48565196990966797, "learning_rate": 0.0004257581824947711, "loss": 0.9913, "step": 2823 }, { "epoch": 0.5037909196325038, "grad_norm": 0.4672144651412964, "learning_rate": 0.00042570835027771114, "loss": 1.0402, "step": 2824 }, { "epoch": 0.503969315850504, "grad_norm": 0.5116181373596191, "learning_rate": 0.00042565850426059295, "loss": 1.0377, "step": 2825 }, { "epoch": 0.5041477120685042, "grad_norm": 0.5363556742668152, "learning_rate": 0.0004256086444473314, "loss": 1.0358, "step": 2826 }, { "epoch": 0.5043261082865044, "grad_norm": 0.4890466332435608, "learning_rate": 0.0004255587708418425, "loss": 1.0442, "step": 2827 }, { "epoch": 0.5045045045045045, "grad_norm": 0.50138258934021, "learning_rate": 0.0004255088834480433, "loss": 1.009, "step": 2828 }, { "epoch": 0.5046829007225047, "grad_norm": 0.5070481300354004, "learning_rate": 0.00042545898226985186, "loss": 1.0232, "step": 2829 }, { "epoch": 0.5048612969405049, "grad_norm": 0.5242338180541992, "learning_rate": 0.00042540906731118746, "loss": 1.099, "step": 2830 }, { "epoch": 0.505039693158505, "grad_norm": 0.5118682980537415, "learning_rate": 0.00042535913857597046, "loss": 1.0284, "step": 2831 }, { "epoch": 0.5052180893765053, "grad_norm": 0.4739261269569397, "learning_rate": 0.00042530919606812215, "loss": 1.0342, "step": 2832 }, { "epoch": 0.5053964855945055, "grad_norm": 0.4524725675582886, "learning_rate": 0.00042525923979156507, "loss": 0.8644, "step": 2833 }, { "epoch": 0.5055748818125055, "grad_norm": 0.499525249004364, "learning_rate": 0.00042520926975022266, "loss": 1.0513, "step": 2834 }, { "epoch": 0.5057532780305057, "grad_norm": 0.4836816191673279, "learning_rate": 0.00042515928594801964, "loss": 0.8886, "step": 2835 }, { "epoch": 0.5059316742485059, "grad_norm": 0.5527642965316772, "learning_rate": 0.00042510928838888163, "loss": 1.2704, "step": 2836 }, { "epoch": 0.5061100704665061, "grad_norm": 0.5419808030128479, "learning_rate": 0.0004250592770767355, "loss": 1.1396, "step": 2837 }, { "epoch": 0.5062884666845063, "grad_norm": 0.4811457395553589, "learning_rate": 0.0004250092520155091, "loss": 0.9047, "step": 2838 }, { "epoch": 0.5064668629025064, "grad_norm": 0.4570630192756653, "learning_rate": 0.0004249592132091313, "loss": 0.8595, "step": 2839 }, { "epoch": 0.5066452591205066, "grad_norm": 0.5125581622123718, "learning_rate": 0.0004249091606615322, "loss": 1.2342, "step": 2840 }, { "epoch": 0.5068236553385068, "grad_norm": 0.5075557231903076, "learning_rate": 0.0004248590943766429, "loss": 1.032, "step": 2841 }, { "epoch": 0.507002051556507, "grad_norm": 0.7127680778503418, "learning_rate": 0.00042480901435839566, "loss": 1.1435, "step": 2842 }, { "epoch": 0.5071804477745072, "grad_norm": 0.5281385779380798, "learning_rate": 0.0004247589206107236, "loss": 1.1247, "step": 2843 }, { "epoch": 0.5073588439925074, "grad_norm": 0.6512944102287292, "learning_rate": 0.00042470881313756107, "loss": 1.1724, "step": 2844 }, { "epoch": 0.5075372402105075, "grad_norm": 0.6242824196815491, "learning_rate": 0.0004246586919428436, "loss": 1.0296, "step": 2845 }, { "epoch": 0.5077156364285077, "grad_norm": 0.5622745752334595, "learning_rate": 0.0004246085570305076, "loss": 0.9505, "step": 2846 }, { "epoch": 0.5078940326465079, "grad_norm": 0.5400723218917847, "learning_rate": 0.0004245584084044907, "loss": 1.081, "step": 2847 }, { "epoch": 0.5080724288645081, "grad_norm": 0.8070915937423706, "learning_rate": 0.00042450824606873145, "loss": 0.9311, "step": 2848 }, { "epoch": 0.5082508250825083, "grad_norm": 0.6803063154220581, "learning_rate": 0.00042445807002716967, "loss": 1.2538, "step": 2849 }, { "epoch": 0.5084292213005084, "grad_norm": 0.6261130571365356, "learning_rate": 0.0004244078802837462, "loss": 1.1796, "step": 2850 }, { "epoch": 0.5086076175185086, "grad_norm": 0.5159333348274231, "learning_rate": 0.00042435767684240286, "loss": 1.074, "step": 2851 }, { "epoch": 0.5087860137365088, "grad_norm": 0.5446671843528748, "learning_rate": 0.0004243074597070826, "loss": 0.9219, "step": 2852 }, { "epoch": 0.508964409954509, "grad_norm": 0.5136865377426147, "learning_rate": 0.00042425722888172937, "loss": 0.966, "step": 2853 }, { "epoch": 0.5091428061725092, "grad_norm": 0.7158051133155823, "learning_rate": 0.00042420698437028846, "loss": 0.9874, "step": 2854 }, { "epoch": 0.5093212023905094, "grad_norm": 0.4794641137123108, "learning_rate": 0.000424156726176706, "loss": 0.9166, "step": 2855 }, { "epoch": 0.5094995986085095, "grad_norm": 0.5229628086090088, "learning_rate": 0.0004241064543049292, "loss": 0.9187, "step": 2856 }, { "epoch": 0.5096779948265097, "grad_norm": 0.5953057408332825, "learning_rate": 0.00042405616875890634, "loss": 1.205, "step": 2857 }, { "epoch": 0.5098563910445099, "grad_norm": 0.629094660282135, "learning_rate": 0.0004240058695425869, "loss": 1.5532, "step": 2858 }, { "epoch": 0.5100347872625101, "grad_norm": 0.5075649619102478, "learning_rate": 0.0004239555566599214, "loss": 1.0137, "step": 2859 }, { "epoch": 0.5102131834805103, "grad_norm": 0.5661181807518005, "learning_rate": 0.00042390523011486133, "loss": 0.9655, "step": 2860 }, { "epoch": 0.5103915796985103, "grad_norm": 0.5415096282958984, "learning_rate": 0.00042385488991135927, "loss": 1.1261, "step": 2861 }, { "epoch": 0.5105699759165105, "grad_norm": 0.5538334846496582, "learning_rate": 0.00042380453605336897, "loss": 0.8759, "step": 2862 }, { "epoch": 0.5107483721345107, "grad_norm": 0.5417324900627136, "learning_rate": 0.00042375416854484527, "loss": 0.9578, "step": 2863 }, { "epoch": 0.5109267683525109, "grad_norm": 0.5347568392753601, "learning_rate": 0.0004237037873897439, "loss": 1.0563, "step": 2864 }, { "epoch": 0.5111051645705111, "grad_norm": 0.5405890941619873, "learning_rate": 0.00042365339259202184, "loss": 1.1051, "step": 2865 }, { "epoch": 0.5112835607885113, "grad_norm": 0.4867878258228302, "learning_rate": 0.00042360298415563706, "loss": 0.9752, "step": 2866 }, { "epoch": 0.5114619570065114, "grad_norm": 0.519904375076294, "learning_rate": 0.0004235525620845486, "loss": 1.0394, "step": 2867 }, { "epoch": 0.5116403532245116, "grad_norm": 0.6319023966789246, "learning_rate": 0.00042350212638271655, "loss": 1.163, "step": 2868 }, { "epoch": 0.5118187494425118, "grad_norm": 0.5480625033378601, "learning_rate": 0.00042345167705410227, "loss": 1.1205, "step": 2869 }, { "epoch": 0.511997145660512, "grad_norm": 0.5435265898704529, "learning_rate": 0.00042340121410266784, "loss": 0.9511, "step": 2870 }, { "epoch": 0.5121755418785122, "grad_norm": 0.4731312096118927, "learning_rate": 0.00042335073753237666, "loss": 0.8017, "step": 2871 }, { "epoch": 0.5123539380965123, "grad_norm": 0.49716416001319885, "learning_rate": 0.00042330024734719317, "loss": 0.8897, "step": 2872 }, { "epoch": 0.5125323343145125, "grad_norm": 0.48113831877708435, "learning_rate": 0.00042324974355108285, "loss": 0.9106, "step": 2873 }, { "epoch": 0.5127107305325127, "grad_norm": 0.4926007390022278, "learning_rate": 0.0004231992261480122, "loss": 0.9383, "step": 2874 }, { "epoch": 0.5128891267505129, "grad_norm": 0.537855327129364, "learning_rate": 0.000423148695141949, "loss": 1.0949, "step": 2875 }, { "epoch": 0.5130675229685131, "grad_norm": 0.5071426033973694, "learning_rate": 0.0004230981505368616, "loss": 1.0838, "step": 2876 }, { "epoch": 0.5132459191865133, "grad_norm": 0.5214564800262451, "learning_rate": 0.00042304759233672, "loss": 1.0188, "step": 2877 }, { "epoch": 0.5134243154045134, "grad_norm": 0.48505401611328125, "learning_rate": 0.000422997020545495, "loss": 1.0605, "step": 2878 }, { "epoch": 0.5136027116225136, "grad_norm": 0.5197986364364624, "learning_rate": 0.0004229464351671585, "loss": 0.8301, "step": 2879 }, { "epoch": 0.5137811078405138, "grad_norm": 0.4954800605773926, "learning_rate": 0.00042289583620568326, "loss": 1.0674, "step": 2880 }, { "epoch": 0.513959504058514, "grad_norm": 0.5165004730224609, "learning_rate": 0.00042284522366504355, "loss": 0.9053, "step": 2881 }, { "epoch": 0.5141379002765142, "grad_norm": 0.6076773405075073, "learning_rate": 0.00042279459754921436, "loss": 1.08, "step": 2882 }, { "epoch": 0.5143162964945143, "grad_norm": 0.7019060850143433, "learning_rate": 0.0004227439578621718, "loss": 0.994, "step": 2883 }, { "epoch": 0.5144946927125145, "grad_norm": 0.5617488026618958, "learning_rate": 0.00042269330460789314, "loss": 1.1718, "step": 2884 }, { "epoch": 0.5146730889305147, "grad_norm": 0.46552005410194397, "learning_rate": 0.0004226426377903566, "loss": 0.8396, "step": 2885 }, { "epoch": 0.5148514851485149, "grad_norm": 0.49444034695625305, "learning_rate": 0.00042259195741354167, "loss": 0.8349, "step": 2886 }, { "epoch": 0.5150298813665151, "grad_norm": 0.46994128823280334, "learning_rate": 0.0004225412634814287, "loss": 0.9429, "step": 2887 }, { "epoch": 0.5152082775845153, "grad_norm": 0.5488457083702087, "learning_rate": 0.0004224905559979991, "loss": 0.9858, "step": 2888 }, { "epoch": 0.5153866738025153, "grad_norm": 0.5592421889305115, "learning_rate": 0.0004224398349672354, "loss": 1.0363, "step": 2889 }, { "epoch": 0.5155650700205155, "grad_norm": 0.4703425168991089, "learning_rate": 0.00042238910039312134, "loss": 0.9297, "step": 2890 }, { "epoch": 0.5157434662385157, "grad_norm": 0.9002471566200256, "learning_rate": 0.00042233835227964146, "loss": 0.8405, "step": 2891 }, { "epoch": 0.515921862456516, "grad_norm": 0.5256131291389465, "learning_rate": 0.0004222875906307816, "loss": 0.9992, "step": 2892 }, { "epoch": 0.5161002586745161, "grad_norm": 0.47402289509773254, "learning_rate": 0.0004222368154505285, "loss": 0.7949, "step": 2893 }, { "epoch": 0.5162786548925162, "grad_norm": 0.5556320548057556, "learning_rate": 0.0004221860267428701, "loss": 1.0749, "step": 2894 }, { "epoch": 0.5164570511105164, "grad_norm": 0.5696842670440674, "learning_rate": 0.0004221352245117952, "loss": 0.9672, "step": 2895 }, { "epoch": 0.5166354473285166, "grad_norm": 0.586262047290802, "learning_rate": 0.00042208440876129384, "loss": 1.2136, "step": 2896 }, { "epoch": 0.5168138435465168, "grad_norm": 0.49055540561676025, "learning_rate": 0.000422033579495357, "loss": 0.9203, "step": 2897 }, { "epoch": 0.516992239764517, "grad_norm": 0.5185585618019104, "learning_rate": 0.00042198273671797693, "loss": 0.9001, "step": 2898 }, { "epoch": 0.5171706359825172, "grad_norm": 0.5147084593772888, "learning_rate": 0.00042193188043314675, "loss": 1.0424, "step": 2899 }, { "epoch": 0.5173490322005173, "grad_norm": 0.5919014811515808, "learning_rate": 0.0004218810106448606, "loss": 1.3816, "step": 2900 }, { "epoch": 0.5175274284185175, "grad_norm": 0.494163453578949, "learning_rate": 0.0004218301273571139, "loss": 0.8105, "step": 2901 }, { "epoch": 0.5177058246365177, "grad_norm": 0.558854877948761, "learning_rate": 0.000421779230573903, "loss": 1.0603, "step": 2902 }, { "epoch": 0.5178842208545179, "grad_norm": 0.47792795300483704, "learning_rate": 0.00042172832029922514, "loss": 0.9657, "step": 2903 }, { "epoch": 0.5180626170725181, "grad_norm": 0.4979441463947296, "learning_rate": 0.0004216773965370789, "loss": 0.8444, "step": 2904 }, { "epoch": 0.5182410132905182, "grad_norm": 0.5534027814865112, "learning_rate": 0.00042162645929146394, "loss": 1.0369, "step": 2905 }, { "epoch": 0.5184194095085184, "grad_norm": 0.525301992893219, "learning_rate": 0.0004215755085663806, "loss": 1.0527, "step": 2906 }, { "epoch": 0.5185978057265186, "grad_norm": 0.5665169954299927, "learning_rate": 0.0004215245443658307, "loss": 1.0721, "step": 2907 }, { "epoch": 0.5187762019445188, "grad_norm": 0.4932265281677246, "learning_rate": 0.0004214735666938169, "loss": 1.0579, "step": 2908 }, { "epoch": 0.518954598162519, "grad_norm": 0.47899937629699707, "learning_rate": 0.0004214225755543429, "loss": 0.9099, "step": 2909 }, { "epoch": 0.5191329943805192, "grad_norm": 0.459412157535553, "learning_rate": 0.00042137157095141367, "loss": 0.8198, "step": 2910 }, { "epoch": 0.5193113905985193, "grad_norm": 0.4750489890575409, "learning_rate": 0.00042132055288903505, "loss": 0.9241, "step": 2911 }, { "epoch": 0.5194897868165195, "grad_norm": 0.4672413468360901, "learning_rate": 0.0004212695213712138, "loss": 0.7955, "step": 2912 }, { "epoch": 0.5196681830345197, "grad_norm": 0.44326889514923096, "learning_rate": 0.0004212184764019581, "loss": 0.8813, "step": 2913 }, { "epoch": 0.5198465792525199, "grad_norm": 0.5115212798118591, "learning_rate": 0.00042116741798527694, "loss": 1.0218, "step": 2914 }, { "epoch": 0.5200249754705201, "grad_norm": 0.471718430519104, "learning_rate": 0.0004211163461251804, "loss": 0.9756, "step": 2915 }, { "epoch": 0.5202033716885202, "grad_norm": 0.4659128785133362, "learning_rate": 0.0004210652608256798, "loss": 0.9583, "step": 2916 }, { "epoch": 0.5203817679065204, "grad_norm": 0.5611960887908936, "learning_rate": 0.00042101416209078707, "loss": 1.1081, "step": 2917 }, { "epoch": 0.5205601641245206, "grad_norm": 0.5797203183174133, "learning_rate": 0.00042096304992451575, "loss": 1.1466, "step": 2918 }, { "epoch": 0.5207385603425208, "grad_norm": 0.522687554359436, "learning_rate": 0.00042091192433088, "loss": 0.9893, "step": 2919 }, { "epoch": 0.520916956560521, "grad_norm": 0.5880950093269348, "learning_rate": 0.00042086078531389524, "loss": 1.0184, "step": 2920 }, { "epoch": 0.5210953527785211, "grad_norm": 1.1611589193344116, "learning_rate": 0.000420809632877578, "loss": 1.0494, "step": 2921 }, { "epoch": 0.5212737489965212, "grad_norm": 0.5259320735931396, "learning_rate": 0.00042075846702594567, "loss": 1.0708, "step": 2922 }, { "epoch": 0.5214521452145214, "grad_norm": 0.5445079803466797, "learning_rate": 0.0004207072877630168, "loss": 1.2258, "step": 2923 }, { "epoch": 0.5216305414325216, "grad_norm": 0.43372058868408203, "learning_rate": 0.00042065609509281106, "loss": 0.9371, "step": 2924 }, { "epoch": 0.5218089376505218, "grad_norm": 0.4435960650444031, "learning_rate": 0.000420604889019349, "loss": 0.8437, "step": 2925 }, { "epoch": 0.521987333868522, "grad_norm": 0.6090930104255676, "learning_rate": 0.00042055366954665244, "loss": 1.4102, "step": 2926 }, { "epoch": 0.5221657300865221, "grad_norm": 0.5087897777557373, "learning_rate": 0.000420502436678744, "loss": 0.9809, "step": 2927 }, { "epoch": 0.5223441263045223, "grad_norm": 0.4611089825630188, "learning_rate": 0.0004204511904196476, "loss": 0.9503, "step": 2928 }, { "epoch": 0.5225225225225225, "grad_norm": 0.5042673945426941, "learning_rate": 0.000420399930773388, "loss": 0.865, "step": 2929 }, { "epoch": 0.5227009187405227, "grad_norm": 0.4814876914024353, "learning_rate": 0.00042034865774399124, "loss": 1.1542, "step": 2930 }, { "epoch": 0.5228793149585229, "grad_norm": 0.47714847326278687, "learning_rate": 0.0004202973713354842, "loss": 0.9069, "step": 2931 }, { "epoch": 0.5230577111765231, "grad_norm": 0.5801002979278564, "learning_rate": 0.0004202460715518948, "loss": 1.1885, "step": 2932 }, { "epoch": 0.5232361073945232, "grad_norm": 0.45451635122299194, "learning_rate": 0.0004201947583972523, "loss": 0.8125, "step": 2933 }, { "epoch": 0.5234145036125234, "grad_norm": 0.5364030003547668, "learning_rate": 0.00042014343187558666, "loss": 0.9351, "step": 2934 }, { "epoch": 0.5235928998305236, "grad_norm": 0.5005396604537964, "learning_rate": 0.0004200920919909292, "loss": 1.0081, "step": 2935 }, { "epoch": 0.5237712960485238, "grad_norm": 0.5246373414993286, "learning_rate": 0.00042004073874731196, "loss": 1.0374, "step": 2936 }, { "epoch": 0.523949692266524, "grad_norm": 0.5010136365890503, "learning_rate": 0.0004199893721487682, "loss": 1.0534, "step": 2937 }, { "epoch": 0.5241280884845241, "grad_norm": 0.9451296329498291, "learning_rate": 0.00041993799219933235, "loss": 1.0364, "step": 2938 }, { "epoch": 0.5243064847025243, "grad_norm": 0.5468368530273438, "learning_rate": 0.0004198865989030398, "loss": 1.0716, "step": 2939 }, { "epoch": 0.5244848809205245, "grad_norm": 0.46857789158821106, "learning_rate": 0.00041983519226392686, "loss": 1.0187, "step": 2940 }, { "epoch": 0.5246632771385247, "grad_norm": 0.4949086606502533, "learning_rate": 0.00041978377228603093, "loss": 0.8706, "step": 2941 }, { "epoch": 0.5248416733565249, "grad_norm": 0.4794335961341858, "learning_rate": 0.00041973233897339067, "loss": 0.9308, "step": 2942 }, { "epoch": 0.5250200695745251, "grad_norm": 0.4741996228694916, "learning_rate": 0.0004196808923300455, "loss": 1.0586, "step": 2943 }, { "epoch": 0.5251984657925252, "grad_norm": 0.49954500794410706, "learning_rate": 0.0004196294323600361, "loss": 0.9257, "step": 2944 }, { "epoch": 0.5253768620105254, "grad_norm": 0.5138105154037476, "learning_rate": 0.000419577959067404, "loss": 0.9921, "step": 2945 }, { "epoch": 0.5255552582285256, "grad_norm": 0.4848662316799164, "learning_rate": 0.00041952647245619204, "loss": 1.0388, "step": 2946 }, { "epoch": 0.5257336544465258, "grad_norm": 0.4525747001171112, "learning_rate": 0.00041947497253044385, "loss": 0.9866, "step": 2947 }, { "epoch": 0.525912050664526, "grad_norm": 0.5498698949813843, "learning_rate": 0.0004194234592942043, "loss": 1.1586, "step": 2948 }, { "epoch": 0.526090446882526, "grad_norm": 0.4874838590621948, "learning_rate": 0.0004193719327515192, "loss": 1.0105, "step": 2949 }, { "epoch": 0.5262688431005262, "grad_norm": 0.5352873206138611, "learning_rate": 0.0004193203929064353, "loss": 1.0994, "step": 2950 }, { "epoch": 0.5264472393185264, "grad_norm": 0.4859972596168518, "learning_rate": 0.0004192688397630006, "loss": 0.9533, "step": 2951 }, { "epoch": 0.5266256355365266, "grad_norm": 0.5137537717819214, "learning_rate": 0.0004192172733252641, "loss": 1.0008, "step": 2952 }, { "epoch": 0.5268040317545268, "grad_norm": 0.47844600677490234, "learning_rate": 0.00041916569359727574, "loss": 1.0954, "step": 2953 }, { "epoch": 0.526982427972527, "grad_norm": 0.52599036693573, "learning_rate": 0.00041911410058308667, "loss": 1.0523, "step": 2954 }, { "epoch": 0.5271608241905271, "grad_norm": 0.47040775418281555, "learning_rate": 0.0004190624942867489, "loss": 0.8066, "step": 2955 }, { "epoch": 0.5273392204085273, "grad_norm": 0.5250223278999329, "learning_rate": 0.0004190108747123156, "loss": 1.2687, "step": 2956 }, { "epoch": 0.5275176166265275, "grad_norm": 0.5163100361824036, "learning_rate": 0.0004189592418638408, "loss": 1.1398, "step": 2957 }, { "epoch": 0.5276960128445277, "grad_norm": 0.47173264622688293, "learning_rate": 0.00041890759574538, "loss": 0.9907, "step": 2958 }, { "epoch": 0.5278744090625279, "grad_norm": 0.5113806128501892, "learning_rate": 0.0004188559363609893, "loss": 1.2076, "step": 2959 }, { "epoch": 0.528052805280528, "grad_norm": 0.47075924277305603, "learning_rate": 0.0004188042637147259, "loss": 0.8586, "step": 2960 }, { "epoch": 0.5282312014985282, "grad_norm": 0.4917936325073242, "learning_rate": 0.00041875257781064833, "loss": 0.913, "step": 2961 }, { "epoch": 0.5284095977165284, "grad_norm": 0.49223649501800537, "learning_rate": 0.000418700878652816, "loss": 1.0004, "step": 2962 }, { "epoch": 0.5285879939345286, "grad_norm": 0.4502630829811096, "learning_rate": 0.0004186491662452892, "loss": 0.8698, "step": 2963 }, { "epoch": 0.5287663901525288, "grad_norm": 0.5051430463790894, "learning_rate": 0.00041859744059212945, "loss": 1.093, "step": 2964 }, { "epoch": 0.528944786370529, "grad_norm": 0.7281007170677185, "learning_rate": 0.0004185457016973993, "loss": 1.0823, "step": 2965 }, { "epoch": 0.5291231825885291, "grad_norm": 0.4868094027042389, "learning_rate": 0.00041849394956516227, "loss": 1.0735, "step": 2966 }, { "epoch": 0.5293015788065293, "grad_norm": 0.4833069443702698, "learning_rate": 0.0004184421841994829, "loss": 0.8735, "step": 2967 }, { "epoch": 0.5294799750245295, "grad_norm": 0.5559689402580261, "learning_rate": 0.0004183904056044269, "loss": 1.0569, "step": 2968 }, { "epoch": 0.5296583712425297, "grad_norm": 0.5828696489334106, "learning_rate": 0.0004183386137840609, "loss": 1.126, "step": 2969 }, { "epoch": 0.5298367674605299, "grad_norm": 0.48288482427597046, "learning_rate": 0.0004182868087424526, "loss": 1.1427, "step": 2970 }, { "epoch": 0.53001516367853, "grad_norm": 0.4690031111240387, "learning_rate": 0.0004182349904836708, "loss": 0.9756, "step": 2971 }, { "epoch": 0.5301935598965302, "grad_norm": 0.5362717509269714, "learning_rate": 0.00041818315901178527, "loss": 1.1166, "step": 2972 }, { "epoch": 0.5303719561145304, "grad_norm": 0.5287902355194092, "learning_rate": 0.0004181313143308667, "loss": 1.1939, "step": 2973 }, { "epoch": 0.5305503523325306, "grad_norm": 0.4559158682823181, "learning_rate": 0.0004180794564449872, "loss": 0.7655, "step": 2974 }, { "epoch": 0.5307287485505308, "grad_norm": 0.5267648696899414, "learning_rate": 0.0004180275853582194, "loss": 0.9355, "step": 2975 }, { "epoch": 0.530907144768531, "grad_norm": 0.46050286293029785, "learning_rate": 0.00041797570107463737, "loss": 0.931, "step": 2976 }, { "epoch": 0.531085540986531, "grad_norm": 0.4590972363948822, "learning_rate": 0.0004179238035983161, "loss": 0.9246, "step": 2977 }, { "epoch": 0.5312639372045312, "grad_norm": 0.5887408256530762, "learning_rate": 0.00041787189293333155, "loss": 0.9576, "step": 2978 }, { "epoch": 0.5314423334225314, "grad_norm": 0.5028809309005737, "learning_rate": 0.00041781996908376077, "loss": 1.0257, "step": 2979 }, { "epoch": 0.5316207296405316, "grad_norm": 0.4969048500061035, "learning_rate": 0.00041776803205368187, "loss": 1.0092, "step": 2980 }, { "epoch": 0.5317991258585318, "grad_norm": 0.5167542099952698, "learning_rate": 0.00041771608184717384, "loss": 1.0632, "step": 2981 }, { "epoch": 0.5319775220765319, "grad_norm": 0.4737110733985901, "learning_rate": 0.00041766411846831696, "loss": 0.8571, "step": 2982 }, { "epoch": 0.5321559182945321, "grad_norm": 0.5081421732902527, "learning_rate": 0.00041761214192119234, "loss": 1.216, "step": 2983 }, { "epoch": 0.5323343145125323, "grad_norm": 0.47318655252456665, "learning_rate": 0.0004175601522098823, "loss": 0.939, "step": 2984 }, { "epoch": 0.5325127107305325, "grad_norm": 0.5063821077346802, "learning_rate": 0.0004175081493384699, "loss": 0.9156, "step": 2985 }, { "epoch": 0.5326911069485327, "grad_norm": 0.483039915561676, "learning_rate": 0.00041745613331103964, "loss": 0.9842, "step": 2986 }, { "epoch": 0.5328695031665329, "grad_norm": 0.4720330536365509, "learning_rate": 0.0004174041041316767, "loss": 1.0204, "step": 2987 }, { "epoch": 0.533047899384533, "grad_norm": 11.117390632629395, "learning_rate": 0.0004173520618044675, "loss": 1.2214, "step": 2988 }, { "epoch": 0.5332262956025332, "grad_norm": 0.5497750043869019, "learning_rate": 0.00041730000633349927, "loss": 1.0853, "step": 2989 }, { "epoch": 0.5334046918205334, "grad_norm": 0.5697376132011414, "learning_rate": 0.00041724793772286066, "loss": 1.0129, "step": 2990 }, { "epoch": 0.5335830880385336, "grad_norm": 0.527197003364563, "learning_rate": 0.0004171958559766409, "loss": 1.2534, "step": 2991 }, { "epoch": 0.5337614842565338, "grad_norm": 1.886084794998169, "learning_rate": 0.0004171437610989306, "loss": 1.1865, "step": 2992 }, { "epoch": 0.5339398804745339, "grad_norm": 0.5286929607391357, "learning_rate": 0.00041709165309382123, "loss": 1.2268, "step": 2993 }, { "epoch": 0.5341182766925341, "grad_norm": 0.5276958346366882, "learning_rate": 0.0004170395319654054, "loss": 1.024, "step": 2994 }, { "epoch": 0.5342966729105343, "grad_norm": 0.44607049226760864, "learning_rate": 0.0004169873977177765, "loss": 0.8801, "step": 2995 }, { "epoch": 0.5344750691285345, "grad_norm": 0.4721534252166748, "learning_rate": 0.0004169352503550293, "loss": 0.9466, "step": 2996 }, { "epoch": 0.5346534653465347, "grad_norm": 0.5831915736198425, "learning_rate": 0.00041688308988125944, "loss": 1.2001, "step": 2997 }, { "epoch": 0.5348318615645349, "grad_norm": 0.49683353304862976, "learning_rate": 0.00041683091630056334, "loss": 0.9686, "step": 2998 }, { "epoch": 0.535010257782535, "grad_norm": 0.6138126850128174, "learning_rate": 0.000416778729617039, "loss": 1.0234, "step": 2999 }, { "epoch": 0.5351886540005352, "grad_norm": 0.506582498550415, "learning_rate": 0.000416726529834785, "loss": 0.9276, "step": 3000 }, { "epoch": 0.5353670502185354, "grad_norm": 0.5279721617698669, "learning_rate": 0.000416674316957901, "loss": 1.1615, "step": 3001 }, { "epoch": 0.5355454464365356, "grad_norm": 0.48710203170776367, "learning_rate": 0.000416622090990488, "loss": 1.0638, "step": 3002 }, { "epoch": 0.5357238426545358, "grad_norm": 0.5252576470375061, "learning_rate": 0.00041656985193664763, "loss": 0.9231, "step": 3003 }, { "epoch": 0.5359022388725359, "grad_norm": 0.529524028301239, "learning_rate": 0.00041651759980048276, "loss": 0.9436, "step": 3004 }, { "epoch": 0.536080635090536, "grad_norm": 0.4857736825942993, "learning_rate": 0.00041646533458609725, "loss": 0.8424, "step": 3005 }, { "epoch": 0.5362590313085362, "grad_norm": 0.5015032291412354, "learning_rate": 0.00041641305629759595, "loss": 1.0656, "step": 3006 }, { "epoch": 0.5364374275265364, "grad_norm": 0.4542873501777649, "learning_rate": 0.0004163607649390849, "loss": 1.0041, "step": 3007 }, { "epoch": 0.5366158237445366, "grad_norm": 0.5027053952217102, "learning_rate": 0.000416308460514671, "loss": 1.209, "step": 3008 }, { "epoch": 0.5367942199625368, "grad_norm": 0.4859675168991089, "learning_rate": 0.00041625614302846206, "loss": 0.9146, "step": 3009 }, { "epoch": 0.5369726161805369, "grad_norm": 0.5812960863113403, "learning_rate": 0.0004162038124845673, "loss": 1.044, "step": 3010 }, { "epoch": 0.5371510123985371, "grad_norm": 0.5171328186988831, "learning_rate": 0.00041615146888709654, "loss": 0.9578, "step": 3011 }, { "epoch": 0.5373294086165373, "grad_norm": 0.5038743615150452, "learning_rate": 0.000416099112240161, "loss": 0.9988, "step": 3012 }, { "epoch": 0.5375078048345375, "grad_norm": 0.5556420087814331, "learning_rate": 0.0004160467425478726, "loss": 1.0005, "step": 3013 }, { "epoch": 0.5376862010525377, "grad_norm": 0.45622992515563965, "learning_rate": 0.0004159943598143445, "loss": 0.9728, "step": 3014 }, { "epoch": 0.5378645972705378, "grad_norm": 0.5222346782684326, "learning_rate": 0.00041594196404369076, "loss": 1.1148, "step": 3015 }, { "epoch": 0.538042993488538, "grad_norm": 0.6195734143257141, "learning_rate": 0.0004158895552400267, "loss": 1.0752, "step": 3016 }, { "epoch": 0.5382213897065382, "grad_norm": 0.5263445973396301, "learning_rate": 0.0004158371334074683, "loss": 1.023, "step": 3017 }, { "epoch": 0.5383997859245384, "grad_norm": 0.4883100688457489, "learning_rate": 0.00041578469855013277, "loss": 0.8856, "step": 3018 }, { "epoch": 0.5385781821425386, "grad_norm": 0.517374575138092, "learning_rate": 0.0004157322506721384, "loss": 1.1114, "step": 3019 }, { "epoch": 0.5387565783605388, "grad_norm": 0.5005238652229309, "learning_rate": 0.00041567978977760444, "loss": 1.1022, "step": 3020 }, { "epoch": 0.5389349745785389, "grad_norm": 0.46176883578300476, "learning_rate": 0.00041562731587065093, "loss": 0.758, "step": 3021 }, { "epoch": 0.5391133707965391, "grad_norm": 0.43858736753463745, "learning_rate": 0.00041557482895539943, "loss": 0.8694, "step": 3022 }, { "epoch": 0.5392917670145393, "grad_norm": 0.4803151488304138, "learning_rate": 0.0004155223290359721, "loss": 0.8272, "step": 3023 }, { "epoch": 0.5394701632325395, "grad_norm": 0.5557406544685364, "learning_rate": 0.0004154698161164923, "loss": 1.0889, "step": 3024 }, { "epoch": 0.5396485594505397, "grad_norm": 2.459235668182373, "learning_rate": 0.0004154172902010843, "loss": 1.1591, "step": 3025 }, { "epoch": 0.5398269556685398, "grad_norm": 0.5599625706672668, "learning_rate": 0.0004153647512938735, "loss": 1.1661, "step": 3026 }, { "epoch": 0.54000535188654, "grad_norm": 0.5514055490493774, "learning_rate": 0.00041531219939898635, "loss": 1.0657, "step": 3027 }, { "epoch": 0.5401837481045402, "grad_norm": 0.8572493195533752, "learning_rate": 0.0004152596345205502, "loss": 0.923, "step": 3028 }, { "epoch": 0.5403621443225404, "grad_norm": 0.497321754693985, "learning_rate": 0.00041520705666269343, "loss": 0.9463, "step": 3029 }, { "epoch": 0.5405405405405406, "grad_norm": 0.5045643448829651, "learning_rate": 0.0004151544658295455, "loss": 0.9698, "step": 3030 }, { "epoch": 0.5407189367585408, "grad_norm": 0.519391655921936, "learning_rate": 0.00041510186202523697, "loss": 0.8543, "step": 3031 }, { "epoch": 0.5408973329765409, "grad_norm": 1.3143996000289917, "learning_rate": 0.0004150492452538992, "loss": 0.9187, "step": 3032 }, { "epoch": 0.541075729194541, "grad_norm": 0.5469959378242493, "learning_rate": 0.0004149966155196648, "loss": 1.3002, "step": 3033 }, { "epoch": 0.5412541254125413, "grad_norm": 0.4593210518360138, "learning_rate": 0.0004149439728266671, "loss": 0.9523, "step": 3034 }, { "epoch": 0.5414325216305415, "grad_norm": 0.4348868727684021, "learning_rate": 0.0004148913171790408, "loss": 0.7697, "step": 3035 }, { "epoch": 0.5416109178485417, "grad_norm": 0.5638412833213806, "learning_rate": 0.00041483864858092145, "loss": 1.1035, "step": 3036 }, { "epoch": 0.5417893140665417, "grad_norm": 0.4964679479598999, "learning_rate": 0.00041478596703644553, "loss": 1.0256, "step": 3037 }, { "epoch": 0.5419677102845419, "grad_norm": 0.5189842581748962, "learning_rate": 0.0004147332725497507, "loss": 0.9451, "step": 3038 }, { "epoch": 0.5421461065025421, "grad_norm": 0.5478013157844543, "learning_rate": 0.0004146805651249755, "loss": 0.938, "step": 3039 }, { "epoch": 0.5423245027205423, "grad_norm": 0.5358539819717407, "learning_rate": 0.0004146278447662597, "loss": 0.8084, "step": 3040 }, { "epoch": 0.5425028989385425, "grad_norm": 0.5389429330825806, "learning_rate": 0.00041457511147774374, "loss": 1.1265, "step": 3041 }, { "epoch": 0.5426812951565427, "grad_norm": 0.4505546987056732, "learning_rate": 0.0004145223652635693, "loss": 0.8022, "step": 3042 }, { "epoch": 0.5428596913745428, "grad_norm": 0.5185169577598572, "learning_rate": 0.00041446960612787916, "loss": 1.1086, "step": 3043 }, { "epoch": 0.543038087592543, "grad_norm": 0.4975489377975464, "learning_rate": 0.00041441683407481683, "loss": 0.9063, "step": 3044 }, { "epoch": 0.5432164838105432, "grad_norm": 0.48402321338653564, "learning_rate": 0.0004143640491085272, "loss": 0.9616, "step": 3045 }, { "epoch": 0.5433948800285434, "grad_norm": 0.47887566685676575, "learning_rate": 0.0004143112512331558, "loss": 0.9543, "step": 3046 }, { "epoch": 0.5435732762465436, "grad_norm": 0.5627540946006775, "learning_rate": 0.00041425844045284957, "loss": 1.1141, "step": 3047 }, { "epoch": 0.5437516724645437, "grad_norm": 0.4868026077747345, "learning_rate": 0.0004142056167717561, "loss": 0.8547, "step": 3048 }, { "epoch": 0.5439300686825439, "grad_norm": 0.4439375102519989, "learning_rate": 0.000414152780194024, "loss": 0.7576, "step": 3049 }, { "epoch": 0.5441084649005441, "grad_norm": 0.5482441782951355, "learning_rate": 0.00041409993072380333, "loss": 1.1234, "step": 3050 }, { "epoch": 0.5442868611185443, "grad_norm": 0.5158576965332031, "learning_rate": 0.00041404706836524463, "loss": 0.9731, "step": 3051 }, { "epoch": 0.5444652573365445, "grad_norm": 0.44664448499679565, "learning_rate": 0.0004139941931224998, "loss": 0.9086, "step": 3052 }, { "epoch": 0.5446436535545447, "grad_norm": 0.6744524240493774, "learning_rate": 0.0004139413049997216, "loss": 1.1988, "step": 3053 }, { "epoch": 0.5448220497725448, "grad_norm": 0.47927117347717285, "learning_rate": 0.0004138884040010639, "loss": 1.06, "step": 3054 }, { "epoch": 0.545000445990545, "grad_norm": 0.4434390962123871, "learning_rate": 0.00041383549013068147, "loss": 0.9022, "step": 3055 }, { "epoch": 0.5451788422085452, "grad_norm": 0.5459398627281189, "learning_rate": 0.0004137825633927301, "loss": 1.183, "step": 3056 }, { "epoch": 0.5453572384265454, "grad_norm": 0.48765432834625244, "learning_rate": 0.00041372962379136676, "loss": 1.0203, "step": 3057 }, { "epoch": 0.5455356346445456, "grad_norm": 0.8096850514411926, "learning_rate": 0.00041367667133074916, "loss": 1.0457, "step": 3058 }, { "epoch": 0.5457140308625457, "grad_norm": 0.4843488037586212, "learning_rate": 0.0004136237060150363, "loss": 1.0277, "step": 3059 }, { "epoch": 0.5458924270805459, "grad_norm": 0.48028862476348877, "learning_rate": 0.0004135707278483879, "loss": 0.9519, "step": 3060 }, { "epoch": 0.5460708232985461, "grad_norm": 0.7017949223518372, "learning_rate": 0.00041351773683496497, "loss": 0.9328, "step": 3061 }, { "epoch": 0.5462492195165463, "grad_norm": 1.0810225009918213, "learning_rate": 0.0004134647329789293, "loss": 1.1302, "step": 3062 }, { "epoch": 0.5464276157345465, "grad_norm": 0.5587711930274963, "learning_rate": 0.0004134117162844439, "loss": 1.0982, "step": 3063 }, { "epoch": 0.5466060119525467, "grad_norm": 0.579770028591156, "learning_rate": 0.00041335868675567263, "loss": 1.14, "step": 3064 }, { "epoch": 0.5467844081705467, "grad_norm": 0.7189714908599854, "learning_rate": 0.0004133056443967804, "loss": 1.0545, "step": 3065 }, { "epoch": 0.5469628043885469, "grad_norm": 0.7987565398216248, "learning_rate": 0.0004132525892119331, "loss": 1.0576, "step": 3066 }, { "epoch": 0.5471412006065471, "grad_norm": 0.6334807276725769, "learning_rate": 0.00041319952120529767, "loss": 1.0614, "step": 3067 }, { "epoch": 0.5473195968245473, "grad_norm": 27.262441635131836, "learning_rate": 0.00041314644038104216, "loss": 1.2708, "step": 3068 }, { "epoch": 0.5474979930425475, "grad_norm": 0.5688011646270752, "learning_rate": 0.00041309334674333544, "loss": 0.8942, "step": 3069 }, { "epoch": 0.5476763892605476, "grad_norm": 0.5215781331062317, "learning_rate": 0.00041304024029634737, "loss": 0.895, "step": 3070 }, { "epoch": 0.5478547854785478, "grad_norm": 0.5432342886924744, "learning_rate": 0.00041298712104424903, "loss": 0.9245, "step": 3071 }, { "epoch": 0.548033181696548, "grad_norm": 0.49601709842681885, "learning_rate": 0.0004129339889912123, "loss": 0.9943, "step": 3072 }, { "epoch": 0.5482115779145482, "grad_norm": 0.5069965124130249, "learning_rate": 0.0004128808441414103, "loss": 0.8476, "step": 3073 }, { "epoch": 0.5483899741325484, "grad_norm": 0.7115893959999084, "learning_rate": 0.0004128276864990168, "loss": 1.1841, "step": 3074 }, { "epoch": 0.5485683703505486, "grad_norm": 1.3543672561645508, "learning_rate": 0.0004127745160682068, "loss": 1.011, "step": 3075 }, { "epoch": 0.5487467665685487, "grad_norm": 0.6685739159584045, "learning_rate": 0.0004127213328531565, "loss": 1.1114, "step": 3076 }, { "epoch": 0.5489251627865489, "grad_norm": 1.056970477104187, "learning_rate": 0.0004126681368580427, "loss": 1.0183, "step": 3077 }, { "epoch": 0.5491035590045491, "grad_norm": 0.7646797299385071, "learning_rate": 0.00041261492808704336, "loss": 1.1184, "step": 3078 }, { "epoch": 0.5492819552225493, "grad_norm": 0.5978376269340515, "learning_rate": 0.00041256170654433767, "loss": 0.8961, "step": 3079 }, { "epoch": 0.5494603514405495, "grad_norm": 0.5183614492416382, "learning_rate": 0.0004125084722341054, "loss": 0.8404, "step": 3080 }, { "epoch": 0.5496387476585496, "grad_norm": 0.9243518114089966, "learning_rate": 0.0004124552251605277, "loss": 1.2223, "step": 3081 }, { "epoch": 0.5498171438765498, "grad_norm": 0.5044293403625488, "learning_rate": 0.0004124019653277865, "loss": 0.8062, "step": 3082 }, { "epoch": 0.54999554009455, "grad_norm": 0.8046514391899109, "learning_rate": 0.0004123486927400649, "loss": 0.9456, "step": 3083 }, { "epoch": 0.5501739363125502, "grad_norm": 0.5840319991111755, "learning_rate": 0.0004122954074015468, "loss": 1.2686, "step": 3084 }, { "epoch": 0.5503523325305504, "grad_norm": 1.5194361209869385, "learning_rate": 0.0004122421093164172, "loss": 1.1059, "step": 3085 }, { "epoch": 0.5505307287485506, "grad_norm": 0.5864779353141785, "learning_rate": 0.0004121887984888622, "loss": 1.0505, "step": 3086 }, { "epoch": 0.5507091249665507, "grad_norm": 1.8019944429397583, "learning_rate": 0.00041213547492306875, "loss": 1.0039, "step": 3087 }, { "epoch": 0.5508875211845509, "grad_norm": 0.5829388499259949, "learning_rate": 0.00041208213862322485, "loss": 1.1364, "step": 3088 }, { "epoch": 0.5510659174025511, "grad_norm": 0.7362989783287048, "learning_rate": 0.0004120287895935196, "loss": 1.0522, "step": 3089 }, { "epoch": 0.5512443136205513, "grad_norm": 0.4867869019508362, "learning_rate": 0.00041197542783814287, "loss": 0.7512, "step": 3090 }, { "epoch": 0.5514227098385515, "grad_norm": 0.6978092193603516, "learning_rate": 0.0004119220533612857, "loss": 1.1723, "step": 3091 }, { "epoch": 0.5516011060565515, "grad_norm": 0.5516607761383057, "learning_rate": 0.00041186866616714024, "loss": 1.0468, "step": 3092 }, { "epoch": 0.5517795022745517, "grad_norm": 0.7430167198181152, "learning_rate": 0.0004118152662598994, "loss": 1.1032, "step": 3093 }, { "epoch": 0.551957898492552, "grad_norm": 0.48890167474746704, "learning_rate": 0.0004117618536437571, "loss": 0.7917, "step": 3094 }, { "epoch": 0.5521362947105521, "grad_norm": 0.5151326060295105, "learning_rate": 0.00041170842832290844, "loss": 0.8906, "step": 3095 }, { "epoch": 0.5523146909285523, "grad_norm": 0.4648483991622925, "learning_rate": 0.0004116549903015495, "loss": 0.9603, "step": 3096 }, { "epoch": 0.5524930871465525, "grad_norm": 0.7839336395263672, "learning_rate": 0.00041160153958387714, "loss": 1.0892, "step": 3097 }, { "epoch": 0.5526714833645526, "grad_norm": 0.48735418915748596, "learning_rate": 0.0004115480761740893, "loss": 0.9049, "step": 3098 }, { "epoch": 0.5528498795825528, "grad_norm": 0.5337851047515869, "learning_rate": 0.0004114946000763852, "loss": 1.0263, "step": 3099 }, { "epoch": 0.553028275800553, "grad_norm": 0.7853549122810364, "learning_rate": 0.0004114411112949647, "loss": 1.3071, "step": 3100 }, { "epoch": 0.5532066720185532, "grad_norm": 0.5989134907722473, "learning_rate": 0.0004113876098340288, "loss": 1.1487, "step": 3101 }, { "epoch": 0.5533850682365534, "grad_norm": 0.5571014881134033, "learning_rate": 0.00041133409569777936, "loss": 0.8642, "step": 3102 }, { "epoch": 0.5535634644545535, "grad_norm": 0.5321959853172302, "learning_rate": 0.0004112805688904196, "loss": 0.9383, "step": 3103 }, { "epoch": 0.5537418606725537, "grad_norm": 0.7023414373397827, "learning_rate": 0.00041122702941615334, "loss": 1.0434, "step": 3104 }, { "epoch": 0.5539202568905539, "grad_norm": 0.6236468553543091, "learning_rate": 0.00041117347727918555, "loss": 0.9331, "step": 3105 }, { "epoch": 0.5540986531085541, "grad_norm": 0.5762714743614197, "learning_rate": 0.00041111991248372215, "loss": 0.9617, "step": 3106 }, { "epoch": 0.5542770493265543, "grad_norm": 0.9596529603004456, "learning_rate": 0.00041106633503397016, "loss": 1.0038, "step": 3107 }, { "epoch": 0.5544554455445545, "grad_norm": 0.46744972467422485, "learning_rate": 0.00041101274493413764, "loss": 0.8293, "step": 3108 }, { "epoch": 0.5546338417625546, "grad_norm": 0.514829158782959, "learning_rate": 0.0004109591421884334, "loss": 0.9105, "step": 3109 }, { "epoch": 0.5548122379805548, "grad_norm": 0.5770788788795471, "learning_rate": 0.0004109055268010674, "loss": 1.164, "step": 3110 }, { "epoch": 0.554990634198555, "grad_norm": 9.369170188903809, "learning_rate": 0.00041085189877625053, "loss": 1.8313, "step": 3111 }, { "epoch": 0.5551690304165552, "grad_norm": 0.4911796748638153, "learning_rate": 0.0004107982581181947, "loss": 0.8055, "step": 3112 }, { "epoch": 0.5553474266345554, "grad_norm": 0.5030617117881775, "learning_rate": 0.00041074460483111287, "loss": 0.9508, "step": 3113 }, { "epoch": 0.5555258228525555, "grad_norm": 0.49075013399124146, "learning_rate": 0.000410690938919219, "loss": 0.9634, "step": 3114 }, { "epoch": 0.5557042190705557, "grad_norm": 0.5361286401748657, "learning_rate": 0.000410637260386728, "loss": 0.8745, "step": 3115 }, { "epoch": 0.5558826152885559, "grad_norm": 0.5814827084541321, "learning_rate": 0.00041058356923785565, "loss": 1.1225, "step": 3116 }, { "epoch": 0.5560610115065561, "grad_norm": 0.48355334997177124, "learning_rate": 0.0004105298654768189, "loss": 0.758, "step": 3117 }, { "epoch": 0.5562394077245563, "grad_norm": 0.6016453504562378, "learning_rate": 0.0004104761491078355, "loss": 1.1899, "step": 3118 }, { "epoch": 0.5564178039425565, "grad_norm": 0.7221107482910156, "learning_rate": 0.0004104224201351245, "loss": 1.1253, "step": 3119 }, { "epoch": 0.5565962001605566, "grad_norm": 0.5367442965507507, "learning_rate": 0.00041036867856290567, "loss": 0.8561, "step": 3120 }, { "epoch": 0.5567745963785568, "grad_norm": 0.7140209078788757, "learning_rate": 0.00041031492439539975, "loss": 0.961, "step": 3121 }, { "epoch": 0.556952992596557, "grad_norm": 0.7320627570152283, "learning_rate": 0.0004102611576368287, "loss": 1.2107, "step": 3122 }, { "epoch": 0.5571313888145571, "grad_norm": 0.5843806862831116, "learning_rate": 0.0004102073782914153, "loss": 0.844, "step": 3123 }, { "epoch": 0.5573097850325573, "grad_norm": 0.7023711204528809, "learning_rate": 0.00041015358636338343, "loss": 1.1463, "step": 3124 }, { "epoch": 0.5574881812505574, "grad_norm": 0.5064725875854492, "learning_rate": 0.0004100997818569577, "loss": 0.9558, "step": 3125 }, { "epoch": 0.5576665774685576, "grad_norm": 21.4315128326416, "learning_rate": 0.00041004596477636405, "loss": 1.645, "step": 3126 }, { "epoch": 0.5578449736865578, "grad_norm": 0.8198938965797424, "learning_rate": 0.0004099921351258292, "loss": 0.9687, "step": 3127 }, { "epoch": 0.558023369904558, "grad_norm": 8.272482872009277, "learning_rate": 0.00040993829290958086, "loss": 1.1445, "step": 3128 }, { "epoch": 0.5582017661225582, "grad_norm": 0.8829731941223145, "learning_rate": 0.0004098844381318478, "loss": 1.2275, "step": 3129 }, { "epoch": 0.5583801623405584, "grad_norm": 1.6794313192367554, "learning_rate": 0.00040983057079685984, "loss": 1.118, "step": 3130 }, { "epoch": 0.5585585585585585, "grad_norm": 1.177979826927185, "learning_rate": 0.0004097766909088476, "loss": 1.0017, "step": 3131 }, { "epoch": 0.5587369547765587, "grad_norm": 2.6292262077331543, "learning_rate": 0.0004097227984720429, "loss": 0.9328, "step": 3132 }, { "epoch": 0.5589153509945589, "grad_norm": 1.0025197267532349, "learning_rate": 0.0004096688934906782, "loss": 1.0593, "step": 3133 }, { "epoch": 0.5590937472125591, "grad_norm": 0.5532941222190857, "learning_rate": 0.0004096149759689874, "loss": 1.1529, "step": 3134 }, { "epoch": 0.5592721434305593, "grad_norm": 0.6335615515708923, "learning_rate": 0.00040956104591120503, "loss": 1.0475, "step": 3135 }, { "epoch": 0.5594505396485594, "grad_norm": 0.663866400718689, "learning_rate": 0.00040950710332156683, "loss": 0.8865, "step": 3136 }, { "epoch": 0.5596289358665596, "grad_norm": 0.5401614308357239, "learning_rate": 0.00040945314820430934, "loss": 0.9367, "step": 3137 }, { "epoch": 0.5598073320845598, "grad_norm": 0.5373167991638184, "learning_rate": 0.0004093991805636702, "loss": 0.8718, "step": 3138 }, { "epoch": 0.55998572830256, "grad_norm": 0.5260427594184875, "learning_rate": 0.00040934520040388807, "loss": 0.911, "step": 3139 }, { "epoch": 0.5601641245205602, "grad_norm": 0.5970147252082825, "learning_rate": 0.00040929120772920243, "loss": 1.0448, "step": 3140 }, { "epoch": 0.5603425207385604, "grad_norm": 0.5675042271614075, "learning_rate": 0.0004092372025438539, "loss": 1.1786, "step": 3141 }, { "epoch": 0.5605209169565605, "grad_norm": 0.5005537867546082, "learning_rate": 0.0004091831848520839, "loss": 0.827, "step": 3142 }, { "epoch": 0.5606993131745607, "grad_norm": 0.5521116256713867, "learning_rate": 0.00040912915465813525, "loss": 0.8586, "step": 3143 }, { "epoch": 0.5608777093925609, "grad_norm": 0.5322579145431519, "learning_rate": 0.0004090751119662511, "loss": 0.9931, "step": 3144 }, { "epoch": 0.5610561056105611, "grad_norm": 0.5480323433876038, "learning_rate": 0.00040902105678067627, "loss": 1.031, "step": 3145 }, { "epoch": 0.5612345018285613, "grad_norm": 0.5208452343940735, "learning_rate": 0.00040896698910565597, "loss": 1.0623, "step": 3146 }, { "epoch": 0.5614128980465614, "grad_norm": 0.5295253992080688, "learning_rate": 0.00040891290894543676, "loss": 0.9008, "step": 3147 }, { "epoch": 0.5615912942645616, "grad_norm": 0.521321713924408, "learning_rate": 0.00040885881630426616, "loss": 1.0494, "step": 3148 }, { "epoch": 0.5617696904825618, "grad_norm": 0.49271395802497864, "learning_rate": 0.0004088047111863924, "loss": 0.8965, "step": 3149 }, { "epoch": 0.561948086700562, "grad_norm": 0.5301600098609924, "learning_rate": 0.000408750593596065, "loss": 1.0071, "step": 3150 }, { "epoch": 0.5621264829185622, "grad_norm": 0.5512881875038147, "learning_rate": 0.0004086964635375342, "loss": 1.0579, "step": 3151 }, { "epoch": 0.5623048791365624, "grad_norm": 0.5429875254631042, "learning_rate": 0.00040864232101505153, "loss": 0.9885, "step": 3152 }, { "epoch": 0.5624832753545624, "grad_norm": 0.4952228367328644, "learning_rate": 0.00040858816603286924, "loss": 0.8444, "step": 3153 }, { "epoch": 0.5626616715725626, "grad_norm": 0.6742690205574036, "learning_rate": 0.00040853399859524066, "loss": 1.0294, "step": 3154 }, { "epoch": 0.5628400677905628, "grad_norm": 0.5073679685592651, "learning_rate": 0.00040847981870642004, "loss": 1.0393, "step": 3155 }, { "epoch": 0.563018464008563, "grad_norm": 0.5249845385551453, "learning_rate": 0.0004084256263706626, "loss": 1.0371, "step": 3156 }, { "epoch": 0.5631968602265632, "grad_norm": 0.5406726002693176, "learning_rate": 0.00040837142159222466, "loss": 1.096, "step": 3157 }, { "epoch": 0.5633752564445633, "grad_norm": 0.4763408899307251, "learning_rate": 0.0004083172043753635, "loss": 0.948, "step": 3158 }, { "epoch": 0.5635536526625635, "grad_norm": 0.5455912947654724, "learning_rate": 0.0004082629747243371, "loss": 0.9939, "step": 3159 }, { "epoch": 0.5637320488805637, "grad_norm": 0.5416297316551208, "learning_rate": 0.00040820873264340484, "loss": 0.823, "step": 3160 }, { "epoch": 0.5639104450985639, "grad_norm": 0.5497506856918335, "learning_rate": 0.0004081544781368268, "loss": 0.8989, "step": 3161 }, { "epoch": 0.5640888413165641, "grad_norm": 0.8488923907279968, "learning_rate": 0.0004081002112088641, "loss": 0.869, "step": 3162 }, { "epoch": 0.5642672375345643, "grad_norm": 0.539082944393158, "learning_rate": 0.0004080459318637789, "loss": 1.0027, "step": 3163 }, { "epoch": 0.5644456337525644, "grad_norm": 0.6145626306533813, "learning_rate": 0.0004079916401058342, "loss": 1.1312, "step": 3164 }, { "epoch": 0.5646240299705646, "grad_norm": 0.5471964478492737, "learning_rate": 0.00040793733593929405, "loss": 1.0311, "step": 3165 }, { "epoch": 0.5648024261885648, "grad_norm": 1.0628180503845215, "learning_rate": 0.00040788301936842353, "loss": 0.9842, "step": 3166 }, { "epoch": 0.564980822406565, "grad_norm": 0.6143336892127991, "learning_rate": 0.00040782869039748847, "loss": 1.0282, "step": 3167 }, { "epoch": 0.5651592186245652, "grad_norm": 0.4468381702899933, "learning_rate": 0.0004077743490307562, "loss": 0.6927, "step": 3168 }, { "epoch": 0.5653376148425653, "grad_norm": 0.524698793888092, "learning_rate": 0.0004077199952724944, "loss": 1.0529, "step": 3169 }, { "epoch": 0.5655160110605655, "grad_norm": 0.4760201573371887, "learning_rate": 0.0004076656291269719, "loss": 0.8662, "step": 3170 }, { "epoch": 0.5656944072785657, "grad_norm": 0.5162889957427979, "learning_rate": 0.00040761125059845887, "loss": 0.9204, "step": 3171 }, { "epoch": 0.5658728034965659, "grad_norm": 0.5113767981529236, "learning_rate": 0.00040755685969122603, "loss": 1.0988, "step": 3172 }, { "epoch": 0.5660511997145661, "grad_norm": 0.5655469298362732, "learning_rate": 0.0004075024564095452, "loss": 0.918, "step": 3173 }, { "epoch": 0.5662295959325663, "grad_norm": 0.5044064521789551, "learning_rate": 0.0004074480407576892, "loss": 1.1065, "step": 3174 }, { "epoch": 0.5664079921505664, "grad_norm": 0.5524999499320984, "learning_rate": 0.0004073936127399319, "loss": 1.0733, "step": 3175 }, { "epoch": 0.5665863883685666, "grad_norm": 0.4410164952278137, "learning_rate": 0.000407339172360548, "loss": 0.84, "step": 3176 }, { "epoch": 0.5667647845865668, "grad_norm": 0.5362854599952698, "learning_rate": 0.0004072847196238131, "loss": 0.9016, "step": 3177 }, { "epoch": 0.566943180804567, "grad_norm": 1.1655226945877075, "learning_rate": 0.0004072302545340041, "loss": 1.0215, "step": 3178 }, { "epoch": 0.5671215770225672, "grad_norm": 5.327235698699951, "learning_rate": 0.00040717577709539857, "loss": 1.6297, "step": 3179 }, { "epoch": 0.5672999732405672, "grad_norm": 0.6369319558143616, "learning_rate": 0.00040712128731227513, "loss": 1.1747, "step": 3180 }, { "epoch": 0.5674783694585674, "grad_norm": 0.5702166557312012, "learning_rate": 0.0004070667851889134, "loss": 1.0747, "step": 3181 }, { "epoch": 0.5676567656765676, "grad_norm": 0.6028847694396973, "learning_rate": 0.0004070122707295939, "loss": 0.7787, "step": 3182 }, { "epoch": 0.5678351618945678, "grad_norm": 1.2918931245803833, "learning_rate": 0.0004069577439385982, "loss": 1.7838, "step": 3183 }, { "epoch": 0.568013558112568, "grad_norm": 0.5654813647270203, "learning_rate": 0.00040690320482020893, "loss": 0.9124, "step": 3184 }, { "epoch": 0.5681919543305682, "grad_norm": 0.902948796749115, "learning_rate": 0.00040684865337870945, "loss": 0.8613, "step": 3185 }, { "epoch": 0.5683703505485683, "grad_norm": 1.1347112655639648, "learning_rate": 0.00040679408961838426, "loss": 1.1453, "step": 3186 }, { "epoch": 0.5685487467665685, "grad_norm": 0.5079895853996277, "learning_rate": 0.0004067395135435187, "loss": 0.8407, "step": 3187 }, { "epoch": 0.5687271429845687, "grad_norm": 0.7240801453590393, "learning_rate": 0.0004066849251583992, "loss": 0.8808, "step": 3188 }, { "epoch": 0.5689055392025689, "grad_norm": 0.5793579816818237, "learning_rate": 0.0004066303244673132, "loss": 0.915, "step": 3189 }, { "epoch": 0.5690839354205691, "grad_norm": 1.3618862628936768, "learning_rate": 0.00040657571147454877, "loss": 0.9031, "step": 3190 }, { "epoch": 0.5692623316385692, "grad_norm": 14.210494995117188, "learning_rate": 0.0004065210861843954, "loss": 1.538, "step": 3191 }, { "epoch": 0.5694407278565694, "grad_norm": 2.7108030319213867, "learning_rate": 0.0004064664486011433, "loss": 0.8688, "step": 3192 }, { "epoch": 0.5696191240745696, "grad_norm": 1.628589391708374, "learning_rate": 0.0004064117987290836, "loss": 1.0493, "step": 3193 }, { "epoch": 0.5697975202925698, "grad_norm": 4.802014350891113, "learning_rate": 0.0004063571365725086, "loss": 1.0736, "step": 3194 }, { "epoch": 0.56997591651057, "grad_norm": 0.9495110511779785, "learning_rate": 0.00040630246213571136, "loss": 0.8673, "step": 3195 }, { "epoch": 0.5701543127285702, "grad_norm": 1.0658725500106812, "learning_rate": 0.000406247775422986, "loss": 1.0411, "step": 3196 }, { "epoch": 0.5703327089465703, "grad_norm": 0.6458380222320557, "learning_rate": 0.00040619307643862757, "loss": 1.1536, "step": 3197 }, { "epoch": 0.5705111051645705, "grad_norm": 91.1878662109375, "learning_rate": 0.00040613836518693213, "loss": 1.0854, "step": 3198 }, { "epoch": 0.5706895013825707, "grad_norm": 1.0638850927352905, "learning_rate": 0.0004060836416721968, "loss": 0.9599, "step": 3199 }, { "epoch": 0.5708678976005709, "grad_norm": 0.7722949981689453, "learning_rate": 0.00040602890589871933, "loss": 0.9146, "step": 3200 }, { "epoch": 0.5710462938185711, "grad_norm": 1.2300078868865967, "learning_rate": 0.0004059741578707987, "loss": 0.9866, "step": 3201 }, { "epoch": 0.5712246900365712, "grad_norm": 0.5897446870803833, "learning_rate": 0.00040591939759273486, "loss": 0.8663, "step": 3202 }, { "epoch": 0.5714030862545714, "grad_norm": 1.1152317523956299, "learning_rate": 0.0004058646250688287, "loss": 1.1437, "step": 3203 }, { "epoch": 0.5715814824725716, "grad_norm": 0.5559517741203308, "learning_rate": 0.00040580984030338187, "loss": 0.7943, "step": 3204 }, { "epoch": 0.5717598786905718, "grad_norm": 0.8801344633102417, "learning_rate": 0.0004057550433006972, "loss": 1.3015, "step": 3205 }, { "epoch": 0.571938274908572, "grad_norm": 0.9479907155036926, "learning_rate": 0.00040570023406507857, "loss": 0.8974, "step": 3206 }, { "epoch": 0.5721166711265722, "grad_norm": 0.6277695298194885, "learning_rate": 0.0004056454126008305, "loss": 1.0662, "step": 3207 }, { "epoch": 0.5722950673445723, "grad_norm": 0.476595014333725, "learning_rate": 0.0004055905789122587, "loss": 0.7801, "step": 3208 }, { "epoch": 0.5724734635625724, "grad_norm": 0.6994301080703735, "learning_rate": 0.00040553573300366986, "loss": 1.223, "step": 3209 }, { "epoch": 0.5726518597805726, "grad_norm": 0.8489440679550171, "learning_rate": 0.0004054808748793714, "loss": 0.863, "step": 3210 }, { "epoch": 0.5728302559985728, "grad_norm": 0.8950226902961731, "learning_rate": 0.00040542600454367193, "loss": 0.9557, "step": 3211 }, { "epoch": 0.573008652216573, "grad_norm": 0.6158692240715027, "learning_rate": 0.000405371122000881, "loss": 1.0143, "step": 3212 }, { "epoch": 0.5731870484345731, "grad_norm": 27.874290466308594, "learning_rate": 0.00040531622725530894, "loss": 0.8735, "step": 3213 }, { "epoch": 0.5733654446525733, "grad_norm": 33.128963470458984, "learning_rate": 0.0004052613203112673, "loss": 1.7168, "step": 3214 }, { "epoch": 0.5735438408705735, "grad_norm": 2.284363031387329, "learning_rate": 0.0004052064011730684, "loss": 0.9891, "step": 3215 }, { "epoch": 0.5737222370885737, "grad_norm": 0.8609527945518494, "learning_rate": 0.0004051514698450255, "loss": 0.9398, "step": 3216 }, { "epoch": 0.5739006333065739, "grad_norm": 0.9100723266601562, "learning_rate": 0.0004050965263314529, "loss": 0.7602, "step": 3217 }, { "epoch": 0.5740790295245741, "grad_norm": 8.203483581542969, "learning_rate": 0.0004050415706366659, "loss": 0.9475, "step": 3218 }, { "epoch": 0.5742574257425742, "grad_norm": 0.828872561454773, "learning_rate": 0.0004049866027649807, "loss": 0.9817, "step": 3219 }, { "epoch": 0.5744358219605744, "grad_norm": 0.49633899331092834, "learning_rate": 0.00040493162272071427, "loss": 0.5957, "step": 3220 }, { "epoch": 0.5746142181785746, "grad_norm": 4.844064712524414, "learning_rate": 0.000404876630508185, "loss": 1.3244, "step": 3221 }, { "epoch": 0.5747926143965748, "grad_norm": 0.8198102116584778, "learning_rate": 0.00040482162613171167, "loss": 1.0323, "step": 3222 }, { "epoch": 0.574971010614575, "grad_norm": 1.0812023878097534, "learning_rate": 0.00040476660959561464, "loss": 1.2488, "step": 3223 }, { "epoch": 0.5751494068325751, "grad_norm": 0.8954762816429138, "learning_rate": 0.0004047115809042146, "loss": 0.7903, "step": 3224 }, { "epoch": 0.5753278030505753, "grad_norm": 2.91465163230896, "learning_rate": 0.0004046565400618336, "loss": 1.1885, "step": 3225 }, { "epoch": 0.5755061992685755, "grad_norm": 1.1171669960021973, "learning_rate": 0.0004046014870727944, "loss": 1.0482, "step": 3226 }, { "epoch": 0.5756845954865757, "grad_norm": 0.8723791837692261, "learning_rate": 0.0004045464219414211, "loss": 0.8968, "step": 3227 }, { "epoch": 0.5758629917045759, "grad_norm": 0.8948122262954712, "learning_rate": 0.0004044913446720382, "loss": 0.9063, "step": 3228 }, { "epoch": 0.5760413879225761, "grad_norm": 0.7297793626785278, "learning_rate": 0.0004044362552689716, "loss": 0.9518, "step": 3229 }, { "epoch": 0.5762197841405762, "grad_norm": 0.7813421487808228, "learning_rate": 0.000404381153736548, "loss": 0.923, "step": 3230 }, { "epoch": 0.5763981803585764, "grad_norm": 0.5343239307403564, "learning_rate": 0.00040432604007909504, "loss": 0.9697, "step": 3231 }, { "epoch": 0.5765765765765766, "grad_norm": 0.5333415269851685, "learning_rate": 0.0004042709143009412, "loss": 0.7906, "step": 3232 }, { "epoch": 0.5767549727945768, "grad_norm": 0.5309287309646606, "learning_rate": 0.0004042157764064163, "loss": 0.8805, "step": 3233 }, { "epoch": 0.576933369012577, "grad_norm": 0.8856096863746643, "learning_rate": 0.00040416062639985053, "loss": 0.9808, "step": 3234 }, { "epoch": 0.5771117652305772, "grad_norm": 0.6096811890602112, "learning_rate": 0.0004041054642855756, "loss": 0.8629, "step": 3235 }, { "epoch": 0.5772901614485773, "grad_norm": 0.5578605532646179, "learning_rate": 0.0004040502900679237, "loss": 1.0188, "step": 3236 }, { "epoch": 0.5774685576665775, "grad_norm": 0.9917730689048767, "learning_rate": 0.0004039951037512284, "loss": 0.9557, "step": 3237 }, { "epoch": 0.5776469538845777, "grad_norm": 0.8114546537399292, "learning_rate": 0.00040393990533982397, "loss": 0.9911, "step": 3238 }, { "epoch": 0.5778253501025779, "grad_norm": 0.6181529760360718, "learning_rate": 0.0004038846948380456, "loss": 0.9745, "step": 3239 }, { "epoch": 0.578003746320578, "grad_norm": 1.0224014520645142, "learning_rate": 0.00040382947225022945, "loss": 1.1888, "step": 3240 }, { "epoch": 0.5781821425385781, "grad_norm": 0.6258061528205872, "learning_rate": 0.0004037742375807127, "loss": 0.9421, "step": 3241 }, { "epoch": 0.5783605387565783, "grad_norm": 1.0917785167694092, "learning_rate": 0.00040371899083383367, "loss": 1.0569, "step": 3242 }, { "epoch": 0.5785389349745785, "grad_norm": 0.6143338084220886, "learning_rate": 0.00040366373201393115, "loss": 1.0274, "step": 3243 }, { "epoch": 0.5787173311925787, "grad_norm": 1.4802119731903076, "learning_rate": 0.00040360846112534533, "loss": 1.0424, "step": 3244 }, { "epoch": 0.5788957274105789, "grad_norm": 7.478890895843506, "learning_rate": 0.000403553178172417, "loss": 2.1221, "step": 3245 }, { "epoch": 0.5790741236285791, "grad_norm": 0.7259086966514587, "learning_rate": 0.0004034978831594881, "loss": 0.9648, "step": 3246 }, { "epoch": 0.5792525198465792, "grad_norm": 0.8427548408508301, "learning_rate": 0.00040344257609090155, "loss": 0.8322, "step": 3247 }, { "epoch": 0.5794309160645794, "grad_norm": 0.5752832889556885, "learning_rate": 0.0004033872569710011, "loss": 1.046, "step": 3248 }, { "epoch": 0.5796093122825796, "grad_norm": 0.8446690440177917, "learning_rate": 0.0004033319258041316, "loss": 1.0119, "step": 3249 }, { "epoch": 0.5797877085005798, "grad_norm": 0.5777229070663452, "learning_rate": 0.0004032765825946385, "loss": 0.9398, "step": 3250 }, { "epoch": 0.57996610471858, "grad_norm": 1.039317011833191, "learning_rate": 0.0004032212273468686, "loss": 0.9415, "step": 3251 }, { "epoch": 0.5801445009365801, "grad_norm": 0.9003728628158569, "learning_rate": 0.0004031658600651694, "loss": 1.0614, "step": 3252 }, { "epoch": 0.5803228971545803, "grad_norm": 0.5967848896980286, "learning_rate": 0.0004031104807538896, "loss": 0.8657, "step": 3253 }, { "epoch": 0.5805012933725805, "grad_norm": 0.9425020813941956, "learning_rate": 0.0004030550894173783, "loss": 1.0047, "step": 3254 }, { "epoch": 0.5806796895905807, "grad_norm": 0.7595664262771606, "learning_rate": 0.0004029996860599864, "loss": 0.9598, "step": 3255 }, { "epoch": 0.5808580858085809, "grad_norm": 0.629330039024353, "learning_rate": 0.0004029442706860649, "loss": 1.0411, "step": 3256 }, { "epoch": 0.5810364820265811, "grad_norm": 0.588629961013794, "learning_rate": 0.0004028888432999661, "loss": 1.0998, "step": 3257 }, { "epoch": 0.5812148782445812, "grad_norm": 0.8526011109352112, "learning_rate": 0.0004028334039060434, "loss": 1.2057, "step": 3258 }, { "epoch": 0.5813932744625814, "grad_norm": 1.3286303281784058, "learning_rate": 0.00040277795250865094, "loss": 0.8182, "step": 3259 }, { "epoch": 0.5815716706805816, "grad_norm": 0.985548734664917, "learning_rate": 0.0004027224891121438, "loss": 1.0537, "step": 3260 }, { "epoch": 0.5817500668985818, "grad_norm": 0.6656898856163025, "learning_rate": 0.0004026670137208782, "loss": 0.7234, "step": 3261 }, { "epoch": 0.581928463116582, "grad_norm": 1.2689619064331055, "learning_rate": 0.00040261152633921097, "loss": 1.115, "step": 3262 }, { "epoch": 0.5821068593345821, "grad_norm": 1.3990188837051392, "learning_rate": 0.00040255602697150005, "loss": 1.0482, "step": 3263 }, { "epoch": 0.5822852555525823, "grad_norm": 0.6655217409133911, "learning_rate": 0.00040250051562210456, "loss": 1.0004, "step": 3264 }, { "epoch": 0.5824636517705825, "grad_norm": 0.6590464115142822, "learning_rate": 0.0004024449922953841, "loss": 1.2517, "step": 3265 }, { "epoch": 0.5826420479885827, "grad_norm": 1.0634833574295044, "learning_rate": 0.0004023894569956996, "loss": 1.582, "step": 3266 }, { "epoch": 0.5828204442065829, "grad_norm": 0.7259156703948975, "learning_rate": 0.00040233390972741276, "loss": 1.0507, "step": 3267 }, { "epoch": 0.582998840424583, "grad_norm": 0.5403652191162109, "learning_rate": 0.00040227835049488615, "loss": 0.8231, "step": 3268 }, { "epoch": 0.5831772366425831, "grad_norm": 0.6394296288490295, "learning_rate": 0.0004022227793024834, "loss": 0.9511, "step": 3269 }, { "epoch": 0.5833556328605833, "grad_norm": 0.8414486050605774, "learning_rate": 0.0004021671961545691, "loss": 1.097, "step": 3270 }, { "epoch": 0.5835340290785835, "grad_norm": 1.2080618143081665, "learning_rate": 0.0004021116010555087, "loss": 0.9097, "step": 3271 }, { "epoch": 0.5837124252965837, "grad_norm": 0.6970852613449097, "learning_rate": 0.00040205599400966864, "loss": 0.9379, "step": 3272 }, { "epoch": 0.5838908215145839, "grad_norm": 0.7079569101333618, "learning_rate": 0.00040200037502141617, "loss": 1.0276, "step": 3273 }, { "epoch": 0.584069217732584, "grad_norm": 0.6691045761108398, "learning_rate": 0.0004019447440951197, "loss": 1.1062, "step": 3274 }, { "epoch": 0.5842476139505842, "grad_norm": 0.7754026055335999, "learning_rate": 0.0004018891012351484, "loss": 1.0128, "step": 3275 }, { "epoch": 0.5844260101685844, "grad_norm": 0.774585485458374, "learning_rate": 0.0004018334464458725, "loss": 0.9005, "step": 3276 }, { "epoch": 0.5846044063865846, "grad_norm": 0.6378253698348999, "learning_rate": 0.000401777779731663, "loss": 1.1857, "step": 3277 }, { "epoch": 0.5847828026045848, "grad_norm": 0.6450658440589905, "learning_rate": 0.00040172210109689206, "loss": 0.7742, "step": 3278 }, { "epoch": 0.584961198822585, "grad_norm": 0.6561940908432007, "learning_rate": 0.00040166641054593255, "loss": 1.1851, "step": 3279 }, { "epoch": 0.5851395950405851, "grad_norm": 0.5854855179786682, "learning_rate": 0.0004016107080831584, "loss": 0.9683, "step": 3280 }, { "epoch": 0.5853179912585853, "grad_norm": 0.6039174199104309, "learning_rate": 0.00040155499371294454, "loss": 0.9314, "step": 3281 }, { "epoch": 0.5854963874765855, "grad_norm": 0.8320233225822449, "learning_rate": 0.0004014992674396666, "loss": 0.7973, "step": 3282 }, { "epoch": 0.5856747836945857, "grad_norm": 0.6969453692436218, "learning_rate": 0.00040144352926770147, "loss": 0.9077, "step": 3283 }, { "epoch": 0.5858531799125859, "grad_norm": 0.9570133686065674, "learning_rate": 0.0004013877792014267, "loss": 1.0072, "step": 3284 }, { "epoch": 0.586031576130586, "grad_norm": 0.6976554989814758, "learning_rate": 0.0004013320172452209, "loss": 1.054, "step": 3285 }, { "epoch": 0.5862099723485862, "grad_norm": 0.6149062514305115, "learning_rate": 0.00040127624340346356, "loss": 0.9388, "step": 3286 }, { "epoch": 0.5863883685665864, "grad_norm": 0.517910897731781, "learning_rate": 0.0004012204576805352, "loss": 0.9243, "step": 3287 }, { "epoch": 0.5865667647845866, "grad_norm": 0.5356748700141907, "learning_rate": 0.0004011646600808172, "loss": 0.9906, "step": 3288 }, { "epoch": 0.5867451610025868, "grad_norm": 0.5061875581741333, "learning_rate": 0.00040110885060869173, "loss": 1.0385, "step": 3289 }, { "epoch": 0.586923557220587, "grad_norm": 0.6880626082420349, "learning_rate": 0.00040105302926854224, "loss": 0.8258, "step": 3290 }, { "epoch": 0.5871019534385871, "grad_norm": 0.4615683853626251, "learning_rate": 0.00040099719606475286, "loss": 0.8729, "step": 3291 }, { "epoch": 0.5872803496565873, "grad_norm": 0.7369007468223572, "learning_rate": 0.0004009413510017087, "loss": 0.9377, "step": 3292 }, { "epoch": 0.5874587458745875, "grad_norm": 0.5646438598632812, "learning_rate": 0.0004008854940837957, "loss": 1.1106, "step": 3293 }, { "epoch": 0.5876371420925877, "grad_norm": 0.5093971490859985, "learning_rate": 0.000400829625315401, "loss": 0.9263, "step": 3294 }, { "epoch": 0.5878155383105879, "grad_norm": 0.8065229058265686, "learning_rate": 0.00040077374470091237, "loss": 0.7977, "step": 3295 }, { "epoch": 0.587993934528588, "grad_norm": 1.8008029460906982, "learning_rate": 0.0004007178522447188, "loss": 1.1636, "step": 3296 }, { "epoch": 0.5881723307465881, "grad_norm": 0.5000921487808228, "learning_rate": 0.00040066194795120984, "loss": 1.0567, "step": 3297 }, { "epoch": 0.5883507269645883, "grad_norm": 0.5407942533493042, "learning_rate": 0.0004006060318247764, "loss": 1.1229, "step": 3298 }, { "epoch": 0.5885291231825885, "grad_norm": 0.8123578429222107, "learning_rate": 0.00040055010386981006, "loss": 1.0442, "step": 3299 }, { "epoch": 0.5887075194005887, "grad_norm": 0.5218117237091064, "learning_rate": 0.00040049416409070326, "loss": 0.8096, "step": 3300 }, { "epoch": 0.5888859156185889, "grad_norm": 0.5112317204475403, "learning_rate": 0.0004004382124918497, "loss": 0.9776, "step": 3301 }, { "epoch": 0.589064311836589, "grad_norm": 0.539185643196106, "learning_rate": 0.00040038224907764356, "loss": 1.0191, "step": 3302 }, { "epoch": 0.5892427080545892, "grad_norm": 0.5724341869354248, "learning_rate": 0.0004003262738524804, "loss": 1.1246, "step": 3303 }, { "epoch": 0.5894211042725894, "grad_norm": 0.568626880645752, "learning_rate": 0.00040027028682075626, "loss": 1.0875, "step": 3304 }, { "epoch": 0.5895995004905896, "grad_norm": 0.48848995566368103, "learning_rate": 0.00040021428798686854, "loss": 0.8293, "step": 3305 }, { "epoch": 0.5897778967085898, "grad_norm": 0.5772598385810852, "learning_rate": 0.00040015827735521525, "loss": 0.8439, "step": 3306 }, { "epoch": 0.5899562929265899, "grad_norm": 1.8581256866455078, "learning_rate": 0.0004001022549301955, "loss": 1.0762, "step": 3307 }, { "epoch": 0.5901346891445901, "grad_norm": 0.6715808510780334, "learning_rate": 0.00040004622071620924, "loss": 0.8739, "step": 3308 }, { "epoch": 0.5903130853625903, "grad_norm": 0.5865873694419861, "learning_rate": 0.00039999017471765736, "loss": 1.2157, "step": 3309 }, { "epoch": 0.5904914815805905, "grad_norm": 0.6115093231201172, "learning_rate": 0.0003999341169389417, "loss": 0.9827, "step": 3310 }, { "epoch": 0.5906698777985907, "grad_norm": 1.7687232494354248, "learning_rate": 0.0003998780473844651, "loss": 0.7466, "step": 3311 }, { "epoch": 0.5908482740165909, "grad_norm": 0.5366143584251404, "learning_rate": 0.00039982196605863095, "loss": 0.6974, "step": 3312 }, { "epoch": 0.591026670234591, "grad_norm": 0.48479849100112915, "learning_rate": 0.0003997658729658442, "loss": 0.8932, "step": 3313 }, { "epoch": 0.5912050664525912, "grad_norm": 0.5348507761955261, "learning_rate": 0.0003997097681105103, "loss": 1.0747, "step": 3314 }, { "epoch": 0.5913834626705914, "grad_norm": 0.5859421491622925, "learning_rate": 0.00039965365149703555, "loss": 0.9456, "step": 3315 }, { "epoch": 0.5915618588885916, "grad_norm": 0.5688529014587402, "learning_rate": 0.00039959752312982745, "loss": 0.985, "step": 3316 }, { "epoch": 0.5917402551065918, "grad_norm": 0.5745177865028381, "learning_rate": 0.00039954138301329426, "loss": 1.0356, "step": 3317 }, { "epoch": 0.5919186513245919, "grad_norm": 0.45974186062812805, "learning_rate": 0.00039948523115184516, "loss": 0.8968, "step": 3318 }, { "epoch": 0.5920970475425921, "grad_norm": 0.5509001612663269, "learning_rate": 0.00039942906754989035, "loss": 1.0219, "step": 3319 }, { "epoch": 0.5922754437605923, "grad_norm": 3.9084794521331787, "learning_rate": 0.0003993728922118408, "loss": 1.1745, "step": 3320 }, { "epoch": 0.5924538399785925, "grad_norm": 20.251436233520508, "learning_rate": 0.0003993167051421087, "loss": 0.842, "step": 3321 }, { "epoch": 0.5926322361965927, "grad_norm": 0.7031044363975525, "learning_rate": 0.0003992605063451068, "loss": 0.9488, "step": 3322 }, { "epoch": 0.5928106324145929, "grad_norm": 0.5919948816299438, "learning_rate": 0.00039920429582524896, "loss": 1.0003, "step": 3323 }, { "epoch": 0.592989028632593, "grad_norm": 1.6224722862243652, "learning_rate": 0.00039914807358694995, "loss": 1.2006, "step": 3324 }, { "epoch": 0.5931674248505932, "grad_norm": 0.5909742712974548, "learning_rate": 0.00039909183963462535, "loss": 0.9017, "step": 3325 }, { "epoch": 0.5933458210685933, "grad_norm": 0.5360785126686096, "learning_rate": 0.0003990355939726919, "loss": 0.9806, "step": 3326 }, { "epoch": 0.5935242172865935, "grad_norm": 0.6022999286651611, "learning_rate": 0.00039897933660556703, "loss": 1.1784, "step": 3327 }, { "epoch": 0.5937026135045937, "grad_norm": 0.5530278086662292, "learning_rate": 0.0003989230675376691, "loss": 0.7749, "step": 3328 }, { "epoch": 0.5938810097225938, "grad_norm": 0.48187515139579773, "learning_rate": 0.0003988667867734176, "loss": 0.8854, "step": 3329 }, { "epoch": 0.594059405940594, "grad_norm": 0.5293228030204773, "learning_rate": 0.0003988104943172327, "loss": 0.9786, "step": 3330 }, { "epoch": 0.5942378021585942, "grad_norm": 1.1052613258361816, "learning_rate": 0.00039875419017353564, "loss": 1.1643, "step": 3331 }, { "epoch": 0.5944161983765944, "grad_norm": 0.663966953754425, "learning_rate": 0.00039869787434674853, "loss": 1.2176, "step": 3332 }, { "epoch": 0.5945945945945946, "grad_norm": 0.6256305575370789, "learning_rate": 0.0003986415468412943, "loss": 1.0519, "step": 3333 }, { "epoch": 0.5947729908125948, "grad_norm": 14.08318042755127, "learning_rate": 0.00039858520766159703, "loss": 1.3631, "step": 3334 }, { "epoch": 0.5949513870305949, "grad_norm": 0.6284887790679932, "learning_rate": 0.00039852885681208134, "loss": 0.8746, "step": 3335 }, { "epoch": 0.5951297832485951, "grad_norm": 0.9674341678619385, "learning_rate": 0.00039847249429717326, "loss": 1.2364, "step": 3336 }, { "epoch": 0.5953081794665953, "grad_norm": 0.6534430980682373, "learning_rate": 0.00039841612012129937, "loss": 1.1151, "step": 3337 }, { "epoch": 0.5954865756845955, "grad_norm": 0.5643439888954163, "learning_rate": 0.0003983597342888872, "loss": 0.8678, "step": 3338 }, { "epoch": 0.5956649719025957, "grad_norm": 0.5396862626075745, "learning_rate": 0.0003983033368043654, "loss": 0.9284, "step": 3339 }, { "epoch": 0.5958433681205958, "grad_norm": 0.6234003305435181, "learning_rate": 0.00039824692767216337, "loss": 1.0596, "step": 3340 }, { "epoch": 0.596021764338596, "grad_norm": 0.5749251246452332, "learning_rate": 0.00039819050689671143, "loss": 0.8237, "step": 3341 }, { "epoch": 0.5962001605565962, "grad_norm": 0.5611945390701294, "learning_rate": 0.0003981340744824408, "loss": 0.9064, "step": 3342 }, { "epoch": 0.5963785567745964, "grad_norm": 0.6729212403297424, "learning_rate": 0.0003980776304337838, "loss": 1.1313, "step": 3343 }, { "epoch": 0.5965569529925966, "grad_norm": 0.5482896566390991, "learning_rate": 0.0003980211747551733, "loss": 0.9448, "step": 3344 }, { "epoch": 0.5967353492105968, "grad_norm": 44.00550842285156, "learning_rate": 0.0003979647074510435, "loss": 1.6368, "step": 3345 }, { "epoch": 0.5969137454285969, "grad_norm": 0.5960306525230408, "learning_rate": 0.00039790822852582927, "loss": 1.0672, "step": 3346 }, { "epoch": 0.5970921416465971, "grad_norm": 6.083531856536865, "learning_rate": 0.00039785173798396637, "loss": 1.4715, "step": 3347 }, { "epoch": 0.5972705378645973, "grad_norm": 0.8834306597709656, "learning_rate": 0.00039779523582989163, "loss": 1.1152, "step": 3348 }, { "epoch": 0.5974489340825975, "grad_norm": 1.775227427482605, "learning_rate": 0.0003977387220680427, "loss": 1.1541, "step": 3349 }, { "epoch": 0.5976273303005977, "grad_norm": 0.7069253325462341, "learning_rate": 0.00039768219670285805, "loss": 0.889, "step": 3350 }, { "epoch": 0.5978057265185978, "grad_norm": 1.1338763236999512, "learning_rate": 0.00039762565973877726, "loss": 0.8112, "step": 3351 }, { "epoch": 0.597984122736598, "grad_norm": 1.5388410091400146, "learning_rate": 0.00039756911118024065, "loss": 1.0269, "step": 3352 }, { "epoch": 0.5981625189545982, "grad_norm": 0.7311750650405884, "learning_rate": 0.0003975125510316896, "loss": 0.9498, "step": 3353 }, { "epoch": 0.5983409151725984, "grad_norm": 2.2909646034240723, "learning_rate": 0.0003974559792975663, "loss": 0.9101, "step": 3354 }, { "epoch": 0.5985193113905986, "grad_norm": 0.8901395201683044, "learning_rate": 0.0003973993959823137, "loss": 0.8863, "step": 3355 }, { "epoch": 0.5986977076085988, "grad_norm": 0.6024214029312134, "learning_rate": 0.00039734280109037613, "loss": 1.0322, "step": 3356 }, { "epoch": 0.5988761038265988, "grad_norm": 0.6770859956741333, "learning_rate": 0.0003972861946261983, "loss": 1.1135, "step": 3357 }, { "epoch": 0.599054500044599, "grad_norm": 0.73415607213974, "learning_rate": 0.0003972295765942261, "loss": 0.9596, "step": 3358 }, { "epoch": 0.5992328962625992, "grad_norm": 0.6480002403259277, "learning_rate": 0.00039717294699890627, "loss": 0.9741, "step": 3359 }, { "epoch": 0.5994112924805994, "grad_norm": 1.2426977157592773, "learning_rate": 0.0003971163058446866, "loss": 1.0937, "step": 3360 }, { "epoch": 0.5995896886985996, "grad_norm": 0.8961933851242065, "learning_rate": 0.0003970596531360156, "loss": 0.9281, "step": 3361 }, { "epoch": 0.5997680849165997, "grad_norm": 0.7000132203102112, "learning_rate": 0.00039700298887734273, "loss": 1.0097, "step": 3362 }, { "epoch": 0.5999464811345999, "grad_norm": 1.2038791179656982, "learning_rate": 0.0003969463130731183, "loss": 0.9242, "step": 3363 }, { "epoch": 0.6001248773526001, "grad_norm": 1.661060094833374, "learning_rate": 0.00039688962572779373, "loss": 1.1603, "step": 3364 }, { "epoch": 0.6003032735706003, "grad_norm": 1.0910298824310303, "learning_rate": 0.0003968329268458212, "loss": 0.9369, "step": 3365 }, { "epoch": 0.6004816697886005, "grad_norm": 1.3606114387512207, "learning_rate": 0.00039677621643165363, "loss": 0.9468, "step": 3366 }, { "epoch": 0.6006600660066007, "grad_norm": 1.111076831817627, "learning_rate": 0.0003967194944897453, "loss": 1.1383, "step": 3367 }, { "epoch": 0.6008384622246008, "grad_norm": 1.8882251977920532, "learning_rate": 0.000396662761024551, "loss": 0.8431, "step": 3368 }, { "epoch": 0.601016858442601, "grad_norm": 1.5878911018371582, "learning_rate": 0.0003966060160405266, "loss": 1.0407, "step": 3369 }, { "epoch": 0.6011952546606012, "grad_norm": 0.7476792931556702, "learning_rate": 0.00039654925954212873, "loss": 0.7851, "step": 3370 }, { "epoch": 0.6013736508786014, "grad_norm": 0.8636188507080078, "learning_rate": 0.00039649249153381514, "loss": 1.0617, "step": 3371 }, { "epoch": 0.6015520470966016, "grad_norm": 0.7655645608901978, "learning_rate": 0.00039643571202004426, "loss": 0.9721, "step": 3372 }, { "epoch": 0.6017304433146017, "grad_norm": 0.6459742188453674, "learning_rate": 0.0003963789210052755, "loss": 0.8403, "step": 3373 }, { "epoch": 0.6019088395326019, "grad_norm": 0.7047243714332581, "learning_rate": 0.00039632211849396936, "loss": 1.1643, "step": 3374 }, { "epoch": 0.6020872357506021, "grad_norm": 0.5050498247146606, "learning_rate": 0.000396265304490587, "loss": 0.7855, "step": 3375 }, { "epoch": 0.6022656319686023, "grad_norm": 0.9648879766464233, "learning_rate": 0.0003962084789995906, "loss": 0.9511, "step": 3376 }, { "epoch": 0.6024440281866025, "grad_norm": 0.5397990345954895, "learning_rate": 0.00039615164202544314, "loss": 0.8164, "step": 3377 }, { "epoch": 0.6026224244046027, "grad_norm": 2.0958774089813232, "learning_rate": 0.0003960947935726086, "loss": 0.8944, "step": 3378 }, { "epoch": 0.6028008206226028, "grad_norm": 1.3503490686416626, "learning_rate": 0.00039603793364555184, "loss": 0.8005, "step": 3379 }, { "epoch": 0.602979216840603, "grad_norm": 0.5871055722236633, "learning_rate": 0.00039598106224873866, "loss": 0.9893, "step": 3380 }, { "epoch": 0.6031576130586032, "grad_norm": 1.3168357610702515, "learning_rate": 0.0003959241793866356, "loss": 0.9926, "step": 3381 }, { "epoch": 0.6033360092766034, "grad_norm": 0.8099937438964844, "learning_rate": 0.0003958672850637103, "loss": 0.9152, "step": 3382 }, { "epoch": 0.6035144054946036, "grad_norm": 0.5617466568946838, "learning_rate": 0.0003958103792844313, "loss": 0.9704, "step": 3383 }, { "epoch": 0.6036928017126036, "grad_norm": 0.758649468421936, "learning_rate": 0.00039575346205326776, "loss": 0.869, "step": 3384 }, { "epoch": 0.6038711979306038, "grad_norm": 0.5572230219841003, "learning_rate": 0.0003956965333746901, "loss": 1.0961, "step": 3385 }, { "epoch": 0.604049594148604, "grad_norm": 78.27342224121094, "learning_rate": 0.00039563959325316934, "loss": 1.2738, "step": 3386 }, { "epoch": 0.6042279903666042, "grad_norm": 2.010958194732666, "learning_rate": 0.00039558264169317766, "loss": 0.9795, "step": 3387 }, { "epoch": 0.6044063865846044, "grad_norm": 1.1752885580062866, "learning_rate": 0.0003955256786991879, "loss": 1.334, "step": 3388 }, { "epoch": 0.6045847828026046, "grad_norm": 0.719290018081665, "learning_rate": 0.0003954687042756739, "loss": 0.9268, "step": 3389 }, { "epoch": 0.6047631790206047, "grad_norm": 0.9729273915290833, "learning_rate": 0.00039541171842711063, "loss": 1.1478, "step": 3390 }, { "epoch": 0.6049415752386049, "grad_norm": 0.5412781834602356, "learning_rate": 0.00039535472115797345, "loss": 1.0843, "step": 3391 }, { "epoch": 0.6051199714566051, "grad_norm": 0.6292357444763184, "learning_rate": 0.00039529771247273903, "loss": 1.2036, "step": 3392 }, { "epoch": 0.6052983676746053, "grad_norm": 0.617896556854248, "learning_rate": 0.0003952406923758849, "loss": 1.0214, "step": 3393 }, { "epoch": 0.6054767638926055, "grad_norm": 0.4818667471408844, "learning_rate": 0.00039518366087188924, "loss": 1.0368, "step": 3394 }, { "epoch": 0.6056551601106056, "grad_norm": 0.7148341536521912, "learning_rate": 0.0003951266179652313, "loss": 0.9985, "step": 3395 }, { "epoch": 0.6058335563286058, "grad_norm": 1.9086631536483765, "learning_rate": 0.0003950695636603912, "loss": 1.0418, "step": 3396 }, { "epoch": 0.606011952546606, "grad_norm": 0.5101553201675415, "learning_rate": 0.00039501249796185006, "loss": 0.7728, "step": 3397 }, { "epoch": 0.6061903487646062, "grad_norm": 0.5302074551582336, "learning_rate": 0.00039495542087408976, "loss": 0.8685, "step": 3398 }, { "epoch": 0.6063687449826064, "grad_norm": 0.6329625844955444, "learning_rate": 0.000394898332401593, "loss": 1.0125, "step": 3399 }, { "epoch": 0.6065471412006066, "grad_norm": 0.6048588752746582, "learning_rate": 0.0003948412325488436, "loss": 1.1885, "step": 3400 }, { "epoch": 0.6067255374186067, "grad_norm": 0.5368157029151917, "learning_rate": 0.00039478412132032615, "loss": 1.0989, "step": 3401 }, { "epoch": 0.6069039336366069, "grad_norm": 0.5177137851715088, "learning_rate": 0.0003947269987205261, "loss": 0.9014, "step": 3402 }, { "epoch": 0.6070823298546071, "grad_norm": 0.6155733466148376, "learning_rate": 0.00039466986475392987, "loss": 1.1821, "step": 3403 }, { "epoch": 0.6072607260726073, "grad_norm": 0.49641674757003784, "learning_rate": 0.0003946127194250247, "loss": 1.0431, "step": 3404 }, { "epoch": 0.6074391222906075, "grad_norm": 0.48299992084503174, "learning_rate": 0.00039455556273829877, "loss": 0.8856, "step": 3405 }, { "epoch": 0.6076175185086076, "grad_norm": 0.49870428442955017, "learning_rate": 0.0003944983946982412, "loss": 0.9731, "step": 3406 }, { "epoch": 0.6077959147266078, "grad_norm": 0.47452113032341003, "learning_rate": 0.00039444121530934185, "loss": 0.7636, "step": 3407 }, { "epoch": 0.607974310944608, "grad_norm": 0.5165370106697083, "learning_rate": 0.0003943840245760916, "loss": 1.1063, "step": 3408 }, { "epoch": 0.6081527071626082, "grad_norm": 0.5465608835220337, "learning_rate": 0.00039432682250298225, "loss": 1.1446, "step": 3409 }, { "epoch": 0.6083311033806084, "grad_norm": 0.4796379804611206, "learning_rate": 0.00039426960909450627, "loss": 0.9792, "step": 3410 }, { "epoch": 0.6085094995986086, "grad_norm": 0.48459067940711975, "learning_rate": 0.00039421238435515736, "loss": 1.1648, "step": 3411 }, { "epoch": 0.6086878958166086, "grad_norm": 0.5157718062400818, "learning_rate": 0.00039415514828942976, "loss": 1.0494, "step": 3412 }, { "epoch": 0.6088662920346088, "grad_norm": 0.4796636998653412, "learning_rate": 0.00039409790090181896, "loss": 0.8812, "step": 3413 }, { "epoch": 0.609044688252609, "grad_norm": 0.5595741868019104, "learning_rate": 0.000394040642196821, "loss": 0.8946, "step": 3414 }, { "epoch": 0.6092230844706092, "grad_norm": 0.5820662975311279, "learning_rate": 0.00039398337217893295, "loss": 1.1571, "step": 3415 }, { "epoch": 0.6094014806886094, "grad_norm": 0.4832160770893097, "learning_rate": 0.0003939260908526528, "loss": 0.8372, "step": 3416 }, { "epoch": 0.6095798769066095, "grad_norm": 0.4839080274105072, "learning_rate": 0.00039386879822247945, "loss": 0.8548, "step": 3417 }, { "epoch": 0.6097582731246097, "grad_norm": 0.45255133509635925, "learning_rate": 0.00039381149429291263, "loss": 0.9553, "step": 3418 }, { "epoch": 0.6099366693426099, "grad_norm": 0.4793449342250824, "learning_rate": 0.00039375417906845284, "loss": 0.965, "step": 3419 }, { "epoch": 0.6101150655606101, "grad_norm": 0.4889264404773712, "learning_rate": 0.0003936968525536018, "loss": 1.0507, "step": 3420 }, { "epoch": 0.6102934617786103, "grad_norm": 0.5014956593513489, "learning_rate": 0.00039363951475286164, "loss": 0.9827, "step": 3421 }, { "epoch": 0.6104718579966105, "grad_norm": 0.9582971334457397, "learning_rate": 0.0003935821656707359, "loss": 1.0391, "step": 3422 }, { "epoch": 0.6106502542146106, "grad_norm": 0.5389576554298401, "learning_rate": 0.00039352480531172873, "loss": 1.0455, "step": 3423 }, { "epoch": 0.6108286504326108, "grad_norm": 0.4678865373134613, "learning_rate": 0.000393467433680345, "loss": 0.8876, "step": 3424 }, { "epoch": 0.611007046650611, "grad_norm": 0.581387996673584, "learning_rate": 0.00039341005078109083, "loss": 1.0716, "step": 3425 }, { "epoch": 0.6111854428686112, "grad_norm": 0.47930246591567993, "learning_rate": 0.000393352656618473, "loss": 0.7716, "step": 3426 }, { "epoch": 0.6113638390866114, "grad_norm": 0.5663626790046692, "learning_rate": 0.0003932952511969991, "loss": 0.8454, "step": 3427 }, { "epoch": 0.6115422353046115, "grad_norm": 0.5635462403297424, "learning_rate": 0.0003932378345211779, "loss": 0.9529, "step": 3428 }, { "epoch": 0.6117206315226117, "grad_norm": 0.46473008394241333, "learning_rate": 0.0003931804065955188, "loss": 0.8475, "step": 3429 }, { "epoch": 0.6118990277406119, "grad_norm": 0.5133330225944519, "learning_rate": 0.00039312296742453223, "loss": 0.896, "step": 3430 }, { "epoch": 0.6120774239586121, "grad_norm": 0.5049217343330383, "learning_rate": 0.0003930655170127294, "loss": 1.0923, "step": 3431 }, { "epoch": 0.6122558201766123, "grad_norm": 0.4742034375667572, "learning_rate": 0.00039300805536462237, "loss": 0.7796, "step": 3432 }, { "epoch": 0.6124342163946125, "grad_norm": 0.5486430525779724, "learning_rate": 0.0003929505824847243, "loss": 0.92, "step": 3433 }, { "epoch": 0.6126126126126126, "grad_norm": 0.514842689037323, "learning_rate": 0.00039289309837754895, "loss": 1.0119, "step": 3434 }, { "epoch": 0.6127910088306128, "grad_norm": 0.4459700584411621, "learning_rate": 0.000392835603047611, "loss": 0.6496, "step": 3435 }, { "epoch": 0.612969405048613, "grad_norm": 0.525751531124115, "learning_rate": 0.00039277809649942644, "loss": 0.8839, "step": 3436 }, { "epoch": 0.6131478012666132, "grad_norm": 0.5619456171989441, "learning_rate": 0.0003927205787375115, "loss": 0.8713, "step": 3437 }, { "epoch": 0.6133261974846134, "grad_norm": 0.4977574348449707, "learning_rate": 0.0003926630497663839, "loss": 0.9844, "step": 3438 }, { "epoch": 0.6135045937026135, "grad_norm": 0.5254572033882141, "learning_rate": 0.0003926055095905616, "loss": 1.085, "step": 3439 }, { "epoch": 0.6136829899206137, "grad_norm": 0.4378454089164734, "learning_rate": 0.000392547958214564, "loss": 0.7315, "step": 3440 }, { "epoch": 0.6138613861386139, "grad_norm": 0.5086297392845154, "learning_rate": 0.0003924903956429111, "loss": 1.0405, "step": 3441 }, { "epoch": 0.614039782356614, "grad_norm": 0.47228503227233887, "learning_rate": 0.00039243282188012387, "loss": 0.8687, "step": 3442 }, { "epoch": 0.6142181785746142, "grad_norm": 0.46949103474617004, "learning_rate": 0.0003923752369307241, "loss": 0.7208, "step": 3443 }, { "epoch": 0.6143965747926144, "grad_norm": 0.5485444664955139, "learning_rate": 0.00039231764079923447, "loss": 1.071, "step": 3444 }, { "epoch": 0.6145749710106145, "grad_norm": 0.610989511013031, "learning_rate": 0.0003922600334901786, "loss": 0.989, "step": 3445 }, { "epoch": 0.6147533672286147, "grad_norm": 0.48183125257492065, "learning_rate": 0.0003922024150080808, "loss": 0.9965, "step": 3446 }, { "epoch": 0.6149317634466149, "grad_norm": 0.540808379650116, "learning_rate": 0.00039214478535746665, "loss": 1.0388, "step": 3447 }, { "epoch": 0.6151101596646151, "grad_norm": 0.4990650415420532, "learning_rate": 0.0003920871445428622, "loss": 1.1055, "step": 3448 }, { "epoch": 0.6152885558826153, "grad_norm": 0.5104119181632996, "learning_rate": 0.00039202949256879463, "loss": 0.9729, "step": 3449 }, { "epoch": 0.6154669521006154, "grad_norm": 0.48018768429756165, "learning_rate": 0.0003919718294397917, "loss": 0.9862, "step": 3450 }, { "epoch": 0.6156453483186156, "grad_norm": 0.48195213079452515, "learning_rate": 0.0003919141551603824, "loss": 1.0078, "step": 3451 }, { "epoch": 0.6158237445366158, "grad_norm": 0.4913448095321655, "learning_rate": 0.0003918564697350965, "loss": 0.8028, "step": 3452 }, { "epoch": 0.616002140754616, "grad_norm": 1.1352670192718506, "learning_rate": 0.00039179877316846453, "loss": 1.019, "step": 3453 }, { "epoch": 0.6161805369726162, "grad_norm": 0.5746923685073853, "learning_rate": 0.0003917410654650179, "loss": 0.8682, "step": 3454 }, { "epoch": 0.6163589331906164, "grad_norm": 0.7708411812782288, "learning_rate": 0.00039168334662928895, "loss": 1.3231, "step": 3455 }, { "epoch": 0.6165373294086165, "grad_norm": 0.794039249420166, "learning_rate": 0.00039162561666581096, "loss": 0.8659, "step": 3456 }, { "epoch": 0.6167157256266167, "grad_norm": 0.5135617256164551, "learning_rate": 0.000391567875579118, "loss": 1.0136, "step": 3457 }, { "epoch": 0.6168941218446169, "grad_norm": 0.47095632553100586, "learning_rate": 0.00039151012337374495, "loss": 0.8962, "step": 3458 }, { "epoch": 0.6170725180626171, "grad_norm": 0.5985879302024841, "learning_rate": 0.0003914523600542277, "loss": 1.0213, "step": 3459 }, { "epoch": 0.6172509142806173, "grad_norm": 0.6508655548095703, "learning_rate": 0.000391394585625103, "loss": 1.2593, "step": 3460 }, { "epoch": 0.6174293104986174, "grad_norm": 0.5690905451774597, "learning_rate": 0.00039133680009090845, "loss": 0.9674, "step": 3461 }, { "epoch": 0.6176077067166176, "grad_norm": 0.610589325428009, "learning_rate": 0.0003912790034561824, "loss": 0.8924, "step": 3462 }, { "epoch": 0.6177861029346178, "grad_norm": 0.7118551135063171, "learning_rate": 0.00039122119572546424, "loss": 1.1188, "step": 3463 }, { "epoch": 0.617964499152618, "grad_norm": 0.4738007187843323, "learning_rate": 0.0003911633769032941, "loss": 0.8131, "step": 3464 }, { "epoch": 0.6181428953706182, "grad_norm": 0.5291326642036438, "learning_rate": 0.000391105546994213, "loss": 0.8803, "step": 3465 }, { "epoch": 0.6183212915886184, "grad_norm": 17.467649459838867, "learning_rate": 0.0003910477060027631, "loss": 1.0413, "step": 3466 }, { "epoch": 0.6184996878066185, "grad_norm": 0.5017330646514893, "learning_rate": 0.00039098985393348697, "loss": 0.7768, "step": 3467 }, { "epoch": 0.6186780840246187, "grad_norm": 0.49945586919784546, "learning_rate": 0.00039093199079092843, "loss": 0.8372, "step": 3468 }, { "epoch": 0.6188564802426189, "grad_norm": 0.5586909651756287, "learning_rate": 0.000390874116579632, "loss": 0.8797, "step": 3469 }, { "epoch": 0.619034876460619, "grad_norm": 0.5252298712730408, "learning_rate": 0.0003908162313041431, "loss": 0.917, "step": 3470 }, { "epoch": 0.6192132726786193, "grad_norm": 0.48841506242752075, "learning_rate": 0.00039075833496900794, "loss": 0.8785, "step": 3471 }, { "epoch": 0.6193916688966193, "grad_norm": 0.4840584099292755, "learning_rate": 0.0003907004275787737, "loss": 0.9556, "step": 3472 }, { "epoch": 0.6195700651146195, "grad_norm": 0.47886261343955994, "learning_rate": 0.0003906425091379885, "loss": 1.1327, "step": 3473 }, { "epoch": 0.6197484613326197, "grad_norm": 0.51800936460495, "learning_rate": 0.0003905845796512011, "loss": 0.8071, "step": 3474 }, { "epoch": 0.6199268575506199, "grad_norm": 0.5068585872650146, "learning_rate": 0.00039052663912296135, "loss": 0.959, "step": 3475 }, { "epoch": 0.6201052537686201, "grad_norm": 0.4845023453235626, "learning_rate": 0.00039046868755781986, "loss": 0.8652, "step": 3476 }, { "epoch": 0.6202836499866203, "grad_norm": 0.4877776503562927, "learning_rate": 0.00039041072496032804, "loss": 1.0887, "step": 3477 }, { "epoch": 0.6204620462046204, "grad_norm": 0.4742729663848877, "learning_rate": 0.0003903527513350383, "loss": 1.0202, "step": 3478 }, { "epoch": 0.6206404424226206, "grad_norm": 0.4939587712287903, "learning_rate": 0.0003902947666865039, "loss": 0.8183, "step": 3479 }, { "epoch": 0.6208188386406208, "grad_norm": 0.4751134216785431, "learning_rate": 0.0003902367710192789, "loss": 1.1003, "step": 3480 }, { "epoch": 0.620997234858621, "grad_norm": 0.45981040596961975, "learning_rate": 0.00039017876433791824, "loss": 0.8677, "step": 3481 }, { "epoch": 0.6211756310766212, "grad_norm": 0.5187476873397827, "learning_rate": 0.00039012074664697774, "loss": 0.8187, "step": 3482 }, { "epoch": 0.6213540272946213, "grad_norm": 0.4452672600746155, "learning_rate": 0.0003900627179510141, "loss": 0.8628, "step": 3483 }, { "epoch": 0.6215324235126215, "grad_norm": 0.5037822127342224, "learning_rate": 0.0003900046782545849, "loss": 0.9356, "step": 3484 }, { "epoch": 0.6217108197306217, "grad_norm": 0.4531655013561249, "learning_rate": 0.00038994662756224843, "loss": 0.8167, "step": 3485 }, { "epoch": 0.6218892159486219, "grad_norm": 0.5035321116447449, "learning_rate": 0.00038988856587856413, "loss": 0.8075, "step": 3486 }, { "epoch": 0.6220676121666221, "grad_norm": 0.4611433148384094, "learning_rate": 0.00038983049320809207, "loss": 1.1105, "step": 3487 }, { "epoch": 0.6222460083846223, "grad_norm": 0.48017269372940063, "learning_rate": 0.00038977240955539316, "loss": 0.8857, "step": 3488 }, { "epoch": 0.6224244046026224, "grad_norm": 0.5595654845237732, "learning_rate": 0.0003897143149250295, "loss": 0.8854, "step": 3489 }, { "epoch": 0.6226028008206226, "grad_norm": 0.5068066716194153, "learning_rate": 0.00038965620932156355, "loss": 0.9231, "step": 3490 }, { "epoch": 0.6227811970386228, "grad_norm": 0.5294225215911865, "learning_rate": 0.00038959809274955907, "loss": 0.9997, "step": 3491 }, { "epoch": 0.622959593256623, "grad_norm": 0.5624359846115112, "learning_rate": 0.0003895399652135805, "loss": 0.9754, "step": 3492 }, { "epoch": 0.6231379894746232, "grad_norm": 0.4356614053249359, "learning_rate": 0.00038948182671819304, "loss": 0.7682, "step": 3493 }, { "epoch": 0.6233163856926233, "grad_norm": 0.5377020239830017, "learning_rate": 0.00038942367726796297, "loss": 1.2892, "step": 3494 }, { "epoch": 0.6234947819106235, "grad_norm": 0.5128370523452759, "learning_rate": 0.0003893655168674572, "loss": 0.8542, "step": 3495 }, { "epoch": 0.6236731781286237, "grad_norm": 0.5239914059638977, "learning_rate": 0.0003893073455212438, "loss": 0.922, "step": 3496 }, { "epoch": 0.6238515743466239, "grad_norm": 0.5009093284606934, "learning_rate": 0.00038924916323389145, "loss": 1.2072, "step": 3497 }, { "epoch": 0.6240299705646241, "grad_norm": 0.5097877383232117, "learning_rate": 0.00038919097000996973, "loss": 1.0078, "step": 3498 }, { "epoch": 0.6242083667826243, "grad_norm": 0.5235947370529175, "learning_rate": 0.0003891327658540491, "loss": 1.0455, "step": 3499 }, { "epoch": 0.6243867630006243, "grad_norm": 0.4533289670944214, "learning_rate": 0.00038907455077070085, "loss": 0.7731, "step": 3500 }, { "epoch": 0.6245651592186245, "grad_norm": 0.5486941337585449, "learning_rate": 0.0003890163247644973, "loss": 0.9088, "step": 3501 }, { "epoch": 0.6247435554366247, "grad_norm": 0.5437402129173279, "learning_rate": 0.0003889580878400115, "loss": 0.9288, "step": 3502 }, { "epoch": 0.6249219516546249, "grad_norm": 0.49082422256469727, "learning_rate": 0.00038889984000181724, "loss": 0.8177, "step": 3503 }, { "epoch": 0.6251003478726251, "grad_norm": 0.7093980312347412, "learning_rate": 0.0003888415812544892, "loss": 1.1146, "step": 3504 }, { "epoch": 0.6252787440906252, "grad_norm": 0.45868930220603943, "learning_rate": 0.00038878331160260317, "loss": 0.9161, "step": 3505 }, { "epoch": 0.6254571403086254, "grad_norm": 0.527919590473175, "learning_rate": 0.00038872503105073563, "loss": 1.0866, "step": 3506 }, { "epoch": 0.6256355365266256, "grad_norm": 0.5056113600730896, "learning_rate": 0.0003886667396034638, "loss": 0.8475, "step": 3507 }, { "epoch": 0.6258139327446258, "grad_norm": 0.46975040435791016, "learning_rate": 0.00038860843726536593, "loss": 0.8076, "step": 3508 }, { "epoch": 0.625992328962626, "grad_norm": 0.5507307052612305, "learning_rate": 0.00038855012404102104, "loss": 0.9643, "step": 3509 }, { "epoch": 0.6261707251806262, "grad_norm": 0.6463258862495422, "learning_rate": 0.00038849179993500905, "loss": 0.9585, "step": 3510 }, { "epoch": 0.6263491213986263, "grad_norm": 0.5084063410758972, "learning_rate": 0.0003884334649519106, "loss": 1.1892, "step": 3511 }, { "epoch": 0.6265275176166265, "grad_norm": 0.5068933367729187, "learning_rate": 0.0003883751190963075, "loss": 0.8802, "step": 3512 }, { "epoch": 0.6267059138346267, "grad_norm": 0.5390312075614929, "learning_rate": 0.0003883167623727821, "loss": 1.0255, "step": 3513 }, { "epoch": 0.6268843100526269, "grad_norm": 0.5518016815185547, "learning_rate": 0.0003882583947859176, "loss": 1.0363, "step": 3514 }, { "epoch": 0.6270627062706271, "grad_norm": 0.435151606798172, "learning_rate": 0.0003882000163402983, "loss": 0.8027, "step": 3515 }, { "epoch": 0.6272411024886272, "grad_norm": 0.4693804979324341, "learning_rate": 0.00038814162704050925, "loss": 0.7981, "step": 3516 }, { "epoch": 0.6274194987066274, "grad_norm": 0.44543004035949707, "learning_rate": 0.0003880832268911363, "loss": 0.7381, "step": 3517 }, { "epoch": 0.6275978949246276, "grad_norm": 0.47917303442955017, "learning_rate": 0.00038802481589676605, "loss": 0.8535, "step": 3518 }, { "epoch": 0.6277762911426278, "grad_norm": 0.5061355233192444, "learning_rate": 0.0003879663940619861, "loss": 0.9866, "step": 3519 }, { "epoch": 0.627954687360628, "grad_norm": 0.5138995051383972, "learning_rate": 0.00038790796139138506, "loss": 0.9151, "step": 3520 }, { "epoch": 0.6281330835786282, "grad_norm": 0.5253726840019226, "learning_rate": 0.000387849517889552, "loss": 1.0164, "step": 3521 }, { "epoch": 0.6283114797966283, "grad_norm": 0.4949781000614166, "learning_rate": 0.00038779106356107715, "loss": 1.0074, "step": 3522 }, { "epoch": 0.6284898760146285, "grad_norm": 0.49461349844932556, "learning_rate": 0.0003877325984105514, "loss": 0.9045, "step": 3523 }, { "epoch": 0.6286682722326287, "grad_norm": 0.4767039716243744, "learning_rate": 0.00038767412244256673, "loss": 0.7782, "step": 3524 }, { "epoch": 0.6288466684506289, "grad_norm": 0.5934154391288757, "learning_rate": 0.00038761563566171576, "loss": 1.2435, "step": 3525 }, { "epoch": 0.6290250646686291, "grad_norm": 0.5259369611740112, "learning_rate": 0.00038755713807259184, "loss": 0.9742, "step": 3526 }, { "epoch": 0.6292034608866292, "grad_norm": 0.44396549463272095, "learning_rate": 0.0003874986296797896, "loss": 0.7857, "step": 3527 }, { "epoch": 0.6293818571046293, "grad_norm": 0.5298401713371277, "learning_rate": 0.0003874401104879041, "loss": 0.9236, "step": 3528 }, { "epoch": 0.6295602533226295, "grad_norm": 0.5219202637672424, "learning_rate": 0.00038738158050153157, "loss": 1.0415, "step": 3529 }, { "epoch": 0.6297386495406297, "grad_norm": 0.5267150402069092, "learning_rate": 0.0003873230397252687, "loss": 0.9082, "step": 3530 }, { "epoch": 0.62991704575863, "grad_norm": 0.5192800164222717, "learning_rate": 0.0003872644881637135, "loss": 0.8653, "step": 3531 }, { "epoch": 0.6300954419766301, "grad_norm": 0.5463857054710388, "learning_rate": 0.0003872059258214644, "loss": 0.9292, "step": 3532 }, { "epoch": 0.6302738381946302, "grad_norm": 0.659090518951416, "learning_rate": 0.00038714735270312095, "loss": 0.8808, "step": 3533 }, { "epoch": 0.6304522344126304, "grad_norm": 0.5042756795883179, "learning_rate": 0.0003870887688132834, "loss": 0.8481, "step": 3534 }, { "epoch": 0.6306306306306306, "grad_norm": 0.4895661771297455, "learning_rate": 0.00038703017415655296, "loss": 0.9983, "step": 3535 }, { "epoch": 0.6308090268486308, "grad_norm": 0.46938470005989075, "learning_rate": 0.00038697156873753163, "loss": 0.9572, "step": 3536 }, { "epoch": 0.630987423066631, "grad_norm": 0.4477916359901428, "learning_rate": 0.00038691295256082227, "loss": 0.798, "step": 3537 }, { "epoch": 0.6311658192846311, "grad_norm": 0.5256043076515198, "learning_rate": 0.0003868543256310284, "loss": 0.9967, "step": 3538 }, { "epoch": 0.6313442155026313, "grad_norm": 0.5086610913276672, "learning_rate": 0.0003867956879527548, "loss": 1.1852, "step": 3539 }, { "epoch": 0.6315226117206315, "grad_norm": 0.47662028670310974, "learning_rate": 0.00038673703953060677, "loss": 0.9776, "step": 3540 }, { "epoch": 0.6317010079386317, "grad_norm": 0.461160272359848, "learning_rate": 0.00038667838036919046, "loss": 0.8206, "step": 3541 }, { "epoch": 0.6318794041566319, "grad_norm": 0.5197997093200684, "learning_rate": 0.0003866197104731129, "loss": 0.9646, "step": 3542 }, { "epoch": 0.6320578003746321, "grad_norm": 0.48244959115982056, "learning_rate": 0.0003865610298469821, "loss": 0.7589, "step": 3543 }, { "epoch": 0.6322361965926322, "grad_norm": 0.4731544852256775, "learning_rate": 0.00038650233849540683, "loss": 0.9059, "step": 3544 }, { "epoch": 0.6324145928106324, "grad_norm": 0.520389199256897, "learning_rate": 0.00038644363642299665, "loss": 1.0436, "step": 3545 }, { "epoch": 0.6325929890286326, "grad_norm": 0.49500808119773865, "learning_rate": 0.00038638492363436195, "loss": 0.9817, "step": 3546 }, { "epoch": 0.6327713852466328, "grad_norm": 0.4835432469844818, "learning_rate": 0.000386326200134114, "loss": 0.9696, "step": 3547 }, { "epoch": 0.632949781464633, "grad_norm": 0.45955875515937805, "learning_rate": 0.000386267465926865, "loss": 0.9491, "step": 3548 }, { "epoch": 0.6331281776826331, "grad_norm": 0.5106682777404785, "learning_rate": 0.00038620872101722783, "loss": 0.9027, "step": 3549 }, { "epoch": 0.6333065739006333, "grad_norm": 0.5530784130096436, "learning_rate": 0.0003861499654098164, "loss": 1.1835, "step": 3550 }, { "epoch": 0.6334849701186335, "grad_norm": 0.45463332533836365, "learning_rate": 0.0003860911991092452, "loss": 0.8381, "step": 3551 }, { "epoch": 0.6336633663366337, "grad_norm": 0.5137251615524292, "learning_rate": 0.0003860324221201298, "loss": 0.9746, "step": 3552 }, { "epoch": 0.6338417625546339, "grad_norm": 0.4892425835132599, "learning_rate": 0.00038597363444708657, "loss": 1.0123, "step": 3553 }, { "epoch": 0.6340201587726341, "grad_norm": 0.482229083776474, "learning_rate": 0.00038591483609473257, "loss": 0.796, "step": 3554 }, { "epoch": 0.6341985549906342, "grad_norm": 0.44259580969810486, "learning_rate": 0.0003858560270676858, "loss": 1.0132, "step": 3555 }, { "epoch": 0.6343769512086344, "grad_norm": 0.5081446170806885, "learning_rate": 0.00038579720737056517, "loss": 0.9065, "step": 3556 }, { "epoch": 0.6345553474266346, "grad_norm": 0.4761013090610504, "learning_rate": 0.0003857383770079902, "loss": 0.9184, "step": 3557 }, { "epoch": 0.6347337436446348, "grad_norm": 0.5290868282318115, "learning_rate": 0.00038567953598458163, "loss": 0.9764, "step": 3558 }, { "epoch": 0.634912139862635, "grad_norm": 0.5481404066085815, "learning_rate": 0.00038562068430496066, "loss": 0.9443, "step": 3559 }, { "epoch": 0.635090536080635, "grad_norm": 0.48112472891807556, "learning_rate": 0.00038556182197374957, "loss": 1.0576, "step": 3560 }, { "epoch": 0.6352689322986352, "grad_norm": 0.46322664618492126, "learning_rate": 0.0003855029489955713, "loss": 0.8526, "step": 3561 }, { "epoch": 0.6354473285166354, "grad_norm": 0.501327633857727, "learning_rate": 0.0003854440653750496, "loss": 1.0425, "step": 3562 }, { "epoch": 0.6356257247346356, "grad_norm": 0.9595483541488647, "learning_rate": 0.0003853851711168094, "loss": 0.73, "step": 3563 }, { "epoch": 0.6358041209526358, "grad_norm": 0.44412168860435486, "learning_rate": 0.00038532626622547614, "loss": 0.8409, "step": 3564 }, { "epoch": 0.635982517170636, "grad_norm": 0.5374020338058472, "learning_rate": 0.0003852673507056761, "loss": 1.0608, "step": 3565 }, { "epoch": 0.6361609133886361, "grad_norm": 0.5410022735595703, "learning_rate": 0.0003852084245620365, "loss": 1.0212, "step": 3566 }, { "epoch": 0.6363393096066363, "grad_norm": 0.46647214889526367, "learning_rate": 0.0003851494877991856, "loss": 0.9998, "step": 3567 }, { "epoch": 0.6365177058246365, "grad_norm": 0.49088791012763977, "learning_rate": 0.0003850905404217519, "loss": 1.0369, "step": 3568 }, { "epoch": 0.6366961020426367, "grad_norm": 0.5109038352966309, "learning_rate": 0.00038503158243436537, "loss": 1.1344, "step": 3569 }, { "epoch": 0.6368744982606369, "grad_norm": 0.5097025036811829, "learning_rate": 0.0003849726138416565, "loss": 0.9358, "step": 3570 }, { "epoch": 0.637052894478637, "grad_norm": 0.4957421123981476, "learning_rate": 0.00038491363464825655, "loss": 1.1196, "step": 3571 }, { "epoch": 0.6372312906966372, "grad_norm": 0.5967016220092773, "learning_rate": 0.00038485464485879783, "loss": 1.1693, "step": 3572 }, { "epoch": 0.6374096869146374, "grad_norm": 0.4782984256744385, "learning_rate": 0.0003847956444779133, "loss": 1.0175, "step": 3573 }, { "epoch": 0.6375880831326376, "grad_norm": 0.4927447438240051, "learning_rate": 0.0003847366335102369, "loss": 1.0854, "step": 3574 }, { "epoch": 0.6377664793506378, "grad_norm": 0.4766659140586853, "learning_rate": 0.0003846776119604033, "loss": 0.8279, "step": 3575 }, { "epoch": 0.637944875568638, "grad_norm": 0.46618908643722534, "learning_rate": 0.000384618579833048, "loss": 0.8925, "step": 3576 }, { "epoch": 0.6381232717866381, "grad_norm": 0.48249977827072144, "learning_rate": 0.0003845595371328074, "loss": 0.7996, "step": 3577 }, { "epoch": 0.6383016680046383, "grad_norm": 0.463788777589798, "learning_rate": 0.0003845004838643186, "loss": 0.8095, "step": 3578 }, { "epoch": 0.6384800642226385, "grad_norm": 0.5117577910423279, "learning_rate": 0.0003844414200322197, "loss": 0.939, "step": 3579 }, { "epoch": 0.6386584604406387, "grad_norm": 1.0510876178741455, "learning_rate": 0.0003843823456411495, "loss": 0.9856, "step": 3580 }, { "epoch": 0.6388368566586389, "grad_norm": 0.4879765808582306, "learning_rate": 0.00038432326069574776, "loss": 0.8623, "step": 3581 }, { "epoch": 0.639015252876639, "grad_norm": 0.5112807154655457, "learning_rate": 0.0003842641652006549, "loss": 0.7826, "step": 3582 }, { "epoch": 0.6391936490946392, "grad_norm": 0.49718165397644043, "learning_rate": 0.0003842050591605122, "loss": 0.9188, "step": 3583 }, { "epoch": 0.6393720453126394, "grad_norm": 0.4578002989292145, "learning_rate": 0.00038414594257996207, "loss": 0.8357, "step": 3584 }, { "epoch": 0.6395504415306396, "grad_norm": 0.5022866725921631, "learning_rate": 0.0003840868154636472, "loss": 0.9341, "step": 3585 }, { "epoch": 0.6397288377486398, "grad_norm": 0.5059497356414795, "learning_rate": 0.00038402767781621163, "loss": 0.9208, "step": 3586 }, { "epoch": 0.63990723396664, "grad_norm": 0.46369996666908264, "learning_rate": 0.0003839685296422999, "loss": 1.0211, "step": 3587 }, { "epoch": 0.64008563018464, "grad_norm": 0.46570396423339844, "learning_rate": 0.0003839093709465574, "loss": 0.7812, "step": 3588 }, { "epoch": 0.6402640264026402, "grad_norm": 0.5327849388122559, "learning_rate": 0.00038385020173363065, "loss": 0.868, "step": 3589 }, { "epoch": 0.6404424226206404, "grad_norm": 0.5208088755607605, "learning_rate": 0.0003837910220081667, "loss": 1.0375, "step": 3590 }, { "epoch": 0.6406208188386406, "grad_norm": 0.6217350363731384, "learning_rate": 0.00038373183177481336, "loss": 0.9972, "step": 3591 }, { "epoch": 0.6407992150566408, "grad_norm": 0.5081048607826233, "learning_rate": 0.00038367263103821956, "loss": 0.9218, "step": 3592 }, { "epoch": 0.6409776112746409, "grad_norm": 0.540647566318512, "learning_rate": 0.00038361341980303477, "loss": 0.838, "step": 3593 }, { "epoch": 0.6411560074926411, "grad_norm": 0.4405638873577118, "learning_rate": 0.0003835541980739096, "loss": 0.9328, "step": 3594 }, { "epoch": 0.6413344037106413, "grad_norm": 0.501441240310669, "learning_rate": 0.00038349496585549504, "loss": 0.9606, "step": 3595 }, { "epoch": 0.6415127999286415, "grad_norm": 0.6081332564353943, "learning_rate": 0.00038343572315244337, "loss": 1.032, "step": 3596 }, { "epoch": 0.6416911961466417, "grad_norm": 0.5091875791549683, "learning_rate": 0.00038337646996940746, "loss": 0.7836, "step": 3597 }, { "epoch": 0.6418695923646419, "grad_norm": 0.48131605982780457, "learning_rate": 0.00038331720631104094, "loss": 0.8946, "step": 3598 }, { "epoch": 0.642047988582642, "grad_norm": 1.0239105224609375, "learning_rate": 0.00038325793218199844, "loss": 0.7923, "step": 3599 }, { "epoch": 0.6422263848006422, "grad_norm": 0.5322194695472717, "learning_rate": 0.00038319864758693537, "loss": 1.0488, "step": 3600 }, { "epoch": 0.6424047810186424, "grad_norm": 0.47548213601112366, "learning_rate": 0.00038313935253050767, "loss": 1.0053, "step": 3601 }, { "epoch": 0.6425831772366426, "grad_norm": 0.5237056016921997, "learning_rate": 0.00038308004701737263, "loss": 1.1428, "step": 3602 }, { "epoch": 0.6427615734546428, "grad_norm": 0.6333736181259155, "learning_rate": 0.00038302073105218794, "loss": 1.0623, "step": 3603 }, { "epoch": 0.6429399696726429, "grad_norm": 0.49249467253685, "learning_rate": 0.00038296140463961226, "loss": 1.0911, "step": 3604 }, { "epoch": 0.6431183658906431, "grad_norm": 0.479319304227829, "learning_rate": 0.00038290206778430515, "loss": 0.8087, "step": 3605 }, { "epoch": 0.6432967621086433, "grad_norm": 0.5194647312164307, "learning_rate": 0.00038284272049092673, "loss": 0.8521, "step": 3606 }, { "epoch": 0.6434751583266435, "grad_norm": 0.47771453857421875, "learning_rate": 0.00038278336276413827, "loss": 1.0094, "step": 3607 }, { "epoch": 0.6436535545446437, "grad_norm": 1.2369436025619507, "learning_rate": 0.00038272399460860166, "loss": 1.007, "step": 3608 }, { "epoch": 0.6438319507626439, "grad_norm": 0.4876594841480255, "learning_rate": 0.00038266461602897957, "loss": 1.0556, "step": 3609 }, { "epoch": 0.644010346980644, "grad_norm": 0.5744562149047852, "learning_rate": 0.0003826052270299356, "loss": 1.0985, "step": 3610 }, { "epoch": 0.6441887431986442, "grad_norm": 0.4735945165157318, "learning_rate": 0.00038254582761613424, "loss": 1.0116, "step": 3611 }, { "epoch": 0.6443671394166444, "grad_norm": 0.5286600589752197, "learning_rate": 0.0003824864177922406, "loss": 1.1372, "step": 3612 }, { "epoch": 0.6445455356346446, "grad_norm": 0.5075108408927917, "learning_rate": 0.0003824269975629207, "loss": 1.0383, "step": 3613 }, { "epoch": 0.6447239318526448, "grad_norm": 0.5046312212944031, "learning_rate": 0.00038236756693284143, "loss": 1.0341, "step": 3614 }, { "epoch": 0.6449023280706448, "grad_norm": 0.49919357895851135, "learning_rate": 0.00038230812590667044, "loss": 1.0578, "step": 3615 }, { "epoch": 0.645080724288645, "grad_norm": 0.4575040340423584, "learning_rate": 0.0003822486744890761, "loss": 0.7656, "step": 3616 }, { "epoch": 0.6452591205066452, "grad_norm": 0.4908180236816406, "learning_rate": 0.00038218921268472786, "loss": 0.7803, "step": 3617 }, { "epoch": 0.6454375167246454, "grad_norm": 0.4667602479457855, "learning_rate": 0.00038212974049829564, "loss": 0.9217, "step": 3618 }, { "epoch": 0.6456159129426456, "grad_norm": 0.49208131432533264, "learning_rate": 0.00038207025793445047, "loss": 1.3218, "step": 3619 }, { "epoch": 0.6457943091606458, "grad_norm": 0.47899407148361206, "learning_rate": 0.0003820107649978641, "loss": 0.9374, "step": 3620 }, { "epoch": 0.6459727053786459, "grad_norm": 0.5173959136009216, "learning_rate": 0.00038195126169320915, "loss": 1.1439, "step": 3621 }, { "epoch": 0.6461511015966461, "grad_norm": 1.7555327415466309, "learning_rate": 0.00038189174802515883, "loss": 1.1495, "step": 3622 }, { "epoch": 0.6463294978146463, "grad_norm": 0.5820402503013611, "learning_rate": 0.0003818322239983873, "loss": 0.9644, "step": 3623 }, { "epoch": 0.6465078940326465, "grad_norm": 0.5061475038528442, "learning_rate": 0.0003817726896175697, "loss": 1.0867, "step": 3624 }, { "epoch": 0.6466862902506467, "grad_norm": 0.5175919532775879, "learning_rate": 0.00038171314488738176, "loss": 0.9003, "step": 3625 }, { "epoch": 0.6468646864686468, "grad_norm": 0.5276834964752197, "learning_rate": 0.0003816535898125001, "loss": 1.0232, "step": 3626 }, { "epoch": 0.647043082686647, "grad_norm": 0.4682513177394867, "learning_rate": 0.0003815940243976022, "loss": 0.8202, "step": 3627 }, { "epoch": 0.6472214789046472, "grad_norm": 0.5589001774787903, "learning_rate": 0.00038153444864736616, "loss": 0.9364, "step": 3628 }, { "epoch": 0.6473998751226474, "grad_norm": 0.5058655738830566, "learning_rate": 0.00038147486256647113, "loss": 1.0972, "step": 3629 }, { "epoch": 0.6475782713406476, "grad_norm": 0.44797369837760925, "learning_rate": 0.0003814152661595971, "loss": 0.8263, "step": 3630 }, { "epoch": 0.6477566675586478, "grad_norm": 0.5152768492698669, "learning_rate": 0.00038135565943142445, "loss": 0.9512, "step": 3631 }, { "epoch": 0.6479350637766479, "grad_norm": 0.4588833451271057, "learning_rate": 0.00038129604238663494, "loss": 0.8475, "step": 3632 }, { "epoch": 0.6481134599946481, "grad_norm": 0.5147669911384583, "learning_rate": 0.00038123641502991074, "loss": 0.9934, "step": 3633 }, { "epoch": 0.6482918562126483, "grad_norm": 0.46724918484687805, "learning_rate": 0.0003811767773659349, "loss": 0.7732, "step": 3634 }, { "epoch": 0.6484702524306485, "grad_norm": 0.6038320064544678, "learning_rate": 0.00038111712939939153, "loss": 1.3013, "step": 3635 }, { "epoch": 0.6486486486486487, "grad_norm": 0.4923225939273834, "learning_rate": 0.0003810574711349652, "loss": 1.205, "step": 3636 }, { "epoch": 0.6488270448666488, "grad_norm": 0.4713652431964874, "learning_rate": 0.0003809978025773415, "loss": 0.8677, "step": 3637 }, { "epoch": 0.649005441084649, "grad_norm": 0.4485718607902527, "learning_rate": 0.00038093812373120675, "loss": 0.7994, "step": 3638 }, { "epoch": 0.6491838373026492, "grad_norm": 0.5080364942550659, "learning_rate": 0.00038087843460124813, "loss": 0.9032, "step": 3639 }, { "epoch": 0.6493622335206494, "grad_norm": 0.5731015801429749, "learning_rate": 0.0003808187351921535, "loss": 0.8565, "step": 3640 }, { "epoch": 0.6495406297386496, "grad_norm": 0.47458168864250183, "learning_rate": 0.00038075902550861176, "loss": 1.0262, "step": 3641 }, { "epoch": 0.6497190259566498, "grad_norm": 1.2307530641555786, "learning_rate": 0.0003806993055553124, "loss": 0.8389, "step": 3642 }, { "epoch": 0.6498974221746499, "grad_norm": 0.481864333152771, "learning_rate": 0.00038063957533694594, "loss": 0.9069, "step": 3643 }, { "epoch": 0.65007581839265, "grad_norm": 0.46057215332984924, "learning_rate": 0.0003805798348582034, "loss": 0.7955, "step": 3644 }, { "epoch": 0.6502542146106502, "grad_norm": 0.5290510058403015, "learning_rate": 0.0003805200841237767, "loss": 1.1641, "step": 3645 }, { "epoch": 0.6504326108286504, "grad_norm": 0.4636685252189636, "learning_rate": 0.0003804603231383589, "loss": 0.8106, "step": 3646 }, { "epoch": 0.6506110070466506, "grad_norm": 0.5197182893753052, "learning_rate": 0.00038040055190664336, "loss": 1.0978, "step": 3647 }, { "epoch": 0.6507894032646507, "grad_norm": 0.5658067464828491, "learning_rate": 0.0003803407704333246, "loss": 1.0294, "step": 3648 }, { "epoch": 0.6509677994826509, "grad_norm": 0.4995347261428833, "learning_rate": 0.0003802809787230979, "loss": 1.0974, "step": 3649 }, { "epoch": 0.6511461957006511, "grad_norm": 0.5178309679031372, "learning_rate": 0.00038022117678065915, "loss": 1.0917, "step": 3650 }, { "epoch": 0.6513245919186513, "grad_norm": 0.450339674949646, "learning_rate": 0.0003801613646107052, "loss": 0.7781, "step": 3651 }, { "epoch": 0.6515029881366515, "grad_norm": 0.48041054606437683, "learning_rate": 0.0003801015422179337, "loss": 0.7998, "step": 3652 }, { "epoch": 0.6516813843546517, "grad_norm": 0.5061138868331909, "learning_rate": 0.00038004170960704306, "loss": 0.9966, "step": 3653 }, { "epoch": 0.6518597805726518, "grad_norm": 0.4858459234237671, "learning_rate": 0.0003799818667827325, "loss": 0.8406, "step": 3654 }, { "epoch": 0.652038176790652, "grad_norm": 0.49688011407852173, "learning_rate": 0.00037992201374970205, "loss": 1.0493, "step": 3655 }, { "epoch": 0.6522165730086522, "grad_norm": 0.47799569368362427, "learning_rate": 0.0003798621505126526, "loss": 0.8846, "step": 3656 }, { "epoch": 0.6523949692266524, "grad_norm": 0.500594437122345, "learning_rate": 0.0003798022770762857, "loss": 1.0214, "step": 3657 }, { "epoch": 0.6525733654446526, "grad_norm": 0.5715755224227905, "learning_rate": 0.0003797423934453038, "loss": 1.312, "step": 3658 }, { "epoch": 0.6527517616626527, "grad_norm": 0.47365105152130127, "learning_rate": 0.00037968249962441015, "loss": 0.9041, "step": 3659 }, { "epoch": 0.6529301578806529, "grad_norm": 0.5020959973335266, "learning_rate": 0.00037962259561830883, "loss": 0.9089, "step": 3660 }, { "epoch": 0.6531085540986531, "grad_norm": 0.5108777284622192, "learning_rate": 0.0003795626814317046, "loss": 0.7617, "step": 3661 }, { "epoch": 0.6532869503166533, "grad_norm": 0.6099095940589905, "learning_rate": 0.0003795027570693032, "loss": 1.2797, "step": 3662 }, { "epoch": 0.6534653465346535, "grad_norm": 0.5367801189422607, "learning_rate": 0.00037944282253581086, "loss": 0.9654, "step": 3663 }, { "epoch": 0.6536437427526537, "grad_norm": 0.4482106566429138, "learning_rate": 0.000379382877835935, "loss": 0.8033, "step": 3664 }, { "epoch": 0.6538221389706538, "grad_norm": 0.48024243116378784, "learning_rate": 0.0003793229229743836, "loss": 0.7399, "step": 3665 }, { "epoch": 0.654000535188654, "grad_norm": 0.49091434478759766, "learning_rate": 0.00037926295795586546, "loss": 0.9129, "step": 3666 }, { "epoch": 0.6541789314066542, "grad_norm": 0.428303062915802, "learning_rate": 0.00037920298278509027, "loss": 0.8762, "step": 3667 }, { "epoch": 0.6543573276246544, "grad_norm": 0.4690133035182953, "learning_rate": 0.00037914299746676837, "loss": 0.748, "step": 3668 }, { "epoch": 0.6545357238426546, "grad_norm": 0.5279445052146912, "learning_rate": 0.00037908300200561107, "loss": 0.9056, "step": 3669 }, { "epoch": 0.6547141200606548, "grad_norm": 0.5457646250724792, "learning_rate": 0.0003790229964063303, "loss": 0.8575, "step": 3670 }, { "epoch": 0.6548925162786549, "grad_norm": 0.4674092233181, "learning_rate": 0.00037896298067363897, "loss": 0.7337, "step": 3671 }, { "epoch": 0.6550709124966551, "grad_norm": 0.539322555065155, "learning_rate": 0.00037890295481225056, "loss": 1.1518, "step": 3672 }, { "epoch": 0.6552493087146553, "grad_norm": 0.5358078479766846, "learning_rate": 0.00037884291882687955, "loss": 1.2239, "step": 3673 }, { "epoch": 0.6554277049326555, "grad_norm": 0.4866465926170349, "learning_rate": 0.0003787828727222412, "loss": 1.1942, "step": 3674 }, { "epoch": 0.6556061011506557, "grad_norm": 0.5002499222755432, "learning_rate": 0.0003787228165030514, "loss": 0.8127, "step": 3675 }, { "epoch": 0.6557844973686557, "grad_norm": 0.5849499106407166, "learning_rate": 0.00037866275017402694, "loss": 0.9762, "step": 3676 }, { "epoch": 0.6559628935866559, "grad_norm": 0.46588608622550964, "learning_rate": 0.0003786026737398857, "loss": 0.8933, "step": 3677 }, { "epoch": 0.6561412898046561, "grad_norm": 0.48897257447242737, "learning_rate": 0.0003785425872053455, "loss": 0.8101, "step": 3678 }, { "epoch": 0.6563196860226563, "grad_norm": 0.46128103137016296, "learning_rate": 0.00037848249057512596, "loss": 0.8682, "step": 3679 }, { "epoch": 0.6564980822406565, "grad_norm": 0.5110675692558289, "learning_rate": 0.00037842238385394684, "loss": 0.8593, "step": 3680 }, { "epoch": 0.6566764784586567, "grad_norm": 0.4921587407588959, "learning_rate": 0.00037836226704652897, "loss": 0.9287, "step": 3681 }, { "epoch": 0.6568548746766568, "grad_norm": 0.5147567987442017, "learning_rate": 0.00037830214015759393, "loss": 0.8533, "step": 3682 }, { "epoch": 0.657033270894657, "grad_norm": 0.4901014268398285, "learning_rate": 0.0003782420031918641, "loss": 0.9394, "step": 3683 }, { "epoch": 0.6572116671126572, "grad_norm": 0.5078468918800354, "learning_rate": 0.00037818185615406236, "loss": 0.9224, "step": 3684 }, { "epoch": 0.6573900633306574, "grad_norm": 0.5428446531295776, "learning_rate": 0.0003781216990489129, "loss": 1.1286, "step": 3685 }, { "epoch": 0.6575684595486576, "grad_norm": 0.5101941227912903, "learning_rate": 0.0003780615318811402, "loss": 0.9386, "step": 3686 }, { "epoch": 0.6577468557666577, "grad_norm": 0.5293329358100891, "learning_rate": 0.00037800135465547, "loss": 0.9849, "step": 3687 }, { "epoch": 0.6579252519846579, "grad_norm": 0.47697338461875916, "learning_rate": 0.00037794116737662847, "loss": 0.8925, "step": 3688 }, { "epoch": 0.6581036482026581, "grad_norm": 0.5124788284301758, "learning_rate": 0.00037788097004934275, "loss": 1.0044, "step": 3689 }, { "epoch": 0.6582820444206583, "grad_norm": 0.4236263632774353, "learning_rate": 0.00037782076267834063, "loss": 0.8828, "step": 3690 }, { "epoch": 0.6584604406386585, "grad_norm": 0.4617786109447479, "learning_rate": 0.00037776054526835086, "loss": 0.8969, "step": 3691 }, { "epoch": 0.6586388368566587, "grad_norm": 0.596630871295929, "learning_rate": 0.0003777003178241028, "loss": 1.0182, "step": 3692 }, { "epoch": 0.6588172330746588, "grad_norm": 0.4604688882827759, "learning_rate": 0.00037764008035032676, "loss": 0.9566, "step": 3693 }, { "epoch": 0.658995629292659, "grad_norm": 0.8154518604278564, "learning_rate": 0.00037757983285175367, "loss": 0.8091, "step": 3694 }, { "epoch": 0.6591740255106592, "grad_norm": 0.5884724259376526, "learning_rate": 0.00037751957533311545, "loss": 0.9988, "step": 3695 }, { "epoch": 0.6593524217286594, "grad_norm": 0.43609222769737244, "learning_rate": 0.0003774593077991447, "loss": 0.8279, "step": 3696 }, { "epoch": 0.6595308179466596, "grad_norm": 0.5332170128822327, "learning_rate": 0.0003773990302545748, "loss": 0.8307, "step": 3697 }, { "epoch": 0.6597092141646597, "grad_norm": 0.4429665505886078, "learning_rate": 0.0003773387427041398, "loss": 0.9021, "step": 3698 }, { "epoch": 0.6598876103826599, "grad_norm": 0.48425301909446716, "learning_rate": 0.00037727844515257473, "loss": 0.8488, "step": 3699 }, { "epoch": 0.6600660066006601, "grad_norm": 0.45970770716667175, "learning_rate": 0.00037721813760461544, "loss": 0.9876, "step": 3700 }, { "epoch": 0.6602444028186603, "grad_norm": 0.47570955753326416, "learning_rate": 0.00037715782006499826, "loss": 0.9802, "step": 3701 }, { "epoch": 0.6604227990366605, "grad_norm": 0.5924715399742126, "learning_rate": 0.0003770974925384607, "loss": 0.9458, "step": 3702 }, { "epoch": 0.6606011952546607, "grad_norm": 0.47865262627601624, "learning_rate": 0.0003770371550297407, "loss": 0.7191, "step": 3703 }, { "epoch": 0.6607795914726607, "grad_norm": 0.6037271618843079, "learning_rate": 0.00037697680754357726, "loss": 1.1374, "step": 3704 }, { "epoch": 0.6609579876906609, "grad_norm": 0.499804824590683, "learning_rate": 0.00037691645008471, "loss": 0.9418, "step": 3705 }, { "epoch": 0.6611363839086611, "grad_norm": 0.4647560715675354, "learning_rate": 0.00037685608265787936, "loss": 0.7682, "step": 3706 }, { "epoch": 0.6613147801266613, "grad_norm": 0.48037344217300415, "learning_rate": 0.0003767957052678266, "loss": 0.6937, "step": 3707 }, { "epoch": 0.6614931763446615, "grad_norm": 0.5261335968971252, "learning_rate": 0.00037673531791929365, "loss": 1.2998, "step": 3708 }, { "epoch": 0.6616715725626616, "grad_norm": 0.48943454027175903, "learning_rate": 0.0003766749206170234, "loss": 0.7968, "step": 3709 }, { "epoch": 0.6618499687806618, "grad_norm": 0.4693853557109833, "learning_rate": 0.0003766145133657594, "loss": 0.9241, "step": 3710 }, { "epoch": 0.662028364998662, "grad_norm": 0.49054959416389465, "learning_rate": 0.00037655409617024606, "loss": 0.913, "step": 3711 }, { "epoch": 0.6622067612166622, "grad_norm": 0.49110257625579834, "learning_rate": 0.0003764936690352284, "loss": 1.0838, "step": 3712 }, { "epoch": 0.6623851574346624, "grad_norm": 0.4667607843875885, "learning_rate": 0.00037643323196545245, "loss": 0.8978, "step": 3713 }, { "epoch": 0.6625635536526626, "grad_norm": 0.5172199010848999, "learning_rate": 0.0003763727849656648, "loss": 1.2393, "step": 3714 }, { "epoch": 0.6627419498706627, "grad_norm": 0.4816920757293701, "learning_rate": 0.0003763123280406131, "loss": 1.0024, "step": 3715 }, { "epoch": 0.6629203460886629, "grad_norm": 0.5231166481971741, "learning_rate": 0.00037625186119504537, "loss": 1.0882, "step": 3716 }, { "epoch": 0.6630987423066631, "grad_norm": 0.5482428669929504, "learning_rate": 0.000376191384433711, "loss": 1.3842, "step": 3717 }, { "epoch": 0.6632771385246633, "grad_norm": 0.5781173706054688, "learning_rate": 0.00037613089776135947, "loss": 1.4105, "step": 3718 }, { "epoch": 0.6634555347426635, "grad_norm": 0.5220258235931396, "learning_rate": 0.0003760704011827415, "loss": 0.9949, "step": 3719 }, { "epoch": 0.6636339309606636, "grad_norm": 0.46553224325180054, "learning_rate": 0.0003760098947026085, "loss": 0.7985, "step": 3720 }, { "epoch": 0.6638123271786638, "grad_norm": 0.47272759675979614, "learning_rate": 0.00037594937832571254, "loss": 0.819, "step": 3721 }, { "epoch": 0.663990723396664, "grad_norm": 0.5201370716094971, "learning_rate": 0.0003758888520568067, "loss": 0.899, "step": 3722 }, { "epoch": 0.6641691196146642, "grad_norm": 0.52834552526474, "learning_rate": 0.0003758283159006446, "loss": 0.9406, "step": 3723 }, { "epoch": 0.6643475158326644, "grad_norm": 0.4457343518733978, "learning_rate": 0.00037576776986198064, "loss": 0.841, "step": 3724 }, { "epoch": 0.6645259120506646, "grad_norm": 0.49147358536720276, "learning_rate": 0.00037570721394557016, "loss": 0.9662, "step": 3725 }, { "epoch": 0.6647043082686647, "grad_norm": 0.5019490122795105, "learning_rate": 0.00037564664815616924, "loss": 1.1227, "step": 3726 }, { "epoch": 0.6648827044866649, "grad_norm": 0.4801279902458191, "learning_rate": 0.0003755860724985346, "loss": 0.7102, "step": 3727 }, { "epoch": 0.6650611007046651, "grad_norm": 0.5752885937690735, "learning_rate": 0.00037552548697742386, "loss": 1.1417, "step": 3728 }, { "epoch": 0.6652394969226653, "grad_norm": 0.45870140194892883, "learning_rate": 0.00037546489159759545, "loss": 0.763, "step": 3729 }, { "epoch": 0.6654178931406655, "grad_norm": 0.4713680148124695, "learning_rate": 0.0003754042863638084, "loss": 0.8104, "step": 3730 }, { "epoch": 0.6655962893586655, "grad_norm": 0.5558474659919739, "learning_rate": 0.0003753436712808227, "loss": 1.0264, "step": 3731 }, { "epoch": 0.6657746855766657, "grad_norm": 0.5492153167724609, "learning_rate": 0.000375283046353399, "loss": 1.1912, "step": 3732 }, { "epoch": 0.665953081794666, "grad_norm": 0.4615228474140167, "learning_rate": 0.00037522241158629866, "loss": 0.9509, "step": 3733 }, { "epoch": 0.6661314780126661, "grad_norm": 0.4563251733779907, "learning_rate": 0.00037516176698428413, "loss": 0.8065, "step": 3734 }, { "epoch": 0.6663098742306663, "grad_norm": 0.5740782618522644, "learning_rate": 0.0003751011125521182, "loss": 0.9293, "step": 3735 }, { "epoch": 0.6664882704486665, "grad_norm": 0.49503761529922485, "learning_rate": 0.0003750404482945648, "loss": 0.9861, "step": 3736 }, { "epoch": 0.6666666666666666, "grad_norm": 0.4828610122203827, "learning_rate": 0.0003749797742163883, "loss": 1.1359, "step": 3737 }, { "epoch": 0.6668450628846668, "grad_norm": 0.4599243402481079, "learning_rate": 0.00037491909032235423, "loss": 0.7794, "step": 3738 }, { "epoch": 0.667023459102667, "grad_norm": 0.5307605862617493, "learning_rate": 0.0003748583966172285, "loss": 0.8625, "step": 3739 }, { "epoch": 0.6672018553206672, "grad_norm": 0.49819859862327576, "learning_rate": 0.000374797693105778, "loss": 0.8321, "step": 3740 }, { "epoch": 0.6673802515386674, "grad_norm": 0.5101906657218933, "learning_rate": 0.0003747369797927704, "loss": 1.0208, "step": 3741 }, { "epoch": 0.6675586477566675, "grad_norm": 0.4965439438819885, "learning_rate": 0.0003746762566829742, "loss": 0.9561, "step": 3742 }, { "epoch": 0.6677370439746677, "grad_norm": 0.4781574308872223, "learning_rate": 0.00037461552378115833, "loss": 0.8842, "step": 3743 }, { "epoch": 0.6679154401926679, "grad_norm": 0.512837827205658, "learning_rate": 0.00037455478109209284, "loss": 0.9223, "step": 3744 }, { "epoch": 0.6680938364106681, "grad_norm": 0.484651654958725, "learning_rate": 0.0003744940286205485, "loss": 0.8012, "step": 3745 }, { "epoch": 0.6682722326286683, "grad_norm": 0.5075222849845886, "learning_rate": 0.00037443326637129674, "loss": 0.9905, "step": 3746 }, { "epoch": 0.6684506288466685, "grad_norm": 0.48406416177749634, "learning_rate": 0.0003743724943491097, "loss": 1.0252, "step": 3747 }, { "epoch": 0.6686290250646686, "grad_norm": 0.4906768202781677, "learning_rate": 0.0003743117125587606, "loss": 0.8734, "step": 3748 }, { "epoch": 0.6688074212826688, "grad_norm": 0.4577210545539856, "learning_rate": 0.00037425092100502297, "loss": 0.9074, "step": 3749 }, { "epoch": 0.668985817500669, "grad_norm": 0.4762527346611023, "learning_rate": 0.0003741901196926715, "loss": 0.9042, "step": 3750 }, { "epoch": 0.6691642137186692, "grad_norm": 0.5320841073989868, "learning_rate": 0.00037412930862648153, "loss": 1.1009, "step": 3751 }, { "epoch": 0.6693426099366694, "grad_norm": 0.4859885275363922, "learning_rate": 0.00037406848781122904, "loss": 0.9112, "step": 3752 }, { "epoch": 0.6695210061546695, "grad_norm": 0.5073356628417969, "learning_rate": 0.0003740076572516909, "loss": 1.1243, "step": 3753 }, { "epoch": 0.6696994023726697, "grad_norm": 0.492034912109375, "learning_rate": 0.00037394681695264475, "loss": 1.0717, "step": 3754 }, { "epoch": 0.6698777985906699, "grad_norm": 0.40963229537010193, "learning_rate": 0.0003738859669188689, "loss": 0.7091, "step": 3755 }, { "epoch": 0.6700561948086701, "grad_norm": 0.5281137824058533, "learning_rate": 0.00037382510715514255, "loss": 1.0417, "step": 3756 }, { "epoch": 0.6702345910266703, "grad_norm": 0.48045825958251953, "learning_rate": 0.0003737642376662456, "loss": 0.9008, "step": 3757 }, { "epoch": 0.6704129872446705, "grad_norm": 0.565067708492279, "learning_rate": 0.0003737033584569586, "loss": 0.9471, "step": 3758 }, { "epoch": 0.6705913834626706, "grad_norm": 0.5361825227737427, "learning_rate": 0.0003736424695320631, "loss": 1.1245, "step": 3759 }, { "epoch": 0.6707697796806708, "grad_norm": 0.47022882103919983, "learning_rate": 0.00037358157089634127, "loss": 0.9096, "step": 3760 }, { "epoch": 0.670948175898671, "grad_norm": 0.49209490418434143, "learning_rate": 0.000373520662554576, "loss": 0.9196, "step": 3761 }, { "epoch": 0.6711265721166711, "grad_norm": 0.4670359194278717, "learning_rate": 0.0003734597445115511, "loss": 0.7455, "step": 3762 }, { "epoch": 0.6713049683346713, "grad_norm": 0.520520806312561, "learning_rate": 0.000373398816772051, "loss": 0.9116, "step": 3763 }, { "epoch": 0.6714833645526714, "grad_norm": 0.5995724201202393, "learning_rate": 0.0003733378793408609, "loss": 1.3693, "step": 3764 }, { "epoch": 0.6716617607706716, "grad_norm": 0.4709326922893524, "learning_rate": 0.00037327693222276683, "loss": 0.9914, "step": 3765 }, { "epoch": 0.6718401569886718, "grad_norm": 0.4547140598297119, "learning_rate": 0.00037321597542255554, "loss": 0.9951, "step": 3766 }, { "epoch": 0.672018553206672, "grad_norm": 0.46117034554481506, "learning_rate": 0.0003731550089450146, "loss": 0.9847, "step": 3767 }, { "epoch": 0.6721969494246722, "grad_norm": 0.6557008028030396, "learning_rate": 0.00037309403279493227, "loss": 0.9359, "step": 3768 }, { "epoch": 0.6723753456426724, "grad_norm": 0.4666654169559479, "learning_rate": 0.00037303304697709755, "loss": 0.8247, "step": 3769 }, { "epoch": 0.6725537418606725, "grad_norm": 0.4964097738265991, "learning_rate": 0.00037297205149630023, "loss": 0.8682, "step": 3770 }, { "epoch": 0.6727321380786727, "grad_norm": 0.47724971175193787, "learning_rate": 0.000372911046357331, "loss": 0.7927, "step": 3771 }, { "epoch": 0.6729105342966729, "grad_norm": 0.4240172803401947, "learning_rate": 0.00037285003156498097, "loss": 0.6913, "step": 3772 }, { "epoch": 0.6730889305146731, "grad_norm": 0.7888692021369934, "learning_rate": 0.00037278900712404235, "loss": 0.734, "step": 3773 }, { "epoch": 0.6732673267326733, "grad_norm": 0.46591684222221375, "learning_rate": 0.000372727973039308, "loss": 0.8113, "step": 3774 }, { "epoch": 0.6734457229506734, "grad_norm": 0.47991588711738586, "learning_rate": 0.00037266692931557145, "loss": 0.9792, "step": 3775 }, { "epoch": 0.6736241191686736, "grad_norm": 0.4709876775741577, "learning_rate": 0.00037260587595762705, "loss": 0.8581, "step": 3776 }, { "epoch": 0.6738025153866738, "grad_norm": 0.4677605628967285, "learning_rate": 0.0003725448129702699, "loss": 0.9992, "step": 3777 }, { "epoch": 0.673980911604674, "grad_norm": 0.47087326645851135, "learning_rate": 0.0003724837403582959, "loss": 0.9046, "step": 3778 }, { "epoch": 0.6741593078226742, "grad_norm": 0.44651949405670166, "learning_rate": 0.0003724226581265016, "loss": 0.6954, "step": 3779 }, { "epoch": 0.6743377040406744, "grad_norm": 0.4970155656337738, "learning_rate": 0.0003723615662796844, "loss": 0.9304, "step": 3780 }, { "epoch": 0.6745161002586745, "grad_norm": 0.5014781355857849, "learning_rate": 0.0003723004648226425, "loss": 1.093, "step": 3781 }, { "epoch": 0.6746944964766747, "grad_norm": 0.43172088265419006, "learning_rate": 0.0003722393537601748, "loss": 0.7743, "step": 3782 }, { "epoch": 0.6748728926946749, "grad_norm": 0.4725377559661865, "learning_rate": 0.0003721782330970808, "loss": 1.0071, "step": 3783 }, { "epoch": 0.6750512889126751, "grad_norm": 0.4693569839000702, "learning_rate": 0.0003721171028381609, "loss": 0.9637, "step": 3784 }, { "epoch": 0.6752296851306753, "grad_norm": 1.366469144821167, "learning_rate": 0.0003720559629882163, "loss": 0.8897, "step": 3785 }, { "epoch": 0.6754080813486754, "grad_norm": 0.4972679615020752, "learning_rate": 0.0003719948135520489, "loss": 0.8672, "step": 3786 }, { "epoch": 0.6755864775666756, "grad_norm": 0.47456255555152893, "learning_rate": 0.00037193365453446126, "loss": 0.8747, "step": 3787 }, { "epoch": 0.6757648737846758, "grad_norm": 0.4588843286037445, "learning_rate": 0.000371872485940257, "loss": 0.7423, "step": 3788 }, { "epoch": 0.675943270002676, "grad_norm": 0.5166525840759277, "learning_rate": 0.0003718113077742401, "loss": 1.0436, "step": 3789 }, { "epoch": 0.6761216662206762, "grad_norm": 0.5156573057174683, "learning_rate": 0.0003717501200412154, "loss": 0.8229, "step": 3790 }, { "epoch": 0.6763000624386764, "grad_norm": 0.4706706702709198, "learning_rate": 0.00037168892274598884, "loss": 0.9703, "step": 3791 }, { "epoch": 0.6764784586566764, "grad_norm": 0.48749077320098877, "learning_rate": 0.0003716277158933666, "loss": 0.9563, "step": 3792 }, { "epoch": 0.6766568548746766, "grad_norm": 0.49335891008377075, "learning_rate": 0.00037156649948815585, "loss": 1.1277, "step": 3793 }, { "epoch": 0.6768352510926768, "grad_norm": 0.45877259969711304, "learning_rate": 0.00037150527353516457, "loss": 0.7111, "step": 3794 }, { "epoch": 0.677013647310677, "grad_norm": 0.5235643982887268, "learning_rate": 0.00037144403803920136, "loss": 0.9515, "step": 3795 }, { "epoch": 0.6771920435286772, "grad_norm": 0.43552201986312866, "learning_rate": 0.00037138279300507574, "loss": 0.7094, "step": 3796 }, { "epoch": 0.6773704397466773, "grad_norm": 0.4871746301651001, "learning_rate": 0.0003713215384375977, "loss": 1.0252, "step": 3797 }, { "epoch": 0.6775488359646775, "grad_norm": 0.6767199635505676, "learning_rate": 0.00037126027434157826, "loss": 1.0102, "step": 3798 }, { "epoch": 0.6777272321826777, "grad_norm": 0.514312207698822, "learning_rate": 0.0003711990007218291, "loss": 0.921, "step": 3799 }, { "epoch": 0.6779056284006779, "grad_norm": 0.5360898375511169, "learning_rate": 0.00037113771758316255, "loss": 0.9598, "step": 3800 }, { "epoch": 0.6780840246186781, "grad_norm": 0.47847217321395874, "learning_rate": 0.00037107642493039184, "loss": 0.8728, "step": 3801 }, { "epoch": 0.6782624208366783, "grad_norm": 0.49450331926345825, "learning_rate": 0.0003710151227683307, "loss": 0.7853, "step": 3802 }, { "epoch": 0.6784408170546784, "grad_norm": 0.47471120953559875, "learning_rate": 0.00037095381110179406, "loss": 1.0686, "step": 3803 }, { "epoch": 0.6786192132726786, "grad_norm": 0.5599477887153625, "learning_rate": 0.000370892489935597, "loss": 1.1936, "step": 3804 }, { "epoch": 0.6787976094906788, "grad_norm": 0.525962233543396, "learning_rate": 0.0003708311592745559, "loss": 0.7819, "step": 3805 }, { "epoch": 0.678976005708679, "grad_norm": 2.3493411540985107, "learning_rate": 0.00037076981912348753, "loss": 0.8769, "step": 3806 }, { "epoch": 0.6791544019266792, "grad_norm": 0.4811551868915558, "learning_rate": 0.0003707084694872095, "loss": 0.9784, "step": 3807 }, { "epoch": 0.6793327981446793, "grad_norm": 0.4610568583011627, "learning_rate": 0.0003706471103705402, "loss": 1.0097, "step": 3808 }, { "epoch": 0.6795111943626795, "grad_norm": 0.5705133080482483, "learning_rate": 0.0003705857417782989, "loss": 1.113, "step": 3809 }, { "epoch": 0.6796895905806797, "grad_norm": 0.4349512457847595, "learning_rate": 0.00037052436371530517, "loss": 0.7286, "step": 3810 }, { "epoch": 0.6798679867986799, "grad_norm": 0.5243642330169678, "learning_rate": 0.00037046297618637984, "loss": 0.8463, "step": 3811 }, { "epoch": 0.6800463830166801, "grad_norm": 0.7079116702079773, "learning_rate": 0.0003704015791963442, "loss": 1.1995, "step": 3812 }, { "epoch": 0.6802247792346803, "grad_norm": 0.547192394733429, "learning_rate": 0.00037034017275002043, "loss": 0.9706, "step": 3813 }, { "epoch": 0.6804031754526804, "grad_norm": 1.1593716144561768, "learning_rate": 0.00037027875685223115, "loss": 0.8588, "step": 3814 }, { "epoch": 0.6805815716706806, "grad_norm": 0.46274659037590027, "learning_rate": 0.0003702173315078001, "loss": 0.9027, "step": 3815 }, { "epoch": 0.6807599678886808, "grad_norm": 0.5039249658584595, "learning_rate": 0.0003701558967215517, "loss": 1.0564, "step": 3816 }, { "epoch": 0.680938364106681, "grad_norm": 0.43996962904930115, "learning_rate": 0.00037009445249831075, "loss": 0.9811, "step": 3817 }, { "epoch": 0.6811167603246812, "grad_norm": 0.5002626180648804, "learning_rate": 0.00037003299884290315, "loss": 1.0044, "step": 3818 }, { "epoch": 0.6812951565426812, "grad_norm": 0.5452658534049988, "learning_rate": 0.0003699715357601555, "loss": 0.82, "step": 3819 }, { "epoch": 0.6814735527606814, "grad_norm": 0.5324759483337402, "learning_rate": 0.00036991006325489507, "loss": 0.8513, "step": 3820 }, { "epoch": 0.6816519489786816, "grad_norm": 0.4961846172809601, "learning_rate": 0.00036984858133194985, "loss": 0.8615, "step": 3821 }, { "epoch": 0.6818303451966818, "grad_norm": 0.46880999207496643, "learning_rate": 0.0003697870899961487, "loss": 0.907, "step": 3822 }, { "epoch": 0.682008741414682, "grad_norm": 0.4610176086425781, "learning_rate": 0.0003697255892523211, "loss": 0.9399, "step": 3823 }, { "epoch": 0.6821871376326822, "grad_norm": 0.5100571513175964, "learning_rate": 0.00036966407910529715, "loss": 0.9109, "step": 3824 }, { "epoch": 0.6823655338506823, "grad_norm": 0.4634707570075989, "learning_rate": 0.00036960255955990787, "loss": 0.8546, "step": 3825 }, { "epoch": 0.6825439300686825, "grad_norm": 0.5142379403114319, "learning_rate": 0.0003695410306209851, "loss": 0.9521, "step": 3826 }, { "epoch": 0.6827223262866827, "grad_norm": 0.4953165352344513, "learning_rate": 0.0003694794922933612, "loss": 0.9574, "step": 3827 }, { "epoch": 0.6829007225046829, "grad_norm": 0.530571460723877, "learning_rate": 0.0003694179445818694, "loss": 1.0247, "step": 3828 }, { "epoch": 0.6830791187226831, "grad_norm": 0.49260106682777405, "learning_rate": 0.0003693563874913437, "loss": 0.8785, "step": 3829 }, { "epoch": 0.6832575149406832, "grad_norm": 0.48824089765548706, "learning_rate": 0.0003692948210266186, "loss": 0.9205, "step": 3830 }, { "epoch": 0.6834359111586834, "grad_norm": 0.5169446468353271, "learning_rate": 0.0003692332451925296, "loss": 0.7974, "step": 3831 }, { "epoch": 0.6836143073766836, "grad_norm": 0.5154136419296265, "learning_rate": 0.0003691716599939129, "loss": 0.8386, "step": 3832 }, { "epoch": 0.6837927035946838, "grad_norm": 0.47398611903190613, "learning_rate": 0.00036911006543560514, "loss": 0.9594, "step": 3833 }, { "epoch": 0.683971099812684, "grad_norm": 0.4491838812828064, "learning_rate": 0.00036904846152244425, "loss": 0.8647, "step": 3834 }, { "epoch": 0.6841494960306842, "grad_norm": 0.5062149167060852, "learning_rate": 0.0003689868482592684, "loss": 1.0463, "step": 3835 }, { "epoch": 0.6843278922486843, "grad_norm": 0.7602079510688782, "learning_rate": 0.00036892522565091666, "loss": 1.035, "step": 3836 }, { "epoch": 0.6845062884666845, "grad_norm": 0.46411171555519104, "learning_rate": 0.00036886359370222896, "loss": 0.8845, "step": 3837 }, { "epoch": 0.6846846846846847, "grad_norm": 1.1559884548187256, "learning_rate": 0.00036880195241804567, "loss": 1.0434, "step": 3838 }, { "epoch": 0.6848630809026849, "grad_norm": 0.477621853351593, "learning_rate": 0.0003687403018032082, "loss": 1.0325, "step": 3839 }, { "epoch": 0.6850414771206851, "grad_norm": 0.47134527564048767, "learning_rate": 0.0003686786418625585, "loss": 0.8306, "step": 3840 }, { "epoch": 0.6852198733386852, "grad_norm": 0.47260260581970215, "learning_rate": 0.0003686169726009393, "loss": 1.0148, "step": 3841 }, { "epoch": 0.6853982695566854, "grad_norm": 0.4394271671772003, "learning_rate": 0.0003685552940231942, "loss": 0.8118, "step": 3842 }, { "epoch": 0.6855766657746856, "grad_norm": 0.5549933910369873, "learning_rate": 0.0003684936061341673, "loss": 0.8082, "step": 3843 }, { "epoch": 0.6857550619926858, "grad_norm": 0.49931079149246216, "learning_rate": 0.00036843190893870356, "loss": 0.9249, "step": 3844 }, { "epoch": 0.685933458210686, "grad_norm": 0.5112189054489136, "learning_rate": 0.00036837020244164865, "loss": 0.9894, "step": 3845 }, { "epoch": 0.6861118544286862, "grad_norm": 0.544303297996521, "learning_rate": 0.00036830848664784894, "loss": 1.0843, "step": 3846 }, { "epoch": 0.6862902506466863, "grad_norm": 0.5149818658828735, "learning_rate": 0.00036824676156215164, "loss": 1.1905, "step": 3847 }, { "epoch": 0.6864686468646864, "grad_norm": 0.44846704602241516, "learning_rate": 0.00036818502718940463, "loss": 0.7878, "step": 3848 }, { "epoch": 0.6866470430826866, "grad_norm": 0.5292502641677856, "learning_rate": 0.00036812328353445637, "loss": 1.015, "step": 3849 }, { "epoch": 0.6868254393006868, "grad_norm": 0.4887588918209076, "learning_rate": 0.00036806153060215627, "loss": 0.9761, "step": 3850 }, { "epoch": 0.687003835518687, "grad_norm": 0.4837714433670044, "learning_rate": 0.00036799976839735436, "loss": 0.9546, "step": 3851 }, { "epoch": 0.6871822317366871, "grad_norm": 0.4452599883079529, "learning_rate": 0.00036793799692490145, "loss": 0.9103, "step": 3852 }, { "epoch": 0.6873606279546873, "grad_norm": 0.4157600402832031, "learning_rate": 0.000367876216189649, "loss": 0.7636, "step": 3853 }, { "epoch": 0.6875390241726875, "grad_norm": 0.4187622666358948, "learning_rate": 0.0003678144261964492, "loss": 0.7129, "step": 3854 }, { "epoch": 0.6877174203906877, "grad_norm": 1.3808401823043823, "learning_rate": 0.0003677526269501551, "loss": 1.03, "step": 3855 }, { "epoch": 0.6878958166086879, "grad_norm": 0.4691815972328186, "learning_rate": 0.00036769081845562033, "loss": 0.8812, "step": 3856 }, { "epoch": 0.6880742128266881, "grad_norm": 0.5647638440132141, "learning_rate": 0.0003676290007176994, "loss": 1.0311, "step": 3857 }, { "epoch": 0.6882526090446882, "grad_norm": 0.47513964772224426, "learning_rate": 0.0003675671737412473, "loss": 0.8746, "step": 3858 }, { "epoch": 0.6884310052626884, "grad_norm": 0.5500690340995789, "learning_rate": 0.00036750533753112004, "loss": 1.1705, "step": 3859 }, { "epoch": 0.6886094014806886, "grad_norm": 0.45833203196525574, "learning_rate": 0.0003674434920921741, "loss": 0.8107, "step": 3860 }, { "epoch": 0.6887877976986888, "grad_norm": 0.6918094158172607, "learning_rate": 0.00036738163742926677, "loss": 0.9976, "step": 3861 }, { "epoch": 0.688966193916689, "grad_norm": 0.49966564774513245, "learning_rate": 0.0003673197735472563, "loss": 0.7383, "step": 3862 }, { "epoch": 0.6891445901346891, "grad_norm": 0.5262913107872009, "learning_rate": 0.0003672579004510012, "loss": 0.924, "step": 3863 }, { "epoch": 0.6893229863526893, "grad_norm": 0.5179538726806641, "learning_rate": 0.000367196018145361, "loss": 0.9421, "step": 3864 }, { "epoch": 0.6895013825706895, "grad_norm": 0.502363920211792, "learning_rate": 0.00036713412663519606, "loss": 1.0026, "step": 3865 }, { "epoch": 0.6896797787886897, "grad_norm": 0.5639678239822388, "learning_rate": 0.0003670722259253672, "loss": 1.0164, "step": 3866 }, { "epoch": 0.6898581750066899, "grad_norm": 0.4934401214122772, "learning_rate": 0.0003670103160207361, "loss": 1.126, "step": 3867 }, { "epoch": 0.6900365712246901, "grad_norm": 0.4546942710876465, "learning_rate": 0.0003669483969261651, "loss": 0.8328, "step": 3868 }, { "epoch": 0.6902149674426902, "grad_norm": 0.5671041011810303, "learning_rate": 0.00036688646864651745, "loss": 1.0304, "step": 3869 }, { "epoch": 0.6903933636606904, "grad_norm": 0.5230646133422852, "learning_rate": 0.0003668245311866567, "loss": 0.9212, "step": 3870 }, { "epoch": 0.6905717598786906, "grad_norm": 0.4050388038158417, "learning_rate": 0.0003667625845514476, "loss": 0.5513, "step": 3871 }, { "epoch": 0.6907501560966908, "grad_norm": 0.6363729238510132, "learning_rate": 0.00036670062874575535, "loss": 0.9514, "step": 3872 }, { "epoch": 0.690928552314691, "grad_norm": 0.49325573444366455, "learning_rate": 0.0003666386637744459, "loss": 1.0872, "step": 3873 }, { "epoch": 0.6911069485326911, "grad_norm": 0.49835440516471863, "learning_rate": 0.00036657668964238613, "loss": 1.1338, "step": 3874 }, { "epoch": 0.6912853447506913, "grad_norm": 0.4174972176551819, "learning_rate": 0.0003665147063544432, "loss": 0.7673, "step": 3875 }, { "epoch": 0.6914637409686915, "grad_norm": 0.43042904138565063, "learning_rate": 0.0003664527139154854, "loss": 0.768, "step": 3876 }, { "epoch": 0.6916421371866917, "grad_norm": 0.46625080704689026, "learning_rate": 0.00036639071233038155, "loss": 0.9156, "step": 3877 }, { "epoch": 0.6918205334046919, "grad_norm": 0.4662775993347168, "learning_rate": 0.0003663287016040013, "loss": 0.8285, "step": 3878 }, { "epoch": 0.691998929622692, "grad_norm": 0.4520246982574463, "learning_rate": 0.0003662666817412148, "loss": 0.8158, "step": 3879 }, { "epoch": 0.6921773258406921, "grad_norm": 0.5535463690757751, "learning_rate": 0.0003662046527468932, "loss": 1.1017, "step": 3880 }, { "epoch": 0.6923557220586923, "grad_norm": 1.1918361186981201, "learning_rate": 0.00036614261462590824, "loss": 0.793, "step": 3881 }, { "epoch": 0.6925341182766925, "grad_norm": 0.5035448670387268, "learning_rate": 0.00036608056738313225, "loss": 0.8917, "step": 3882 }, { "epoch": 0.6927125144946927, "grad_norm": 0.5091261267662048, "learning_rate": 0.00036601851102343843, "loss": 1.0292, "step": 3883 }, { "epoch": 0.6928909107126929, "grad_norm": 0.45190200209617615, "learning_rate": 0.0003659564455517007, "loss": 1.043, "step": 3884 }, { "epoch": 0.693069306930693, "grad_norm": 0.5362012386322021, "learning_rate": 0.0003658943709727936, "loss": 1.0457, "step": 3885 }, { "epoch": 0.6932477031486932, "grad_norm": 0.5333651304244995, "learning_rate": 0.00036583228729159244, "loss": 1.0046, "step": 3886 }, { "epoch": 0.6934260993666934, "grad_norm": 0.5217045545578003, "learning_rate": 0.0003657701945129734, "loss": 0.9786, "step": 3887 }, { "epoch": 0.6936044955846936, "grad_norm": 0.5744450688362122, "learning_rate": 0.0003657080926418131, "loss": 1.03, "step": 3888 }, { "epoch": 0.6937828918026938, "grad_norm": 0.5800135135650635, "learning_rate": 0.0003656459816829889, "loss": 1.0379, "step": 3889 }, { "epoch": 0.693961288020694, "grad_norm": 0.4809887707233429, "learning_rate": 0.0003655838616413791, "loss": 0.9534, "step": 3890 }, { "epoch": 0.6941396842386941, "grad_norm": 0.46151599287986755, "learning_rate": 0.0003655217325218626, "loss": 0.9506, "step": 3891 }, { "epoch": 0.6943180804566943, "grad_norm": 1.1912611722946167, "learning_rate": 0.0003654595943293189, "loss": 0.7983, "step": 3892 }, { "epoch": 0.6944964766746945, "grad_norm": 0.48154789209365845, "learning_rate": 0.00036539744706862837, "loss": 0.8602, "step": 3893 }, { "epoch": 0.6946748728926947, "grad_norm": 0.5169458985328674, "learning_rate": 0.000365335290744672, "loss": 1.0327, "step": 3894 }, { "epoch": 0.6948532691106949, "grad_norm": 0.4628457725048065, "learning_rate": 0.00036527312536233146, "loss": 0.8947, "step": 3895 }, { "epoch": 0.695031665328695, "grad_norm": 0.5199170708656311, "learning_rate": 0.00036521095092648933, "loss": 0.9375, "step": 3896 }, { "epoch": 0.6952100615466952, "grad_norm": 0.5095205307006836, "learning_rate": 0.0003651487674420287, "loss": 1.0075, "step": 3897 }, { "epoch": 0.6953884577646954, "grad_norm": 0.5311485528945923, "learning_rate": 0.0003650865749138334, "loss": 0.8594, "step": 3898 }, { "epoch": 0.6955668539826956, "grad_norm": 0.5111739635467529, "learning_rate": 0.000365024373346788, "loss": 0.8538, "step": 3899 }, { "epoch": 0.6957452502006958, "grad_norm": 0.48374149203300476, "learning_rate": 0.0003649621627457779, "loss": 0.9959, "step": 3900 }, { "epoch": 0.695923646418696, "grad_norm": 0.4373980760574341, "learning_rate": 0.00036489994311568897, "loss": 0.8307, "step": 3901 }, { "epoch": 0.6961020426366961, "grad_norm": 0.47654423117637634, "learning_rate": 0.0003648377144614079, "loss": 0.8287, "step": 3902 }, { "epoch": 0.6962804388546963, "grad_norm": 0.5087730884552002, "learning_rate": 0.0003647754767878222, "loss": 1.21, "step": 3903 }, { "epoch": 0.6964588350726965, "grad_norm": 0.503084123134613, "learning_rate": 0.0003647132300998199, "loss": 0.8816, "step": 3904 }, { "epoch": 0.6966372312906967, "grad_norm": 0.4780065417289734, "learning_rate": 0.00036465097440229003, "loss": 0.9056, "step": 3905 }, { "epoch": 0.6968156275086969, "grad_norm": 0.48324069380760193, "learning_rate": 0.0003645887097001218, "loss": 0.9316, "step": 3906 }, { "epoch": 0.6969940237266969, "grad_norm": 0.4731059968471527, "learning_rate": 0.0003645264359982057, "loss": 0.8059, "step": 3907 }, { "epoch": 0.6971724199446971, "grad_norm": 0.4973444640636444, "learning_rate": 0.0003644641533014326, "loss": 1.0185, "step": 3908 }, { "epoch": 0.6973508161626973, "grad_norm": 0.5493341088294983, "learning_rate": 0.00036440186161469407, "loss": 1.1505, "step": 3909 }, { "epoch": 0.6975292123806975, "grad_norm": 0.5255337357521057, "learning_rate": 0.0003643395609428827, "loss": 1.0592, "step": 3910 }, { "epoch": 0.6977076085986977, "grad_norm": 0.4478547275066376, "learning_rate": 0.0003642772512908913, "loss": 0.8705, "step": 3911 }, { "epoch": 0.6978860048166979, "grad_norm": 0.49308907985687256, "learning_rate": 0.00036421493266361384, "loss": 0.745, "step": 3912 }, { "epoch": 0.698064401034698, "grad_norm": 0.5503551363945007, "learning_rate": 0.00036415260506594463, "loss": 1.0582, "step": 3913 }, { "epoch": 0.6982427972526982, "grad_norm": 0.5110753178596497, "learning_rate": 0.00036409026850277906, "loss": 0.7994, "step": 3914 }, { "epoch": 0.6984211934706984, "grad_norm": 0.5047644376754761, "learning_rate": 0.00036402792297901283, "loss": 0.9037, "step": 3915 }, { "epoch": 0.6985995896886986, "grad_norm": 0.5323673486709595, "learning_rate": 0.0003639655684995426, "loss": 0.8984, "step": 3916 }, { "epoch": 0.6987779859066988, "grad_norm": 0.49493974447250366, "learning_rate": 0.0003639032050692656, "loss": 0.949, "step": 3917 }, { "epoch": 0.6989563821246989, "grad_norm": 0.4778236150741577, "learning_rate": 0.00036384083269308, "loss": 0.6483, "step": 3918 }, { "epoch": 0.6991347783426991, "grad_norm": 0.519985556602478, "learning_rate": 0.00036377845137588435, "loss": 1.0307, "step": 3919 }, { "epoch": 0.6993131745606993, "grad_norm": 0.49531275033950806, "learning_rate": 0.00036371606112257807, "loss": 0.7647, "step": 3920 }, { "epoch": 0.6994915707786995, "grad_norm": 0.5061904788017273, "learning_rate": 0.00036365366193806135, "loss": 0.93, "step": 3921 }, { "epoch": 0.6996699669966997, "grad_norm": 0.45195135474205017, "learning_rate": 0.0003635912538272349, "loss": 0.831, "step": 3922 }, { "epoch": 0.6998483632146999, "grad_norm": 0.5661010146141052, "learning_rate": 0.00036352883679500027, "loss": 0.9327, "step": 3923 }, { "epoch": 0.7000267594327, "grad_norm": 0.5504164099693298, "learning_rate": 0.0003634664108462596, "loss": 0.8443, "step": 3924 }, { "epoch": 0.7002051556507002, "grad_norm": 0.6557819843292236, "learning_rate": 0.0003634039759859158, "loss": 1.0994, "step": 3925 }, { "epoch": 0.7003835518687004, "grad_norm": 0.48669978976249695, "learning_rate": 0.00036334153221887264, "loss": 0.7105, "step": 3926 }, { "epoch": 0.7005619480867006, "grad_norm": 0.4523518979549408, "learning_rate": 0.00036327907955003425, "loss": 0.8413, "step": 3927 }, { "epoch": 0.7007403443047008, "grad_norm": 0.469539999961853, "learning_rate": 0.0003632166179843058, "loss": 0.7825, "step": 3928 }, { "epoch": 0.7009187405227009, "grad_norm": 0.5297795534133911, "learning_rate": 0.0003631541475265928, "loss": 1.044, "step": 3929 }, { "epoch": 0.7010971367407011, "grad_norm": 0.46915116906166077, "learning_rate": 0.0003630916681818018, "loss": 1.0134, "step": 3930 }, { "epoch": 0.7012755329587013, "grad_norm": 0.7894530296325684, "learning_rate": 0.0003630291799548398, "loss": 1.0422, "step": 3931 }, { "epoch": 0.7014539291767015, "grad_norm": 0.4228399395942688, "learning_rate": 0.00036296668285061464, "loss": 0.8499, "step": 3932 }, { "epoch": 0.7016323253947017, "grad_norm": 0.5037341713905334, "learning_rate": 0.00036290417687403483, "loss": 0.9448, "step": 3933 }, { "epoch": 0.7018107216127019, "grad_norm": 0.43407323956489563, "learning_rate": 0.00036284166203000957, "loss": 0.7291, "step": 3934 }, { "epoch": 0.701989117830702, "grad_norm": 0.46290135383605957, "learning_rate": 0.00036277913832344875, "loss": 0.8746, "step": 3935 }, { "epoch": 0.7021675140487021, "grad_norm": 0.4666912257671356, "learning_rate": 0.0003627166057592629, "loss": 0.8483, "step": 3936 }, { "epoch": 0.7023459102667023, "grad_norm": 0.44675248861312866, "learning_rate": 0.0003626540643423634, "loss": 0.8229, "step": 3937 }, { "epoch": 0.7025243064847025, "grad_norm": 0.46909114718437195, "learning_rate": 0.0003625915140776621, "loss": 0.7874, "step": 3938 }, { "epoch": 0.7027027027027027, "grad_norm": 0.47172433137893677, "learning_rate": 0.00036252895497007175, "loss": 0.9997, "step": 3939 }, { "epoch": 0.7028810989207028, "grad_norm": 0.5105620622634888, "learning_rate": 0.0003624663870245057, "loss": 0.9346, "step": 3940 }, { "epoch": 0.703059495138703, "grad_norm": 0.4702140688896179, "learning_rate": 0.0003624038102458781, "loss": 0.9306, "step": 3941 }, { "epoch": 0.7032378913567032, "grad_norm": 0.49693727493286133, "learning_rate": 0.0003623412246391035, "loss": 0.979, "step": 3942 }, { "epoch": 0.7034162875747034, "grad_norm": 0.47724950313568115, "learning_rate": 0.00036227863020909753, "loss": 0.9922, "step": 3943 }, { "epoch": 0.7035946837927036, "grad_norm": 0.4332534968852997, "learning_rate": 0.0003622160269607762, "loss": 0.8226, "step": 3944 }, { "epoch": 0.7037730800107038, "grad_norm": 0.4521944522857666, "learning_rate": 0.00036215341489905645, "loss": 0.9031, "step": 3945 }, { "epoch": 0.7039514762287039, "grad_norm": 0.46443673968315125, "learning_rate": 0.00036209079402885577, "loss": 0.8799, "step": 3946 }, { "epoch": 0.7041298724467041, "grad_norm": 0.48729583621025085, "learning_rate": 0.00036202816435509233, "loss": 0.8665, "step": 3947 }, { "epoch": 0.7043082686647043, "grad_norm": 0.48398932814598083, "learning_rate": 0.00036196552588268506, "loss": 0.8229, "step": 3948 }, { "epoch": 0.7044866648827045, "grad_norm": 0.47495603561401367, "learning_rate": 0.0003619028786165536, "loss": 0.8878, "step": 3949 }, { "epoch": 0.7046650611007047, "grad_norm": 0.5229374766349792, "learning_rate": 0.0003618402225616182, "loss": 0.9401, "step": 3950 }, { "epoch": 0.7048434573187048, "grad_norm": 0.4787358343601227, "learning_rate": 0.00036177755772279983, "loss": 0.9042, "step": 3951 }, { "epoch": 0.705021853536705, "grad_norm": 0.4975121021270752, "learning_rate": 0.00036171488410502016, "loss": 1.0987, "step": 3952 }, { "epoch": 0.7052002497547052, "grad_norm": 0.44728225469589233, "learning_rate": 0.00036165220171320166, "loss": 0.774, "step": 3953 }, { "epoch": 0.7053786459727054, "grad_norm": 0.5024580955505371, "learning_rate": 0.0003615895105522672, "loss": 0.9369, "step": 3954 }, { "epoch": 0.7055570421907056, "grad_norm": 0.482075035572052, "learning_rate": 0.00036152681062714064, "loss": 0.9221, "step": 3955 }, { "epoch": 0.7057354384087058, "grad_norm": 0.4962591230869293, "learning_rate": 0.0003614641019427464, "loss": 0.8499, "step": 3956 }, { "epoch": 0.7059138346267059, "grad_norm": 0.5466794967651367, "learning_rate": 0.0003614013845040095, "loss": 0.8993, "step": 3957 }, { "epoch": 0.7060922308447061, "grad_norm": 0.5112261176109314, "learning_rate": 0.00036133865831585577, "loss": 0.8757, "step": 3958 }, { "epoch": 0.7062706270627063, "grad_norm": 0.4203137159347534, "learning_rate": 0.0003612759233832118, "loss": 0.7496, "step": 3959 }, { "epoch": 0.7064490232807065, "grad_norm": 0.4720518887042999, "learning_rate": 0.00036121317971100464, "loss": 0.9851, "step": 3960 }, { "epoch": 0.7066274194987067, "grad_norm": 0.43325722217559814, "learning_rate": 0.0003611504273041623, "loss": 0.6659, "step": 3961 }, { "epoch": 0.7068058157167068, "grad_norm": 0.44432467222213745, "learning_rate": 0.0003610876661676131, "loss": 0.8509, "step": 3962 }, { "epoch": 0.706984211934707, "grad_norm": 0.4786689281463623, "learning_rate": 0.0003610248963062865, "loss": 0.9216, "step": 3963 }, { "epoch": 0.7071626081527072, "grad_norm": 0.4814835488796234, "learning_rate": 0.00036096211772511226, "loss": 1.0138, "step": 3964 }, { "epoch": 0.7073410043707073, "grad_norm": 0.4865817725658417, "learning_rate": 0.0003608993304290211, "loss": 1.0312, "step": 3965 }, { "epoch": 0.7075194005887075, "grad_norm": 0.49532395601272583, "learning_rate": 0.00036083653442294417, "loss": 0.7395, "step": 3966 }, { "epoch": 0.7076977968067077, "grad_norm": 0.4562307298183441, "learning_rate": 0.0003607737297118136, "loss": 0.7416, "step": 3967 }, { "epoch": 0.7078761930247078, "grad_norm": 0.5441547632217407, "learning_rate": 0.00036071091630056204, "loss": 1.2372, "step": 3968 }, { "epoch": 0.708054589242708, "grad_norm": 0.4169851839542389, "learning_rate": 0.00036064809419412264, "loss": 0.8411, "step": 3969 }, { "epoch": 0.7082329854607082, "grad_norm": 0.48741570115089417, "learning_rate": 0.0003605852633974296, "loss": 1.1458, "step": 3970 }, { "epoch": 0.7084113816787084, "grad_norm": 0.47009482979774475, "learning_rate": 0.0003605224239154175, "loss": 0.8117, "step": 3971 }, { "epoch": 0.7085897778967086, "grad_norm": 0.4695878028869629, "learning_rate": 0.00036045957575302174, "loss": 0.8465, "step": 3972 }, { "epoch": 0.7087681741147087, "grad_norm": 0.46119317412376404, "learning_rate": 0.0003603967189151785, "loss": 0.7996, "step": 3973 }, { "epoch": 0.7089465703327089, "grad_norm": 0.46736615896224976, "learning_rate": 0.0003603338534068245, "loss": 0.8513, "step": 3974 }, { "epoch": 0.7091249665507091, "grad_norm": 0.588634729385376, "learning_rate": 0.00036027097923289707, "loss": 1.0962, "step": 3975 }, { "epoch": 0.7093033627687093, "grad_norm": 0.4701951742172241, "learning_rate": 0.00036020809639833446, "loss": 0.8302, "step": 3976 }, { "epoch": 0.7094817589867095, "grad_norm": 0.4576779305934906, "learning_rate": 0.00036014520490807535, "loss": 0.8504, "step": 3977 }, { "epoch": 0.7096601552047097, "grad_norm": 0.5647838711738586, "learning_rate": 0.00036008230476705915, "loss": 0.8446, "step": 3978 }, { "epoch": 0.7098385514227098, "grad_norm": 0.48183631896972656, "learning_rate": 0.00036001939598022625, "loss": 0.9849, "step": 3979 }, { "epoch": 0.71001694764071, "grad_norm": 0.4493800699710846, "learning_rate": 0.00035995647855251726, "loss": 0.8262, "step": 3980 }, { "epoch": 0.7101953438587102, "grad_norm": 0.5250908136367798, "learning_rate": 0.00035989355248887384, "loss": 0.7036, "step": 3981 }, { "epoch": 0.7103737400767104, "grad_norm": 0.4472370743751526, "learning_rate": 0.000359830617794238, "loss": 0.8584, "step": 3982 }, { "epoch": 0.7105521362947106, "grad_norm": 0.48600542545318604, "learning_rate": 0.00035976767447355273, "loss": 0.7812, "step": 3983 }, { "epoch": 0.7107305325127107, "grad_norm": 0.49188825488090515, "learning_rate": 0.00035970472253176155, "loss": 0.8518, "step": 3984 }, { "epoch": 0.7109089287307109, "grad_norm": 0.482270210981369, "learning_rate": 0.0003596417619738087, "loss": 0.767, "step": 3985 }, { "epoch": 0.7110873249487111, "grad_norm": 0.460519403219223, "learning_rate": 0.000359578792804639, "loss": 0.9311, "step": 3986 }, { "epoch": 0.7112657211667113, "grad_norm": 0.5443057417869568, "learning_rate": 0.00035951581502919813, "loss": 0.8974, "step": 3987 }, { "epoch": 0.7114441173847115, "grad_norm": 0.44756433367729187, "learning_rate": 0.0003594528286524322, "loss": 0.8031, "step": 3988 }, { "epoch": 0.7116225136027117, "grad_norm": 0.7050688862800598, "learning_rate": 0.0003593898336792883, "loss": 1.1276, "step": 3989 }, { "epoch": 0.7118009098207118, "grad_norm": 0.5873562097549438, "learning_rate": 0.0003593268301147139, "loss": 1.1023, "step": 3990 }, { "epoch": 0.711979306038712, "grad_norm": 0.4501679241657257, "learning_rate": 0.0003592638179636573, "loss": 0.8911, "step": 3991 }, { "epoch": 0.7121577022567122, "grad_norm": 0.48664426803588867, "learning_rate": 0.0003592007972310674, "loss": 0.8171, "step": 3992 }, { "epoch": 0.7123360984747124, "grad_norm": 0.45436549186706543, "learning_rate": 0.000359137767921894, "loss": 0.7589, "step": 3993 }, { "epoch": 0.7125144946927126, "grad_norm": 0.5350733995437622, "learning_rate": 0.0003590747300410871, "loss": 1.1801, "step": 3994 }, { "epoch": 0.7126928909107126, "grad_norm": 0.49254482984542847, "learning_rate": 0.00035901168359359797, "loss": 0.9704, "step": 3995 }, { "epoch": 0.7128712871287128, "grad_norm": 0.4413221776485443, "learning_rate": 0.0003589486285843781, "loss": 0.9475, "step": 3996 }, { "epoch": 0.713049683346713, "grad_norm": 0.4686570465564728, "learning_rate": 0.0003588855650183798, "loss": 0.8909, "step": 3997 }, { "epoch": 0.7132280795647132, "grad_norm": 0.5192868113517761, "learning_rate": 0.0003588224929005561, "loss": 0.9769, "step": 3998 }, { "epoch": 0.7134064757827134, "grad_norm": 0.4699283242225647, "learning_rate": 0.0003587594122358607, "loss": 0.8673, "step": 3999 }, { "epoch": 0.7135848720007136, "grad_norm": 0.44109442830085754, "learning_rate": 0.00035869632302924776, "loss": 0.8594, "step": 4000 }, { "epoch": 0.7137632682187137, "grad_norm": 0.44390279054641724, "learning_rate": 0.00035863322528567246, "loss": 0.8449, "step": 4001 }, { "epoch": 0.7139416644367139, "grad_norm": 0.5160883069038391, "learning_rate": 0.00035857011901009036, "loss": 0.9412, "step": 4002 }, { "epoch": 0.7141200606547141, "grad_norm": 0.49634233117103577, "learning_rate": 0.00035850700420745783, "loss": 1.0827, "step": 4003 }, { "epoch": 0.7142984568727143, "grad_norm": 0.45757240056991577, "learning_rate": 0.0003584438808827319, "loss": 0.8453, "step": 4004 }, { "epoch": 0.7144768530907145, "grad_norm": 2.338979959487915, "learning_rate": 0.0003583807490408702, "loss": 1.1377, "step": 4005 }, { "epoch": 0.7146552493087146, "grad_norm": 0.46473217010498047, "learning_rate": 0.00035831760868683117, "loss": 0.9048, "step": 4006 }, { "epoch": 0.7148336455267148, "grad_norm": 0.47779056429862976, "learning_rate": 0.0003582544598255737, "loss": 1.0637, "step": 4007 }, { "epoch": 0.715012041744715, "grad_norm": 0.4706021845340729, "learning_rate": 0.0003581913024620577, "loss": 0.7752, "step": 4008 }, { "epoch": 0.7151904379627152, "grad_norm": 0.4808463752269745, "learning_rate": 0.0003581281366012431, "loss": 0.8006, "step": 4009 }, { "epoch": 0.7153688341807154, "grad_norm": 0.5095382928848267, "learning_rate": 0.0003580649622480914, "loss": 0.9802, "step": 4010 }, { "epoch": 0.7155472303987156, "grad_norm": 0.5496415495872498, "learning_rate": 0.000358001779407564, "loss": 1.2067, "step": 4011 }, { "epoch": 0.7157256266167157, "grad_norm": 0.47136303782463074, "learning_rate": 0.0003579385880846232, "loss": 0.8652, "step": 4012 }, { "epoch": 0.7159040228347159, "grad_norm": 0.7725960612297058, "learning_rate": 0.00035787538828423225, "loss": 0.7905, "step": 4013 }, { "epoch": 0.7160824190527161, "grad_norm": 0.5106984376907349, "learning_rate": 0.0003578121800113548, "loss": 0.8218, "step": 4014 }, { "epoch": 0.7162608152707163, "grad_norm": 0.4280400276184082, "learning_rate": 0.0003577489632709551, "loss": 0.7157, "step": 4015 }, { "epoch": 0.7164392114887165, "grad_norm": 0.439709335565567, "learning_rate": 0.0003576857380679981, "loss": 0.9857, "step": 4016 }, { "epoch": 0.7166176077067166, "grad_norm": 0.4796721041202545, "learning_rate": 0.0003576225044074496, "loss": 0.7725, "step": 4017 }, { "epoch": 0.7167960039247168, "grad_norm": 0.5130892992019653, "learning_rate": 0.00035755926229427595, "loss": 1.048, "step": 4018 }, { "epoch": 0.716974400142717, "grad_norm": 0.6783162951469421, "learning_rate": 0.0003574960117334441, "loss": 0.9083, "step": 4019 }, { "epoch": 0.7171527963607172, "grad_norm": 0.551443874835968, "learning_rate": 0.00035743275272992177, "loss": 0.9846, "step": 4020 }, { "epoch": 0.7173311925787174, "grad_norm": 0.5131356716156006, "learning_rate": 0.0003573694852886773, "loss": 0.8985, "step": 4021 }, { "epoch": 0.7175095887967176, "grad_norm": 0.5424959659576416, "learning_rate": 0.0003573062094146796, "loss": 1.0965, "step": 4022 }, { "epoch": 0.7176879850147176, "grad_norm": 0.5547580122947693, "learning_rate": 0.0003572429251128984, "loss": 0.9774, "step": 4023 }, { "epoch": 0.7178663812327178, "grad_norm": 0.4463725686073303, "learning_rate": 0.000357179632388304, "loss": 0.7453, "step": 4024 }, { "epoch": 0.718044777450718, "grad_norm": 0.4756935238838196, "learning_rate": 0.0003571163312458674, "loss": 0.8413, "step": 4025 }, { "epoch": 0.7182231736687182, "grad_norm": 0.47553977370262146, "learning_rate": 0.0003570530216905603, "loss": 0.7793, "step": 4026 }, { "epoch": 0.7184015698867184, "grad_norm": 0.48220735788345337, "learning_rate": 0.000356989703727355, "loss": 0.858, "step": 4027 }, { "epoch": 0.7185799661047185, "grad_norm": 1.1826428174972534, "learning_rate": 0.0003569263773612242, "loss": 1.007, "step": 4028 }, { "epoch": 0.7187583623227187, "grad_norm": 0.5101688504219055, "learning_rate": 0.0003568630425971419, "loss": 1.0152, "step": 4029 }, { "epoch": 0.7189367585407189, "grad_norm": 0.6145039200782776, "learning_rate": 0.00035679969944008217, "loss": 0.8944, "step": 4030 }, { "epoch": 0.7191151547587191, "grad_norm": 0.4975384771823883, "learning_rate": 0.00035673634789502, "loss": 0.9689, "step": 4031 }, { "epoch": 0.7192935509767193, "grad_norm": 0.5056141018867493, "learning_rate": 0.00035667298796693097, "loss": 0.8644, "step": 4032 }, { "epoch": 0.7194719471947195, "grad_norm": 0.51641845703125, "learning_rate": 0.00035660961966079146, "loss": 1.1757, "step": 4033 }, { "epoch": 0.7196503434127196, "grad_norm": 0.6307793259620667, "learning_rate": 0.00035654624298157823, "loss": 0.9336, "step": 4034 }, { "epoch": 0.7198287396307198, "grad_norm": 0.5330437421798706, "learning_rate": 0.0003564828579342689, "loss": 0.9126, "step": 4035 }, { "epoch": 0.72000713584872, "grad_norm": 0.4748086631298065, "learning_rate": 0.00035641946452384183, "loss": 0.9391, "step": 4036 }, { "epoch": 0.7201855320667202, "grad_norm": 0.4811098873615265, "learning_rate": 0.00035635606275527575, "loss": 0.9427, "step": 4037 }, { "epoch": 0.7203639282847204, "grad_norm": 0.551956832408905, "learning_rate": 0.00035629265263355025, "loss": 1.0388, "step": 4038 }, { "epoch": 0.7205423245027205, "grad_norm": 0.5036234855651855, "learning_rate": 0.0003562292341636456, "loss": 0.9119, "step": 4039 }, { "epoch": 0.7207207207207207, "grad_norm": 0.5007266402244568, "learning_rate": 0.0003561658073505426, "loss": 1.0323, "step": 4040 }, { "epoch": 0.7208991169387209, "grad_norm": 0.5283923745155334, "learning_rate": 0.0003561023721992228, "loss": 0.8999, "step": 4041 }, { "epoch": 0.7210775131567211, "grad_norm": 0.885420024394989, "learning_rate": 0.0003560389287146683, "loss": 0.9517, "step": 4042 }, { "epoch": 0.7212559093747213, "grad_norm": 0.4583801329135895, "learning_rate": 0.000355975476901862, "loss": 0.8263, "step": 4043 }, { "epoch": 0.7214343055927215, "grad_norm": 0.4987337291240692, "learning_rate": 0.00035591201676578733, "loss": 0.8681, "step": 4044 }, { "epoch": 0.7216127018107216, "grad_norm": 0.5699477195739746, "learning_rate": 0.00035584854831142846, "loss": 0.9642, "step": 4045 }, { "epoch": 0.7217910980287218, "grad_norm": 0.5087426900863647, "learning_rate": 0.00035578507154377016, "loss": 0.9291, "step": 4046 }, { "epoch": 0.721969494246722, "grad_norm": 0.520293653011322, "learning_rate": 0.00035572158646779786, "loss": 0.988, "step": 4047 }, { "epoch": 0.7221478904647222, "grad_norm": 0.511569082736969, "learning_rate": 0.0003556580930884976, "loss": 0.9026, "step": 4048 }, { "epoch": 0.7223262866827224, "grad_norm": 0.4560140371322632, "learning_rate": 0.0003555945914108562, "loss": 0.8434, "step": 4049 }, { "epoch": 0.7225046829007225, "grad_norm": 0.49847978353500366, "learning_rate": 0.00035553108143986106, "loss": 1.0567, "step": 4050 }, { "epoch": 0.7226830791187226, "grad_norm": 0.4683671295642853, "learning_rate": 0.0003554675631805002, "loss": 0.7456, "step": 4051 }, { "epoch": 0.7228614753367228, "grad_norm": 0.5200768113136292, "learning_rate": 0.0003554040366377623, "loss": 0.9804, "step": 4052 }, { "epoch": 0.723039871554723, "grad_norm": 0.5331013202667236, "learning_rate": 0.0003553405018166367, "loss": 1.0793, "step": 4053 }, { "epoch": 0.7232182677727232, "grad_norm": 0.48200780153274536, "learning_rate": 0.00035527695872211354, "loss": 0.9059, "step": 4054 }, { "epoch": 0.7233966639907234, "grad_norm": 0.5711750388145447, "learning_rate": 0.00035521340735918317, "loss": 1.0259, "step": 4055 }, { "epoch": 0.7235750602087235, "grad_norm": 0.5371613502502441, "learning_rate": 0.00035514984773283713, "loss": 0.9537, "step": 4056 }, { "epoch": 0.7237534564267237, "grad_norm": 0.47757935523986816, "learning_rate": 0.0003550862798480673, "loss": 0.9782, "step": 4057 }, { "epoch": 0.7239318526447239, "grad_norm": 0.5152607560157776, "learning_rate": 0.0003550227037098663, "loss": 0.9834, "step": 4058 }, { "epoch": 0.7241102488627241, "grad_norm": 0.44710201025009155, "learning_rate": 0.0003549591193232273, "loss": 0.9038, "step": 4059 }, { "epoch": 0.7242886450807243, "grad_norm": 0.4328174591064453, "learning_rate": 0.00035489552669314427, "loss": 0.5936, "step": 4060 }, { "epoch": 0.7244670412987244, "grad_norm": 0.5450094938278198, "learning_rate": 0.00035483192582461175, "loss": 0.9184, "step": 4061 }, { "epoch": 0.7246454375167246, "grad_norm": 0.4884642958641052, "learning_rate": 0.0003547683167226249, "loss": 0.982, "step": 4062 }, { "epoch": 0.7248238337347248, "grad_norm": 0.4127320647239685, "learning_rate": 0.00035470469939217944, "loss": 0.6299, "step": 4063 }, { "epoch": 0.725002229952725, "grad_norm": 0.5126365423202515, "learning_rate": 0.0003546410738382719, "loss": 0.7333, "step": 4064 }, { "epoch": 0.7251806261707252, "grad_norm": 1.068241000175476, "learning_rate": 0.0003545774400658996, "loss": 0.7118, "step": 4065 }, { "epoch": 0.7253590223887254, "grad_norm": 0.5835636258125305, "learning_rate": 0.00035451379808006014, "loss": 0.9209, "step": 4066 }, { "epoch": 0.7255374186067255, "grad_norm": 0.4724158048629761, "learning_rate": 0.0003544501478857519, "loss": 0.7558, "step": 4067 }, { "epoch": 0.7257158148247257, "grad_norm": 0.536562442779541, "learning_rate": 0.000354386489487974, "loss": 0.9178, "step": 4068 }, { "epoch": 0.7258942110427259, "grad_norm": 1.3931955099105835, "learning_rate": 0.0003543228228917262, "loss": 1.0701, "step": 4069 }, { "epoch": 0.7260726072607261, "grad_norm": 0.6029762029647827, "learning_rate": 0.0003542591481020087, "loss": 0.7614, "step": 4070 }, { "epoch": 0.7262510034787263, "grad_norm": 2.0984432697296143, "learning_rate": 0.0003541954651238226, "loss": 0.9829, "step": 4071 }, { "epoch": 0.7264293996967264, "grad_norm": 1.1561270952224731, "learning_rate": 0.0003541317739621695, "loss": 1.1042, "step": 4072 }, { "epoch": 0.7266077959147266, "grad_norm": 0.9806638360023499, "learning_rate": 0.0003540680746220518, "loss": 0.7303, "step": 4073 }, { "epoch": 0.7267861921327268, "grad_norm": 0.5632558465003967, "learning_rate": 0.0003540043671084722, "loss": 1.0854, "step": 4074 }, { "epoch": 0.726964588350727, "grad_norm": 0.5446335077285767, "learning_rate": 0.00035394065142643435, "loss": 1.0717, "step": 4075 }, { "epoch": 0.7271429845687272, "grad_norm": 1.9541873931884766, "learning_rate": 0.0003538769275809425, "loss": 0.9602, "step": 4076 }, { "epoch": 0.7273213807867274, "grad_norm": 0.5483476519584656, "learning_rate": 0.00035381319557700145, "loss": 1.0379, "step": 4077 }, { "epoch": 0.7274997770047275, "grad_norm": 0.5389299392700195, "learning_rate": 0.00035374945541961656, "loss": 0.9395, "step": 4078 }, { "epoch": 0.7276781732227277, "grad_norm": 0.5572075247764587, "learning_rate": 0.00035368570711379423, "loss": 1.0089, "step": 4079 }, { "epoch": 0.7278565694407279, "grad_norm": 0.4937756359577179, "learning_rate": 0.00035362195066454116, "loss": 0.9257, "step": 4080 }, { "epoch": 0.728034965658728, "grad_norm": 0.5375781059265137, "learning_rate": 0.00035355818607686455, "loss": 0.9355, "step": 4081 }, { "epoch": 0.7282133618767282, "grad_norm": 0.4889019727706909, "learning_rate": 0.0003534944133557726, "loss": 0.8256, "step": 4082 }, { "epoch": 0.7283917580947283, "grad_norm": 0.5278054475784302, "learning_rate": 0.000353430632506274, "loss": 1.029, "step": 4083 }, { "epoch": 0.7285701543127285, "grad_norm": 0.5437285900115967, "learning_rate": 0.000353366843533378, "loss": 1.1467, "step": 4084 }, { "epoch": 0.7287485505307287, "grad_norm": 0.5061957240104675, "learning_rate": 0.00035330304644209454, "loss": 1.0266, "step": 4085 }, { "epoch": 0.7289269467487289, "grad_norm": 0.5521642565727234, "learning_rate": 0.00035323924123743436, "loss": 0.9845, "step": 4086 }, { "epoch": 0.7291053429667291, "grad_norm": 0.5419567823410034, "learning_rate": 0.00035317542792440853, "loss": 0.7985, "step": 4087 }, { "epoch": 0.7292837391847293, "grad_norm": 0.510177731513977, "learning_rate": 0.00035311160650802905, "loss": 0.9469, "step": 4088 }, { "epoch": 0.7294621354027294, "grad_norm": 0.5522062182426453, "learning_rate": 0.0003530477769933083, "loss": 0.9675, "step": 4089 }, { "epoch": 0.7296405316207296, "grad_norm": 0.5909835696220398, "learning_rate": 0.00035298393938525954, "loss": 1.0044, "step": 4090 }, { "epoch": 0.7298189278387298, "grad_norm": 0.5589690804481506, "learning_rate": 0.0003529200936888965, "loss": 1.0368, "step": 4091 }, { "epoch": 0.72999732405673, "grad_norm": 0.5542118549346924, "learning_rate": 0.00035285623990923356, "loss": 0.7456, "step": 4092 }, { "epoch": 0.7301757202747302, "grad_norm": 2.88665509223938, "learning_rate": 0.00035279237805128585, "loss": 0.7459, "step": 4093 }, { "epoch": 0.7303541164927303, "grad_norm": 0.5373327732086182, "learning_rate": 0.000352728508120069, "loss": 1.2578, "step": 4094 }, { "epoch": 0.7305325127107305, "grad_norm": 0.48414483666419983, "learning_rate": 0.00035266463012059924, "loss": 0.7918, "step": 4095 }, { "epoch": 0.7307109089287307, "grad_norm": 0.45521920919418335, "learning_rate": 0.00035260074405789365, "loss": 0.8527, "step": 4096 }, { "epoch": 0.7308893051467309, "grad_norm": 0.44525134563446045, "learning_rate": 0.00035253684993696984, "loss": 0.7479, "step": 4097 }, { "epoch": 0.7310677013647311, "grad_norm": 0.4846455454826355, "learning_rate": 0.0003524729477628459, "loss": 0.7777, "step": 4098 }, { "epoch": 0.7312460975827313, "grad_norm": 0.5616981983184814, "learning_rate": 0.00035240903754054075, "loss": 1.0553, "step": 4099 }, { "epoch": 0.7314244938007314, "grad_norm": 0.5334004163742065, "learning_rate": 0.00035234511927507387, "loss": 0.8418, "step": 4100 }, { "epoch": 0.7316028900187316, "grad_norm": 0.5169308185577393, "learning_rate": 0.00035228119297146533, "loss": 0.7405, "step": 4101 }, { "epoch": 0.7317812862367318, "grad_norm": 0.7203896045684814, "learning_rate": 0.00035221725863473596, "loss": 0.8365, "step": 4102 }, { "epoch": 0.731959682454732, "grad_norm": 0.4874943196773529, "learning_rate": 0.0003521533162699071, "loss": 0.8711, "step": 4103 }, { "epoch": 0.7321380786727322, "grad_norm": 0.5255458950996399, "learning_rate": 0.0003520893658820007, "loss": 1.1175, "step": 4104 }, { "epoch": 0.7323164748907324, "grad_norm": 0.4852457642555237, "learning_rate": 0.0003520254074760394, "loss": 0.9222, "step": 4105 }, { "epoch": 0.7324948711087325, "grad_norm": 0.49769327044487, "learning_rate": 0.00035196144105704654, "loss": 0.8612, "step": 4106 }, { "epoch": 0.7326732673267327, "grad_norm": 0.528090238571167, "learning_rate": 0.00035189746663004607, "loss": 0.8552, "step": 4107 }, { "epoch": 0.7328516635447329, "grad_norm": 0.5445149540901184, "learning_rate": 0.00035183348420006233, "loss": 0.9377, "step": 4108 }, { "epoch": 0.733030059762733, "grad_norm": 0.49806612730026245, "learning_rate": 0.00035176949377212045, "loss": 0.8447, "step": 4109 }, { "epoch": 0.7332084559807333, "grad_norm": 0.5145043134689331, "learning_rate": 0.00035170549535124647, "loss": 0.8682, "step": 4110 }, { "epoch": 0.7333868521987333, "grad_norm": 0.45539185404777527, "learning_rate": 0.0003516414889424666, "loss": 0.7736, "step": 4111 }, { "epoch": 0.7335652484167335, "grad_norm": 0.5117284059524536, "learning_rate": 0.00035157747455080794, "loss": 0.8903, "step": 4112 }, { "epoch": 0.7337436446347337, "grad_norm": 0.5072416663169861, "learning_rate": 0.0003515134521812983, "loss": 0.8481, "step": 4113 }, { "epoch": 0.7339220408527339, "grad_norm": 0.46144771575927734, "learning_rate": 0.0003514494218389656, "loss": 0.7497, "step": 4114 }, { "epoch": 0.7341004370707341, "grad_norm": 0.4636537432670593, "learning_rate": 0.000351385383528839, "loss": 0.9987, "step": 4115 }, { "epoch": 0.7342788332887343, "grad_norm": 1.1357630491256714, "learning_rate": 0.00035132133725594803, "loss": 1.2211, "step": 4116 }, { "epoch": 0.7344572295067344, "grad_norm": 0.48777201771736145, "learning_rate": 0.0003512572830253228, "loss": 0.9007, "step": 4117 }, { "epoch": 0.7346356257247346, "grad_norm": 0.5612165331840515, "learning_rate": 0.0003511932208419942, "loss": 0.9013, "step": 4118 }, { "epoch": 0.7348140219427348, "grad_norm": 0.6152064204216003, "learning_rate": 0.00035112915071099354, "loss": 1.0285, "step": 4119 }, { "epoch": 0.734992418160735, "grad_norm": 0.6013907790184021, "learning_rate": 0.000351065072637353, "loss": 0.8736, "step": 4120 }, { "epoch": 0.7351708143787352, "grad_norm": 0.5271583199501038, "learning_rate": 0.000351000986626105, "loss": 1.1314, "step": 4121 }, { "epoch": 0.7353492105967353, "grad_norm": 0.583275556564331, "learning_rate": 0.00035093689268228306, "loss": 0.8167, "step": 4122 }, { "epoch": 0.7355276068147355, "grad_norm": 0.49837273359298706, "learning_rate": 0.000350872790810921, "loss": 0.9489, "step": 4123 }, { "epoch": 0.7357060030327357, "grad_norm": 0.4844464957714081, "learning_rate": 0.0003508086810170533, "loss": 0.9135, "step": 4124 }, { "epoch": 0.7358843992507359, "grad_norm": 0.5695939660072327, "learning_rate": 0.00035074456330571517, "loss": 0.9493, "step": 4125 }, { "epoch": 0.7360627954687361, "grad_norm": 0.5401460528373718, "learning_rate": 0.0003506804376819425, "loss": 0.9202, "step": 4126 }, { "epoch": 0.7362411916867363, "grad_norm": 0.4903864860534668, "learning_rate": 0.0003506163041507715, "loss": 1.0455, "step": 4127 }, { "epoch": 0.7364195879047364, "grad_norm": 0.7425463795661926, "learning_rate": 0.00035055216271723933, "loss": 0.9451, "step": 4128 }, { "epoch": 0.7365979841227366, "grad_norm": 0.5137443542480469, "learning_rate": 0.0003504880133863835, "loss": 1.0321, "step": 4129 }, { "epoch": 0.7367763803407368, "grad_norm": 0.5113863348960876, "learning_rate": 0.0003504238561632424, "loss": 1.0488, "step": 4130 }, { "epoch": 0.736954776558737, "grad_norm": 0.469612717628479, "learning_rate": 0.0003503596910528548, "loss": 0.897, "step": 4131 }, { "epoch": 0.7371331727767372, "grad_norm": 0.5417755246162415, "learning_rate": 0.00035029551806026025, "loss": 0.9615, "step": 4132 }, { "epoch": 0.7373115689947373, "grad_norm": 2.0405690670013428, "learning_rate": 0.00035023133719049894, "loss": 0.8717, "step": 4133 }, { "epoch": 0.7374899652127375, "grad_norm": 0.4939228594303131, "learning_rate": 0.00035016714844861155, "loss": 0.7748, "step": 4134 }, { "epoch": 0.7376683614307377, "grad_norm": 0.5170210003852844, "learning_rate": 0.00035010295183963936, "loss": 0.9805, "step": 4135 }, { "epoch": 0.7378467576487379, "grad_norm": 0.5120583772659302, "learning_rate": 0.0003500387473686244, "loss": 0.7109, "step": 4136 }, { "epoch": 0.7380251538667381, "grad_norm": 0.5130133628845215, "learning_rate": 0.0003499745350406093, "loss": 0.9658, "step": 4137 }, { "epoch": 0.7382035500847383, "grad_norm": 0.4344574511051178, "learning_rate": 0.0003499103148606372, "loss": 0.783, "step": 4138 }, { "epoch": 0.7383819463027383, "grad_norm": 0.5518962144851685, "learning_rate": 0.00034984608683375206, "loss": 1.0517, "step": 4139 }, { "epoch": 0.7385603425207385, "grad_norm": 0.46893310546875, "learning_rate": 0.00034978185096499814, "loss": 1.0466, "step": 4140 }, { "epoch": 0.7387387387387387, "grad_norm": 0.566422164440155, "learning_rate": 0.00034971760725942063, "loss": 1.1278, "step": 4141 }, { "epoch": 0.7389171349567389, "grad_norm": 0.47913649678230286, "learning_rate": 0.00034965335572206515, "loss": 0.965, "step": 4142 }, { "epoch": 0.7390955311747391, "grad_norm": 0.4865665137767792, "learning_rate": 0.000349589096357978, "loss": 0.832, "step": 4143 }, { "epoch": 0.7392739273927392, "grad_norm": 0.7697198987007141, "learning_rate": 0.0003495248291722061, "loss": 0.9949, "step": 4144 }, { "epoch": 0.7394523236107394, "grad_norm": 0.49303874373435974, "learning_rate": 0.00034946055416979686, "loss": 1.1002, "step": 4145 }, { "epoch": 0.7396307198287396, "grad_norm": 0.4551609456539154, "learning_rate": 0.00034939627135579854, "loss": 0.826, "step": 4146 }, { "epoch": 0.7398091160467398, "grad_norm": 0.5011882781982422, "learning_rate": 0.00034933198073525986, "loss": 0.9986, "step": 4147 }, { "epoch": 0.73998751226474, "grad_norm": 0.4582299590110779, "learning_rate": 0.0003492676823132301, "loss": 0.8219, "step": 4148 }, { "epoch": 0.7401659084827402, "grad_norm": 0.4924689531326294, "learning_rate": 0.00034920337609475936, "loss": 1.0327, "step": 4149 }, { "epoch": 0.7403443047007403, "grad_norm": 0.5348846912384033, "learning_rate": 0.00034913906208489814, "loss": 1.0617, "step": 4150 }, { "epoch": 0.7405227009187405, "grad_norm": 0.4941270649433136, "learning_rate": 0.0003490747402886977, "loss": 0.8433, "step": 4151 }, { "epoch": 0.7407010971367407, "grad_norm": 0.5572370290756226, "learning_rate": 0.0003490104107112097, "loss": 0.9576, "step": 4152 }, { "epoch": 0.7408794933547409, "grad_norm": 0.5385303497314453, "learning_rate": 0.00034894607335748674, "loss": 0.9523, "step": 4153 }, { "epoch": 0.7410578895727411, "grad_norm": 0.5019118189811707, "learning_rate": 0.00034888172823258165, "loss": 0.6984, "step": 4154 }, { "epoch": 0.7412362857907412, "grad_norm": 0.4633517861366272, "learning_rate": 0.0003488173753415482, "loss": 0.8653, "step": 4155 }, { "epoch": 0.7414146820087414, "grad_norm": 0.5317572355270386, "learning_rate": 0.0003487530146894407, "loss": 1.1626, "step": 4156 }, { "epoch": 0.7415930782267416, "grad_norm": 0.45128071308135986, "learning_rate": 0.0003486886462813138, "loss": 0.8697, "step": 4157 }, { "epoch": 0.7417714744447418, "grad_norm": 0.4344140589237213, "learning_rate": 0.0003486242701222232, "loss": 0.7773, "step": 4158 }, { "epoch": 0.741949870662742, "grad_norm": 0.443740576505661, "learning_rate": 0.0003485598862172248, "loss": 0.8047, "step": 4159 }, { "epoch": 0.7421282668807422, "grad_norm": 0.4513658583164215, "learning_rate": 0.00034849549457137543, "loss": 0.8605, "step": 4160 }, { "epoch": 0.7423066630987423, "grad_norm": 0.4627962112426758, "learning_rate": 0.00034843109518973225, "loss": 0.768, "step": 4161 }, { "epoch": 0.7424850593167425, "grad_norm": 0.5590542554855347, "learning_rate": 0.00034836668807735314, "loss": 0.9791, "step": 4162 }, { "epoch": 0.7426634555347427, "grad_norm": 0.45700347423553467, "learning_rate": 0.00034830227323929674, "loss": 0.8522, "step": 4163 }, { "epoch": 0.7428418517527429, "grad_norm": 2.536508798599243, "learning_rate": 0.00034823785068062213, "loss": 0.9045, "step": 4164 }, { "epoch": 0.7430202479707431, "grad_norm": 0.5341298580169678, "learning_rate": 0.00034817342040638897, "loss": 0.9195, "step": 4165 }, { "epoch": 0.7431986441887432, "grad_norm": 0.45887595415115356, "learning_rate": 0.00034810898242165766, "loss": 0.8731, "step": 4166 }, { "epoch": 0.7433770404067434, "grad_norm": 0.4361880421638489, "learning_rate": 0.00034804453673148905, "loss": 0.8234, "step": 4167 }, { "epoch": 0.7435554366247435, "grad_norm": 0.4275573790073395, "learning_rate": 0.0003479800833409448, "loss": 0.7071, "step": 4168 }, { "epoch": 0.7437338328427437, "grad_norm": 0.5099348425865173, "learning_rate": 0.0003479156222550869, "loss": 0.8876, "step": 4169 }, { "epoch": 0.743912229060744, "grad_norm": 0.4776555895805359, "learning_rate": 0.00034785115347897805, "loss": 0.7928, "step": 4170 }, { "epoch": 0.7440906252787441, "grad_norm": 63.1339225769043, "learning_rate": 0.00034778667701768187, "loss": 1.9367, "step": 4171 }, { "epoch": 0.7442690214967442, "grad_norm": 0.5373305678367615, "learning_rate": 0.00034772219287626207, "loss": 0.9005, "step": 4172 }, { "epoch": 0.7444474177147444, "grad_norm": 0.5590469837188721, "learning_rate": 0.0003476577010597834, "loss": 0.8572, "step": 4173 }, { "epoch": 0.7446258139327446, "grad_norm": 0.4844629466533661, "learning_rate": 0.0003475932015733109, "loss": 0.9525, "step": 4174 }, { "epoch": 0.7448042101507448, "grad_norm": 0.41167715191841125, "learning_rate": 0.00034752869442191027, "loss": 0.8344, "step": 4175 }, { "epoch": 0.744982606368745, "grad_norm": 0.5152734518051147, "learning_rate": 0.00034746417961064793, "loss": 1.0936, "step": 4176 }, { "epoch": 0.7451610025867451, "grad_norm": 0.48306646943092346, "learning_rate": 0.0003473996571445909, "loss": 0.7804, "step": 4177 }, { "epoch": 0.7453393988047453, "grad_norm": 0.5196247100830078, "learning_rate": 0.0003473351270288067, "loss": 0.9043, "step": 4178 }, { "epoch": 0.7455177950227455, "grad_norm": 0.5509787201881409, "learning_rate": 0.0003472705892683636, "loss": 0.8701, "step": 4179 }, { "epoch": 0.7456961912407457, "grad_norm": 0.7097774744033813, "learning_rate": 0.0003472060438683302, "loss": 0.7195, "step": 4180 }, { "epoch": 0.7458745874587459, "grad_norm": 0.49308526515960693, "learning_rate": 0.00034714149083377594, "loss": 1.0288, "step": 4181 }, { "epoch": 0.7460529836767461, "grad_norm": 0.6980340480804443, "learning_rate": 0.00034707693016977083, "loss": 0.858, "step": 4182 }, { "epoch": 0.7462313798947462, "grad_norm": 0.4881076216697693, "learning_rate": 0.0003470123618813854, "loss": 0.9786, "step": 4183 }, { "epoch": 0.7464097761127464, "grad_norm": 0.5367513298988342, "learning_rate": 0.00034694778597369076, "loss": 0.8505, "step": 4184 }, { "epoch": 0.7465881723307466, "grad_norm": 0.47334301471710205, "learning_rate": 0.00034688320245175873, "loss": 0.8669, "step": 4185 }, { "epoch": 0.7467665685487468, "grad_norm": 0.6059718728065491, "learning_rate": 0.0003468186113206617, "loss": 1.1616, "step": 4186 }, { "epoch": 0.746944964766747, "grad_norm": 0.5258612036705017, "learning_rate": 0.00034675401258547266, "loss": 0.8902, "step": 4187 }, { "epoch": 0.7471233609847471, "grad_norm": 0.5637246370315552, "learning_rate": 0.00034668940625126506, "loss": 1.2349, "step": 4188 }, { "epoch": 0.7473017572027473, "grad_norm": 78.5971450805664, "learning_rate": 0.00034662479232311306, "loss": 1.0211, "step": 4189 }, { "epoch": 0.7474801534207475, "grad_norm": 0.6106373071670532, "learning_rate": 0.00034656017080609154, "loss": 1.0691, "step": 4190 }, { "epoch": 0.7476585496387477, "grad_norm": 0.5662621259689331, "learning_rate": 0.0003464955417052757, "loss": 0.9236, "step": 4191 }, { "epoch": 0.7478369458567479, "grad_norm": 0.5042510032653809, "learning_rate": 0.0003464309050257415, "loss": 0.8522, "step": 4192 }, { "epoch": 0.7480153420747481, "grad_norm": 0.5739179849624634, "learning_rate": 0.0003463662607725656, "loss": 0.9128, "step": 4193 }, { "epoch": 0.7481937382927482, "grad_norm": 0.4624471962451935, "learning_rate": 0.000346301608950825, "loss": 0.7043, "step": 4194 }, { "epoch": 0.7483721345107484, "grad_norm": 0.5266431570053101, "learning_rate": 0.00034623694956559747, "loss": 0.7484, "step": 4195 }, { "epoch": 0.7485505307287486, "grad_norm": 0.5885065197944641, "learning_rate": 0.0003461722826219614, "loss": 1.179, "step": 4196 }, { "epoch": 0.7487289269467488, "grad_norm": 0.5153014659881592, "learning_rate": 0.0003461076081249956, "loss": 0.7813, "step": 4197 }, { "epoch": 0.748907323164749, "grad_norm": 0.5379844307899475, "learning_rate": 0.0003460429260797796, "loss": 0.7575, "step": 4198 }, { "epoch": 0.749085719382749, "grad_norm": 0.502864420413971, "learning_rate": 0.00034597823649139346, "loss": 0.995, "step": 4199 }, { "epoch": 0.7492641156007492, "grad_norm": 0.4639131724834442, "learning_rate": 0.00034591353936491806, "loss": 0.8029, "step": 4200 }, { "epoch": 0.7494425118187494, "grad_norm": 0.5059810876846313, "learning_rate": 0.0003458488347054345, "loss": 0.8779, "step": 4201 }, { "epoch": 0.7496209080367496, "grad_norm": 0.4426543712615967, "learning_rate": 0.00034578412251802466, "loss": 0.7159, "step": 4202 }, { "epoch": 0.7497993042547498, "grad_norm": 0.571412205696106, "learning_rate": 0.0003457194028077711, "loss": 0.9628, "step": 4203 }, { "epoch": 0.74997770047275, "grad_norm": 0.5668926239013672, "learning_rate": 0.00034565467557975683, "loss": 1.0421, "step": 4204 }, { "epoch": 0.7501560966907501, "grad_norm": 0.48620158433914185, "learning_rate": 0.0003455899408390655, "loss": 0.8639, "step": 4205 }, { "epoch": 0.7503344929087503, "grad_norm": 0.5328919887542725, "learning_rate": 0.0003455251985907814, "loss": 0.9498, "step": 4206 }, { "epoch": 0.7505128891267505, "grad_norm": 0.5057373642921448, "learning_rate": 0.0003454604488399893, "loss": 0.9308, "step": 4207 }, { "epoch": 0.7506912853447507, "grad_norm": 0.43592992424964905, "learning_rate": 0.0003453956915917745, "loss": 0.8826, "step": 4208 }, { "epoch": 0.7508696815627509, "grad_norm": 0.46315765380859375, "learning_rate": 0.00034533092685122324, "loss": 0.9092, "step": 4209 }, { "epoch": 0.751048077780751, "grad_norm": 0.48972928524017334, "learning_rate": 0.000345266154623422, "loss": 0.9986, "step": 4210 }, { "epoch": 0.7512264739987512, "grad_norm": 0.49682357907295227, "learning_rate": 0.0003452013749134579, "loss": 0.9315, "step": 4211 }, { "epoch": 0.7514048702167514, "grad_norm": 0.5060596466064453, "learning_rate": 0.00034513658772641887, "loss": 1.0385, "step": 4212 }, { "epoch": 0.7515832664347516, "grad_norm": 0.652193546295166, "learning_rate": 0.00034507179306739324, "loss": 0.9741, "step": 4213 }, { "epoch": 0.7517616626527518, "grad_norm": 0.5387786030769348, "learning_rate": 0.0003450069909414698, "loss": 0.8605, "step": 4214 }, { "epoch": 0.751940058870752, "grad_norm": 0.5394373536109924, "learning_rate": 0.00034494218135373817, "loss": 0.8344, "step": 4215 }, { "epoch": 0.7521184550887521, "grad_norm": 0.501558780670166, "learning_rate": 0.00034487736430928846, "loss": 1.0367, "step": 4216 }, { "epoch": 0.7522968513067523, "grad_norm": 0.49226245284080505, "learning_rate": 0.00034481253981321144, "loss": 0.8282, "step": 4217 }, { "epoch": 0.7524752475247525, "grad_norm": 0.5037868022918701, "learning_rate": 0.0003447477078705983, "loss": 1.0139, "step": 4218 }, { "epoch": 0.7526536437427527, "grad_norm": 0.4862633943557739, "learning_rate": 0.00034468286848654106, "loss": 0.8863, "step": 4219 }, { "epoch": 0.7528320399607529, "grad_norm": 0.4658646881580353, "learning_rate": 0.000344618021666132, "loss": 0.8069, "step": 4220 }, { "epoch": 0.753010436178753, "grad_norm": 0.44514206051826477, "learning_rate": 0.0003445531674144642, "loss": 1.0605, "step": 4221 }, { "epoch": 0.7531888323967532, "grad_norm": 0.5069167613983154, "learning_rate": 0.0003444883057366314, "loss": 0.8131, "step": 4222 }, { "epoch": 0.7533672286147534, "grad_norm": 0.48138898611068726, "learning_rate": 0.00034442343663772755, "loss": 0.8838, "step": 4223 }, { "epoch": 0.7535456248327536, "grad_norm": 0.46832725405693054, "learning_rate": 0.0003443585601228478, "loss": 1.0179, "step": 4224 }, { "epoch": 0.7537240210507538, "grad_norm": 0.5185703635215759, "learning_rate": 0.00034429367619708733, "loss": 0.8979, "step": 4225 }, { "epoch": 0.753902417268754, "grad_norm": 0.5143477916717529, "learning_rate": 0.0003442287848655421, "loss": 0.8142, "step": 4226 }, { "epoch": 0.754080813486754, "grad_norm": 0.5070206522941589, "learning_rate": 0.00034416388613330864, "loss": 1.0616, "step": 4227 }, { "epoch": 0.7542592097047542, "grad_norm": 0.5132788419723511, "learning_rate": 0.00034409898000548403, "loss": 1.0537, "step": 4228 }, { "epoch": 0.7544376059227544, "grad_norm": 0.4766051173210144, "learning_rate": 0.00034403406648716604, "loss": 0.9183, "step": 4229 }, { "epoch": 0.7546160021407546, "grad_norm": 0.6825383305549622, "learning_rate": 0.00034396914558345297, "loss": 1.0334, "step": 4230 }, { "epoch": 0.7547943983587548, "grad_norm": 0.4257564842700958, "learning_rate": 0.0003439042172994436, "loss": 0.75, "step": 4231 }, { "epoch": 0.7549727945767549, "grad_norm": 0.5030829906463623, "learning_rate": 0.0003438392816402375, "loss": 1.0223, "step": 4232 }, { "epoch": 0.7551511907947551, "grad_norm": 0.4708796441555023, "learning_rate": 0.00034377433861093457, "loss": 0.7668, "step": 4233 }, { "epoch": 0.7553295870127553, "grad_norm": 0.49385902285575867, "learning_rate": 0.0003437093882166354, "loss": 0.7773, "step": 4234 }, { "epoch": 0.7555079832307555, "grad_norm": 0.5581505298614502, "learning_rate": 0.00034364443046244124, "loss": 0.9124, "step": 4235 }, { "epoch": 0.7556863794487557, "grad_norm": 0.4790385067462921, "learning_rate": 0.0003435794653534538, "loss": 0.8304, "step": 4236 }, { "epoch": 0.7558647756667559, "grad_norm": 0.43401768803596497, "learning_rate": 0.00034351449289477545, "loss": 0.77, "step": 4237 }, { "epoch": 0.756043171884756, "grad_norm": 0.5333930253982544, "learning_rate": 0.000343449513091509, "loss": 0.8807, "step": 4238 }, { "epoch": 0.7562215681027562, "grad_norm": 0.4971325993537903, "learning_rate": 0.0003433845259487581, "loss": 0.8195, "step": 4239 }, { "epoch": 0.7563999643207564, "grad_norm": 0.4987589716911316, "learning_rate": 0.00034331953147162666, "loss": 0.9902, "step": 4240 }, { "epoch": 0.7565783605387566, "grad_norm": 0.45375093817710876, "learning_rate": 0.0003432545296652194, "loss": 0.9725, "step": 4241 }, { "epoch": 0.7567567567567568, "grad_norm": 1.4552748203277588, "learning_rate": 0.00034318952053464147, "loss": 0.8451, "step": 4242 }, { "epoch": 0.7569351529747569, "grad_norm": 0.44463515281677246, "learning_rate": 0.0003431245040849987, "loss": 0.7589, "step": 4243 }, { "epoch": 0.7571135491927571, "grad_norm": 0.5243865251541138, "learning_rate": 0.00034305948032139745, "loss": 0.776, "step": 4244 }, { "epoch": 0.7572919454107573, "grad_norm": 14.643073081970215, "learning_rate": 0.00034299444924894474, "loss": 0.9523, "step": 4245 }, { "epoch": 0.7574703416287575, "grad_norm": 0.5148363709449768, "learning_rate": 0.00034292941087274794, "loss": 0.8977, "step": 4246 }, { "epoch": 0.7576487378467577, "grad_norm": 0.5748273134231567, "learning_rate": 0.0003428643651979152, "loss": 0.9339, "step": 4247 }, { "epoch": 0.7578271340647579, "grad_norm": 0.5037097930908203, "learning_rate": 0.00034279931222955517, "loss": 0.8882, "step": 4248 }, { "epoch": 0.758005530282758, "grad_norm": 0.5515426397323608, "learning_rate": 0.00034273425197277715, "loss": 1.113, "step": 4249 }, { "epoch": 0.7581839265007582, "grad_norm": 0.49536243081092834, "learning_rate": 0.00034266918443269083, "loss": 0.7182, "step": 4250 }, { "epoch": 0.7583623227187584, "grad_norm": 0.49721240997314453, "learning_rate": 0.0003426041096144067, "loss": 1.0869, "step": 4251 }, { "epoch": 0.7585407189367586, "grad_norm": 0.48414304852485657, "learning_rate": 0.0003425390275230356, "loss": 0.8055, "step": 4252 }, { "epoch": 0.7587191151547588, "grad_norm": 0.48049938678741455, "learning_rate": 0.00034247393816368914, "loss": 0.7842, "step": 4253 }, { "epoch": 0.7588975113727588, "grad_norm": 0.5838775634765625, "learning_rate": 0.00034240884154147934, "loss": 0.9039, "step": 4254 }, { "epoch": 0.759075907590759, "grad_norm": 0.5820823907852173, "learning_rate": 0.0003423437376615189, "loss": 1.0101, "step": 4255 }, { "epoch": 0.7592543038087592, "grad_norm": 0.6763784289360046, "learning_rate": 0.00034227862652892103, "loss": 1.065, "step": 4256 }, { "epoch": 0.7594327000267594, "grad_norm": 0.5086767077445984, "learning_rate": 0.0003422135081487996, "loss": 1.1122, "step": 4257 }, { "epoch": 0.7596110962447596, "grad_norm": 0.4516467750072479, "learning_rate": 0.0003421483825262688, "loss": 0.785, "step": 4258 }, { "epoch": 0.7597894924627598, "grad_norm": 0.6000534892082214, "learning_rate": 0.0003420832496664439, "loss": 1.013, "step": 4259 }, { "epoch": 0.7599678886807599, "grad_norm": 0.4857349395751953, "learning_rate": 0.00034201810957444, "loss": 1.0669, "step": 4260 }, { "epoch": 0.7601462848987601, "grad_norm": 0.5024265646934509, "learning_rate": 0.0003419529622553735, "loss": 0.9337, "step": 4261 }, { "epoch": 0.7603246811167603, "grad_norm": 0.4981759190559387, "learning_rate": 0.0003418878077143608, "loss": 1.0209, "step": 4262 }, { "epoch": 0.7605030773347605, "grad_norm": 0.4379788637161255, "learning_rate": 0.00034182264595651927, "loss": 0.668, "step": 4263 }, { "epoch": 0.7606814735527607, "grad_norm": 0.5320781469345093, "learning_rate": 0.0003417574769869666, "loss": 0.8506, "step": 4264 }, { "epoch": 0.7608598697707608, "grad_norm": 0.5261650085449219, "learning_rate": 0.0003416923008108213, "loss": 1.0954, "step": 4265 }, { "epoch": 0.761038265988761, "grad_norm": 0.47511598467826843, "learning_rate": 0.00034162711743320205, "loss": 0.9106, "step": 4266 }, { "epoch": 0.7612166622067612, "grad_norm": 0.4774247109889984, "learning_rate": 0.00034156192685922846, "loss": 0.7263, "step": 4267 }, { "epoch": 0.7613950584247614, "grad_norm": 0.46322304010391235, "learning_rate": 0.00034149672909402056, "loss": 0.7982, "step": 4268 }, { "epoch": 0.7615734546427616, "grad_norm": 0.48991408944129944, "learning_rate": 0.00034143152414269887, "loss": 0.8453, "step": 4269 }, { "epoch": 0.7617518508607618, "grad_norm": 0.47439679503440857, "learning_rate": 0.00034136631201038466, "loss": 0.8049, "step": 4270 }, { "epoch": 0.7619302470787619, "grad_norm": 0.4984125792980194, "learning_rate": 0.0003413010927021996, "loss": 0.8084, "step": 4271 }, { "epoch": 0.7621086432967621, "grad_norm": 0.547827959060669, "learning_rate": 0.0003412358662232661, "loss": 0.9195, "step": 4272 }, { "epoch": 0.7622870395147623, "grad_norm": 0.4370548725128174, "learning_rate": 0.0003411706325787068, "loss": 0.7599, "step": 4273 }, { "epoch": 0.7624654357327625, "grad_norm": 0.47955775260925293, "learning_rate": 0.00034110539177364534, "loss": 0.8049, "step": 4274 }, { "epoch": 0.7626438319507627, "grad_norm": 0.5160841941833496, "learning_rate": 0.00034104014381320557, "loss": 0.8973, "step": 4275 }, { "epoch": 0.7628222281687628, "grad_norm": 0.506690502166748, "learning_rate": 0.0003409748887025121, "loss": 1.0149, "step": 4276 }, { "epoch": 0.763000624386763, "grad_norm": 0.5552113652229309, "learning_rate": 0.00034090962644669, "loss": 1.028, "step": 4277 }, { "epoch": 0.7631790206047632, "grad_norm": 0.45480722188949585, "learning_rate": 0.000340844357050865, "loss": 0.7717, "step": 4278 }, { "epoch": 0.7633574168227634, "grad_norm": 0.5435570478439331, "learning_rate": 0.0003407790805201633, "loss": 1.2671, "step": 4279 }, { "epoch": 0.7635358130407636, "grad_norm": 0.43579912185668945, "learning_rate": 0.0003407137968597116, "loss": 0.8684, "step": 4280 }, { "epoch": 0.7637142092587638, "grad_norm": 0.4706798791885376, "learning_rate": 0.00034064850607463736, "loss": 0.9247, "step": 4281 }, { "epoch": 0.7638926054767639, "grad_norm": 0.4617820084095001, "learning_rate": 0.0003405832081700685, "loss": 0.8273, "step": 4282 }, { "epoch": 0.764071001694764, "grad_norm": 0.49580156803131104, "learning_rate": 0.0003405179031511334, "loss": 0.9895, "step": 4283 }, { "epoch": 0.7642493979127643, "grad_norm": 0.4556731879711151, "learning_rate": 0.00034045259102296124, "loss": 0.8607, "step": 4284 }, { "epoch": 0.7644277941307644, "grad_norm": 0.461270272731781, "learning_rate": 0.0003403872717906814, "loss": 0.8178, "step": 4285 }, { "epoch": 0.7646061903487646, "grad_norm": 0.4924364686012268, "learning_rate": 0.00034032194545942417, "loss": 0.8912, "step": 4286 }, { "epoch": 0.7647845865667647, "grad_norm": 0.5016777515411377, "learning_rate": 0.00034025661203432024, "loss": 1.1841, "step": 4287 }, { "epoch": 0.7649629827847649, "grad_norm": 0.48156607151031494, "learning_rate": 0.0003401912715205008, "loss": 0.7984, "step": 4288 }, { "epoch": 0.7651413790027651, "grad_norm": 0.4831927716732025, "learning_rate": 0.00034012592392309774, "loss": 0.8091, "step": 4289 }, { "epoch": 0.7653197752207653, "grad_norm": 0.48947733640670776, "learning_rate": 0.0003400605692472433, "loss": 0.8708, "step": 4290 }, { "epoch": 0.7654981714387655, "grad_norm": 0.5063187479972839, "learning_rate": 0.0003399952074980706, "loss": 0.8559, "step": 4291 }, { "epoch": 0.7656765676567657, "grad_norm": 0.5040058493614197, "learning_rate": 0.00033992983868071303, "loss": 0.8416, "step": 4292 }, { "epoch": 0.7658549638747658, "grad_norm": 0.5163808465003967, "learning_rate": 0.0003398644628003046, "loss": 0.8919, "step": 4293 }, { "epoch": 0.766033360092766, "grad_norm": 0.5070242881774902, "learning_rate": 0.00033979907986197993, "loss": 0.8687, "step": 4294 }, { "epoch": 0.7662117563107662, "grad_norm": 0.5390428900718689, "learning_rate": 0.00033973368987087423, "loss": 0.9808, "step": 4295 }, { "epoch": 0.7663901525287664, "grad_norm": 0.4794482886791229, "learning_rate": 0.0003396682928321231, "loss": 0.9719, "step": 4296 }, { "epoch": 0.7665685487467666, "grad_norm": 0.482950896024704, "learning_rate": 0.0003396028887508628, "loss": 0.7459, "step": 4297 }, { "epoch": 0.7667469449647667, "grad_norm": 0.5277696251869202, "learning_rate": 0.00033953747763223026, "loss": 1.1243, "step": 4298 }, { "epoch": 0.7669253411827669, "grad_norm": 0.4380790889263153, "learning_rate": 0.0003394720594813627, "loss": 0.8427, "step": 4299 }, { "epoch": 0.7671037374007671, "grad_norm": 0.5023663640022278, "learning_rate": 0.0003394066343033981, "loss": 0.8703, "step": 4300 }, { "epoch": 0.7672821336187673, "grad_norm": 0.48462873697280884, "learning_rate": 0.00033934120210347496, "loss": 0.8443, "step": 4301 }, { "epoch": 0.7674605298367675, "grad_norm": 1.0551936626434326, "learning_rate": 0.0003392757628867322, "loss": 0.7749, "step": 4302 }, { "epoch": 0.7676389260547677, "grad_norm": 0.6180291771888733, "learning_rate": 0.0003392103166583095, "loss": 1.1561, "step": 4303 }, { "epoch": 0.7678173222727678, "grad_norm": 0.5031440854072571, "learning_rate": 0.0003391448634233468, "loss": 0.8814, "step": 4304 }, { "epoch": 0.767995718490768, "grad_norm": 0.4589061737060547, "learning_rate": 0.00033907940318698504, "loss": 0.9982, "step": 4305 }, { "epoch": 0.7681741147087682, "grad_norm": 0.46637728810310364, "learning_rate": 0.00033901393595436527, "loss": 0.8032, "step": 4306 }, { "epoch": 0.7683525109267684, "grad_norm": 0.5185346007347107, "learning_rate": 0.00033894846173062915, "loss": 1.0561, "step": 4307 }, { "epoch": 0.7685309071447686, "grad_norm": 0.43067285418510437, "learning_rate": 0.00033888298052091916, "loss": 0.7942, "step": 4308 }, { "epoch": 0.7687093033627687, "grad_norm": 0.6915571689605713, "learning_rate": 0.00033881749233037817, "loss": 1.0712, "step": 4309 }, { "epoch": 0.7688876995807689, "grad_norm": 0.4629500210285187, "learning_rate": 0.0003387519971641495, "loss": 0.7828, "step": 4310 }, { "epoch": 0.7690660957987691, "grad_norm": 0.65446937084198, "learning_rate": 0.00033868649502737726, "loss": 0.8419, "step": 4311 }, { "epoch": 0.7692444920167693, "grad_norm": 0.4821755588054657, "learning_rate": 0.0003386209859252058, "loss": 0.9422, "step": 4312 }, { "epoch": 0.7694228882347695, "grad_norm": 3.9539215564727783, "learning_rate": 0.0003385554698627803, "loss": 0.7106, "step": 4313 }, { "epoch": 0.7696012844527697, "grad_norm": 0.5639221668243408, "learning_rate": 0.00033848994684524623, "loss": 0.8801, "step": 4314 }, { "epoch": 0.7697796806707697, "grad_norm": 0.4910728931427002, "learning_rate": 0.0003384244168777498, "loss": 0.8116, "step": 4315 }, { "epoch": 0.7699580768887699, "grad_norm": 0.5648394823074341, "learning_rate": 0.0003383588799654378, "loss": 0.9331, "step": 4316 }, { "epoch": 0.7701364731067701, "grad_norm": 0.5196200609207153, "learning_rate": 0.00033829333611345736, "loss": 0.8242, "step": 4317 }, { "epoch": 0.7703148693247703, "grad_norm": 0.4403199553489685, "learning_rate": 0.0003382277853269564, "loss": 0.8163, "step": 4318 }, { "epoch": 0.7704932655427705, "grad_norm": 0.7118186950683594, "learning_rate": 0.000338162227611083, "loss": 0.8899, "step": 4319 }, { "epoch": 0.7706716617607706, "grad_norm": 0.8601351976394653, "learning_rate": 0.00033809666297098624, "loss": 1.0249, "step": 4320 }, { "epoch": 0.7708500579787708, "grad_norm": 0.8977810144424438, "learning_rate": 0.0003380310914118155, "loss": 0.771, "step": 4321 }, { "epoch": 0.771028454196771, "grad_norm": 0.5474297404289246, "learning_rate": 0.0003379655129387207, "loss": 0.9783, "step": 4322 }, { "epoch": 0.7712068504147712, "grad_norm": 6.160559177398682, "learning_rate": 0.0003378999275568523, "loss": 1.1071, "step": 4323 }, { "epoch": 0.7713852466327714, "grad_norm": 0.6195570230484009, "learning_rate": 0.0003378343352713614, "loss": 0.8238, "step": 4324 }, { "epoch": 0.7715636428507716, "grad_norm": 0.5602040886878967, "learning_rate": 0.00033776873608739976, "loss": 0.7919, "step": 4325 }, { "epoch": 0.7717420390687717, "grad_norm": 0.5972548127174377, "learning_rate": 0.00033770313001011933, "loss": 0.9286, "step": 4326 }, { "epoch": 0.7719204352867719, "grad_norm": 0.5935573577880859, "learning_rate": 0.0003376375170446727, "loss": 0.9524, "step": 4327 }, { "epoch": 0.7720988315047721, "grad_norm": 0.7750356197357178, "learning_rate": 0.00033757189719621326, "loss": 0.8514, "step": 4328 }, { "epoch": 0.7722772277227723, "grad_norm": 0.5310060381889343, "learning_rate": 0.00033750627046989475, "loss": 0.8609, "step": 4329 }, { "epoch": 0.7724556239407725, "grad_norm": 0.5345288515090942, "learning_rate": 0.00033744063687087136, "loss": 0.9829, "step": 4330 }, { "epoch": 0.7726340201587726, "grad_norm": 0.50868159532547, "learning_rate": 0.000337374996404298, "loss": 0.9928, "step": 4331 }, { "epoch": 0.7728124163767728, "grad_norm": 0.46974533796310425, "learning_rate": 0.00033730934907532994, "loss": 0.832, "step": 4332 }, { "epoch": 0.772990812594773, "grad_norm": 0.4263937771320343, "learning_rate": 0.0003372436948891233, "loss": 0.7312, "step": 4333 }, { "epoch": 0.7731692088127732, "grad_norm": 0.5079303979873657, "learning_rate": 0.0003371780338508343, "loss": 0.9124, "step": 4334 }, { "epoch": 0.7733476050307734, "grad_norm": 0.5083035826683044, "learning_rate": 0.00033711236596562004, "loss": 0.8987, "step": 4335 }, { "epoch": 0.7735260012487736, "grad_norm": 0.4942117929458618, "learning_rate": 0.00033704669123863813, "loss": 1.0008, "step": 4336 }, { "epoch": 0.7737043974667737, "grad_norm": 0.5014617443084717, "learning_rate": 0.00033698100967504655, "loss": 0.6739, "step": 4337 }, { "epoch": 0.7738827936847739, "grad_norm": 0.4389418065547943, "learning_rate": 0.0003369153212800038, "loss": 0.6625, "step": 4338 }, { "epoch": 0.7740611899027741, "grad_norm": 0.4890193045139313, "learning_rate": 0.0003368496260586692, "loss": 0.7249, "step": 4339 }, { "epoch": 0.7742395861207743, "grad_norm": 0.49702176451683044, "learning_rate": 0.00033678392401620226, "loss": 0.8034, "step": 4340 }, { "epoch": 0.7744179823387745, "grad_norm": 0.5406886339187622, "learning_rate": 0.00033671821515776336, "loss": 0.9704, "step": 4341 }, { "epoch": 0.7745963785567745, "grad_norm": 0.460330992937088, "learning_rate": 0.00033665249948851316, "loss": 0.6877, "step": 4342 }, { "epoch": 0.7747747747747747, "grad_norm": 0.4634692370891571, "learning_rate": 0.0003365867770136129, "loss": 0.7982, "step": 4343 }, { "epoch": 0.7749531709927749, "grad_norm": 0.5319910049438477, "learning_rate": 0.00033652104773822445, "loss": 1.0181, "step": 4344 }, { "epoch": 0.7751315672107751, "grad_norm": 0.48047569394111633, "learning_rate": 0.00033645531166751015, "loss": 0.9439, "step": 4345 }, { "epoch": 0.7753099634287753, "grad_norm": 0.4725799858570099, "learning_rate": 0.00033638956880663285, "loss": 0.8295, "step": 4346 }, { "epoch": 0.7754883596467755, "grad_norm": 0.4927878975868225, "learning_rate": 0.000336323819160756, "loss": 0.8699, "step": 4347 }, { "epoch": 0.7756667558647756, "grad_norm": 0.5209059715270996, "learning_rate": 0.00033625806273504354, "loss": 0.8208, "step": 4348 }, { "epoch": 0.7758451520827758, "grad_norm": 0.4795133173465729, "learning_rate": 0.00033619229953465996, "loss": 0.8273, "step": 4349 }, { "epoch": 0.776023548300776, "grad_norm": 0.5821531414985657, "learning_rate": 0.0003361265295647703, "loss": 0.8388, "step": 4350 }, { "epoch": 0.7762019445187762, "grad_norm": 0.5282803177833557, "learning_rate": 0.00033606075283054005, "loss": 0.6619, "step": 4351 }, { "epoch": 0.7763803407367764, "grad_norm": 0.6072514653205872, "learning_rate": 0.00033599496933713535, "loss": 1.0466, "step": 4352 }, { "epoch": 0.7765587369547765, "grad_norm": 0.4529389441013336, "learning_rate": 0.0003359291790897227, "loss": 0.8325, "step": 4353 }, { "epoch": 0.7767371331727767, "grad_norm": 0.47834986448287964, "learning_rate": 0.0003358633820934692, "loss": 0.9251, "step": 4354 }, { "epoch": 0.7769155293907769, "grad_norm": 0.5303228497505188, "learning_rate": 0.0003357975783535428, "loss": 1.1421, "step": 4355 }, { "epoch": 0.7770939256087771, "grad_norm": 0.5310399532318115, "learning_rate": 0.00033573176787511145, "loss": 0.8054, "step": 4356 }, { "epoch": 0.7772723218267773, "grad_norm": 0.46372997760772705, "learning_rate": 0.0003356659506633439, "loss": 1.0177, "step": 4357 }, { "epoch": 0.7774507180447775, "grad_norm": 0.45827627182006836, "learning_rate": 0.00033560012672340957, "loss": 0.7673, "step": 4358 }, { "epoch": 0.7776291142627776, "grad_norm": 0.5642030835151672, "learning_rate": 0.000335534296060478, "loss": 0.8029, "step": 4359 }, { "epoch": 0.7778075104807778, "grad_norm": 0.5288789868354797, "learning_rate": 0.00033546845867971976, "loss": 1.0097, "step": 4360 }, { "epoch": 0.777985906698778, "grad_norm": 0.5099871158599854, "learning_rate": 0.0003354026145863054, "loss": 0.8843, "step": 4361 }, { "epoch": 0.7781643029167782, "grad_norm": 0.4645063579082489, "learning_rate": 0.0003353367637854065, "loss": 0.8116, "step": 4362 }, { "epoch": 0.7783426991347784, "grad_norm": 0.48817193508148193, "learning_rate": 0.00033527090628219494, "loss": 0.7076, "step": 4363 }, { "epoch": 0.7785210953527785, "grad_norm": 0.4992578625679016, "learning_rate": 0.00033520504208184304, "loss": 0.9631, "step": 4364 }, { "epoch": 0.7786994915707787, "grad_norm": 0.4915018081665039, "learning_rate": 0.00033513917118952385, "loss": 0.8473, "step": 4365 }, { "epoch": 0.7788778877887789, "grad_norm": 1.4712707996368408, "learning_rate": 0.0003350732936104108, "loss": 1.0, "step": 4366 }, { "epoch": 0.7790562840067791, "grad_norm": 0.5193944573402405, "learning_rate": 0.0003350074093496778, "loss": 1.0957, "step": 4367 }, { "epoch": 0.7792346802247793, "grad_norm": 0.4992116689682007, "learning_rate": 0.0003349415184124995, "loss": 0.9401, "step": 4368 }, { "epoch": 0.7794130764427795, "grad_norm": 0.512789785861969, "learning_rate": 0.00033487562080405085, "loss": 0.915, "step": 4369 }, { "epoch": 0.7795914726607795, "grad_norm": 1.0886067152023315, "learning_rate": 0.00033480971652950753, "loss": 0.9546, "step": 4370 }, { "epoch": 0.7797698688787797, "grad_norm": 0.49638882279396057, "learning_rate": 0.0003347438055940456, "loss": 0.6824, "step": 4371 }, { "epoch": 0.77994826509678, "grad_norm": 0.5366002917289734, "learning_rate": 0.0003346778880028416, "loss": 1.0622, "step": 4372 }, { "epoch": 0.7801266613147801, "grad_norm": 0.45976123213768005, "learning_rate": 0.00033461196376107275, "loss": 0.7995, "step": 4373 }, { "epoch": 0.7803050575327803, "grad_norm": 0.48608720302581787, "learning_rate": 0.00033454603287391666, "loss": 1.0287, "step": 4374 }, { "epoch": 0.7804834537507804, "grad_norm": 3.942063570022583, "learning_rate": 0.0003344800953465515, "loss": 1.1003, "step": 4375 }, { "epoch": 0.7806618499687806, "grad_norm": 0.4614802598953247, "learning_rate": 0.000334414151184156, "loss": 0.8365, "step": 4376 }, { "epoch": 0.7808402461867808, "grad_norm": 0.4670039117336273, "learning_rate": 0.00033434820039190944, "loss": 0.9256, "step": 4377 }, { "epoch": 0.781018642404781, "grad_norm": 0.5666428208351135, "learning_rate": 0.0003342822429749915, "loss": 0.8964, "step": 4378 }, { "epoch": 0.7811970386227812, "grad_norm": 0.5543097257614136, "learning_rate": 0.00033421627893858244, "loss": 0.9927, "step": 4379 }, { "epoch": 0.7813754348407814, "grad_norm": 0.5561456680297852, "learning_rate": 0.0003341503082878631, "loss": 0.9066, "step": 4380 }, { "epoch": 0.7815538310587815, "grad_norm": 0.5354291796684265, "learning_rate": 0.00033408433102801474, "loss": 1.0043, "step": 4381 }, { "epoch": 0.7817322272767817, "grad_norm": 0.45063936710357666, "learning_rate": 0.00033401834716421925, "loss": 0.6419, "step": 4382 }, { "epoch": 0.7819106234947819, "grad_norm": 0.5044664144515991, "learning_rate": 0.0003339523567016589, "loss": 0.9381, "step": 4383 }, { "epoch": 0.7820890197127821, "grad_norm": 0.4677816331386566, "learning_rate": 0.0003338863596455166, "loss": 0.879, "step": 4384 }, { "epoch": 0.7822674159307823, "grad_norm": 0.4861041009426117, "learning_rate": 0.00033382035600097563, "loss": 0.8487, "step": 4385 }, { "epoch": 0.7824458121487824, "grad_norm": 0.5526744723320007, "learning_rate": 0.0003337543457732201, "loss": 0.9411, "step": 4386 }, { "epoch": 0.7826242083667826, "grad_norm": 4.370226860046387, "learning_rate": 0.0003336883289674342, "loss": 0.9109, "step": 4387 }, { "epoch": 0.7828026045847828, "grad_norm": 0.4913937449455261, "learning_rate": 0.00033362230558880296, "loss": 0.8254, "step": 4388 }, { "epoch": 0.782981000802783, "grad_norm": 0.5903784036636353, "learning_rate": 0.00033355627564251184, "loss": 1.2674, "step": 4389 }, { "epoch": 0.7831593970207832, "grad_norm": 0.48795902729034424, "learning_rate": 0.0003334902391337468, "loss": 0.7577, "step": 4390 }, { "epoch": 0.7833377932387834, "grad_norm": 0.4892320930957794, "learning_rate": 0.00033342419606769433, "loss": 0.9373, "step": 4391 }, { "epoch": 0.7835161894567835, "grad_norm": 0.4674488604068756, "learning_rate": 0.00033335814644954137, "loss": 0.7285, "step": 4392 }, { "epoch": 0.7836945856747837, "grad_norm": 0.4890602231025696, "learning_rate": 0.00033329209028447543, "loss": 0.9521, "step": 4393 }, { "epoch": 0.7838729818927839, "grad_norm": 0.4913036823272705, "learning_rate": 0.0003332260275776846, "loss": 0.8138, "step": 4394 }, { "epoch": 0.7840513781107841, "grad_norm": 0.4752870500087738, "learning_rate": 0.0003331599583343574, "loss": 1.065, "step": 4395 }, { "epoch": 0.7842297743287843, "grad_norm": 0.542586624622345, "learning_rate": 0.0003330938825596828, "loss": 0.9805, "step": 4396 }, { "epoch": 0.7844081705467844, "grad_norm": 0.4948160946369171, "learning_rate": 0.0003330278002588505, "loss": 0.7871, "step": 4397 }, { "epoch": 0.7845865667647846, "grad_norm": 0.5883384943008423, "learning_rate": 0.0003329617114370505, "loss": 1.1591, "step": 4398 }, { "epoch": 0.7847649629827848, "grad_norm": 0.484783798456192, "learning_rate": 0.00033289561609947335, "loss": 1.0079, "step": 4399 }, { "epoch": 0.784943359200785, "grad_norm": 0.5016215443611145, "learning_rate": 0.00033282951425131014, "loss": 0.7401, "step": 4400 }, { "epoch": 0.7851217554187851, "grad_norm": 0.45084336400032043, "learning_rate": 0.00033276340589775255, "loss": 0.7707, "step": 4401 }, { "epoch": 0.7853001516367853, "grad_norm": 0.4217044413089752, "learning_rate": 0.00033269729104399263, "loss": 0.614, "step": 4402 }, { "epoch": 0.7854785478547854, "grad_norm": 0.5516197085380554, "learning_rate": 0.00033263116969522316, "loss": 0.9417, "step": 4403 }, { "epoch": 0.7856569440727856, "grad_norm": 0.4416463375091553, "learning_rate": 0.00033256504185663713, "loss": 0.7047, "step": 4404 }, { "epoch": 0.7858353402907858, "grad_norm": 2.543560743331909, "learning_rate": 0.00033249890753342826, "loss": 0.981, "step": 4405 }, { "epoch": 0.786013736508786, "grad_norm": 0.44152694940567017, "learning_rate": 0.0003324327667307907, "loss": 0.8247, "step": 4406 }, { "epoch": 0.7861921327267862, "grad_norm": 0.7885439395904541, "learning_rate": 0.00033236661945391905, "loss": 0.9313, "step": 4407 }, { "epoch": 0.7863705289447863, "grad_norm": 0.6830782294273376, "learning_rate": 0.00033230046570800867, "loss": 0.713, "step": 4408 }, { "epoch": 0.7865489251627865, "grad_norm": 0.5507636666297913, "learning_rate": 0.000332234305498255, "loss": 1.0907, "step": 4409 }, { "epoch": 0.7867273213807867, "grad_norm": 0.4962003231048584, "learning_rate": 0.00033216813882985444, "loss": 0.8278, "step": 4410 }, { "epoch": 0.7869057175987869, "grad_norm": 0.5796182155609131, "learning_rate": 0.00033210196570800365, "loss": 0.881, "step": 4411 }, { "epoch": 0.7870841138167871, "grad_norm": 0.48952221870422363, "learning_rate": 0.00033203578613789973, "loss": 0.9298, "step": 4412 }, { "epoch": 0.7872625100347873, "grad_norm": 0.49012526869773865, "learning_rate": 0.00033196960012474053, "loss": 0.8109, "step": 4413 }, { "epoch": 0.7874409062527874, "grad_norm": 0.49443262815475464, "learning_rate": 0.0003319034076737242, "loss": 0.9402, "step": 4414 }, { "epoch": 0.7876193024707876, "grad_norm": 0.4793679714202881, "learning_rate": 0.00033183720879004935, "loss": 0.7636, "step": 4415 }, { "epoch": 0.7877976986887878, "grad_norm": 0.49106279015541077, "learning_rate": 0.0003317710034789154, "loss": 0.8781, "step": 4416 }, { "epoch": 0.787976094906788, "grad_norm": 0.46945035457611084, "learning_rate": 0.00033170479174552217, "loss": 0.7358, "step": 4417 }, { "epoch": 0.7881544911247882, "grad_norm": 0.48162609338760376, "learning_rate": 0.00033163857359506965, "loss": 0.8671, "step": 4418 }, { "epoch": 0.7883328873427883, "grad_norm": 0.4425477385520935, "learning_rate": 0.00033157234903275867, "loss": 0.8101, "step": 4419 }, { "epoch": 0.7885112835607885, "grad_norm": 0.499623566865921, "learning_rate": 0.00033150611806379054, "loss": 0.8189, "step": 4420 }, { "epoch": 0.7886896797787887, "grad_norm": 0.44681599736213684, "learning_rate": 0.000331439880693367, "loss": 0.7808, "step": 4421 }, { "epoch": 0.7888680759967889, "grad_norm": 0.4531422555446625, "learning_rate": 0.00033137363692669014, "loss": 0.7667, "step": 4422 }, { "epoch": 0.7890464722147891, "grad_norm": 0.43248048424720764, "learning_rate": 0.00033130738676896297, "loss": 0.7906, "step": 4423 }, { "epoch": 0.7892248684327893, "grad_norm": 0.5049116611480713, "learning_rate": 0.00033124113022538865, "loss": 0.9246, "step": 4424 }, { "epoch": 0.7894032646507894, "grad_norm": 0.48414015769958496, "learning_rate": 0.00033117486730117093, "loss": 0.8807, "step": 4425 }, { "epoch": 0.7895816608687896, "grad_norm": 0.5417115688323975, "learning_rate": 0.000331108598001514, "loss": 0.9549, "step": 4426 }, { "epoch": 0.7897600570867898, "grad_norm": 0.4763809144496918, "learning_rate": 0.0003310423223316227, "loss": 0.8302, "step": 4427 }, { "epoch": 0.78993845330479, "grad_norm": 0.4397316873073578, "learning_rate": 0.00033097604029670225, "loss": 0.8886, "step": 4428 }, { "epoch": 0.7901168495227902, "grad_norm": 0.451040118932724, "learning_rate": 0.0003309097519019585, "loss": 0.6568, "step": 4429 }, { "epoch": 0.7902952457407902, "grad_norm": 0.4390012323856354, "learning_rate": 0.0003308434571525976, "loss": 0.7361, "step": 4430 }, { "epoch": 0.7904736419587904, "grad_norm": 0.48856833577156067, "learning_rate": 0.0003307771560538264, "loss": 0.7811, "step": 4431 }, { "epoch": 0.7906520381767906, "grad_norm": 0.49298450350761414, "learning_rate": 0.00033071084861085217, "loss": 0.8156, "step": 4432 }, { "epoch": 0.7908304343947908, "grad_norm": 0.424003541469574, "learning_rate": 0.00033064453482888257, "loss": 0.6857, "step": 4433 }, { "epoch": 0.791008830612791, "grad_norm": 0.7829723358154297, "learning_rate": 0.0003305782147131259, "loss": 0.9136, "step": 4434 }, { "epoch": 0.7911872268307912, "grad_norm": 0.549010157585144, "learning_rate": 0.0003305118882687909, "loss": 0.772, "step": 4435 }, { "epoch": 0.7913656230487913, "grad_norm": 0.4902380704879761, "learning_rate": 0.00033044555550108693, "loss": 0.7763, "step": 4436 }, { "epoch": 0.7915440192667915, "grad_norm": 0.4383619725704193, "learning_rate": 0.0003303792164152236, "loss": 0.9407, "step": 4437 }, { "epoch": 0.7917224154847917, "grad_norm": 0.486372709274292, "learning_rate": 0.00033031287101641116, "loss": 0.997, "step": 4438 }, { "epoch": 0.7919008117027919, "grad_norm": 0.5471330285072327, "learning_rate": 0.00033024651930986044, "loss": 0.8833, "step": 4439 }, { "epoch": 0.7920792079207921, "grad_norm": 0.47940507531166077, "learning_rate": 0.0003301801613007826, "loss": 1.008, "step": 4440 }, { "epoch": 0.7922576041387922, "grad_norm": 0.4837360382080078, "learning_rate": 0.0003301137969943894, "loss": 0.9464, "step": 4441 }, { "epoch": 0.7924360003567924, "grad_norm": 0.48073476552963257, "learning_rate": 0.0003300474263958931, "loss": 0.8527, "step": 4442 }, { "epoch": 0.7926143965747926, "grad_norm": 0.47916659712791443, "learning_rate": 0.00032998104951050634, "loss": 0.8098, "step": 4443 }, { "epoch": 0.7927927927927928, "grad_norm": 0.5394938588142395, "learning_rate": 0.00032991466634344234, "loss": 0.9535, "step": 4444 }, { "epoch": 0.792971189010793, "grad_norm": 0.44547122716903687, "learning_rate": 0.00032984827689991493, "loss": 0.7269, "step": 4445 }, { "epoch": 0.7931495852287932, "grad_norm": 0.5474995374679565, "learning_rate": 0.0003297818811851381, "loss": 1.0227, "step": 4446 }, { "epoch": 0.7933279814467933, "grad_norm": 0.5473585724830627, "learning_rate": 0.0003297154792043268, "loss": 1.043, "step": 4447 }, { "epoch": 0.7935063776647935, "grad_norm": 0.4513511061668396, "learning_rate": 0.0003296490709626959, "loss": 0.7503, "step": 4448 }, { "epoch": 0.7936847738827937, "grad_norm": 0.47088685631752014, "learning_rate": 0.0003295826564654614, "loss": 0.8366, "step": 4449 }, { "epoch": 0.7938631701007939, "grad_norm": 0.4641473889350891, "learning_rate": 0.00032951623571783915, "loss": 0.8643, "step": 4450 }, { "epoch": 0.7940415663187941, "grad_norm": 0.43831127882003784, "learning_rate": 0.00032944980872504613, "loss": 0.7894, "step": 4451 }, { "epoch": 0.7942199625367942, "grad_norm": 0.4277407228946686, "learning_rate": 0.00032938337549229924, "loss": 0.6743, "step": 4452 }, { "epoch": 0.7943983587547944, "grad_norm": 0.454748272895813, "learning_rate": 0.00032931693602481616, "loss": 0.829, "step": 4453 }, { "epoch": 0.7945767549727946, "grad_norm": 0.5081732273101807, "learning_rate": 0.0003292504903278151, "loss": 0.9585, "step": 4454 }, { "epoch": 0.7947551511907948, "grad_norm": 0.4319037199020386, "learning_rate": 0.0003291840384065146, "loss": 0.7298, "step": 4455 }, { "epoch": 0.794933547408795, "grad_norm": 0.5093657374382019, "learning_rate": 0.00032911758026613384, "loss": 1.0046, "step": 4456 }, { "epoch": 0.7951119436267952, "grad_norm": 0.4518407881259918, "learning_rate": 0.0003290511159118924, "loss": 0.7589, "step": 4457 }, { "epoch": 0.7952903398447952, "grad_norm": 0.4718686044216156, "learning_rate": 0.0003289846453490103, "loss": 0.7775, "step": 4458 }, { "epoch": 0.7954687360627954, "grad_norm": 0.520844578742981, "learning_rate": 0.00032891816858270816, "loss": 1.2267, "step": 4459 }, { "epoch": 0.7956471322807956, "grad_norm": 0.4900132119655609, "learning_rate": 0.000328851685618207, "loss": 0.941, "step": 4460 }, { "epoch": 0.7958255284987958, "grad_norm": 0.4234750270843506, "learning_rate": 0.00032878519646072833, "loss": 0.8223, "step": 4461 }, { "epoch": 0.796003924716796, "grad_norm": 0.42578887939453125, "learning_rate": 0.0003287187011154943, "loss": 0.6952, "step": 4462 }, { "epoch": 0.7961823209347961, "grad_norm": 0.4930354356765747, "learning_rate": 0.00032865219958772734, "loss": 0.8822, "step": 4463 }, { "epoch": 0.7963607171527963, "grad_norm": 0.5005689263343811, "learning_rate": 0.0003285856918826505, "loss": 0.8978, "step": 4464 }, { "epoch": 0.7965391133707965, "grad_norm": 0.46632346510887146, "learning_rate": 0.00032851917800548724, "loss": 0.9696, "step": 4465 }, { "epoch": 0.7967175095887967, "grad_norm": 0.4460965692996979, "learning_rate": 0.0003284526579614615, "loss": 0.8667, "step": 4466 }, { "epoch": 0.7968959058067969, "grad_norm": 0.45909878611564636, "learning_rate": 0.0003283861317557978, "loss": 0.9574, "step": 4467 }, { "epoch": 0.7970743020247971, "grad_norm": 0.48101097345352173, "learning_rate": 0.0003283195993937209, "loss": 0.8039, "step": 4468 }, { "epoch": 0.7972526982427972, "grad_norm": 0.5066887140274048, "learning_rate": 0.0003282530608804565, "loss": 0.8576, "step": 4469 }, { "epoch": 0.7974310944607974, "grad_norm": 0.44910162687301636, "learning_rate": 0.0003281865162212304, "loss": 0.846, "step": 4470 }, { "epoch": 0.7976094906787976, "grad_norm": 0.46548551321029663, "learning_rate": 0.000328119965421269, "loss": 0.7717, "step": 4471 }, { "epoch": 0.7977878868967978, "grad_norm": 0.603074312210083, "learning_rate": 0.00032805340848579903, "loss": 0.9443, "step": 4472 }, { "epoch": 0.797966283114798, "grad_norm": 0.4617455005645752, "learning_rate": 0.00032798684542004793, "loss": 0.6775, "step": 4473 }, { "epoch": 0.7981446793327981, "grad_norm": 0.5626710653305054, "learning_rate": 0.00032792027622924357, "loss": 1.0429, "step": 4474 }, { "epoch": 0.7983230755507983, "grad_norm": 0.500339925289154, "learning_rate": 0.00032785370091861435, "loss": 1.0465, "step": 4475 }, { "epoch": 0.7985014717687985, "grad_norm": 0.44158151745796204, "learning_rate": 0.0003277871194933888, "loss": 0.6398, "step": 4476 }, { "epoch": 0.7986798679867987, "grad_norm": 0.47629523277282715, "learning_rate": 0.0003277205319587965, "loss": 0.8574, "step": 4477 }, { "epoch": 0.7988582642047989, "grad_norm": 0.6063137650489807, "learning_rate": 0.00032765393832006695, "loss": 0.928, "step": 4478 }, { "epoch": 0.7990366604227991, "grad_norm": 0.41350358724594116, "learning_rate": 0.00032758733858243054, "loss": 0.7943, "step": 4479 }, { "epoch": 0.7992150566407992, "grad_norm": 0.4761079251766205, "learning_rate": 0.000327520732751118, "loss": 0.9293, "step": 4480 }, { "epoch": 0.7993934528587994, "grad_norm": 0.46137312054634094, "learning_rate": 0.0003274541208313604, "loss": 0.6324, "step": 4481 }, { "epoch": 0.7995718490767996, "grad_norm": 0.46572351455688477, "learning_rate": 0.00032738750282838955, "loss": 0.796, "step": 4482 }, { "epoch": 0.7997502452947998, "grad_norm": 1.1689634323120117, "learning_rate": 0.0003273208787474375, "loss": 0.8631, "step": 4483 }, { "epoch": 0.7999286415128, "grad_norm": 0.5234804749488831, "learning_rate": 0.00032725424859373687, "loss": 1.022, "step": 4484 }, { "epoch": 0.8001070377308, "grad_norm": 0.46076735854148865, "learning_rate": 0.0003271876123725208, "loss": 0.8363, "step": 4485 }, { "epoch": 0.8002854339488003, "grad_norm": 0.5089244246482849, "learning_rate": 0.0003271209700890229, "loss": 1.0501, "step": 4486 }, { "epoch": 0.8004638301668004, "grad_norm": 0.49238088726997375, "learning_rate": 0.0003270543217484772, "loss": 0.7696, "step": 4487 }, { "epoch": 0.8006422263848006, "grad_norm": 0.5077065229415894, "learning_rate": 0.0003269876673561183, "loss": 0.7879, "step": 4488 }, { "epoch": 0.8008206226028008, "grad_norm": 0.4703711271286011, "learning_rate": 0.000326921006917181, "loss": 0.7882, "step": 4489 }, { "epoch": 0.800999018820801, "grad_norm": 0.4334394633769989, "learning_rate": 0.000326854340436901, "loss": 0.6552, "step": 4490 }, { "epoch": 0.8011774150388011, "grad_norm": 0.45196327567100525, "learning_rate": 0.0003267876679205142, "loss": 0.674, "step": 4491 }, { "epoch": 0.8013558112568013, "grad_norm": 0.629697859287262, "learning_rate": 0.0003267209893732569, "loss": 1.0846, "step": 4492 }, { "epoch": 0.8015342074748015, "grad_norm": 0.4871639907360077, "learning_rate": 0.00032665430480036616, "loss": 0.8173, "step": 4493 }, { "epoch": 0.8017126036928017, "grad_norm": 2.4100522994995117, "learning_rate": 0.0003265876142070794, "loss": 0.8096, "step": 4494 }, { "epoch": 0.8018909999108019, "grad_norm": 0.567620038986206, "learning_rate": 0.00032652091759863424, "loss": 1.2446, "step": 4495 }, { "epoch": 0.802069396128802, "grad_norm": 0.440104603767395, "learning_rate": 0.0003264542149802692, "loss": 0.7097, "step": 4496 }, { "epoch": 0.8022477923468022, "grad_norm": 0.49658969044685364, "learning_rate": 0.0003263875063572231, "loss": 1.0596, "step": 4497 }, { "epoch": 0.8024261885648024, "grad_norm": 0.5105962157249451, "learning_rate": 0.000326320791734735, "loss": 0.9645, "step": 4498 }, { "epoch": 0.8026045847828026, "grad_norm": 0.5017589926719666, "learning_rate": 0.00032625407111804477, "loss": 0.8783, "step": 4499 }, { "epoch": 0.8027829810008028, "grad_norm": 0.4669455885887146, "learning_rate": 0.0003261873445123926, "loss": 0.7848, "step": 4500 }, { "epoch": 0.802961377218803, "grad_norm": 0.5145031809806824, "learning_rate": 0.0003261206119230192, "loss": 0.9043, "step": 4501 }, { "epoch": 0.8031397734368031, "grad_norm": 0.8292232155799866, "learning_rate": 0.0003260538733551658, "loss": 0.8444, "step": 4502 }, { "epoch": 0.8033181696548033, "grad_norm": 0.4397391974925995, "learning_rate": 0.0003259871288140738, "loss": 0.8279, "step": 4503 }, { "epoch": 0.8034965658728035, "grad_norm": 0.48555225133895874, "learning_rate": 0.0003259203783049854, "loss": 1.0585, "step": 4504 }, { "epoch": 0.8036749620908037, "grad_norm": 0.4733032286167145, "learning_rate": 0.00032585362183314327, "loss": 0.899, "step": 4505 }, { "epoch": 0.8038533583088039, "grad_norm": 0.6270719766616821, "learning_rate": 0.00032578685940379027, "loss": 0.8007, "step": 4506 }, { "epoch": 0.804031754526804, "grad_norm": 1.0983858108520508, "learning_rate": 0.00032572009102216983, "loss": 0.8323, "step": 4507 }, { "epoch": 0.8042101507448042, "grad_norm": 0.5489131212234497, "learning_rate": 0.00032565331669352613, "loss": 0.78, "step": 4508 }, { "epoch": 0.8043885469628044, "grad_norm": 0.4885414242744446, "learning_rate": 0.00032558653642310347, "loss": 0.9142, "step": 4509 }, { "epoch": 0.8045669431808046, "grad_norm": 0.5155977010726929, "learning_rate": 0.0003255197502161468, "loss": 0.9124, "step": 4510 }, { "epoch": 0.8047453393988048, "grad_norm": 0.44706472754478455, "learning_rate": 0.0003254529580779014, "loss": 0.7907, "step": 4511 }, { "epoch": 0.804923735616805, "grad_norm": 0.6217315793037415, "learning_rate": 0.0003253861600136132, "loss": 0.7259, "step": 4512 }, { "epoch": 0.8051021318348051, "grad_norm": 0.5625154972076416, "learning_rate": 0.0003253193560285284, "loss": 0.9467, "step": 4513 }, { "epoch": 0.8052805280528053, "grad_norm": 0.46225252747535706, "learning_rate": 0.00032525254612789377, "loss": 0.8054, "step": 4514 }, { "epoch": 0.8054589242708055, "grad_norm": 0.4399617612361908, "learning_rate": 0.0003251857303169565, "loss": 0.7312, "step": 4515 }, { "epoch": 0.8056373204888057, "grad_norm": 0.4609040319919586, "learning_rate": 0.00032511890860096443, "loss": 0.8695, "step": 4516 }, { "epoch": 0.8058157167068059, "grad_norm": 0.5766742825508118, "learning_rate": 0.00032505208098516567, "loss": 0.9441, "step": 4517 }, { "epoch": 0.8059941129248059, "grad_norm": 0.47482702136039734, "learning_rate": 0.0003249852474748086, "loss": 0.7759, "step": 4518 }, { "epoch": 0.8061725091428061, "grad_norm": 0.4535680115222931, "learning_rate": 0.0003249184080751426, "loss": 0.9628, "step": 4519 }, { "epoch": 0.8063509053608063, "grad_norm": 0.44655969738960266, "learning_rate": 0.00032485156279141695, "loss": 0.8394, "step": 4520 }, { "epoch": 0.8065293015788065, "grad_norm": 0.5331109762191772, "learning_rate": 0.00032478471162888185, "loss": 1.0328, "step": 4521 }, { "epoch": 0.8067076977968067, "grad_norm": 0.5180974006652832, "learning_rate": 0.00032471785459278757, "loss": 0.914, "step": 4522 }, { "epoch": 0.8068860940148069, "grad_norm": 0.4896256923675537, "learning_rate": 0.0003246509916883853, "loss": 0.6507, "step": 4523 }, { "epoch": 0.807064490232807, "grad_norm": 0.5117029547691345, "learning_rate": 0.0003245841229209262, "loss": 1.0229, "step": 4524 }, { "epoch": 0.8072428864508072, "grad_norm": 0.4755803346633911, "learning_rate": 0.00032451724829566216, "loss": 0.8733, "step": 4525 }, { "epoch": 0.8074212826688074, "grad_norm": 0.642292320728302, "learning_rate": 0.0003244503678178455, "loss": 1.0649, "step": 4526 }, { "epoch": 0.8075996788868076, "grad_norm": 0.5174018740653992, "learning_rate": 0.000324383481492729, "loss": 1.0805, "step": 4527 }, { "epoch": 0.8077780751048078, "grad_norm": 0.439130961894989, "learning_rate": 0.0003243165893255659, "loss": 0.7667, "step": 4528 }, { "epoch": 0.807956471322808, "grad_norm": 0.5517845153808594, "learning_rate": 0.00032424969132160985, "loss": 1.0015, "step": 4529 }, { "epoch": 0.8081348675408081, "grad_norm": 0.5080962777137756, "learning_rate": 0.00032418278748611495, "loss": 0.8835, "step": 4530 }, { "epoch": 0.8083132637588083, "grad_norm": 0.5714918971061707, "learning_rate": 0.00032411587782433594, "loss": 1.2275, "step": 4531 }, { "epoch": 0.8084916599768085, "grad_norm": 0.5037386417388916, "learning_rate": 0.0003240489623415277, "loss": 0.8592, "step": 4532 }, { "epoch": 0.8086700561948087, "grad_norm": 0.5023531913757324, "learning_rate": 0.00032398204104294585, "loss": 0.9501, "step": 4533 }, { "epoch": 0.8088484524128089, "grad_norm": 0.4968222677707672, "learning_rate": 0.00032391511393384633, "loss": 0.8091, "step": 4534 }, { "epoch": 0.809026848630809, "grad_norm": 0.5576387643814087, "learning_rate": 0.00032384818101948554, "loss": 0.8792, "step": 4535 }, { "epoch": 0.8092052448488092, "grad_norm": 0.4761325716972351, "learning_rate": 0.0003237812423051204, "loss": 0.8114, "step": 4536 }, { "epoch": 0.8093836410668094, "grad_norm": 0.393627792596817, "learning_rate": 0.00032371429779600824, "loss": 0.5444, "step": 4537 }, { "epoch": 0.8095620372848096, "grad_norm": 0.5613236427307129, "learning_rate": 0.00032364734749740687, "loss": 0.855, "step": 4538 }, { "epoch": 0.8097404335028098, "grad_norm": 0.46514925360679626, "learning_rate": 0.00032358039141457454, "loss": 0.7157, "step": 4539 }, { "epoch": 0.80991882972081, "grad_norm": 0.4529775083065033, "learning_rate": 0.0003235134295527699, "loss": 0.8365, "step": 4540 }, { "epoch": 0.8100972259388101, "grad_norm": 0.47237035632133484, "learning_rate": 0.0003234464619172522, "loss": 0.852, "step": 4541 }, { "epoch": 0.8102756221568103, "grad_norm": 0.4663850963115692, "learning_rate": 0.00032337948851328093, "loss": 0.7608, "step": 4542 }, { "epoch": 0.8104540183748105, "grad_norm": 0.5153512954711914, "learning_rate": 0.0003233125093461162, "loss": 0.8918, "step": 4543 }, { "epoch": 0.8106324145928107, "grad_norm": 0.40413349866867065, "learning_rate": 0.0003232455244210186, "loss": 0.6576, "step": 4544 }, { "epoch": 0.8108108108108109, "grad_norm": 0.4308657944202423, "learning_rate": 0.0003231785337432489, "loss": 0.9653, "step": 4545 }, { "epoch": 0.8109892070288109, "grad_norm": 0.4188125729560852, "learning_rate": 0.00032311153731806873, "loss": 0.7579, "step": 4546 }, { "epoch": 0.8111676032468111, "grad_norm": 0.5259095430374146, "learning_rate": 0.00032304453515073994, "loss": 1.1485, "step": 4547 }, { "epoch": 0.8113459994648113, "grad_norm": 0.4392109215259552, "learning_rate": 0.0003229775272465247, "loss": 0.8834, "step": 4548 }, { "epoch": 0.8115243956828115, "grad_norm": 0.4974541962146759, "learning_rate": 0.0003229105136106859, "loss": 0.865, "step": 4549 }, { "epoch": 0.8117027919008117, "grad_norm": 1.2180891036987305, "learning_rate": 0.0003228434942484869, "loss": 0.9709, "step": 4550 }, { "epoch": 0.8118811881188119, "grad_norm": 0.4866902828216553, "learning_rate": 0.000322776469165191, "loss": 0.9429, "step": 4551 }, { "epoch": 0.812059584336812, "grad_norm": 0.5774986147880554, "learning_rate": 0.0003227094383660626, "loss": 0.8982, "step": 4552 }, { "epoch": 0.8122379805548122, "grad_norm": 0.4660821259021759, "learning_rate": 0.0003226424018563661, "loss": 0.8265, "step": 4553 }, { "epoch": 0.8124163767728124, "grad_norm": 0.4910152554512024, "learning_rate": 0.00032257535964136673, "loss": 0.868, "step": 4554 }, { "epoch": 0.8125947729908126, "grad_norm": 0.4666675627231598, "learning_rate": 0.0003225083117263298, "loss": 0.849, "step": 4555 }, { "epoch": 0.8127731692088128, "grad_norm": 0.5173287391662598, "learning_rate": 0.00032244125811652135, "loss": 0.9152, "step": 4556 }, { "epoch": 0.8129515654268129, "grad_norm": 0.503570020198822, "learning_rate": 0.00032237419881720765, "loss": 1.0545, "step": 4557 }, { "epoch": 0.8131299616448131, "grad_norm": 0.4771646559238434, "learning_rate": 0.00032230713383365545, "loss": 0.8746, "step": 4558 }, { "epoch": 0.8133083578628133, "grad_norm": 0.47005799412727356, "learning_rate": 0.0003222400631711321, "loss": 0.8811, "step": 4559 }, { "epoch": 0.8134867540808135, "grad_norm": 0.48531290888786316, "learning_rate": 0.00032217298683490526, "loss": 0.7489, "step": 4560 }, { "epoch": 0.8136651502988137, "grad_norm": 0.4748542010784149, "learning_rate": 0.0003221059048302431, "loss": 1.0024, "step": 4561 }, { "epoch": 0.8138435465168139, "grad_norm": 0.46616560220718384, "learning_rate": 0.00032203881716241426, "loss": 0.8083, "step": 4562 }, { "epoch": 0.814021942734814, "grad_norm": 1.1073871850967407, "learning_rate": 0.00032197172383668763, "loss": 0.6794, "step": 4563 }, { "epoch": 0.8142003389528142, "grad_norm": 0.5361044406890869, "learning_rate": 0.0003219046248583329, "loss": 1.0918, "step": 4564 }, { "epoch": 0.8143787351708144, "grad_norm": 0.44288361072540283, "learning_rate": 0.00032183752023261973, "loss": 0.8201, "step": 4565 }, { "epoch": 0.8145571313888146, "grad_norm": 0.4622916579246521, "learning_rate": 0.00032177040996481874, "loss": 0.9029, "step": 4566 }, { "epoch": 0.8147355276068148, "grad_norm": 0.48599693179130554, "learning_rate": 0.0003217032940602006, "loss": 1.0285, "step": 4567 }, { "epoch": 0.8149139238248149, "grad_norm": 0.46129268407821655, "learning_rate": 0.00032163617252403654, "loss": 0.8161, "step": 4568 }, { "epoch": 0.8150923200428151, "grad_norm": 0.5313239097595215, "learning_rate": 0.0003215690453615985, "loss": 1.0821, "step": 4569 }, { "epoch": 0.8152707162608153, "grad_norm": 0.42274999618530273, "learning_rate": 0.0003215019125781583, "loss": 0.7682, "step": 4570 }, { "epoch": 0.8154491124788155, "grad_norm": 0.46743467450141907, "learning_rate": 0.00032143477417898866, "loss": 0.8906, "step": 4571 }, { "epoch": 0.8156275086968157, "grad_norm": 0.4919176697731018, "learning_rate": 0.0003213676301693626, "loss": 0.8782, "step": 4572 }, { "epoch": 0.8158059049148159, "grad_norm": 0.479407399892807, "learning_rate": 0.00032130048055455356, "loss": 0.6938, "step": 4573 }, { "epoch": 0.815984301132816, "grad_norm": 0.4484141767024994, "learning_rate": 0.0003212333253398355, "loss": 0.6904, "step": 4574 }, { "epoch": 0.8161626973508161, "grad_norm": 0.5200289487838745, "learning_rate": 0.0003211661645304827, "loss": 0.9434, "step": 4575 }, { "epoch": 0.8163410935688163, "grad_norm": 0.5177778601646423, "learning_rate": 0.00032109899813177, "loss": 0.8227, "step": 4576 }, { "epoch": 0.8165194897868165, "grad_norm": 0.5002527236938477, "learning_rate": 0.0003210318261489725, "loss": 0.9783, "step": 4577 }, { "epoch": 0.8166978860048167, "grad_norm": 0.47500887513160706, "learning_rate": 0.0003209646485873661, "loss": 0.7346, "step": 4578 }, { "epoch": 0.8168762822228168, "grad_norm": 0.49525851011276245, "learning_rate": 0.00032089746545222657, "loss": 0.8249, "step": 4579 }, { "epoch": 0.817054678440817, "grad_norm": 0.5161625742912292, "learning_rate": 0.0003208302767488307, "loss": 0.9479, "step": 4580 }, { "epoch": 0.8172330746588172, "grad_norm": 0.47351545095443726, "learning_rate": 0.00032076308248245533, "loss": 0.8557, "step": 4581 }, { "epoch": 0.8174114708768174, "grad_norm": 0.37471267580986023, "learning_rate": 0.00032069588265837794, "loss": 0.4742, "step": 4582 }, { "epoch": 0.8175898670948176, "grad_norm": 0.5405718088150024, "learning_rate": 0.0003206286772818764, "loss": 1.0018, "step": 4583 }, { "epoch": 0.8177682633128178, "grad_norm": 0.4767707884311676, "learning_rate": 0.00032056146635822886, "loss": 0.8955, "step": 4584 }, { "epoch": 0.8179466595308179, "grad_norm": 0.48326289653778076, "learning_rate": 0.00032049424989271416, "loss": 0.8068, "step": 4585 }, { "epoch": 0.8181250557488181, "grad_norm": 0.44311580061912537, "learning_rate": 0.0003204270278906114, "loss": 0.7806, "step": 4586 }, { "epoch": 0.8183034519668183, "grad_norm": 0.46994298696517944, "learning_rate": 0.00032035980035720015, "loss": 0.9937, "step": 4587 }, { "epoch": 0.8184818481848185, "grad_norm": 1.0689364671707153, "learning_rate": 0.0003202925672977605, "loss": 0.9123, "step": 4588 }, { "epoch": 0.8186602444028187, "grad_norm": 0.5591593980789185, "learning_rate": 0.0003202253287175728, "loss": 1.0514, "step": 4589 }, { "epoch": 0.8188386406208188, "grad_norm": 5.0126953125, "learning_rate": 0.00032015808462191816, "loss": 0.8152, "step": 4590 }, { "epoch": 0.819017036838819, "grad_norm": 1.514142394065857, "learning_rate": 0.00032009083501607753, "loss": 0.7788, "step": 4591 }, { "epoch": 0.8191954330568192, "grad_norm": 0.5694434642791748, "learning_rate": 0.00032002357990533296, "loss": 0.9116, "step": 4592 }, { "epoch": 0.8193738292748194, "grad_norm": 1.0806207656860352, "learning_rate": 0.0003199563192949666, "loss": 1.0898, "step": 4593 }, { "epoch": 0.8195522254928196, "grad_norm": 0.490933895111084, "learning_rate": 0.000319889053190261, "loss": 0.7681, "step": 4594 }, { "epoch": 0.8197306217108198, "grad_norm": 0.5183424949645996, "learning_rate": 0.00031982178159649925, "loss": 0.7874, "step": 4595 }, { "epoch": 0.8199090179288199, "grad_norm": 0.5265378355979919, "learning_rate": 0.0003197545045189648, "loss": 0.8632, "step": 4596 }, { "epoch": 0.8200874141468201, "grad_norm": 0.4929676353931427, "learning_rate": 0.0003196872219629417, "loss": 0.833, "step": 4597 }, { "epoch": 0.8202658103648203, "grad_norm": 0.48382702469825745, "learning_rate": 0.00031961993393371405, "loss": 0.6996, "step": 4598 }, { "epoch": 0.8204442065828205, "grad_norm": 0.7561694383621216, "learning_rate": 0.00031955264043656675, "loss": 0.8474, "step": 4599 }, { "epoch": 0.8206226028008207, "grad_norm": 0.6017073392868042, "learning_rate": 0.000319485341476785, "loss": 0.9392, "step": 4600 }, { "epoch": 0.8208009990188208, "grad_norm": 0.5361237525939941, "learning_rate": 0.00031941803705965447, "loss": 0.8068, "step": 4601 }, { "epoch": 0.820979395236821, "grad_norm": 0.5034250617027283, "learning_rate": 0.00031935072719046115, "loss": 1.0488, "step": 4602 }, { "epoch": 0.8211577914548212, "grad_norm": 0.48589175939559937, "learning_rate": 0.0003192834118744916, "loss": 1.0141, "step": 4603 }, { "epoch": 0.8213361876728213, "grad_norm": 0.43965524435043335, "learning_rate": 0.0003192160911170327, "loss": 0.6931, "step": 4604 }, { "epoch": 0.8215145838908215, "grad_norm": 0.6355204582214355, "learning_rate": 0.00031914876492337177, "loss": 0.8362, "step": 4605 }, { "epoch": 0.8216929801088217, "grad_norm": 0.502838671207428, "learning_rate": 0.0003190814332987965, "loss": 0.9824, "step": 4606 }, { "epoch": 0.8218713763268218, "grad_norm": 0.48031777143478394, "learning_rate": 0.00031901409624859536, "loss": 0.8348, "step": 4607 }, { "epoch": 0.822049772544822, "grad_norm": 0.47550857067108154, "learning_rate": 0.00031894675377805665, "loss": 0.8757, "step": 4608 }, { "epoch": 0.8222281687628222, "grad_norm": 1.1182494163513184, "learning_rate": 0.0003188794058924697, "loss": 0.9129, "step": 4609 }, { "epoch": 0.8224065649808224, "grad_norm": 0.5143523812294006, "learning_rate": 0.00031881205259712384, "loss": 0.9542, "step": 4610 }, { "epoch": 0.8225849611988226, "grad_norm": 0.5969739556312561, "learning_rate": 0.00031874469389730884, "loss": 0.9868, "step": 4611 }, { "epoch": 0.8227633574168227, "grad_norm": 0.5791580677032471, "learning_rate": 0.0003186773297983153, "loss": 0.8447, "step": 4612 }, { "epoch": 0.8229417536348229, "grad_norm": 0.4807104766368866, "learning_rate": 0.00031860996030543383, "loss": 0.9425, "step": 4613 }, { "epoch": 0.8231201498528231, "grad_norm": 0.48759788274765015, "learning_rate": 0.00031854258542395546, "loss": 0.7504, "step": 4614 }, { "epoch": 0.8232985460708233, "grad_norm": 0.5369093418121338, "learning_rate": 0.00031847520515917207, "loss": 0.8581, "step": 4615 }, { "epoch": 0.8234769422888235, "grad_norm": 0.5256065130233765, "learning_rate": 0.00031840781951637554, "loss": 0.8261, "step": 4616 }, { "epoch": 0.8236553385068237, "grad_norm": 0.47315025329589844, "learning_rate": 0.0003183404285008582, "loss": 0.786, "step": 4617 }, { "epoch": 0.8238337347248238, "grad_norm": 0.4942631125450134, "learning_rate": 0.00031827303211791314, "loss": 0.8627, "step": 4618 }, { "epoch": 0.824012130942824, "grad_norm": 0.4402255713939667, "learning_rate": 0.0003182056303728334, "loss": 0.7299, "step": 4619 }, { "epoch": 0.8241905271608242, "grad_norm": 0.5043277740478516, "learning_rate": 0.00031813822327091286, "loss": 0.8561, "step": 4620 }, { "epoch": 0.8243689233788244, "grad_norm": 0.49375712871551514, "learning_rate": 0.0003180708108174456, "loss": 0.8253, "step": 4621 }, { "epoch": 0.8245473195968246, "grad_norm": 1.158111572265625, "learning_rate": 0.00031800339301772614, "loss": 0.844, "step": 4622 }, { "epoch": 0.8247257158148247, "grad_norm": 0.5060315728187561, "learning_rate": 0.0003179359698770494, "loss": 0.8552, "step": 4623 }, { "epoch": 0.8249041120328249, "grad_norm": 0.5320479273796082, "learning_rate": 0.00031786854140071084, "loss": 0.9473, "step": 4624 }, { "epoch": 0.8250825082508251, "grad_norm": 0.5282738208770752, "learning_rate": 0.00031780110759400634, "loss": 0.9197, "step": 4625 }, { "epoch": 0.8252609044688253, "grad_norm": 0.4730012118816376, "learning_rate": 0.00031773366846223197, "loss": 0.7853, "step": 4626 }, { "epoch": 0.8254393006868255, "grad_norm": 0.5142650008201599, "learning_rate": 0.00031766622401068433, "loss": 0.8487, "step": 4627 }, { "epoch": 0.8256176969048257, "grad_norm": 0.4720161259174347, "learning_rate": 0.0003175987742446607, "loss": 0.8038, "step": 4628 }, { "epoch": 0.8257960931228258, "grad_norm": 0.5759044885635376, "learning_rate": 0.00031753131916945835, "loss": 0.712, "step": 4629 }, { "epoch": 0.825974489340826, "grad_norm": 0.6872929930686951, "learning_rate": 0.0003174638587903753, "loss": 0.9687, "step": 4630 }, { "epoch": 0.8261528855588262, "grad_norm": 0.6308576464653015, "learning_rate": 0.0003173963931127099, "loss": 0.8172, "step": 4631 }, { "epoch": 0.8263312817768264, "grad_norm": 0.5117534399032593, "learning_rate": 0.0003173289221417606, "loss": 0.9426, "step": 4632 }, { "epoch": 0.8265096779948266, "grad_norm": 0.5493487119674683, "learning_rate": 0.00031726144588282686, "loss": 0.9628, "step": 4633 }, { "epoch": 0.8266880742128266, "grad_norm": 3.2146034240722656, "learning_rate": 0.0003171939643412081, "loss": 0.9362, "step": 4634 }, { "epoch": 0.8268664704308268, "grad_norm": 0.49968186020851135, "learning_rate": 0.00031712647752220427, "loss": 0.6978, "step": 4635 }, { "epoch": 0.827044866648827, "grad_norm": 0.5384466052055359, "learning_rate": 0.0003170589854311157, "loss": 1.0049, "step": 4636 }, { "epoch": 0.8272232628668272, "grad_norm": 0.547645628452301, "learning_rate": 0.0003169914880732434, "loss": 1.1843, "step": 4637 }, { "epoch": 0.8274016590848274, "grad_norm": 0.4683157503604889, "learning_rate": 0.0003169239854538884, "loss": 0.7661, "step": 4638 }, { "epoch": 0.8275800553028276, "grad_norm": 0.5019885301589966, "learning_rate": 0.0003168564775783523, "loss": 0.8682, "step": 4639 }, { "epoch": 0.8277584515208277, "grad_norm": 0.6580917835235596, "learning_rate": 0.0003167889644519374, "loss": 0.8636, "step": 4640 }, { "epoch": 0.8279368477388279, "grad_norm": 0.5439825654029846, "learning_rate": 0.00031672144607994583, "loss": 0.8918, "step": 4641 }, { "epoch": 0.8281152439568281, "grad_norm": 0.5139893889427185, "learning_rate": 0.00031665392246768066, "loss": 0.9374, "step": 4642 }, { "epoch": 0.8282936401748283, "grad_norm": 0.4646841883659363, "learning_rate": 0.00031658639362044515, "loss": 0.744, "step": 4643 }, { "epoch": 0.8284720363928285, "grad_norm": 0.4877662658691406, "learning_rate": 0.00031651885954354285, "loss": 0.635, "step": 4644 }, { "epoch": 0.8286504326108286, "grad_norm": 0.5172538757324219, "learning_rate": 0.00031645132024227794, "loss": 0.9473, "step": 4645 }, { "epoch": 0.8288288288288288, "grad_norm": 0.8445831537246704, "learning_rate": 0.000316383775721955, "loss": 0.7455, "step": 4646 }, { "epoch": 0.829007225046829, "grad_norm": 0.5219873785972595, "learning_rate": 0.0003163162259878788, "loss": 0.9679, "step": 4647 }, { "epoch": 0.8291856212648292, "grad_norm": 0.5205322504043579, "learning_rate": 0.0003162486710453548, "loss": 0.7856, "step": 4648 }, { "epoch": 0.8293640174828294, "grad_norm": 0.4920894503593445, "learning_rate": 0.0003161811108996888, "loss": 0.7642, "step": 4649 }, { "epoch": 0.8295424137008296, "grad_norm": 0.4882299602031708, "learning_rate": 0.00031611354555618673, "loss": 0.7915, "step": 4650 }, { "epoch": 0.8297208099188297, "grad_norm": 0.5416523814201355, "learning_rate": 0.0003160459750201552, "loss": 1.1522, "step": 4651 }, { "epoch": 0.8298992061368299, "grad_norm": 0.4422816038131714, "learning_rate": 0.0003159783992969012, "loss": 0.7744, "step": 4652 }, { "epoch": 0.8300776023548301, "grad_norm": 0.5312891006469727, "learning_rate": 0.0003159108183917321, "loss": 1.0687, "step": 4653 }, { "epoch": 0.8302559985728303, "grad_norm": 0.4771096706390381, "learning_rate": 0.00031584323230995584, "loss": 0.8311, "step": 4654 }, { "epoch": 0.8304343947908305, "grad_norm": 0.5976044535636902, "learning_rate": 0.0003157756410568803, "loss": 0.6884, "step": 4655 }, { "epoch": 0.8306127910088306, "grad_norm": 0.6168167591094971, "learning_rate": 0.0003157080446378143, "loss": 0.8282, "step": 4656 }, { "epoch": 0.8307911872268308, "grad_norm": 0.5120362639427185, "learning_rate": 0.0003156404430580667, "loss": 1.0674, "step": 4657 }, { "epoch": 0.830969583444831, "grad_norm": 0.5644657015800476, "learning_rate": 0.000315572836322947, "loss": 1.0913, "step": 4658 }, { "epoch": 0.8311479796628312, "grad_norm": 0.4254976511001587, "learning_rate": 0.00031550522443776497, "loss": 0.8372, "step": 4659 }, { "epoch": 0.8313263758808314, "grad_norm": 0.4404276907444, "learning_rate": 0.0003154376074078307, "loss": 0.8619, "step": 4660 }, { "epoch": 0.8315047720988316, "grad_norm": 0.4840773046016693, "learning_rate": 0.00031536998523845497, "loss": 0.87, "step": 4661 }, { "epoch": 0.8316831683168316, "grad_norm": 0.4883440136909485, "learning_rate": 0.0003153023579349487, "loss": 1.0516, "step": 4662 }, { "epoch": 0.8318615645348318, "grad_norm": 0.39201605319976807, "learning_rate": 0.0003152347255026234, "loss": 0.6851, "step": 4663 }, { "epoch": 0.832039960752832, "grad_norm": 0.4473479688167572, "learning_rate": 0.0003151670879467908, "loss": 0.6324, "step": 4664 }, { "epoch": 0.8322183569708322, "grad_norm": 1.3589231967926025, "learning_rate": 0.0003150994452727631, "loss": 0.954, "step": 4665 }, { "epoch": 0.8323967531888324, "grad_norm": 0.44502779841423035, "learning_rate": 0.00031503179748585303, "loss": 0.6668, "step": 4666 }, { "epoch": 0.8325751494068325, "grad_norm": 0.49202197790145874, "learning_rate": 0.0003149641445913736, "loss": 0.8664, "step": 4667 }, { "epoch": 0.8327535456248327, "grad_norm": 0.6371123790740967, "learning_rate": 0.0003148964865946381, "loss": 0.7495, "step": 4668 }, { "epoch": 0.8329319418428329, "grad_norm": 0.5421488881111145, "learning_rate": 0.00031482882350096063, "loss": 1.1947, "step": 4669 }, { "epoch": 0.8331103380608331, "grad_norm": 0.49121710658073425, "learning_rate": 0.0003147611553156552, "loss": 0.8075, "step": 4670 }, { "epoch": 0.8332887342788333, "grad_norm": 0.5611662268638611, "learning_rate": 0.00031469348204403647, "loss": 0.9551, "step": 4671 }, { "epoch": 0.8334671304968335, "grad_norm": 0.4589937627315521, "learning_rate": 0.0003146258036914195, "loss": 0.8752, "step": 4672 }, { "epoch": 0.8336455267148336, "grad_norm": 0.49224385619163513, "learning_rate": 0.0003145581202631197, "loss": 0.7851, "step": 4673 }, { "epoch": 0.8338239229328338, "grad_norm": 0.4641133248806, "learning_rate": 0.00031449043176445297, "loss": 0.8309, "step": 4674 }, { "epoch": 0.834002319150834, "grad_norm": 0.46971380710601807, "learning_rate": 0.0003144227382007355, "loss": 0.5713, "step": 4675 }, { "epoch": 0.8341807153688342, "grad_norm": 0.6246667504310608, "learning_rate": 0.00031435503957728383, "loss": 0.9682, "step": 4676 }, { "epoch": 0.8343591115868344, "grad_norm": 0.5531001091003418, "learning_rate": 0.00031428733589941506, "loss": 0.9917, "step": 4677 }, { "epoch": 0.8345375078048345, "grad_norm": 0.5050147175788879, "learning_rate": 0.00031421962717244654, "loss": 0.8575, "step": 4678 }, { "epoch": 0.8347159040228347, "grad_norm": 0.5118961930274963, "learning_rate": 0.0003141519134016962, "loss": 0.8731, "step": 4679 }, { "epoch": 0.8348943002408349, "grad_norm": 0.4847746789455414, "learning_rate": 0.0003140841945924822, "loss": 0.8582, "step": 4680 }, { "epoch": 0.8350726964588351, "grad_norm": 0.4473601281642914, "learning_rate": 0.0003140164707501232, "loss": 0.768, "step": 4681 }, { "epoch": 0.8352510926768353, "grad_norm": 0.46980252861976624, "learning_rate": 0.00031394874187993805, "loss": 0.9835, "step": 4682 }, { "epoch": 0.8354294888948355, "grad_norm": 0.42925623059272766, "learning_rate": 0.00031388100798724624, "loss": 0.7166, "step": 4683 }, { "epoch": 0.8356078851128356, "grad_norm": 0.4442448019981384, "learning_rate": 0.0003138132690773675, "loss": 0.8631, "step": 4684 }, { "epoch": 0.8357862813308358, "grad_norm": 0.5010333061218262, "learning_rate": 0.00031374552515562215, "loss": 0.7902, "step": 4685 }, { "epoch": 0.835964677548836, "grad_norm": 0.473787784576416, "learning_rate": 0.0003136777762273306, "loss": 0.9016, "step": 4686 }, { "epoch": 0.8361430737668362, "grad_norm": 0.4981488287448883, "learning_rate": 0.000313610022297814, "loss": 0.8935, "step": 4687 }, { "epoch": 0.8363214699848364, "grad_norm": 0.5715591907501221, "learning_rate": 0.0003135422633723936, "loss": 0.9939, "step": 4688 }, { "epoch": 0.8364998662028365, "grad_norm": 0.587060809135437, "learning_rate": 0.0003134744994563912, "loss": 1.0045, "step": 4689 }, { "epoch": 0.8366782624208366, "grad_norm": 0.4668729901313782, "learning_rate": 0.0003134067305551289, "loss": 0.9922, "step": 4690 }, { "epoch": 0.8368566586388368, "grad_norm": 0.44394639134407043, "learning_rate": 0.0003133389566739292, "loss": 0.8563, "step": 4691 }, { "epoch": 0.837035054856837, "grad_norm": 0.41054239869117737, "learning_rate": 0.0003132711778181152, "loss": 0.6808, "step": 4692 }, { "epoch": 0.8372134510748372, "grad_norm": 0.45655539631843567, "learning_rate": 0.00031320339399301005, "loss": 0.7885, "step": 4693 }, { "epoch": 0.8373918472928374, "grad_norm": 0.528401255607605, "learning_rate": 0.00031313560520393756, "loss": 1.3023, "step": 4694 }, { "epoch": 0.8375702435108375, "grad_norm": 0.5124956369400024, "learning_rate": 0.0003130678114562218, "loss": 0.9328, "step": 4695 }, { "epoch": 0.8377486397288377, "grad_norm": 0.5178223252296448, "learning_rate": 0.00031300001275518733, "loss": 1.0182, "step": 4696 }, { "epoch": 0.8379270359468379, "grad_norm": 0.49288439750671387, "learning_rate": 0.00031293220910615896, "loss": 1.0634, "step": 4697 }, { "epoch": 0.8381054321648381, "grad_norm": 0.47642451524734497, "learning_rate": 0.00031286440051446187, "loss": 0.9013, "step": 4698 }, { "epoch": 0.8382838283828383, "grad_norm": 0.44043177366256714, "learning_rate": 0.0003127965869854219, "loss": 0.7628, "step": 4699 }, { "epoch": 0.8384622246008384, "grad_norm": 0.4588578939437866, "learning_rate": 0.00031272876852436493, "loss": 0.7756, "step": 4700 }, { "epoch": 0.8386406208188386, "grad_norm": 0.4768531620502472, "learning_rate": 0.0003126609451366176, "loss": 0.7546, "step": 4701 }, { "epoch": 0.8388190170368388, "grad_norm": 0.4645833671092987, "learning_rate": 0.00031259311682750655, "loss": 0.7292, "step": 4702 }, { "epoch": 0.838997413254839, "grad_norm": 0.49236834049224854, "learning_rate": 0.00031252528360235907, "loss": 0.884, "step": 4703 }, { "epoch": 0.8391758094728392, "grad_norm": 0.5257160067558289, "learning_rate": 0.0003124574454665027, "loss": 0.9921, "step": 4704 }, { "epoch": 0.8393542056908394, "grad_norm": 2.2589757442474365, "learning_rate": 0.0003123896024252654, "loss": 0.7997, "step": 4705 }, { "epoch": 0.8395326019088395, "grad_norm": 0.4541998505592346, "learning_rate": 0.00031232175448397547, "loss": 0.7263, "step": 4706 }, { "epoch": 0.8397109981268397, "grad_norm": 0.8749123215675354, "learning_rate": 0.00031225390164796193, "loss": 0.7226, "step": 4707 }, { "epoch": 0.8398893943448399, "grad_norm": 0.46198785305023193, "learning_rate": 0.0003121860439225537, "loss": 0.7642, "step": 4708 }, { "epoch": 0.8400677905628401, "grad_norm": 0.4428096115589142, "learning_rate": 0.0003121181813130804, "loss": 0.7683, "step": 4709 }, { "epoch": 0.8402461867808403, "grad_norm": 0.4557209014892578, "learning_rate": 0.0003120503138248718, "loss": 0.6965, "step": 4710 }, { "epoch": 0.8404245829988404, "grad_norm": 0.498859167098999, "learning_rate": 0.0003119824414632583, "loss": 0.7745, "step": 4711 }, { "epoch": 0.8406029792168406, "grad_norm": 0.44227057695388794, "learning_rate": 0.00031191456423357045, "loss": 0.8255, "step": 4712 }, { "epoch": 0.8407813754348408, "grad_norm": 0.5205174684524536, "learning_rate": 0.0003118466821411394, "loss": 1.0533, "step": 4713 }, { "epoch": 0.840959771652841, "grad_norm": 0.48816418647766113, "learning_rate": 0.0003117787951912965, "loss": 0.8577, "step": 4714 }, { "epoch": 0.8411381678708412, "grad_norm": 0.5432656407356262, "learning_rate": 0.00031171090338937376, "loss": 1.0221, "step": 4715 }, { "epoch": 0.8413165640888414, "grad_norm": 0.4946669340133667, "learning_rate": 0.0003116430067407031, "loss": 0.8704, "step": 4716 }, { "epoch": 0.8414949603068415, "grad_norm": 0.4849919378757477, "learning_rate": 0.0003115751052506173, "loss": 0.8248, "step": 4717 }, { "epoch": 0.8416733565248417, "grad_norm": 0.5005682110786438, "learning_rate": 0.0003115071989244491, "loss": 0.9195, "step": 4718 }, { "epoch": 0.8418517527428419, "grad_norm": 0.5993044972419739, "learning_rate": 0.00031143928776753213, "loss": 0.8419, "step": 4719 }, { "epoch": 0.842030148960842, "grad_norm": 0.704413115978241, "learning_rate": 0.0003113713717851998, "loss": 0.9343, "step": 4720 }, { "epoch": 0.8422085451788422, "grad_norm": 0.5452966094017029, "learning_rate": 0.0003113034509827864, "loss": 0.7155, "step": 4721 }, { "epoch": 0.8423869413968423, "grad_norm": 0.5383533835411072, "learning_rate": 0.0003112355253656263, "loss": 0.7432, "step": 4722 }, { "epoch": 0.8425653376148425, "grad_norm": 0.624819278717041, "learning_rate": 0.00031116759493905445, "loss": 0.8583, "step": 4723 }, { "epoch": 0.8427437338328427, "grad_norm": 0.982383668422699, "learning_rate": 0.000311099659708406, "loss": 0.6117, "step": 4724 }, { "epoch": 0.8429221300508429, "grad_norm": 0.6751213669776917, "learning_rate": 0.00031103171967901655, "loss": 1.022, "step": 4725 }, { "epoch": 0.8431005262688431, "grad_norm": 1.0229430198669434, "learning_rate": 0.00031096377485622214, "loss": 0.7461, "step": 4726 }, { "epoch": 0.8432789224868433, "grad_norm": 0.5392288565635681, "learning_rate": 0.000310895825245359, "loss": 0.7769, "step": 4727 }, { "epoch": 0.8434573187048434, "grad_norm": 1.207713007926941, "learning_rate": 0.0003108278708517641, "loss": 0.5888, "step": 4728 }, { "epoch": 0.8436357149228436, "grad_norm": 1.1378090381622314, "learning_rate": 0.0003107599116807743, "loss": 0.7526, "step": 4729 }, { "epoch": 0.8438141111408438, "grad_norm": 0.7071771621704102, "learning_rate": 0.00031069194773772715, "loss": 1.0347, "step": 4730 }, { "epoch": 0.843992507358844, "grad_norm": 0.7279558181762695, "learning_rate": 0.0003106239790279606, "loss": 0.8911, "step": 4731 }, { "epoch": 0.8441709035768442, "grad_norm": 0.49013325572013855, "learning_rate": 0.0003105560055568128, "loss": 0.6986, "step": 4732 }, { "epoch": 0.8443492997948443, "grad_norm": 0.5166245698928833, "learning_rate": 0.0003104880273296224, "loss": 1.0091, "step": 4733 }, { "epoch": 0.8445276960128445, "grad_norm": 0.5410045385360718, "learning_rate": 0.00031042004435172834, "loss": 1.0074, "step": 4734 }, { "epoch": 0.8447060922308447, "grad_norm": 0.40799999237060547, "learning_rate": 0.00031035205662847005, "loss": 0.7189, "step": 4735 }, { "epoch": 0.8448844884488449, "grad_norm": 0.468019038438797, "learning_rate": 0.0003102840641651872, "loss": 0.9389, "step": 4736 }, { "epoch": 0.8450628846668451, "grad_norm": 0.4619864225387573, "learning_rate": 0.00031021606696721984, "loss": 0.7924, "step": 4737 }, { "epoch": 0.8452412808848453, "grad_norm": 1.7745176553726196, "learning_rate": 0.0003101480650399085, "loss": 0.8259, "step": 4738 }, { "epoch": 0.8454196771028454, "grad_norm": 0.5147114396095276, "learning_rate": 0.000310080058388594, "loss": 0.7269, "step": 4739 }, { "epoch": 0.8455980733208456, "grad_norm": 1.0041489601135254, "learning_rate": 0.00031001204701861765, "loss": 0.8016, "step": 4740 }, { "epoch": 0.8457764695388458, "grad_norm": 0.5194323062896729, "learning_rate": 0.00030994403093532086, "loss": 0.8482, "step": 4741 }, { "epoch": 0.845954865756846, "grad_norm": 1.3192564249038696, "learning_rate": 0.00030987601014404576, "loss": 0.9586, "step": 4742 }, { "epoch": 0.8461332619748462, "grad_norm": 0.5231749415397644, "learning_rate": 0.00030980798465013454, "loss": 0.9005, "step": 4743 }, { "epoch": 0.8463116581928463, "grad_norm": 0.52949458360672, "learning_rate": 0.00030973995445892987, "loss": 0.9448, "step": 4744 }, { "epoch": 0.8464900544108465, "grad_norm": 0.509884238243103, "learning_rate": 0.00030967191957577503, "loss": 0.7253, "step": 4745 }, { "epoch": 0.8466684506288467, "grad_norm": 0.4490432143211365, "learning_rate": 0.00030960388000601325, "loss": 0.7781, "step": 4746 }, { "epoch": 0.8468468468468469, "grad_norm": 0.5485023856163025, "learning_rate": 0.0003095358357549883, "loss": 0.8696, "step": 4747 }, { "epoch": 0.847025243064847, "grad_norm": 0.46540600061416626, "learning_rate": 0.00030946778682804457, "loss": 0.837, "step": 4748 }, { "epoch": 0.8472036392828473, "grad_norm": 0.46531352400779724, "learning_rate": 0.0003093997332305264, "loss": 0.879, "step": 4749 }, { "epoch": 0.8473820355008473, "grad_norm": 0.5620687007904053, "learning_rate": 0.00030933167496777875, "loss": 1.2353, "step": 4750 }, { "epoch": 0.8475604317188475, "grad_norm": 0.4240880310535431, "learning_rate": 0.0003092636120451469, "loss": 0.6647, "step": 4751 }, { "epoch": 0.8477388279368477, "grad_norm": 0.5051021575927734, "learning_rate": 0.0003091955444679763, "loss": 0.9274, "step": 4752 }, { "epoch": 0.8479172241548479, "grad_norm": 0.47416186332702637, "learning_rate": 0.00030912747224161324, "loss": 0.7746, "step": 4753 }, { "epoch": 0.8480956203728481, "grad_norm": 0.4895223081111908, "learning_rate": 0.000309059395371404, "loss": 0.7968, "step": 4754 }, { "epoch": 0.8482740165908482, "grad_norm": 0.5074788331985474, "learning_rate": 0.00030899131386269527, "loss": 1.1851, "step": 4755 }, { "epoch": 0.8484524128088484, "grad_norm": 0.45386070013046265, "learning_rate": 0.000308923227720834, "loss": 0.8569, "step": 4756 }, { "epoch": 0.8486308090268486, "grad_norm": 0.5263837575912476, "learning_rate": 0.0003088551369511679, "loss": 0.9021, "step": 4757 }, { "epoch": 0.8488092052448488, "grad_norm": 0.6763511896133423, "learning_rate": 0.00030878704155904465, "loss": 0.8721, "step": 4758 }, { "epoch": 0.848987601462849, "grad_norm": 0.48090678453445435, "learning_rate": 0.0003087189415498124, "loss": 0.8649, "step": 4759 }, { "epoch": 0.8491659976808492, "grad_norm": 0.44986963272094727, "learning_rate": 0.0003086508369288198, "loss": 0.9613, "step": 4760 }, { "epoch": 0.8493443938988493, "grad_norm": 4.108095169067383, "learning_rate": 0.00030858272770141574, "loss": 0.7813, "step": 4761 }, { "epoch": 0.8495227901168495, "grad_norm": 0.4738391935825348, "learning_rate": 0.0003085146138729494, "loss": 0.8247, "step": 4762 }, { "epoch": 0.8497011863348497, "grad_norm": 0.45482027530670166, "learning_rate": 0.0003084464954487705, "loss": 0.7193, "step": 4763 }, { "epoch": 0.8498795825528499, "grad_norm": 0.6398509740829468, "learning_rate": 0.00030837837243422896, "loss": 0.7891, "step": 4764 }, { "epoch": 0.8500579787708501, "grad_norm": 0.45158737897872925, "learning_rate": 0.00030831024483467517, "loss": 0.7787, "step": 4765 }, { "epoch": 0.8502363749888502, "grad_norm": 0.46626511216163635, "learning_rate": 0.00030824211265545985, "loss": 0.8941, "step": 4766 }, { "epoch": 0.8504147712068504, "grad_norm": 0.4571463167667389, "learning_rate": 0.00030817397590193404, "loss": 0.9276, "step": 4767 }, { "epoch": 0.8505931674248506, "grad_norm": 0.4970678687095642, "learning_rate": 0.0003081058345794493, "loss": 0.8267, "step": 4768 }, { "epoch": 0.8507715636428508, "grad_norm": 0.5197835564613342, "learning_rate": 0.0003080376886933572, "loss": 0.8656, "step": 4769 }, { "epoch": 0.850949959860851, "grad_norm": 0.48982325196266174, "learning_rate": 0.0003079695382490101, "loss": 0.7117, "step": 4770 }, { "epoch": 0.8511283560788512, "grad_norm": 0.5008729696273804, "learning_rate": 0.0003079013832517603, "loss": 0.8803, "step": 4771 }, { "epoch": 0.8513067522968513, "grad_norm": 0.4859359562397003, "learning_rate": 0.00030783322370696087, "loss": 0.8494, "step": 4772 }, { "epoch": 0.8514851485148515, "grad_norm": 0.4618992209434509, "learning_rate": 0.00030776505961996494, "loss": 0.6753, "step": 4773 }, { "epoch": 0.8516635447328517, "grad_norm": 0.6842380166053772, "learning_rate": 0.00030769689099612604, "loss": 0.8521, "step": 4774 }, { "epoch": 0.8518419409508519, "grad_norm": 0.46096381545066833, "learning_rate": 0.00030762871784079815, "loss": 0.6602, "step": 4775 }, { "epoch": 0.8520203371688521, "grad_norm": 0.5396759510040283, "learning_rate": 0.0003075605401593356, "loss": 1.053, "step": 4776 }, { "epoch": 0.8521987333868521, "grad_norm": 0.4654025435447693, "learning_rate": 0.000307492357957093, "loss": 0.6658, "step": 4777 }, { "epoch": 0.8523771296048523, "grad_norm": 0.46096524596214294, "learning_rate": 0.0003074241712394253, "loss": 0.7662, "step": 4778 }, { "epoch": 0.8525555258228525, "grad_norm": 0.5289954543113708, "learning_rate": 0.0003073559800116879, "loss": 0.7916, "step": 4779 }, { "epoch": 0.8527339220408527, "grad_norm": 0.4729297459125519, "learning_rate": 0.00030728778427923655, "loss": 0.8108, "step": 4780 }, { "epoch": 0.8529123182588529, "grad_norm": 1.2165110111236572, "learning_rate": 0.0003072195840474273, "loss": 1.0334, "step": 4781 }, { "epoch": 0.8530907144768531, "grad_norm": 0.6247953176498413, "learning_rate": 0.00030715137932161646, "loss": 1.1616, "step": 4782 }, { "epoch": 0.8532691106948532, "grad_norm": 0.5182445645332336, "learning_rate": 0.00030708317010716093, "loss": 1.0503, "step": 4783 }, { "epoch": 0.8534475069128534, "grad_norm": 0.5359323620796204, "learning_rate": 0.0003070149564094178, "loss": 1.0459, "step": 4784 }, { "epoch": 0.8536259031308536, "grad_norm": 0.4704750180244446, "learning_rate": 0.0003069467382337445, "loss": 0.8514, "step": 4785 }, { "epoch": 0.8538042993488538, "grad_norm": 0.5520228743553162, "learning_rate": 0.0003068785155854989, "loss": 0.8987, "step": 4786 }, { "epoch": 0.853982695566854, "grad_norm": 0.4699079394340515, "learning_rate": 0.0003068102884700391, "loss": 0.736, "step": 4787 }, { "epoch": 0.8541610917848541, "grad_norm": 0.5631700754165649, "learning_rate": 0.00030674205689272375, "loss": 0.9386, "step": 4788 }, { "epoch": 0.8543394880028543, "grad_norm": 0.4554060101509094, "learning_rate": 0.00030667382085891175, "loss": 0.7084, "step": 4789 }, { "epoch": 0.8545178842208545, "grad_norm": 0.46370020508766174, "learning_rate": 0.00030660558037396216, "loss": 0.9635, "step": 4790 }, { "epoch": 0.8546962804388547, "grad_norm": 0.46454691886901855, "learning_rate": 0.0003065373354432346, "loss": 0.8481, "step": 4791 }, { "epoch": 0.8548746766568549, "grad_norm": 0.4899047017097473, "learning_rate": 0.0003064690860720891, "loss": 0.8236, "step": 4792 }, { "epoch": 0.8550530728748551, "grad_norm": 0.9299049377441406, "learning_rate": 0.0003064008322658859, "loss": 0.7952, "step": 4793 }, { "epoch": 0.8552314690928552, "grad_norm": 1.665650725364685, "learning_rate": 0.0003063325740299855, "loss": 1.0987, "step": 4794 }, { "epoch": 0.8554098653108554, "grad_norm": 0.4786631464958191, "learning_rate": 0.0003062643113697492, "loss": 0.8729, "step": 4795 }, { "epoch": 0.8555882615288556, "grad_norm": 1.025985836982727, "learning_rate": 0.00030619604429053793, "loss": 0.891, "step": 4796 }, { "epoch": 0.8557666577468558, "grad_norm": 0.5616780519485474, "learning_rate": 0.0003061277727977135, "loss": 1.0594, "step": 4797 }, { "epoch": 0.855945053964856, "grad_norm": 0.4509812593460083, "learning_rate": 0.000306059496896638, "loss": 0.6073, "step": 4798 }, { "epoch": 0.8561234501828561, "grad_norm": 0.4322461783885956, "learning_rate": 0.0003059912165926738, "loss": 0.772, "step": 4799 }, { "epoch": 0.8563018464008563, "grad_norm": 0.40975263714790344, "learning_rate": 0.00030592293189118344, "loss": 0.7681, "step": 4800 }, { "epoch": 0.8564802426188565, "grad_norm": 0.5190335512161255, "learning_rate": 0.00030585464279753015, "loss": 0.8196, "step": 4801 }, { "epoch": 0.8566586388368567, "grad_norm": 0.5089114904403687, "learning_rate": 0.0003057863493170772, "loss": 0.7172, "step": 4802 }, { "epoch": 0.8568370350548569, "grad_norm": 0.548192024230957, "learning_rate": 0.0003057180514551884, "loss": 0.8321, "step": 4803 }, { "epoch": 0.8570154312728571, "grad_norm": 0.4767385721206665, "learning_rate": 0.0003056497492172278, "loss": 0.8879, "step": 4804 }, { "epoch": 0.8571938274908572, "grad_norm": 0.488613098859787, "learning_rate": 0.00030558144260855986, "loss": 0.9423, "step": 4805 }, { "epoch": 0.8573722237088574, "grad_norm": 0.48882797360420227, "learning_rate": 0.0003055131316345493, "loss": 0.7837, "step": 4806 }, { "epoch": 0.8575506199268575, "grad_norm": 0.5603160262107849, "learning_rate": 0.0003054448163005613, "loss": 0.8026, "step": 4807 }, { "epoch": 0.8577290161448577, "grad_norm": 0.46545833349227905, "learning_rate": 0.00030537649661196135, "loss": 0.8075, "step": 4808 }, { "epoch": 0.857907412362858, "grad_norm": 0.4614555537700653, "learning_rate": 0.00030530817257411517, "loss": 0.7662, "step": 4809 }, { "epoch": 0.858085808580858, "grad_norm": 0.5005578398704529, "learning_rate": 0.0003052398441923888, "loss": 0.9496, "step": 4810 }, { "epoch": 0.8582642047988582, "grad_norm": 0.45669299364089966, "learning_rate": 0.00030517151147214895, "loss": 0.7207, "step": 4811 }, { "epoch": 0.8584426010168584, "grad_norm": 0.4442518949508667, "learning_rate": 0.0003051031744187623, "loss": 0.7858, "step": 4812 }, { "epoch": 0.8586209972348586, "grad_norm": 0.4947311580181122, "learning_rate": 0.00030503483303759597, "loss": 1.0889, "step": 4813 }, { "epoch": 0.8587993934528588, "grad_norm": 0.4674645960330963, "learning_rate": 0.00030496648733401764, "loss": 0.7326, "step": 4814 }, { "epoch": 0.858977789670859, "grad_norm": 0.49321508407592773, "learning_rate": 0.00030489813731339504, "loss": 0.8671, "step": 4815 }, { "epoch": 0.8591561858888591, "grad_norm": 0.48469820618629456, "learning_rate": 0.00030482978298109636, "loss": 1.0099, "step": 4816 }, { "epoch": 0.8593345821068593, "grad_norm": 0.5168629884719849, "learning_rate": 0.0003047614243424901, "loss": 1.0685, "step": 4817 }, { "epoch": 0.8595129783248595, "grad_norm": 0.5016402006149292, "learning_rate": 0.0003046930614029451, "loss": 0.9678, "step": 4818 }, { "epoch": 0.8596913745428597, "grad_norm": 0.458768367767334, "learning_rate": 0.00030462469416783067, "loss": 0.8762, "step": 4819 }, { "epoch": 0.8598697707608599, "grad_norm": 0.6688326597213745, "learning_rate": 0.0003045563226425162, "loss": 0.9441, "step": 4820 }, { "epoch": 0.86004816697886, "grad_norm": 0.5008928179740906, "learning_rate": 0.0003044879468323716, "loss": 0.7504, "step": 4821 }, { "epoch": 0.8602265631968602, "grad_norm": 2.919682502746582, "learning_rate": 0.0003044195667427672, "loss": 1.1196, "step": 4822 }, { "epoch": 0.8604049594148604, "grad_norm": 1.0007497072219849, "learning_rate": 0.0003043511823790734, "loss": 0.638, "step": 4823 }, { "epoch": 0.8605833556328606, "grad_norm": 0.5225825905799866, "learning_rate": 0.00030428279374666113, "loss": 0.8362, "step": 4824 }, { "epoch": 0.8607617518508608, "grad_norm": 0.533996045589447, "learning_rate": 0.0003042144008509016, "loss": 0.8759, "step": 4825 }, { "epoch": 0.860940148068861, "grad_norm": 0.45570921897888184, "learning_rate": 0.00030414600369716636, "loss": 0.7521, "step": 4826 }, { "epoch": 0.8611185442868611, "grad_norm": 2.225616216659546, "learning_rate": 0.0003040776022908273, "loss": 0.8446, "step": 4827 }, { "epoch": 0.8612969405048613, "grad_norm": 0.5153058767318726, "learning_rate": 0.00030400919663725655, "loss": 1.0429, "step": 4828 }, { "epoch": 0.8614753367228615, "grad_norm": 0.44439637660980225, "learning_rate": 0.00030394078674182684, "loss": 0.8713, "step": 4829 }, { "epoch": 0.8616537329408617, "grad_norm": 0.5092588663101196, "learning_rate": 0.0003038723726099109, "loss": 0.7983, "step": 4830 }, { "epoch": 0.8618321291588619, "grad_norm": 0.5052730441093445, "learning_rate": 0.000303803954246882, "loss": 0.9045, "step": 4831 }, { "epoch": 0.862010525376862, "grad_norm": 0.5321127772331238, "learning_rate": 0.00030373553165811377, "loss": 0.9417, "step": 4832 }, { "epoch": 0.8621889215948622, "grad_norm": 0.4712226688861847, "learning_rate": 0.00030366710484897984, "loss": 0.8645, "step": 4833 }, { "epoch": 0.8623673178128624, "grad_norm": 0.5059930682182312, "learning_rate": 0.0003035986738248547, "loss": 0.9814, "step": 4834 }, { "epoch": 0.8625457140308626, "grad_norm": 0.4837188124656677, "learning_rate": 0.00030353023859111284, "loss": 0.8914, "step": 4835 }, { "epoch": 0.8627241102488628, "grad_norm": 0.4856887757778168, "learning_rate": 0.0003034617991531289, "loss": 0.9607, "step": 4836 }, { "epoch": 0.862902506466863, "grad_norm": 0.5175963044166565, "learning_rate": 0.0003033933555162784, "loss": 0.9011, "step": 4837 }, { "epoch": 0.863080902684863, "grad_norm": 0.4169839024543762, "learning_rate": 0.0003033249076859367, "loss": 0.7326, "step": 4838 }, { "epoch": 0.8632592989028632, "grad_norm": 0.4630539119243622, "learning_rate": 0.0003032564556674797, "loss": 0.7955, "step": 4839 }, { "epoch": 0.8634376951208634, "grad_norm": 0.47739243507385254, "learning_rate": 0.0003031879994662836, "loss": 0.8976, "step": 4840 }, { "epoch": 0.8636160913388636, "grad_norm": 0.5732921957969666, "learning_rate": 0.00030311953908772495, "loss": 0.9195, "step": 4841 }, { "epoch": 0.8637944875568638, "grad_norm": 0.45784977078437805, "learning_rate": 0.0003030510745371805, "loss": 0.8676, "step": 4842 }, { "epoch": 0.8639728837748639, "grad_norm": 0.40974244475364685, "learning_rate": 0.00030298260582002753, "loss": 0.6207, "step": 4843 }, { "epoch": 0.8641512799928641, "grad_norm": 0.5293233394622803, "learning_rate": 0.00030291413294164336, "loss": 0.8833, "step": 4844 }, { "epoch": 0.8643296762108643, "grad_norm": 0.5298712253570557, "learning_rate": 0.00030284565590740607, "loss": 0.8459, "step": 4845 }, { "epoch": 0.8645080724288645, "grad_norm": 0.5469498038291931, "learning_rate": 0.00030277717472269373, "loss": 0.8662, "step": 4846 }, { "epoch": 0.8646864686468647, "grad_norm": 0.5502684116363525, "learning_rate": 0.00030270868939288474, "loss": 0.7796, "step": 4847 }, { "epoch": 0.8648648648648649, "grad_norm": 0.5410329699516296, "learning_rate": 0.00030264019992335805, "loss": 0.6923, "step": 4848 }, { "epoch": 0.865043261082865, "grad_norm": 0.5078060030937195, "learning_rate": 0.00030257170631949265, "loss": 0.8679, "step": 4849 }, { "epoch": 0.8652216573008652, "grad_norm": 0.4987890422344208, "learning_rate": 0.0003025032085866681, "loss": 0.8628, "step": 4850 }, { "epoch": 0.8654000535188654, "grad_norm": 0.5421337485313416, "learning_rate": 0.000302434706730264, "loss": 0.8687, "step": 4851 }, { "epoch": 0.8655784497368656, "grad_norm": 0.4919837415218353, "learning_rate": 0.0003023662007556607, "loss": 0.9298, "step": 4852 }, { "epoch": 0.8657568459548658, "grad_norm": 0.4684261381626129, "learning_rate": 0.0003022976906682385, "loss": 0.7224, "step": 4853 }, { "epoch": 0.8659352421728659, "grad_norm": 0.5145940780639648, "learning_rate": 0.00030222917647337833, "loss": 0.9305, "step": 4854 }, { "epoch": 0.8661136383908661, "grad_norm": 0.5013492107391357, "learning_rate": 0.00030216065817646097, "loss": 0.9581, "step": 4855 }, { "epoch": 0.8662920346088663, "grad_norm": 0.4868033528327942, "learning_rate": 0.000302092135782868, "loss": 0.9121, "step": 4856 }, { "epoch": 0.8664704308268665, "grad_norm": 0.5112666487693787, "learning_rate": 0.0003020236092979811, "loss": 0.9431, "step": 4857 }, { "epoch": 0.8666488270448667, "grad_norm": 0.588441789150238, "learning_rate": 0.0003019550787271823, "loss": 0.9898, "step": 4858 }, { "epoch": 0.8668272232628669, "grad_norm": 0.42368215322494507, "learning_rate": 0.00030188654407585394, "loss": 0.6658, "step": 4859 }, { "epoch": 0.867005619480867, "grad_norm": 0.4669845700263977, "learning_rate": 0.00030181800534937874, "loss": 0.9835, "step": 4860 }, { "epoch": 0.8671840156988672, "grad_norm": 0.8458566665649414, "learning_rate": 0.00030174946255313986, "loss": 0.8751, "step": 4861 }, { "epoch": 0.8673624119168674, "grad_norm": 0.43754637241363525, "learning_rate": 0.0003016809156925203, "loss": 0.7733, "step": 4862 }, { "epoch": 0.8675408081348676, "grad_norm": 0.539728045463562, "learning_rate": 0.00030161236477290387, "loss": 1.0081, "step": 4863 }, { "epoch": 0.8677192043528678, "grad_norm": 0.6148440837860107, "learning_rate": 0.00030154380979967457, "loss": 1.0732, "step": 4864 }, { "epoch": 0.8678976005708678, "grad_norm": 0.5512346625328064, "learning_rate": 0.0003014752507782166, "loss": 0.9659, "step": 4865 }, { "epoch": 0.868075996788868, "grad_norm": 0.4736751616001129, "learning_rate": 0.0003014066877139146, "loss": 0.567, "step": 4866 }, { "epoch": 0.8682543930068682, "grad_norm": 0.9616851806640625, "learning_rate": 0.00030133812061215346, "loss": 0.7497, "step": 4867 }, { "epoch": 0.8684327892248684, "grad_norm": 0.5108407139778137, "learning_rate": 0.00030126954947831843, "loss": 0.954, "step": 4868 }, { "epoch": 0.8686111854428686, "grad_norm": 0.5192012786865234, "learning_rate": 0.000301200974317795, "loss": 0.8119, "step": 4869 }, { "epoch": 0.8687895816608688, "grad_norm": 0.5100924372673035, "learning_rate": 0.0003011323951359692, "loss": 0.9276, "step": 4870 }, { "epoch": 0.8689679778788689, "grad_norm": 0.46075180172920227, "learning_rate": 0.00030106381193822695, "loss": 0.764, "step": 4871 }, { "epoch": 0.8691463740968691, "grad_norm": 0.45364946126937866, "learning_rate": 0.000300995224729955, "loss": 0.7632, "step": 4872 }, { "epoch": 0.8693247703148693, "grad_norm": 0.5815824270248413, "learning_rate": 0.00030092663351654, "loss": 0.9709, "step": 4873 }, { "epoch": 0.8695031665328695, "grad_norm": 0.4578329920768738, "learning_rate": 0.00030085803830336903, "loss": 0.6986, "step": 4874 }, { "epoch": 0.8696815627508697, "grad_norm": 0.9438912272453308, "learning_rate": 0.0003007894390958297, "loss": 0.9468, "step": 4875 }, { "epoch": 0.8698599589688698, "grad_norm": 1.1074215173721313, "learning_rate": 0.0003007208358993097, "loss": 0.9185, "step": 4876 }, { "epoch": 0.87003835518687, "grad_norm": 2.9237184524536133, "learning_rate": 0.00030065222871919706, "loss": 1.0407, "step": 4877 }, { "epoch": 0.8702167514048702, "grad_norm": 0.5479034185409546, "learning_rate": 0.00030058361756088014, "loss": 1.0267, "step": 4878 }, { "epoch": 0.8703951476228704, "grad_norm": 0.5139360427856445, "learning_rate": 0.0003005150024297477, "loss": 0.8951, "step": 4879 }, { "epoch": 0.8705735438408706, "grad_norm": 0.48596107959747314, "learning_rate": 0.00030044638333118873, "loss": 0.8362, "step": 4880 }, { "epoch": 0.8707519400588708, "grad_norm": 0.5204296708106995, "learning_rate": 0.00030037776027059247, "loss": 0.7555, "step": 4881 }, { "epoch": 0.8709303362768709, "grad_norm": 0.49391311407089233, "learning_rate": 0.00030030913325334864, "loss": 0.9305, "step": 4882 }, { "epoch": 0.8711087324948711, "grad_norm": 0.4564768970012665, "learning_rate": 0.00030024050228484714, "loss": 0.7117, "step": 4883 }, { "epoch": 0.8712871287128713, "grad_norm": 0.434636652469635, "learning_rate": 0.00030017186737047813, "loss": 0.7125, "step": 4884 }, { "epoch": 0.8714655249308715, "grad_norm": 0.542251706123352, "learning_rate": 0.00030010322851563233, "loss": 1.061, "step": 4885 }, { "epoch": 0.8716439211488717, "grad_norm": 0.5167363882064819, "learning_rate": 0.0003000345857257005, "loss": 0.7555, "step": 4886 }, { "epoch": 0.8718223173668718, "grad_norm": 0.4324584901332855, "learning_rate": 0.0002999659390060738, "loss": 0.6161, "step": 4887 }, { "epoch": 0.872000713584872, "grad_norm": 0.49977007508277893, "learning_rate": 0.0002998972883621439, "loss": 0.7964, "step": 4888 }, { "epoch": 0.8721791098028722, "grad_norm": 0.5305691361427307, "learning_rate": 0.00029982863379930224, "loss": 1.1202, "step": 4889 }, { "epoch": 0.8723575060208724, "grad_norm": 0.5107898712158203, "learning_rate": 0.0002997599753229412, "loss": 1.0624, "step": 4890 }, { "epoch": 0.8725359022388726, "grad_norm": 0.5440460443496704, "learning_rate": 0.00029969131293845313, "loss": 1.0675, "step": 4891 }, { "epoch": 0.8727142984568728, "grad_norm": 0.48299506306648254, "learning_rate": 0.00029962264665123076, "loss": 0.8612, "step": 4892 }, { "epoch": 0.8728926946748728, "grad_norm": 0.49671709537506104, "learning_rate": 0.000299553976466667, "loss": 0.9027, "step": 4893 }, { "epoch": 0.873071090892873, "grad_norm": 0.5757230520248413, "learning_rate": 0.00029948530239015534, "loss": 0.9273, "step": 4894 }, { "epoch": 0.8732494871108732, "grad_norm": 0.5014898180961609, "learning_rate": 0.0002994166244270893, "loss": 0.9075, "step": 4895 }, { "epoch": 0.8734278833288734, "grad_norm": 0.5563370585441589, "learning_rate": 0.0002993479425828628, "loss": 1.0369, "step": 4896 }, { "epoch": 0.8736062795468736, "grad_norm": 0.48165515065193176, "learning_rate": 0.00029927925686287006, "loss": 0.7797, "step": 4897 }, { "epoch": 0.8737846757648737, "grad_norm": 0.5211619734764099, "learning_rate": 0.0002992105672725058, "loss": 0.8904, "step": 4898 }, { "epoch": 0.8739630719828739, "grad_norm": 0.5099582672119141, "learning_rate": 0.00029914187381716473, "loss": 0.7874, "step": 4899 }, { "epoch": 0.8741414682008741, "grad_norm": 0.48764798045158386, "learning_rate": 0.00029907317650224204, "loss": 0.6851, "step": 4900 }, { "epoch": 0.8743198644188743, "grad_norm": 0.5231783986091614, "learning_rate": 0.0002990044753331332, "loss": 0.7969, "step": 4901 }, { "epoch": 0.8744982606368745, "grad_norm": 0.5043377876281738, "learning_rate": 0.00029893577031523403, "loss": 0.7843, "step": 4902 }, { "epoch": 0.8746766568548747, "grad_norm": 0.4754558801651001, "learning_rate": 0.0002988670614539404, "loss": 0.7068, "step": 4903 }, { "epoch": 0.8748550530728748, "grad_norm": 0.4813019037246704, "learning_rate": 0.0002987983487546488, "loss": 0.8119, "step": 4904 }, { "epoch": 0.875033449290875, "grad_norm": 0.5080868601799011, "learning_rate": 0.0002987296322227559, "loss": 0.993, "step": 4905 }, { "epoch": 0.8752118455088752, "grad_norm": 0.5094708800315857, "learning_rate": 0.00029866091186365865, "loss": 1.0395, "step": 4906 }, { "epoch": 0.8753902417268754, "grad_norm": 0.5148309469223022, "learning_rate": 0.0002985921876827544, "loss": 0.8391, "step": 4907 }, { "epoch": 0.8755686379448756, "grad_norm": 0.47196096181869507, "learning_rate": 0.00029852345968544057, "loss": 0.7106, "step": 4908 }, { "epoch": 0.8757470341628757, "grad_norm": 0.5898258090019226, "learning_rate": 0.00029845472787711516, "loss": 0.8076, "step": 4909 }, { "epoch": 0.8759254303808759, "grad_norm": 0.5204131007194519, "learning_rate": 0.0002983859922631762, "loss": 0.7482, "step": 4910 }, { "epoch": 0.8761038265988761, "grad_norm": 0.4837625026702881, "learning_rate": 0.0002983172528490223, "loss": 0.8764, "step": 4911 }, { "epoch": 0.8762822228168763, "grad_norm": 0.46985456347465515, "learning_rate": 0.00029824850964005215, "loss": 0.6421, "step": 4912 }, { "epoch": 0.8764606190348765, "grad_norm": 0.8495760560035706, "learning_rate": 0.00029817976264166475, "loss": 0.9803, "step": 4913 }, { "epoch": 0.8766390152528767, "grad_norm": 0.5043582916259766, "learning_rate": 0.00029811101185925955, "loss": 1.0006, "step": 4914 }, { "epoch": 0.8768174114708768, "grad_norm": 13.705134391784668, "learning_rate": 0.00029804225729823615, "loss": 0.9303, "step": 4915 }, { "epoch": 0.876995807688877, "grad_norm": 0.5199925899505615, "learning_rate": 0.00029797349896399457, "loss": 0.8513, "step": 4916 }, { "epoch": 0.8771742039068772, "grad_norm": 0.5620130300521851, "learning_rate": 0.000297904736861935, "loss": 0.7335, "step": 4917 }, { "epoch": 0.8773526001248774, "grad_norm": 1.1893454790115356, "learning_rate": 0.0002978359709974581, "loss": 0.9265, "step": 4918 }, { "epoch": 0.8775309963428776, "grad_norm": 0.9579685926437378, "learning_rate": 0.0002977672013759645, "loss": 0.7829, "step": 4919 }, { "epoch": 0.8777093925608777, "grad_norm": 0.8897736668586731, "learning_rate": 0.0002976984280028556, "loss": 0.8619, "step": 4920 }, { "epoch": 0.8778877887788779, "grad_norm": 0.6339399814605713, "learning_rate": 0.00029762965088353256, "loss": 0.9676, "step": 4921 }, { "epoch": 0.878066184996878, "grad_norm": 0.5451995134353638, "learning_rate": 0.00029756087002339734, "loss": 1.0137, "step": 4922 }, { "epoch": 0.8782445812148783, "grad_norm": 0.4819452166557312, "learning_rate": 0.00029749208542785175, "loss": 0.8346, "step": 4923 }, { "epoch": 0.8784229774328784, "grad_norm": 0.500859260559082, "learning_rate": 0.0002974232971022983, "loss": 0.7695, "step": 4924 }, { "epoch": 0.8786013736508786, "grad_norm": 0.7881564497947693, "learning_rate": 0.00029735450505213943, "loss": 1.0873, "step": 4925 }, { "epoch": 0.8787797698688787, "grad_norm": 1.3103078603744507, "learning_rate": 0.0002972857092827781, "loss": 0.889, "step": 4926 }, { "epoch": 0.8789581660868789, "grad_norm": 0.5103085041046143, "learning_rate": 0.00029721690979961764, "loss": 0.7738, "step": 4927 }, { "epoch": 0.8791365623048791, "grad_norm": 0.7508847117424011, "learning_rate": 0.0002971481066080613, "loss": 0.7429, "step": 4928 }, { "epoch": 0.8793149585228793, "grad_norm": 0.5857597589492798, "learning_rate": 0.000297079299713513, "loss": 0.9272, "step": 4929 }, { "epoch": 0.8794933547408795, "grad_norm": 0.5782235264778137, "learning_rate": 0.00029701048912137676, "loss": 0.9134, "step": 4930 }, { "epoch": 0.8796717509588796, "grad_norm": 0.5405158400535583, "learning_rate": 0.00029694167483705684, "loss": 0.8666, "step": 4931 }, { "epoch": 0.8798501471768798, "grad_norm": 0.5647017359733582, "learning_rate": 0.0002968728568659581, "loss": 1.007, "step": 4932 }, { "epoch": 0.88002854339488, "grad_norm": 0.537854015827179, "learning_rate": 0.0002968040352134853, "loss": 0.9957, "step": 4933 }, { "epoch": 0.8802069396128802, "grad_norm": 0.5491927862167358, "learning_rate": 0.00029673520988504376, "loss": 0.9025, "step": 4934 }, { "epoch": 0.8803853358308804, "grad_norm": 0.5295958518981934, "learning_rate": 0.0002966663808860389, "loss": 0.9705, "step": 4935 }, { "epoch": 0.8805637320488806, "grad_norm": 0.5388423800468445, "learning_rate": 0.0002965975482218766, "loss": 0.7576, "step": 4936 }, { "epoch": 0.8807421282668807, "grad_norm": 0.5519341230392456, "learning_rate": 0.00029652871189796284, "loss": 0.8774, "step": 4937 }, { "epoch": 0.8809205244848809, "grad_norm": 0.5609647035598755, "learning_rate": 0.00029645987191970414, "loss": 0.9987, "step": 4938 }, { "epoch": 0.8810989207028811, "grad_norm": 0.47703075408935547, "learning_rate": 0.000296391028292507, "loss": 0.7438, "step": 4939 }, { "epoch": 0.8812773169208813, "grad_norm": 0.5322765111923218, "learning_rate": 0.0002963221810217786, "loss": 1.1033, "step": 4940 }, { "epoch": 0.8814557131388815, "grad_norm": 0.4712493121623993, "learning_rate": 0.000296253330112926, "loss": 0.7569, "step": 4941 }, { "epoch": 0.8816341093568816, "grad_norm": 0.5673714876174927, "learning_rate": 0.00029618447557135677, "loss": 0.7299, "step": 4942 }, { "epoch": 0.8818125055748818, "grad_norm": 0.5213398337364197, "learning_rate": 0.00029611561740247854, "loss": 0.828, "step": 4943 }, { "epoch": 0.881990901792882, "grad_norm": 0.4355573058128357, "learning_rate": 0.0002960467556116997, "loss": 0.6324, "step": 4944 }, { "epoch": 0.8821692980108822, "grad_norm": 0.53075110912323, "learning_rate": 0.0002959778902044285, "loss": 0.852, "step": 4945 }, { "epoch": 0.8823476942288824, "grad_norm": 0.5229327082633972, "learning_rate": 0.00029590902118607353, "loss": 0.8341, "step": 4946 }, { "epoch": 0.8825260904468826, "grad_norm": 0.5234019160270691, "learning_rate": 0.00029584014856204387, "loss": 0.9364, "step": 4947 }, { "epoch": 0.8827044866648827, "grad_norm": 0.5076524019241333, "learning_rate": 0.0002957712723377487, "loss": 0.8151, "step": 4948 }, { "epoch": 0.8828828828828829, "grad_norm": 0.4554321765899658, "learning_rate": 0.00029570239251859744, "loss": 0.8135, "step": 4949 }, { "epoch": 0.8830612791008831, "grad_norm": 0.47473978996276855, "learning_rate": 0.0002956335091099999, "loss": 0.7501, "step": 4950 }, { "epoch": 0.8832396753188833, "grad_norm": 0.4464404284954071, "learning_rate": 0.00029556462211736614, "loss": 0.7138, "step": 4951 }, { "epoch": 0.8834180715368835, "grad_norm": 0.5105646848678589, "learning_rate": 0.0002954957315461066, "loss": 0.9071, "step": 4952 }, { "epoch": 0.8835964677548835, "grad_norm": 0.49081435799598694, "learning_rate": 0.00029542683740163203, "loss": 0.9046, "step": 4953 }, { "epoch": 0.8837748639728837, "grad_norm": 0.45001259446144104, "learning_rate": 0.0002953579396893531, "loss": 0.7903, "step": 4954 }, { "epoch": 0.8839532601908839, "grad_norm": 0.512221097946167, "learning_rate": 0.00029528903841468106, "loss": 0.7811, "step": 4955 }, { "epoch": 0.8841316564088841, "grad_norm": 0.9528411030769348, "learning_rate": 0.0002952201335830275, "loss": 0.9261, "step": 4956 }, { "epoch": 0.8843100526268843, "grad_norm": 0.5219822525978088, "learning_rate": 0.00029515122519980407, "loss": 0.7963, "step": 4957 }, { "epoch": 0.8844884488448845, "grad_norm": 0.4960407614707947, "learning_rate": 0.0002950823132704228, "loss": 0.8326, "step": 4958 }, { "epoch": 0.8846668450628846, "grad_norm": 0.6182901263237, "learning_rate": 0.0002950133978002961, "loss": 0.8034, "step": 4959 }, { "epoch": 0.8848452412808848, "grad_norm": 0.5232715010643005, "learning_rate": 0.00029494447879483657, "loss": 0.9416, "step": 4960 }, { "epoch": 0.885023637498885, "grad_norm": 0.514710009098053, "learning_rate": 0.00029487555625945695, "loss": 0.8282, "step": 4961 }, { "epoch": 0.8852020337168852, "grad_norm": 0.49220260977745056, "learning_rate": 0.0002948066301995704, "loss": 0.7967, "step": 4962 }, { "epoch": 0.8853804299348854, "grad_norm": 0.5210900902748108, "learning_rate": 0.0002947377006205905, "loss": 0.8346, "step": 4963 }, { "epoch": 0.8855588261528856, "grad_norm": 0.5323128700256348, "learning_rate": 0.0002946687675279308, "loss": 0.7406, "step": 4964 }, { "epoch": 0.8857372223708857, "grad_norm": 0.4670845866203308, "learning_rate": 0.0002945998309270053, "loss": 0.6944, "step": 4965 }, { "epoch": 0.8859156185888859, "grad_norm": 0.4710376262664795, "learning_rate": 0.0002945308908232283, "loss": 0.6828, "step": 4966 }, { "epoch": 0.8860940148068861, "grad_norm": 0.4978684186935425, "learning_rate": 0.0002944619472220143, "loss": 0.8463, "step": 4967 }, { "epoch": 0.8862724110248863, "grad_norm": 0.5263177156448364, "learning_rate": 0.0002943930001287781, "loss": 0.8929, "step": 4968 }, { "epoch": 0.8864508072428865, "grad_norm": 0.5331815481185913, "learning_rate": 0.0002943240495489348, "loss": 1.123, "step": 4969 }, { "epoch": 0.8866292034608866, "grad_norm": 0.5104199051856995, "learning_rate": 0.00029425509548789965, "loss": 0.7605, "step": 4970 }, { "epoch": 0.8868075996788868, "grad_norm": 0.5087189674377441, "learning_rate": 0.00029418613795108837, "loss": 0.8925, "step": 4971 }, { "epoch": 0.886985995896887, "grad_norm": 0.4605477750301361, "learning_rate": 0.0002941171769439168, "loss": 0.8883, "step": 4972 }, { "epoch": 0.8871643921148872, "grad_norm": 0.476589173078537, "learning_rate": 0.0002940482124718012, "loss": 0.8115, "step": 4973 }, { "epoch": 0.8873427883328874, "grad_norm": 0.5065289735794067, "learning_rate": 0.00029397924454015797, "loss": 0.9306, "step": 4974 }, { "epoch": 0.8875211845508876, "grad_norm": 0.5235819220542908, "learning_rate": 0.0002939102731544037, "loss": 1.0201, "step": 4975 }, { "epoch": 0.8876995807688877, "grad_norm": 0.6401598453521729, "learning_rate": 0.0002938412983199555, "loss": 0.9119, "step": 4976 }, { "epoch": 0.8878779769868879, "grad_norm": 0.5362576246261597, "learning_rate": 0.00029377232004223065, "loss": 1.006, "step": 4977 }, { "epoch": 0.8880563732048881, "grad_norm": 0.4627399146556854, "learning_rate": 0.00029370333832664657, "loss": 0.7415, "step": 4978 }, { "epoch": 0.8882347694228883, "grad_norm": 0.46277347207069397, "learning_rate": 0.0002936343531786212, "loss": 0.8446, "step": 4979 }, { "epoch": 0.8884131656408885, "grad_norm": 0.48642510175704956, "learning_rate": 0.0002935653646035724, "loss": 0.9397, "step": 4980 }, { "epoch": 0.8885915618588885, "grad_norm": 0.4110255837440491, "learning_rate": 0.00029349637260691865, "loss": 0.7168, "step": 4981 }, { "epoch": 0.8887699580768887, "grad_norm": 0.47086673974990845, "learning_rate": 0.0002934273771940785, "loss": 0.7632, "step": 4982 }, { "epoch": 0.8889483542948889, "grad_norm": 0.48255836963653564, "learning_rate": 0.0002933583783704709, "loss": 0.937, "step": 4983 }, { "epoch": 0.8891267505128891, "grad_norm": 0.44749948382377625, "learning_rate": 0.00029328937614151487, "loss": 0.7854, "step": 4984 }, { "epoch": 0.8893051467308893, "grad_norm": 0.5080204010009766, "learning_rate": 0.0002932203705126298, "loss": 0.7553, "step": 4985 }, { "epoch": 0.8894835429488895, "grad_norm": 0.47025632858276367, "learning_rate": 0.0002931513614892355, "loss": 0.8807, "step": 4986 }, { "epoch": 0.8896619391668896, "grad_norm": 0.46297869086265564, "learning_rate": 0.0002930823490767519, "loss": 0.8596, "step": 4987 }, { "epoch": 0.8898403353848898, "grad_norm": 0.413412868976593, "learning_rate": 0.0002930133332805991, "loss": 0.7766, "step": 4988 }, { "epoch": 0.89001873160289, "grad_norm": 0.4820116460323334, "learning_rate": 0.0002929443141061975, "loss": 0.8289, "step": 4989 }, { "epoch": 0.8901971278208902, "grad_norm": 0.48540276288986206, "learning_rate": 0.00029287529155896805, "loss": 0.9301, "step": 4990 }, { "epoch": 0.8903755240388904, "grad_norm": 1.949074149131775, "learning_rate": 0.0002928062656443317, "loss": 0.6227, "step": 4991 }, { "epoch": 0.8905539202568905, "grad_norm": 2.410193920135498, "learning_rate": 0.00029273723636770953, "loss": 0.699, "step": 4992 }, { "epoch": 0.8907323164748907, "grad_norm": 2.254347562789917, "learning_rate": 0.00029266820373452334, "loss": 0.7326, "step": 4993 }, { "epoch": 0.8909107126928909, "grad_norm": 0.8029910326004028, "learning_rate": 0.00029259916775019475, "loss": 0.8962, "step": 4994 }, { "epoch": 0.8910891089108911, "grad_norm": 0.6749674081802368, "learning_rate": 0.0002925301284201458, "loss": 0.9521, "step": 4995 }, { "epoch": 0.8912675051288913, "grad_norm": 0.700823962688446, "learning_rate": 0.00029246108574979896, "loss": 0.8662, "step": 4996 }, { "epoch": 0.8914459013468915, "grad_norm": 0.7377640604972839, "learning_rate": 0.0002923920397445766, "loss": 0.9585, "step": 4997 }, { "epoch": 0.8916242975648916, "grad_norm": 0.5655547380447388, "learning_rate": 0.00029232299040990174, "loss": 0.8825, "step": 4998 }, { "epoch": 0.8918026937828918, "grad_norm": 0.4939744472503662, "learning_rate": 0.0002922539377511974, "loss": 0.7917, "step": 4999 }, { "epoch": 0.891981090000892, "grad_norm": 0.5311207175254822, "learning_rate": 0.00029218488177388705, "loss": 1.0183, "step": 5000 }, { "epoch": 0.8921594862188922, "grad_norm": 0.5403327345848083, "learning_rate": 0.00029211582248339424, "loss": 0.7361, "step": 5001 }, { "epoch": 0.8923378824368924, "grad_norm": 0.47330448031425476, "learning_rate": 0.0002920467598851428, "loss": 0.6321, "step": 5002 }, { "epoch": 0.8925162786548925, "grad_norm": 0.5282453298568726, "learning_rate": 0.0002919776939845569, "loss": 0.8446, "step": 5003 }, { "epoch": 0.8926946748728927, "grad_norm": 0.48607364296913147, "learning_rate": 0.000291908624787061, "loss": 0.8299, "step": 5004 }, { "epoch": 0.8928730710908929, "grad_norm": 0.43826302886009216, "learning_rate": 0.0002918395522980798, "loss": 0.8356, "step": 5005 }, { "epoch": 0.8930514673088931, "grad_norm": 0.4682931900024414, "learning_rate": 0.00029177047652303816, "loss": 0.835, "step": 5006 }, { "epoch": 0.8932298635268933, "grad_norm": 0.6613882780075073, "learning_rate": 0.0002917013974673612, "loss": 0.7867, "step": 5007 }, { "epoch": 0.8934082597448935, "grad_norm": 0.41586834192276, "learning_rate": 0.00029163231513647454, "loss": 0.772, "step": 5008 }, { "epoch": 0.8935866559628936, "grad_norm": 0.5220624804496765, "learning_rate": 0.00029156322953580367, "loss": 0.8566, "step": 5009 }, { "epoch": 0.8937650521808937, "grad_norm": 0.48346883058547974, "learning_rate": 0.00029149414067077467, "loss": 0.9499, "step": 5010 }, { "epoch": 0.893943448398894, "grad_norm": 0.4970376789569855, "learning_rate": 0.00029142504854681375, "loss": 1.0051, "step": 5011 }, { "epoch": 0.8941218446168941, "grad_norm": 0.532818615436554, "learning_rate": 0.0002913559531693472, "loss": 0.8754, "step": 5012 }, { "epoch": 0.8943002408348943, "grad_norm": 0.4270954430103302, "learning_rate": 0.00029128685454380207, "loss": 0.6813, "step": 5013 }, { "epoch": 0.8944786370528944, "grad_norm": 0.5017000436782837, "learning_rate": 0.0002912177526756051, "loss": 0.9326, "step": 5014 }, { "epoch": 0.8946570332708946, "grad_norm": 0.5018951892852783, "learning_rate": 0.0002911486475701835, "loss": 0.867, "step": 5015 }, { "epoch": 0.8948354294888948, "grad_norm": 0.5230339765548706, "learning_rate": 0.0002910795392329649, "loss": 0.8305, "step": 5016 }, { "epoch": 0.895013825706895, "grad_norm": 0.4868851900100708, "learning_rate": 0.00029101042766937693, "loss": 0.8635, "step": 5017 }, { "epoch": 0.8951922219248952, "grad_norm": 0.5056290626525879, "learning_rate": 0.0002909413128848476, "loss": 0.861, "step": 5018 }, { "epoch": 0.8953706181428954, "grad_norm": 0.5414198040962219, "learning_rate": 0.0002908721948848052, "loss": 0.749, "step": 5019 }, { "epoch": 0.8955490143608955, "grad_norm": 0.4794885814189911, "learning_rate": 0.00029080307367467824, "loss": 0.7712, "step": 5020 }, { "epoch": 0.8957274105788957, "grad_norm": 0.454140841960907, "learning_rate": 0.0002907339492598954, "loss": 0.7231, "step": 5021 }, { "epoch": 0.8959058067968959, "grad_norm": 0.5277546644210815, "learning_rate": 0.0002906648216458857, "loss": 0.8037, "step": 5022 }, { "epoch": 0.8960842030148961, "grad_norm": 0.5510913729667664, "learning_rate": 0.0002905956908380784, "loss": 0.972, "step": 5023 }, { "epoch": 0.8962625992328963, "grad_norm": 0.4881914556026459, "learning_rate": 0.00029052655684190304, "loss": 0.8542, "step": 5024 }, { "epoch": 0.8964409954508964, "grad_norm": 0.5819581151008606, "learning_rate": 0.0002904574196627893, "loss": 0.6731, "step": 5025 }, { "epoch": 0.8966193916688966, "grad_norm": 0.48652219772338867, "learning_rate": 0.0002903882793061673, "loss": 0.7228, "step": 5026 }, { "epoch": 0.8967977878868968, "grad_norm": 0.49010154604911804, "learning_rate": 0.00029031913577746716, "loss": 0.7869, "step": 5027 }, { "epoch": 0.896976184104897, "grad_norm": 0.4732781648635864, "learning_rate": 0.00029024998908211945, "loss": 0.8617, "step": 5028 }, { "epoch": 0.8971545803228972, "grad_norm": 0.47294124960899353, "learning_rate": 0.000290180839225555, "loss": 1.0253, "step": 5029 }, { "epoch": 0.8973329765408974, "grad_norm": 0.4338665306568146, "learning_rate": 0.00029011168621320466, "loss": 0.6808, "step": 5030 }, { "epoch": 0.8975113727588975, "grad_norm": 0.5743736028671265, "learning_rate": 0.00029004253005049976, "loss": 0.6331, "step": 5031 }, { "epoch": 0.8976897689768977, "grad_norm": 0.4324184060096741, "learning_rate": 0.0002899733707428718, "loss": 0.7521, "step": 5032 }, { "epoch": 0.8978681651948979, "grad_norm": 0.95427405834198, "learning_rate": 0.0002899042082957525, "loss": 0.8488, "step": 5033 }, { "epoch": 0.8980465614128981, "grad_norm": 0.4829496741294861, "learning_rate": 0.00028983504271457385, "loss": 0.8179, "step": 5034 }, { "epoch": 0.8982249576308983, "grad_norm": 0.47819754481315613, "learning_rate": 0.000289765874004768, "loss": 0.8563, "step": 5035 }, { "epoch": 0.8984033538488984, "grad_norm": 0.5096835494041443, "learning_rate": 0.0002896967021717676, "loss": 0.7857, "step": 5036 }, { "epoch": 0.8985817500668986, "grad_norm": 0.47968176007270813, "learning_rate": 0.0002896275272210053, "loss": 0.7974, "step": 5037 }, { "epoch": 0.8987601462848988, "grad_norm": 0.38809946179389954, "learning_rate": 0.00028955834915791404, "loss": 0.5523, "step": 5038 }, { "epoch": 0.898938542502899, "grad_norm": 0.47436532378196716, "learning_rate": 0.0002894891679879271, "loss": 0.6803, "step": 5039 }, { "epoch": 0.8991169387208992, "grad_norm": 0.4444058835506439, "learning_rate": 0.0002894199837164779, "loss": 0.6817, "step": 5040 }, { "epoch": 0.8992953349388993, "grad_norm": 0.4691022038459778, "learning_rate": 0.00028935079634900016, "loss": 0.8635, "step": 5041 }, { "epoch": 0.8994737311568994, "grad_norm": 0.4624834358692169, "learning_rate": 0.0002892816058909277, "loss": 0.8735, "step": 5042 }, { "epoch": 0.8996521273748996, "grad_norm": 0.4934956431388855, "learning_rate": 0.00028921241234769484, "loss": 1.0094, "step": 5043 }, { "epoch": 0.8998305235928998, "grad_norm": 0.481513649225235, "learning_rate": 0.000289143215724736, "loss": 0.8711, "step": 5044 }, { "epoch": 0.9000089198109, "grad_norm": 0.47422125935554504, "learning_rate": 0.0002890740160274859, "loss": 0.8002, "step": 5045 }, { "epoch": 0.9001873160289002, "grad_norm": 0.5127121806144714, "learning_rate": 0.00028900481326137945, "loss": 0.7709, "step": 5046 }, { "epoch": 0.9003657122469003, "grad_norm": 0.4381260275840759, "learning_rate": 0.00028893560743185166, "loss": 0.693, "step": 5047 }, { "epoch": 0.9005441084649005, "grad_norm": 0.49497178196907043, "learning_rate": 0.000288866398544338, "loss": 0.7078, "step": 5048 }, { "epoch": 0.9007225046829007, "grad_norm": 0.511747419834137, "learning_rate": 0.00028879718660427417, "loss": 0.8916, "step": 5049 }, { "epoch": 0.9009009009009009, "grad_norm": 0.48092395067214966, "learning_rate": 0.00028872797161709593, "loss": 0.8367, "step": 5050 }, { "epoch": 0.9010792971189011, "grad_norm": 0.4676063060760498, "learning_rate": 0.0002886587535882395, "loss": 0.7425, "step": 5051 }, { "epoch": 0.9012576933369013, "grad_norm": 0.45306262373924255, "learning_rate": 0.00028858953252314126, "loss": 0.7591, "step": 5052 }, { "epoch": 0.9014360895549014, "grad_norm": 0.5242826342582703, "learning_rate": 0.0002885203084272377, "loss": 0.922, "step": 5053 }, { "epoch": 0.9016144857729016, "grad_norm": 0.4822898209095001, "learning_rate": 0.0002884510813059657, "loss": 0.9461, "step": 5054 }, { "epoch": 0.9017928819909018, "grad_norm": 0.46535515785217285, "learning_rate": 0.0002883818511647623, "loss": 0.8912, "step": 5055 }, { "epoch": 0.901971278208902, "grad_norm": 0.4802592694759369, "learning_rate": 0.0002883126180090648, "loss": 0.9853, "step": 5056 }, { "epoch": 0.9021496744269022, "grad_norm": 0.497173547744751, "learning_rate": 0.0002882433818443109, "loss": 0.9567, "step": 5057 }, { "epoch": 0.9023280706449023, "grad_norm": 0.5876064896583557, "learning_rate": 0.00028817414267593805, "loss": 0.9769, "step": 5058 }, { "epoch": 0.9025064668629025, "grad_norm": 0.4283727705478668, "learning_rate": 0.0002881049005093846, "loss": 0.7212, "step": 5059 }, { "epoch": 0.9026848630809027, "grad_norm": 0.5137040019035339, "learning_rate": 0.0002880356553500886, "loss": 0.7794, "step": 5060 }, { "epoch": 0.9028632592989029, "grad_norm": 0.6085144281387329, "learning_rate": 0.00028796640720348866, "loss": 0.8848, "step": 5061 }, { "epoch": 0.9030416555169031, "grad_norm": 0.5343783497810364, "learning_rate": 0.0002878971560750234, "loss": 0.7601, "step": 5062 }, { "epoch": 0.9032200517349033, "grad_norm": 0.5269213914871216, "learning_rate": 0.0002878279019701318, "loss": 0.8196, "step": 5063 }, { "epoch": 0.9033984479529034, "grad_norm": 0.4757898449897766, "learning_rate": 0.00028775864489425306, "loss": 0.8186, "step": 5064 }, { "epoch": 0.9035768441709036, "grad_norm": 0.5592185258865356, "learning_rate": 0.0002876893848528266, "loss": 1.1946, "step": 5065 }, { "epoch": 0.9037552403889038, "grad_norm": 0.43977245688438416, "learning_rate": 0.0002876201218512921, "loss": 0.7215, "step": 5066 }, { "epoch": 0.903933636606904, "grad_norm": 0.49869504570961, "learning_rate": 0.0002875508558950894, "loss": 0.8863, "step": 5067 }, { "epoch": 0.9041120328249042, "grad_norm": 0.47307130694389343, "learning_rate": 0.00028748158698965867, "loss": 0.7705, "step": 5068 }, { "epoch": 0.9042904290429042, "grad_norm": 0.43691718578338623, "learning_rate": 0.00028741231514044013, "loss": 0.6947, "step": 5069 }, { "epoch": 0.9044688252609044, "grad_norm": 0.4944881200790405, "learning_rate": 0.00028734304035287454, "loss": 0.7556, "step": 5070 }, { "epoch": 0.9046472214789046, "grad_norm": 0.48639604449272156, "learning_rate": 0.00028727376263240265, "loss": 0.7911, "step": 5071 }, { "epoch": 0.9048256176969048, "grad_norm": 0.4833623468875885, "learning_rate": 0.0002872044819844654, "loss": 0.802, "step": 5072 }, { "epoch": 0.905004013914905, "grad_norm": 0.47726356983184814, "learning_rate": 0.0002871351984145042, "loss": 0.8817, "step": 5073 }, { "epoch": 0.9051824101329052, "grad_norm": 0.5050178170204163, "learning_rate": 0.0002870659119279605, "loss": 1.0378, "step": 5074 }, { "epoch": 0.9053608063509053, "grad_norm": 0.4867967367172241, "learning_rate": 0.00028699662253027606, "loss": 0.7799, "step": 5075 }, { "epoch": 0.9055392025689055, "grad_norm": 0.4266904890537262, "learning_rate": 0.00028692733022689273, "loss": 0.753, "step": 5076 }, { "epoch": 0.9057175987869057, "grad_norm": 0.4467776417732239, "learning_rate": 0.0002868580350232528, "loss": 0.6849, "step": 5077 }, { "epoch": 0.9058959950049059, "grad_norm": 0.46932464838027954, "learning_rate": 0.0002867887369247987, "loss": 1.0174, "step": 5078 }, { "epoch": 0.9060743912229061, "grad_norm": 0.5115159153938293, "learning_rate": 0.000286719435936973, "loss": 0.8624, "step": 5079 }, { "epoch": 0.9062527874409062, "grad_norm": 0.4789755046367645, "learning_rate": 0.0002866501320652186, "loss": 0.9705, "step": 5080 }, { "epoch": 0.9064311836589064, "grad_norm": 0.4835361838340759, "learning_rate": 0.0002865808253149786, "loss": 0.7777, "step": 5081 }, { "epoch": 0.9066095798769066, "grad_norm": 0.4759388566017151, "learning_rate": 0.0002865115156916963, "loss": 0.7886, "step": 5082 }, { "epoch": 0.9067879760949068, "grad_norm": 0.5399003028869629, "learning_rate": 0.0002864422032008153, "loss": 0.9909, "step": 5083 }, { "epoch": 0.906966372312907, "grad_norm": 0.5101630687713623, "learning_rate": 0.0002863728878477793, "loss": 0.932, "step": 5084 }, { "epoch": 0.9071447685309072, "grad_norm": 0.5102178454399109, "learning_rate": 0.0002863035696380324, "loss": 0.9908, "step": 5085 }, { "epoch": 0.9073231647489073, "grad_norm": 0.5420662760734558, "learning_rate": 0.0002862342485770188, "loss": 1.0111, "step": 5086 }, { "epoch": 0.9075015609669075, "grad_norm": 0.4109286665916443, "learning_rate": 0.00028616492467018286, "loss": 0.8073, "step": 5087 }, { "epoch": 0.9076799571849077, "grad_norm": 0.489951491355896, "learning_rate": 0.0002860955979229693, "loss": 0.8515, "step": 5088 }, { "epoch": 0.9078583534029079, "grad_norm": 0.45059677958488464, "learning_rate": 0.00028602626834082297, "loss": 0.7568, "step": 5089 }, { "epoch": 0.9080367496209081, "grad_norm": 0.4857634902000427, "learning_rate": 0.00028595693592918905, "loss": 0.8072, "step": 5090 }, { "epoch": 0.9082151458389082, "grad_norm": 0.4536861479282379, "learning_rate": 0.00028588760069351286, "loss": 0.7684, "step": 5091 }, { "epoch": 0.9083935420569084, "grad_norm": 0.4914097487926483, "learning_rate": 0.0002858182626392399, "loss": 0.7621, "step": 5092 }, { "epoch": 0.9085719382749086, "grad_norm": 0.4842384159564972, "learning_rate": 0.0002857489217718162, "loss": 0.8194, "step": 5093 }, { "epoch": 0.9087503344929088, "grad_norm": 0.6547600030899048, "learning_rate": 0.00028567957809668744, "loss": 0.9461, "step": 5094 }, { "epoch": 0.908928730710909, "grad_norm": 0.7207086682319641, "learning_rate": 0.00028561023161929996, "loss": 0.7638, "step": 5095 }, { "epoch": 0.9091071269289092, "grad_norm": 0.4441107511520386, "learning_rate": 0.0002855408823451002, "loss": 0.7315, "step": 5096 }, { "epoch": 0.9092855231469092, "grad_norm": 0.42800113558769226, "learning_rate": 0.00028547153027953483, "loss": 0.6776, "step": 5097 }, { "epoch": 0.9094639193649094, "grad_norm": 0.46136319637298584, "learning_rate": 0.00028540217542805075, "loss": 0.9207, "step": 5098 }, { "epoch": 0.9096423155829096, "grad_norm": 0.4710569977760315, "learning_rate": 0.0002853328177960951, "loss": 0.8327, "step": 5099 }, { "epoch": 0.9098207118009098, "grad_norm": 0.48937293887138367, "learning_rate": 0.00028526345738911514, "loss": 0.8316, "step": 5100 }, { "epoch": 0.90999910801891, "grad_norm": 0.49789008498191833, "learning_rate": 0.0002851940942125584, "loss": 0.8759, "step": 5101 }, { "epoch": 0.9101775042369101, "grad_norm": 0.4753686487674713, "learning_rate": 0.0002851247282718726, "loss": 0.8414, "step": 5102 }, { "epoch": 0.9103559004549103, "grad_norm": 0.43923425674438477, "learning_rate": 0.0002850553595725057, "loss": 0.6973, "step": 5103 }, { "epoch": 0.9105342966729105, "grad_norm": 0.4735580086708069, "learning_rate": 0.000284985988119906, "loss": 0.9447, "step": 5104 }, { "epoch": 0.9107126928909107, "grad_norm": 0.543528139591217, "learning_rate": 0.00028491661391952196, "loss": 0.9476, "step": 5105 }, { "epoch": 0.9108910891089109, "grad_norm": 0.48239144682884216, "learning_rate": 0.00028484723697680197, "loss": 0.9228, "step": 5106 }, { "epoch": 0.9110694853269111, "grad_norm": 0.49379172921180725, "learning_rate": 0.00028477785729719504, "loss": 0.8531, "step": 5107 }, { "epoch": 0.9112478815449112, "grad_norm": 0.4407452344894409, "learning_rate": 0.00028470847488615015, "loss": 0.6475, "step": 5108 }, { "epoch": 0.9114262777629114, "grad_norm": 0.4452020227909088, "learning_rate": 0.00028463908974911656, "loss": 0.6565, "step": 5109 }, { "epoch": 0.9116046739809116, "grad_norm": 0.48613160848617554, "learning_rate": 0.0002845697018915437, "loss": 0.6659, "step": 5110 }, { "epoch": 0.9117830701989118, "grad_norm": 0.43221384286880493, "learning_rate": 0.00028450031131888146, "loss": 0.7194, "step": 5111 }, { "epoch": 0.911961466416912, "grad_norm": 0.46625545620918274, "learning_rate": 0.00028443091803657955, "loss": 0.8089, "step": 5112 }, { "epoch": 0.9121398626349121, "grad_norm": 0.4777173399925232, "learning_rate": 0.0002843615220500881, "loss": 0.8622, "step": 5113 }, { "epoch": 0.9123182588529123, "grad_norm": 0.47781699895858765, "learning_rate": 0.0002842921233648576, "loss": 0.9369, "step": 5114 }, { "epoch": 0.9124966550709125, "grad_norm": 0.5193250775337219, "learning_rate": 0.0002842227219863385, "loss": 0.967, "step": 5115 }, { "epoch": 0.9126750512889127, "grad_norm": 0.46008825302124023, "learning_rate": 0.00028415331791998145, "loss": 0.9416, "step": 5116 }, { "epoch": 0.9128534475069129, "grad_norm": 0.4777912199497223, "learning_rate": 0.00028408391117123755, "loss": 0.8793, "step": 5117 }, { "epoch": 0.9130318437249131, "grad_norm": 0.45857903361320496, "learning_rate": 0.00028401450174555794, "loss": 0.908, "step": 5118 }, { "epoch": 0.9132102399429132, "grad_norm": 0.4434804320335388, "learning_rate": 0.000283945089648394, "loss": 0.6586, "step": 5119 }, { "epoch": 0.9133886361609134, "grad_norm": 0.47657546401023865, "learning_rate": 0.0002838756748851973, "loss": 0.5956, "step": 5120 }, { "epoch": 0.9135670323789136, "grad_norm": 0.4849657118320465, "learning_rate": 0.0002838062574614197, "loss": 0.9679, "step": 5121 }, { "epoch": 0.9137454285969138, "grad_norm": 0.5244645476341248, "learning_rate": 0.00028373683738251314, "loss": 1.0226, "step": 5122 }, { "epoch": 0.913923824814914, "grad_norm": 0.4713945686817169, "learning_rate": 0.00028366741465393, "loss": 0.9147, "step": 5123 }, { "epoch": 0.914102221032914, "grad_norm": 0.4651153087615967, "learning_rate": 0.00028359798928112253, "loss": 1.0022, "step": 5124 }, { "epoch": 0.9142806172509143, "grad_norm": 0.4441218972206116, "learning_rate": 0.0002835285612695434, "loss": 0.7423, "step": 5125 }, { "epoch": 0.9144590134689145, "grad_norm": 0.44443660974502563, "learning_rate": 0.00028345913062464555, "loss": 0.592, "step": 5126 }, { "epoch": 0.9146374096869146, "grad_norm": 0.431494802236557, "learning_rate": 0.00028338969735188196, "loss": 0.7291, "step": 5127 }, { "epoch": 0.9148158059049148, "grad_norm": 0.4383372366428375, "learning_rate": 0.00028332026145670594, "loss": 0.7891, "step": 5128 }, { "epoch": 0.914994202122915, "grad_norm": 0.49089357256889343, "learning_rate": 0.00028325082294457086, "loss": 0.951, "step": 5129 }, { "epoch": 0.9151725983409151, "grad_norm": 0.4921533167362213, "learning_rate": 0.00028318138182093053, "loss": 0.9203, "step": 5130 }, { "epoch": 0.9153509945589153, "grad_norm": 0.48249009251594543, "learning_rate": 0.0002831119380912387, "loss": 0.7326, "step": 5131 }, { "epoch": 0.9155293907769155, "grad_norm": 0.48014095425605774, "learning_rate": 0.00028304249176094946, "loss": 0.7696, "step": 5132 }, { "epoch": 0.9157077869949157, "grad_norm": 0.5415335297584534, "learning_rate": 0.00028297304283551725, "loss": 0.9026, "step": 5133 }, { "epoch": 0.9158861832129159, "grad_norm": 0.4653064012527466, "learning_rate": 0.00028290359132039644, "loss": 1.0105, "step": 5134 }, { "epoch": 0.916064579430916, "grad_norm": 0.482500821352005, "learning_rate": 0.00028283413722104164, "loss": 0.8853, "step": 5135 }, { "epoch": 0.9162429756489162, "grad_norm": 0.44477859139442444, "learning_rate": 0.00028276468054290785, "loss": 0.9191, "step": 5136 }, { "epoch": 0.9164213718669164, "grad_norm": 0.4712953567504883, "learning_rate": 0.00028269522129145013, "loss": 0.7727, "step": 5137 }, { "epoch": 0.9165997680849166, "grad_norm": 0.4651871621608734, "learning_rate": 0.0002826257594721238, "loss": 0.8326, "step": 5138 }, { "epoch": 0.9167781643029168, "grad_norm": 0.4735598564147949, "learning_rate": 0.00028255629509038447, "loss": 0.8486, "step": 5139 }, { "epoch": 0.916956560520917, "grad_norm": 0.4831654727458954, "learning_rate": 0.00028248682815168767, "loss": 0.7643, "step": 5140 }, { "epoch": 0.9171349567389171, "grad_norm": 0.4459700584411621, "learning_rate": 0.0002824173586614894, "loss": 0.7463, "step": 5141 }, { "epoch": 0.9173133529569173, "grad_norm": 0.5149909853935242, "learning_rate": 0.0002823478866252456, "loss": 0.9273, "step": 5142 }, { "epoch": 0.9174917491749175, "grad_norm": 0.4949319362640381, "learning_rate": 0.0002822784120484128, "loss": 0.7418, "step": 5143 }, { "epoch": 0.9176701453929177, "grad_norm": 0.47984182834625244, "learning_rate": 0.00028220893493644737, "loss": 0.7807, "step": 5144 }, { "epoch": 0.9178485416109179, "grad_norm": 0.5391241908073425, "learning_rate": 0.0002821394552948062, "loss": 0.8166, "step": 5145 }, { "epoch": 0.918026937828918, "grad_norm": 0.45041200518608093, "learning_rate": 0.0002820699731289459, "loss": 0.6369, "step": 5146 }, { "epoch": 0.9182053340469182, "grad_norm": 0.45535093545913696, "learning_rate": 0.00028200048844432375, "loss": 0.6831, "step": 5147 }, { "epoch": 0.9183837302649184, "grad_norm": 1.066908597946167, "learning_rate": 0.000281931001246397, "loss": 0.8285, "step": 5148 }, { "epoch": 0.9185621264829186, "grad_norm": 0.46443071961402893, "learning_rate": 0.0002818615115406231, "loss": 0.7574, "step": 5149 }, { "epoch": 0.9187405227009188, "grad_norm": 1.1375192403793335, "learning_rate": 0.0002817920193324598, "loss": 0.6816, "step": 5150 }, { "epoch": 0.918918918918919, "grad_norm": 0.49952948093414307, "learning_rate": 0.000281722524627365, "loss": 0.8653, "step": 5151 }, { "epoch": 0.9190973151369191, "grad_norm": 0.978867769241333, "learning_rate": 0.00028165302743079693, "loss": 0.8213, "step": 5152 }, { "epoch": 0.9192757113549193, "grad_norm": 0.4918247163295746, "learning_rate": 0.0002815835277482135, "loss": 0.8755, "step": 5153 }, { "epoch": 0.9194541075729195, "grad_norm": 0.4470139443874359, "learning_rate": 0.0002815140255850735, "loss": 0.6909, "step": 5154 }, { "epoch": 0.9196325037909197, "grad_norm": 0.47432026267051697, "learning_rate": 0.0002814445209468354, "loss": 0.8101, "step": 5155 }, { "epoch": 0.9198109000089199, "grad_norm": 0.5474990606307983, "learning_rate": 0.00028137501383895824, "loss": 1.0695, "step": 5156 }, { "epoch": 0.9199892962269199, "grad_norm": 0.47270458936691284, "learning_rate": 0.00028130550426690095, "loss": 0.7906, "step": 5157 }, { "epoch": 0.9201676924449201, "grad_norm": 0.4652954339981079, "learning_rate": 0.0002812359922361228, "loss": 0.6902, "step": 5158 }, { "epoch": 0.9203460886629203, "grad_norm": 0.4987577199935913, "learning_rate": 0.00028116647775208335, "loss": 0.7337, "step": 5159 }, { "epoch": 0.9205244848809205, "grad_norm": 0.530262291431427, "learning_rate": 0.0002810969608202421, "loss": 0.9222, "step": 5160 }, { "epoch": 0.9207028810989207, "grad_norm": 0.48284605145454407, "learning_rate": 0.00028102744144605895, "loss": 0.7397, "step": 5161 }, { "epoch": 0.9208812773169209, "grad_norm": 0.4856660068035126, "learning_rate": 0.00028095791963499384, "loss": 0.7192, "step": 5162 }, { "epoch": 0.921059673534921, "grad_norm": 0.7125374674797058, "learning_rate": 0.0002808883953925071, "loss": 1.0275, "step": 5163 }, { "epoch": 0.9212380697529212, "grad_norm": 0.4433026611804962, "learning_rate": 0.0002808188687240591, "loss": 0.7499, "step": 5164 }, { "epoch": 0.9214164659709214, "grad_norm": 0.44827622175216675, "learning_rate": 0.00028074933963511035, "loss": 0.8216, "step": 5165 }, { "epoch": 0.9215948621889216, "grad_norm": 0.5151771306991577, "learning_rate": 0.0002806798081311217, "loss": 0.9786, "step": 5166 }, { "epoch": 0.9217732584069218, "grad_norm": 0.5016061663627625, "learning_rate": 0.0002806102742175542, "loss": 1.008, "step": 5167 }, { "epoch": 0.9219516546249219, "grad_norm": 0.4590628743171692, "learning_rate": 0.00028054073789986883, "loss": 0.7287, "step": 5168 }, { "epoch": 0.9221300508429221, "grad_norm": 0.47854331135749817, "learning_rate": 0.00028047119918352717, "loss": 1.1118, "step": 5169 }, { "epoch": 0.9223084470609223, "grad_norm": 0.49446046352386475, "learning_rate": 0.00028040165807399054, "loss": 1.0227, "step": 5170 }, { "epoch": 0.9224868432789225, "grad_norm": 0.49778786301612854, "learning_rate": 0.0002803321145767208, "loss": 0.7794, "step": 5171 }, { "epoch": 0.9226652394969227, "grad_norm": 0.46323299407958984, "learning_rate": 0.0002802625686971798, "loss": 0.8109, "step": 5172 }, { "epoch": 0.9228436357149229, "grad_norm": 0.4294353425502777, "learning_rate": 0.0002801930204408297, "loss": 0.7047, "step": 5173 }, { "epoch": 0.923022031932923, "grad_norm": 0.4814031720161438, "learning_rate": 0.0002801234698131328, "loss": 0.6691, "step": 5174 }, { "epoch": 0.9232004281509232, "grad_norm": 0.5088090300559998, "learning_rate": 0.0002800539168195515, "loss": 0.7468, "step": 5175 }, { "epoch": 0.9233788243689234, "grad_norm": 0.486738383769989, "learning_rate": 0.00027998436146554857, "loss": 0.8424, "step": 5176 }, { "epoch": 0.9235572205869236, "grad_norm": 0.43971219658851624, "learning_rate": 0.0002799148037565867, "loss": 0.829, "step": 5177 }, { "epoch": 0.9237356168049238, "grad_norm": 0.47104689478874207, "learning_rate": 0.0002798452436981291, "loss": 0.608, "step": 5178 }, { "epoch": 0.9239140130229239, "grad_norm": 0.477444589138031, "learning_rate": 0.0002797756812956389, "loss": 0.7896, "step": 5179 }, { "epoch": 0.9240924092409241, "grad_norm": 0.7103281617164612, "learning_rate": 0.00027970611655457953, "loss": 0.7084, "step": 5180 }, { "epoch": 0.9242708054589243, "grad_norm": 1.2961317300796509, "learning_rate": 0.0002796365494804144, "loss": 0.9616, "step": 5181 }, { "epoch": 0.9244492016769245, "grad_norm": 0.540886402130127, "learning_rate": 0.00027956698007860754, "loss": 0.8563, "step": 5182 }, { "epoch": 0.9246275978949247, "grad_norm": 0.4906296133995056, "learning_rate": 0.0002794974083546227, "loss": 0.8361, "step": 5183 }, { "epoch": 0.9248059941129249, "grad_norm": 0.47894391417503357, "learning_rate": 0.0002794278343139242, "loss": 0.881, "step": 5184 }, { "epoch": 0.9249843903309249, "grad_norm": 0.5145757794380188, "learning_rate": 0.0002793582579619762, "loss": 0.8234, "step": 5185 }, { "epoch": 0.9251627865489251, "grad_norm": 0.43651270866394043, "learning_rate": 0.0002792886793042434, "loss": 0.625, "step": 5186 }, { "epoch": 0.9253411827669253, "grad_norm": 0.5134057998657227, "learning_rate": 0.00027921909834619017, "loss": 0.9787, "step": 5187 }, { "epoch": 0.9255195789849255, "grad_norm": 0.6394853591918945, "learning_rate": 0.0002791495150932815, "loss": 0.9377, "step": 5188 }, { "epoch": 0.9256979752029257, "grad_norm": 0.4257347583770752, "learning_rate": 0.0002790799295509825, "loss": 0.6986, "step": 5189 }, { "epoch": 0.9258763714209258, "grad_norm": 0.7512332797050476, "learning_rate": 0.0002790103417247584, "loss": 0.826, "step": 5190 }, { "epoch": 0.926054767638926, "grad_norm": 0.49140146374702454, "learning_rate": 0.0002789407516200746, "loss": 0.7517, "step": 5191 }, { "epoch": 0.9262331638569262, "grad_norm": 0.4644830524921417, "learning_rate": 0.0002788711592423966, "loss": 0.7325, "step": 5192 }, { "epoch": 0.9264115600749264, "grad_norm": 0.466547429561615, "learning_rate": 0.0002788015645971901, "loss": 0.7623, "step": 5193 }, { "epoch": 0.9265899562929266, "grad_norm": 0.4740476906299591, "learning_rate": 0.00027873196768992114, "loss": 0.7261, "step": 5194 }, { "epoch": 0.9267683525109268, "grad_norm": 0.4969778060913086, "learning_rate": 0.00027866236852605575, "loss": 0.8977, "step": 5195 }, { "epoch": 0.9269467487289269, "grad_norm": 0.5204504132270813, "learning_rate": 0.0002785927671110603, "loss": 0.9751, "step": 5196 }, { "epoch": 0.9271251449469271, "grad_norm": 0.5292174220085144, "learning_rate": 0.00027852316345040125, "loss": 0.927, "step": 5197 }, { "epoch": 0.9273035411649273, "grad_norm": 0.5514991283416748, "learning_rate": 0.0002784535575495453, "loss": 1.0084, "step": 5198 }, { "epoch": 0.9274819373829275, "grad_norm": 0.5040212869644165, "learning_rate": 0.00027838394941395907, "loss": 0.7041, "step": 5199 }, { "epoch": 0.9276603336009277, "grad_norm": 0.5497444272041321, "learning_rate": 0.00027831433904910963, "loss": 1.0214, "step": 5200 }, { "epoch": 0.9278387298189278, "grad_norm": 0.4607004225254059, "learning_rate": 0.0002782447264604643, "loss": 0.7548, "step": 5201 }, { "epoch": 0.928017126036928, "grad_norm": 0.4299633502960205, "learning_rate": 0.00027817511165349024, "loss": 0.7039, "step": 5202 }, { "epoch": 0.9281955222549282, "grad_norm": 0.5070714950561523, "learning_rate": 0.000278105494633655, "loss": 0.9753, "step": 5203 }, { "epoch": 0.9283739184729284, "grad_norm": 0.49922603368759155, "learning_rate": 0.0002780358754064263, "loss": 0.8345, "step": 5204 }, { "epoch": 0.9285523146909286, "grad_norm": 0.6079359650611877, "learning_rate": 0.00027796625397727214, "loss": 0.694, "step": 5205 }, { "epoch": 0.9287307109089288, "grad_norm": 0.45466673374176025, "learning_rate": 0.0002778966303516603, "loss": 0.8286, "step": 5206 }, { "epoch": 0.9289091071269289, "grad_norm": 0.5105868577957153, "learning_rate": 0.00027782700453505925, "loss": 0.7927, "step": 5207 }, { "epoch": 0.9290875033449291, "grad_norm": 0.5233384370803833, "learning_rate": 0.00027775737653293716, "loss": 0.8307, "step": 5208 }, { "epoch": 0.9292658995629293, "grad_norm": 0.508243978023529, "learning_rate": 0.00027768774635076265, "loss": 0.8838, "step": 5209 }, { "epoch": 0.9294442957809295, "grad_norm": 0.46551835536956787, "learning_rate": 0.0002776181139940045, "loss": 0.7637, "step": 5210 }, { "epoch": 0.9296226919989297, "grad_norm": 0.44400593638420105, "learning_rate": 0.0002775484794681315, "loss": 0.7503, "step": 5211 }, { "epoch": 0.9298010882169297, "grad_norm": 0.4329807460308075, "learning_rate": 0.0002774788427786128, "loss": 0.6834, "step": 5212 }, { "epoch": 0.92997948443493, "grad_norm": 0.5895024538040161, "learning_rate": 0.0002774092039309176, "loss": 0.928, "step": 5213 }, { "epoch": 0.9301578806529301, "grad_norm": 0.46719425916671753, "learning_rate": 0.0002773395629305154, "loss": 0.6546, "step": 5214 }, { "epoch": 0.9303362768709303, "grad_norm": 0.5935095548629761, "learning_rate": 0.0002772699197828756, "loss": 0.7523, "step": 5215 }, { "epoch": 0.9305146730889305, "grad_norm": 0.5028880834579468, "learning_rate": 0.00027720027449346806, "loss": 0.9039, "step": 5216 }, { "epoch": 0.9306930693069307, "grad_norm": 0.4530572295188904, "learning_rate": 0.00027713062706776273, "loss": 0.7171, "step": 5217 }, { "epoch": 0.9308714655249308, "grad_norm": 0.4633224606513977, "learning_rate": 0.0002770609775112295, "loss": 0.8019, "step": 5218 }, { "epoch": 0.931049861742931, "grad_norm": 0.5543755292892456, "learning_rate": 0.00027699132582933886, "loss": 1.0682, "step": 5219 }, { "epoch": 0.9312282579609312, "grad_norm": 0.4689428508281708, "learning_rate": 0.000276921672027561, "loss": 0.7497, "step": 5220 }, { "epoch": 0.9314066541789314, "grad_norm": 0.5170400142669678, "learning_rate": 0.0002768520161113667, "loss": 0.8323, "step": 5221 }, { "epoch": 0.9315850503969316, "grad_norm": 0.4578782320022583, "learning_rate": 0.0002767823580862265, "loss": 1.0117, "step": 5222 }, { "epoch": 0.9317634466149317, "grad_norm": 0.44367796182632446, "learning_rate": 0.00027671269795761155, "loss": 0.7195, "step": 5223 }, { "epoch": 0.9319418428329319, "grad_norm": 0.48492223024368286, "learning_rate": 0.00027664303573099274, "loss": 0.9721, "step": 5224 }, { "epoch": 0.9321202390509321, "grad_norm": 0.696104109287262, "learning_rate": 0.00027657337141184134, "loss": 1.0126, "step": 5225 }, { "epoch": 0.9322986352689323, "grad_norm": 0.7282087206840515, "learning_rate": 0.00027650370500562885, "loss": 0.9032, "step": 5226 }, { "epoch": 0.9324770314869325, "grad_norm": 0.42898091673851013, "learning_rate": 0.00027643403651782673, "loss": 0.6429, "step": 5227 }, { "epoch": 0.9326554277049327, "grad_norm": 0.4315216541290283, "learning_rate": 0.00027636436595390674, "loss": 0.8703, "step": 5228 }, { "epoch": 0.9328338239229328, "grad_norm": 0.44752299785614014, "learning_rate": 0.0002762946933193408, "loss": 0.8215, "step": 5229 }, { "epoch": 0.933012220140933, "grad_norm": 0.4484764635562897, "learning_rate": 0.00027622501861960104, "loss": 0.6912, "step": 5230 }, { "epoch": 0.9331906163589332, "grad_norm": 0.371706485748291, "learning_rate": 0.0002761553418601595, "loss": 0.5803, "step": 5231 }, { "epoch": 0.9333690125769334, "grad_norm": 0.4724928140640259, "learning_rate": 0.0002760856630464888, "loss": 0.8421, "step": 5232 }, { "epoch": 0.9335474087949336, "grad_norm": 0.49815791845321655, "learning_rate": 0.0002760159821840612, "loss": 0.6765, "step": 5233 }, { "epoch": 0.9337258050129337, "grad_norm": 0.5424796342849731, "learning_rate": 0.00027594629927834956, "loss": 0.8134, "step": 5234 }, { "epoch": 0.9339042012309339, "grad_norm": 0.5402620434761047, "learning_rate": 0.0002758766143348268, "loss": 0.8324, "step": 5235 }, { "epoch": 0.9340825974489341, "grad_norm": 0.4638790190219879, "learning_rate": 0.0002758069273589659, "loss": 0.8668, "step": 5236 }, { "epoch": 0.9342609936669343, "grad_norm": 0.5340316295623779, "learning_rate": 0.00027573723835624004, "loss": 0.8628, "step": 5237 }, { "epoch": 0.9344393898849345, "grad_norm": 0.6846177577972412, "learning_rate": 0.00027566754733212255, "loss": 0.831, "step": 5238 }, { "epoch": 0.9346177861029347, "grad_norm": 0.502400815486908, "learning_rate": 0.0002755978542920869, "loss": 0.8583, "step": 5239 }, { "epoch": 0.9347961823209348, "grad_norm": 0.44814401865005493, "learning_rate": 0.00027552815924160686, "loss": 0.7743, "step": 5240 }, { "epoch": 0.934974578538935, "grad_norm": 0.6513227224349976, "learning_rate": 0.0002754584621861561, "loss": 0.922, "step": 5241 }, { "epoch": 0.9351529747569352, "grad_norm": 0.4567849040031433, "learning_rate": 0.0002753887631312086, "loss": 0.7956, "step": 5242 }, { "epoch": 0.9353313709749353, "grad_norm": 0.4874018430709839, "learning_rate": 0.00027531906208223865, "loss": 0.9146, "step": 5243 }, { "epoch": 0.9355097671929355, "grad_norm": 0.4673248827457428, "learning_rate": 0.00027524935904472053, "loss": 0.8524, "step": 5244 }, { "epoch": 0.9356881634109356, "grad_norm": 0.5060569047927856, "learning_rate": 0.0002751796540241286, "loss": 1.0127, "step": 5245 }, { "epoch": 0.9358665596289358, "grad_norm": 0.4823893904685974, "learning_rate": 0.00027510994702593743, "loss": 0.8641, "step": 5246 }, { "epoch": 0.936044955846936, "grad_norm": 0.5004808902740479, "learning_rate": 0.0002750402380556218, "loss": 0.8461, "step": 5247 }, { "epoch": 0.9362233520649362, "grad_norm": 0.4644457995891571, "learning_rate": 0.0002749705271186567, "loss": 0.7319, "step": 5248 }, { "epoch": 0.9364017482829364, "grad_norm": 0.49020230770111084, "learning_rate": 0.0002749008142205171, "loss": 0.7813, "step": 5249 }, { "epoch": 0.9365801445009366, "grad_norm": 0.568228006362915, "learning_rate": 0.0002748310993666783, "loss": 1.0021, "step": 5250 }, { "epoch": 0.9367585407189367, "grad_norm": 0.46288859844207764, "learning_rate": 0.00027476138256261575, "loss": 0.7229, "step": 5251 }, { "epoch": 0.9369369369369369, "grad_norm": 0.5582467913627625, "learning_rate": 0.00027469166381380474, "loss": 0.729, "step": 5252 }, { "epoch": 0.9371153331549371, "grad_norm": 0.6614149808883667, "learning_rate": 0.0002746219431257211, "loss": 0.8033, "step": 5253 }, { "epoch": 0.9372937293729373, "grad_norm": 0.4389813244342804, "learning_rate": 0.0002745522205038406, "loss": 0.6155, "step": 5254 }, { "epoch": 0.9374721255909375, "grad_norm": 0.564568042755127, "learning_rate": 0.0002744824959536393, "loss": 0.8861, "step": 5255 }, { "epoch": 0.9376505218089376, "grad_norm": 0.6031481623649597, "learning_rate": 0.00027441276948059337, "loss": 0.8549, "step": 5256 }, { "epoch": 0.9378289180269378, "grad_norm": 0.4482172429561615, "learning_rate": 0.0002743430410901789, "loss": 0.6852, "step": 5257 }, { "epoch": 0.938007314244938, "grad_norm": 0.47618693113327026, "learning_rate": 0.0002742733107878726, "loss": 0.6698, "step": 5258 }, { "epoch": 0.9381857104629382, "grad_norm": 0.4913199543952942, "learning_rate": 0.00027420357857915083, "loss": 0.9027, "step": 5259 }, { "epoch": 0.9383641066809384, "grad_norm": 0.510835587978363, "learning_rate": 0.0002741338444694904, "loss": 0.8386, "step": 5260 }, { "epoch": 0.9385425028989386, "grad_norm": 1.6068651676177979, "learning_rate": 0.00027406410846436826, "loss": 0.9289, "step": 5261 }, { "epoch": 0.9387208991169387, "grad_norm": 0.5296999216079712, "learning_rate": 0.0002739943705692614, "loss": 0.9303, "step": 5262 }, { "epoch": 0.9388992953349389, "grad_norm": 0.5108866095542908, "learning_rate": 0.00027392463078964696, "loss": 0.8593, "step": 5263 }, { "epoch": 0.9390776915529391, "grad_norm": 0.48683467507362366, "learning_rate": 0.0002738548891310023, "loss": 0.8258, "step": 5264 }, { "epoch": 0.9392560877709393, "grad_norm": 0.48959386348724365, "learning_rate": 0.00027378514559880495, "loss": 0.802, "step": 5265 }, { "epoch": 0.9394344839889395, "grad_norm": 0.5760032534599304, "learning_rate": 0.0002737154001985325, "loss": 0.8428, "step": 5266 }, { "epoch": 0.9396128802069396, "grad_norm": 0.47354596853256226, "learning_rate": 0.0002736456529356627, "loss": 0.7726, "step": 5267 }, { "epoch": 0.9397912764249398, "grad_norm": 0.5111311078071594, "learning_rate": 0.00027357590381567353, "loss": 0.8006, "step": 5268 }, { "epoch": 0.93996967264294, "grad_norm": 0.4919393062591553, "learning_rate": 0.00027350615284404305, "loss": 0.8726, "step": 5269 }, { "epoch": 0.9401480688609402, "grad_norm": 0.43833592534065247, "learning_rate": 0.0002734364000262494, "loss": 0.8131, "step": 5270 }, { "epoch": 0.9403264650789404, "grad_norm": 0.4701617658138275, "learning_rate": 0.00027336664536777093, "loss": 0.8316, "step": 5271 }, { "epoch": 0.9405048612969406, "grad_norm": 0.430813729763031, "learning_rate": 0.0002732968888740863, "loss": 0.7455, "step": 5272 }, { "epoch": 0.9406832575149406, "grad_norm": 0.46511298418045044, "learning_rate": 0.00027322713055067397, "loss": 0.8424, "step": 5273 }, { "epoch": 0.9408616537329408, "grad_norm": 0.5365491509437561, "learning_rate": 0.0002731573704030128, "loss": 0.902, "step": 5274 }, { "epoch": 0.941040049950941, "grad_norm": 0.4637891948223114, "learning_rate": 0.0002730876084365817, "loss": 0.7835, "step": 5275 }, { "epoch": 0.9412184461689412, "grad_norm": 0.47008875012397766, "learning_rate": 0.00027301784465685983, "loss": 0.8082, "step": 5276 }, { "epoch": 0.9413968423869414, "grad_norm": 0.42450207471847534, "learning_rate": 0.0002729480790693263, "loss": 0.7095, "step": 5277 }, { "epoch": 0.9415752386049415, "grad_norm": 0.5233426690101624, "learning_rate": 0.0002728783116794606, "loss": 0.9605, "step": 5278 }, { "epoch": 0.9417536348229417, "grad_norm": 1.0770610570907593, "learning_rate": 0.00027280854249274206, "loss": 0.6148, "step": 5279 }, { "epoch": 0.9419320310409419, "grad_norm": 0.6112112402915955, "learning_rate": 0.00027273877151465036, "loss": 1.1989, "step": 5280 }, { "epoch": 0.9421104272589421, "grad_norm": 0.5142430663108826, "learning_rate": 0.0002726689987506654, "loss": 0.9081, "step": 5281 }, { "epoch": 0.9422888234769423, "grad_norm": 0.5745399594306946, "learning_rate": 0.00027259922420626705, "loss": 0.8929, "step": 5282 }, { "epoch": 0.9424672196949425, "grad_norm": 0.6627066731452942, "learning_rate": 0.00027252944788693536, "loss": 0.6532, "step": 5283 }, { "epoch": 0.9426456159129426, "grad_norm": 0.584989607334137, "learning_rate": 0.00027245966979815044, "loss": 0.8091, "step": 5284 }, { "epoch": 0.9428240121309428, "grad_norm": 0.4927927851676941, "learning_rate": 0.0002723898899453929, "loss": 0.7812, "step": 5285 }, { "epoch": 0.943002408348943, "grad_norm": 0.5424949526786804, "learning_rate": 0.00027232010833414287, "loss": 0.9299, "step": 5286 }, { "epoch": 0.9431808045669432, "grad_norm": 0.5809288024902344, "learning_rate": 0.0002722503249698812, "loss": 0.8294, "step": 5287 }, { "epoch": 0.9433592007849434, "grad_norm": 0.601315438747406, "learning_rate": 0.0002721805398580885, "loss": 0.7725, "step": 5288 }, { "epoch": 0.9435375970029435, "grad_norm": 0.5783292055130005, "learning_rate": 0.0002721107530042458, "loss": 0.7394, "step": 5289 }, { "epoch": 0.9437159932209437, "grad_norm": 0.4998534321784973, "learning_rate": 0.00027204096441383414, "loss": 0.8316, "step": 5290 }, { "epoch": 0.9438943894389439, "grad_norm": 0.8708257079124451, "learning_rate": 0.0002719711740923346, "loss": 0.811, "step": 5291 }, { "epoch": 0.9440727856569441, "grad_norm": 0.5783541798591614, "learning_rate": 0.00027190138204522847, "loss": 0.9316, "step": 5292 }, { "epoch": 0.9442511818749443, "grad_norm": 1.6886039972305298, "learning_rate": 0.0002718315882779972, "loss": 0.6536, "step": 5293 }, { "epoch": 0.9444295780929445, "grad_norm": 0.5459927320480347, "learning_rate": 0.0002717617927961224, "loss": 0.8067, "step": 5294 }, { "epoch": 0.9446079743109446, "grad_norm": 0.46528294682502747, "learning_rate": 0.00027169199560508574, "loss": 0.5931, "step": 5295 }, { "epoch": 0.9447863705289448, "grad_norm": 0.5428159832954407, "learning_rate": 0.0002716221967103691, "loss": 0.8225, "step": 5296 }, { "epoch": 0.944964766746945, "grad_norm": 0.4363666772842407, "learning_rate": 0.0002715523961174545, "loss": 0.6572, "step": 5297 }, { "epoch": 0.9451431629649452, "grad_norm": 0.6103348731994629, "learning_rate": 0.0002714825938318239, "loss": 1.0737, "step": 5298 }, { "epoch": 0.9453215591829454, "grad_norm": 0.4808749854564667, "learning_rate": 0.0002714127898589596, "loss": 0.8564, "step": 5299 }, { "epoch": 0.9454999554009454, "grad_norm": 0.46589285135269165, "learning_rate": 0.00027134298420434405, "loss": 0.7157, "step": 5300 }, { "epoch": 0.9456783516189456, "grad_norm": 0.4251248836517334, "learning_rate": 0.0002712731768734597, "loss": 0.6488, "step": 5301 }, { "epoch": 0.9458567478369458, "grad_norm": 0.4899739623069763, "learning_rate": 0.0002712033678717892, "loss": 0.9113, "step": 5302 }, { "epoch": 0.946035144054946, "grad_norm": 0.506604790687561, "learning_rate": 0.00027113355720481523, "loss": 0.8268, "step": 5303 }, { "epoch": 0.9462135402729462, "grad_norm": 0.4809357225894928, "learning_rate": 0.00027106374487802096, "loss": 0.9743, "step": 5304 }, { "epoch": 0.9463919364909464, "grad_norm": 0.4477202296257019, "learning_rate": 0.00027099393089688906, "loss": 0.8365, "step": 5305 }, { "epoch": 0.9465703327089465, "grad_norm": 0.42474350333213806, "learning_rate": 0.0002709241152669029, "loss": 0.654, "step": 5306 }, { "epoch": 0.9467487289269467, "grad_norm": 0.4127642810344696, "learning_rate": 0.00027085429799354575, "loss": 0.7342, "step": 5307 }, { "epoch": 0.9469271251449469, "grad_norm": 0.4678402543067932, "learning_rate": 0.00027078447908230105, "loss": 0.7684, "step": 5308 }, { "epoch": 0.9471055213629471, "grad_norm": 0.434625506401062, "learning_rate": 0.00027071465853865224, "loss": 0.6473, "step": 5309 }, { "epoch": 0.9472839175809473, "grad_norm": 0.48882344365119934, "learning_rate": 0.00027064483636808314, "loss": 0.8451, "step": 5310 }, { "epoch": 0.9474623137989474, "grad_norm": 0.506234347820282, "learning_rate": 0.0002705750125760774, "loss": 1.075, "step": 5311 }, { "epoch": 0.9476407100169476, "grad_norm": 0.47516417503356934, "learning_rate": 0.00027050518716811904, "loss": 0.8066, "step": 5312 }, { "epoch": 0.9478191062349478, "grad_norm": 0.45935487747192383, "learning_rate": 0.0002704353601496921, "loss": 0.8011, "step": 5313 }, { "epoch": 0.947997502452948, "grad_norm": 0.4938845634460449, "learning_rate": 0.0002703655315262808, "loss": 0.9695, "step": 5314 }, { "epoch": 0.9481758986709482, "grad_norm": 0.4160667359828949, "learning_rate": 0.00027029570130336937, "loss": 0.6122, "step": 5315 }, { "epoch": 0.9483542948889484, "grad_norm": 0.5093261003494263, "learning_rate": 0.00027022586948644234, "loss": 1.0263, "step": 5316 }, { "epoch": 0.9485326911069485, "grad_norm": 0.4197608530521393, "learning_rate": 0.0002701560360809842, "loss": 0.8012, "step": 5317 }, { "epoch": 0.9487110873249487, "grad_norm": 0.47987252473831177, "learning_rate": 0.0002700862010924797, "loss": 0.7935, "step": 5318 }, { "epoch": 0.9488894835429489, "grad_norm": 0.4315970242023468, "learning_rate": 0.00027001636452641354, "loss": 0.6472, "step": 5319 }, { "epoch": 0.9490678797609491, "grad_norm": 0.4990069270133972, "learning_rate": 0.0002699465263882708, "loss": 0.7692, "step": 5320 }, { "epoch": 0.9492462759789493, "grad_norm": 0.4555619955062866, "learning_rate": 0.00026987668668353637, "loss": 0.7722, "step": 5321 }, { "epoch": 0.9494246721969494, "grad_norm": 0.5311444401741028, "learning_rate": 0.00026980684541769563, "loss": 1.0347, "step": 5322 }, { "epoch": 0.9496030684149496, "grad_norm": 0.4669055640697479, "learning_rate": 0.0002697370025962337, "loss": 0.6929, "step": 5323 }, { "epoch": 0.9497814646329498, "grad_norm": 0.4881044924259186, "learning_rate": 0.0002696671582246361, "loss": 0.8754, "step": 5324 }, { "epoch": 0.94995986085095, "grad_norm": 0.45897579193115234, "learning_rate": 0.0002695973123083884, "loss": 0.8245, "step": 5325 }, { "epoch": 0.9501382570689502, "grad_norm": 0.4809344410896301, "learning_rate": 0.00026952746485297614, "loss": 0.6617, "step": 5326 }, { "epoch": 0.9503166532869504, "grad_norm": 0.4562990963459015, "learning_rate": 0.00026945761586388524, "loss": 0.8179, "step": 5327 }, { "epoch": 0.9504950495049505, "grad_norm": 0.4504075050354004, "learning_rate": 0.0002693877653466015, "loss": 0.8011, "step": 5328 }, { "epoch": 0.9506734457229506, "grad_norm": 0.44723081588745117, "learning_rate": 0.0002693179133066111, "loss": 0.7159, "step": 5329 }, { "epoch": 0.9508518419409508, "grad_norm": 0.49957334995269775, "learning_rate": 0.00026924805974940007, "loss": 0.8277, "step": 5330 }, { "epoch": 0.951030238158951, "grad_norm": 0.4783354699611664, "learning_rate": 0.0002691782046804548, "loss": 0.9436, "step": 5331 }, { "epoch": 0.9512086343769512, "grad_norm": 0.39161327481269836, "learning_rate": 0.00026910834810526147, "loss": 0.5881, "step": 5332 }, { "epoch": 0.9513870305949513, "grad_norm": 0.46436673402786255, "learning_rate": 0.00026903849002930677, "loss": 0.7549, "step": 5333 }, { "epoch": 0.9515654268129515, "grad_norm": 0.4534311592578888, "learning_rate": 0.00026896863045807715, "loss": 0.6577, "step": 5334 }, { "epoch": 0.9517438230309517, "grad_norm": 0.4262526333332062, "learning_rate": 0.00026889876939705946, "loss": 0.7005, "step": 5335 }, { "epoch": 0.9519222192489519, "grad_norm": 0.49773791432380676, "learning_rate": 0.00026882890685174065, "loss": 0.8739, "step": 5336 }, { "epoch": 0.9521006154669521, "grad_norm": 0.46525028347969055, "learning_rate": 0.00026875904282760765, "loss": 0.6493, "step": 5337 }, { "epoch": 0.9522790116849523, "grad_norm": 0.42473146319389343, "learning_rate": 0.00026868917733014743, "loss": 0.6203, "step": 5338 }, { "epoch": 0.9524574079029524, "grad_norm": 0.49023476243019104, "learning_rate": 0.0002686193103648472, "loss": 0.9317, "step": 5339 }, { "epoch": 0.9526358041209526, "grad_norm": 0.5018748641014099, "learning_rate": 0.00026854944193719445, "loss": 0.9483, "step": 5340 }, { "epoch": 0.9528142003389528, "grad_norm": 0.49157464504241943, "learning_rate": 0.00026847957205267635, "loss": 0.9226, "step": 5341 }, { "epoch": 0.952992596556953, "grad_norm": 0.474185049533844, "learning_rate": 0.0002684097007167807, "loss": 0.8999, "step": 5342 }, { "epoch": 0.9531709927749532, "grad_norm": 0.4604833126068115, "learning_rate": 0.0002683398279349952, "loss": 0.8502, "step": 5343 }, { "epoch": 0.9533493889929533, "grad_norm": 0.5128932595252991, "learning_rate": 0.0002682699537128074, "loss": 0.8059, "step": 5344 }, { "epoch": 0.9535277852109535, "grad_norm": 0.42968082427978516, "learning_rate": 0.00026820007805570536, "loss": 0.6065, "step": 5345 }, { "epoch": 0.9537061814289537, "grad_norm": 0.5407286286354065, "learning_rate": 0.00026813020096917695, "loss": 0.9448, "step": 5346 }, { "epoch": 0.9538845776469539, "grad_norm": 1.0155121088027954, "learning_rate": 0.0002680603224587104, "loss": 1.2441, "step": 5347 }, { "epoch": 0.9540629738649541, "grad_norm": 0.5304948687553406, "learning_rate": 0.0002679904425297938, "loss": 1.0268, "step": 5348 }, { "epoch": 0.9542413700829543, "grad_norm": 0.4759924113750458, "learning_rate": 0.00026792056118791563, "loss": 0.9434, "step": 5349 }, { "epoch": 0.9544197663009544, "grad_norm": 0.3957015573978424, "learning_rate": 0.00026785067843856437, "loss": 0.6315, "step": 5350 }, { "epoch": 0.9545981625189546, "grad_norm": 0.46599963307380676, "learning_rate": 0.00026778079428722845, "loss": 0.7155, "step": 5351 }, { "epoch": 0.9547765587369548, "grad_norm": 0.4720132648944855, "learning_rate": 0.0002677109087393966, "loss": 0.8976, "step": 5352 }, { "epoch": 0.954954954954955, "grad_norm": 0.5006893277168274, "learning_rate": 0.00026764102180055766, "loss": 0.8828, "step": 5353 }, { "epoch": 0.9551333511729552, "grad_norm": 0.4419143795967102, "learning_rate": 0.0002675711334762004, "loss": 0.7284, "step": 5354 }, { "epoch": 0.9553117473909553, "grad_norm": 0.4111958146095276, "learning_rate": 0.0002675012437718139, "loss": 0.6126, "step": 5355 }, { "epoch": 0.9554901436089555, "grad_norm": 0.5170559287071228, "learning_rate": 0.0002674313526928872, "loss": 0.9313, "step": 5356 }, { "epoch": 0.9556685398269557, "grad_norm": 0.4762997329235077, "learning_rate": 0.0002673614602449096, "loss": 0.8053, "step": 5357 }, { "epoch": 0.9558469360449559, "grad_norm": 0.4590204358100891, "learning_rate": 0.0002672915664333704, "loss": 0.85, "step": 5358 }, { "epoch": 0.956025332262956, "grad_norm": 0.5133612155914307, "learning_rate": 0.000267221671263759, "loss": 0.7038, "step": 5359 }, { "epoch": 0.9562037284809562, "grad_norm": 0.4732131063938141, "learning_rate": 0.0002671517747415649, "loss": 0.8324, "step": 5360 }, { "epoch": 0.9563821246989563, "grad_norm": 0.4754936099052429, "learning_rate": 0.0002670818768722778, "loss": 0.9258, "step": 5361 }, { "epoch": 0.9565605209169565, "grad_norm": 0.4571801722049713, "learning_rate": 0.0002670119776613875, "loss": 0.8035, "step": 5362 }, { "epoch": 0.9567389171349567, "grad_norm": 0.48625341057777405, "learning_rate": 0.0002669420771143838, "loss": 0.8987, "step": 5363 }, { "epoch": 0.9569173133529569, "grad_norm": 0.4703410267829895, "learning_rate": 0.0002668721752367566, "loss": 0.9292, "step": 5364 }, { "epoch": 0.9570957095709571, "grad_norm": 0.458659291267395, "learning_rate": 0.00026680227203399604, "loss": 0.7997, "step": 5365 }, { "epoch": 0.9572741057889572, "grad_norm": 0.4934099018573761, "learning_rate": 0.0002667323675115922, "loss": 0.7636, "step": 5366 }, { "epoch": 0.9574525020069574, "grad_norm": 0.4184805154800415, "learning_rate": 0.0002666624616750355, "loss": 0.6587, "step": 5367 }, { "epoch": 0.9576308982249576, "grad_norm": 0.485462486743927, "learning_rate": 0.00026659255452981623, "loss": 0.9463, "step": 5368 }, { "epoch": 0.9578092944429578, "grad_norm": 0.6165233254432678, "learning_rate": 0.00026652264608142484, "loss": 0.6606, "step": 5369 }, { "epoch": 0.957987690660958, "grad_norm": 0.44948717951774597, "learning_rate": 0.000266452736335352, "loss": 0.7658, "step": 5370 }, { "epoch": 0.9581660868789582, "grad_norm": 0.40302959084510803, "learning_rate": 0.0002663828252970883, "loss": 0.6658, "step": 5371 }, { "epoch": 0.9583444830969583, "grad_norm": 0.4867773652076721, "learning_rate": 0.00026631291297212444, "loss": 0.7478, "step": 5372 }, { "epoch": 0.9585228793149585, "grad_norm": 0.4726101756095886, "learning_rate": 0.0002662429993659515, "loss": 0.8439, "step": 5373 }, { "epoch": 0.9587012755329587, "grad_norm": 0.5044912099838257, "learning_rate": 0.0002661730844840604, "loss": 0.9133, "step": 5374 }, { "epoch": 0.9588796717509589, "grad_norm": 0.4630397856235504, "learning_rate": 0.0002661031683319422, "loss": 0.9888, "step": 5375 }, { "epoch": 0.9590580679689591, "grad_norm": 0.4974066913127899, "learning_rate": 0.00026603325091508807, "loss": 1.1051, "step": 5376 }, { "epoch": 0.9592364641869592, "grad_norm": 0.42207884788513184, "learning_rate": 0.00026596333223898933, "loss": 0.7558, "step": 5377 }, { "epoch": 0.9594148604049594, "grad_norm": 0.5264635682106018, "learning_rate": 0.00026589341230913736, "loss": 0.9417, "step": 5378 }, { "epoch": 0.9595932566229596, "grad_norm": 0.467877596616745, "learning_rate": 0.0002658234911310236, "loss": 0.6177, "step": 5379 }, { "epoch": 0.9597716528409598, "grad_norm": 0.5078222155570984, "learning_rate": 0.0002657535687101396, "loss": 0.9433, "step": 5380 }, { "epoch": 0.95995004905896, "grad_norm": 0.5080622434616089, "learning_rate": 0.0002656836450519772, "loss": 0.8492, "step": 5381 }, { "epoch": 0.9601284452769602, "grad_norm": 0.4674982726573944, "learning_rate": 0.000265613720162028, "loss": 0.7062, "step": 5382 }, { "epoch": 0.9603068414949603, "grad_norm": 0.544535219669342, "learning_rate": 0.00026554379404578396, "loss": 0.8553, "step": 5383 }, { "epoch": 0.9604852377129605, "grad_norm": 0.4691609740257263, "learning_rate": 0.00026547386670873707, "loss": 0.747, "step": 5384 }, { "epoch": 0.9606636339309607, "grad_norm": 0.5239211916923523, "learning_rate": 0.00026540393815637924, "loss": 0.7682, "step": 5385 }, { "epoch": 0.9608420301489609, "grad_norm": 0.4878639280796051, "learning_rate": 0.00026533400839420286, "loss": 1.0158, "step": 5386 }, { "epoch": 0.9610204263669611, "grad_norm": 0.45283398032188416, "learning_rate": 0.0002652640774276999, "loss": 0.9548, "step": 5387 }, { "epoch": 0.9611988225849611, "grad_norm": 0.4687561094760895, "learning_rate": 0.00026519414526236297, "loss": 0.8433, "step": 5388 }, { "epoch": 0.9613772188029613, "grad_norm": 0.515119731426239, "learning_rate": 0.0002651242119036844, "loss": 0.8019, "step": 5389 }, { "epoch": 0.9615556150209615, "grad_norm": 0.42879781126976013, "learning_rate": 0.00026505427735715675, "loss": 0.7789, "step": 5390 }, { "epoch": 0.9617340112389617, "grad_norm": 0.49096372723579407, "learning_rate": 0.00026498434162827266, "loss": 0.9128, "step": 5391 }, { "epoch": 0.9619124074569619, "grad_norm": 0.45153510570526123, "learning_rate": 0.00026491440472252475, "loss": 0.7555, "step": 5392 }, { "epoch": 0.9620908036749621, "grad_norm": 0.43661659955978394, "learning_rate": 0.00026484446664540594, "loss": 0.6922, "step": 5393 }, { "epoch": 0.9622691998929622, "grad_norm": 0.4993753731250763, "learning_rate": 0.00026477452740240914, "loss": 0.7653, "step": 5394 }, { "epoch": 0.9624475961109624, "grad_norm": 0.4706138074398041, "learning_rate": 0.00026470458699902723, "loss": 0.9497, "step": 5395 }, { "epoch": 0.9626259923289626, "grad_norm": 0.4838847815990448, "learning_rate": 0.00026463464544075344, "loss": 0.8203, "step": 5396 }, { "epoch": 0.9628043885469628, "grad_norm": 0.4903802275657654, "learning_rate": 0.000264564702733081, "loss": 0.8855, "step": 5397 }, { "epoch": 0.962982784764963, "grad_norm": 0.5173416137695312, "learning_rate": 0.00026449475888150293, "loss": 0.872, "step": 5398 }, { "epoch": 0.9631611809829632, "grad_norm": 0.4816221594810486, "learning_rate": 0.0002644248138915128, "loss": 0.8021, "step": 5399 }, { "epoch": 0.9633395772009633, "grad_norm": 0.4750434160232544, "learning_rate": 0.00026435486776860395, "loss": 0.8306, "step": 5400 }, { "epoch": 0.9635179734189635, "grad_norm": 0.47214189171791077, "learning_rate": 0.00026428492051827, "loss": 0.8921, "step": 5401 }, { "epoch": 0.9636963696369637, "grad_norm": 0.4591917097568512, "learning_rate": 0.0002642149721460045, "loss": 0.7029, "step": 5402 }, { "epoch": 0.9638747658549639, "grad_norm": 0.5314555168151855, "learning_rate": 0.00026414502265730125, "loss": 0.9167, "step": 5403 }, { "epoch": 0.9640531620729641, "grad_norm": 0.4804200232028961, "learning_rate": 0.000264075072057654, "loss": 0.9891, "step": 5404 }, { "epoch": 0.9642315582909642, "grad_norm": 0.4692907929420471, "learning_rate": 0.00026400512035255663, "loss": 0.6985, "step": 5405 }, { "epoch": 0.9644099545089644, "grad_norm": 0.4961127042770386, "learning_rate": 0.00026393516754750313, "loss": 0.8653, "step": 5406 }, { "epoch": 0.9645883507269646, "grad_norm": 0.455837607383728, "learning_rate": 0.0002638652136479876, "loss": 0.6942, "step": 5407 }, { "epoch": 0.9647667469449648, "grad_norm": 0.5186680555343628, "learning_rate": 0.0002637952586595041, "loss": 1.0216, "step": 5408 }, { "epoch": 0.964945143162965, "grad_norm": 0.49785247445106506, "learning_rate": 0.00026372530258754695, "loss": 0.7818, "step": 5409 }, { "epoch": 0.9651235393809652, "grad_norm": 0.4302063286304474, "learning_rate": 0.0002636553454376105, "loss": 0.6761, "step": 5410 }, { "epoch": 0.9653019355989653, "grad_norm": 0.4406273066997528, "learning_rate": 0.00026358538721518905, "loss": 0.7473, "step": 5411 }, { "epoch": 0.9654803318169655, "grad_norm": 0.42952749133110046, "learning_rate": 0.0002635154279257771, "loss": 0.6809, "step": 5412 }, { "epoch": 0.9656587280349657, "grad_norm": 0.502311110496521, "learning_rate": 0.00026344546757486924, "loss": 1.1199, "step": 5413 }, { "epoch": 0.9658371242529659, "grad_norm": 0.4300912022590637, "learning_rate": 0.00026337550616796024, "loss": 0.9147, "step": 5414 }, { "epoch": 0.9660155204709661, "grad_norm": 0.4525073766708374, "learning_rate": 0.00026330554371054466, "loss": 0.9604, "step": 5415 }, { "epoch": 0.9661939166889661, "grad_norm": 0.41568025946617126, "learning_rate": 0.00026323558020811745, "loss": 0.7829, "step": 5416 }, { "epoch": 0.9663723129069663, "grad_norm": 0.5461636781692505, "learning_rate": 0.00026316561566617347, "loss": 0.8039, "step": 5417 }, { "epoch": 0.9665507091249665, "grad_norm": 0.4481653571128845, "learning_rate": 0.00026309565009020766, "loss": 0.8262, "step": 5418 }, { "epoch": 0.9667291053429667, "grad_norm": 0.911888599395752, "learning_rate": 0.00026302568348571514, "loss": 0.8544, "step": 5419 }, { "epoch": 0.9669075015609669, "grad_norm": 0.42414039373397827, "learning_rate": 0.0002629557158581911, "loss": 0.6467, "step": 5420 }, { "epoch": 0.9670858977789671, "grad_norm": 0.5355694890022278, "learning_rate": 0.00026288574721313064, "loss": 0.7265, "step": 5421 }, { "epoch": 0.9672642939969672, "grad_norm": 0.8004781007766724, "learning_rate": 0.0002628157775560291, "loss": 0.8402, "step": 5422 }, { "epoch": 0.9674426902149674, "grad_norm": 0.5186832547187805, "learning_rate": 0.00026274580689238206, "loss": 0.9926, "step": 5423 }, { "epoch": 0.9676210864329676, "grad_norm": 0.5586687922477722, "learning_rate": 0.00026267583522768473, "loss": 1.045, "step": 5424 }, { "epoch": 0.9677994826509678, "grad_norm": 0.4568381607532501, "learning_rate": 0.0002626058625674328, "loss": 0.6261, "step": 5425 }, { "epoch": 0.967977878868968, "grad_norm": 0.4433128237724304, "learning_rate": 0.0002625358889171217, "loss": 0.9197, "step": 5426 }, { "epoch": 0.9681562750869681, "grad_norm": 0.4375893175601959, "learning_rate": 0.00026246591428224743, "loss": 0.6837, "step": 5427 }, { "epoch": 0.9683346713049683, "grad_norm": 0.48804354667663574, "learning_rate": 0.0002623959386683056, "loss": 0.9795, "step": 5428 }, { "epoch": 0.9685130675229685, "grad_norm": 0.43355077505111694, "learning_rate": 0.00026232596208079203, "loss": 0.7921, "step": 5429 }, { "epoch": 0.9686914637409687, "grad_norm": 0.4870491623878479, "learning_rate": 0.00026225598452520277, "loss": 0.9069, "step": 5430 }, { "epoch": 0.9688698599589689, "grad_norm": 0.463943749666214, "learning_rate": 0.00026218600600703376, "loss": 0.7366, "step": 5431 }, { "epoch": 0.9690482561769691, "grad_norm": 0.46572330594062805, "learning_rate": 0.000262116026531781, "loss": 0.7924, "step": 5432 }, { "epoch": 0.9692266523949692, "grad_norm": 0.42976751923561096, "learning_rate": 0.00026204604610494077, "loss": 0.638, "step": 5433 }, { "epoch": 0.9694050486129694, "grad_norm": 0.5167571306228638, "learning_rate": 0.0002619760647320092, "loss": 0.7912, "step": 5434 }, { "epoch": 0.9695834448309696, "grad_norm": 0.39845386147499084, "learning_rate": 0.0002619060824184828, "loss": 0.6637, "step": 5435 }, { "epoch": 0.9697618410489698, "grad_norm": 0.4528100788593292, "learning_rate": 0.00026183609916985776, "loss": 0.6917, "step": 5436 }, { "epoch": 0.96994023726697, "grad_norm": 0.4622432291507721, "learning_rate": 0.00026176611499163056, "loss": 0.721, "step": 5437 }, { "epoch": 0.9701186334849701, "grad_norm": 0.447052538394928, "learning_rate": 0.00026169612988929773, "loss": 0.6908, "step": 5438 }, { "epoch": 0.9702970297029703, "grad_norm": 0.4297953248023987, "learning_rate": 0.00026162614386835597, "loss": 0.7084, "step": 5439 }, { "epoch": 0.9704754259209705, "grad_norm": 0.4557560682296753, "learning_rate": 0.0002615561569343018, "loss": 0.8465, "step": 5440 }, { "epoch": 0.9706538221389707, "grad_norm": 0.42266544699668884, "learning_rate": 0.000261486169092632, "loss": 0.6536, "step": 5441 }, { "epoch": 0.9708322183569709, "grad_norm": 0.4929708242416382, "learning_rate": 0.0002614161803488435, "loss": 0.9094, "step": 5442 }, { "epoch": 0.9710106145749711, "grad_norm": 0.45966464281082153, "learning_rate": 0.0002613461907084331, "loss": 0.7899, "step": 5443 }, { "epoch": 0.9711890107929712, "grad_norm": 0.5083897113800049, "learning_rate": 0.0002612762001768978, "loss": 0.9044, "step": 5444 }, { "epoch": 0.9713674070109714, "grad_norm": 0.4741891026496887, "learning_rate": 0.00026120620875973453, "loss": 0.9287, "step": 5445 }, { "epoch": 0.9715458032289715, "grad_norm": 1.0679816007614136, "learning_rate": 0.00026113621646244045, "loss": 0.999, "step": 5446 }, { "epoch": 0.9717241994469717, "grad_norm": 0.4491601288318634, "learning_rate": 0.0002610662232905127, "loss": 0.8127, "step": 5447 }, { "epoch": 0.971902595664972, "grad_norm": 0.538118302822113, "learning_rate": 0.00026099622924944863, "loss": 0.905, "step": 5448 }, { "epoch": 0.972080991882972, "grad_norm": 0.4619334936141968, "learning_rate": 0.0002609262343447454, "loss": 0.6049, "step": 5449 }, { "epoch": 0.9722593881009722, "grad_norm": 0.5011905431747437, "learning_rate": 0.0002608562385819004, "loss": 0.6995, "step": 5450 }, { "epoch": 0.9724377843189724, "grad_norm": 0.5354875326156616, "learning_rate": 0.0002607862419664111, "loss": 0.585, "step": 5451 }, { "epoch": 0.9726161805369726, "grad_norm": 0.46661576628685, "learning_rate": 0.00026071624450377495, "loss": 0.7073, "step": 5452 }, { "epoch": 0.9727945767549728, "grad_norm": 0.511663556098938, "learning_rate": 0.0002606462461994896, "loss": 0.8606, "step": 5453 }, { "epoch": 0.972972972972973, "grad_norm": 0.48708683252334595, "learning_rate": 0.0002605762470590527, "loss": 0.6398, "step": 5454 }, { "epoch": 0.9731513691909731, "grad_norm": 0.48029178380966187, "learning_rate": 0.0002605062470879619, "loss": 0.7369, "step": 5455 }, { "epoch": 0.9733297654089733, "grad_norm": 0.481996089220047, "learning_rate": 0.00026043624629171495, "loss": 0.9269, "step": 5456 }, { "epoch": 0.9735081616269735, "grad_norm": 0.5002886652946472, "learning_rate": 0.0002603662446758097, "loss": 0.8318, "step": 5457 }, { "epoch": 0.9736865578449737, "grad_norm": 0.48849231004714966, "learning_rate": 0.0002602962422457441, "loss": 0.7965, "step": 5458 }, { "epoch": 0.9738649540629739, "grad_norm": 0.4990347623825073, "learning_rate": 0.000260226239007016, "loss": 0.87, "step": 5459 }, { "epoch": 0.974043350280974, "grad_norm": 0.5066533088684082, "learning_rate": 0.0002601562349651235, "loss": 0.7364, "step": 5460 }, { "epoch": 0.9742217464989742, "grad_norm": 0.5521520972251892, "learning_rate": 0.0002600862301255647, "loss": 0.9395, "step": 5461 }, { "epoch": 0.9744001427169744, "grad_norm": 0.7457981109619141, "learning_rate": 0.00026001622449383776, "loss": 0.8701, "step": 5462 }, { "epoch": 0.9745785389349746, "grad_norm": 0.5160167217254639, "learning_rate": 0.00025994621807544084, "loss": 0.9626, "step": 5463 }, { "epoch": 0.9747569351529748, "grad_norm": 0.504137396812439, "learning_rate": 0.0002598762108758722, "loss": 0.7476, "step": 5464 }, { "epoch": 0.974935331370975, "grad_norm": 0.4687594473361969, "learning_rate": 0.00025980620290063023, "loss": 0.7256, "step": 5465 }, { "epoch": 0.9751137275889751, "grad_norm": 0.44819939136505127, "learning_rate": 0.0002597361941552133, "loss": 0.6916, "step": 5466 }, { "epoch": 0.9752921238069753, "grad_norm": 2.4401307106018066, "learning_rate": 0.00025966618464511986, "loss": 0.7513, "step": 5467 }, { "epoch": 0.9754705200249755, "grad_norm": 0.48852846026420593, "learning_rate": 0.0002595961743758484, "loss": 0.9704, "step": 5468 }, { "epoch": 0.9756489162429757, "grad_norm": 0.47070446610450745, "learning_rate": 0.00025952616335289766, "loss": 0.8079, "step": 5469 }, { "epoch": 0.9758273124609759, "grad_norm": 0.49295809864997864, "learning_rate": 0.00025945615158176605, "loss": 1.0164, "step": 5470 }, { "epoch": 0.976005708678976, "grad_norm": 0.5102391839027405, "learning_rate": 0.00025938613906795237, "loss": 1.1723, "step": 5471 }, { "epoch": 0.9761841048969762, "grad_norm": 0.4273965656757355, "learning_rate": 0.0002593161258169554, "loss": 0.5914, "step": 5472 }, { "epoch": 0.9763625011149764, "grad_norm": 0.4733925759792328, "learning_rate": 0.00025924611183427386, "loss": 0.8552, "step": 5473 }, { "epoch": 0.9765408973329766, "grad_norm": 0.4754532277584076, "learning_rate": 0.00025917609712540674, "loss": 0.8698, "step": 5474 }, { "epoch": 0.9767192935509768, "grad_norm": 0.5383017063140869, "learning_rate": 0.0002591060816958529, "loss": 1.0911, "step": 5475 }, { "epoch": 0.976897689768977, "grad_norm": 0.50196772813797, "learning_rate": 0.00025903606555111123, "loss": 0.9058, "step": 5476 }, { "epoch": 0.977076085986977, "grad_norm": 0.5124291181564331, "learning_rate": 0.000258966048696681, "loss": 0.9839, "step": 5477 }, { "epoch": 0.9772544822049772, "grad_norm": 0.5031840801239014, "learning_rate": 0.0002588960311380611, "loss": 0.7175, "step": 5478 }, { "epoch": 0.9774328784229774, "grad_norm": 0.46920791268348694, "learning_rate": 0.0002588260128807507, "loss": 0.8517, "step": 5479 }, { "epoch": 0.9776112746409776, "grad_norm": 0.5168631672859192, "learning_rate": 0.0002587559939302491, "loss": 0.964, "step": 5480 }, { "epoch": 0.9777896708589778, "grad_norm": 0.44529473781585693, "learning_rate": 0.00025868597429205543, "loss": 0.778, "step": 5481 }, { "epoch": 0.9779680670769779, "grad_norm": 0.4642591178417206, "learning_rate": 0.00025861595397166915, "loss": 0.7513, "step": 5482 }, { "epoch": 0.9781464632949781, "grad_norm": 0.47108370065689087, "learning_rate": 0.00025854593297458956, "loss": 0.6897, "step": 5483 }, { "epoch": 0.9783248595129783, "grad_norm": 0.5786851048469543, "learning_rate": 0.00025847591130631603, "loss": 0.9704, "step": 5484 }, { "epoch": 0.9785032557309785, "grad_norm": 0.4903102219104767, "learning_rate": 0.0002584058889723481, "loss": 0.9588, "step": 5485 }, { "epoch": 0.9786816519489787, "grad_norm": 0.40852758288383484, "learning_rate": 0.00025833586597818526, "loss": 0.6194, "step": 5486 }, { "epoch": 0.9788600481669789, "grad_norm": 0.42910122871398926, "learning_rate": 0.00025826584232932704, "loss": 0.7026, "step": 5487 }, { "epoch": 0.979038444384979, "grad_norm": 0.6865221261978149, "learning_rate": 0.00025819581803127316, "loss": 0.932, "step": 5488 }, { "epoch": 0.9792168406029792, "grad_norm": 0.44170790910720825, "learning_rate": 0.0002581257930895233, "loss": 0.7926, "step": 5489 }, { "epoch": 0.9793952368209794, "grad_norm": 0.4942973256111145, "learning_rate": 0.00025805576750957714, "loss": 0.9616, "step": 5490 }, { "epoch": 0.9795736330389796, "grad_norm": 0.5086950063705444, "learning_rate": 0.0002579857412969345, "loss": 0.9139, "step": 5491 }, { "epoch": 0.9797520292569798, "grad_norm": 0.5349062085151672, "learning_rate": 0.00025791571445709505, "loss": 0.9468, "step": 5492 }, { "epoch": 0.9799304254749799, "grad_norm": 0.5083227157592773, "learning_rate": 0.0002578456869955589, "loss": 0.9298, "step": 5493 }, { "epoch": 0.9801088216929801, "grad_norm": 0.4308398962020874, "learning_rate": 0.0002577756589178258, "loss": 0.6871, "step": 5494 }, { "epoch": 0.9802872179109803, "grad_norm": 0.3946779668331146, "learning_rate": 0.0002577056302293958, "loss": 0.7342, "step": 5495 }, { "epoch": 0.9804656141289805, "grad_norm": 0.4864563047885895, "learning_rate": 0.000257635600935769, "loss": 0.9185, "step": 5496 }, { "epoch": 0.9806440103469807, "grad_norm": 0.427357941865921, "learning_rate": 0.00025756557104244534, "loss": 0.7518, "step": 5497 }, { "epoch": 0.9808224065649809, "grad_norm": 0.4161073565483093, "learning_rate": 0.000257495540554925, "loss": 0.5618, "step": 5498 }, { "epoch": 0.981000802782981, "grad_norm": 0.437459796667099, "learning_rate": 0.00025742550947870806, "loss": 0.638, "step": 5499 }, { "epoch": 0.9811791990009812, "grad_norm": 0.4389030635356903, "learning_rate": 0.00025735547781929484, "loss": 0.6333, "step": 5500 }, { "epoch": 0.9813575952189814, "grad_norm": 0.47596055269241333, "learning_rate": 0.00025728544558218557, "loss": 0.7331, "step": 5501 }, { "epoch": 0.9815359914369816, "grad_norm": 0.49048876762390137, "learning_rate": 0.00025721541277288053, "loss": 0.7336, "step": 5502 }, { "epoch": 0.9817143876549818, "grad_norm": 0.4905683398246765, "learning_rate": 0.0002571453793968801, "loss": 0.8261, "step": 5503 }, { "epoch": 0.9818927838729818, "grad_norm": 0.46471673250198364, "learning_rate": 0.0002570753454596846, "loss": 0.8081, "step": 5504 }, { "epoch": 0.982071180090982, "grad_norm": 0.49744272232055664, "learning_rate": 0.00025700531096679456, "loss": 0.8207, "step": 5505 }, { "epoch": 0.9822495763089822, "grad_norm": 0.4972292184829712, "learning_rate": 0.0002569352759237104, "loss": 1.0645, "step": 5506 }, { "epoch": 0.9824279725269824, "grad_norm": 0.47991102933883667, "learning_rate": 0.00025686524033593263, "loss": 0.8118, "step": 5507 }, { "epoch": 0.9826063687449826, "grad_norm": 0.41838109493255615, "learning_rate": 0.00025679520420896184, "loss": 0.7066, "step": 5508 }, { "epoch": 0.9827847649629828, "grad_norm": 0.40462827682495117, "learning_rate": 0.00025672516754829866, "loss": 0.6267, "step": 5509 }, { "epoch": 0.9829631611809829, "grad_norm": 0.4698152542114258, "learning_rate": 0.00025665513035944373, "loss": 0.9033, "step": 5510 }, { "epoch": 0.9831415573989831, "grad_norm": 0.4284512400627136, "learning_rate": 0.0002565850926478977, "loss": 0.7329, "step": 5511 }, { "epoch": 0.9833199536169833, "grad_norm": 0.4176551401615143, "learning_rate": 0.0002565150544191613, "loss": 0.7597, "step": 5512 }, { "epoch": 0.9834983498349835, "grad_norm": 0.46959561109542847, "learning_rate": 0.00025644501567873533, "loss": 0.7277, "step": 5513 }, { "epoch": 0.9836767460529837, "grad_norm": 0.4930439591407776, "learning_rate": 0.0002563749764321207, "loss": 0.8034, "step": 5514 }, { "epoch": 0.9838551422709838, "grad_norm": 0.4476320743560791, "learning_rate": 0.0002563049366848181, "loss": 0.7959, "step": 5515 }, { "epoch": 0.984033538488984, "grad_norm": 0.7321682572364807, "learning_rate": 0.00025623489644232845, "loss": 0.7308, "step": 5516 }, { "epoch": 0.9842119347069842, "grad_norm": 0.447443425655365, "learning_rate": 0.00025616485571015277, "loss": 0.7, "step": 5517 }, { "epoch": 0.9843903309249844, "grad_norm": 0.4954073131084442, "learning_rate": 0.0002560948144937919, "loss": 0.8115, "step": 5518 }, { "epoch": 0.9845687271429846, "grad_norm": 0.5299807190895081, "learning_rate": 0.00025602477279874697, "loss": 0.9499, "step": 5519 }, { "epoch": 0.9847471233609848, "grad_norm": 0.4475019574165344, "learning_rate": 0.000255954730630519, "loss": 0.7138, "step": 5520 }, { "epoch": 0.9849255195789849, "grad_norm": 0.4217219650745392, "learning_rate": 0.000255884687994609, "loss": 0.697, "step": 5521 }, { "epoch": 0.9851039157969851, "grad_norm": 0.4400602877140045, "learning_rate": 0.0002558146448965182, "loss": 0.7563, "step": 5522 }, { "epoch": 0.9852823120149853, "grad_norm": 0.42823049426078796, "learning_rate": 0.0002557446013417477, "loss": 0.6568, "step": 5523 }, { "epoch": 0.9854607082329855, "grad_norm": 0.4775795638561249, "learning_rate": 0.00025567455733579867, "loss": 0.6586, "step": 5524 }, { "epoch": 0.9856391044509857, "grad_norm": 0.4584738314151764, "learning_rate": 0.00025560451288417224, "loss": 0.7584, "step": 5525 }, { "epoch": 0.9858175006689858, "grad_norm": 0.6482552289962769, "learning_rate": 0.00025553446799236987, "loss": 0.9836, "step": 5526 }, { "epoch": 0.985995896886986, "grad_norm": 2.181297779083252, "learning_rate": 0.00025546442266589274, "loss": 0.9366, "step": 5527 }, { "epoch": 0.9861742931049862, "grad_norm": 0.4901970326900482, "learning_rate": 0.0002553943769102422, "loss": 0.7734, "step": 5528 }, { "epoch": 0.9863526893229864, "grad_norm": 0.5219231843948364, "learning_rate": 0.00025532433073091967, "loss": 0.7347, "step": 5529 }, { "epoch": 0.9865310855409866, "grad_norm": 0.5067287087440491, "learning_rate": 0.0002552542841334265, "loss": 0.8365, "step": 5530 }, { "epoch": 0.9867094817589868, "grad_norm": 3.1279547214508057, "learning_rate": 0.0002551842371232641, "loss": 0.8209, "step": 5531 }, { "epoch": 0.9868878779769868, "grad_norm": 0.48862382769584656, "learning_rate": 0.00025511418970593393, "loss": 0.8062, "step": 5532 }, { "epoch": 0.987066274194987, "grad_norm": 1.2135387659072876, "learning_rate": 0.0002550441418869374, "loss": 0.8549, "step": 5533 }, { "epoch": 0.9872446704129872, "grad_norm": 0.8009121417999268, "learning_rate": 0.00025497409367177627, "loss": 0.8502, "step": 5534 }, { "epoch": 0.9874230666309874, "grad_norm": 0.6405108571052551, "learning_rate": 0.0002549040450659519, "loss": 0.9773, "step": 5535 }, { "epoch": 0.9876014628489876, "grad_norm": 3.273838758468628, "learning_rate": 0.00025483399607496604, "loss": 0.9752, "step": 5536 }, { "epoch": 0.9877798590669877, "grad_norm": 0.4358234703540802, "learning_rate": 0.0002547639467043201, "loss": 0.6485, "step": 5537 }, { "epoch": 0.9879582552849879, "grad_norm": 0.5305948853492737, "learning_rate": 0.00025469389695951595, "loss": 0.8997, "step": 5538 }, { "epoch": 0.9881366515029881, "grad_norm": 0.7307850122451782, "learning_rate": 0.0002546238468460551, "loss": 0.7562, "step": 5539 }, { "epoch": 0.9883150477209883, "grad_norm": 0.47485119104385376, "learning_rate": 0.0002545537963694392, "loss": 0.7355, "step": 5540 }, { "epoch": 0.9884934439389885, "grad_norm": 0.5570200085639954, "learning_rate": 0.0002544837455351702, "loss": 0.8597, "step": 5541 }, { "epoch": 0.9886718401569887, "grad_norm": 0.592291533946991, "learning_rate": 0.00025441369434874977, "loss": 1.0744, "step": 5542 }, { "epoch": 0.9888502363749888, "grad_norm": 0.487498939037323, "learning_rate": 0.0002543436428156796, "loss": 0.8186, "step": 5543 }, { "epoch": 0.989028632592989, "grad_norm": 0.475384920835495, "learning_rate": 0.0002542735909414617, "loss": 0.6558, "step": 5544 }, { "epoch": 0.9892070288109892, "grad_norm": 0.44877302646636963, "learning_rate": 0.00025420353873159774, "loss": 0.7219, "step": 5545 }, { "epoch": 0.9893854250289894, "grad_norm": 0.49043965339660645, "learning_rate": 0.0002541334861915897, "loss": 0.8794, "step": 5546 }, { "epoch": 0.9895638212469896, "grad_norm": 0.47729647159576416, "learning_rate": 0.00025406343332693934, "loss": 0.8159, "step": 5547 }, { "epoch": 0.9897422174649897, "grad_norm": 0.48762455582618713, "learning_rate": 0.0002539933801431487, "loss": 0.7083, "step": 5548 }, { "epoch": 0.9899206136829899, "grad_norm": 0.4554971158504486, "learning_rate": 0.0002539233266457198, "loss": 0.6679, "step": 5549 }, { "epoch": 0.9900990099009901, "grad_norm": 0.5924381613731384, "learning_rate": 0.0002538532728401544, "loss": 0.9812, "step": 5550 }, { "epoch": 0.9902774061189903, "grad_norm": 0.5297411680221558, "learning_rate": 0.0002537832187319547, "loss": 0.7363, "step": 5551 }, { "epoch": 0.9904558023369905, "grad_norm": 0.5259188413619995, "learning_rate": 0.00025371316432662254, "loss": 0.924, "step": 5552 }, { "epoch": 0.9906341985549907, "grad_norm": 0.5142114162445068, "learning_rate": 0.0002536431096296601, "loss": 0.94, "step": 5553 }, { "epoch": 0.9908125947729908, "grad_norm": 0.4876602590084076, "learning_rate": 0.00025357305464656943, "loss": 0.9249, "step": 5554 }, { "epoch": 0.990990990990991, "grad_norm": 0.4915853440761566, "learning_rate": 0.00025350299938285253, "loss": 1.1248, "step": 5555 }, { "epoch": 0.9911693872089912, "grad_norm": 0.5007473826408386, "learning_rate": 0.0002534329438440116, "loss": 1.0163, "step": 5556 }, { "epoch": 0.9913477834269914, "grad_norm": 0.4649515151977539, "learning_rate": 0.0002533628880355487, "loss": 0.8398, "step": 5557 }, { "epoch": 0.9915261796449916, "grad_norm": 0.4449246823787689, "learning_rate": 0.0002532928319629661, "loss": 0.8213, "step": 5558 }, { "epoch": 0.9917045758629917, "grad_norm": 0.47130802273750305, "learning_rate": 0.00025322277563176584, "loss": 0.8082, "step": 5559 }, { "epoch": 0.9918829720809919, "grad_norm": 0.5442295670509338, "learning_rate": 0.00025315271904745014, "loss": 0.9156, "step": 5560 }, { "epoch": 0.992061368298992, "grad_norm": 0.5047063827514648, "learning_rate": 0.0002530826622155213, "loss": 0.8635, "step": 5561 }, { "epoch": 0.9922397645169923, "grad_norm": 0.504047155380249, "learning_rate": 0.00025301260514148146, "loss": 0.9712, "step": 5562 }, { "epoch": 0.9924181607349924, "grad_norm": 0.46802255511283875, "learning_rate": 0.0002529425478308329, "loss": 0.8758, "step": 5563 }, { "epoch": 0.9925965569529926, "grad_norm": 0.5220746397972107, "learning_rate": 0.00025287249028907796, "loss": 1.001, "step": 5564 }, { "epoch": 0.9927749531709927, "grad_norm": 0.43903014063835144, "learning_rate": 0.0002528024325217188, "loss": 0.6244, "step": 5565 }, { "epoch": 0.9929533493889929, "grad_norm": 0.4588782787322998, "learning_rate": 0.0002527323745342578, "loss": 0.6223, "step": 5566 }, { "epoch": 0.9931317456069931, "grad_norm": 0.501520574092865, "learning_rate": 0.0002526623163321973, "loss": 0.9619, "step": 5567 }, { "epoch": 0.9933101418249933, "grad_norm": 0.4871865510940552, "learning_rate": 0.0002525922579210396, "loss": 0.9316, "step": 5568 }, { "epoch": 0.9934885380429935, "grad_norm": 0.4477481544017792, "learning_rate": 0.0002525221993062871, "loss": 0.7816, "step": 5569 }, { "epoch": 0.9936669342609936, "grad_norm": 0.4530073404312134, "learning_rate": 0.00025245214049344225, "loss": 0.7685, "step": 5570 }, { "epoch": 0.9938453304789938, "grad_norm": 0.4744289815425873, "learning_rate": 0.0002523820814880072, "loss": 0.7734, "step": 5571 }, { "epoch": 0.994023726696994, "grad_norm": 0.4457191526889801, "learning_rate": 0.0002523120222954845, "loss": 0.895, "step": 5572 }, { "epoch": 0.9942021229149942, "grad_norm": 0.4728289842605591, "learning_rate": 0.00025224196292137664, "loss": 0.7997, "step": 5573 }, { "epoch": 0.9943805191329944, "grad_norm": 0.4260111451148987, "learning_rate": 0.00025217190337118594, "loss": 0.7461, "step": 5574 }, { "epoch": 0.9945589153509946, "grad_norm": 0.4607539176940918, "learning_rate": 0.0002521018436504149, "loss": 0.7959, "step": 5575 }, { "epoch": 0.9947373115689947, "grad_norm": 0.6200941801071167, "learning_rate": 0.000252031783764566, "loss": 0.8551, "step": 5576 }, { "epoch": 0.9949157077869949, "grad_norm": 0.4465862810611725, "learning_rate": 0.0002519617237191416, "loss": 0.7163, "step": 5577 }, { "epoch": 0.9950941040049951, "grad_norm": 0.4562668800354004, "learning_rate": 0.00025189166351964425, "loss": 0.7838, "step": 5578 }, { "epoch": 0.9952725002229953, "grad_norm": 0.45672228932380676, "learning_rate": 0.0002518216031715765, "loss": 0.7227, "step": 5579 }, { "epoch": 0.9954508964409955, "grad_norm": 0.4691329002380371, "learning_rate": 0.0002517515426804408, "loss": 0.9586, "step": 5580 }, { "epoch": 0.9956292926589956, "grad_norm": 1.6988149881362915, "learning_rate": 0.00025168148205173974, "loss": 0.7888, "step": 5581 }, { "epoch": 0.9958076888769958, "grad_norm": 0.46691417694091797, "learning_rate": 0.0002516114212909758, "loss": 0.6838, "step": 5582 }, { "epoch": 0.995986085094996, "grad_norm": 0.45711347460746765, "learning_rate": 0.0002515413604036515, "loss": 0.7862, "step": 5583 }, { "epoch": 0.9961644813129962, "grad_norm": 0.49638915061950684, "learning_rate": 0.0002514712993952694, "loss": 0.7689, "step": 5584 }, { "epoch": 0.9963428775309964, "grad_norm": 0.4389818608760834, "learning_rate": 0.0002514012382713321, "loss": 0.6992, "step": 5585 }, { "epoch": 0.9965212737489966, "grad_norm": 0.4768664538860321, "learning_rate": 0.00025133117703734207, "loss": 0.6965, "step": 5586 }, { "epoch": 0.9966996699669967, "grad_norm": 0.5143462419509888, "learning_rate": 0.0002512611156988021, "loss": 1.0382, "step": 5587 }, { "epoch": 0.9968780661849969, "grad_norm": 0.47159501910209656, "learning_rate": 0.00025119105426121455, "loss": 0.7476, "step": 5588 }, { "epoch": 0.9970564624029971, "grad_norm": 0.4691692292690277, "learning_rate": 0.0002511209927300822, "loss": 0.8535, "step": 5589 }, { "epoch": 0.9972348586209973, "grad_norm": 0.44228291511535645, "learning_rate": 0.00025105093111090756, "loss": 0.777, "step": 5590 }, { "epoch": 0.9974132548389975, "grad_norm": 0.43507784605026245, "learning_rate": 0.00025098086940919317, "loss": 0.6588, "step": 5591 }, { "epoch": 0.9975916510569975, "grad_norm": 0.4751743972301483, "learning_rate": 0.00025091080763044177, "loss": 0.6748, "step": 5592 }, { "epoch": 0.9977700472749977, "grad_norm": 4.461918354034424, "learning_rate": 0.0002508407457801559, "loss": 0.669, "step": 5593 }, { "epoch": 0.9979484434929979, "grad_norm": 0.8328418135643005, "learning_rate": 0.00025077068386383816, "loss": 0.8627, "step": 5594 }, { "epoch": 0.9981268397109981, "grad_norm": 1.091975212097168, "learning_rate": 0.00025070062188699136, "loss": 0.8833, "step": 5595 }, { "epoch": 0.9983052359289983, "grad_norm": 0.4713873565196991, "learning_rate": 0.00025063055985511794, "loss": 0.6602, "step": 5596 }, { "epoch": 0.9984836321469985, "grad_norm": 0.5721749663352966, "learning_rate": 0.0002505604977737207, "loss": 0.8543, "step": 5597 }, { "epoch": 0.9986620283649986, "grad_norm": 0.5203986167907715, "learning_rate": 0.00025049043564830207, "loss": 0.7544, "step": 5598 }, { "epoch": 0.9988404245829988, "grad_norm": 0.7062872052192688, "learning_rate": 0.00025042037348436497, "loss": 0.8659, "step": 5599 }, { "epoch": 0.999018820800999, "grad_norm": 0.476829469203949, "learning_rate": 0.00025035031128741185, "loss": 0.7615, "step": 5600 }, { "epoch": 0.9991972170189992, "grad_norm": 0.42391034960746765, "learning_rate": 0.0002502802490629454, "loss": 0.5286, "step": 5601 }, { "epoch": 0.9993756132369994, "grad_norm": 0.5202667713165283, "learning_rate": 0.0002502101868164684, "loss": 0.9911, "step": 5602 }, { "epoch": 0.9995540094549995, "grad_norm": 0.47505563497543335, "learning_rate": 0.0002501401245534834, "loss": 0.9305, "step": 5603 }, { "epoch": 0.9997324056729997, "grad_norm": 0.49714621901512146, "learning_rate": 0.000250070062279493, "loss": 0.8849, "step": 5604 }, { "epoch": 0.9999108018909999, "grad_norm": 0.5128735899925232, "learning_rate": 0.00025, "loss": 0.8105, "step": 5605 }, { "epoch": 1.0, "grad_norm": 0.8229463696479797, "learning_rate": 0.000249929937720507, "loss": 0.5342, "step": 5606 }, { "epoch": 1.0001783962180002, "grad_norm": 0.48117566108703613, "learning_rate": 0.00024985987544651667, "loss": 0.6871, "step": 5607 }, { "epoch": 1.0003567924360004, "grad_norm": 0.4833311438560486, "learning_rate": 0.0002497898131835316, "loss": 0.6285, "step": 5608 }, { "epoch": 1.0005351886540006, "grad_norm": 0.5113769769668579, "learning_rate": 0.0002497197509370546, "loss": 0.7797, "step": 5609 }, { "epoch": 1.0007135848720008, "grad_norm": 0.5292689204216003, "learning_rate": 0.0002496496887125882, "loss": 0.8098, "step": 5610 }, { "epoch": 1.000891981090001, "grad_norm": 0.5784763097763062, "learning_rate": 0.0002495796265156351, "loss": 0.8951, "step": 5611 }, { "epoch": 1.001070377308001, "grad_norm": 0.516196608543396, "learning_rate": 0.000249509564351698, "loss": 0.8567, "step": 5612 }, { "epoch": 1.0012487735260012, "grad_norm": 0.49342212080955505, "learning_rate": 0.0002494395022262793, "loss": 0.6955, "step": 5613 }, { "epoch": 1.0014271697440014, "grad_norm": 0.45294061303138733, "learning_rate": 0.00024936944014488207, "loss": 0.6438, "step": 5614 }, { "epoch": 1.0016055659620016, "grad_norm": 0.5231368541717529, "learning_rate": 0.0002492993781130087, "loss": 0.7458, "step": 5615 }, { "epoch": 1.0017839621800018, "grad_norm": 0.49573010206222534, "learning_rate": 0.0002492293161361618, "loss": 0.6514, "step": 5616 }, { "epoch": 1.001962358398002, "grad_norm": 0.4060691297054291, "learning_rate": 0.00024915925421984417, "loss": 0.5311, "step": 5617 }, { "epoch": 1.0021407546160022, "grad_norm": 0.5073151588439941, "learning_rate": 0.0002490891923695583, "loss": 0.7972, "step": 5618 }, { "epoch": 1.0023191508340024, "grad_norm": 0.5470726490020752, "learning_rate": 0.00024901913059080684, "loss": 0.6951, "step": 5619 }, { "epoch": 1.0024975470520026, "grad_norm": 0.4746871888637543, "learning_rate": 0.0002489490688890925, "loss": 0.678, "step": 5620 }, { "epoch": 1.0026759432700028, "grad_norm": 0.6704702377319336, "learning_rate": 0.0002488790072699178, "loss": 0.9827, "step": 5621 }, { "epoch": 1.002854339488003, "grad_norm": 0.5733181834220886, "learning_rate": 0.00024880894573878546, "loss": 0.6582, "step": 5622 }, { "epoch": 1.003032735706003, "grad_norm": 0.4528675377368927, "learning_rate": 0.00024873888430119794, "loss": 0.6188, "step": 5623 }, { "epoch": 1.0032111319240031, "grad_norm": 0.4826764166355133, "learning_rate": 0.00024866882296265794, "loss": 0.7185, "step": 5624 }, { "epoch": 1.0033895281420033, "grad_norm": 0.4640424847602844, "learning_rate": 0.000248598761728668, "loss": 0.5546, "step": 5625 }, { "epoch": 1.0035679243600035, "grad_norm": 0.57952880859375, "learning_rate": 0.0002485287006047307, "loss": 0.5323, "step": 5626 }, { "epoch": 1.0037463205780037, "grad_norm": 0.4803033173084259, "learning_rate": 0.0002484586395963486, "loss": 0.7079, "step": 5627 }, { "epoch": 1.003924716796004, "grad_norm": 0.47530657052993774, "learning_rate": 0.0002483885787090242, "loss": 0.7073, "step": 5628 }, { "epoch": 1.0041031130140041, "grad_norm": 0.5128652453422546, "learning_rate": 0.00024831851794826027, "loss": 0.7797, "step": 5629 }, { "epoch": 1.0042815092320043, "grad_norm": 0.4710089862346649, "learning_rate": 0.0002482484573195592, "loss": 0.7584, "step": 5630 }, { "epoch": 1.0044599054500045, "grad_norm": 0.5013584494590759, "learning_rate": 0.0002481783968284235, "loss": 0.7168, "step": 5631 }, { "epoch": 1.0046383016680047, "grad_norm": 0.48483264446258545, "learning_rate": 0.00024810833648035576, "loss": 0.7159, "step": 5632 }, { "epoch": 1.004816697886005, "grad_norm": 0.43199649453163147, "learning_rate": 0.00024803827628085845, "loss": 0.6508, "step": 5633 }, { "epoch": 1.004995094104005, "grad_norm": 0.48384973406791687, "learning_rate": 0.00024796821623543407, "loss": 0.8085, "step": 5634 }, { "epoch": 1.005173490322005, "grad_norm": 0.5814610719680786, "learning_rate": 0.00024789815634958517, "loss": 0.7655, "step": 5635 }, { "epoch": 1.0053518865400053, "grad_norm": 0.4795527160167694, "learning_rate": 0.0002478280966288141, "loss": 0.7431, "step": 5636 }, { "epoch": 1.0055302827580055, "grad_norm": 0.4990082383155823, "learning_rate": 0.0002477580370786234, "loss": 0.7987, "step": 5637 }, { "epoch": 1.0057086789760057, "grad_norm": 0.5206576585769653, "learning_rate": 0.0002476879777045155, "loss": 0.7974, "step": 5638 }, { "epoch": 1.0058870751940059, "grad_norm": 0.38653305172920227, "learning_rate": 0.00024761791851199286, "loss": 0.5669, "step": 5639 }, { "epoch": 1.006065471412006, "grad_norm": 0.49176260828971863, "learning_rate": 0.0002475478595065578, "loss": 0.7417, "step": 5640 }, { "epoch": 1.0062438676300063, "grad_norm": 0.4860985279083252, "learning_rate": 0.0002474778006937129, "loss": 0.5978, "step": 5641 }, { "epoch": 1.0064222638480065, "grad_norm": 0.4277432858943939, "learning_rate": 0.0002474077420789604, "loss": 0.4554, "step": 5642 }, { "epoch": 1.0066006600660067, "grad_norm": 0.43579888343811035, "learning_rate": 0.0002473376836678028, "loss": 0.6738, "step": 5643 }, { "epoch": 1.0067790562840069, "grad_norm": 0.5105220079421997, "learning_rate": 0.00024726762546574215, "loss": 0.9275, "step": 5644 }, { "epoch": 1.0069574525020069, "grad_norm": 0.4739680886268616, "learning_rate": 0.0002471975674782812, "loss": 0.7133, "step": 5645 }, { "epoch": 1.007135848720007, "grad_norm": 0.4873730540275574, "learning_rate": 0.00024712750971092205, "loss": 0.7338, "step": 5646 }, { "epoch": 1.0073142449380073, "grad_norm": 0.45006611943244934, "learning_rate": 0.0002470574521691671, "loss": 0.6444, "step": 5647 }, { "epoch": 1.0074926411560075, "grad_norm": 0.46951183676719666, "learning_rate": 0.0002469873948585186, "loss": 0.7305, "step": 5648 }, { "epoch": 1.0076710373740076, "grad_norm": 0.4490543007850647, "learning_rate": 0.00024691733778447875, "loss": 0.6245, "step": 5649 }, { "epoch": 1.0078494335920078, "grad_norm": 0.5618079900741577, "learning_rate": 0.00024684728095254987, "loss": 0.787, "step": 5650 }, { "epoch": 1.008027829810008, "grad_norm": 7.213865756988525, "learning_rate": 0.0002467772243682342, "loss": 0.5768, "step": 5651 }, { "epoch": 1.0082062260280082, "grad_norm": 0.5140239000320435, "learning_rate": 0.000246707168037034, "loss": 0.7213, "step": 5652 }, { "epoch": 1.0083846222460084, "grad_norm": 0.5297546982765198, "learning_rate": 0.00024663711196445135, "loss": 0.7577, "step": 5653 }, { "epoch": 1.0085630184640086, "grad_norm": 0.5135631561279297, "learning_rate": 0.00024656705615598844, "loss": 0.8142, "step": 5654 }, { "epoch": 1.0087414146820088, "grad_norm": 0.4576781094074249, "learning_rate": 0.0002464970006171475, "loss": 0.6083, "step": 5655 }, { "epoch": 1.0089198109000088, "grad_norm": 0.5894820690155029, "learning_rate": 0.0002464269453534307, "loss": 0.8443, "step": 5656 }, { "epoch": 1.009098207118009, "grad_norm": 0.5274859070777893, "learning_rate": 0.00024635689037034, "loss": 0.9412, "step": 5657 }, { "epoch": 1.0092766033360092, "grad_norm": 0.46422895789146423, "learning_rate": 0.0002462868356733775, "loss": 0.7357, "step": 5658 }, { "epoch": 1.0094549995540094, "grad_norm": 0.49951404333114624, "learning_rate": 0.0002462167812680453, "loss": 0.8181, "step": 5659 }, { "epoch": 1.0096333957720096, "grad_norm": 0.48407530784606934, "learning_rate": 0.00024614672715984556, "loss": 0.6384, "step": 5660 }, { "epoch": 1.0098117919900098, "grad_norm": 1.810511827468872, "learning_rate": 0.0002460766733542803, "loss": 0.7134, "step": 5661 }, { "epoch": 1.00999018820801, "grad_norm": 0.4726197421550751, "learning_rate": 0.0002460066198568513, "loss": 0.6611, "step": 5662 }, { "epoch": 1.0101685844260102, "grad_norm": 0.512795090675354, "learning_rate": 0.0002459365666730607, "loss": 0.7829, "step": 5663 }, { "epoch": 1.0103469806440104, "grad_norm": 0.6051098108291626, "learning_rate": 0.0002458665138084104, "loss": 0.7989, "step": 5664 }, { "epoch": 1.0105253768620106, "grad_norm": 0.5258930921554565, "learning_rate": 0.00024579646126840233, "loss": 0.5785, "step": 5665 }, { "epoch": 1.0107037730800108, "grad_norm": 0.41359806060791016, "learning_rate": 0.0002457264090585384, "loss": 0.5786, "step": 5666 }, { "epoch": 1.0108821692980108, "grad_norm": 0.4931972622871399, "learning_rate": 0.0002456563571843204, "loss": 0.8471, "step": 5667 }, { "epoch": 1.011060565516011, "grad_norm": 0.49193498492240906, "learning_rate": 0.0002455863056512503, "loss": 0.8302, "step": 5668 }, { "epoch": 1.0112389617340112, "grad_norm": 0.5347012877464294, "learning_rate": 0.0002455162544648299, "loss": 1.1447, "step": 5669 }, { "epoch": 1.0114173579520114, "grad_norm": 0.46643638610839844, "learning_rate": 0.00024544620363056084, "loss": 0.5934, "step": 5670 }, { "epoch": 1.0115957541700116, "grad_norm": 0.6032198071479797, "learning_rate": 0.00024537615315394504, "loss": 0.5384, "step": 5671 }, { "epoch": 1.0117741503880118, "grad_norm": 0.5089538097381592, "learning_rate": 0.00024530610304048417, "loss": 0.8041, "step": 5672 }, { "epoch": 1.011952546606012, "grad_norm": 0.5048344731330872, "learning_rate": 0.00024523605329567996, "loss": 0.7037, "step": 5673 }, { "epoch": 1.0121309428240122, "grad_norm": 0.40389716625213623, "learning_rate": 0.00024516600392503397, "loss": 0.5336, "step": 5674 }, { "epoch": 1.0123093390420124, "grad_norm": 0.4871548116207123, "learning_rate": 0.0002450959549340481, "loss": 0.6761, "step": 5675 }, { "epoch": 1.0124877352600126, "grad_norm": 0.49612149596214294, "learning_rate": 0.00024502590632822374, "loss": 0.7691, "step": 5676 }, { "epoch": 1.0126661314780128, "grad_norm": 0.4323468506336212, "learning_rate": 0.0002449558581130626, "loss": 0.5754, "step": 5677 }, { "epoch": 1.0128445276960127, "grad_norm": 0.4288893938064575, "learning_rate": 0.00024488581029406614, "loss": 0.6697, "step": 5678 }, { "epoch": 1.013022923914013, "grad_norm": 0.4768436551094055, "learning_rate": 0.00024481576287673596, "loss": 0.657, "step": 5679 }, { "epoch": 1.0132013201320131, "grad_norm": 0.4346489906311035, "learning_rate": 0.00024474571586657353, "loss": 0.663, "step": 5680 }, { "epoch": 1.0133797163500133, "grad_norm": 0.5226327180862427, "learning_rate": 0.0002446756692690804, "loss": 0.9833, "step": 5681 }, { "epoch": 1.0135581125680135, "grad_norm": 0.5677915811538696, "learning_rate": 0.0002446056230897578, "loss": 0.9305, "step": 5682 }, { "epoch": 1.0137365087860137, "grad_norm": 0.5274161100387573, "learning_rate": 0.0002445355773341073, "loss": 0.7953, "step": 5683 }, { "epoch": 1.013914905004014, "grad_norm": 0.557561457157135, "learning_rate": 0.0002444655320076302, "loss": 0.9001, "step": 5684 }, { "epoch": 1.0140933012220141, "grad_norm": 0.3999897241592407, "learning_rate": 0.0002443954871158278, "loss": 0.6062, "step": 5685 }, { "epoch": 1.0142716974400143, "grad_norm": 0.6423475742340088, "learning_rate": 0.00024432544266420145, "loss": 0.8672, "step": 5686 }, { "epoch": 1.0144500936580145, "grad_norm": 0.5340455770492554, "learning_rate": 0.0002442553986582524, "loss": 0.7465, "step": 5687 }, { "epoch": 1.0146284898760147, "grad_norm": 0.4833824634552002, "learning_rate": 0.00024418535510348184, "loss": 0.7415, "step": 5688 }, { "epoch": 1.0148068860940147, "grad_norm": 0.452972948551178, "learning_rate": 0.00024411531200539102, "loss": 0.6568, "step": 5689 }, { "epoch": 1.014985282312015, "grad_norm": 0.45635709166526794, "learning_rate": 0.00024404526936948098, "loss": 0.5435, "step": 5690 }, { "epoch": 1.015163678530015, "grad_norm": 0.5237349271774292, "learning_rate": 0.00024397522720125302, "loss": 0.7549, "step": 5691 }, { "epoch": 1.0153420747480153, "grad_norm": 0.627680778503418, "learning_rate": 0.00024390518550620807, "loss": 0.6618, "step": 5692 }, { "epoch": 1.0155204709660155, "grad_norm": 0.5116132497787476, "learning_rate": 0.00024383514428984727, "loss": 0.7378, "step": 5693 }, { "epoch": 1.0156988671840157, "grad_norm": 1.0455267429351807, "learning_rate": 0.00024376510355767161, "loss": 0.8657, "step": 5694 }, { "epoch": 1.015877263402016, "grad_norm": 0.5515667200088501, "learning_rate": 0.000243695063315182, "loss": 0.7739, "step": 5695 }, { "epoch": 1.016055659620016, "grad_norm": 0.5279601812362671, "learning_rate": 0.0002436250235678794, "loss": 0.7465, "step": 5696 }, { "epoch": 1.0162340558380163, "grad_norm": 0.4827010929584503, "learning_rate": 0.00024355498432126468, "loss": 0.7674, "step": 5697 }, { "epoch": 1.0164124520560165, "grad_norm": 0.5047492384910583, "learning_rate": 0.00024348494558083873, "loss": 0.7607, "step": 5698 }, { "epoch": 1.0165908482740167, "grad_norm": 0.4985560476779938, "learning_rate": 0.00024341490735210237, "loss": 0.8236, "step": 5699 }, { "epoch": 1.0167692444920167, "grad_norm": 0.48609042167663574, "learning_rate": 0.00024334486964055634, "loss": 0.7264, "step": 5700 }, { "epoch": 1.0169476407100169, "grad_norm": 0.4970250129699707, "learning_rate": 0.00024327483245170138, "loss": 0.6943, "step": 5701 }, { "epoch": 1.017126036928017, "grad_norm": 0.5171718001365662, "learning_rate": 0.00024320479579103825, "loss": 0.6666, "step": 5702 }, { "epoch": 1.0173044331460173, "grad_norm": 0.48517653346061707, "learning_rate": 0.00024313475966406746, "loss": 0.7518, "step": 5703 }, { "epoch": 1.0174828293640175, "grad_norm": 0.5177552700042725, "learning_rate": 0.0002430647240762897, "loss": 0.6988, "step": 5704 }, { "epoch": 1.0176612255820177, "grad_norm": 0.4849720597267151, "learning_rate": 0.00024299468903320542, "loss": 0.6417, "step": 5705 }, { "epoch": 1.0178396218000179, "grad_norm": 0.5243503451347351, "learning_rate": 0.00024292465454031536, "loss": 0.7603, "step": 5706 }, { "epoch": 1.018018018018018, "grad_norm": 0.48335912823677063, "learning_rate": 0.00024285462060311995, "loss": 0.8571, "step": 5707 }, { "epoch": 1.0181964142360183, "grad_norm": 0.48116248846054077, "learning_rate": 0.00024278458722711948, "loss": 0.717, "step": 5708 }, { "epoch": 1.0183748104540185, "grad_norm": 0.5435453653335571, "learning_rate": 0.0002427145544178145, "loss": 0.7622, "step": 5709 }, { "epoch": 1.0185532066720187, "grad_norm": 0.5478907227516174, "learning_rate": 0.0002426445221807052, "loss": 0.6225, "step": 5710 }, { "epoch": 1.0187316028900186, "grad_norm": 0.44758474826812744, "learning_rate": 0.000242574490521292, "loss": 0.7621, "step": 5711 }, { "epoch": 1.0189099991080188, "grad_norm": 0.4896325170993805, "learning_rate": 0.0002425044594450751, "loss": 0.803, "step": 5712 }, { "epoch": 1.019088395326019, "grad_norm": 0.6267058253288269, "learning_rate": 0.00024243442895755476, "loss": 0.782, "step": 5713 }, { "epoch": 1.0192667915440192, "grad_norm": 0.49481847882270813, "learning_rate": 0.00024236439906423105, "loss": 0.7271, "step": 5714 }, { "epoch": 1.0194451877620194, "grad_norm": 0.42377015948295593, "learning_rate": 0.00024229436977060427, "loss": 0.529, "step": 5715 }, { "epoch": 1.0196235839800196, "grad_norm": 0.4759003520011902, "learning_rate": 0.00024222434108217428, "loss": 0.7413, "step": 5716 }, { "epoch": 1.0198019801980198, "grad_norm": 0.41128072142601013, "learning_rate": 0.0002421543130044412, "loss": 0.5624, "step": 5717 }, { "epoch": 1.01998037641602, "grad_norm": 0.5285554528236389, "learning_rate": 0.00024208428554290502, "loss": 0.71, "step": 5718 }, { "epoch": 1.0201587726340202, "grad_norm": 0.4953111410140991, "learning_rate": 0.00024201425870306565, "loss": 0.6101, "step": 5719 }, { "epoch": 1.0203371688520204, "grad_norm": 0.5717734098434448, "learning_rate": 0.0002419442324904229, "loss": 0.8363, "step": 5720 }, { "epoch": 1.0205155650700206, "grad_norm": 0.49335336685180664, "learning_rate": 0.0002418742069104767, "loss": 0.5884, "step": 5721 }, { "epoch": 1.0206939612880206, "grad_norm": 0.7236892580986023, "learning_rate": 0.0002418041819687268, "loss": 0.7232, "step": 5722 }, { "epoch": 1.0208723575060208, "grad_norm": 0.5189496278762817, "learning_rate": 0.00024173415767067295, "loss": 0.7079, "step": 5723 }, { "epoch": 1.021050753724021, "grad_norm": 0.5228090882301331, "learning_rate": 0.00024166413402181477, "loss": 0.7657, "step": 5724 }, { "epoch": 1.0212291499420212, "grad_norm": 0.4883708953857422, "learning_rate": 0.00024159411102765195, "loss": 0.6471, "step": 5725 }, { "epoch": 1.0214075461600214, "grad_norm": 0.5337876677513123, "learning_rate": 0.00024152408869368398, "loss": 0.8151, "step": 5726 }, { "epoch": 1.0215859423780216, "grad_norm": 0.6350103616714478, "learning_rate": 0.00024145406702541047, "loss": 0.694, "step": 5727 }, { "epoch": 1.0217643385960218, "grad_norm": 0.49717575311660767, "learning_rate": 0.00024138404602833092, "loss": 0.6491, "step": 5728 }, { "epoch": 1.021942734814022, "grad_norm": 0.4632187783718109, "learning_rate": 0.0002413140257079446, "loss": 0.6376, "step": 5729 }, { "epoch": 1.0221211310320222, "grad_norm": 0.7505040764808655, "learning_rate": 0.000241244006069751, "loss": 0.7011, "step": 5730 }, { "epoch": 1.0222995272500224, "grad_norm": 0.4758153557777405, "learning_rate": 0.00024117398711924937, "loss": 0.6989, "step": 5731 }, { "epoch": 1.0224779234680226, "grad_norm": 0.43656665086746216, "learning_rate": 0.000241103968861939, "loss": 0.5964, "step": 5732 }, { "epoch": 1.0226563196860226, "grad_norm": 0.5343319177627563, "learning_rate": 0.00024103395130331909, "loss": 0.855, "step": 5733 }, { "epoch": 1.0228347159040228, "grad_norm": 0.6632677912712097, "learning_rate": 0.00024096393444888878, "loss": 0.5942, "step": 5734 }, { "epoch": 1.023013112122023, "grad_norm": 0.40866273641586304, "learning_rate": 0.0002408939183041472, "loss": 0.5326, "step": 5735 }, { "epoch": 1.0231915083400231, "grad_norm": 0.43526768684387207, "learning_rate": 0.00024082390287459327, "loss": 0.6601, "step": 5736 }, { "epoch": 1.0233699045580233, "grad_norm": 0.5318346619606018, "learning_rate": 0.00024075388816572612, "loss": 0.709, "step": 5737 }, { "epoch": 1.0235483007760235, "grad_norm": 0.5785126090049744, "learning_rate": 0.0002406838741830446, "loss": 0.6006, "step": 5738 }, { "epoch": 1.0237266969940237, "grad_norm": 0.4882996380329132, "learning_rate": 0.0002406138609320476, "loss": 0.7155, "step": 5739 }, { "epoch": 1.023905093212024, "grad_norm": 0.5886842012405396, "learning_rate": 0.00024054384841823396, "loss": 0.7278, "step": 5740 }, { "epoch": 1.0240834894300241, "grad_norm": 0.5136778354644775, "learning_rate": 0.00024047383664710243, "loss": 0.7661, "step": 5741 }, { "epoch": 1.0242618856480243, "grad_norm": 0.563352644443512, "learning_rate": 0.0002404038256241516, "loss": 0.9258, "step": 5742 }, { "epoch": 1.0244402818660245, "grad_norm": 0.4579552710056305, "learning_rate": 0.0002403338153548802, "loss": 0.5869, "step": 5743 }, { "epoch": 1.0246186780840245, "grad_norm": 0.5219502449035645, "learning_rate": 0.00024026380584478676, "loss": 0.7318, "step": 5744 }, { "epoch": 1.0247970743020247, "grad_norm": 0.4938696622848511, "learning_rate": 0.00024019379709936984, "loss": 0.8765, "step": 5745 }, { "epoch": 1.024975470520025, "grad_norm": 0.6994608640670776, "learning_rate": 0.00024012378912412785, "loss": 0.7989, "step": 5746 }, { "epoch": 1.025153866738025, "grad_norm": 0.4768621027469635, "learning_rate": 0.00024005378192455923, "loss": 0.6248, "step": 5747 }, { "epoch": 1.0253322629560253, "grad_norm": 1.2631269693374634, "learning_rate": 0.00023998377550616228, "loss": 0.6479, "step": 5748 }, { "epoch": 1.0255106591740255, "grad_norm": 0.48576176166534424, "learning_rate": 0.00023991376987443535, "loss": 0.7885, "step": 5749 }, { "epoch": 1.0256890553920257, "grad_norm": 0.5313836336135864, "learning_rate": 0.00023984376503487657, "loss": 0.8569, "step": 5750 }, { "epoch": 1.025867451610026, "grad_norm": 1.306301236152649, "learning_rate": 0.00023977376099298397, "loss": 0.7603, "step": 5751 }, { "epoch": 1.026045847828026, "grad_norm": 0.6186336278915405, "learning_rate": 0.0002397037577542559, "loss": 0.8546, "step": 5752 }, { "epoch": 1.0262242440460263, "grad_norm": 0.4687788784503937, "learning_rate": 0.00023963375532419032, "loss": 0.7342, "step": 5753 }, { "epoch": 1.0264026402640265, "grad_norm": 0.5840802192687988, "learning_rate": 0.00023956375370828508, "loss": 0.6639, "step": 5754 }, { "epoch": 1.0265810364820265, "grad_norm": 0.5559926629066467, "learning_rate": 0.00023949375291203815, "loss": 0.8283, "step": 5755 }, { "epoch": 1.0267594327000267, "grad_norm": 0.47672855854034424, "learning_rate": 0.0002394237529409473, "loss": 0.7104, "step": 5756 }, { "epoch": 1.0269378289180269, "grad_norm": 0.49324122071266174, "learning_rate": 0.00023935375380051038, "loss": 0.7398, "step": 5757 }, { "epoch": 1.027116225136027, "grad_norm": 0.547975480556488, "learning_rate": 0.0002392837554962251, "loss": 0.8606, "step": 5758 }, { "epoch": 1.0272946213540273, "grad_norm": 0.48279041051864624, "learning_rate": 0.00023921375803358897, "loss": 0.7028, "step": 5759 }, { "epoch": 1.0274730175720275, "grad_norm": 0.5213961601257324, "learning_rate": 0.00023914376141809967, "loss": 0.6459, "step": 5760 }, { "epoch": 1.0276514137900277, "grad_norm": 0.46963804960250854, "learning_rate": 0.00023907376565525469, "loss": 0.6467, "step": 5761 }, { "epoch": 1.0278298100080279, "grad_norm": 0.4742526113986969, "learning_rate": 0.0002390037707505515, "loss": 0.6972, "step": 5762 }, { "epoch": 1.028008206226028, "grad_norm": 0.6087868213653564, "learning_rate": 0.00023893377670948735, "loss": 0.6751, "step": 5763 }, { "epoch": 1.0281866024440283, "grad_norm": 0.5412265062332153, "learning_rate": 0.00023886378353755964, "loss": 0.7496, "step": 5764 }, { "epoch": 1.0283649986620285, "grad_norm": 0.6296983957290649, "learning_rate": 0.00023879379124026556, "loss": 0.7903, "step": 5765 }, { "epoch": 1.0285433948800284, "grad_norm": 0.5297571420669556, "learning_rate": 0.00023872379982310224, "loss": 0.7143, "step": 5766 }, { "epoch": 1.0287217910980286, "grad_norm": 0.4944717586040497, "learning_rate": 0.00023865380929156691, "loss": 0.7, "step": 5767 }, { "epoch": 1.0289001873160288, "grad_norm": 0.44193384051322937, "learning_rate": 0.0002385838196511565, "loss": 0.6133, "step": 5768 }, { "epoch": 1.029078583534029, "grad_norm": 2.3271965980529785, "learning_rate": 0.000238513830907368, "loss": 0.712, "step": 5769 }, { "epoch": 1.0292569797520292, "grad_norm": 0.45124170184135437, "learning_rate": 0.00023844384306569825, "loss": 0.6838, "step": 5770 }, { "epoch": 1.0294353759700294, "grad_norm": 0.488295316696167, "learning_rate": 0.0002383738561316441, "loss": 0.6474, "step": 5771 }, { "epoch": 1.0296137721880296, "grad_norm": 0.4569397270679474, "learning_rate": 0.00023830387011070225, "loss": 0.7513, "step": 5772 }, { "epoch": 1.0297921684060298, "grad_norm": 0.5301244258880615, "learning_rate": 0.00023823388500836945, "loss": 0.7864, "step": 5773 }, { "epoch": 1.02997056462403, "grad_norm": 0.4296034872531891, "learning_rate": 0.00023816390083014234, "loss": 0.5479, "step": 5774 }, { "epoch": 1.0301489608420302, "grad_norm": 0.49546340107917786, "learning_rate": 0.00023809391758151726, "loss": 0.6903, "step": 5775 }, { "epoch": 1.0303273570600304, "grad_norm": 0.47323548793792725, "learning_rate": 0.0002380239352679908, "loss": 0.6874, "step": 5776 }, { "epoch": 1.0305057532780304, "grad_norm": 0.45178884267807007, "learning_rate": 0.00023795395389505927, "loss": 0.8751, "step": 5777 }, { "epoch": 1.0306841494960306, "grad_norm": 0.5081565380096436, "learning_rate": 0.00023788397346821905, "loss": 0.7723, "step": 5778 }, { "epoch": 1.0308625457140308, "grad_norm": 0.4901391565799713, "learning_rate": 0.00023781399399296635, "loss": 0.628, "step": 5779 }, { "epoch": 1.031040941932031, "grad_norm": 0.4235868752002716, "learning_rate": 0.0002377440154747973, "loss": 0.5508, "step": 5780 }, { "epoch": 1.0312193381500312, "grad_norm": 0.45451483130455017, "learning_rate": 0.000237674037919208, "loss": 0.6556, "step": 5781 }, { "epoch": 1.0313977343680314, "grad_norm": 0.5195580720901489, "learning_rate": 0.0002376040613316944, "loss": 0.9728, "step": 5782 }, { "epoch": 1.0315761305860316, "grad_norm": 0.49326059222221375, "learning_rate": 0.00023753408571775255, "loss": 0.6793, "step": 5783 }, { "epoch": 1.0317545268040318, "grad_norm": 0.4820156395435333, "learning_rate": 0.00023746411108287825, "loss": 0.562, "step": 5784 }, { "epoch": 1.031932923022032, "grad_norm": 0.48524320125579834, "learning_rate": 0.00023739413743256726, "loss": 0.6854, "step": 5785 }, { "epoch": 1.0321113192400322, "grad_norm": 0.49960482120513916, "learning_rate": 0.0002373241647723153, "loss": 0.8011, "step": 5786 }, { "epoch": 1.0322897154580324, "grad_norm": 0.5163941979408264, "learning_rate": 0.00023725419310761803, "loss": 0.7166, "step": 5787 }, { "epoch": 1.0324681116760324, "grad_norm": 0.507465660572052, "learning_rate": 0.0002371842224439709, "loss": 0.8175, "step": 5788 }, { "epoch": 1.0326465078940326, "grad_norm": 0.47466954588890076, "learning_rate": 0.00023711425278686945, "loss": 0.6952, "step": 5789 }, { "epoch": 1.0328249041120328, "grad_norm": 0.4188692569732666, "learning_rate": 0.000237044284141809, "loss": 0.5577, "step": 5790 }, { "epoch": 1.033003300330033, "grad_norm": 0.5226710438728333, "learning_rate": 0.0002369743165142849, "loss": 0.8422, "step": 5791 }, { "epoch": 1.0331816965480332, "grad_norm": 0.49022915959358215, "learning_rate": 0.00023690434990979238, "loss": 0.8066, "step": 5792 }, { "epoch": 1.0333600927660334, "grad_norm": 0.49656999111175537, "learning_rate": 0.0002368343843338266, "loss": 0.7251, "step": 5793 }, { "epoch": 1.0335384889840336, "grad_norm": 0.40922585129737854, "learning_rate": 0.00023676441979188258, "loss": 0.5209, "step": 5794 }, { "epoch": 1.0337168852020338, "grad_norm": 0.46607285737991333, "learning_rate": 0.0002366944562894554, "loss": 0.6608, "step": 5795 }, { "epoch": 1.033895281420034, "grad_norm": 0.43443769216537476, "learning_rate": 0.00023662449383203988, "loss": 0.6759, "step": 5796 }, { "epoch": 1.0340736776380341, "grad_norm": 0.5248075127601624, "learning_rate": 0.0002365545324251307, "loss": 0.8347, "step": 5797 }, { "epoch": 1.0342520738560343, "grad_norm": 0.5106051564216614, "learning_rate": 0.0002364845720742229, "loss": 0.8222, "step": 5798 }, { "epoch": 1.0344304700740343, "grad_norm": 0.4647291600704193, "learning_rate": 0.00023641461278481096, "loss": 0.5955, "step": 5799 }, { "epoch": 1.0346088662920345, "grad_norm": 0.3780640661716461, "learning_rate": 0.00023634465456238957, "loss": 0.4401, "step": 5800 }, { "epoch": 1.0347872625100347, "grad_norm": 0.4369570016860962, "learning_rate": 0.00023627469741245306, "loss": 0.5725, "step": 5801 }, { "epoch": 1.034965658728035, "grad_norm": 0.4812033772468567, "learning_rate": 0.0002362047413404959, "loss": 0.7532, "step": 5802 }, { "epoch": 1.0351440549460351, "grad_norm": 0.49813112616539, "learning_rate": 0.00023613478635201246, "loss": 0.7331, "step": 5803 }, { "epoch": 1.0353224511640353, "grad_norm": 0.45200127363204956, "learning_rate": 0.0002360648324524969, "loss": 0.6147, "step": 5804 }, { "epoch": 1.0355008473820355, "grad_norm": 0.5208128690719604, "learning_rate": 0.0002359948796474434, "loss": 0.6966, "step": 5805 }, { "epoch": 1.0356792436000357, "grad_norm": 0.49845966696739197, "learning_rate": 0.00023592492794234605, "loss": 0.7405, "step": 5806 }, { "epoch": 1.035857639818036, "grad_norm": 0.4411349594593048, "learning_rate": 0.0002358549773426988, "loss": 0.6018, "step": 5807 }, { "epoch": 1.0360360360360361, "grad_norm": 0.5190544128417969, "learning_rate": 0.00023578502785399558, "loss": 0.9016, "step": 5808 }, { "epoch": 1.0362144322540363, "grad_norm": 0.45174866914749146, "learning_rate": 0.0002357150794817301, "loss": 0.5387, "step": 5809 }, { "epoch": 1.0363928284720363, "grad_norm": 0.56296706199646, "learning_rate": 0.00023564513223139615, "loss": 0.6236, "step": 5810 }, { "epoch": 1.0365712246900365, "grad_norm": 0.4660092294216156, "learning_rate": 0.0002355751861084873, "loss": 0.6932, "step": 5811 }, { "epoch": 1.0367496209080367, "grad_norm": 0.4947931170463562, "learning_rate": 0.00023550524111849705, "loss": 0.6975, "step": 5812 }, { "epoch": 1.0369280171260369, "grad_norm": 0.49009427428245544, "learning_rate": 0.0002354352972669191, "loss": 0.6677, "step": 5813 }, { "epoch": 1.037106413344037, "grad_norm": 0.5009847283363342, "learning_rate": 0.00023536535455924654, "loss": 0.7397, "step": 5814 }, { "epoch": 1.0372848095620373, "grad_norm": 0.5054614543914795, "learning_rate": 0.00023529541300097275, "loss": 0.7261, "step": 5815 }, { "epoch": 1.0374632057800375, "grad_norm": 0.46940669417381287, "learning_rate": 0.0002352254725975909, "loss": 0.7628, "step": 5816 }, { "epoch": 1.0376416019980377, "grad_norm": 0.4963463842868805, "learning_rate": 0.00023515553335459407, "loss": 0.9402, "step": 5817 }, { "epoch": 1.0378199982160379, "grad_norm": 0.46906179189682007, "learning_rate": 0.00023508559527747527, "loss": 0.6101, "step": 5818 }, { "epoch": 1.037998394434038, "grad_norm": 0.5523307919502258, "learning_rate": 0.0002350156583717274, "loss": 0.6492, "step": 5819 }, { "epoch": 1.0381767906520383, "grad_norm": 0.5532516837120056, "learning_rate": 0.00023494572264284326, "loss": 0.82, "step": 5820 }, { "epoch": 1.0383551868700382, "grad_norm": 0.5024081468582153, "learning_rate": 0.00023487578809631567, "loss": 0.725, "step": 5821 }, { "epoch": 1.0385335830880384, "grad_norm": 0.4690212905406952, "learning_rate": 0.00023480585473763707, "loss": 0.7607, "step": 5822 }, { "epoch": 1.0387119793060386, "grad_norm": 0.39463046193122864, "learning_rate": 0.00023473592257230015, "loss": 0.5608, "step": 5823 }, { "epoch": 1.0388903755240388, "grad_norm": 0.44406580924987793, "learning_rate": 0.00023466599160579726, "loss": 0.6521, "step": 5824 }, { "epoch": 1.039068771742039, "grad_norm": 0.46876436471939087, "learning_rate": 0.0002345960618436208, "loss": 0.643, "step": 5825 }, { "epoch": 1.0392471679600392, "grad_norm": 0.5072786211967468, "learning_rate": 0.00023452613329126305, "loss": 0.91, "step": 5826 }, { "epoch": 1.0394255641780394, "grad_norm": 0.45098283886909485, "learning_rate": 0.0002344562059542161, "loss": 0.6326, "step": 5827 }, { "epoch": 1.0396039603960396, "grad_norm": 0.507174551486969, "learning_rate": 0.000234386279837972, "loss": 0.8313, "step": 5828 }, { "epoch": 1.0397823566140398, "grad_norm": 0.44841310381889343, "learning_rate": 0.0002343163549480228, "loss": 0.6486, "step": 5829 }, { "epoch": 1.03996075283204, "grad_norm": 0.45851168036460876, "learning_rate": 0.00023424643128986037, "loss": 0.6491, "step": 5830 }, { "epoch": 1.0401391490500402, "grad_norm": 0.4500889778137207, "learning_rate": 0.0002341765088689764, "loss": 0.6452, "step": 5831 }, { "epoch": 1.0403175452680402, "grad_norm": 0.4633578658103943, "learning_rate": 0.00023410658769086265, "loss": 0.6371, "step": 5832 }, { "epoch": 1.0404959414860404, "grad_norm": 0.46451932191848755, "learning_rate": 0.0002340366677610107, "loss": 0.6226, "step": 5833 }, { "epoch": 1.0406743377040406, "grad_norm": 0.5377410054206848, "learning_rate": 0.00023396674908491194, "loss": 0.8014, "step": 5834 }, { "epoch": 1.0408527339220408, "grad_norm": 0.46611928939819336, "learning_rate": 0.00023389683166805784, "loss": 0.7689, "step": 5835 }, { "epoch": 1.041031130140041, "grad_norm": 0.43398740887641907, "learning_rate": 0.00023382691551593964, "loss": 0.5971, "step": 5836 }, { "epoch": 1.0412095263580412, "grad_norm": 0.46025121212005615, "learning_rate": 0.0002337570006340485, "loss": 0.548, "step": 5837 }, { "epoch": 1.0413879225760414, "grad_norm": 0.5142043828964233, "learning_rate": 0.00023368708702787555, "loss": 0.7064, "step": 5838 }, { "epoch": 1.0415663187940416, "grad_norm": 0.4556623697280884, "learning_rate": 0.00023361717470291176, "loss": 0.7167, "step": 5839 }, { "epoch": 1.0417447150120418, "grad_norm": 0.46669575572013855, "learning_rate": 0.00023354726366464808, "loss": 0.6693, "step": 5840 }, { "epoch": 1.041923111230042, "grad_norm": 0.5870226621627808, "learning_rate": 0.00023347735391857517, "loss": 0.81, "step": 5841 }, { "epoch": 1.0421015074480422, "grad_norm": 0.5324650406837463, "learning_rate": 0.00023340744547018384, "loss": 0.6455, "step": 5842 }, { "epoch": 1.0422799036660422, "grad_norm": 0.4658534824848175, "learning_rate": 0.00023333753832496443, "loss": 0.716, "step": 5843 }, { "epoch": 1.0424582998840424, "grad_norm": 0.6966456174850464, "learning_rate": 0.0002332676324884077, "loss": 0.4844, "step": 5844 }, { "epoch": 1.0426366961020426, "grad_norm": 0.440407395362854, "learning_rate": 0.00023319772796600395, "loss": 0.5498, "step": 5845 }, { "epoch": 1.0428150923200428, "grad_norm": 0.5414045453071594, "learning_rate": 0.00023312782476324345, "loss": 0.8414, "step": 5846 }, { "epoch": 1.042993488538043, "grad_norm": 0.4795417785644531, "learning_rate": 0.0002330579228856163, "loss": 0.7409, "step": 5847 }, { "epoch": 1.0431718847560432, "grad_norm": 0.47270727157592773, "learning_rate": 0.00023298802233861254, "loss": 0.5839, "step": 5848 }, { "epoch": 1.0433502809740434, "grad_norm": 0.45291322469711304, "learning_rate": 0.0002329181231277222, "loss": 0.6999, "step": 5849 }, { "epoch": 1.0435286771920436, "grad_norm": 0.49870064854621887, "learning_rate": 0.00023284822525843513, "loss": 0.6977, "step": 5850 }, { "epoch": 1.0437070734100438, "grad_norm": 0.500630259513855, "learning_rate": 0.00023277832873624108, "loss": 0.8385, "step": 5851 }, { "epoch": 1.043885469628044, "grad_norm": 0.4953528642654419, "learning_rate": 0.00023270843356662968, "loss": 0.7302, "step": 5852 }, { "epoch": 1.0440638658460442, "grad_norm": 0.4294586181640625, "learning_rate": 0.00023263853975509044, "loss": 0.529, "step": 5853 }, { "epoch": 1.0442422620640441, "grad_norm": 0.5091565847396851, "learning_rate": 0.00023256864730711289, "loss": 0.7218, "step": 5854 }, { "epoch": 1.0444206582820443, "grad_norm": 0.4561258554458618, "learning_rate": 0.00023249875622818623, "loss": 0.6372, "step": 5855 }, { "epoch": 1.0445990545000445, "grad_norm": 0.46639683842658997, "learning_rate": 0.00023242886652379973, "loss": 0.6476, "step": 5856 }, { "epoch": 1.0447774507180447, "grad_norm": 0.4989601671695709, "learning_rate": 0.00023235897819944245, "loss": 0.7827, "step": 5857 }, { "epoch": 1.044955846936045, "grad_norm": 0.4990067780017853, "learning_rate": 0.00023228909126060335, "loss": 0.7969, "step": 5858 }, { "epoch": 1.0451342431540451, "grad_norm": 0.4401039183139801, "learning_rate": 0.00023221920571277159, "loss": 0.6276, "step": 5859 }, { "epoch": 1.0453126393720453, "grad_norm": 0.496707558631897, "learning_rate": 0.00023214932156143564, "loss": 0.7849, "step": 5860 }, { "epoch": 1.0454910355900455, "grad_norm": 0.5055187940597534, "learning_rate": 0.00023207943881208435, "loss": 0.75, "step": 5861 }, { "epoch": 1.0456694318080457, "grad_norm": 0.4924319088459015, "learning_rate": 0.0002320095574702062, "loss": 0.741, "step": 5862 }, { "epoch": 1.045847828026046, "grad_norm": 0.4404760003089905, "learning_rate": 0.0002319396775412897, "loss": 0.5559, "step": 5863 }, { "epoch": 1.0460262242440461, "grad_norm": 0.4808800518512726, "learning_rate": 0.0002318697990308231, "loss": 0.776, "step": 5864 }, { "epoch": 1.046204620462046, "grad_norm": 0.4353162348270416, "learning_rate": 0.00023179992194429473, "loss": 0.534, "step": 5865 }, { "epoch": 1.0463830166800463, "grad_norm": 0.489243745803833, "learning_rate": 0.00023173004628719262, "loss": 0.72, "step": 5866 }, { "epoch": 1.0465614128980465, "grad_norm": 0.5229580998420715, "learning_rate": 0.0002316601720650049, "loss": 0.8733, "step": 5867 }, { "epoch": 1.0467398091160467, "grad_norm": 0.4031560719013214, "learning_rate": 0.0002315902992832193, "loss": 0.4158, "step": 5868 }, { "epoch": 1.046918205334047, "grad_norm": 0.5013259053230286, "learning_rate": 0.00023152042794732366, "loss": 0.6999, "step": 5869 }, { "epoch": 1.047096601552047, "grad_norm": 0.4599556624889374, "learning_rate": 0.00023145055806280567, "loss": 0.6991, "step": 5870 }, { "epoch": 1.0472749977700473, "grad_norm": 0.4679622948169708, "learning_rate": 0.00023138068963515288, "loss": 0.9515, "step": 5871 }, { "epoch": 1.0474533939880475, "grad_norm": 0.4390574097633362, "learning_rate": 0.0002313108226698527, "loss": 0.5369, "step": 5872 }, { "epoch": 1.0476317902060477, "grad_norm": 0.42867380380630493, "learning_rate": 0.00023124095717239241, "loss": 0.5489, "step": 5873 }, { "epoch": 1.0478101864240479, "grad_norm": 0.43418964743614197, "learning_rate": 0.00023117109314825933, "loss": 0.7322, "step": 5874 }, { "epoch": 1.047988582642048, "grad_norm": 0.4745190441608429, "learning_rate": 0.00023110123060294047, "loss": 0.682, "step": 5875 }, { "epoch": 1.048166978860048, "grad_norm": 0.4581608176231384, "learning_rate": 0.00023103136954192286, "loss": 0.6654, "step": 5876 }, { "epoch": 1.0483453750780483, "grad_norm": 0.46257057785987854, "learning_rate": 0.0002309615099706933, "loss": 0.6197, "step": 5877 }, { "epoch": 1.0485237712960485, "grad_norm": 0.528059184551239, "learning_rate": 0.00023089165189473857, "loss": 0.7513, "step": 5878 }, { "epoch": 1.0487021675140487, "grad_norm": 0.44776850938796997, "learning_rate": 0.00023082179531954525, "loss": 0.6703, "step": 5879 }, { "epoch": 1.0488805637320489, "grad_norm": 0.46082332730293274, "learning_rate": 0.00023075194025059994, "loss": 0.5771, "step": 5880 }, { "epoch": 1.049058959950049, "grad_norm": 0.49523085355758667, "learning_rate": 0.00023068208669338894, "loss": 0.5816, "step": 5881 }, { "epoch": 1.0492373561680493, "grad_norm": 0.6181836128234863, "learning_rate": 0.0002306122346533985, "loss": 0.5963, "step": 5882 }, { "epoch": 1.0494157523860494, "grad_norm": 0.8168792128562927, "learning_rate": 0.00023054238413611482, "loss": 0.609, "step": 5883 }, { "epoch": 1.0495941486040496, "grad_norm": 0.5016876459121704, "learning_rate": 0.0002304725351470239, "loss": 0.8368, "step": 5884 }, { "epoch": 1.0497725448220498, "grad_norm": 0.4856698215007782, "learning_rate": 0.00023040268769161168, "loss": 0.5799, "step": 5885 }, { "epoch": 1.04995094104005, "grad_norm": 0.8010654449462891, "learning_rate": 0.00023033284177536396, "loss": 0.7327, "step": 5886 }, { "epoch": 1.05012933725805, "grad_norm": 0.48454806208610535, "learning_rate": 0.00023026299740376633, "loss": 0.7485, "step": 5887 }, { "epoch": 1.0503077334760502, "grad_norm": 0.47134482860565186, "learning_rate": 0.00023019315458230449, "loss": 0.7593, "step": 5888 }, { "epoch": 1.0504861296940504, "grad_norm": 0.5328679084777832, "learning_rate": 0.00023012331331646359, "loss": 0.6926, "step": 5889 }, { "epoch": 1.0506645259120506, "grad_norm": 0.46135085821151733, "learning_rate": 0.0002300534736117292, "loss": 0.6585, "step": 5890 }, { "epoch": 1.0508429221300508, "grad_norm": 0.7322617173194885, "learning_rate": 0.00022998363547358641, "loss": 0.7974, "step": 5891 }, { "epoch": 1.051021318348051, "grad_norm": 0.46016210317611694, "learning_rate": 0.0002299137989075203, "loss": 0.7362, "step": 5892 }, { "epoch": 1.0511997145660512, "grad_norm": 0.4320010244846344, "learning_rate": 0.00022984396391901582, "loss": 0.5613, "step": 5893 }, { "epoch": 1.0513781107840514, "grad_norm": 0.45406126976013184, "learning_rate": 0.0002297741305135577, "loss": 0.5832, "step": 5894 }, { "epoch": 1.0515565070020516, "grad_norm": 0.5657575130462646, "learning_rate": 0.00022970429869663064, "loss": 0.9168, "step": 5895 }, { "epoch": 1.0517349032200518, "grad_norm": 0.47483891248703003, "learning_rate": 0.00022963446847371925, "loss": 0.5917, "step": 5896 }, { "epoch": 1.051913299438052, "grad_norm": 0.4458398222923279, "learning_rate": 0.00022956463985030794, "loss": 0.5259, "step": 5897 }, { "epoch": 1.0520916956560522, "grad_norm": 0.4733836054801941, "learning_rate": 0.000229494812831881, "loss": 0.6835, "step": 5898 }, { "epoch": 1.0522700918740522, "grad_norm": 0.49233517050743103, "learning_rate": 0.00022942498742392265, "loss": 0.8004, "step": 5899 }, { "epoch": 1.0524484880920524, "grad_norm": 0.7832338809967041, "learning_rate": 0.00022935516363191695, "loss": 0.6962, "step": 5900 }, { "epoch": 1.0526268843100526, "grad_norm": 0.7512785792350769, "learning_rate": 0.00022928534146134783, "loss": 0.7948, "step": 5901 }, { "epoch": 1.0528052805280528, "grad_norm": 0.6257414221763611, "learning_rate": 0.00022921552091769907, "loss": 0.5168, "step": 5902 }, { "epoch": 1.052983676746053, "grad_norm": 0.5249746441841125, "learning_rate": 0.0002291457020064543, "loss": 0.9227, "step": 5903 }, { "epoch": 1.0531620729640532, "grad_norm": 0.4546600878238678, "learning_rate": 0.00022907588473309703, "loss": 0.5357, "step": 5904 }, { "epoch": 1.0533404691820534, "grad_norm": 0.5810390114784241, "learning_rate": 0.00022900606910311098, "loss": 0.6856, "step": 5905 }, { "epoch": 1.0535188654000536, "grad_norm": 0.45941051840782166, "learning_rate": 0.00022893625512197913, "loss": 0.6229, "step": 5906 }, { "epoch": 1.0536972616180538, "grad_norm": 0.5021553039550781, "learning_rate": 0.00022886644279518473, "loss": 0.9207, "step": 5907 }, { "epoch": 1.053875657836054, "grad_norm": 0.48117703199386597, "learning_rate": 0.00022879663212821083, "loss": 0.648, "step": 5908 }, { "epoch": 1.054054054054054, "grad_norm": 0.5499297976493835, "learning_rate": 0.00022872682312654032, "loss": 0.7953, "step": 5909 }, { "epoch": 1.0542324502720541, "grad_norm": 0.4166644513607025, "learning_rate": 0.000228657015795656, "loss": 0.506, "step": 5910 }, { "epoch": 1.0544108464900543, "grad_norm": 0.5143164992332458, "learning_rate": 0.00022858721014104043, "loss": 0.7587, "step": 5911 }, { "epoch": 1.0545892427080545, "grad_norm": 0.5195590257644653, "learning_rate": 0.00022851740616817615, "loss": 0.8337, "step": 5912 }, { "epoch": 1.0547676389260547, "grad_norm": 0.5148821473121643, "learning_rate": 0.00022844760388254556, "loss": 0.9393, "step": 5913 }, { "epoch": 1.054946035144055, "grad_norm": 0.5613206028938293, "learning_rate": 0.00022837780328963095, "loss": 1.0002, "step": 5914 }, { "epoch": 1.0551244313620551, "grad_norm": 0.4363964796066284, "learning_rate": 0.00022830800439491435, "loss": 0.6343, "step": 5915 }, { "epoch": 1.0553028275800553, "grad_norm": 0.46682143211364746, "learning_rate": 0.00022823820720387766, "loss": 0.6258, "step": 5916 }, { "epoch": 1.0554812237980555, "grad_norm": 0.45022645592689514, "learning_rate": 0.00022816841172200287, "loss": 0.7482, "step": 5917 }, { "epoch": 1.0556596200160557, "grad_norm": 0.5296302437782288, "learning_rate": 0.00022809861795477162, "loss": 0.7556, "step": 5918 }, { "epoch": 1.055838016234056, "grad_norm": 0.48178842663764954, "learning_rate": 0.00022802882590766544, "loss": 0.8764, "step": 5919 }, { "epoch": 1.0560164124520561, "grad_norm": 0.5427073240280151, "learning_rate": 0.00022795903558616587, "loss": 0.9599, "step": 5920 }, { "epoch": 1.056194808670056, "grad_norm": 0.5280442833900452, "learning_rate": 0.00022788924699575417, "loss": 0.6732, "step": 5921 }, { "epoch": 1.0563732048880563, "grad_norm": 0.5916957259178162, "learning_rate": 0.00022781946014191145, "loss": 0.8213, "step": 5922 }, { "epoch": 1.0565516011060565, "grad_norm": 0.566719651222229, "learning_rate": 0.00022774967503011884, "loss": 0.7634, "step": 5923 }, { "epoch": 1.0567299973240567, "grad_norm": 0.5337679982185364, "learning_rate": 0.00022767989166585717, "loss": 0.8209, "step": 5924 }, { "epoch": 1.056908393542057, "grad_norm": 1.3235185146331787, "learning_rate": 0.0002276101100546072, "loss": 0.706, "step": 5925 }, { "epoch": 1.057086789760057, "grad_norm": 0.4909132719039917, "learning_rate": 0.0002275403302018496, "loss": 0.6735, "step": 5926 }, { "epoch": 1.0572651859780573, "grad_norm": 0.4729708135128021, "learning_rate": 0.00022747055211306473, "loss": 0.6506, "step": 5927 }, { "epoch": 1.0574435821960575, "grad_norm": 0.48028764128685, "learning_rate": 0.000227400775793733, "loss": 0.577, "step": 5928 }, { "epoch": 1.0576219784140577, "grad_norm": 0.4471667408943176, "learning_rate": 0.00022733100124933464, "loss": 0.5678, "step": 5929 }, { "epoch": 1.057800374632058, "grad_norm": 0.4753149449825287, "learning_rate": 0.00022726122848534965, "loss": 0.7441, "step": 5930 }, { "epoch": 1.0579787708500579, "grad_norm": 0.5008271932601929, "learning_rate": 0.00022719145750725803, "loss": 0.7606, "step": 5931 }, { "epoch": 1.058157167068058, "grad_norm": 0.48235058784484863, "learning_rate": 0.0002271216883205395, "loss": 0.6566, "step": 5932 }, { "epoch": 1.0583355632860583, "grad_norm": 0.49578502774238586, "learning_rate": 0.00022705192093067377, "loss": 0.6123, "step": 5933 }, { "epoch": 1.0585139595040585, "grad_norm": 0.4736134111881256, "learning_rate": 0.0002269821553431403, "loss": 0.5684, "step": 5934 }, { "epoch": 1.0586923557220587, "grad_norm": 0.4547494351863861, "learning_rate": 0.00022691239156341828, "loss": 0.6157, "step": 5935 }, { "epoch": 1.0588707519400589, "grad_norm": 0.45939305424690247, "learning_rate": 0.0002268426295969872, "loss": 0.6121, "step": 5936 }, { "epoch": 1.059049148158059, "grad_norm": 0.5370938181877136, "learning_rate": 0.00022677286944932604, "loss": 0.8359, "step": 5937 }, { "epoch": 1.0592275443760593, "grad_norm": 0.5468327403068542, "learning_rate": 0.0002267031111259137, "loss": 0.7847, "step": 5938 }, { "epoch": 1.0594059405940595, "grad_norm": 0.472802996635437, "learning_rate": 0.00022663335463222906, "loss": 0.6689, "step": 5939 }, { "epoch": 1.0595843368120597, "grad_norm": 0.5464509129524231, "learning_rate": 0.00022656359997375063, "loss": 0.9584, "step": 5940 }, { "epoch": 1.0597627330300599, "grad_norm": 0.5138636827468872, "learning_rate": 0.000226493847155957, "loss": 0.7645, "step": 5941 }, { "epoch": 1.05994112924806, "grad_norm": 0.4933434724807739, "learning_rate": 0.00022642409618432648, "loss": 0.7348, "step": 5942 }, { "epoch": 1.06011952546606, "grad_norm": 0.4958341121673584, "learning_rate": 0.00022635434706433727, "loss": 0.7549, "step": 5943 }, { "epoch": 1.0602979216840602, "grad_norm": 0.4594402015209198, "learning_rate": 0.00022628459980146752, "loss": 0.5529, "step": 5944 }, { "epoch": 1.0604763179020604, "grad_norm": 0.5959436893463135, "learning_rate": 0.00022621485440119506, "loss": 0.65, "step": 5945 }, { "epoch": 1.0606547141200606, "grad_norm": 0.5254977345466614, "learning_rate": 0.00022614511086899768, "loss": 0.7693, "step": 5946 }, { "epoch": 1.0608331103380608, "grad_norm": 0.4639701843261719, "learning_rate": 0.00022607536921035313, "loss": 0.7103, "step": 5947 }, { "epoch": 1.061011506556061, "grad_norm": 0.5139868855476379, "learning_rate": 0.00022600562943073872, "loss": 0.7358, "step": 5948 }, { "epoch": 1.0611899027740612, "grad_norm": 0.4823780059814453, "learning_rate": 0.00022593589153563183, "loss": 0.6385, "step": 5949 }, { "epoch": 1.0613682989920614, "grad_norm": 0.475702702999115, "learning_rate": 0.00022586615553050958, "loss": 0.7695, "step": 5950 }, { "epoch": 1.0615466952100616, "grad_norm": 0.4667466878890991, "learning_rate": 0.00022579642142084918, "loss": 0.7241, "step": 5951 }, { "epoch": 1.0617250914280618, "grad_norm": 0.46129074692726135, "learning_rate": 0.00022572668921212746, "loss": 0.7244, "step": 5952 }, { "epoch": 1.0619034876460618, "grad_norm": 0.45217975974082947, "learning_rate": 0.0002256569589098211, "loss": 0.5342, "step": 5953 }, { "epoch": 1.062081883864062, "grad_norm": 0.4796280860900879, "learning_rate": 0.0002255872305194067, "loss": 0.7878, "step": 5954 }, { "epoch": 1.0622602800820622, "grad_norm": 0.5046055912971497, "learning_rate": 0.0002255175040463607, "loss": 0.6931, "step": 5955 }, { "epoch": 1.0624386763000624, "grad_norm": 0.5430681109428406, "learning_rate": 0.00022544777949615942, "loss": 0.6904, "step": 5956 }, { "epoch": 1.0626170725180626, "grad_norm": 0.5339892506599426, "learning_rate": 0.00022537805687427895, "loss": 0.7239, "step": 5957 }, { "epoch": 1.0627954687360628, "grad_norm": 0.510955274105072, "learning_rate": 0.0002253083361861953, "loss": 0.7252, "step": 5958 }, { "epoch": 1.062973864954063, "grad_norm": 0.48307734727859497, "learning_rate": 0.00022523861743738434, "loss": 0.7571, "step": 5959 }, { "epoch": 1.0631522611720632, "grad_norm": 0.4888105094432831, "learning_rate": 0.00022516890063332173, "loss": 0.6627, "step": 5960 }, { "epoch": 1.0633306573900634, "grad_norm": 0.44385766983032227, "learning_rate": 0.00022509918577948292, "loss": 0.6508, "step": 5961 }, { "epoch": 1.0635090536080636, "grad_norm": 0.4929356276988983, "learning_rate": 0.00022502947288134334, "loss": 0.6448, "step": 5962 }, { "epoch": 1.0636874498260638, "grad_norm": 0.5029351711273193, "learning_rate": 0.00022495976194437822, "loss": 0.7504, "step": 5963 }, { "epoch": 1.063865846044064, "grad_norm": 0.47953563928604126, "learning_rate": 0.00022489005297406266, "loss": 0.6591, "step": 5964 }, { "epoch": 1.064044242262064, "grad_norm": 0.5105430483818054, "learning_rate": 0.0002248203459758714, "loss": 0.8449, "step": 5965 }, { "epoch": 1.0642226384800642, "grad_norm": 0.5377670526504517, "learning_rate": 0.00022475064095527948, "loss": 0.8731, "step": 5966 }, { "epoch": 1.0644010346980644, "grad_norm": 0.47288522124290466, "learning_rate": 0.00022468093791776128, "loss": 0.595, "step": 5967 }, { "epoch": 1.0645794309160646, "grad_norm": 0.43630439043045044, "learning_rate": 0.00022461123686879137, "loss": 0.5941, "step": 5968 }, { "epoch": 1.0647578271340647, "grad_norm": 0.4721398651599884, "learning_rate": 0.00022454153781384395, "loss": 0.6303, "step": 5969 }, { "epoch": 1.064936223352065, "grad_norm": 0.5068725347518921, "learning_rate": 0.00022447184075839323, "loss": 0.7956, "step": 5970 }, { "epoch": 1.0651146195700651, "grad_norm": 0.5235750675201416, "learning_rate": 0.0002244021457079131, "loss": 0.7384, "step": 5971 }, { "epoch": 1.0652930157880653, "grad_norm": 0.44175130128860474, "learning_rate": 0.00022433245266787749, "loss": 0.6365, "step": 5972 }, { "epoch": 1.0654714120060655, "grad_norm": 0.4824054539203644, "learning_rate": 0.00022426276164376003, "loss": 0.795, "step": 5973 }, { "epoch": 1.0656498082240657, "grad_norm": 0.46271640062332153, "learning_rate": 0.00022419307264103414, "loss": 0.6397, "step": 5974 }, { "epoch": 1.0658282044420657, "grad_norm": 0.4163155257701874, "learning_rate": 0.0002241233856651732, "loss": 0.4912, "step": 5975 }, { "epoch": 1.066006600660066, "grad_norm": 0.46445098519325256, "learning_rate": 0.00022405370072165043, "loss": 0.587, "step": 5976 }, { "epoch": 1.0661849968780661, "grad_norm": 0.4462120831012726, "learning_rate": 0.00022398401781593884, "loss": 0.6272, "step": 5977 }, { "epoch": 1.0663633930960663, "grad_norm": 0.48081815242767334, "learning_rate": 0.00022391433695351131, "loss": 0.6237, "step": 5978 }, { "epoch": 1.0665417893140665, "grad_norm": 0.5043575167655945, "learning_rate": 0.00022384465813984054, "loss": 0.6698, "step": 5979 }, { "epoch": 1.0667201855320667, "grad_norm": 0.5385834574699402, "learning_rate": 0.00022377498138039903, "loss": 0.7248, "step": 5980 }, { "epoch": 1.066898581750067, "grad_norm": 0.5373404622077942, "learning_rate": 0.00022370530668065915, "loss": 0.7383, "step": 5981 }, { "epoch": 1.067076977968067, "grad_norm": 0.464138925075531, "learning_rate": 0.0002236356340460932, "loss": 0.5345, "step": 5982 }, { "epoch": 1.0672553741860673, "grad_norm": 0.5200468301773071, "learning_rate": 0.00022356596348217325, "loss": 0.7235, "step": 5983 }, { "epoch": 1.0674337704040675, "grad_norm": 0.5154780745506287, "learning_rate": 0.00022349629499437116, "loss": 0.8373, "step": 5984 }, { "epoch": 1.0676121666220677, "grad_norm": 0.4483351707458496, "learning_rate": 0.00022342662858815867, "loss": 0.5755, "step": 5985 }, { "epoch": 1.067790562840068, "grad_norm": 0.4439292848110199, "learning_rate": 0.0002233569642690073, "loss": 0.5772, "step": 5986 }, { "epoch": 1.0679689590580679, "grad_norm": 0.49343034625053406, "learning_rate": 0.00022328730204238852, "loss": 0.7372, "step": 5987 }, { "epoch": 1.068147355276068, "grad_norm": 0.4922356605529785, "learning_rate": 0.00022321764191377347, "loss": 0.7034, "step": 5988 }, { "epoch": 1.0683257514940683, "grad_norm": 0.426301509141922, "learning_rate": 0.00022314798388863336, "loss": 0.6708, "step": 5989 }, { "epoch": 1.0685041477120685, "grad_norm": 0.46274682879447937, "learning_rate": 0.000223078327972439, "loss": 0.5906, "step": 5990 }, { "epoch": 1.0686825439300687, "grad_norm": 0.43291884660720825, "learning_rate": 0.0002230086741706612, "loss": 0.6506, "step": 5991 }, { "epoch": 1.0688609401480689, "grad_norm": 0.4762420952320099, "learning_rate": 0.00022293902248877052, "loss": 0.6862, "step": 5992 }, { "epoch": 1.069039336366069, "grad_norm": 0.4900970160961151, "learning_rate": 0.00022286937293223736, "loss": 0.7371, "step": 5993 }, { "epoch": 1.0692177325840693, "grad_norm": 0.44312936067581177, "learning_rate": 0.00022279972550653203, "loss": 0.7384, "step": 5994 }, { "epoch": 1.0693961288020695, "grad_norm": 0.5585013628005981, "learning_rate": 0.00022273008021712448, "loss": 0.9225, "step": 5995 }, { "epoch": 1.0695745250200697, "grad_norm": 0.4900633990764618, "learning_rate": 0.00022266043706948462, "loss": 0.6349, "step": 5996 }, { "epoch": 1.0697529212380696, "grad_norm": 0.47767671942710876, "learning_rate": 0.00022259079606908237, "loss": 0.728, "step": 5997 }, { "epoch": 1.0699313174560698, "grad_norm": 0.477839857339859, "learning_rate": 0.00022252115722138724, "loss": 0.626, "step": 5998 }, { "epoch": 1.07010971367407, "grad_norm": 0.47772642970085144, "learning_rate": 0.00022245152053186853, "loss": 0.7116, "step": 5999 }, { "epoch": 1.0702881098920702, "grad_norm": 0.45496633648872375, "learning_rate": 0.00022238188600599558, "loss": 0.6855, "step": 6000 }, { "epoch": 1.0704665061100704, "grad_norm": 0.5116882920265198, "learning_rate": 0.00022231225364923736, "loss": 0.7783, "step": 6001 }, { "epoch": 1.0706449023280706, "grad_norm": 0.5786128044128418, "learning_rate": 0.00022224262346706288, "loss": 0.7753, "step": 6002 }, { "epoch": 1.0708232985460708, "grad_norm": 0.4571937620639801, "learning_rate": 0.00022217299546494078, "loss": 0.6224, "step": 6003 }, { "epoch": 1.071001694764071, "grad_norm": 0.4922640919685364, "learning_rate": 0.00022210336964833966, "loss": 0.7753, "step": 6004 }, { "epoch": 1.0711800909820712, "grad_norm": 0.4930363595485687, "learning_rate": 0.0002220337460227279, "loss": 0.7542, "step": 6005 }, { "epoch": 1.0713584872000714, "grad_norm": 0.4897131323814392, "learning_rate": 0.00022196412459357372, "loss": 0.7468, "step": 6006 }, { "epoch": 1.0715368834180716, "grad_norm": 0.5091310739517212, "learning_rate": 0.00022189450536634506, "loss": 0.6646, "step": 6007 }, { "epoch": 1.0717152796360718, "grad_norm": 0.5187907814979553, "learning_rate": 0.00022182488834650987, "loss": 0.816, "step": 6008 }, { "epoch": 1.0718936758540718, "grad_norm": 0.5296580195426941, "learning_rate": 0.00022175527353953585, "loss": 0.8415, "step": 6009 }, { "epoch": 1.072072072072072, "grad_norm": 0.4686925411224365, "learning_rate": 0.00022168566095089043, "loss": 0.5842, "step": 6010 }, { "epoch": 1.0722504682900722, "grad_norm": 0.5479150414466858, "learning_rate": 0.000221616050586041, "loss": 0.7578, "step": 6011 }, { "epoch": 1.0724288645080724, "grad_norm": 0.6131009459495544, "learning_rate": 0.0002215464424504548, "loss": 0.6768, "step": 6012 }, { "epoch": 1.0726072607260726, "grad_norm": 0.47752901911735535, "learning_rate": 0.00022147683654959876, "loss": 0.6412, "step": 6013 }, { "epoch": 1.0727856569440728, "grad_norm": 0.47805142402648926, "learning_rate": 0.0002214072328889397, "loss": 0.6528, "step": 6014 }, { "epoch": 1.072964053162073, "grad_norm": 0.44289377331733704, "learning_rate": 0.00022133763147394426, "loss": 0.7201, "step": 6015 }, { "epoch": 1.0731424493800732, "grad_norm": 0.5617145299911499, "learning_rate": 0.00022126803231007893, "loss": 0.7928, "step": 6016 }, { "epoch": 1.0733208455980734, "grad_norm": 0.5184254050254822, "learning_rate": 0.00022119843540280995, "loss": 0.8179, "step": 6017 }, { "epoch": 1.0734992418160736, "grad_norm": 0.4659225344657898, "learning_rate": 0.00022112884075760347, "loss": 0.5611, "step": 6018 }, { "epoch": 1.0736776380340736, "grad_norm": 0.5036365389823914, "learning_rate": 0.00022105924837992547, "loss": 0.9739, "step": 6019 }, { "epoch": 1.0738560342520738, "grad_norm": 0.4579788148403168, "learning_rate": 0.0002209896582752416, "loss": 0.6486, "step": 6020 }, { "epoch": 1.074034430470074, "grad_norm": 0.4959608018398285, "learning_rate": 0.00022092007044901746, "loss": 0.9164, "step": 6021 }, { "epoch": 1.0742128266880742, "grad_norm": 0.4846688508987427, "learning_rate": 0.00022085048490671849, "loss": 0.7229, "step": 6022 }, { "epoch": 1.0743912229060744, "grad_norm": 0.4702696204185486, "learning_rate": 0.0002207809016538099, "loss": 0.5403, "step": 6023 }, { "epoch": 1.0745696191240746, "grad_norm": 0.4317050576210022, "learning_rate": 0.00022071132069575672, "loss": 0.6298, "step": 6024 }, { "epoch": 1.0747480153420748, "grad_norm": 0.4599125385284424, "learning_rate": 0.00022064174203802382, "loss": 0.6337, "step": 6025 }, { "epoch": 1.074926411560075, "grad_norm": 0.5147960186004639, "learning_rate": 0.00022057216568607582, "loss": 0.6918, "step": 6026 }, { "epoch": 1.0751048077780752, "grad_norm": 0.45055848360061646, "learning_rate": 0.00022050259164537725, "loss": 0.6336, "step": 6027 }, { "epoch": 1.0752832039960754, "grad_norm": 0.5015688538551331, "learning_rate": 0.00022043301992139247, "loss": 0.7122, "step": 6028 }, { "epoch": 1.0754616002140756, "grad_norm": 0.4941107928752899, "learning_rate": 0.0002203634505195856, "loss": 0.7284, "step": 6029 }, { "epoch": 1.0756399964320758, "grad_norm": 0.4930790960788727, "learning_rate": 0.00022029388344542056, "loss": 0.6292, "step": 6030 }, { "epoch": 1.0758183926500757, "grad_norm": 0.461085706949234, "learning_rate": 0.00022022431870436114, "loss": 0.5988, "step": 6031 }, { "epoch": 1.075996788868076, "grad_norm": 0.5100793838500977, "learning_rate": 0.00022015475630187095, "loss": 0.773, "step": 6032 }, { "epoch": 1.0761751850860761, "grad_norm": 0.45178812742233276, "learning_rate": 0.00022008519624341333, "loss": 0.6794, "step": 6033 }, { "epoch": 1.0763535813040763, "grad_norm": 0.44591426849365234, "learning_rate": 0.0002200156385344515, "loss": 0.6673, "step": 6034 }, { "epoch": 1.0765319775220765, "grad_norm": 0.47172197699546814, "learning_rate": 0.00021994608318044853, "loss": 0.7226, "step": 6035 }, { "epoch": 1.0767103737400767, "grad_norm": 0.5064817070960999, "learning_rate": 0.00021987653018686724, "loss": 0.7875, "step": 6036 }, { "epoch": 1.076888769958077, "grad_norm": 0.49598416686058044, "learning_rate": 0.0002198069795591703, "loss": 0.6392, "step": 6037 }, { "epoch": 1.0770671661760771, "grad_norm": 0.5123727321624756, "learning_rate": 0.00021973743130282024, "loss": 0.7137, "step": 6038 }, { "epoch": 1.0772455623940773, "grad_norm": 0.48429879546165466, "learning_rate": 0.00021966788542327926, "loss": 0.6153, "step": 6039 }, { "epoch": 1.0774239586120775, "grad_norm": 0.5295233130455017, "learning_rate": 0.00021959834192600958, "loss": 0.7566, "step": 6040 }, { "epoch": 1.0776023548300775, "grad_norm": 0.5196706652641296, "learning_rate": 0.00021952880081647298, "loss": 0.7511, "step": 6041 }, { "epoch": 1.0777807510480777, "grad_norm": 0.4832461476325989, "learning_rate": 0.00021945926210013112, "loss": 0.5969, "step": 6042 }, { "epoch": 1.0779591472660779, "grad_norm": 0.46703991293907166, "learning_rate": 0.00021938972578244582, "loss": 0.5534, "step": 6043 }, { "epoch": 1.078137543484078, "grad_norm": 0.5281887054443359, "learning_rate": 0.00021932019186887824, "loss": 0.8217, "step": 6044 }, { "epoch": 1.0783159397020783, "grad_norm": 0.48991912603378296, "learning_rate": 0.00021925066036488969, "loss": 0.6933, "step": 6045 }, { "epoch": 1.0784943359200785, "grad_norm": 0.49540868401527405, "learning_rate": 0.00021918113127594098, "loss": 0.8362, "step": 6046 }, { "epoch": 1.0786727321380787, "grad_norm": 0.4319058358669281, "learning_rate": 0.00021911160460749295, "loss": 0.5308, "step": 6047 }, { "epoch": 1.0788511283560789, "grad_norm": 0.4906058609485626, "learning_rate": 0.00021904208036500618, "loss": 0.6291, "step": 6048 }, { "epoch": 1.079029524574079, "grad_norm": 0.46787381172180176, "learning_rate": 0.0002189725585539411, "loss": 0.7964, "step": 6049 }, { "epoch": 1.0792079207920793, "grad_norm": 0.5059190988540649, "learning_rate": 0.00021890303917975794, "loss": 0.7308, "step": 6050 }, { "epoch": 1.0793863170100795, "grad_norm": 0.4896734356880188, "learning_rate": 0.0002188335222479167, "loss": 0.8345, "step": 6051 }, { "epoch": 1.0795647132280797, "grad_norm": 0.48433777689933777, "learning_rate": 0.0002187640077638772, "loss": 0.5938, "step": 6052 }, { "epoch": 1.0797431094460797, "grad_norm": 0.5422555804252625, "learning_rate": 0.00021869449573309912, "loss": 0.8484, "step": 6053 }, { "epoch": 1.0799215056640799, "grad_norm": 0.4676606059074402, "learning_rate": 0.00021862498616104188, "loss": 0.6853, "step": 6054 }, { "epoch": 1.08009990188208, "grad_norm": 0.5379725694656372, "learning_rate": 0.00021855547905316467, "loss": 0.8504, "step": 6055 }, { "epoch": 1.0802782981000802, "grad_norm": 0.4376412034034729, "learning_rate": 0.00021848597441492663, "loss": 0.6913, "step": 6056 }, { "epoch": 1.0804566943180804, "grad_norm": 0.48028087615966797, "learning_rate": 0.0002184164722517865, "loss": 0.7915, "step": 6057 }, { "epoch": 1.0806350905360806, "grad_norm": 0.5001854300498962, "learning_rate": 0.00021834697256920316, "loss": 0.7001, "step": 6058 }, { "epoch": 1.0808134867540808, "grad_norm": 0.4411584436893463, "learning_rate": 0.00021827747537263496, "loss": 0.5408, "step": 6059 }, { "epoch": 1.080991882972081, "grad_norm": 0.4824419319629669, "learning_rate": 0.0002182079806675402, "loss": 0.6647, "step": 6060 }, { "epoch": 1.0811702791900812, "grad_norm": 0.4544886350631714, "learning_rate": 0.00021813848845937691, "loss": 0.6396, "step": 6061 }, { "epoch": 1.0813486754080814, "grad_norm": 0.5271000266075134, "learning_rate": 0.00021806899875360307, "loss": 0.7279, "step": 6062 }, { "epoch": 1.0815270716260814, "grad_norm": 0.5003283619880676, "learning_rate": 0.00021799951155567632, "loss": 0.7433, "step": 6063 }, { "epoch": 1.0817054678440816, "grad_norm": 0.5777568221092224, "learning_rate": 0.00021793002687105415, "loss": 0.8419, "step": 6064 }, { "epoch": 1.0818838640620818, "grad_norm": 0.4634368121623993, "learning_rate": 0.00021786054470519388, "loss": 0.6258, "step": 6065 }, { "epoch": 1.082062260280082, "grad_norm": 0.4333091378211975, "learning_rate": 0.00021779106506355264, "loss": 0.6735, "step": 6066 }, { "epoch": 1.0822406564980822, "grad_norm": 0.45657244324684143, "learning_rate": 0.00021772158795158725, "loss": 0.6708, "step": 6067 }, { "epoch": 1.0824190527160824, "grad_norm": 0.5117029547691345, "learning_rate": 0.00021765211337475445, "loss": 0.7658, "step": 6068 }, { "epoch": 1.0825974489340826, "grad_norm": 0.45610618591308594, "learning_rate": 0.00021758264133851072, "loss": 0.5174, "step": 6069 }, { "epoch": 1.0827758451520828, "grad_norm": 0.4404855966567993, "learning_rate": 0.0002175131718483124, "loss": 0.5822, "step": 6070 }, { "epoch": 1.082954241370083, "grad_norm": 0.5023370385169983, "learning_rate": 0.0002174437049096156, "loss": 0.7341, "step": 6071 }, { "epoch": 1.0831326375880832, "grad_norm": 0.5234084725379944, "learning_rate": 0.00021737424052787618, "loss": 0.8447, "step": 6072 }, { "epoch": 1.0833110338060834, "grad_norm": 0.49975350499153137, "learning_rate": 0.00021730477870854985, "loss": 0.6664, "step": 6073 }, { "epoch": 1.0834894300240836, "grad_norm": 0.5313869118690491, "learning_rate": 0.00021723531945709216, "loss": 0.7133, "step": 6074 }, { "epoch": 1.0836678262420836, "grad_norm": 0.513853907585144, "learning_rate": 0.0002171658627789584, "loss": 0.9083, "step": 6075 }, { "epoch": 1.0838462224600838, "grad_norm": 0.48460695147514343, "learning_rate": 0.00021709640867960362, "loss": 0.8195, "step": 6076 }, { "epoch": 1.084024618678084, "grad_norm": 0.49029091000556946, "learning_rate": 0.00021702695716448276, "loss": 0.6181, "step": 6077 }, { "epoch": 1.0842030148960842, "grad_norm": 0.48168060183525085, "learning_rate": 0.00021695750823905053, "loss": 0.6585, "step": 6078 }, { "epoch": 1.0843814111140844, "grad_norm": 0.5174769759178162, "learning_rate": 0.00021688806190876136, "loss": 0.684, "step": 6079 }, { "epoch": 1.0845598073320846, "grad_norm": 0.47358494997024536, "learning_rate": 0.00021681861817906954, "loss": 0.6681, "step": 6080 }, { "epoch": 1.0847382035500848, "grad_norm": 0.5476037859916687, "learning_rate": 0.00021674917705542918, "loss": 0.8857, "step": 6081 }, { "epoch": 1.084916599768085, "grad_norm": 0.47755712270736694, "learning_rate": 0.00021667973854329415, "loss": 0.6948, "step": 6082 }, { "epoch": 1.0850949959860852, "grad_norm": 0.48054665327072144, "learning_rate": 0.0002166103026481181, "loss": 0.6114, "step": 6083 }, { "epoch": 1.0852733922040854, "grad_norm": 0.5116613507270813, "learning_rate": 0.00021654086937535449, "loss": 0.7028, "step": 6084 }, { "epoch": 1.0854517884220853, "grad_norm": 0.5386189222335815, "learning_rate": 0.00021647143873045662, "loss": 0.8417, "step": 6085 }, { "epoch": 1.0856301846400855, "grad_norm": 0.770457923412323, "learning_rate": 0.00021640201071887761, "loss": 0.7823, "step": 6086 }, { "epoch": 1.0858085808580857, "grad_norm": 0.45015949010849, "learning_rate": 0.00021633258534607013, "loss": 0.5068, "step": 6087 }, { "epoch": 1.085986977076086, "grad_norm": 0.4800700843334198, "learning_rate": 0.0002162631626174868, "loss": 0.6321, "step": 6088 }, { "epoch": 1.0861653732940861, "grad_norm": 0.5034077167510986, "learning_rate": 0.0002161937425385803, "loss": 0.7027, "step": 6089 }, { "epoch": 1.0863437695120863, "grad_norm": 0.46986886858940125, "learning_rate": 0.00021612432511480267, "loss": 0.5984, "step": 6090 }, { "epoch": 1.0865221657300865, "grad_norm": 1.3976149559020996, "learning_rate": 0.00021605491035160603, "loss": 0.81, "step": 6091 }, { "epoch": 1.0867005619480867, "grad_norm": 0.4368259906768799, "learning_rate": 0.0002159854982544421, "loss": 0.5048, "step": 6092 }, { "epoch": 1.086878958166087, "grad_norm": 0.45895346999168396, "learning_rate": 0.00021591608882876249, "loss": 0.6807, "step": 6093 }, { "epoch": 1.0870573543840871, "grad_norm": 0.500730037689209, "learning_rate": 0.00021584668208001856, "loss": 0.7392, "step": 6094 }, { "epoch": 1.0872357506020873, "grad_norm": 0.5281662344932556, "learning_rate": 0.00021577727801366158, "loss": 0.8316, "step": 6095 }, { "epoch": 1.0874141468200875, "grad_norm": 0.5275529623031616, "learning_rate": 0.00021570787663514242, "loss": 0.7324, "step": 6096 }, { "epoch": 1.0875925430380875, "grad_norm": 0.4927082061767578, "learning_rate": 0.00021563847794991186, "loss": 0.7293, "step": 6097 }, { "epoch": 1.0877709392560877, "grad_norm": 0.49843451380729675, "learning_rate": 0.0002155690819634205, "loss": 0.7416, "step": 6098 }, { "epoch": 1.087949335474088, "grad_norm": 0.46133577823638916, "learning_rate": 0.00021549968868111863, "loss": 0.5853, "step": 6099 }, { "epoch": 1.088127731692088, "grad_norm": 0.45552465319633484, "learning_rate": 0.00021543029810845634, "loss": 0.6492, "step": 6100 }, { "epoch": 1.0883061279100883, "grad_norm": 0.40714430809020996, "learning_rate": 0.00021536091025088356, "loss": 0.5971, "step": 6101 }, { "epoch": 1.0884845241280885, "grad_norm": 0.46682703495025635, "learning_rate": 0.00021529152511384997, "loss": 0.7605, "step": 6102 }, { "epoch": 1.0886629203460887, "grad_norm": 0.45366188883781433, "learning_rate": 0.00021522214270280497, "loss": 0.6504, "step": 6103 }, { "epoch": 1.088841316564089, "grad_norm": 0.44408997893333435, "learning_rate": 0.00021515276302319807, "loss": 0.6159, "step": 6104 }, { "epoch": 1.089019712782089, "grad_norm": 0.4182736277580261, "learning_rate": 0.0002150833860804781, "loss": 0.6284, "step": 6105 }, { "epoch": 1.0891981090000893, "grad_norm": 0.4445931911468506, "learning_rate": 0.00021501401188009397, "loss": 0.5778, "step": 6106 }, { "epoch": 1.0893765052180893, "grad_norm": 0.46306318044662476, "learning_rate": 0.00021494464042749427, "loss": 0.6435, "step": 6107 }, { "epoch": 1.0895549014360895, "grad_norm": 0.49822935461997986, "learning_rate": 0.0002148752717281275, "loss": 0.6868, "step": 6108 }, { "epoch": 1.0897332976540897, "grad_norm": 0.4217550754547119, "learning_rate": 0.0002148059057874417, "loss": 0.5048, "step": 6109 }, { "epoch": 1.0899116938720899, "grad_norm": 0.497555136680603, "learning_rate": 0.00021473654261088492, "loss": 0.741, "step": 6110 }, { "epoch": 1.09009009009009, "grad_norm": 0.47492715716362, "learning_rate": 0.00021466718220390494, "loss": 0.6734, "step": 6111 }, { "epoch": 1.0902684863080903, "grad_norm": 0.5085891485214233, "learning_rate": 0.0002145978245719493, "loss": 0.6618, "step": 6112 }, { "epoch": 1.0904468825260905, "grad_norm": 0.45178478956222534, "learning_rate": 0.00021452846972046523, "loss": 0.5666, "step": 6113 }, { "epoch": 1.0906252787440907, "grad_norm": 0.44206705689430237, "learning_rate": 0.0002144591176548999, "loss": 0.5961, "step": 6114 }, { "epoch": 1.0908036749620909, "grad_norm": 0.4810873866081238, "learning_rate": 0.00021438976838070016, "loss": 0.7622, "step": 6115 }, { "epoch": 1.090982071180091, "grad_norm": 0.45664024353027344, "learning_rate": 0.00021432042190331266, "loss": 0.7886, "step": 6116 }, { "epoch": 1.0911604673980912, "grad_norm": 0.439494788646698, "learning_rate": 0.0002142510782281839, "loss": 0.7159, "step": 6117 }, { "epoch": 1.0913388636160914, "grad_norm": 0.5126174688339233, "learning_rate": 0.00021418173736076007, "loss": 0.806, "step": 6118 }, { "epoch": 1.0915172598340914, "grad_norm": 0.465787798166275, "learning_rate": 0.00021411239930648713, "loss": 0.5724, "step": 6119 }, { "epoch": 1.0916956560520916, "grad_norm": 0.5169677138328552, "learning_rate": 0.00021404306407081094, "loss": 0.7997, "step": 6120 }, { "epoch": 1.0918740522700918, "grad_norm": 0.5252036452293396, "learning_rate": 0.00021397373165917704, "loss": 0.7794, "step": 6121 }, { "epoch": 1.092052448488092, "grad_norm": 0.456243097782135, "learning_rate": 0.00021390440207703075, "loss": 0.6367, "step": 6122 }, { "epoch": 1.0922308447060922, "grad_norm": 0.45093104243278503, "learning_rate": 0.00021383507532981717, "loss": 0.5247, "step": 6123 }, { "epoch": 1.0924092409240924, "grad_norm": 0.40191787481307983, "learning_rate": 0.00021376575142298122, "loss": 0.4306, "step": 6124 }, { "epoch": 1.0925876371420926, "grad_norm": 0.4974512755870819, "learning_rate": 0.00021369643036196762, "loss": 0.6029, "step": 6125 }, { "epoch": 1.0927660333600928, "grad_norm": 0.4702337682247162, "learning_rate": 0.0002136271121522207, "loss": 0.6395, "step": 6126 }, { "epoch": 1.092944429578093, "grad_norm": 0.48748883605003357, "learning_rate": 0.00021355779679918475, "loss": 0.6442, "step": 6127 }, { "epoch": 1.0931228257960932, "grad_norm": 0.4701106548309326, "learning_rate": 0.00021348848430830376, "loss": 0.597, "step": 6128 }, { "epoch": 1.0933012220140932, "grad_norm": 0.492400586605072, "learning_rate": 0.00021341917468502148, "loss": 0.6764, "step": 6129 }, { "epoch": 1.0934796182320934, "grad_norm": 0.5202962756156921, "learning_rate": 0.00021334986793478147, "loss": 0.8682, "step": 6130 }, { "epoch": 1.0936580144500936, "grad_norm": 0.4661516547203064, "learning_rate": 0.00021328056406302707, "loss": 0.7784, "step": 6131 }, { "epoch": 1.0938364106680938, "grad_norm": 0.5273587107658386, "learning_rate": 0.0002132112630752014, "loss": 0.6089, "step": 6132 }, { "epoch": 1.094014806886094, "grad_norm": 0.44249820709228516, "learning_rate": 0.0002131419649767473, "loss": 0.539, "step": 6133 }, { "epoch": 1.0941932031040942, "grad_norm": 0.4947591722011566, "learning_rate": 0.00021307266977310728, "loss": 0.8595, "step": 6134 }, { "epoch": 1.0943715993220944, "grad_norm": 0.46888822317123413, "learning_rate": 0.00021300337746972398, "loss": 0.719, "step": 6135 }, { "epoch": 1.0945499955400946, "grad_norm": 0.5104032158851624, "learning_rate": 0.00021293408807203948, "loss": 0.7808, "step": 6136 }, { "epoch": 1.0947283917580948, "grad_norm": 0.5240311622619629, "learning_rate": 0.00021286480158549582, "loss": 0.7997, "step": 6137 }, { "epoch": 1.094906787976095, "grad_norm": 0.5189229249954224, "learning_rate": 0.00021279551801553463, "loss": 0.8566, "step": 6138 }, { "epoch": 1.0950851841940952, "grad_norm": 0.4285975396633148, "learning_rate": 0.00021272623736759742, "loss": 0.7213, "step": 6139 }, { "epoch": 1.0952635804120954, "grad_norm": 0.5412182211875916, "learning_rate": 0.0002126569596471255, "loss": 0.8526, "step": 6140 }, { "epoch": 1.0954419766300953, "grad_norm": 0.5037285685539246, "learning_rate": 0.00021258768485955988, "loss": 0.7894, "step": 6141 }, { "epoch": 1.0956203728480955, "grad_norm": 0.4721333980560303, "learning_rate": 0.00021251841301034142, "loss": 0.7696, "step": 6142 }, { "epoch": 1.0957987690660957, "grad_norm": 0.5173086524009705, "learning_rate": 0.00021244914410491062, "loss": 0.8049, "step": 6143 }, { "epoch": 1.095977165284096, "grad_norm": 0.47684210538864136, "learning_rate": 0.00021237987814870795, "loss": 0.6094, "step": 6144 }, { "epoch": 1.0961555615020961, "grad_norm": 0.42279088497161865, "learning_rate": 0.0002123106151471734, "loss": 0.5532, "step": 6145 }, { "epoch": 1.0963339577200963, "grad_norm": 0.5065193176269531, "learning_rate": 0.00021224135510574703, "loss": 0.6686, "step": 6146 }, { "epoch": 1.0965123539380965, "grad_norm": 0.4805443584918976, "learning_rate": 0.0002121720980298683, "loss": 0.5822, "step": 6147 }, { "epoch": 1.0966907501560967, "grad_norm": 0.4822993278503418, "learning_rate": 0.0002121028439249767, "loss": 0.7553, "step": 6148 }, { "epoch": 1.096869146374097, "grad_norm": 0.4846903383731842, "learning_rate": 0.00021203359279651132, "loss": 0.6991, "step": 6149 }, { "epoch": 1.0970475425920971, "grad_norm": 0.4852921962738037, "learning_rate": 0.0002119643446499114, "loss": 0.7108, "step": 6150 }, { "epoch": 1.0972259388100971, "grad_norm": 0.4798181354999542, "learning_rate": 0.00021189509949061543, "loss": 0.6163, "step": 6151 }, { "epoch": 1.0974043350280973, "grad_norm": 0.5046129822731018, "learning_rate": 0.00021182585732406196, "loss": 0.8545, "step": 6152 }, { "epoch": 1.0975827312460975, "grad_norm": 0.5011611580848694, "learning_rate": 0.0002117566181556892, "loss": 0.7396, "step": 6153 }, { "epoch": 1.0977611274640977, "grad_norm": 0.4669513702392578, "learning_rate": 0.0002116873819909352, "loss": 0.6778, "step": 6154 }, { "epoch": 1.097939523682098, "grad_norm": 0.4715968370437622, "learning_rate": 0.00021161814883523773, "loss": 0.7572, "step": 6155 }, { "epoch": 1.098117919900098, "grad_norm": 0.9391183257102966, "learning_rate": 0.00021154891869403433, "loss": 0.7393, "step": 6156 }, { "epoch": 1.0982963161180983, "grad_norm": 0.49894896149635315, "learning_rate": 0.00021147969157276234, "loss": 0.7028, "step": 6157 }, { "epoch": 1.0984747123360985, "grad_norm": 0.4233601987361908, "learning_rate": 0.00021141046747685883, "loss": 0.6457, "step": 6158 }, { "epoch": 1.0986531085540987, "grad_norm": 0.5224723219871521, "learning_rate": 0.00021134124641176052, "loss": 0.8284, "step": 6159 }, { "epoch": 1.098831504772099, "grad_norm": 0.4575176537036896, "learning_rate": 0.00021127202838290413, "loss": 0.6213, "step": 6160 }, { "epoch": 1.099009900990099, "grad_norm": 0.4861457347869873, "learning_rate": 0.00021120281339572595, "loss": 0.7269, "step": 6161 }, { "epoch": 1.0991882972080993, "grad_norm": 0.49578866362571716, "learning_rate": 0.00021113360145566206, "loss": 0.7596, "step": 6162 }, { "epoch": 1.0993666934260993, "grad_norm": 0.43700939416885376, "learning_rate": 0.00021106439256814844, "loss": 0.6501, "step": 6163 }, { "epoch": 1.0995450896440995, "grad_norm": 0.5099201798439026, "learning_rate": 0.00021099518673862061, "loss": 0.9244, "step": 6164 }, { "epoch": 1.0997234858620997, "grad_norm": 0.434171587228775, "learning_rate": 0.00021092598397251408, "loss": 0.543, "step": 6165 }, { "epoch": 1.0999018820800999, "grad_norm": 0.45645880699157715, "learning_rate": 0.00021085678427526394, "loss": 0.661, "step": 6166 }, { "epoch": 1.1000802782981, "grad_norm": 0.45062491297721863, "learning_rate": 0.00021078758765230514, "loss": 0.5422, "step": 6167 }, { "epoch": 1.1002586745161003, "grad_norm": 0.4871162176132202, "learning_rate": 0.00021071839410907232, "loss": 0.7189, "step": 6168 }, { "epoch": 1.1004370707341005, "grad_norm": 0.3932052254676819, "learning_rate": 0.00021064920365099994, "loss": 0.4697, "step": 6169 }, { "epoch": 1.1006154669521007, "grad_norm": 0.46333831548690796, "learning_rate": 0.00021058001628352214, "loss": 0.6093, "step": 6170 }, { "epoch": 1.1007938631701009, "grad_norm": 0.4353841543197632, "learning_rate": 0.00021051083201207297, "loss": 0.555, "step": 6171 }, { "epoch": 1.100972259388101, "grad_norm": 0.4251698851585388, "learning_rate": 0.000210441650842086, "loss": 0.616, "step": 6172 }, { "epoch": 1.101150655606101, "grad_norm": 0.5128315091133118, "learning_rate": 0.00021037247277899473, "loss": 0.8073, "step": 6173 }, { "epoch": 1.1013290518241012, "grad_norm": 0.49934786558151245, "learning_rate": 0.00021030329782823244, "loss": 0.7532, "step": 6174 }, { "epoch": 1.1015074480421014, "grad_norm": 0.45256903767585754, "learning_rate": 0.00021023412599523202, "loss": 0.6656, "step": 6175 }, { "epoch": 1.1016858442601016, "grad_norm": 0.4254659414291382, "learning_rate": 0.00021016495728542626, "loss": 0.5954, "step": 6176 }, { "epoch": 1.1018642404781018, "grad_norm": 0.4867410957813263, "learning_rate": 0.00021009579170424758, "loss": 0.7753, "step": 6177 }, { "epoch": 1.102042636696102, "grad_norm": 0.42600226402282715, "learning_rate": 0.00021002662925712827, "loss": 0.6506, "step": 6178 }, { "epoch": 1.1022210329141022, "grad_norm": 0.4646792709827423, "learning_rate": 0.00020995746994950036, "loss": 0.722, "step": 6179 }, { "epoch": 1.1023994291321024, "grad_norm": 0.4408302903175354, "learning_rate": 0.00020988831378679536, "loss": 0.5475, "step": 6180 }, { "epoch": 1.1025778253501026, "grad_norm": 0.5261194705963135, "learning_rate": 0.000209819160774445, "loss": 0.8598, "step": 6181 }, { "epoch": 1.1027562215681028, "grad_norm": 0.4445323944091797, "learning_rate": 0.00020975001091788048, "loss": 0.5923, "step": 6182 }, { "epoch": 1.102934617786103, "grad_norm": 0.8575343489646912, "learning_rate": 0.0002096808642225328, "loss": 0.6453, "step": 6183 }, { "epoch": 1.1031130140041032, "grad_norm": 0.47943076491355896, "learning_rate": 0.00020961172069383275, "loss": 0.6593, "step": 6184 }, { "epoch": 1.1032914102221032, "grad_norm": 0.4768347442150116, "learning_rate": 0.00020954258033721072, "loss": 0.81, "step": 6185 }, { "epoch": 1.1034698064401034, "grad_norm": 0.5110130906105042, "learning_rate": 0.00020947344315809703, "loss": 0.7159, "step": 6186 }, { "epoch": 1.1036482026581036, "grad_norm": 0.5536441206932068, "learning_rate": 0.00020940430916192165, "loss": 0.7023, "step": 6187 }, { "epoch": 1.1038265988761038, "grad_norm": 0.5131134986877441, "learning_rate": 0.00020933517835411436, "loss": 0.7933, "step": 6188 }, { "epoch": 1.104004995094104, "grad_norm": 0.4293597936630249, "learning_rate": 0.0002092660507401047, "loss": 0.5803, "step": 6189 }, { "epoch": 1.1041833913121042, "grad_norm": 0.5110434293746948, "learning_rate": 0.00020919692632532182, "loss": 0.7934, "step": 6190 }, { "epoch": 1.1043617875301044, "grad_norm": 0.5271182656288147, "learning_rate": 0.00020912780511519484, "loss": 0.8077, "step": 6191 }, { "epoch": 1.1045401837481046, "grad_norm": 0.4269711673259735, "learning_rate": 0.00020905868711515248, "loss": 0.6145, "step": 6192 }, { "epoch": 1.1047185799661048, "grad_norm": 0.4809115529060364, "learning_rate": 0.0002089895723306232, "loss": 0.6951, "step": 6193 }, { "epoch": 1.104896976184105, "grad_norm": 0.5060098171234131, "learning_rate": 0.00020892046076703523, "loss": 0.9445, "step": 6194 }, { "epoch": 1.105075372402105, "grad_norm": 0.5290777683258057, "learning_rate": 0.00020885135242981647, "loss": 0.7443, "step": 6195 }, { "epoch": 1.1052537686201052, "grad_norm": 0.47494322061538696, "learning_rate": 0.00020878224732439493, "loss": 0.7927, "step": 6196 }, { "epoch": 1.1054321648381054, "grad_norm": 0.44407710433006287, "learning_rate": 0.000208713145456198, "loss": 0.5888, "step": 6197 }, { "epoch": 1.1056105610561056, "grad_norm": 0.45321714878082275, "learning_rate": 0.00020864404683065276, "loss": 0.6145, "step": 6198 }, { "epoch": 1.1057889572741058, "grad_norm": 0.4794192910194397, "learning_rate": 0.00020857495145318634, "loss": 0.7451, "step": 6199 }, { "epoch": 1.105967353492106, "grad_norm": 0.4514195919036865, "learning_rate": 0.0002085058593292254, "loss": 0.6112, "step": 6200 }, { "epoch": 1.1061457497101062, "grad_norm": 0.48285120725631714, "learning_rate": 0.00020843677046419637, "loss": 0.7315, "step": 6201 }, { "epoch": 1.1063241459281064, "grad_norm": 0.4666329324245453, "learning_rate": 0.00020836768486352553, "loss": 0.5705, "step": 6202 }, { "epoch": 1.1065025421461065, "grad_norm": 0.5958988070487976, "learning_rate": 0.0002082986025326388, "loss": 0.6226, "step": 6203 }, { "epoch": 1.1066809383641067, "grad_norm": 0.5066222548484802, "learning_rate": 0.00020822952347696188, "loss": 0.5811, "step": 6204 }, { "epoch": 1.106859334582107, "grad_norm": 0.4955041706562042, "learning_rate": 0.00020816044770192028, "loss": 0.8117, "step": 6205 }, { "epoch": 1.1070377308001071, "grad_norm": 0.4577980637550354, "learning_rate": 0.00020809137521293902, "loss": 0.5482, "step": 6206 }, { "epoch": 1.1072161270181071, "grad_norm": 0.581256091594696, "learning_rate": 0.00020802230601544314, "loss": 0.9406, "step": 6207 }, { "epoch": 1.1073945232361073, "grad_norm": 0.43365752696990967, "learning_rate": 0.00020795324011485728, "loss": 0.5084, "step": 6208 }, { "epoch": 1.1075729194541075, "grad_norm": 0.4679083526134491, "learning_rate": 0.00020788417751660588, "loss": 0.6819, "step": 6209 }, { "epoch": 1.1077513156721077, "grad_norm": 0.41970065236091614, "learning_rate": 0.00020781511822611296, "loss": 0.4688, "step": 6210 }, { "epoch": 1.107929711890108, "grad_norm": 0.4997738301753998, "learning_rate": 0.00020774606224880255, "loss": 0.5245, "step": 6211 }, { "epoch": 1.1081081081081081, "grad_norm": 0.4666350781917572, "learning_rate": 0.00020767700959009824, "loss": 0.7218, "step": 6212 }, { "epoch": 1.1082865043261083, "grad_norm": 0.4743230938911438, "learning_rate": 0.0002076079602554234, "loss": 0.6804, "step": 6213 }, { "epoch": 1.1084649005441085, "grad_norm": 0.5221150517463684, "learning_rate": 0.0002075389142502011, "loss": 0.8767, "step": 6214 }, { "epoch": 1.1086432967621087, "grad_norm": 0.4845503866672516, "learning_rate": 0.0002074698715798542, "loss": 0.6975, "step": 6215 }, { "epoch": 1.108821692980109, "grad_norm": 0.424908846616745, "learning_rate": 0.0002074008322498053, "loss": 0.4744, "step": 6216 }, { "epoch": 1.1090000891981089, "grad_norm": 0.44817301630973816, "learning_rate": 0.00020733179626547667, "loss": 0.5131, "step": 6217 }, { "epoch": 1.109178485416109, "grad_norm": 0.49306273460388184, "learning_rate": 0.0002072627636322905, "loss": 0.7012, "step": 6218 }, { "epoch": 1.1093568816341093, "grad_norm": 0.4819309413433075, "learning_rate": 0.00020719373435566842, "loss": 0.6232, "step": 6219 }, { "epoch": 1.1095352778521095, "grad_norm": 0.5482651591300964, "learning_rate": 0.00020712470844103198, "loss": 0.8316, "step": 6220 }, { "epoch": 1.1097136740701097, "grad_norm": 0.47659358382225037, "learning_rate": 0.00020705568589380252, "loss": 0.7383, "step": 6221 }, { "epoch": 1.1098920702881099, "grad_norm": 0.4586566686630249, "learning_rate": 0.00020698666671940103, "loss": 0.6438, "step": 6222 }, { "epoch": 1.11007046650611, "grad_norm": 0.5156270861625671, "learning_rate": 0.0002069176509232482, "loss": 0.7463, "step": 6223 }, { "epoch": 1.1102488627241103, "grad_norm": 0.4540422260761261, "learning_rate": 0.0002068486385107645, "loss": 0.7645, "step": 6224 }, { "epoch": 1.1104272589421105, "grad_norm": 0.39585331082344055, "learning_rate": 0.00020677962948737022, "loss": 0.5105, "step": 6225 }, { "epoch": 1.1106056551601107, "grad_norm": 0.4956061840057373, "learning_rate": 0.00020671062385848517, "loss": 0.7741, "step": 6226 }, { "epoch": 1.1107840513781109, "grad_norm": 0.45627540349960327, "learning_rate": 0.00020664162162952913, "loss": 0.6525, "step": 6227 }, { "epoch": 1.110962447596111, "grad_norm": 0.45656707882881165, "learning_rate": 0.00020657262280592147, "loss": 0.6371, "step": 6228 }, { "epoch": 1.111140843814111, "grad_norm": 0.5140594840049744, "learning_rate": 0.0002065036273930813, "loss": 0.8361, "step": 6229 }, { "epoch": 1.1113192400321112, "grad_norm": 0.4385344386100769, "learning_rate": 0.00020643463539642766, "loss": 0.5181, "step": 6230 }, { "epoch": 1.1114976362501114, "grad_norm": 0.5141750574111938, "learning_rate": 0.00020636564682137887, "loss": 0.5044, "step": 6231 }, { "epoch": 1.1116760324681116, "grad_norm": 0.5330828428268433, "learning_rate": 0.00020629666167335342, "loss": 0.7067, "step": 6232 }, { "epoch": 1.1118544286861118, "grad_norm": 0.5701097249984741, "learning_rate": 0.00020622767995776936, "loss": 0.8013, "step": 6233 }, { "epoch": 1.112032824904112, "grad_norm": 0.5556646585464478, "learning_rate": 0.00020615870168004449, "loss": 0.8375, "step": 6234 }, { "epoch": 1.1122112211221122, "grad_norm": 0.5408580303192139, "learning_rate": 0.0002060897268455963, "loss": 0.5886, "step": 6235 }, { "epoch": 1.1123896173401124, "grad_norm": 0.4653705954551697, "learning_rate": 0.0002060207554598421, "loss": 0.6007, "step": 6236 }, { "epoch": 1.1125680135581126, "grad_norm": 0.485100656747818, "learning_rate": 0.00020595178752819883, "loss": 0.6615, "step": 6237 }, { "epoch": 1.1127464097761128, "grad_norm": 0.4773385226726532, "learning_rate": 0.00020588282305608325, "loss": 0.7062, "step": 6238 }, { "epoch": 1.1129248059941128, "grad_norm": 0.538426399230957, "learning_rate": 0.00020581386204891172, "loss": 0.8382, "step": 6239 }, { "epoch": 1.113103202212113, "grad_norm": 0.45640110969543457, "learning_rate": 0.00020574490451210045, "loss": 0.6707, "step": 6240 }, { "epoch": 1.1132815984301132, "grad_norm": 0.5065681338310242, "learning_rate": 0.00020567595045106523, "loss": 0.7169, "step": 6241 }, { "epoch": 1.1134599946481134, "grad_norm": 0.49156734347343445, "learning_rate": 0.0002056069998712219, "loss": 0.6726, "step": 6242 }, { "epoch": 1.1136383908661136, "grad_norm": 0.4353838860988617, "learning_rate": 0.00020553805277798574, "loss": 0.5635, "step": 6243 }, { "epoch": 1.1138167870841138, "grad_norm": 0.5049417614936829, "learning_rate": 0.00020546910917677172, "loss": 0.8726, "step": 6244 }, { "epoch": 1.113995183302114, "grad_norm": 0.46186742186546326, "learning_rate": 0.00020540016907299473, "loss": 0.5505, "step": 6245 }, { "epoch": 1.1141735795201142, "grad_norm": 0.38499900698661804, "learning_rate": 0.0002053312324720692, "loss": 0.4884, "step": 6246 }, { "epoch": 1.1143519757381144, "grad_norm": 0.4557759165763855, "learning_rate": 0.00020526229937940956, "loss": 0.5865, "step": 6247 }, { "epoch": 1.1145303719561146, "grad_norm": 0.4711809754371643, "learning_rate": 0.00020519336980042956, "loss": 0.6212, "step": 6248 }, { "epoch": 1.1147087681741148, "grad_norm": 0.5073553323745728, "learning_rate": 0.00020512444374054309, "loss": 0.7891, "step": 6249 }, { "epoch": 1.114887164392115, "grad_norm": 0.5361490845680237, "learning_rate": 0.00020505552120516347, "loss": 0.8727, "step": 6250 }, { "epoch": 1.115065560610115, "grad_norm": 0.49489954113960266, "learning_rate": 0.00020498660219970394, "loss": 0.6391, "step": 6251 }, { "epoch": 1.1152439568281152, "grad_norm": 0.44518017768859863, "learning_rate": 0.00020491768672957722, "loss": 0.6854, "step": 6252 }, { "epoch": 1.1154223530461154, "grad_norm": 0.501320481300354, "learning_rate": 0.00020484877480019602, "loss": 0.8994, "step": 6253 }, { "epoch": 1.1156007492641156, "grad_norm": 0.5842064023017883, "learning_rate": 0.0002047798664169726, "loss": 0.988, "step": 6254 }, { "epoch": 1.1157791454821158, "grad_norm": 0.4702877998352051, "learning_rate": 0.000204710961585319, "loss": 0.8212, "step": 6255 }, { "epoch": 1.115957541700116, "grad_norm": 0.4764789640903473, "learning_rate": 0.00020464206031064694, "loss": 0.6702, "step": 6256 }, { "epoch": 1.1161359379181162, "grad_norm": 0.4664061367511749, "learning_rate": 0.000204573162598368, "loss": 0.6985, "step": 6257 }, { "epoch": 1.1163143341361164, "grad_norm": 1.0998966693878174, "learning_rate": 0.00020450426845389333, "loss": 0.6394, "step": 6258 }, { "epoch": 1.1164927303541166, "grad_norm": 0.4073130786418915, "learning_rate": 0.00020443537788263384, "loss": 0.5515, "step": 6259 }, { "epoch": 1.1166711265721168, "grad_norm": 0.48286718130111694, "learning_rate": 0.00020436649089000013, "loss": 0.7877, "step": 6260 }, { "epoch": 1.1168495227901167, "grad_norm": 0.505884051322937, "learning_rate": 0.00020429760748140262, "loss": 0.7128, "step": 6261 }, { "epoch": 1.117027919008117, "grad_norm": 0.6370548605918884, "learning_rate": 0.00020422872766225137, "loss": 0.9009, "step": 6262 }, { "epoch": 1.1172063152261171, "grad_norm": 0.4987682104110718, "learning_rate": 0.00020415985143795612, "loss": 0.6506, "step": 6263 }, { "epoch": 1.1173847114441173, "grad_norm": 0.7356500029563904, "learning_rate": 0.00020409097881392646, "loss": 0.7871, "step": 6264 }, { "epoch": 1.1175631076621175, "grad_norm": 0.578613817691803, "learning_rate": 0.00020402210979557153, "loss": 0.7906, "step": 6265 }, { "epoch": 1.1177415038801177, "grad_norm": 0.4457342028617859, "learning_rate": 0.00020395324438830033, "loss": 0.6093, "step": 6266 }, { "epoch": 1.117919900098118, "grad_norm": 0.4944089949131012, "learning_rate": 0.00020388438259752147, "loss": 0.7959, "step": 6267 }, { "epoch": 1.1180982963161181, "grad_norm": 0.46992331743240356, "learning_rate": 0.00020381552442864337, "loss": 0.6263, "step": 6268 }, { "epoch": 1.1182766925341183, "grad_norm": 0.42646077275276184, "learning_rate": 0.00020374666988707407, "loss": 0.5327, "step": 6269 }, { "epoch": 1.1184550887521185, "grad_norm": 0.48363184928894043, "learning_rate": 0.00020367781897822146, "loss": 0.7761, "step": 6270 }, { "epoch": 1.1186334849701187, "grad_norm": 0.47337979078292847, "learning_rate": 0.00020360897170749299, "loss": 0.6731, "step": 6271 }, { "epoch": 1.118811881188119, "grad_norm": 0.4817976653575897, "learning_rate": 0.00020354012808029587, "loss": 0.6265, "step": 6272 }, { "epoch": 1.118990277406119, "grad_norm": 0.45678937435150146, "learning_rate": 0.00020347128810203717, "loss": 0.6553, "step": 6273 }, { "epoch": 1.119168673624119, "grad_norm": 0.483571857213974, "learning_rate": 0.00020340245177812344, "loss": 0.746, "step": 6274 }, { "epoch": 1.1193470698421193, "grad_norm": 0.4845406115055084, "learning_rate": 0.00020333361911396112, "loss": 0.6712, "step": 6275 }, { "epoch": 1.1195254660601195, "grad_norm": 0.464175820350647, "learning_rate": 0.00020326479011495627, "loss": 0.5484, "step": 6276 }, { "epoch": 1.1197038622781197, "grad_norm": 0.48887899518013, "learning_rate": 0.00020319596478651477, "loss": 0.6694, "step": 6277 }, { "epoch": 1.1198822584961199, "grad_norm": 0.4447461664676666, "learning_rate": 0.00020312714313404197, "loss": 0.4637, "step": 6278 }, { "epoch": 1.12006065471412, "grad_norm": 0.5541086792945862, "learning_rate": 0.00020305832516294314, "loss": 0.7051, "step": 6279 }, { "epoch": 1.1202390509321203, "grad_norm": 0.43977195024490356, "learning_rate": 0.00020298951087862333, "loss": 0.5394, "step": 6280 }, { "epoch": 1.1204174471501205, "grad_norm": 0.5157051682472229, "learning_rate": 0.00020292070028648707, "loss": 0.7626, "step": 6281 }, { "epoch": 1.1205958433681207, "grad_norm": 0.4950587749481201, "learning_rate": 0.00020285189339193873, "loss": 0.704, "step": 6282 }, { "epoch": 1.1207742395861207, "grad_norm": 0.4870520532131195, "learning_rate": 0.0002027830902003824, "loss": 0.6548, "step": 6283 }, { "epoch": 1.1209526358041209, "grad_norm": 0.5210698246955872, "learning_rate": 0.00020271429071722186, "loss": 0.6496, "step": 6284 }, { "epoch": 1.121131032022121, "grad_norm": 0.4650278091430664, "learning_rate": 0.00020264549494786066, "loss": 0.5775, "step": 6285 }, { "epoch": 1.1213094282401213, "grad_norm": 0.5056297779083252, "learning_rate": 0.00020257670289770181, "loss": 0.7124, "step": 6286 }, { "epoch": 1.1214878244581215, "grad_norm": 0.4929437041282654, "learning_rate": 0.00020250791457214823, "loss": 0.9018, "step": 6287 }, { "epoch": 1.1216662206761217, "grad_norm": 0.5105372071266174, "learning_rate": 0.0002024391299766027, "loss": 0.7514, "step": 6288 }, { "epoch": 1.1218446168941218, "grad_norm": 0.4985578656196594, "learning_rate": 0.00020237034911646745, "loss": 0.6705, "step": 6289 }, { "epoch": 1.122023013112122, "grad_norm": 0.5785099864006042, "learning_rate": 0.0002023015719971445, "loss": 1.1394, "step": 6290 }, { "epoch": 1.1222014093301222, "grad_norm": 0.5343894362449646, "learning_rate": 0.0002022327986240355, "loss": 0.8509, "step": 6291 }, { "epoch": 1.1223798055481224, "grad_norm": 0.4681146442890167, "learning_rate": 0.00020216402900254197, "loss": 0.5766, "step": 6292 }, { "epoch": 1.1225582017661226, "grad_norm": 0.4778856039047241, "learning_rate": 0.000202095263138065, "loss": 0.8314, "step": 6293 }, { "epoch": 1.1227365979841228, "grad_norm": 0.44656750559806824, "learning_rate": 0.00020202650103600544, "loss": 0.6106, "step": 6294 }, { "epoch": 1.1229149942021228, "grad_norm": 0.5104446411132812, "learning_rate": 0.00020195774270176386, "loss": 0.6599, "step": 6295 }, { "epoch": 1.123093390420123, "grad_norm": 0.44691547751426697, "learning_rate": 0.0002018889881407405, "loss": 0.5945, "step": 6296 }, { "epoch": 1.1232717866381232, "grad_norm": 0.5005384683609009, "learning_rate": 0.00020182023735833531, "loss": 0.5805, "step": 6297 }, { "epoch": 1.1234501828561234, "grad_norm": 0.5261300802230835, "learning_rate": 0.000201751490359948, "loss": 0.825, "step": 6298 }, { "epoch": 1.1236285790741236, "grad_norm": 0.47424525022506714, "learning_rate": 0.00020168274715097782, "loss": 0.6754, "step": 6299 }, { "epoch": 1.1238069752921238, "grad_norm": 0.48731860518455505, "learning_rate": 0.00020161400773682387, "loss": 0.6607, "step": 6300 }, { "epoch": 1.123985371510124, "grad_norm": 0.4794895350933075, "learning_rate": 0.00020154527212288493, "loss": 0.6262, "step": 6301 }, { "epoch": 1.1241637677281242, "grad_norm": 0.4823654592037201, "learning_rate": 0.00020147654031455942, "loss": 0.6762, "step": 6302 }, { "epoch": 1.1243421639461244, "grad_norm": 0.5071163177490234, "learning_rate": 0.0002014078123172456, "loss": 0.7975, "step": 6303 }, { "epoch": 1.1245205601641246, "grad_norm": 0.4861220419406891, "learning_rate": 0.0002013390881363413, "loss": 0.6742, "step": 6304 }, { "epoch": 1.1246989563821246, "grad_norm": 0.5352680087089539, "learning_rate": 0.00020127036777724407, "loss": 0.7588, "step": 6305 }, { "epoch": 1.1248773526001248, "grad_norm": 0.4422907829284668, "learning_rate": 0.00020120165124535119, "loss": 0.6386, "step": 6306 }, { "epoch": 1.125055748818125, "grad_norm": 0.4726184606552124, "learning_rate": 0.00020113293854605963, "loss": 0.6324, "step": 6307 }, { "epoch": 1.1252341450361252, "grad_norm": 0.5098016858100891, "learning_rate": 0.00020106422968476604, "loss": 0.7632, "step": 6308 }, { "epoch": 1.1254125412541254, "grad_norm": 0.4972569942474365, "learning_rate": 0.00020099552466686677, "loss": 0.6985, "step": 6309 }, { "epoch": 1.1255909374721256, "grad_norm": 0.4584605097770691, "learning_rate": 0.00020092682349775797, "loss": 0.6985, "step": 6310 }, { "epoch": 1.1257693336901258, "grad_norm": 0.49379658699035645, "learning_rate": 0.0002008581261828353, "loss": 0.5424, "step": 6311 }, { "epoch": 1.125947729908126, "grad_norm": 0.5629817247390747, "learning_rate": 0.00020078943272749426, "loss": 0.6988, "step": 6312 }, { "epoch": 1.1261261261261262, "grad_norm": 0.49711212515830994, "learning_rate": 0.00020072074313712995, "loss": 0.6466, "step": 6313 }, { "epoch": 1.1263045223441264, "grad_norm": 0.5518889427185059, "learning_rate": 0.00020065205741713732, "loss": 0.7526, "step": 6314 }, { "epoch": 1.1264829185621266, "grad_norm": 0.44839757680892944, "learning_rate": 0.00020058337557291085, "loss": 0.6743, "step": 6315 }, { "epoch": 1.1266613147801268, "grad_norm": 0.4963269531726837, "learning_rate": 0.00020051469760984475, "loss": 0.7584, "step": 6316 }, { "epoch": 1.126839710998127, "grad_norm": 0.4421939551830292, "learning_rate": 0.00020044602353333304, "loss": 0.6155, "step": 6317 }, { "epoch": 1.127018107216127, "grad_norm": 0.47304272651672363, "learning_rate": 0.00020037735334876928, "loss": 0.843, "step": 6318 }, { "epoch": 1.1271965034341271, "grad_norm": 0.4691406786441803, "learning_rate": 0.00020030868706154688, "loss": 0.8837, "step": 6319 }, { "epoch": 1.1273748996521273, "grad_norm": 0.470851868391037, "learning_rate": 0.00020024002467705878, "loss": 0.7696, "step": 6320 }, { "epoch": 1.1275532958701275, "grad_norm": 0.5345527529716492, "learning_rate": 0.00020017136620069777, "loss": 0.9637, "step": 6321 }, { "epoch": 1.1277316920881277, "grad_norm": 0.47198501229286194, "learning_rate": 0.00020010271163785622, "loss": 0.705, "step": 6322 }, { "epoch": 1.127910088306128, "grad_norm": 0.5204360485076904, "learning_rate": 0.00020003406099392625, "loss": 0.6943, "step": 6323 }, { "epoch": 1.1280884845241281, "grad_norm": 0.49591392278671265, "learning_rate": 0.00019996541427429957, "loss": 0.6205, "step": 6324 }, { "epoch": 1.1282668807421283, "grad_norm": 0.45050108432769775, "learning_rate": 0.0001998967714843677, "loss": 0.5172, "step": 6325 }, { "epoch": 1.1284452769601285, "grad_norm": 0.45870310068130493, "learning_rate": 0.0001998281326295219, "loss": 0.6089, "step": 6326 }, { "epoch": 1.1286236731781285, "grad_norm": 0.4750981032848358, "learning_rate": 0.00019975949771515296, "loss": 0.6546, "step": 6327 }, { "epoch": 1.1288020693961287, "grad_norm": 1.4412025213241577, "learning_rate": 0.0001996908667466514, "loss": 0.7441, "step": 6328 }, { "epoch": 1.128980465614129, "grad_norm": 0.49182528257369995, "learning_rate": 0.00019962223972940757, "loss": 0.6518, "step": 6329 }, { "epoch": 1.129158861832129, "grad_norm": 0.5143123269081116, "learning_rate": 0.00019955361666881133, "loss": 0.6357, "step": 6330 }, { "epoch": 1.1293372580501293, "grad_norm": 1.9303398132324219, "learning_rate": 0.00019948499757025239, "loss": 0.5802, "step": 6331 }, { "epoch": 1.1295156542681295, "grad_norm": 0.5342963337898254, "learning_rate": 0.00019941638243911993, "loss": 0.6309, "step": 6332 }, { "epoch": 1.1296940504861297, "grad_norm": 0.4394116997718811, "learning_rate": 0.00019934777128080292, "loss": 0.6286, "step": 6333 }, { "epoch": 1.12987244670413, "grad_norm": 0.5031057596206665, "learning_rate": 0.00019927916410069027, "loss": 0.6877, "step": 6334 }, { "epoch": 1.13005084292213, "grad_norm": 0.4916764795780182, "learning_rate": 0.00019921056090417026, "loss": 0.7054, "step": 6335 }, { "epoch": 1.1302292391401303, "grad_norm": 0.535476803779602, "learning_rate": 0.00019914196169663095, "loss": 0.7493, "step": 6336 }, { "epoch": 1.1304076353581305, "grad_norm": 0.5123772025108337, "learning_rate": 0.00019907336648346008, "loss": 0.6594, "step": 6337 }, { "epoch": 1.1305860315761307, "grad_norm": 0.5296303629875183, "learning_rate": 0.00019900477527004507, "loss": 0.7239, "step": 6338 }, { "epoch": 1.1307644277941309, "grad_norm": 0.5358021259307861, "learning_rate": 0.00019893618806177306, "loss": 0.9083, "step": 6339 }, { "epoch": 1.1309428240121309, "grad_norm": 0.47676989436149597, "learning_rate": 0.00019886760486403088, "loss": 0.5314, "step": 6340 }, { "epoch": 1.131121220230131, "grad_norm": 0.5259501934051514, "learning_rate": 0.00019879902568220497, "loss": 0.7819, "step": 6341 }, { "epoch": 1.1312996164481313, "grad_norm": 0.4975660741329193, "learning_rate": 0.00019873045052168158, "loss": 0.6725, "step": 6342 }, { "epoch": 1.1314780126661315, "grad_norm": 0.5157343149185181, "learning_rate": 0.00019866187938784657, "loss": 0.6001, "step": 6343 }, { "epoch": 1.1316564088841317, "grad_norm": 0.43325984477996826, "learning_rate": 0.00019859331228608547, "loss": 0.6975, "step": 6344 }, { "epoch": 1.1318348051021319, "grad_norm": 0.49547433853149414, "learning_rate": 0.0001985247492217835, "loss": 0.6571, "step": 6345 }, { "epoch": 1.132013201320132, "grad_norm": 0.49157583713531494, "learning_rate": 0.00019845619020032553, "loss": 0.7212, "step": 6346 }, { "epoch": 1.1321915975381323, "grad_norm": 0.48966532945632935, "learning_rate": 0.0001983876352270962, "loss": 0.5914, "step": 6347 }, { "epoch": 1.1323699937561325, "grad_norm": 0.5003472566604614, "learning_rate": 0.0001983190843074797, "loss": 0.5599, "step": 6348 }, { "epoch": 1.1325483899741324, "grad_norm": 0.48509982228279114, "learning_rate": 0.00019825053744686023, "loss": 0.6925, "step": 6349 }, { "epoch": 1.1327267861921326, "grad_norm": 0.4695090651512146, "learning_rate": 0.00019818199465062122, "loss": 0.7031, "step": 6350 }, { "epoch": 1.1329051824101328, "grad_norm": 0.494907408952713, "learning_rate": 0.00019811345592414607, "loss": 0.6428, "step": 6351 }, { "epoch": 1.133083578628133, "grad_norm": 0.5415506958961487, "learning_rate": 0.00019804492127281772, "loss": 0.9687, "step": 6352 }, { "epoch": 1.1332619748461332, "grad_norm": 0.45336905121803284, "learning_rate": 0.00019797639070201896, "loss": 0.6972, "step": 6353 }, { "epoch": 1.1334403710641334, "grad_norm": 0.4650866687297821, "learning_rate": 0.00019790786421713204, "loss": 0.6297, "step": 6354 }, { "epoch": 1.1336187672821336, "grad_norm": 0.4537782669067383, "learning_rate": 0.00019783934182353904, "loss": 0.5633, "step": 6355 }, { "epoch": 1.1337971635001338, "grad_norm": 0.45536094903945923, "learning_rate": 0.00019777082352662173, "loss": 0.6543, "step": 6356 }, { "epoch": 1.133975559718134, "grad_norm": 0.4556442201137543, "learning_rate": 0.00019770230933176147, "loss": 0.6048, "step": 6357 }, { "epoch": 1.1341539559361342, "grad_norm": 0.44016602635383606, "learning_rate": 0.00019763379924433934, "loss": 0.6132, "step": 6358 }, { "epoch": 1.1343323521541344, "grad_norm": 0.5556267499923706, "learning_rate": 0.00019756529326973602, "loss": 0.8597, "step": 6359 }, { "epoch": 1.1345107483721346, "grad_norm": 0.5245857834815979, "learning_rate": 0.00019749679141333205, "loss": 0.6483, "step": 6360 }, { "epoch": 1.1346891445901348, "grad_norm": 0.4881051182746887, "learning_rate": 0.00019742829368050744, "loss": 0.6523, "step": 6361 }, { "epoch": 1.1348675408081348, "grad_norm": 0.4933343231678009, "learning_rate": 0.00019735980007664207, "loss": 0.6629, "step": 6362 }, { "epoch": 1.135045937026135, "grad_norm": 0.4620760977268219, "learning_rate": 0.0001972913106071153, "loss": 0.6428, "step": 6363 }, { "epoch": 1.1352243332441352, "grad_norm": 0.5003635883331299, "learning_rate": 0.00019722282527730628, "loss": 0.6564, "step": 6364 }, { "epoch": 1.1354027294621354, "grad_norm": 0.41504591703414917, "learning_rate": 0.0001971543440925939, "loss": 0.441, "step": 6365 }, { "epoch": 1.1355811256801356, "grad_norm": 0.49655354022979736, "learning_rate": 0.0001970858670583566, "loss": 0.6645, "step": 6366 }, { "epoch": 1.1357595218981358, "grad_norm": 0.4752812087535858, "learning_rate": 0.00019701739417997256, "loss": 0.6179, "step": 6367 }, { "epoch": 1.135937918116136, "grad_norm": 0.46678200364112854, "learning_rate": 0.00019694892546281954, "loss": 0.5577, "step": 6368 }, { "epoch": 1.1361163143341362, "grad_norm": 0.5003397464752197, "learning_rate": 0.0001968804609122751, "loss": 0.6509, "step": 6369 }, { "epoch": 1.1362947105521364, "grad_norm": 0.5790354013442993, "learning_rate": 0.00019681200053371645, "loss": 0.6953, "step": 6370 }, { "epoch": 1.1364731067701364, "grad_norm": 0.5248673558235168, "learning_rate": 0.00019674354433252034, "loss": 0.6928, "step": 6371 }, { "epoch": 1.1366515029881366, "grad_norm": 0.5383015871047974, "learning_rate": 0.00019667509231406332, "loss": 0.6522, "step": 6372 }, { "epoch": 1.1368298992061368, "grad_norm": 0.5394713282585144, "learning_rate": 0.00019660664448372162, "loss": 0.6593, "step": 6373 }, { "epoch": 1.137008295424137, "grad_norm": 0.5048239827156067, "learning_rate": 0.00019653820084687107, "loss": 0.8368, "step": 6374 }, { "epoch": 1.1371866916421371, "grad_norm": 0.4014268219470978, "learning_rate": 0.00019646976140888725, "loss": 0.5085, "step": 6375 }, { "epoch": 1.1373650878601373, "grad_norm": 0.4501616954803467, "learning_rate": 0.00019640132617514534, "loss": 0.5781, "step": 6376 }, { "epoch": 1.1375434840781375, "grad_norm": 0.5049312114715576, "learning_rate": 0.00019633289515102017, "loss": 0.8279, "step": 6377 }, { "epoch": 1.1377218802961377, "grad_norm": 0.4441901743412018, "learning_rate": 0.00019626446834188638, "loss": 0.58, "step": 6378 }, { "epoch": 1.137900276514138, "grad_norm": 0.4739856719970703, "learning_rate": 0.00019619604575311797, "loss": 0.755, "step": 6379 }, { "epoch": 1.1380786727321381, "grad_norm": 0.4583461880683899, "learning_rate": 0.0001961276273900891, "loss": 0.5062, "step": 6380 }, { "epoch": 1.1382570689501383, "grad_norm": 0.4415271282196045, "learning_rate": 0.00019605921325817317, "loss": 0.5786, "step": 6381 }, { "epoch": 1.1384354651681385, "grad_norm": 0.5233315229415894, "learning_rate": 0.00019599080336274343, "loss": 0.8495, "step": 6382 }, { "epoch": 1.1386138613861387, "grad_norm": 0.5158662796020508, "learning_rate": 0.00019592239770917276, "loss": 0.7314, "step": 6383 }, { "epoch": 1.1387922576041387, "grad_norm": 0.6792814135551453, "learning_rate": 0.00019585399630283367, "loss": 0.7851, "step": 6384 }, { "epoch": 1.138970653822139, "grad_norm": 0.44133511185646057, "learning_rate": 0.00019578559914909844, "loss": 0.6067, "step": 6385 }, { "epoch": 1.139149050040139, "grad_norm": 0.4633074700832367, "learning_rate": 0.00019571720625333888, "loss": 0.6206, "step": 6386 }, { "epoch": 1.1393274462581393, "grad_norm": 0.7552552223205566, "learning_rate": 0.00019564881762092662, "loss": 0.659, "step": 6387 }, { "epoch": 1.1395058424761395, "grad_norm": 0.5166911482810974, "learning_rate": 0.00019558043325723282, "loss": 0.6211, "step": 6388 }, { "epoch": 1.1396842386941397, "grad_norm": 0.5080016851425171, "learning_rate": 0.00019551205316762838, "loss": 0.8162, "step": 6389 }, { "epoch": 1.13986263491214, "grad_norm": 0.5829038619995117, "learning_rate": 0.00019544367735748388, "loss": 0.9879, "step": 6390 }, { "epoch": 1.14004103113014, "grad_norm": 0.49968674778938293, "learning_rate": 0.00019537530583216945, "loss": 0.6904, "step": 6391 }, { "epoch": 1.1402194273481403, "grad_norm": 0.469959557056427, "learning_rate": 0.00019530693859705497, "loss": 0.748, "step": 6392 }, { "epoch": 1.1403978235661403, "grad_norm": 0.5133938193321228, "learning_rate": 0.00019523857565751003, "loss": 0.7729, "step": 6393 }, { "epoch": 1.1405762197841405, "grad_norm": 0.543165385723114, "learning_rate": 0.00019517021701890365, "loss": 0.7534, "step": 6394 }, { "epoch": 1.1407546160021407, "grad_norm": 0.4762142300605774, "learning_rate": 0.00019510186268660497, "loss": 0.4908, "step": 6395 }, { "epoch": 1.1409330122201409, "grad_norm": 0.5711979866027832, "learning_rate": 0.00019503351266598234, "loss": 0.7407, "step": 6396 }, { "epoch": 1.141111408438141, "grad_norm": 0.4412236511707306, "learning_rate": 0.00019496516696240399, "loss": 0.6312, "step": 6397 }, { "epoch": 1.1412898046561413, "grad_norm": 0.5444515347480774, "learning_rate": 0.00019489682558123772, "loss": 0.6745, "step": 6398 }, { "epoch": 1.1414682008741415, "grad_norm": 0.4231199026107788, "learning_rate": 0.00019482848852785107, "loss": 0.4643, "step": 6399 }, { "epoch": 1.1416465970921417, "grad_norm": 0.49194931983947754, "learning_rate": 0.00019476015580761118, "loss": 0.6983, "step": 6400 }, { "epoch": 1.1418249933101419, "grad_norm": 0.5072386860847473, "learning_rate": 0.0001946918274258849, "loss": 0.7764, "step": 6401 }, { "epoch": 1.142003389528142, "grad_norm": 0.4746543765068054, "learning_rate": 0.0001946235033880387, "loss": 0.7924, "step": 6402 }, { "epoch": 1.1421817857461423, "grad_norm": 0.4350649416446686, "learning_rate": 0.00019455518369943873, "loss": 0.4785, "step": 6403 }, { "epoch": 1.1423601819641425, "grad_norm": 0.5181750655174255, "learning_rate": 0.00019448686836545073, "loss": 0.8109, "step": 6404 }, { "epoch": 1.1425385781821427, "grad_norm": 0.6082465052604675, "learning_rate": 0.0001944185573914402, "loss": 0.6327, "step": 6405 }, { "epoch": 1.1427169744001426, "grad_norm": 0.5113035440444946, "learning_rate": 0.00019435025078277227, "loss": 0.7438, "step": 6406 }, { "epoch": 1.1428953706181428, "grad_norm": 0.4369472861289978, "learning_rate": 0.00019428194854481169, "loss": 0.5909, "step": 6407 }, { "epoch": 1.143073766836143, "grad_norm": 0.4993836283683777, "learning_rate": 0.00019421365068292287, "loss": 0.6107, "step": 6408 }, { "epoch": 1.1432521630541432, "grad_norm": 0.41479700803756714, "learning_rate": 0.0001941453572024699, "loss": 0.496, "step": 6409 }, { "epoch": 1.1434305592721434, "grad_norm": 0.5059956312179565, "learning_rate": 0.00019407706810881657, "loss": 0.7022, "step": 6410 }, { "epoch": 1.1436089554901436, "grad_norm": 0.46626099944114685, "learning_rate": 0.00019400878340732625, "loss": 0.5752, "step": 6411 }, { "epoch": 1.1437873517081438, "grad_norm": 0.44759035110473633, "learning_rate": 0.00019394050310336198, "loss": 0.5391, "step": 6412 }, { "epoch": 1.143965747926144, "grad_norm": 0.4467226564884186, "learning_rate": 0.0001938722272022865, "loss": 0.4655, "step": 6413 }, { "epoch": 1.1441441441441442, "grad_norm": 0.49334871768951416, "learning_rate": 0.0001938039557094621, "loss": 0.6975, "step": 6414 }, { "epoch": 1.1443225403621442, "grad_norm": 0.5575069785118103, "learning_rate": 0.00019373568863025086, "loss": 0.7331, "step": 6415 }, { "epoch": 1.1445009365801444, "grad_norm": 0.5732327699661255, "learning_rate": 0.00019366742597001446, "loss": 0.9117, "step": 6416 }, { "epoch": 1.1446793327981446, "grad_norm": 0.5360227227210999, "learning_rate": 0.00019359916773411414, "loss": 0.8448, "step": 6417 }, { "epoch": 1.1448577290161448, "grad_norm": 0.4648105204105377, "learning_rate": 0.00019353091392791094, "loss": 0.5582, "step": 6418 }, { "epoch": 1.145036125234145, "grad_norm": 0.5667071342468262, "learning_rate": 0.00019346266455676542, "loss": 0.7995, "step": 6419 }, { "epoch": 1.1452145214521452, "grad_norm": 0.4563814699649811, "learning_rate": 0.00019339441962603794, "loss": 0.7236, "step": 6420 }, { "epoch": 1.1453929176701454, "grad_norm": 0.4414633512496948, "learning_rate": 0.00019332617914108834, "loss": 0.5218, "step": 6421 }, { "epoch": 1.1455713138881456, "grad_norm": 0.46729329228401184, "learning_rate": 0.00019325794310727626, "loss": 0.7908, "step": 6422 }, { "epoch": 1.1457497101061458, "grad_norm": 0.4846903383731842, "learning_rate": 0.0001931897115299609, "loss": 0.6743, "step": 6423 }, { "epoch": 1.145928106324146, "grad_norm": 0.4348449409008026, "learning_rate": 0.00019312148441450122, "loss": 0.5317, "step": 6424 }, { "epoch": 1.1461065025421462, "grad_norm": 0.4379520118236542, "learning_rate": 0.0001930532617662555, "loss": 0.6015, "step": 6425 }, { "epoch": 1.1462848987601464, "grad_norm": 0.42976364493370056, "learning_rate": 0.00019298504359058222, "loss": 0.4379, "step": 6426 }, { "epoch": 1.1464632949781466, "grad_norm": 0.48329517245292664, "learning_rate": 0.00019291682989283908, "loss": 0.6796, "step": 6427 }, { "epoch": 1.1466416911961466, "grad_norm": 0.4555102288722992, "learning_rate": 0.00019284862067838352, "loss": 0.5488, "step": 6428 }, { "epoch": 1.1468200874141468, "grad_norm": 0.49463093280792236, "learning_rate": 0.0001927804159525728, "loss": 0.7097, "step": 6429 }, { "epoch": 1.146998483632147, "grad_norm": 0.5678471922874451, "learning_rate": 0.0001927122157207635, "loss": 0.9311, "step": 6430 }, { "epoch": 1.1471768798501472, "grad_norm": 0.5055651068687439, "learning_rate": 0.00019264401998831212, "loss": 0.737, "step": 6431 }, { "epoch": 1.1473552760681474, "grad_norm": 0.5009707808494568, "learning_rate": 0.00019257582876057474, "loss": 0.6349, "step": 6432 }, { "epoch": 1.1475336722861476, "grad_norm": 0.4183083176612854, "learning_rate": 0.00019250764204290709, "loss": 0.5802, "step": 6433 }, { "epoch": 1.1477120685041478, "grad_norm": 0.44495829939842224, "learning_rate": 0.00019243945984066444, "loss": 0.5597, "step": 6434 }, { "epoch": 1.147890464722148, "grad_norm": 0.5188366770744324, "learning_rate": 0.00019237128215920187, "loss": 0.7409, "step": 6435 }, { "epoch": 1.1480688609401482, "grad_norm": 0.49368321895599365, "learning_rate": 0.000192303109003874, "loss": 0.6452, "step": 6436 }, { "epoch": 1.1482472571581481, "grad_norm": 0.41561809182167053, "learning_rate": 0.00019223494038003516, "loss": 0.6164, "step": 6437 }, { "epoch": 1.1484256533761483, "grad_norm": 0.49593281745910645, "learning_rate": 0.00019216677629303923, "loss": 0.7516, "step": 6438 }, { "epoch": 1.1486040495941485, "grad_norm": 0.5000485181808472, "learning_rate": 0.00019209861674823975, "loss": 0.6881, "step": 6439 }, { "epoch": 1.1487824458121487, "grad_norm": 0.41080528497695923, "learning_rate": 0.00019203046175098992, "loss": 0.5175, "step": 6440 }, { "epoch": 1.148960842030149, "grad_norm": 0.46818774938583374, "learning_rate": 0.00019196231130664282, "loss": 0.7355, "step": 6441 }, { "epoch": 1.1491392382481491, "grad_norm": 0.4806887209415436, "learning_rate": 0.00019189416542055078, "loss": 0.6491, "step": 6442 }, { "epoch": 1.1493176344661493, "grad_norm": 0.46855100989341736, "learning_rate": 0.00019182602409806597, "loss": 0.7616, "step": 6443 }, { "epoch": 1.1494960306841495, "grad_norm": 0.49849364161491394, "learning_rate": 0.00019175788734454019, "loss": 0.722, "step": 6444 }, { "epoch": 1.1496744269021497, "grad_norm": 0.48306459188461304, "learning_rate": 0.0001916897551653249, "loss": 0.6938, "step": 6445 }, { "epoch": 1.14985282312015, "grad_norm": 0.4553467035293579, "learning_rate": 0.0001916216275657711, "loss": 0.5726, "step": 6446 }, { "epoch": 1.1500312193381501, "grad_norm": 0.4541803300380707, "learning_rate": 0.0001915535045512296, "loss": 0.5465, "step": 6447 }, { "epoch": 1.1502096155561503, "grad_norm": 0.5156320333480835, "learning_rate": 0.00019148538612705066, "loss": 0.6868, "step": 6448 }, { "epoch": 1.1503880117741505, "grad_norm": 0.4388979375362396, "learning_rate": 0.00019141727229858433, "loss": 0.5943, "step": 6449 }, { "epoch": 1.1505664079921505, "grad_norm": 0.4948444366455078, "learning_rate": 0.00019134916307118028, "loss": 0.6421, "step": 6450 }, { "epoch": 1.1507448042101507, "grad_norm": 0.46321436762809753, "learning_rate": 0.00019128105845018766, "loss": 0.5047, "step": 6451 }, { "epoch": 1.1509232004281509, "grad_norm": 0.46855947375297546, "learning_rate": 0.00019121295844095544, "loss": 0.6025, "step": 6452 }, { "epoch": 1.151101596646151, "grad_norm": 0.45880499482154846, "learning_rate": 0.00019114486304883216, "loss": 0.7395, "step": 6453 }, { "epoch": 1.1512799928641513, "grad_norm": 0.4672548472881317, "learning_rate": 0.00019107677227916603, "loss": 0.7361, "step": 6454 }, { "epoch": 1.1514583890821515, "grad_norm": 0.4436180591583252, "learning_rate": 0.0001910086861373048, "loss": 0.5226, "step": 6455 }, { "epoch": 1.1516367853001517, "grad_norm": 0.47873300313949585, "learning_rate": 0.000190940604628596, "loss": 0.7932, "step": 6456 }, { "epoch": 1.1518151815181519, "grad_norm": 0.5310173034667969, "learning_rate": 0.00019087252775838671, "loss": 0.8148, "step": 6457 }, { "epoch": 1.151993577736152, "grad_norm": 0.4951856732368469, "learning_rate": 0.00019080445553202363, "loss": 0.7325, "step": 6458 }, { "epoch": 1.152171973954152, "grad_norm": 0.5043248534202576, "learning_rate": 0.0001907363879548532, "loss": 0.6899, "step": 6459 }, { "epoch": 1.1523503701721522, "grad_norm": 0.5174239873886108, "learning_rate": 0.00019066832503222128, "loss": 0.6925, "step": 6460 }, { "epoch": 1.1525287663901524, "grad_norm": 0.47568047046661377, "learning_rate": 0.00019060026676947362, "loss": 0.7057, "step": 6461 }, { "epoch": 1.1527071626081526, "grad_norm": 0.4853077828884125, "learning_rate": 0.0001905322131719555, "loss": 0.7131, "step": 6462 }, { "epoch": 1.1528855588261528, "grad_norm": 0.4645387828350067, "learning_rate": 0.0001904641642450117, "loss": 0.6627, "step": 6463 }, { "epoch": 1.153063955044153, "grad_norm": 0.4763006269931793, "learning_rate": 0.00019039611999398682, "loss": 0.6816, "step": 6464 }, { "epoch": 1.1532423512621532, "grad_norm": 0.5304715633392334, "learning_rate": 0.00019032808042422503, "loss": 0.7631, "step": 6465 }, { "epoch": 1.1534207474801534, "grad_norm": 0.4912330210208893, "learning_rate": 0.0001902600455410701, "loss": 0.6471, "step": 6466 }, { "epoch": 1.1535991436981536, "grad_norm": 0.4972264766693115, "learning_rate": 0.00019019201534986553, "loss": 0.7728, "step": 6467 }, { "epoch": 1.1537775399161538, "grad_norm": 2.746565341949463, "learning_rate": 0.0001901239898559543, "loss": 0.6513, "step": 6468 }, { "epoch": 1.153955936134154, "grad_norm": 0.5050514340400696, "learning_rate": 0.00019005596906467918, "loss": 0.8309, "step": 6469 }, { "epoch": 1.1541343323521542, "grad_norm": 0.5243608951568604, "learning_rate": 0.0001899879529813825, "loss": 0.8084, "step": 6470 }, { "epoch": 1.1543127285701544, "grad_norm": 0.4700818955898285, "learning_rate": 0.00018991994161140596, "loss": 0.5866, "step": 6471 }, { "epoch": 1.1544911247881544, "grad_norm": 0.46044886112213135, "learning_rate": 0.00018985193496009152, "loss": 0.6026, "step": 6472 }, { "epoch": 1.1546695210061546, "grad_norm": 0.47794002294540405, "learning_rate": 0.0001897839330327802, "loss": 0.593, "step": 6473 }, { "epoch": 1.1548479172241548, "grad_norm": 0.49822142720222473, "learning_rate": 0.00018971593583481282, "loss": 0.8087, "step": 6474 }, { "epoch": 1.155026313442155, "grad_norm": 0.49274197220802307, "learning_rate": 0.00018964794337153002, "loss": 0.6831, "step": 6475 }, { "epoch": 1.1552047096601552, "grad_norm": 0.49197664856910706, "learning_rate": 0.0001895799556482717, "loss": 0.5862, "step": 6476 }, { "epoch": 1.1553831058781554, "grad_norm": 0.49241068959236145, "learning_rate": 0.00018951197267037765, "loss": 0.6444, "step": 6477 }, { "epoch": 1.1555615020961556, "grad_norm": 0.5059458017349243, "learning_rate": 0.00018944399444318724, "loss": 0.7429, "step": 6478 }, { "epoch": 1.1557398983141558, "grad_norm": 0.4923364222049713, "learning_rate": 0.00018937602097203942, "loss": 0.6462, "step": 6479 }, { "epoch": 1.155918294532156, "grad_norm": 0.5192193388938904, "learning_rate": 0.0001893080522622729, "loss": 0.7404, "step": 6480 }, { "epoch": 1.156096690750156, "grad_norm": 0.5662673711776733, "learning_rate": 0.0001892400883192258, "loss": 0.7242, "step": 6481 }, { "epoch": 1.1562750869681562, "grad_norm": 0.4407677948474884, "learning_rate": 0.00018917212914823598, "loss": 0.5281, "step": 6482 }, { "epoch": 1.1564534831861564, "grad_norm": 0.4470885694026947, "learning_rate": 0.00018910417475464104, "loss": 0.6045, "step": 6483 }, { "epoch": 1.1566318794041566, "grad_norm": 0.477516233921051, "learning_rate": 0.00018903622514377798, "loss": 0.6069, "step": 6484 }, { "epoch": 1.1568102756221568, "grad_norm": 0.5461446046829224, "learning_rate": 0.00018896828032098352, "loss": 0.7073, "step": 6485 }, { "epoch": 1.156988671840157, "grad_norm": 0.5411002039909363, "learning_rate": 0.00018890034029159399, "loss": 0.8649, "step": 6486 }, { "epoch": 1.1571670680581572, "grad_norm": 0.5572760701179504, "learning_rate": 0.0001888324050609455, "loss": 0.9441, "step": 6487 }, { "epoch": 1.1573454642761574, "grad_norm": 0.44463208317756653, "learning_rate": 0.00018876447463437367, "loss": 0.5227, "step": 6488 }, { "epoch": 1.1575238604941576, "grad_norm": 0.47355902194976807, "learning_rate": 0.0001886965490172136, "loss": 0.4937, "step": 6489 }, { "epoch": 1.1577022567121578, "grad_norm": 0.44894346594810486, "learning_rate": 0.00018862862821480023, "loss": 0.5107, "step": 6490 }, { "epoch": 1.157880652930158, "grad_norm": 0.5426954627037048, "learning_rate": 0.00018856071223246796, "loss": 0.6947, "step": 6491 }, { "epoch": 1.1580590491481582, "grad_norm": 0.44600164890289307, "learning_rate": 0.0001884928010755509, "loss": 0.6412, "step": 6492 }, { "epoch": 1.1582374453661584, "grad_norm": 0.4314921498298645, "learning_rate": 0.0001884248947493828, "loss": 0.5661, "step": 6493 }, { "epoch": 1.1584158415841583, "grad_norm": 0.44217392802238464, "learning_rate": 0.00018835699325929692, "loss": 0.5272, "step": 6494 }, { "epoch": 1.1585942378021585, "grad_norm": 0.42524126172065735, "learning_rate": 0.0001882890966106263, "loss": 0.5549, "step": 6495 }, { "epoch": 1.1587726340201587, "grad_norm": 0.4192121922969818, "learning_rate": 0.00018822120480870352, "loss": 0.4929, "step": 6496 }, { "epoch": 1.158951030238159, "grad_norm": 0.4756525456905365, "learning_rate": 0.00018815331785886066, "loss": 0.6762, "step": 6497 }, { "epoch": 1.1591294264561591, "grad_norm": 0.5104422569274902, "learning_rate": 0.00018808543576642964, "loss": 0.7025, "step": 6498 }, { "epoch": 1.1593078226741593, "grad_norm": 0.46176689863204956, "learning_rate": 0.00018801755853674183, "loss": 0.5673, "step": 6499 }, { "epoch": 1.1594862188921595, "grad_norm": 0.49885469675064087, "learning_rate": 0.00018794968617512827, "loss": 0.6967, "step": 6500 }, { "epoch": 1.1596646151101597, "grad_norm": 0.4945884943008423, "learning_rate": 0.00018788181868691965, "loss": 0.7192, "step": 6501 }, { "epoch": 1.15984301132816, "grad_norm": 0.4873616099357605, "learning_rate": 0.00018781395607744627, "loss": 0.6867, "step": 6502 }, { "epoch": 1.16002140754616, "grad_norm": 0.393449068069458, "learning_rate": 0.00018774609835203808, "loss": 0.4874, "step": 6503 }, { "epoch": 1.16019980376416, "grad_norm": 0.42069491744041443, "learning_rate": 0.00018767824551602446, "loss": 0.493, "step": 6504 }, { "epoch": 1.1603781999821603, "grad_norm": 0.44530075788497925, "learning_rate": 0.00018761039757473466, "loss": 0.4976, "step": 6505 }, { "epoch": 1.1605565962001605, "grad_norm": 0.4841354191303253, "learning_rate": 0.0001875425545334974, "loss": 0.7774, "step": 6506 }, { "epoch": 1.1607349924181607, "grad_norm": 0.44460541009902954, "learning_rate": 0.00018747471639764103, "loss": 0.6535, "step": 6507 }, { "epoch": 1.160913388636161, "grad_norm": 0.5261197686195374, "learning_rate": 0.00018740688317249349, "loss": 0.7753, "step": 6508 }, { "epoch": 1.161091784854161, "grad_norm": 0.4835450351238251, "learning_rate": 0.0001873390548633825, "loss": 0.9085, "step": 6509 }, { "epoch": 1.1612701810721613, "grad_norm": 0.4938504993915558, "learning_rate": 0.00018727123147563508, "loss": 0.7482, "step": 6510 }, { "epoch": 1.1614485772901615, "grad_norm": 0.4450148642063141, "learning_rate": 0.00018720341301457815, "loss": 0.5129, "step": 6511 }, { "epoch": 1.1616269735081617, "grad_norm": 0.49409425258636475, "learning_rate": 0.00018713559948553815, "loss": 0.6571, "step": 6512 }, { "epoch": 1.1618053697261619, "grad_norm": 0.42532381415367126, "learning_rate": 0.00018706779089384113, "loss": 0.508, "step": 6513 }, { "epoch": 1.161983765944162, "grad_norm": 0.5003641843795776, "learning_rate": 0.0001869999872448127, "loss": 0.6938, "step": 6514 }, { "epoch": 1.1621621621621623, "grad_norm": 0.4792661666870117, "learning_rate": 0.0001869321885437782, "loss": 0.6612, "step": 6515 }, { "epoch": 1.1623405583801623, "grad_norm": 0.44840648770332336, "learning_rate": 0.00018686439479606245, "loss": 0.609, "step": 6516 }, { "epoch": 1.1625189545981625, "grad_norm": 0.48601728677749634, "learning_rate": 0.00018679660600698996, "loss": 0.747, "step": 6517 }, { "epoch": 1.1626973508161627, "grad_norm": 0.5069820284843445, "learning_rate": 0.0001867288221818848, "loss": 0.5674, "step": 6518 }, { "epoch": 1.1628757470341629, "grad_norm": 0.4526546001434326, "learning_rate": 0.00018666104332607075, "loss": 0.5845, "step": 6519 }, { "epoch": 1.163054143252163, "grad_norm": 0.48952043056488037, "learning_rate": 0.00018659326944487115, "loss": 0.7082, "step": 6520 }, { "epoch": 1.1632325394701633, "grad_norm": 0.5443047881126404, "learning_rate": 0.0001865255005436089, "loss": 0.718, "step": 6521 }, { "epoch": 1.1634109356881635, "grad_norm": 0.5090129375457764, "learning_rate": 0.00018645773662760647, "loss": 0.9893, "step": 6522 }, { "epoch": 1.1635893319061636, "grad_norm": 0.5263794660568237, "learning_rate": 0.00018638997770218602, "loss": 0.6613, "step": 6523 }, { "epoch": 1.1637677281241638, "grad_norm": 0.6042819619178772, "learning_rate": 0.0001863222237726694, "loss": 0.7601, "step": 6524 }, { "epoch": 1.1639461243421638, "grad_norm": 0.5052105784416199, "learning_rate": 0.0001862544748443779, "loss": 0.7175, "step": 6525 }, { "epoch": 1.164124520560164, "grad_norm": 0.4983629882335663, "learning_rate": 0.00018618673092263253, "loss": 0.682, "step": 6526 }, { "epoch": 1.1643029167781642, "grad_norm": 0.4915952682495117, "learning_rate": 0.00018611899201275385, "loss": 0.6723, "step": 6527 }, { "epoch": 1.1644813129961644, "grad_norm": 0.5149100422859192, "learning_rate": 0.000186051258120062, "loss": 0.6966, "step": 6528 }, { "epoch": 1.1646597092141646, "grad_norm": 0.5236539244651794, "learning_rate": 0.00018598352924987689, "loss": 0.8879, "step": 6529 }, { "epoch": 1.1648381054321648, "grad_norm": 0.4917599856853485, "learning_rate": 0.00018591580540751784, "loss": 0.7433, "step": 6530 }, { "epoch": 1.165016501650165, "grad_norm": 0.47887739539146423, "learning_rate": 0.00018584808659830385, "loss": 0.7286, "step": 6531 }, { "epoch": 1.1651948978681652, "grad_norm": 0.4636463224887848, "learning_rate": 0.0001857803728275534, "loss": 0.6089, "step": 6532 }, { "epoch": 1.1653732940861654, "grad_norm": 0.5316303372383118, "learning_rate": 0.00018571266410058492, "loss": 0.6789, "step": 6533 }, { "epoch": 1.1655516903041656, "grad_norm": 0.49224573373794556, "learning_rate": 0.00018564496042271624, "loss": 0.788, "step": 6534 }, { "epoch": 1.1657300865221658, "grad_norm": 0.5619474053382874, "learning_rate": 0.0001855772617992646, "loss": 0.7761, "step": 6535 }, { "epoch": 1.165908482740166, "grad_norm": 0.49133652448654175, "learning_rate": 0.00018550956823554706, "loss": 0.6949, "step": 6536 }, { "epoch": 1.1660868789581662, "grad_norm": 0.4631252884864807, "learning_rate": 0.00018544187973688032, "loss": 0.5078, "step": 6537 }, { "epoch": 1.1662652751761662, "grad_norm": 0.4481963515281677, "learning_rate": 0.00018537419630858053, "loss": 0.6133, "step": 6538 }, { "epoch": 1.1664436713941664, "grad_norm": 0.49360591173171997, "learning_rate": 0.0001853065179559636, "loss": 0.6333, "step": 6539 }, { "epoch": 1.1666220676121666, "grad_norm": 0.5313968062400818, "learning_rate": 0.00018523884468434488, "loss": 0.8331, "step": 6540 }, { "epoch": 1.1668004638301668, "grad_norm": 0.5059915781021118, "learning_rate": 0.00018517117649903943, "loss": 0.7155, "step": 6541 }, { "epoch": 1.166978860048167, "grad_norm": 0.4583684504032135, "learning_rate": 0.00018510351340536192, "loss": 0.6011, "step": 6542 }, { "epoch": 1.1671572562661672, "grad_norm": 0.4290786385536194, "learning_rate": 0.0001850358554086265, "loss": 0.5471, "step": 6543 }, { "epoch": 1.1673356524841674, "grad_norm": 0.4612555205821991, "learning_rate": 0.00018496820251414703, "loss": 0.6882, "step": 6544 }, { "epoch": 1.1675140487021676, "grad_norm": 0.3945925831794739, "learning_rate": 0.00018490055472723696, "loss": 0.51, "step": 6545 }, { "epoch": 1.1676924449201678, "grad_norm": 0.43021753430366516, "learning_rate": 0.0001848329120532093, "loss": 0.5537, "step": 6546 }, { "epoch": 1.1678708411381677, "grad_norm": 0.46711066365242004, "learning_rate": 0.00018476527449737666, "loss": 0.7032, "step": 6547 }, { "epoch": 1.168049237356168, "grad_norm": 0.4292619228363037, "learning_rate": 0.0001846976420650513, "loss": 0.569, "step": 6548 }, { "epoch": 1.1682276335741681, "grad_norm": 0.45965656638145447, "learning_rate": 0.00018463001476154507, "loss": 0.7034, "step": 6549 }, { "epoch": 1.1684060297921683, "grad_norm": 0.49549368023872375, "learning_rate": 0.00018456239259216934, "loss": 0.6292, "step": 6550 }, { "epoch": 1.1685844260101685, "grad_norm": 0.5122089385986328, "learning_rate": 0.0001844947755622351, "loss": 0.7911, "step": 6551 }, { "epoch": 1.1687628222281687, "grad_norm": 1.3904839754104614, "learning_rate": 0.00018442716367705303, "loss": 0.6504, "step": 6552 }, { "epoch": 1.168941218446169, "grad_norm": 0.5320877432823181, "learning_rate": 0.0001843595569419333, "loss": 0.6984, "step": 6553 }, { "epoch": 1.1691196146641691, "grad_norm": 0.44560760259628296, "learning_rate": 0.0001842919553621857, "loss": 0.5054, "step": 6554 }, { "epoch": 1.1692980108821693, "grad_norm": 0.4876161217689514, "learning_rate": 0.00018422435894311972, "loss": 0.6423, "step": 6555 }, { "epoch": 1.1694764071001695, "grad_norm": 0.4854297339916229, "learning_rate": 0.00018415676769004426, "loss": 0.6986, "step": 6556 }, { "epoch": 1.1696548033181697, "grad_norm": 0.4554065465927124, "learning_rate": 0.00018408918160826789, "loss": 0.5424, "step": 6557 }, { "epoch": 1.16983319953617, "grad_norm": 0.5047731399536133, "learning_rate": 0.00018402160070309884, "loss": 0.7302, "step": 6558 }, { "epoch": 1.1700115957541701, "grad_norm": 0.47815465927124023, "learning_rate": 0.0001839540249798449, "loss": 0.7022, "step": 6559 }, { "epoch": 1.17018999197217, "grad_norm": 0.6329243183135986, "learning_rate": 0.0001838864544438134, "loss": 0.5491, "step": 6560 }, { "epoch": 1.1703683881901703, "grad_norm": 0.4645484685897827, "learning_rate": 0.0001838188891003113, "loss": 0.6384, "step": 6561 }, { "epoch": 1.1705467844081705, "grad_norm": 0.49263012409210205, "learning_rate": 0.0001837513289546452, "loss": 0.7574, "step": 6562 }, { "epoch": 1.1707251806261707, "grad_norm": 0.44261929392814636, "learning_rate": 0.00018368377401212116, "loss": 0.5943, "step": 6563 }, { "epoch": 1.170903576844171, "grad_norm": 0.4851824641227722, "learning_rate": 0.000183616224278045, "loss": 0.5772, "step": 6564 }, { "epoch": 1.171081973062171, "grad_norm": 0.4466243088245392, "learning_rate": 0.00018354867975772205, "loss": 0.5803, "step": 6565 }, { "epoch": 1.1712603692801713, "grad_norm": 0.5128055810928345, "learning_rate": 0.00018348114045645713, "loss": 0.7322, "step": 6566 }, { "epoch": 1.1714387654981715, "grad_norm": 0.5170561075210571, "learning_rate": 0.00018341360637955489, "loss": 0.7447, "step": 6567 }, { "epoch": 1.1716171617161717, "grad_norm": 0.4576283097267151, "learning_rate": 0.00018334607753231935, "loss": 0.6132, "step": 6568 }, { "epoch": 1.1717955579341717, "grad_norm": 0.44425490498542786, "learning_rate": 0.00018327855392005418, "loss": 0.5558, "step": 6569 }, { "epoch": 1.1719739541521719, "grad_norm": 0.5051802396774292, "learning_rate": 0.00018321103554806267, "loss": 0.6139, "step": 6570 }, { "epoch": 1.172152350370172, "grad_norm": 0.4622550904750824, "learning_rate": 0.00018314352242164767, "loss": 0.6846, "step": 6571 }, { "epoch": 1.1723307465881723, "grad_norm": 0.47944626212120056, "learning_rate": 0.00018307601454611166, "loss": 0.6305, "step": 6572 }, { "epoch": 1.1725091428061725, "grad_norm": 0.4514850080013275, "learning_rate": 0.00018300851192675665, "loss": 0.6158, "step": 6573 }, { "epoch": 1.1726875390241727, "grad_norm": 0.4573253095149994, "learning_rate": 0.00018294101456888432, "loss": 0.6954, "step": 6574 }, { "epoch": 1.1728659352421729, "grad_norm": 0.428072065114975, "learning_rate": 0.00018287352247779582, "loss": 0.4599, "step": 6575 }, { "epoch": 1.173044331460173, "grad_norm": 0.4548456072807312, "learning_rate": 0.00018280603565879207, "loss": 0.6041, "step": 6576 }, { "epoch": 1.1732227276781733, "grad_norm": 0.4897709786891937, "learning_rate": 0.00018273855411717323, "loss": 0.6279, "step": 6577 }, { "epoch": 1.1734011238961735, "grad_norm": 0.5311153531074524, "learning_rate": 0.00018267107785823936, "loss": 0.645, "step": 6578 }, { "epoch": 1.1735795201141737, "grad_norm": 0.48785173892974854, "learning_rate": 0.00018260360688729016, "loss": 0.5661, "step": 6579 }, { "epoch": 1.1737579163321739, "grad_norm": 0.5114821791648865, "learning_rate": 0.00018253614120962463, "loss": 0.7209, "step": 6580 }, { "epoch": 1.173936312550174, "grad_norm": 0.5291317105293274, "learning_rate": 0.00018246868083054166, "loss": 0.8096, "step": 6581 }, { "epoch": 1.174114708768174, "grad_norm": 0.5284007787704468, "learning_rate": 0.00018240122575533934, "loss": 0.7968, "step": 6582 }, { "epoch": 1.1742931049861742, "grad_norm": 0.49136999249458313, "learning_rate": 0.00018233377598931566, "loss": 0.6637, "step": 6583 }, { "epoch": 1.1744715012041744, "grad_norm": 0.4023672342300415, "learning_rate": 0.00018226633153776812, "loss": 0.4708, "step": 6584 }, { "epoch": 1.1746498974221746, "grad_norm": 0.39422962069511414, "learning_rate": 0.00018219889240599375, "loss": 0.5222, "step": 6585 }, { "epoch": 1.1748282936401748, "grad_norm": 0.5104691386222839, "learning_rate": 0.00018213145859928914, "loss": 0.6543, "step": 6586 }, { "epoch": 1.175006689858175, "grad_norm": 0.4648468792438507, "learning_rate": 0.00018206403012295064, "loss": 0.6523, "step": 6587 }, { "epoch": 1.1751850860761752, "grad_norm": 0.5008534789085388, "learning_rate": 0.00018199660698227393, "loss": 0.7357, "step": 6588 }, { "epoch": 1.1753634822941754, "grad_norm": 0.5412165522575378, "learning_rate": 0.00018192918918255452, "loss": 0.7517, "step": 6589 }, { "epoch": 1.1755418785121756, "grad_norm": 0.4949028193950653, "learning_rate": 0.0001818617767290872, "loss": 0.636, "step": 6590 }, { "epoch": 1.1757202747301756, "grad_norm": 0.49255791306495667, "learning_rate": 0.00018179436962716667, "loss": 0.6114, "step": 6591 }, { "epoch": 1.1758986709481758, "grad_norm": 0.5153682827949524, "learning_rate": 0.00018172696788208698, "loss": 0.8209, "step": 6592 }, { "epoch": 1.176077067166176, "grad_norm": 0.4873290956020355, "learning_rate": 0.0001816595714991418, "loss": 0.6777, "step": 6593 }, { "epoch": 1.1762554633841762, "grad_norm": 0.4800300598144531, "learning_rate": 0.00018159218048362452, "loss": 0.659, "step": 6594 }, { "epoch": 1.1764338596021764, "grad_norm": 0.511806070804596, "learning_rate": 0.00018152479484082797, "loss": 0.7344, "step": 6595 }, { "epoch": 1.1766122558201766, "grad_norm": 0.5367228388786316, "learning_rate": 0.0001814574145760445, "loss": 0.9972, "step": 6596 }, { "epoch": 1.1767906520381768, "grad_norm": 0.5340408682823181, "learning_rate": 0.00018139003969456623, "loss": 0.8547, "step": 6597 }, { "epoch": 1.176969048256177, "grad_norm": 0.44577550888061523, "learning_rate": 0.00018132267020168471, "loss": 0.5981, "step": 6598 }, { "epoch": 1.1771474444741772, "grad_norm": 0.5312680006027222, "learning_rate": 0.00018125530610269114, "loss": 0.7921, "step": 6599 }, { "epoch": 1.1773258406921774, "grad_norm": 0.4474240839481354, "learning_rate": 0.00018118794740287625, "loss": 0.532, "step": 6600 }, { "epoch": 1.1775042369101776, "grad_norm": 0.455321341753006, "learning_rate": 0.00018112059410753034, "loss": 0.5826, "step": 6601 }, { "epoch": 1.1776826331281778, "grad_norm": 0.5083394646644592, "learning_rate": 0.00018105324622194336, "loss": 0.7073, "step": 6602 }, { "epoch": 1.177861029346178, "grad_norm": 0.5182134509086609, "learning_rate": 0.00018098590375140473, "loss": 0.7853, "step": 6603 }, { "epoch": 1.178039425564178, "grad_norm": 0.5327116250991821, "learning_rate": 0.00018091856670120348, "loss": 0.8714, "step": 6604 }, { "epoch": 1.1782178217821782, "grad_norm": 0.4751187562942505, "learning_rate": 0.00018085123507662832, "loss": 0.6433, "step": 6605 }, { "epoch": 1.1783962180001784, "grad_norm": 0.4442801773548126, "learning_rate": 0.0001807839088829674, "loss": 0.539, "step": 6606 }, { "epoch": 1.1785746142181786, "grad_norm": 0.5099819302558899, "learning_rate": 0.00018071658812550845, "loss": 0.741, "step": 6607 }, { "epoch": 1.1787530104361787, "grad_norm": 0.4700085520744324, "learning_rate": 0.00018064927280953891, "loss": 0.7668, "step": 6608 }, { "epoch": 1.178931406654179, "grad_norm": 0.5318174362182617, "learning_rate": 0.00018058196294034554, "loss": 0.8966, "step": 6609 }, { "epoch": 1.1791098028721791, "grad_norm": 0.46459850668907166, "learning_rate": 0.000180514658523215, "loss": 0.6678, "step": 6610 }, { "epoch": 1.1792881990901793, "grad_norm": 0.47724831104278564, "learning_rate": 0.00018044735956343328, "loss": 0.6598, "step": 6611 }, { "epoch": 1.1794665953081795, "grad_norm": 0.46925148367881775, "learning_rate": 0.000180380066066286, "loss": 0.5628, "step": 6612 }, { "epoch": 1.1796449915261795, "grad_norm": 0.4924178421497345, "learning_rate": 0.00018031277803705835, "loss": 0.703, "step": 6613 }, { "epoch": 1.1798233877441797, "grad_norm": 0.460475891828537, "learning_rate": 0.00018024549548103518, "loss": 0.5927, "step": 6614 }, { "epoch": 1.18000178396218, "grad_norm": 0.5572229027748108, "learning_rate": 0.0001801782184035008, "loss": 0.8473, "step": 6615 }, { "epoch": 1.1801801801801801, "grad_norm": 0.5910245180130005, "learning_rate": 0.00018011094680973902, "loss": 0.7021, "step": 6616 }, { "epoch": 1.1803585763981803, "grad_norm": 0.5176860094070435, "learning_rate": 0.00018004368070503342, "loss": 0.7578, "step": 6617 }, { "epoch": 1.1805369726161805, "grad_norm": 0.5350726842880249, "learning_rate": 0.00017997642009466702, "loss": 0.8098, "step": 6618 }, { "epoch": 1.1807153688341807, "grad_norm": 0.552034318447113, "learning_rate": 0.0001799091649839225, "loss": 0.8272, "step": 6619 }, { "epoch": 1.180893765052181, "grad_norm": 0.5275470018386841, "learning_rate": 0.00017984191537808198, "loss": 0.6984, "step": 6620 }, { "epoch": 1.181072161270181, "grad_norm": 0.5647268891334534, "learning_rate": 0.0001797746712824272, "loss": 0.935, "step": 6621 }, { "epoch": 1.1812505574881813, "grad_norm": 0.4905039370059967, "learning_rate": 0.0001797074327022396, "loss": 0.645, "step": 6622 }, { "epoch": 1.1814289537061815, "grad_norm": 0.4301641881465912, "learning_rate": 0.00017964019964279994, "loss": 0.4728, "step": 6623 }, { "epoch": 1.1816073499241817, "grad_norm": 0.46979376673698425, "learning_rate": 0.0001795729721093886, "loss": 0.6428, "step": 6624 }, { "epoch": 1.181785746142182, "grad_norm": 0.48805493116378784, "learning_rate": 0.00017950575010728582, "loss": 0.6446, "step": 6625 }, { "epoch": 1.1819641423601819, "grad_norm": 0.49873703718185425, "learning_rate": 0.00017943853364177112, "loss": 0.7287, "step": 6626 }, { "epoch": 1.182142538578182, "grad_norm": 0.422006756067276, "learning_rate": 0.00017937132271812368, "loss": 0.4329, "step": 6627 }, { "epoch": 1.1823209347961823, "grad_norm": 0.46575191617012024, "learning_rate": 0.00017930411734162204, "loss": 0.582, "step": 6628 }, { "epoch": 1.1824993310141825, "grad_norm": 0.5291426181793213, "learning_rate": 0.00017923691751754468, "loss": 0.7962, "step": 6629 }, { "epoch": 1.1826777272321827, "grad_norm": 0.48327967524528503, "learning_rate": 0.00017916972325116931, "loss": 0.6514, "step": 6630 }, { "epoch": 1.1828561234501829, "grad_norm": 0.47633296251296997, "learning_rate": 0.00017910253454777344, "loss": 0.6475, "step": 6631 }, { "epoch": 1.183034519668183, "grad_norm": 0.5271769165992737, "learning_rate": 0.000179035351412634, "loss": 0.7574, "step": 6632 }, { "epoch": 1.1832129158861833, "grad_norm": 0.45109376311302185, "learning_rate": 0.00017896817385102748, "loss": 0.538, "step": 6633 }, { "epoch": 1.1833913121041835, "grad_norm": 0.489512175321579, "learning_rate": 0.00017890100186823004, "loss": 0.7353, "step": 6634 }, { "epoch": 1.1835697083221837, "grad_norm": 0.5405703186988831, "learning_rate": 0.00017883383546951737, "loss": 0.7591, "step": 6635 }, { "epoch": 1.1837481045401836, "grad_norm": 0.4727650284767151, "learning_rate": 0.00017876667466016458, "loss": 0.7046, "step": 6636 }, { "epoch": 1.1839265007581838, "grad_norm": 0.49231699109077454, "learning_rate": 0.0001786995194454465, "loss": 0.7205, "step": 6637 }, { "epoch": 1.184104896976184, "grad_norm": 0.49565812945365906, "learning_rate": 0.0001786323698306375, "loss": 0.7502, "step": 6638 }, { "epoch": 1.1842832931941842, "grad_norm": 0.4355218708515167, "learning_rate": 0.00017856522582101133, "loss": 0.5357, "step": 6639 }, { "epoch": 1.1844616894121844, "grad_norm": 0.509213387966156, "learning_rate": 0.00017849808742184176, "loss": 0.8362, "step": 6640 }, { "epoch": 1.1846400856301846, "grad_norm": 0.49817323684692383, "learning_rate": 0.0001784309546384016, "loss": 0.6041, "step": 6641 }, { "epoch": 1.1848184818481848, "grad_norm": 0.44568195939064026, "learning_rate": 0.00017836382747596341, "loss": 0.6403, "step": 6642 }, { "epoch": 1.184996878066185, "grad_norm": 0.42027372121810913, "learning_rate": 0.00017829670593979944, "loss": 0.5297, "step": 6643 }, { "epoch": 1.1851752742841852, "grad_norm": 0.4120577871799469, "learning_rate": 0.0001782295900351813, "loss": 0.5559, "step": 6644 }, { "epoch": 1.1853536705021854, "grad_norm": 0.4313491880893707, "learning_rate": 0.00017816247976738025, "loss": 0.629, "step": 6645 }, { "epoch": 1.1855320667201856, "grad_norm": 0.45468538999557495, "learning_rate": 0.00017809537514166718, "loss": 0.6541, "step": 6646 }, { "epoch": 1.1857104629381858, "grad_norm": 42.75632858276367, "learning_rate": 0.00017802827616331235, "loss": 1.0525, "step": 6647 }, { "epoch": 1.1858888591561858, "grad_norm": 0.5608348846435547, "learning_rate": 0.00017796118283758584, "loss": 0.7031, "step": 6648 }, { "epoch": 1.186067255374186, "grad_norm": 0.4569344222545624, "learning_rate": 0.00017789409516975698, "loss": 0.524, "step": 6649 }, { "epoch": 1.1862456515921862, "grad_norm": 0.5035879015922546, "learning_rate": 0.0001778270131650948, "loss": 0.681, "step": 6650 }, { "epoch": 1.1864240478101864, "grad_norm": 0.4764963984489441, "learning_rate": 0.000177759936828868, "loss": 0.6345, "step": 6651 }, { "epoch": 1.1866024440281866, "grad_norm": 0.46746543049812317, "learning_rate": 0.00017769286616634461, "loss": 0.6118, "step": 6652 }, { "epoch": 1.1867808402461868, "grad_norm": 0.4906679689884186, "learning_rate": 0.00017762580118279244, "loss": 0.6521, "step": 6653 }, { "epoch": 1.186959236464187, "grad_norm": 0.5302233099937439, "learning_rate": 0.0001775587418834787, "loss": 0.6907, "step": 6654 }, { "epoch": 1.1871376326821872, "grad_norm": 0.46473437547683716, "learning_rate": 0.00017749168827367015, "loss": 0.6949, "step": 6655 }, { "epoch": 1.1873160289001874, "grad_norm": 0.43767476081848145, "learning_rate": 0.00017742464035863325, "loss": 0.5771, "step": 6656 }, { "epoch": 1.1874944251181876, "grad_norm": 0.5204544067382812, "learning_rate": 0.00017735759814363383, "loss": 0.6368, "step": 6657 }, { "epoch": 1.1876728213361876, "grad_norm": 0.49561235308647156, "learning_rate": 0.00017729056163393744, "loss": 0.5123, "step": 6658 }, { "epoch": 1.1878512175541878, "grad_norm": 0.4810577929019928, "learning_rate": 0.00017722353083480903, "loss": 0.6761, "step": 6659 }, { "epoch": 1.188029613772188, "grad_norm": 0.5160006284713745, "learning_rate": 0.00017715650575151322, "loss": 0.6518, "step": 6660 }, { "epoch": 1.1882080099901882, "grad_norm": 0.42943114042282104, "learning_rate": 0.0001770894863893141, "loss": 0.529, "step": 6661 }, { "epoch": 1.1883864062081884, "grad_norm": 0.4512742757797241, "learning_rate": 0.00017702247275347532, "loss": 0.5871, "step": 6662 }, { "epoch": 1.1885648024261886, "grad_norm": 0.5028432011604309, "learning_rate": 0.00017695546484926012, "loss": 0.5853, "step": 6663 }, { "epoch": 1.1887431986441888, "grad_norm": 0.5111497044563293, "learning_rate": 0.00017688846268193125, "loss": 0.6258, "step": 6664 }, { "epoch": 1.188921594862189, "grad_norm": 0.4514205753803253, "learning_rate": 0.0001768214662567511, "loss": 0.5353, "step": 6665 }, { "epoch": 1.1890999910801892, "grad_norm": 0.4557707607746124, "learning_rate": 0.0001767544755789815, "loss": 0.6054, "step": 6666 }, { "epoch": 1.1892783872981894, "grad_norm": 0.5693210959434509, "learning_rate": 0.00017668749065388384, "loss": 0.7786, "step": 6667 }, { "epoch": 1.1894567835161896, "grad_norm": 0.47501084208488464, "learning_rate": 0.00017662051148671914, "loss": 0.6898, "step": 6668 }, { "epoch": 1.1896351797341898, "grad_norm": 0.4343969225883484, "learning_rate": 0.00017655353808274793, "loss": 0.5845, "step": 6669 }, { "epoch": 1.1898135759521897, "grad_norm": 0.4890352487564087, "learning_rate": 0.00017648657044723007, "loss": 0.579, "step": 6670 }, { "epoch": 1.18999197217019, "grad_norm": 0.4660862386226654, "learning_rate": 0.00017641960858542544, "loss": 0.5289, "step": 6671 }, { "epoch": 1.1901703683881901, "grad_norm": 0.49059972167015076, "learning_rate": 0.0001763526525025931, "loss": 0.6006, "step": 6672 }, { "epoch": 1.1903487646061903, "grad_norm": 0.4573802351951599, "learning_rate": 0.00017628570220399177, "loss": 0.5264, "step": 6673 }, { "epoch": 1.1905271608241905, "grad_norm": 0.5544562935829163, "learning_rate": 0.00017621875769487964, "loss": 0.7761, "step": 6674 }, { "epoch": 1.1907055570421907, "grad_norm": 0.4699319303035736, "learning_rate": 0.00017615181898051452, "loss": 0.604, "step": 6675 }, { "epoch": 1.190883953260191, "grad_norm": 0.4405967891216278, "learning_rate": 0.00017608488606615376, "loss": 0.5839, "step": 6676 }, { "epoch": 1.1910623494781911, "grad_norm": 0.4909313917160034, "learning_rate": 0.00017601795895705422, "loss": 0.6793, "step": 6677 }, { "epoch": 1.1912407456961913, "grad_norm": 0.5177908539772034, "learning_rate": 0.00017595103765847238, "loss": 0.7589, "step": 6678 }, { "epoch": 1.1914191419141915, "grad_norm": 0.4939672648906708, "learning_rate": 0.00017588412217566413, "loss": 0.6188, "step": 6679 }, { "epoch": 1.1915975381321915, "grad_norm": 0.4704304039478302, "learning_rate": 0.00017581721251388506, "loss": 0.5345, "step": 6680 }, { "epoch": 1.1917759343501917, "grad_norm": 0.43384838104248047, "learning_rate": 0.0001757503086783902, "loss": 0.5269, "step": 6681 }, { "epoch": 1.1919543305681919, "grad_norm": 0.4727664887905121, "learning_rate": 0.0001756834106744342, "loss": 0.7044, "step": 6682 }, { "epoch": 1.192132726786192, "grad_norm": 0.5356529355049133, "learning_rate": 0.00017561651850727105, "loss": 0.8679, "step": 6683 }, { "epoch": 1.1923111230041923, "grad_norm": 0.45256125926971436, "learning_rate": 0.00017554963218215458, "loss": 0.6471, "step": 6684 }, { "epoch": 1.1924895192221925, "grad_norm": 0.6792163252830505, "learning_rate": 0.00017548275170433783, "loss": 0.713, "step": 6685 }, { "epoch": 1.1926679154401927, "grad_norm": 0.42677950859069824, "learning_rate": 0.00017541587707907387, "loss": 0.4628, "step": 6686 }, { "epoch": 1.1928463116581929, "grad_norm": 0.5247853994369507, "learning_rate": 0.00017534900831161476, "loss": 0.8362, "step": 6687 }, { "epoch": 1.193024707876193, "grad_norm": 0.48497462272644043, "learning_rate": 0.00017528214540721241, "loss": 0.6612, "step": 6688 }, { "epoch": 1.1932031040941933, "grad_norm": 0.43292883038520813, "learning_rate": 0.0001752152883711182, "loss": 0.5158, "step": 6689 }, { "epoch": 1.1933815003121935, "grad_norm": 0.5147132277488708, "learning_rate": 0.00017514843720858308, "loss": 0.795, "step": 6690 }, { "epoch": 1.1935598965301937, "grad_norm": 0.5482494831085205, "learning_rate": 0.00017508159192485746, "loss": 0.6979, "step": 6691 }, { "epoch": 1.1937382927481937, "grad_norm": 0.5331830978393555, "learning_rate": 0.0001750147525251914, "loss": 0.5722, "step": 6692 }, { "epoch": 1.1939166889661939, "grad_norm": 0.5137856006622314, "learning_rate": 0.0001749479190148344, "loss": 0.7025, "step": 6693 }, { "epoch": 1.194095085184194, "grad_norm": 0.4909903109073639, "learning_rate": 0.00017488109139903558, "loss": 0.5722, "step": 6694 }, { "epoch": 1.1942734814021942, "grad_norm": 0.5011717081069946, "learning_rate": 0.00017481426968304347, "loss": 0.6142, "step": 6695 }, { "epoch": 1.1944518776201944, "grad_norm": 0.635654628276825, "learning_rate": 0.00017474745387210627, "loss": 0.6943, "step": 6696 }, { "epoch": 1.1946302738381946, "grad_norm": 0.5469489097595215, "learning_rate": 0.00017468064397147166, "loss": 0.6825, "step": 6697 }, { "epoch": 1.1948086700561948, "grad_norm": 0.5090768337249756, "learning_rate": 0.00017461383998638685, "loss": 0.6433, "step": 6698 }, { "epoch": 1.194987066274195, "grad_norm": 0.5082751512527466, "learning_rate": 0.00017454704192209863, "loss": 0.6789, "step": 6699 }, { "epoch": 1.1951654624921952, "grad_norm": 0.4537877142429352, "learning_rate": 0.0001744802497838532, "loss": 0.6015, "step": 6700 }, { "epoch": 1.1953438587101954, "grad_norm": 0.4903229773044586, "learning_rate": 0.00017441346357689651, "loss": 0.6014, "step": 6701 }, { "epoch": 1.1955222549281954, "grad_norm": 0.4441526532173157, "learning_rate": 0.00017434668330647385, "loss": 0.5323, "step": 6702 }, { "epoch": 1.1957006511461956, "grad_norm": 0.532263994216919, "learning_rate": 0.00017427990897783013, "loss": 0.729, "step": 6703 }, { "epoch": 1.1958790473641958, "grad_norm": 0.49677395820617676, "learning_rate": 0.0001742131405962098, "loss": 0.646, "step": 6704 }, { "epoch": 1.196057443582196, "grad_norm": 0.5097184181213379, "learning_rate": 0.00017414637816685677, "loss": 0.688, "step": 6705 }, { "epoch": 1.1962358398001962, "grad_norm": 0.4844436049461365, "learning_rate": 0.00017407962169501456, "loss": 0.6241, "step": 6706 }, { "epoch": 1.1964142360181964, "grad_norm": 0.49923595786094666, "learning_rate": 0.00017401287118592624, "loss": 0.7081, "step": 6707 }, { "epoch": 1.1965926322361966, "grad_norm": 0.49586620926856995, "learning_rate": 0.00017394612664483429, "loss": 0.6703, "step": 6708 }, { "epoch": 1.1967710284541968, "grad_norm": 0.5127303004264832, "learning_rate": 0.00017387938807698078, "loss": 0.5966, "step": 6709 }, { "epoch": 1.196949424672197, "grad_norm": 0.5159239172935486, "learning_rate": 0.0001738126554876074, "loss": 0.6792, "step": 6710 }, { "epoch": 1.1971278208901972, "grad_norm": 0.45261603593826294, "learning_rate": 0.0001737459288819553, "loss": 0.5966, "step": 6711 }, { "epoch": 1.1973062171081974, "grad_norm": 0.4482129216194153, "learning_rate": 0.00017367920826526508, "loss": 0.5745, "step": 6712 }, { "epoch": 1.1974846133261976, "grad_norm": 0.49750131368637085, "learning_rate": 0.000173612493642777, "loss": 0.6134, "step": 6713 }, { "epoch": 1.1976630095441976, "grad_norm": 0.5099869966506958, "learning_rate": 0.00017354578501973083, "loss": 0.8435, "step": 6714 }, { "epoch": 1.1978414057621978, "grad_norm": 0.4573536813259125, "learning_rate": 0.00017347908240136585, "loss": 0.5377, "step": 6715 }, { "epoch": 1.198019801980198, "grad_norm": 0.5403563380241394, "learning_rate": 0.00017341238579292063, "loss": 0.792, "step": 6716 }, { "epoch": 1.1981981981981982, "grad_norm": 0.44874098896980286, "learning_rate": 0.00017334569519963377, "loss": 0.6036, "step": 6717 }, { "epoch": 1.1983765944161984, "grad_norm": 0.5061765313148499, "learning_rate": 0.00017327901062674306, "loss": 0.6612, "step": 6718 }, { "epoch": 1.1985549906341986, "grad_norm": 0.4852476418018341, "learning_rate": 0.00017321233207948583, "loss": 0.5551, "step": 6719 }, { "epoch": 1.1987333868521988, "grad_norm": 0.7161738276481628, "learning_rate": 0.00017314565956309903, "loss": 0.5223, "step": 6720 }, { "epoch": 1.198911783070199, "grad_norm": 0.5031499266624451, "learning_rate": 0.000173078993082819, "loss": 0.7557, "step": 6721 }, { "epoch": 1.1990901792881992, "grad_norm": 1.0971347093582153, "learning_rate": 0.00017301233264388176, "loss": 0.6716, "step": 6722 }, { "epoch": 1.1992685755061994, "grad_norm": 0.6954957842826843, "learning_rate": 0.0001729456782515228, "loss": 0.669, "step": 6723 }, { "epoch": 1.1994469717241993, "grad_norm": 0.5447986125946045, "learning_rate": 0.0001728790299109771, "loss": 0.7785, "step": 6724 }, { "epoch": 1.1996253679421995, "grad_norm": 0.5347351431846619, "learning_rate": 0.00017281238762747919, "loss": 0.7014, "step": 6725 }, { "epoch": 1.1998037641601997, "grad_norm": 0.5047118663787842, "learning_rate": 0.00017274575140626317, "loss": 0.574, "step": 6726 }, { "epoch": 1.1999821603782, "grad_norm": 0.5407329797744751, "learning_rate": 0.0001726791212525626, "loss": 0.7382, "step": 6727 }, { "epoch": 1.2001605565962001, "grad_norm": 0.51371830701828, "learning_rate": 0.00017261249717161054, "loss": 0.779, "step": 6728 }, { "epoch": 1.2003389528142003, "grad_norm": 0.4466804563999176, "learning_rate": 0.00017254587916863969, "loss": 0.5177, "step": 6729 }, { "epoch": 1.2005173490322005, "grad_norm": 0.45748934149742126, "learning_rate": 0.0001724792672488821, "loss": 0.6624, "step": 6730 }, { "epoch": 1.2006957452502007, "grad_norm": 0.5126693844795227, "learning_rate": 0.0001724126614175694, "loss": 0.7792, "step": 6731 }, { "epoch": 1.200874141468201, "grad_norm": 0.46355128288269043, "learning_rate": 0.00017234606167993303, "loss": 0.6419, "step": 6732 }, { "epoch": 1.2010525376862011, "grad_norm": 0.4698454439640045, "learning_rate": 0.0001722794680412036, "loss": 0.6422, "step": 6733 }, { "epoch": 1.2012309339042013, "grad_norm": 0.4981541335582733, "learning_rate": 0.0001722128805066112, "loss": 0.6687, "step": 6734 }, { "epoch": 1.2014093301222015, "grad_norm": 0.5997437834739685, "learning_rate": 0.00017214629908138574, "loss": 0.9846, "step": 6735 }, { "epoch": 1.2015877263402015, "grad_norm": 0.4991457164287567, "learning_rate": 0.0001720797237707564, "loss": 0.6268, "step": 6736 }, { "epoch": 1.2017661225582017, "grad_norm": 0.4917805790901184, "learning_rate": 0.0001720131545799521, "loss": 0.62, "step": 6737 }, { "epoch": 1.201944518776202, "grad_norm": 0.45788219571113586, "learning_rate": 0.00017194659151420106, "loss": 0.7157, "step": 6738 }, { "epoch": 1.202122914994202, "grad_norm": 0.49863317608833313, "learning_rate": 0.0001718800345787311, "loss": 0.6979, "step": 6739 }, { "epoch": 1.2023013112122023, "grad_norm": 0.591437578201294, "learning_rate": 0.00017181348377876958, "loss": 0.6895, "step": 6740 }, { "epoch": 1.2024797074302025, "grad_norm": 0.4187926650047302, "learning_rate": 0.00017174693911954354, "loss": 0.5386, "step": 6741 }, { "epoch": 1.2026581036482027, "grad_norm": 0.4731749892234802, "learning_rate": 0.0001716804006062791, "loss": 0.6097, "step": 6742 }, { "epoch": 1.202836499866203, "grad_norm": 0.4960040748119354, "learning_rate": 0.00017161386824420232, "loss": 0.7277, "step": 6743 }, { "epoch": 1.203014896084203, "grad_norm": 0.47977831959724426, "learning_rate": 0.00017154734203853858, "loss": 0.6146, "step": 6744 }, { "epoch": 1.2031932923022033, "grad_norm": 0.6268126368522644, "learning_rate": 0.00017148082199451288, "loss": 0.6726, "step": 6745 }, { "epoch": 1.2033716885202033, "grad_norm": 0.5123054385185242, "learning_rate": 0.0001714143081173495, "loss": 0.8116, "step": 6746 }, { "epoch": 1.2035500847382035, "grad_norm": 0.45163649320602417, "learning_rate": 0.00017134780041227265, "loss": 0.6415, "step": 6747 }, { "epoch": 1.2037284809562037, "grad_norm": 0.511045515537262, "learning_rate": 0.00017128129888450573, "loss": 0.5628, "step": 6748 }, { "epoch": 1.2039068771742039, "grad_norm": 0.44558185338974, "learning_rate": 0.00017121480353927165, "loss": 0.5404, "step": 6749 }, { "epoch": 1.204085273392204, "grad_norm": 0.5241187810897827, "learning_rate": 0.00017114831438179304, "loss": 0.7767, "step": 6750 }, { "epoch": 1.2042636696102043, "grad_norm": 0.722863495349884, "learning_rate": 0.00017108183141729188, "loss": 0.685, "step": 6751 }, { "epoch": 1.2044420658282045, "grad_norm": 0.6952433586120605, "learning_rate": 0.00017101535465098973, "loss": 0.6479, "step": 6752 }, { "epoch": 1.2046204620462047, "grad_norm": 0.48947012424468994, "learning_rate": 0.00017094888408810763, "loss": 0.749, "step": 6753 }, { "epoch": 1.2047988582642049, "grad_norm": 0.4884073734283447, "learning_rate": 0.0001708824197338662, "loss": 0.727, "step": 6754 }, { "epoch": 1.204977254482205, "grad_norm": 0.5633352398872375, "learning_rate": 0.00017081596159348544, "loss": 0.8563, "step": 6755 }, { "epoch": 1.2051556507002052, "grad_norm": 0.48596227169036865, "learning_rate": 0.00017074950967218495, "loss": 0.6633, "step": 6756 }, { "epoch": 1.2053340469182054, "grad_norm": 0.45270687341690063, "learning_rate": 0.0001706830639751839, "loss": 0.5795, "step": 6757 }, { "epoch": 1.2055124431362054, "grad_norm": 0.48366621136665344, "learning_rate": 0.00017061662450770085, "loss": 0.7047, "step": 6758 }, { "epoch": 1.2056908393542056, "grad_norm": 0.48411786556243896, "learning_rate": 0.00017055019127495396, "loss": 0.7534, "step": 6759 }, { "epoch": 1.2058692355722058, "grad_norm": 0.4793466627597809, "learning_rate": 0.00017048376428216083, "loss": 0.6564, "step": 6760 }, { "epoch": 1.206047631790206, "grad_norm": 0.4573822021484375, "learning_rate": 0.0001704173435345387, "loss": 0.4675, "step": 6761 }, { "epoch": 1.2062260280082062, "grad_norm": 0.540582537651062, "learning_rate": 0.00017035092903730403, "loss": 0.7691, "step": 6762 }, { "epoch": 1.2064044242262064, "grad_norm": 0.5048598647117615, "learning_rate": 0.00017028452079567325, "loss": 0.7058, "step": 6763 }, { "epoch": 1.2065828204442066, "grad_norm": 0.46692147850990295, "learning_rate": 0.00017021811881486184, "loss": 0.5641, "step": 6764 }, { "epoch": 1.2067612166622068, "grad_norm": 0.4819284975528717, "learning_rate": 0.00017015172310008508, "loss": 0.733, "step": 6765 }, { "epoch": 1.206939612880207, "grad_norm": 0.4647090435028076, "learning_rate": 0.00017008533365655765, "loss": 0.5602, "step": 6766 }, { "epoch": 1.2071180090982072, "grad_norm": 0.42821335792541504, "learning_rate": 0.0001700189504894937, "loss": 0.5918, "step": 6767 }, { "epoch": 1.2072964053162072, "grad_norm": 0.42195186018943787, "learning_rate": 0.00016995257360410694, "loss": 0.5887, "step": 6768 }, { "epoch": 1.2074748015342074, "grad_norm": 0.5590615272521973, "learning_rate": 0.00016988620300561065, "loss": 0.5942, "step": 6769 }, { "epoch": 1.2076531977522076, "grad_norm": 0.459682822227478, "learning_rate": 0.0001698198386992174, "loss": 0.6768, "step": 6770 }, { "epoch": 1.2078315939702078, "grad_norm": 0.5003504157066345, "learning_rate": 0.0001697534806901396, "loss": 0.7413, "step": 6771 }, { "epoch": 1.208009990188208, "grad_norm": 0.4521839916706085, "learning_rate": 0.00016968712898358888, "loss": 0.5958, "step": 6772 }, { "epoch": 1.2081883864062082, "grad_norm": 0.47012245655059814, "learning_rate": 0.00016962078358477648, "loss": 0.6722, "step": 6773 }, { "epoch": 1.2083667826242084, "grad_norm": 0.4755409359931946, "learning_rate": 0.0001695544444989132, "loss": 0.7082, "step": 6774 }, { "epoch": 1.2085451788422086, "grad_norm": 0.44018933176994324, "learning_rate": 0.00016948811173120914, "loss": 0.6281, "step": 6775 }, { "epoch": 1.2087235750602088, "grad_norm": 0.5177139639854431, "learning_rate": 0.00016942178528687419, "loss": 0.829, "step": 6776 }, { "epoch": 1.208901971278209, "grad_norm": 0.42148521542549133, "learning_rate": 0.00016935546517111744, "loss": 0.5036, "step": 6777 }, { "epoch": 1.2090803674962092, "grad_norm": 0.4840809404850006, "learning_rate": 0.00016928915138914787, "loss": 0.6851, "step": 6778 }, { "epoch": 1.2092587637142094, "grad_norm": 0.4874308407306671, "learning_rate": 0.0001692228439461736, "loss": 0.7215, "step": 6779 }, { "epoch": 1.2094371599322093, "grad_norm": 0.5354540944099426, "learning_rate": 0.0001691565428474024, "loss": 0.6564, "step": 6780 }, { "epoch": 1.2096155561502095, "grad_norm": 0.47135040163993835, "learning_rate": 0.0001690902480980415, "loss": 0.6163, "step": 6781 }, { "epoch": 1.2097939523682097, "grad_norm": 0.5024769902229309, "learning_rate": 0.00016902395970329776, "loss": 0.7379, "step": 6782 }, { "epoch": 1.20997234858621, "grad_norm": 0.4286527633666992, "learning_rate": 0.00016895767766837732, "loss": 0.4737, "step": 6783 }, { "epoch": 1.2101507448042101, "grad_norm": 0.4754869043827057, "learning_rate": 0.00016889140199848605, "loss": 0.5874, "step": 6784 }, { "epoch": 1.2103291410222103, "grad_norm": 0.5316412448883057, "learning_rate": 0.00016882513269882916, "loss": 0.8397, "step": 6785 }, { "epoch": 1.2105075372402105, "grad_norm": 0.46648043394088745, "learning_rate": 0.00016875886977461136, "loss": 0.6463, "step": 6786 }, { "epoch": 1.2106859334582107, "grad_norm": 0.5333806276321411, "learning_rate": 0.00016869261323103707, "loss": 0.845, "step": 6787 }, { "epoch": 1.210864329676211, "grad_norm": 0.48115333914756775, "learning_rate": 0.00016862636307330987, "loss": 0.6568, "step": 6788 }, { "epoch": 1.2110427258942111, "grad_norm": 0.524509072303772, "learning_rate": 0.00016856011930663312, "loss": 0.8149, "step": 6789 }, { "epoch": 1.2112211221122111, "grad_norm": 0.4812416732311249, "learning_rate": 0.0001684938819362095, "loss": 0.5716, "step": 6790 }, { "epoch": 1.2113995183302113, "grad_norm": 0.4996643662452698, "learning_rate": 0.0001684276509672414, "loss": 0.8065, "step": 6791 }, { "epoch": 1.2115779145482115, "grad_norm": 0.5265586376190186, "learning_rate": 0.0001683614264049304, "loss": 0.7074, "step": 6792 }, { "epoch": 1.2117563107662117, "grad_norm": 0.4892607033252716, "learning_rate": 0.00016829520825447787, "loss": 0.7555, "step": 6793 }, { "epoch": 1.211934706984212, "grad_norm": 0.493960440158844, "learning_rate": 0.00016822899652108454, "loss": 0.8655, "step": 6794 }, { "epoch": 1.212113103202212, "grad_norm": 0.5139688849449158, "learning_rate": 0.00016816279120995063, "loss": 0.8795, "step": 6795 }, { "epoch": 1.2122914994202123, "grad_norm": 0.41714024543762207, "learning_rate": 0.00016809659232627588, "loss": 0.4714, "step": 6796 }, { "epoch": 1.2124698956382125, "grad_norm": 0.5181586742401123, "learning_rate": 0.00016803039987525953, "loss": 0.7304, "step": 6797 }, { "epoch": 1.2126482918562127, "grad_norm": 0.47298526763916016, "learning_rate": 0.00016796421386210028, "loss": 0.5293, "step": 6798 }, { "epoch": 1.212826688074213, "grad_norm": 0.47888705134391785, "learning_rate": 0.0001678980342919964, "loss": 0.5461, "step": 6799 }, { "epoch": 1.213005084292213, "grad_norm": 0.5607367753982544, "learning_rate": 0.0001678318611701456, "loss": 0.8278, "step": 6800 }, { "epoch": 1.2131834805102133, "grad_norm": 0.5705345869064331, "learning_rate": 0.00016776569450174504, "loss": 0.786, "step": 6801 }, { "epoch": 1.2133618767282133, "grad_norm": 0.5020168423652649, "learning_rate": 0.00016769953429199142, "loss": 0.7604, "step": 6802 }, { "epoch": 1.2135402729462135, "grad_norm": 0.4835543632507324, "learning_rate": 0.00016763338054608096, "loss": 0.6996, "step": 6803 }, { "epoch": 1.2137186691642137, "grad_norm": 0.44707992672920227, "learning_rate": 0.00016756723326920937, "loss": 0.6119, "step": 6804 }, { "epoch": 1.2138970653822139, "grad_norm": 0.40079420804977417, "learning_rate": 0.0001675010924665718, "loss": 0.5236, "step": 6805 }, { "epoch": 1.214075461600214, "grad_norm": 0.4688962697982788, "learning_rate": 0.0001674349581433629, "loss": 0.5646, "step": 6806 }, { "epoch": 1.2142538578182143, "grad_norm": 0.46147796511650085, "learning_rate": 0.0001673688303047769, "loss": 0.5085, "step": 6807 }, { "epoch": 1.2144322540362145, "grad_norm": 0.4987983703613281, "learning_rate": 0.00016730270895600732, "loss": 0.7102, "step": 6808 }, { "epoch": 1.2146106502542147, "grad_norm": 0.4857620894908905, "learning_rate": 0.00016723659410224746, "loss": 0.6561, "step": 6809 }, { "epoch": 1.2147890464722149, "grad_norm": 0.5185416340827942, "learning_rate": 0.00016717048574868987, "loss": 0.688, "step": 6810 }, { "epoch": 1.214967442690215, "grad_norm": 0.43132469058036804, "learning_rate": 0.0001671043839005267, "loss": 0.6281, "step": 6811 }, { "epoch": 1.215145838908215, "grad_norm": 0.5003576278686523, "learning_rate": 0.00016703828856294955, "loss": 0.7138, "step": 6812 }, { "epoch": 1.2153242351262152, "grad_norm": 0.5227103233337402, "learning_rate": 0.00016697219974114955, "loss": 0.7263, "step": 6813 }, { "epoch": 1.2155026313442154, "grad_norm": 0.46056926250457764, "learning_rate": 0.0001669061174403172, "loss": 0.6924, "step": 6814 }, { "epoch": 1.2156810275622156, "grad_norm": 0.5291325449943542, "learning_rate": 0.00016684004166564264, "loss": 0.7044, "step": 6815 }, { "epoch": 1.2158594237802158, "grad_norm": 0.3964986503124237, "learning_rate": 0.0001667739724223154, "loss": 0.4319, "step": 6816 }, { "epoch": 1.216037819998216, "grad_norm": 0.49517232179641724, "learning_rate": 0.00016670790971552458, "loss": 0.5721, "step": 6817 }, { "epoch": 1.2162162162162162, "grad_norm": 0.47603654861450195, "learning_rate": 0.0001666418535504587, "loss": 0.6336, "step": 6818 }, { "epoch": 1.2163946124342164, "grad_norm": 0.4822855293750763, "learning_rate": 0.00016657580393230573, "loss": 0.7157, "step": 6819 }, { "epoch": 1.2165730086522166, "grad_norm": 0.45155301690101624, "learning_rate": 0.00016650976086625324, "loss": 0.5622, "step": 6820 }, { "epoch": 1.2167514048702168, "grad_norm": 0.4612867832183838, "learning_rate": 0.00016644372435748822, "loss": 0.5382, "step": 6821 }, { "epoch": 1.216929801088217, "grad_norm": 0.44989264011383057, "learning_rate": 0.00016637769441119713, "loss": 0.5503, "step": 6822 }, { "epoch": 1.2171081973062172, "grad_norm": 0.5135206580162048, "learning_rate": 0.00016631167103256582, "loss": 0.7834, "step": 6823 }, { "epoch": 1.2172865935242172, "grad_norm": 0.5145620107650757, "learning_rate": 0.00016624565422677996, "loss": 0.8026, "step": 6824 }, { "epoch": 1.2174649897422174, "grad_norm": 0.4703541398048401, "learning_rate": 0.0001661796439990244, "loss": 0.677, "step": 6825 }, { "epoch": 1.2176433859602176, "grad_norm": 0.4866558611392975, "learning_rate": 0.00016611364035448348, "loss": 0.6063, "step": 6826 }, { "epoch": 1.2178217821782178, "grad_norm": 0.5036291480064392, "learning_rate": 0.00016604764329834117, "loss": 0.8213, "step": 6827 }, { "epoch": 1.218000178396218, "grad_norm": 0.4724942743778229, "learning_rate": 0.00016598165283578082, "loss": 0.6396, "step": 6828 }, { "epoch": 1.2181785746142182, "grad_norm": 0.44807612895965576, "learning_rate": 0.0001659156689719853, "loss": 0.5442, "step": 6829 }, { "epoch": 1.2183569708322184, "grad_norm": 0.42465436458587646, "learning_rate": 0.00016584969171213693, "loss": 0.4956, "step": 6830 }, { "epoch": 1.2185353670502186, "grad_norm": 0.9363518357276917, "learning_rate": 0.0001657837210614176, "loss": 0.8942, "step": 6831 }, { "epoch": 1.2187137632682188, "grad_norm": 0.44213372468948364, "learning_rate": 0.00016571775702500856, "loss": 0.4695, "step": 6832 }, { "epoch": 1.218892159486219, "grad_norm": 0.4889032542705536, "learning_rate": 0.0001656517996080906, "loss": 0.4976, "step": 6833 }, { "epoch": 1.219070555704219, "grad_norm": 0.5484008193016052, "learning_rate": 0.00016558584881584408, "loss": 0.7479, "step": 6834 }, { "epoch": 1.2192489519222192, "grad_norm": 0.4762975573539734, "learning_rate": 0.00016551990465344857, "loss": 0.6117, "step": 6835 }, { "epoch": 1.2194273481402194, "grad_norm": 0.5832328200340271, "learning_rate": 0.00016545396712608346, "loss": 0.7153, "step": 6836 }, { "epoch": 1.2196057443582196, "grad_norm": 0.46144014596939087, "learning_rate": 0.00016538803623892734, "loss": 0.568, "step": 6837 }, { "epoch": 1.2197841405762198, "grad_norm": 0.4600122570991516, "learning_rate": 0.0001653221119971584, "loss": 0.5168, "step": 6838 }, { "epoch": 1.21996253679422, "grad_norm": 0.51224684715271, "learning_rate": 0.0001652561944059544, "loss": 0.77, "step": 6839 }, { "epoch": 1.2201409330122202, "grad_norm": 0.529111921787262, "learning_rate": 0.00016519028347049242, "loss": 0.7114, "step": 6840 }, { "epoch": 1.2203193292302204, "grad_norm": 0.49239465594291687, "learning_rate": 0.00016512437919594908, "loss": 0.7044, "step": 6841 }, { "epoch": 1.2204977254482205, "grad_norm": 0.4696052670478821, "learning_rate": 0.00016505848158750047, "loss": 0.647, "step": 6842 }, { "epoch": 1.2206761216662207, "grad_norm": 0.4317275285720825, "learning_rate": 0.00016499259065032217, "loss": 0.5817, "step": 6843 }, { "epoch": 1.220854517884221, "grad_norm": 0.5241662263870239, "learning_rate": 0.00016492670638958924, "loss": 0.7357, "step": 6844 }, { "epoch": 1.2210329141022211, "grad_norm": 0.5198963284492493, "learning_rate": 0.00016486082881047616, "loss": 0.5708, "step": 6845 }, { "epoch": 1.2212113103202211, "grad_norm": 0.49722328782081604, "learning_rate": 0.00016479495791815702, "loss": 0.6887, "step": 6846 }, { "epoch": 1.2213897065382213, "grad_norm": 0.5908113121986389, "learning_rate": 0.00016472909371780512, "loss": 0.9454, "step": 6847 }, { "epoch": 1.2215681027562215, "grad_norm": 0.5063602328300476, "learning_rate": 0.00016466323621459352, "loss": 0.6783, "step": 6848 }, { "epoch": 1.2217464989742217, "grad_norm": 0.47944772243499756, "learning_rate": 0.00016459738541369466, "loss": 0.6294, "step": 6849 }, { "epoch": 1.221924895192222, "grad_norm": 0.48459392786026, "learning_rate": 0.00016453154132028036, "loss": 0.6708, "step": 6850 }, { "epoch": 1.2221032914102221, "grad_norm": 0.5024449229240417, "learning_rate": 0.00016446570393952205, "loss": 0.6274, "step": 6851 }, { "epoch": 1.2222816876282223, "grad_norm": 0.4757004976272583, "learning_rate": 0.0001643998732765905, "loss": 0.6813, "step": 6852 }, { "epoch": 1.2224600838462225, "grad_norm": 0.6229151487350464, "learning_rate": 0.0001643340493366561, "loss": 0.5887, "step": 6853 }, { "epoch": 1.2226384800642227, "grad_norm": 0.5339584946632385, "learning_rate": 0.00016426823212488856, "loss": 0.8846, "step": 6854 }, { "epoch": 1.222816876282223, "grad_norm": 0.4785784184932709, "learning_rate": 0.0001642024216464572, "loss": 0.5552, "step": 6855 }, { "epoch": 1.2229952725002229, "grad_norm": 0.45622581243515015, "learning_rate": 0.00016413661790653074, "loss": 0.5302, "step": 6856 }, { "epoch": 1.223173668718223, "grad_norm": 0.4625503420829773, "learning_rate": 0.00016407082091027736, "loss": 0.5837, "step": 6857 }, { "epoch": 1.2233520649362233, "grad_norm": 0.49430111050605774, "learning_rate": 0.00016400503066286472, "loss": 0.5746, "step": 6858 }, { "epoch": 1.2235304611542235, "grad_norm": 0.6408543586730957, "learning_rate": 0.00016393924716946002, "loss": 0.6712, "step": 6859 }, { "epoch": 1.2237088573722237, "grad_norm": 0.5817059874534607, "learning_rate": 0.00016387347043522976, "loss": 0.8973, "step": 6860 }, { "epoch": 1.2238872535902239, "grad_norm": 0.7297067642211914, "learning_rate": 0.00016380770046534005, "loss": 0.9795, "step": 6861 }, { "epoch": 1.224065649808224, "grad_norm": 0.5279180407524109, "learning_rate": 0.00016374193726495647, "loss": 0.6417, "step": 6862 }, { "epoch": 1.2242440460262243, "grad_norm": 0.45359596610069275, "learning_rate": 0.00016367618083924402, "loss": 0.6208, "step": 6863 }, { "epoch": 1.2244224422442245, "grad_norm": 0.47759270668029785, "learning_rate": 0.00016361043119336719, "loss": 0.4829, "step": 6864 }, { "epoch": 1.2246008384622247, "grad_norm": 0.4140789210796356, "learning_rate": 0.00016354468833248992, "loss": 0.3787, "step": 6865 }, { "epoch": 1.2247792346802249, "grad_norm": 0.4323157072067261, "learning_rate": 0.00016347895226177561, "loss": 0.5246, "step": 6866 }, { "epoch": 1.224957630898225, "grad_norm": 0.42192304134368896, "learning_rate": 0.0001634132229863872, "loss": 0.5802, "step": 6867 }, { "epoch": 1.225136027116225, "grad_norm": 0.39509162306785583, "learning_rate": 0.00016334750051148696, "loss": 0.3766, "step": 6868 }, { "epoch": 1.2253144233342252, "grad_norm": 0.5555423498153687, "learning_rate": 0.0001632817848422366, "loss": 0.8368, "step": 6869 }, { "epoch": 1.2254928195522254, "grad_norm": 0.489187628030777, "learning_rate": 0.00016321607598379767, "loss": 0.6287, "step": 6870 }, { "epoch": 1.2256712157702256, "grad_norm": 0.4797876477241516, "learning_rate": 0.00016315037394133082, "loss": 0.5631, "step": 6871 }, { "epoch": 1.2258496119882258, "grad_norm": 0.5317756533622742, "learning_rate": 0.00016308467871999622, "loss": 0.6423, "step": 6872 }, { "epoch": 1.226028008206226, "grad_norm": 0.5093753933906555, "learning_rate": 0.00016301899032495354, "loss": 0.7641, "step": 6873 }, { "epoch": 1.2262064044242262, "grad_norm": 0.4402000606060028, "learning_rate": 0.0001629533087613619, "loss": 0.5262, "step": 6874 }, { "epoch": 1.2263848006422264, "grad_norm": 0.605444073677063, "learning_rate": 0.00016288763403437994, "loss": 0.8924, "step": 6875 }, { "epoch": 1.2265631968602266, "grad_norm": 0.5263209342956543, "learning_rate": 0.00016282196614916572, "loss": 0.5979, "step": 6876 }, { "epoch": 1.2267415930782268, "grad_norm": 0.49599960446357727, "learning_rate": 0.0001627563051108768, "loss": 0.554, "step": 6877 }, { "epoch": 1.2269199892962268, "grad_norm": 0.5215542912483215, "learning_rate": 0.0001626906509246701, "loss": 0.673, "step": 6878 }, { "epoch": 1.227098385514227, "grad_norm": 0.48019513487815857, "learning_rate": 0.0001626250035957021, "loss": 0.6647, "step": 6879 }, { "epoch": 1.2272767817322272, "grad_norm": 0.5026001334190369, "learning_rate": 0.00016255936312912876, "loss": 0.5802, "step": 6880 }, { "epoch": 1.2274551779502274, "grad_norm": 0.5892630815505981, "learning_rate": 0.00016249372953010537, "loss": 0.7349, "step": 6881 }, { "epoch": 1.2276335741682276, "grad_norm": 0.49261564016342163, "learning_rate": 0.00016242810280378678, "loss": 0.6885, "step": 6882 }, { "epoch": 1.2278119703862278, "grad_norm": 0.5151751637458801, "learning_rate": 0.00016236248295532736, "loss": 0.7812, "step": 6883 }, { "epoch": 1.227990366604228, "grad_norm": 1.1146833896636963, "learning_rate": 0.00016229686998988068, "loss": 0.5243, "step": 6884 }, { "epoch": 1.2281687628222282, "grad_norm": 0.5391646027565002, "learning_rate": 0.00016223126391260023, "loss": 0.8716, "step": 6885 }, { "epoch": 1.2283471590402284, "grad_norm": 0.4731014370918274, "learning_rate": 0.00016216566472863854, "loss": 0.5053, "step": 6886 }, { "epoch": 1.2285255552582286, "grad_norm": 0.5936664938926697, "learning_rate": 0.00016210007244314774, "loss": 0.8034, "step": 6887 }, { "epoch": 1.2287039514762288, "grad_norm": 0.5338798761367798, "learning_rate": 0.00016203448706127938, "loss": 0.7982, "step": 6888 }, { "epoch": 1.228882347694229, "grad_norm": 0.5306500792503357, "learning_rate": 0.00016196890858818458, "loss": 0.6466, "step": 6889 }, { "epoch": 1.229060743912229, "grad_norm": 0.44917765259742737, "learning_rate": 0.0001619033370290138, "loss": 0.5473, "step": 6890 }, { "epoch": 1.2292391401302292, "grad_norm": 0.520117998123169, "learning_rate": 0.00016183777238891703, "loss": 0.6827, "step": 6891 }, { "epoch": 1.2294175363482294, "grad_norm": 0.5320703387260437, "learning_rate": 0.0001617722146730437, "loss": 0.8353, "step": 6892 }, { "epoch": 1.2295959325662296, "grad_norm": 0.47005659341812134, "learning_rate": 0.00016170666388654265, "loss": 0.6324, "step": 6893 }, { "epoch": 1.2297743287842298, "grad_norm": 0.45758256316185, "learning_rate": 0.00016164112003456223, "loss": 0.6812, "step": 6894 }, { "epoch": 1.22995272500223, "grad_norm": 0.4741186201572418, "learning_rate": 0.00016157558312225018, "loss": 0.588, "step": 6895 }, { "epoch": 1.2301311212202302, "grad_norm": 0.4784983694553375, "learning_rate": 0.0001615100531547538, "loss": 0.601, "step": 6896 }, { "epoch": 1.2303095174382304, "grad_norm": 0.45370981097221375, "learning_rate": 0.00016144453013721978, "loss": 0.4963, "step": 6897 }, { "epoch": 1.2304879136562306, "grad_norm": 0.5208538770675659, "learning_rate": 0.00016137901407479421, "loss": 0.5965, "step": 6898 }, { "epoch": 1.2306663098742308, "grad_norm": 0.45619308948516846, "learning_rate": 0.00016131350497262278, "loss": 0.5699, "step": 6899 }, { "epoch": 1.2308447060922307, "grad_norm": 0.5480870604515076, "learning_rate": 0.00016124800283585044, "loss": 0.6709, "step": 6900 }, { "epoch": 1.231023102310231, "grad_norm": 0.5735124945640564, "learning_rate": 0.00016118250766962184, "loss": 0.7542, "step": 6901 }, { "epoch": 1.2312014985282311, "grad_norm": 0.4865647852420807, "learning_rate": 0.00016111701947908085, "loss": 0.6056, "step": 6902 }, { "epoch": 1.2313798947462313, "grad_norm": 0.7100339531898499, "learning_rate": 0.00016105153826937086, "loss": 0.9174, "step": 6903 }, { "epoch": 1.2315582909642315, "grad_norm": 0.5096773505210876, "learning_rate": 0.00016098606404563482, "loss": 0.6564, "step": 6904 }, { "epoch": 1.2317366871822317, "grad_norm": 0.5119658708572388, "learning_rate": 0.000160920596813015, "loss": 0.7196, "step": 6905 }, { "epoch": 1.231915083400232, "grad_norm": 0.4193865656852722, "learning_rate": 0.0001608551365766532, "loss": 0.5337, "step": 6906 }, { "epoch": 1.2320934796182321, "grad_norm": 0.47937509417533875, "learning_rate": 0.00016078968334169057, "loss": 0.6803, "step": 6907 }, { "epoch": 1.2322718758362323, "grad_norm": 0.5272152423858643, "learning_rate": 0.00016072423711326782, "loss": 0.6065, "step": 6908 }, { "epoch": 1.2324502720542325, "grad_norm": 0.40397560596466064, "learning_rate": 0.0001606587978965251, "loss": 0.546, "step": 6909 }, { "epoch": 1.2326286682722327, "grad_norm": 0.428237646818161, "learning_rate": 0.00016059336569660193, "loss": 0.4717, "step": 6910 }, { "epoch": 1.232807064490233, "grad_norm": 0.4891768991947174, "learning_rate": 0.00016052794051863733, "loss": 0.6167, "step": 6911 }, { "epoch": 1.232985460708233, "grad_norm": 0.5653678178787231, "learning_rate": 0.00016046252236776978, "loss": 0.8338, "step": 6912 }, { "epoch": 1.233163856926233, "grad_norm": 0.4607936441898346, "learning_rate": 0.00016039711124913718, "loss": 0.6023, "step": 6913 }, { "epoch": 1.2333422531442333, "grad_norm": 0.5353094339370728, "learning_rate": 0.00016033170716787698, "loss": 0.785, "step": 6914 }, { "epoch": 1.2335206493622335, "grad_norm": 0.47947603464126587, "learning_rate": 0.00016026631012912578, "loss": 0.6174, "step": 6915 }, { "epoch": 1.2336990455802337, "grad_norm": 0.5786678791046143, "learning_rate": 0.00016020092013802002, "loss": 0.9001, "step": 6916 }, { "epoch": 1.2338774417982339, "grad_norm": 0.4535296559333801, "learning_rate": 0.00016013553719969537, "loss": 0.5654, "step": 6917 }, { "epoch": 1.234055838016234, "grad_norm": 0.5149961113929749, "learning_rate": 0.00016007016131928703, "loss": 0.6585, "step": 6918 }, { "epoch": 1.2342342342342343, "grad_norm": 0.4400131106376648, "learning_rate": 0.00016000479250192942, "loss": 0.5134, "step": 6919 }, { "epoch": 1.2344126304522345, "grad_norm": 0.4986150562763214, "learning_rate": 0.0001599394307527567, "loss": 0.6103, "step": 6920 }, { "epoch": 1.2345910266702347, "grad_norm": 0.487212210893631, "learning_rate": 0.00015987407607690235, "loss": 0.7115, "step": 6921 }, { "epoch": 1.2347694228882347, "grad_norm": 0.46623316407203674, "learning_rate": 0.0001598087284794993, "loss": 0.5599, "step": 6922 }, { "epoch": 1.2349478191062349, "grad_norm": 0.4859059453010559, "learning_rate": 0.00015974338796567982, "loss": 0.6134, "step": 6923 }, { "epoch": 1.235126215324235, "grad_norm": 0.5123870968818665, "learning_rate": 0.00015967805454057587, "loss": 0.7607, "step": 6924 }, { "epoch": 1.2353046115422353, "grad_norm": 0.49646908044815063, "learning_rate": 0.00015961272820931867, "loss": 0.6267, "step": 6925 }, { "epoch": 1.2354830077602355, "grad_norm": 0.4790204167366028, "learning_rate": 0.0001595474089770389, "loss": 0.6403, "step": 6926 }, { "epoch": 1.2356614039782357, "grad_norm": 0.49380695819854736, "learning_rate": 0.00015948209684886667, "loss": 0.6544, "step": 6927 }, { "epoch": 1.2358398001962358, "grad_norm": 0.5434766411781311, "learning_rate": 0.00015941679182993157, "loss": 0.8058, "step": 6928 }, { "epoch": 1.236018196414236, "grad_norm": 0.6229785680770874, "learning_rate": 0.00015935149392536273, "loss": 0.7764, "step": 6929 }, { "epoch": 1.2361965926322362, "grad_norm": 0.5308107733726501, "learning_rate": 0.00015928620314028838, "loss": 0.6995, "step": 6930 }, { "epoch": 1.2363749888502364, "grad_norm": 0.6573651432991028, "learning_rate": 0.0001592209194798368, "loss": 0.7208, "step": 6931 }, { "epoch": 1.2365533850682366, "grad_norm": 0.5968353748321533, "learning_rate": 0.00015915564294913503, "loss": 0.8999, "step": 6932 }, { "epoch": 1.2367317812862368, "grad_norm": 0.45062559843063354, "learning_rate": 0.00015909037355331003, "loss": 0.5419, "step": 6933 }, { "epoch": 1.2369101775042368, "grad_norm": 0.5150581002235413, "learning_rate": 0.0001590251112974879, "loss": 0.6129, "step": 6934 }, { "epoch": 1.237088573722237, "grad_norm": 0.4370402693748474, "learning_rate": 0.00015895985618679444, "loss": 0.6262, "step": 6935 }, { "epoch": 1.2372669699402372, "grad_norm": 0.5415884852409363, "learning_rate": 0.0001588946082263547, "loss": 0.8408, "step": 6936 }, { "epoch": 1.2374453661582374, "grad_norm": 0.49960747361183167, "learning_rate": 0.0001588293674212932, "loss": 0.7211, "step": 6937 }, { "epoch": 1.2376237623762376, "grad_norm": 0.47522178292274475, "learning_rate": 0.00015876413377673395, "loss": 0.4949, "step": 6938 }, { "epoch": 1.2378021585942378, "grad_norm": 0.5218939781188965, "learning_rate": 0.00015869890729780045, "loss": 0.5721, "step": 6939 }, { "epoch": 1.237980554812238, "grad_norm": 0.5287362933158875, "learning_rate": 0.00015863368798961538, "loss": 0.9253, "step": 6940 }, { "epoch": 1.2381589510302382, "grad_norm": 0.5668135285377502, "learning_rate": 0.00015856847585730117, "loss": 0.8171, "step": 6941 }, { "epoch": 1.2383373472482384, "grad_norm": 0.5259323716163635, "learning_rate": 0.00015850327090597953, "loss": 0.6072, "step": 6942 }, { "epoch": 1.2385157434662386, "grad_norm": 0.4646325409412384, "learning_rate": 0.00015843807314077157, "loss": 0.5816, "step": 6943 }, { "epoch": 1.2386941396842386, "grad_norm": 0.4163079857826233, "learning_rate": 0.00015837288256679799, "loss": 0.4746, "step": 6944 }, { "epoch": 1.2388725359022388, "grad_norm": 0.5029598474502563, "learning_rate": 0.00015830769918917872, "loss": 0.7697, "step": 6945 }, { "epoch": 1.239050932120239, "grad_norm": 0.5194973349571228, "learning_rate": 0.00015824252301303336, "loss": 0.7874, "step": 6946 }, { "epoch": 1.2392293283382392, "grad_norm": 0.4863186478614807, "learning_rate": 0.00015817735404348072, "loss": 0.715, "step": 6947 }, { "epoch": 1.2394077245562394, "grad_norm": 0.4885355830192566, "learning_rate": 0.00015811219228563923, "loss": 0.6721, "step": 6948 }, { "epoch": 1.2395861207742396, "grad_norm": 0.46745818853378296, "learning_rate": 0.00015804703774462657, "loss": 0.6217, "step": 6949 }, { "epoch": 1.2397645169922398, "grad_norm": 0.5246323347091675, "learning_rate": 0.00015798189042556, "loss": 0.7571, "step": 6950 }, { "epoch": 1.23994291321024, "grad_norm": 0.5258684158325195, "learning_rate": 0.0001579167503335562, "loss": 0.8582, "step": 6951 }, { "epoch": 1.2401213094282402, "grad_norm": 0.49560466408729553, "learning_rate": 0.0001578516174737312, "loss": 0.6666, "step": 6952 }, { "epoch": 1.2402997056462404, "grad_norm": 0.4392854571342468, "learning_rate": 0.00015778649185120048, "loss": 0.5967, "step": 6953 }, { "epoch": 1.2404781018642406, "grad_norm": 0.49791455268859863, "learning_rate": 0.000157721373471079, "loss": 0.6931, "step": 6954 }, { "epoch": 1.2406564980822408, "grad_norm": 0.48088231682777405, "learning_rate": 0.00015765626233848115, "loss": 0.733, "step": 6955 }, { "epoch": 1.2408348943002407, "grad_norm": 0.5019537806510925, "learning_rate": 0.00015759115845852072, "loss": 0.8829, "step": 6956 }, { "epoch": 1.241013290518241, "grad_norm": 0.5134669542312622, "learning_rate": 0.0001575260618363109, "loss": 0.7432, "step": 6957 }, { "epoch": 1.2411916867362411, "grad_norm": 0.5204741358757019, "learning_rate": 0.00015746097247696443, "loss": 0.7888, "step": 6958 }, { "epoch": 1.2413700829542413, "grad_norm": 0.5397398471832275, "learning_rate": 0.0001573958903855934, "loss": 0.7761, "step": 6959 }, { "epoch": 1.2415484791722415, "grad_norm": 0.4783801734447479, "learning_rate": 0.00015733081556730926, "loss": 0.7478, "step": 6960 }, { "epoch": 1.2417268753902417, "grad_norm": 0.6109618544578552, "learning_rate": 0.00015726574802722284, "loss": 0.7975, "step": 6961 }, { "epoch": 1.241905271608242, "grad_norm": 0.48145678639411926, "learning_rate": 0.00015720068777044476, "loss": 0.5358, "step": 6962 }, { "epoch": 1.2420836678262421, "grad_norm": 0.5572999715805054, "learning_rate": 0.0001571356348020848, "loss": 0.6544, "step": 6963 }, { "epoch": 1.2422620640442423, "grad_norm": 0.503771185874939, "learning_rate": 0.00015707058912725207, "loss": 0.5603, "step": 6964 }, { "epoch": 1.2424404602622425, "grad_norm": 0.5243847370147705, "learning_rate": 0.00015700555075105532, "loss": 0.6095, "step": 6965 }, { "epoch": 1.2426188564802425, "grad_norm": 0.5380844473838806, "learning_rate": 0.00015694051967860256, "loss": 0.6285, "step": 6966 }, { "epoch": 1.2427972526982427, "grad_norm": 0.5606120824813843, "learning_rate": 0.00015687549591500134, "loss": 0.8092, "step": 6967 }, { "epoch": 1.242975648916243, "grad_norm": 0.5026856660842896, "learning_rate": 0.0001568104794653586, "loss": 0.6013, "step": 6968 }, { "epoch": 1.243154045134243, "grad_norm": 0.47534608840942383, "learning_rate": 0.00015674547033478072, "loss": 0.5524, "step": 6969 }, { "epoch": 1.2433324413522433, "grad_norm": 0.5083494186401367, "learning_rate": 0.00015668046852837343, "loss": 0.7611, "step": 6970 }, { "epoch": 1.2435108375702435, "grad_norm": 0.4332534372806549, "learning_rate": 0.000156615474051242, "loss": 0.4897, "step": 6971 }, { "epoch": 1.2436892337882437, "grad_norm": 0.4423793852329254, "learning_rate": 0.00015655048690849102, "loss": 0.5234, "step": 6972 }, { "epoch": 1.243867630006244, "grad_norm": 0.47594019770622253, "learning_rate": 0.00015648550710522466, "loss": 0.6987, "step": 6973 }, { "epoch": 1.244046026224244, "grad_norm": 0.5025418400764465, "learning_rate": 0.00015642053464654627, "loss": 0.5931, "step": 6974 }, { "epoch": 1.2442244224422443, "grad_norm": 0.48870721459388733, "learning_rate": 0.00015635556953755883, "loss": 0.7561, "step": 6975 }, { "epoch": 1.2444028186602445, "grad_norm": 0.5276898741722107, "learning_rate": 0.00015629061178336456, "loss": 0.5714, "step": 6976 }, { "epoch": 1.2445812148782447, "grad_norm": 0.5232852697372437, "learning_rate": 0.00015622566138906547, "loss": 0.6564, "step": 6977 }, { "epoch": 1.2447596110962447, "grad_norm": 0.41942137479782104, "learning_rate": 0.00015616071835976254, "loss": 0.4946, "step": 6978 }, { "epoch": 1.2449380073142449, "grad_norm": 0.6095328330993652, "learning_rate": 0.00015609578270055636, "loss": 0.6298, "step": 6979 }, { "epoch": 1.245116403532245, "grad_norm": 0.4936719238758087, "learning_rate": 0.00015603085441654702, "loss": 0.744, "step": 6980 }, { "epoch": 1.2452947997502453, "grad_norm": 0.4040975868701935, "learning_rate": 0.00015596593351283394, "loss": 0.5204, "step": 6981 }, { "epoch": 1.2454731959682455, "grad_norm": 0.5431026816368103, "learning_rate": 0.000155901019994516, "loss": 0.7077, "step": 6982 }, { "epoch": 1.2456515921862457, "grad_norm": 0.5668693780899048, "learning_rate": 0.00015583611386669143, "loss": 0.7685, "step": 6983 }, { "epoch": 1.2458299884042459, "grad_norm": 0.4577692747116089, "learning_rate": 0.00015577121513445796, "loss": 0.4953, "step": 6984 }, { "epoch": 1.246008384622246, "grad_norm": 0.5005066394805908, "learning_rate": 0.0001557063238029127, "loss": 0.6231, "step": 6985 }, { "epoch": 1.2461867808402463, "grad_norm": 0.4887985289096832, "learning_rate": 0.00015564143987715224, "loss": 0.5986, "step": 6986 }, { "epoch": 1.2463651770582465, "grad_norm": 0.5014304518699646, "learning_rate": 0.00015557656336227243, "loss": 0.6816, "step": 6987 }, { "epoch": 1.2465435732762464, "grad_norm": 0.4619125425815582, "learning_rate": 0.00015551169426336874, "loss": 0.608, "step": 6988 }, { "epoch": 1.2467219694942466, "grad_norm": 0.4961630702018738, "learning_rate": 0.0001554468325855359, "loss": 0.6304, "step": 6989 }, { "epoch": 1.2469003657122468, "grad_norm": 0.5148522853851318, "learning_rate": 0.00015538197833386814, "loss": 0.81, "step": 6990 }, { "epoch": 1.247078761930247, "grad_norm": 0.414350688457489, "learning_rate": 0.000155317131513459, "loss": 0.4424, "step": 6991 }, { "epoch": 1.2472571581482472, "grad_norm": 0.4806058406829834, "learning_rate": 0.00015525229212940168, "loss": 0.6765, "step": 6992 }, { "epoch": 1.2474355543662474, "grad_norm": 0.5416685342788696, "learning_rate": 0.0001551874601867886, "loss": 0.7167, "step": 6993 }, { "epoch": 1.2476139505842476, "grad_norm": 0.4818612039089203, "learning_rate": 0.00015512263569071152, "loss": 0.6258, "step": 6994 }, { "epoch": 1.2477923468022478, "grad_norm": 0.5225372910499573, "learning_rate": 0.00015505781864626184, "loss": 0.8175, "step": 6995 }, { "epoch": 1.247970743020248, "grad_norm": 0.45656535029411316, "learning_rate": 0.00015499300905853026, "loss": 0.6183, "step": 6996 }, { "epoch": 1.2481491392382482, "grad_norm": 0.5095564126968384, "learning_rate": 0.00015492820693260682, "loss": 0.3982, "step": 6997 }, { "epoch": 1.2483275354562484, "grad_norm": 0.4879413843154907, "learning_rate": 0.00015486341227358114, "loss": 0.5668, "step": 6998 }, { "epoch": 1.2485059316742486, "grad_norm": 0.463022381067276, "learning_rate": 0.00015479862508654212, "loss": 0.6153, "step": 6999 }, { "epoch": 1.2486843278922486, "grad_norm": 52.42532730102539, "learning_rate": 0.0001547338453765781, "loss": 1.0213, "step": 7000 }, { "epoch": 1.2488627241102488, "grad_norm": 0.45679882168769836, "learning_rate": 0.00015466907314877682, "loss": 0.5221, "step": 7001 }, { "epoch": 1.249041120328249, "grad_norm": 0.5205798745155334, "learning_rate": 0.00015460430840822552, "loss": 0.6813, "step": 7002 }, { "epoch": 1.2492195165462492, "grad_norm": 0.510740339756012, "learning_rate": 0.00015453955116001084, "loss": 0.6022, "step": 7003 }, { "epoch": 1.2493979127642494, "grad_norm": 0.5229342579841614, "learning_rate": 0.0001544748014092187, "loss": 0.6381, "step": 7004 }, { "epoch": 1.2495763089822496, "grad_norm": 0.48941877484321594, "learning_rate": 0.00015441005916093454, "loss": 0.6733, "step": 7005 }, { "epoch": 1.2497547052002498, "grad_norm": 0.5428256392478943, "learning_rate": 0.0001543453244202433, "loss": 0.7319, "step": 7006 }, { "epoch": 1.24993310141825, "grad_norm": 0.4766102433204651, "learning_rate": 0.0001542805971922289, "loss": 0.6504, "step": 7007 }, { "epoch": 1.2501114976362502, "grad_norm": 0.573760986328125, "learning_rate": 0.00015421587748197532, "loss": 0.7209, "step": 7008 }, { "epoch": 1.2502898938542502, "grad_norm": 0.5121127963066101, "learning_rate": 0.00015415116529456552, "loss": 0.7118, "step": 7009 }, { "epoch": 1.2504682900722504, "grad_norm": 0.5492568612098694, "learning_rate": 0.00015408646063508197, "loss": 0.7907, "step": 7010 }, { "epoch": 1.2506466862902506, "grad_norm": 0.4394141733646393, "learning_rate": 0.00015402176350860653, "loss": 0.5036, "step": 7011 }, { "epoch": 1.2508250825082508, "grad_norm": 0.4782421886920929, "learning_rate": 0.00015395707392022045, "loss": 0.6055, "step": 7012 }, { "epoch": 1.251003478726251, "grad_norm": 0.5013086795806885, "learning_rate": 0.0001538923918750045, "loss": 0.6325, "step": 7013 }, { "epoch": 1.2511818749442511, "grad_norm": 0.5063600540161133, "learning_rate": 0.00015382771737803866, "loss": 0.7373, "step": 7014 }, { "epoch": 1.2513602711622513, "grad_norm": 0.48954328894615173, "learning_rate": 0.00015376305043440254, "loss": 0.5793, "step": 7015 }, { "epoch": 1.2515386673802515, "grad_norm": 0.4994099736213684, "learning_rate": 0.00015369839104917505, "loss": 0.5759, "step": 7016 }, { "epoch": 1.2517170635982517, "grad_norm": 0.5017921924591064, "learning_rate": 0.00015363373922743444, "loss": 0.6372, "step": 7017 }, { "epoch": 1.251895459816252, "grad_norm": 0.4896945059299469, "learning_rate": 0.0001535690949742585, "loss": 0.5314, "step": 7018 }, { "epoch": 1.2520738560342521, "grad_norm": 0.524643063545227, "learning_rate": 0.00015350445829472444, "loss": 0.7481, "step": 7019 }, { "epoch": 1.2522522522522523, "grad_norm": 0.42429521679878235, "learning_rate": 0.00015343982919390858, "loss": 0.5296, "step": 7020 }, { "epoch": 1.2524306484702525, "grad_norm": 0.44809257984161377, "learning_rate": 0.000153375207676887, "loss": 0.5499, "step": 7021 }, { "epoch": 1.2526090446882527, "grad_norm": 0.45676136016845703, "learning_rate": 0.00015331059374873495, "loss": 0.537, "step": 7022 }, { "epoch": 1.2527874409062527, "grad_norm": 0.5065332651138306, "learning_rate": 0.00015324598741452733, "loss": 0.7571, "step": 7023 }, { "epoch": 1.252965837124253, "grad_norm": 0.4751022160053253, "learning_rate": 0.0001531813886793383, "loss": 0.5616, "step": 7024 }, { "epoch": 1.253144233342253, "grad_norm": 0.5255012512207031, "learning_rate": 0.00015311679754824125, "loss": 0.832, "step": 7025 }, { "epoch": 1.2533226295602533, "grad_norm": 0.4546463191509247, "learning_rate": 0.00015305221402630925, "loss": 0.5618, "step": 7026 }, { "epoch": 1.2535010257782535, "grad_norm": 0.46323102712631226, "learning_rate": 0.00015298763811861466, "loss": 0.501, "step": 7027 }, { "epoch": 1.2536794219962537, "grad_norm": 0.5189341902732849, "learning_rate": 0.0001529230698302292, "loss": 0.6688, "step": 7028 }, { "epoch": 1.253857818214254, "grad_norm": 0.49833574891090393, "learning_rate": 0.0001528585091662241, "loss": 0.6542, "step": 7029 }, { "epoch": 1.254036214432254, "grad_norm": 0.5910803079605103, "learning_rate": 0.00015279395613166985, "loss": 0.8463, "step": 7030 }, { "epoch": 1.254214610650254, "grad_norm": 0.4795231223106384, "learning_rate": 0.00015272941073163647, "loss": 0.709, "step": 7031 }, { "epoch": 1.2543930068682543, "grad_norm": 0.520872950553894, "learning_rate": 0.00015266487297119335, "loss": 0.7132, "step": 7032 }, { "epoch": 1.2545714030862545, "grad_norm": 0.44845151901245117, "learning_rate": 0.00015260034285540915, "loss": 0.5002, "step": 7033 }, { "epoch": 1.2547497993042547, "grad_norm": 0.4645794928073883, "learning_rate": 0.00015253582038935216, "loss": 0.5065, "step": 7034 }, { "epoch": 1.2549281955222549, "grad_norm": 0.4405880272388458, "learning_rate": 0.00015247130557808985, "loss": 0.5645, "step": 7035 }, { "epoch": 1.255106591740255, "grad_norm": 0.4625834822654724, "learning_rate": 0.00015240679842668924, "loss": 0.4968, "step": 7036 }, { "epoch": 1.2552849879582553, "grad_norm": 0.4433553218841553, "learning_rate": 0.00015234229894021666, "loss": 0.5323, "step": 7037 }, { "epoch": 1.2554633841762555, "grad_norm": 0.510197639465332, "learning_rate": 0.0001522778071237379, "loss": 0.7702, "step": 7038 }, { "epoch": 1.2556417803942557, "grad_norm": 0.5122009515762329, "learning_rate": 0.00015221332298231816, "loss": 0.6138, "step": 7039 }, { "epoch": 1.2558201766122559, "grad_norm": 0.47657519578933716, "learning_rate": 0.00015214884652102193, "loss": 0.6609, "step": 7040 }, { "epoch": 1.255998572830256, "grad_norm": 0.4612807631492615, "learning_rate": 0.00015208437774491318, "loss": 0.466, "step": 7041 }, { "epoch": 1.2561769690482563, "grad_norm": 0.5131711959838867, "learning_rate": 0.00015201991665905528, "loss": 0.6583, "step": 7042 }, { "epoch": 1.2563553652662565, "grad_norm": 0.4903299808502197, "learning_rate": 0.00015195546326851096, "loss": 0.5683, "step": 7043 }, { "epoch": 1.2565337614842567, "grad_norm": 0.772181510925293, "learning_rate": 0.00015189101757834235, "loss": 0.58, "step": 7044 }, { "epoch": 1.2567121577022566, "grad_norm": 0.43410158157348633, "learning_rate": 0.00015182657959361107, "loss": 0.5278, "step": 7045 }, { "epoch": 1.2568905539202568, "grad_norm": 0.5446970462799072, "learning_rate": 0.00015176214931937794, "loss": 0.7534, "step": 7046 }, { "epoch": 1.257068950138257, "grad_norm": 0.4579203426837921, "learning_rate": 0.00015169772676070328, "loss": 0.6194, "step": 7047 }, { "epoch": 1.2572473463562572, "grad_norm": 0.4995640516281128, "learning_rate": 0.0001516333119226469, "loss": 0.6539, "step": 7048 }, { "epoch": 1.2574257425742574, "grad_norm": 0.4806896150112152, "learning_rate": 0.00015156890481026787, "loss": 0.4902, "step": 7049 }, { "epoch": 1.2576041387922576, "grad_norm": 0.5618352293968201, "learning_rate": 0.00015150450542862466, "loss": 0.7631, "step": 7050 }, { "epoch": 1.2577825350102578, "grad_norm": 0.679996907711029, "learning_rate": 0.00015144011378277522, "loss": 0.8891, "step": 7051 }, { "epoch": 1.257960931228258, "grad_norm": 0.47723329067230225, "learning_rate": 0.00015137572987777688, "loss": 0.526, "step": 7052 }, { "epoch": 1.258139327446258, "grad_norm": 0.53426593542099, "learning_rate": 0.00015131135371868615, "loss": 0.5509, "step": 7053 }, { "epoch": 1.2583177236642582, "grad_norm": 0.5936741232872009, "learning_rate": 0.0001512469853105593, "loss": 0.6007, "step": 7054 }, { "epoch": 1.2584961198822584, "grad_norm": 0.5116682052612305, "learning_rate": 0.00015118262465845179, "loss": 0.7028, "step": 7055 }, { "epoch": 1.2586745161002586, "grad_norm": 0.5201318264007568, "learning_rate": 0.00015111827176741833, "loss": 0.7666, "step": 7056 }, { "epoch": 1.2588529123182588, "grad_norm": 0.4887928068637848, "learning_rate": 0.0001510539266425133, "loss": 0.6696, "step": 7057 }, { "epoch": 1.259031308536259, "grad_norm": 0.499969482421875, "learning_rate": 0.00015098958928879035, "loss": 0.5775, "step": 7058 }, { "epoch": 1.2592097047542592, "grad_norm": 0.4900054335594177, "learning_rate": 0.0001509252597113024, "loss": 0.7217, "step": 7059 }, { "epoch": 1.2593881009722594, "grad_norm": 0.45761123299598694, "learning_rate": 0.00015086093791510187, "loss": 0.4968, "step": 7060 }, { "epoch": 1.2595664971902596, "grad_norm": 0.4074775278568268, "learning_rate": 0.00015079662390524062, "loss": 0.457, "step": 7061 }, { "epoch": 1.2597448934082598, "grad_norm": 0.5297835469245911, "learning_rate": 0.00015073231768676987, "loss": 0.6604, "step": 7062 }, { "epoch": 1.25992328962626, "grad_norm": 0.5857179164886475, "learning_rate": 0.00015066801926474015, "loss": 0.8372, "step": 7063 }, { "epoch": 1.2601016858442602, "grad_norm": 0.5072324872016907, "learning_rate": 0.00015060372864420147, "loss": 0.6661, "step": 7064 }, { "epoch": 1.2602800820622604, "grad_norm": 0.5058489441871643, "learning_rate": 0.00015053944583020318, "loss": 0.6894, "step": 7065 }, { "epoch": 1.2604584782802606, "grad_norm": 0.4949549436569214, "learning_rate": 0.00015047517082779406, "loss": 0.6834, "step": 7066 }, { "epoch": 1.2606368744982606, "grad_norm": 0.47919708490371704, "learning_rate": 0.0001504109036420221, "loss": 0.6148, "step": 7067 }, { "epoch": 1.2608152707162608, "grad_norm": 0.5355616211891174, "learning_rate": 0.00015034664427793484, "loss": 0.6993, "step": 7068 }, { "epoch": 1.260993666934261, "grad_norm": 0.4291095435619354, "learning_rate": 0.00015028239274057938, "loss": 0.5921, "step": 7069 }, { "epoch": 1.2611720631522612, "grad_norm": 0.5145418047904968, "learning_rate": 0.0001502181490350019, "loss": 0.715, "step": 7070 }, { "epoch": 1.2613504593702614, "grad_norm": 0.48409318923950195, "learning_rate": 0.00015015391316624798, "loss": 0.6167, "step": 7071 }, { "epoch": 1.2615288555882616, "grad_norm": 0.5300613641738892, "learning_rate": 0.0001500896851393628, "loss": 0.7082, "step": 7072 }, { "epoch": 1.2617072518062618, "grad_norm": 0.5523662567138672, "learning_rate": 0.00015002546495939073, "loss": 0.6498, "step": 7073 }, { "epoch": 1.261885648024262, "grad_norm": 0.48872825503349304, "learning_rate": 0.00014996125263137564, "loss": 0.6851, "step": 7074 }, { "epoch": 1.262064044242262, "grad_norm": 0.4612981677055359, "learning_rate": 0.0001498970481603607, "loss": 0.7026, "step": 7075 }, { "epoch": 1.2622424404602621, "grad_norm": 0.47795626521110535, "learning_rate": 0.00014983285155138854, "loss": 0.7194, "step": 7076 }, { "epoch": 1.2624208366782623, "grad_norm": 0.4560318887233734, "learning_rate": 0.00014976866280950107, "loss": 0.587, "step": 7077 }, { "epoch": 1.2625992328962625, "grad_norm": 0.4796944856643677, "learning_rate": 0.00014970448193973979, "loss": 0.6386, "step": 7078 }, { "epoch": 1.2627776291142627, "grad_norm": 0.528049647808075, "learning_rate": 0.00014964030894714525, "loss": 0.5845, "step": 7079 }, { "epoch": 1.262956025332263, "grad_norm": 0.46722978353500366, "learning_rate": 0.0001495761438367577, "loss": 0.6446, "step": 7080 }, { "epoch": 1.2631344215502631, "grad_norm": 0.5087683200836182, "learning_rate": 0.00014951198661361656, "loss": 0.589, "step": 7081 }, { "epoch": 1.2633128177682633, "grad_norm": 0.5003060698509216, "learning_rate": 0.00014944783728276076, "loss": 0.7562, "step": 7082 }, { "epoch": 1.2634912139862635, "grad_norm": 0.5159687399864197, "learning_rate": 0.0001493836958492285, "loss": 0.6961, "step": 7083 }, { "epoch": 1.2636696102042637, "grad_norm": 0.6173499822616577, "learning_rate": 0.0001493195623180575, "loss": 0.7186, "step": 7084 }, { "epoch": 1.263848006422264, "grad_norm": 0.5042861104011536, "learning_rate": 0.00014925543669428478, "loss": 0.6666, "step": 7085 }, { "epoch": 1.2640264026402641, "grad_norm": 0.5728045105934143, "learning_rate": 0.00014919131898294668, "loss": 0.7712, "step": 7086 }, { "epoch": 1.2642047988582643, "grad_norm": 0.46245428919792175, "learning_rate": 0.00014912720918907905, "loss": 0.5197, "step": 7087 }, { "epoch": 1.2643831950762645, "grad_norm": 0.4901646077632904, "learning_rate": 0.00014906310731771697, "loss": 0.6486, "step": 7088 }, { "epoch": 1.2645615912942645, "grad_norm": 0.5244214534759521, "learning_rate": 0.000148999013373895, "loss": 0.6701, "step": 7089 }, { "epoch": 1.2647399875122647, "grad_norm": 0.6227227449417114, "learning_rate": 0.00014893492736264708, "loss": 0.5513, "step": 7090 }, { "epoch": 1.2649183837302649, "grad_norm": 0.4480903148651123, "learning_rate": 0.00014887084928900653, "loss": 0.5382, "step": 7091 }, { "epoch": 1.265096779948265, "grad_norm": 0.41941893100738525, "learning_rate": 0.00014880677915800585, "loss": 0.451, "step": 7092 }, { "epoch": 1.2652751761662653, "grad_norm": 0.44205811619758606, "learning_rate": 0.00014874271697467724, "loss": 0.5072, "step": 7093 }, { "epoch": 1.2654535723842655, "grad_norm": 0.4529136121273041, "learning_rate": 0.00014867866274405204, "loss": 0.5873, "step": 7094 }, { "epoch": 1.2656319686022657, "grad_norm": 0.4797188639640808, "learning_rate": 0.00014861461647116105, "loss": 0.579, "step": 7095 }, { "epoch": 1.2658103648202659, "grad_norm": 0.47483915090560913, "learning_rate": 0.00014855057816103452, "loss": 0.6112, "step": 7096 }, { "epoch": 1.2659887610382659, "grad_norm": 0.6069769263267517, "learning_rate": 0.00014848654781870186, "loss": 0.6348, "step": 7097 }, { "epoch": 1.266167157256266, "grad_norm": 0.6138676404953003, "learning_rate": 0.00014842252544919205, "loss": 0.5382, "step": 7098 }, { "epoch": 1.2663455534742663, "grad_norm": 0.5163396596908569, "learning_rate": 0.00014835851105753333, "loss": 0.6694, "step": 7099 }, { "epoch": 1.2665239496922664, "grad_norm": 0.5151557922363281, "learning_rate": 0.0001482945046487535, "loss": 0.7113, "step": 7100 }, { "epoch": 1.2667023459102666, "grad_norm": 0.5661108493804932, "learning_rate": 0.00014823050622787948, "loss": 0.8239, "step": 7101 }, { "epoch": 1.2668807421282668, "grad_norm": 0.7180517315864563, "learning_rate": 0.00014816651579993773, "loss": 0.8975, "step": 7102 }, { "epoch": 1.267059138346267, "grad_norm": 0.5672160983085632, "learning_rate": 0.000148102533369954, "loss": 0.6035, "step": 7103 }, { "epoch": 1.2672375345642672, "grad_norm": 0.47604504227638245, "learning_rate": 0.0001480385589429535, "loss": 0.5376, "step": 7104 }, { "epoch": 1.2674159307822674, "grad_norm": 0.6531439423561096, "learning_rate": 0.0001479745925239606, "loss": 0.5673, "step": 7105 }, { "epoch": 1.2675943270002676, "grad_norm": 0.5715680122375488, "learning_rate": 0.0001479106341179994, "loss": 0.7327, "step": 7106 }, { "epoch": 1.2677727232182678, "grad_norm": 0.5460495948791504, "learning_rate": 0.00014784668373009298, "loss": 0.5201, "step": 7107 }, { "epoch": 1.267951119436268, "grad_norm": 0.5381412506103516, "learning_rate": 0.00014778274136526408, "loss": 0.7042, "step": 7108 }, { "epoch": 1.2681295156542682, "grad_norm": 0.49676334857940674, "learning_rate": 0.00014771880702853468, "loss": 0.5428, "step": 7109 }, { "epoch": 1.2683079118722684, "grad_norm": 0.726411759853363, "learning_rate": 0.00014765488072492617, "loss": 0.6615, "step": 7110 }, { "epoch": 1.2684863080902684, "grad_norm": 0.5252700448036194, "learning_rate": 0.00014759096245945929, "loss": 0.6958, "step": 7111 }, { "epoch": 1.2686647043082686, "grad_norm": 1.2057543992996216, "learning_rate": 0.0001475270522371542, "loss": 0.4825, "step": 7112 }, { "epoch": 1.2688431005262688, "grad_norm": 0.4835338592529297, "learning_rate": 0.00014746315006303027, "loss": 0.6651, "step": 7113 }, { "epoch": 1.269021496744269, "grad_norm": 3.452477216720581, "learning_rate": 0.0001473992559421063, "loss": 0.7079, "step": 7114 }, { "epoch": 1.2691998929622692, "grad_norm": 0.5416814088821411, "learning_rate": 0.00014733536987940075, "loss": 0.5033, "step": 7115 }, { "epoch": 1.2693782891802694, "grad_norm": 0.46173161268234253, "learning_rate": 0.00014727149187993105, "loss": 0.6123, "step": 7116 }, { "epoch": 1.2695566853982696, "grad_norm": 0.4671329855918884, "learning_rate": 0.00014720762194871424, "loss": 0.6949, "step": 7117 }, { "epoch": 1.2697350816162698, "grad_norm": 0.5171025395393372, "learning_rate": 0.00014714376009076647, "loss": 0.6301, "step": 7118 }, { "epoch": 1.2699134778342698, "grad_norm": 0.47935229539871216, "learning_rate": 0.00014707990631110355, "loss": 0.6334, "step": 7119 }, { "epoch": 1.27009187405227, "grad_norm": 0.4533785283565521, "learning_rate": 0.0001470160606147405, "loss": 0.5618, "step": 7120 }, { "epoch": 1.2702702702702702, "grad_norm": 0.5093548893928528, "learning_rate": 0.0001469522230066917, "loss": 0.7452, "step": 7121 }, { "epoch": 1.2704486664882704, "grad_norm": 0.43215861916542053, "learning_rate": 0.000146888393491971, "loss": 0.599, "step": 7122 }, { "epoch": 1.2706270627062706, "grad_norm": 0.46486908197402954, "learning_rate": 0.0001468245720755915, "loss": 0.6116, "step": 7123 }, { "epoch": 1.2708054589242708, "grad_norm": 0.5231932401657104, "learning_rate": 0.00014676075876256567, "loss": 0.5941, "step": 7124 }, { "epoch": 1.270983855142271, "grad_norm": 0.5373736619949341, "learning_rate": 0.00014669695355790552, "loss": 0.5963, "step": 7125 }, { "epoch": 1.2711622513602712, "grad_norm": 0.5684579014778137, "learning_rate": 0.00014663315646662212, "loss": 0.6317, "step": 7126 }, { "epoch": 1.2713406475782714, "grad_norm": 0.5242096185684204, "learning_rate": 0.00014656936749372614, "loss": 0.8106, "step": 7127 }, { "epoch": 1.2715190437962716, "grad_norm": 0.5143678188323975, "learning_rate": 0.00014650558664422748, "loss": 0.665, "step": 7128 }, { "epoch": 1.2716974400142718, "grad_norm": 0.5314942598342896, "learning_rate": 0.0001464418139231355, "loss": 0.633, "step": 7129 }, { "epoch": 1.271875836232272, "grad_norm": 0.5419167876243591, "learning_rate": 0.0001463780493354589, "loss": 0.6588, "step": 7130 }, { "epoch": 1.2720542324502722, "grad_norm": 1.082300066947937, "learning_rate": 0.00014631429288620575, "loss": 0.7633, "step": 7131 }, { "epoch": 1.2722326286682724, "grad_norm": 0.5155816674232483, "learning_rate": 0.0001462505445803834, "loss": 0.6571, "step": 7132 }, { "epoch": 1.2724110248862723, "grad_norm": 0.7917349338531494, "learning_rate": 0.00014618680442299864, "loss": 0.5941, "step": 7133 }, { "epoch": 1.2725894211042725, "grad_norm": 0.5167495012283325, "learning_rate": 0.00014612307241905758, "loss": 0.7426, "step": 7134 }, { "epoch": 1.2727678173222727, "grad_norm": 0.6022337675094604, "learning_rate": 0.00014605934857356571, "loss": 0.7413, "step": 7135 }, { "epoch": 1.272946213540273, "grad_norm": 0.4629674553871155, "learning_rate": 0.0001459956328915279, "loss": 0.5869, "step": 7136 }, { "epoch": 1.2731246097582731, "grad_norm": 0.4646163582801819, "learning_rate": 0.00014593192537794834, "loss": 0.52, "step": 7137 }, { "epoch": 1.2733030059762733, "grad_norm": 0.4827001988887787, "learning_rate": 0.00014586822603783047, "loss": 0.6536, "step": 7138 }, { "epoch": 1.2734814021942735, "grad_norm": 0.442564457654953, "learning_rate": 0.00014580453487617745, "loss": 0.5261, "step": 7139 }, { "epoch": 1.2736597984122737, "grad_norm": 0.5968412756919861, "learning_rate": 0.0001457408518979913, "loss": 0.8212, "step": 7140 }, { "epoch": 1.2738381946302737, "grad_norm": 0.5755636692047119, "learning_rate": 0.00014567717710827388, "loss": 0.8274, "step": 7141 }, { "epoch": 1.274016590848274, "grad_norm": 0.5358353853225708, "learning_rate": 0.0001456135105120261, "loss": 0.7647, "step": 7142 }, { "epoch": 1.274194987066274, "grad_norm": 0.45210355520248413, "learning_rate": 0.00014554985211424814, "loss": 0.582, "step": 7143 }, { "epoch": 1.2743733832842743, "grad_norm": 0.5111218094825745, "learning_rate": 0.00014548620191994, "loss": 0.7459, "step": 7144 }, { "epoch": 1.2745517795022745, "grad_norm": 0.4721735715866089, "learning_rate": 0.00014542255993410034, "loss": 0.6472, "step": 7145 }, { "epoch": 1.2747301757202747, "grad_norm": 0.4758826196193695, "learning_rate": 0.000145358926161728, "loss": 0.5357, "step": 7146 }, { "epoch": 1.274908571938275, "grad_norm": 0.5536307096481323, "learning_rate": 0.00014529530060782066, "loss": 0.7217, "step": 7147 }, { "epoch": 1.275086968156275, "grad_norm": 0.465385377407074, "learning_rate": 0.00014523168327737517, "loss": 0.5545, "step": 7148 }, { "epoch": 1.2752653643742753, "grad_norm": 0.47780296206474304, "learning_rate": 0.0001451680741753883, "loss": 0.6572, "step": 7149 }, { "epoch": 1.2754437605922755, "grad_norm": 0.4876740276813507, "learning_rate": 0.00014510447330685572, "loss": 0.6728, "step": 7150 }, { "epoch": 1.2756221568102757, "grad_norm": 0.5187113881111145, "learning_rate": 0.00014504088067677273, "loss": 0.573, "step": 7151 }, { "epoch": 1.2758005530282759, "grad_norm": 0.46036651730537415, "learning_rate": 0.00014497729629013367, "loss": 0.6419, "step": 7152 }, { "epoch": 1.275978949246276, "grad_norm": 0.49554046988487244, "learning_rate": 0.0001449137201519327, "loss": 0.7062, "step": 7153 }, { "epoch": 1.2761573454642763, "grad_norm": 0.44751298427581787, "learning_rate": 0.00014485015226716296, "loss": 0.5182, "step": 7154 }, { "epoch": 1.2763357416822763, "grad_norm": 0.4852393567562103, "learning_rate": 0.0001447865926408169, "loss": 0.6061, "step": 7155 }, { "epoch": 1.2765141379002765, "grad_norm": 0.49661117792129517, "learning_rate": 0.00014472304127788663, "loss": 0.7156, "step": 7156 }, { "epoch": 1.2766925341182767, "grad_norm": 0.4922674298286438, "learning_rate": 0.00014465949818336332, "loss": 0.6471, "step": 7157 }, { "epoch": 1.2768709303362769, "grad_norm": 0.4639200270175934, "learning_rate": 0.0001445959633622378, "loss": 0.5754, "step": 7158 }, { "epoch": 1.277049326554277, "grad_norm": 0.4920192360877991, "learning_rate": 0.00014453243681949985, "loss": 0.5749, "step": 7159 }, { "epoch": 1.2772277227722773, "grad_norm": 0.5195376873016357, "learning_rate": 0.00014446891856013895, "loss": 0.6442, "step": 7160 }, { "epoch": 1.2774061189902775, "grad_norm": 0.5274832844734192, "learning_rate": 0.00014440540858914384, "loss": 0.6963, "step": 7161 }, { "epoch": 1.2775845152082776, "grad_norm": 0.538936972618103, "learning_rate": 0.0001443419069115024, "loss": 0.7555, "step": 7162 }, { "epoch": 1.2777629114262776, "grad_norm": 0.4810023009777069, "learning_rate": 0.00014427841353220223, "loss": 0.5429, "step": 7163 }, { "epoch": 1.2779413076442778, "grad_norm": 0.49055951833724976, "learning_rate": 0.00014421492845622985, "loss": 0.6905, "step": 7164 }, { "epoch": 1.278119703862278, "grad_norm": 0.4986650049686432, "learning_rate": 0.0001441514516885716, "loss": 0.8251, "step": 7165 }, { "epoch": 1.2782981000802782, "grad_norm": 0.47552618384361267, "learning_rate": 0.00014408798323421268, "loss": 0.5881, "step": 7166 }, { "epoch": 1.2784764962982784, "grad_norm": 0.4542260468006134, "learning_rate": 0.00014402452309813808, "loss": 0.549, "step": 7167 }, { "epoch": 1.2786548925162786, "grad_norm": 0.44126781821250916, "learning_rate": 0.00014396107128533182, "loss": 0.499, "step": 7168 }, { "epoch": 1.2788332887342788, "grad_norm": 0.45107805728912354, "learning_rate": 0.00014389762780077725, "loss": 0.5451, "step": 7169 }, { "epoch": 1.279011684952279, "grad_norm": 0.5026717185974121, "learning_rate": 0.00014383419264945747, "loss": 0.634, "step": 7170 }, { "epoch": 1.2791900811702792, "grad_norm": 0.4510766267776489, "learning_rate": 0.00014377076583635442, "loss": 0.5339, "step": 7171 }, { "epoch": 1.2793684773882794, "grad_norm": 0.40873438119888306, "learning_rate": 0.0001437073473664498, "loss": 0.4696, "step": 7172 }, { "epoch": 1.2795468736062796, "grad_norm": 0.48713698983192444, "learning_rate": 0.0001436439372447243, "loss": 0.5792, "step": 7173 }, { "epoch": 1.2797252698242798, "grad_norm": 0.45994314551353455, "learning_rate": 0.00014358053547615824, "loss": 0.5733, "step": 7174 }, { "epoch": 1.27990366604228, "grad_norm": 0.5316993594169617, "learning_rate": 0.00014351714206573107, "loss": 0.726, "step": 7175 }, { "epoch": 1.2800820622602802, "grad_norm": 0.6356591582298279, "learning_rate": 0.00014345375701842173, "loss": 0.5762, "step": 7176 }, { "epoch": 1.2802604584782802, "grad_norm": 0.5784276723861694, "learning_rate": 0.00014339038033920858, "loss": 0.6256, "step": 7177 }, { "epoch": 1.2804388546962804, "grad_norm": 0.4795292615890503, "learning_rate": 0.00014332701203306896, "loss": 0.6083, "step": 7178 }, { "epoch": 1.2806172509142806, "grad_norm": 0.46686503291130066, "learning_rate": 0.00014326365210498001, "loss": 0.5921, "step": 7179 }, { "epoch": 1.2807956471322808, "grad_norm": 0.4302233159542084, "learning_rate": 0.0001432003005599179, "loss": 0.5002, "step": 7180 }, { "epoch": 1.280974043350281, "grad_norm": 0.5263996124267578, "learning_rate": 0.00014313695740285814, "loss": 0.7031, "step": 7181 }, { "epoch": 1.2811524395682812, "grad_norm": 0.48142942786216736, "learning_rate": 0.00014307362263877581, "loss": 0.5069, "step": 7182 }, { "epoch": 1.2813308357862814, "grad_norm": 0.503164529800415, "learning_rate": 0.00014301029627264512, "loss": 0.6391, "step": 7183 }, { "epoch": 1.2815092320042816, "grad_norm": 0.4538707137107849, "learning_rate": 0.00014294697830943975, "loss": 0.4964, "step": 7184 }, { "epoch": 1.2816876282222815, "grad_norm": 0.5014795660972595, "learning_rate": 0.00014288366875413256, "loss": 0.7765, "step": 7185 }, { "epoch": 1.2818660244402817, "grad_norm": 0.5531452298164368, "learning_rate": 0.00014282036761169604, "loss": 0.6846, "step": 7186 }, { "epoch": 1.282044420658282, "grad_norm": 0.6034454703330994, "learning_rate": 0.0001427570748871016, "loss": 0.7885, "step": 7187 }, { "epoch": 1.2822228168762821, "grad_norm": 0.4677254557609558, "learning_rate": 0.0001426937905853205, "loss": 0.6689, "step": 7188 }, { "epoch": 1.2824012130942823, "grad_norm": 0.4888259172439575, "learning_rate": 0.00014263051471132286, "loss": 0.5989, "step": 7189 }, { "epoch": 1.2825796093122825, "grad_norm": 0.5150851607322693, "learning_rate": 0.0001425672472700783, "loss": 0.7595, "step": 7190 }, { "epoch": 1.2827580055302827, "grad_norm": 1.0994735956192017, "learning_rate": 0.00014250398826655593, "loss": 0.7208, "step": 7191 }, { "epoch": 1.282936401748283, "grad_norm": 0.5045804977416992, "learning_rate": 0.00014244073770572403, "loss": 0.7697, "step": 7192 }, { "epoch": 1.2831147979662831, "grad_norm": 0.7239548563957214, "learning_rate": 0.00014237749559255043, "loss": 0.7041, "step": 7193 }, { "epoch": 1.2832931941842833, "grad_norm": 0.4901469647884369, "learning_rate": 0.000142314261932002, "loss": 0.7087, "step": 7194 }, { "epoch": 1.2834715904022835, "grad_norm": 0.5142937302589417, "learning_rate": 0.000142251036729045, "loss": 0.6189, "step": 7195 }, { "epoch": 1.2836499866202837, "grad_norm": 0.6226754784584045, "learning_rate": 0.00014218781998864526, "loss": 0.8731, "step": 7196 }, { "epoch": 1.283828382838284, "grad_norm": 0.48372164368629456, "learning_rate": 0.00014212461171576768, "loss": 0.7479, "step": 7197 }, { "epoch": 1.2840067790562841, "grad_norm": 0.5454527139663696, "learning_rate": 0.0001420614119153768, "loss": 0.6334, "step": 7198 }, { "epoch": 1.284185175274284, "grad_norm": 0.5037645101547241, "learning_rate": 0.00014199822059243606, "loss": 0.5815, "step": 7199 }, { "epoch": 1.2843635714922843, "grad_norm": 0.5184837579727173, "learning_rate": 0.00014193503775190868, "loss": 0.7455, "step": 7200 }, { "epoch": 1.2845419677102845, "grad_norm": 0.52362459897995, "learning_rate": 0.00014187186339875696, "loss": 0.6227, "step": 7201 }, { "epoch": 1.2847203639282847, "grad_norm": 0.3797712028026581, "learning_rate": 0.00014180869753794247, "loss": 0.4564, "step": 7202 }, { "epoch": 1.284898760146285, "grad_norm": 0.49934831261634827, "learning_rate": 0.00014174554017442638, "loss": 0.6207, "step": 7203 }, { "epoch": 1.285077156364285, "grad_norm": 0.5866370797157288, "learning_rate": 0.0001416823913131689, "loss": 0.6251, "step": 7204 }, { "epoch": 1.2852555525822853, "grad_norm": 0.5229966640472412, "learning_rate": 0.00014161925095912986, "loss": 0.6002, "step": 7205 }, { "epoch": 1.2854339488002855, "grad_norm": 0.5066050887107849, "learning_rate": 0.00014155611911726814, "loss": 0.6648, "step": 7206 }, { "epoch": 1.2856123450182855, "grad_norm": 0.547694981098175, "learning_rate": 0.00014149299579254215, "loss": 0.7228, "step": 7207 }, { "epoch": 1.2857907412362857, "grad_norm": 0.475382000207901, "learning_rate": 0.00014142988098990968, "loss": 0.6569, "step": 7208 }, { "epoch": 1.2859691374542859, "grad_norm": 0.4719434082508087, "learning_rate": 0.00014136677471432755, "loss": 0.6066, "step": 7209 }, { "epoch": 1.286147533672286, "grad_norm": 0.4972936511039734, "learning_rate": 0.00014130367697075225, "loss": 0.6338, "step": 7210 }, { "epoch": 1.2863259298902863, "grad_norm": 0.4694492220878601, "learning_rate": 0.0001412405877641393, "loss": 0.6117, "step": 7211 }, { "epoch": 1.2865043261082865, "grad_norm": 0.4956999719142914, "learning_rate": 0.00014117750709944388, "loss": 0.706, "step": 7212 }, { "epoch": 1.2866827223262867, "grad_norm": 0.49469372630119324, "learning_rate": 0.00014111443498162013, "loss": 0.7405, "step": 7213 }, { "epoch": 1.2868611185442869, "grad_norm": 0.48914238810539246, "learning_rate": 0.00014105137141562192, "loss": 0.6415, "step": 7214 }, { "epoch": 1.287039514762287, "grad_norm": 0.43339380621910095, "learning_rate": 0.0001409883164064021, "loss": 0.4761, "step": 7215 }, { "epoch": 1.2872179109802873, "grad_norm": 0.5167938470840454, "learning_rate": 0.0001409252699589129, "loss": 0.7187, "step": 7216 }, { "epoch": 1.2873963071982875, "grad_norm": 0.45789870619773865, "learning_rate": 0.00014086223207810614, "loss": 0.5403, "step": 7217 }, { "epoch": 1.2875747034162877, "grad_norm": 0.49316656589508057, "learning_rate": 0.00014079920276893263, "loss": 0.6763, "step": 7218 }, { "epoch": 1.2877530996342879, "grad_norm": 0.4639813303947449, "learning_rate": 0.00014073618203634282, "loss": 0.5242, "step": 7219 }, { "epoch": 1.287931495852288, "grad_norm": 0.47487959265708923, "learning_rate": 0.00014067316988528616, "loss": 0.5721, "step": 7220 }, { "epoch": 1.288109892070288, "grad_norm": 0.4365386664867401, "learning_rate": 0.00014061016632071173, "loss": 0.4473, "step": 7221 }, { "epoch": 1.2882882882882882, "grad_norm": 0.5029173493385315, "learning_rate": 0.0001405471713475678, "loss": 0.7293, "step": 7222 }, { "epoch": 1.2884666845062884, "grad_norm": 0.6281735301017761, "learning_rate": 0.00014048418497080185, "loss": 0.7677, "step": 7223 }, { "epoch": 1.2886450807242886, "grad_norm": 0.5290473103523254, "learning_rate": 0.000140421207195361, "loss": 0.6217, "step": 7224 }, { "epoch": 1.2888234769422888, "grad_norm": 0.4867291748523712, "learning_rate": 0.00014035823802619127, "loss": 0.6605, "step": 7225 }, { "epoch": 1.289001873160289, "grad_norm": 0.5893452167510986, "learning_rate": 0.00014029527746823846, "loss": 0.6862, "step": 7226 }, { "epoch": 1.2891802693782892, "grad_norm": 0.5156494975090027, "learning_rate": 0.00014023232552644733, "loss": 0.658, "step": 7227 }, { "epoch": 1.2893586655962894, "grad_norm": 0.48713207244873047, "learning_rate": 0.00014016938220576204, "loss": 0.6116, "step": 7228 }, { "epoch": 1.2895370618142896, "grad_norm": 0.4963529109954834, "learning_rate": 0.00014010644751112628, "loss": 0.5374, "step": 7229 }, { "epoch": 1.2897154580322896, "grad_norm": 0.5015867948532104, "learning_rate": 0.00014004352144748273, "loss": 0.6357, "step": 7230 }, { "epoch": 1.2898938542502898, "grad_norm": 0.5196330547332764, "learning_rate": 0.0001399806040197738, "loss": 0.6529, "step": 7231 }, { "epoch": 1.29007225046829, "grad_norm": 0.6642472147941589, "learning_rate": 0.00013991769523294078, "loss": 0.6634, "step": 7232 }, { "epoch": 1.2902506466862902, "grad_norm": 0.5459299087524414, "learning_rate": 0.00013985479509192472, "loss": 0.7802, "step": 7233 }, { "epoch": 1.2904290429042904, "grad_norm": 0.5235913991928101, "learning_rate": 0.00013979190360166566, "loss": 0.6494, "step": 7234 }, { "epoch": 1.2906074391222906, "grad_norm": 0.4864553213119507, "learning_rate": 0.00013972902076710297, "loss": 0.5321, "step": 7235 }, { "epoch": 1.2907858353402908, "grad_norm": 0.4859999418258667, "learning_rate": 0.0001396661465931755, "loss": 0.5942, "step": 7236 }, { "epoch": 1.290964231558291, "grad_norm": 0.4459374248981476, "learning_rate": 0.00013960328108482146, "loss": 0.5264, "step": 7237 }, { "epoch": 1.2911426277762912, "grad_norm": 0.5186498165130615, "learning_rate": 0.00013954042424697827, "loss": 0.7167, "step": 7238 }, { "epoch": 1.2913210239942914, "grad_norm": 0.49951282143592834, "learning_rate": 0.00013947757608458262, "loss": 0.5215, "step": 7239 }, { "epoch": 1.2914994202122916, "grad_norm": 0.4620944857597351, "learning_rate": 0.00013941473660257047, "loss": 0.7108, "step": 7240 }, { "epoch": 1.2916778164302918, "grad_norm": 0.4689721167087555, "learning_rate": 0.00013935190580587745, "loss": 0.6203, "step": 7241 }, { "epoch": 1.291856212648292, "grad_norm": 0.43373170495033264, "learning_rate": 0.00013928908369943802, "loss": 0.5734, "step": 7242 }, { "epoch": 1.292034608866292, "grad_norm": 0.48551568388938904, "learning_rate": 0.00013922627028818642, "loss": 0.6898, "step": 7243 }, { "epoch": 1.2922130050842922, "grad_norm": 0.4706454873085022, "learning_rate": 0.00013916346557705579, "loss": 0.6658, "step": 7244 }, { "epoch": 1.2923914013022924, "grad_norm": 0.47324392199516296, "learning_rate": 0.00013910066957097895, "loss": 0.6189, "step": 7245 }, { "epoch": 1.2925697975202926, "grad_norm": 0.4348990321159363, "learning_rate": 0.00013903788227488773, "loss": 0.5601, "step": 7246 }, { "epoch": 1.2927481937382928, "grad_norm": 0.4262705445289612, "learning_rate": 0.00013897510369371359, "loss": 0.4819, "step": 7247 }, { "epoch": 1.292926589956293, "grad_norm": 0.5076077580451965, "learning_rate": 0.000138912333832387, "loss": 0.8249, "step": 7248 }, { "epoch": 1.2931049861742931, "grad_norm": 0.44083496928215027, "learning_rate": 0.00013884957269583777, "loss": 0.5899, "step": 7249 }, { "epoch": 1.2932833823922933, "grad_norm": 0.5185807347297668, "learning_rate": 0.00013878682028899543, "loss": 0.6274, "step": 7250 }, { "epoch": 1.2934617786102935, "grad_norm": 0.46207600831985474, "learning_rate": 0.00013872407661678825, "loss": 0.6729, "step": 7251 }, { "epoch": 1.2936401748282935, "grad_norm": 0.49033087491989136, "learning_rate": 0.00013866134168414421, "loss": 0.5321, "step": 7252 }, { "epoch": 1.2938185710462937, "grad_norm": 0.728157103061676, "learning_rate": 0.00013859861549599058, "loss": 0.4031, "step": 7253 }, { "epoch": 1.293996967264294, "grad_norm": 0.5120450258255005, "learning_rate": 0.00013853589805725363, "loss": 0.6634, "step": 7254 }, { "epoch": 1.2941753634822941, "grad_norm": 0.4849170446395874, "learning_rate": 0.00013847318937285942, "loss": 0.6161, "step": 7255 }, { "epoch": 1.2943537597002943, "grad_norm": 0.5624775886535645, "learning_rate": 0.00013841048944773278, "loss": 0.6795, "step": 7256 }, { "epoch": 1.2945321559182945, "grad_norm": 0.5056706070899963, "learning_rate": 0.00013834779828679838, "loss": 0.7651, "step": 7257 }, { "epoch": 1.2947105521362947, "grad_norm": 0.5136926174163818, "learning_rate": 0.00013828511589497977, "loss": 0.8467, "step": 7258 }, { "epoch": 1.294888948354295, "grad_norm": 0.4533271789550781, "learning_rate": 0.0001382224422772002, "loss": 0.536, "step": 7259 }, { "epoch": 1.295067344572295, "grad_norm": 0.4658295214176178, "learning_rate": 0.00013815977743838188, "loss": 0.5347, "step": 7260 }, { "epoch": 1.2952457407902953, "grad_norm": 0.4228724539279938, "learning_rate": 0.00013809712138344643, "loss": 0.5316, "step": 7261 }, { "epoch": 1.2954241370082955, "grad_norm": 0.4298241436481476, "learning_rate": 0.000138034474117315, "loss": 0.4455, "step": 7262 }, { "epoch": 1.2956025332262957, "grad_norm": 0.4992910623550415, "learning_rate": 0.00013797183564490773, "loss": 0.6047, "step": 7263 }, { "epoch": 1.295780929444296, "grad_norm": 0.49004730582237244, "learning_rate": 0.00013790920597114433, "loss": 0.6561, "step": 7264 }, { "epoch": 1.2959593256622959, "grad_norm": 1.2394171953201294, "learning_rate": 0.00013784658510094356, "loss": 0.5966, "step": 7265 }, { "epoch": 1.296137721880296, "grad_norm": 0.40918824076652527, "learning_rate": 0.00013778397303922387, "loss": 0.4365, "step": 7266 }, { "epoch": 1.2963161180982963, "grad_norm": 0.521958589553833, "learning_rate": 0.0001377213697909025, "loss": 0.6175, "step": 7267 }, { "epoch": 1.2964945143162965, "grad_norm": 0.594232976436615, "learning_rate": 0.00013765877536089648, "loss": 0.7357, "step": 7268 }, { "epoch": 1.2966729105342967, "grad_norm": 0.6050662398338318, "learning_rate": 0.00013759618975412198, "loss": 0.8643, "step": 7269 }, { "epoch": 1.2968513067522969, "grad_norm": 0.4976935088634491, "learning_rate": 0.00013753361297549421, "loss": 0.5672, "step": 7270 }, { "epoch": 1.297029702970297, "grad_norm": 0.4690896272659302, "learning_rate": 0.00013747104502992823, "loss": 0.5142, "step": 7271 }, { "epoch": 1.2972080991882973, "grad_norm": 0.5143970251083374, "learning_rate": 0.00013740848592233785, "loss": 0.6846, "step": 7272 }, { "epoch": 1.2973864954062975, "grad_norm": 0.5174264907836914, "learning_rate": 0.00013734593565763664, "loss": 0.6434, "step": 7273 }, { "epoch": 1.2975648916242974, "grad_norm": 0.4763343334197998, "learning_rate": 0.00013728339424073715, "loss": 0.6206, "step": 7274 }, { "epoch": 1.2977432878422976, "grad_norm": 0.44902318716049194, "learning_rate": 0.00013722086167655128, "loss": 0.4841, "step": 7275 }, { "epoch": 1.2979216840602978, "grad_norm": 0.6365222930908203, "learning_rate": 0.0001371583379699905, "loss": 0.7602, "step": 7276 }, { "epoch": 1.298100080278298, "grad_norm": 0.49694737792015076, "learning_rate": 0.0001370958231259652, "loss": 0.5375, "step": 7277 }, { "epoch": 1.2982784764962982, "grad_norm": 0.507412850856781, "learning_rate": 0.00013703331714938545, "loss": 0.6576, "step": 7278 }, { "epoch": 1.2984568727142984, "grad_norm": 0.45106565952301025, "learning_rate": 0.00013697082004516026, "loss": 0.5242, "step": 7279 }, { "epoch": 1.2986352689322986, "grad_norm": 0.4997382164001465, "learning_rate": 0.00013690833181819834, "loss": 0.6324, "step": 7280 }, { "epoch": 1.2988136651502988, "grad_norm": 0.4807584583759308, "learning_rate": 0.00013684585247340734, "loss": 0.6275, "step": 7281 }, { "epoch": 1.298992061368299, "grad_norm": 0.5849037170410156, "learning_rate": 0.00013678338201569422, "loss": 0.7473, "step": 7282 }, { "epoch": 1.2991704575862992, "grad_norm": 0.5092651844024658, "learning_rate": 0.00013672092044996576, "loss": 0.6442, "step": 7283 }, { "epoch": 1.2993488538042994, "grad_norm": 0.45579296350479126, "learning_rate": 0.00013665846778112734, "loss": 0.4597, "step": 7284 }, { "epoch": 1.2995272500222996, "grad_norm": 0.5193167924880981, "learning_rate": 0.00013659602401408416, "loss": 0.614, "step": 7285 }, { "epoch": 1.2997056462402998, "grad_norm": 0.511915922164917, "learning_rate": 0.0001365335891537405, "loss": 0.69, "step": 7286 }, { "epoch": 1.2998840424582998, "grad_norm": 0.47553443908691406, "learning_rate": 0.0001364711632049998, "loss": 0.5546, "step": 7287 }, { "epoch": 1.3000624386763, "grad_norm": 0.4340362250804901, "learning_rate": 0.00013640874617276523, "loss": 0.4619, "step": 7288 }, { "epoch": 1.3002408348943002, "grad_norm": 0.558949887752533, "learning_rate": 0.00013634633806193868, "loss": 1.1185, "step": 7289 }, { "epoch": 1.3004192311123004, "grad_norm": 0.45011255145072937, "learning_rate": 0.00013628393887742197, "loss": 0.5726, "step": 7290 }, { "epoch": 1.3005976273303006, "grad_norm": 0.44671180844306946, "learning_rate": 0.00013622154862411568, "loss": 0.549, "step": 7291 }, { "epoch": 1.3007760235483008, "grad_norm": 0.5005772709846497, "learning_rate": 0.00013615916730692006, "loss": 0.6959, "step": 7292 }, { "epoch": 1.300954419766301, "grad_norm": 0.5107164978981018, "learning_rate": 0.00013609679493073435, "loss": 0.5958, "step": 7293 }, { "epoch": 1.3011328159843012, "grad_norm": 0.46262556314468384, "learning_rate": 0.00013603443150045745, "loss": 0.531, "step": 7294 }, { "epoch": 1.3013112122023014, "grad_norm": 0.49760428071022034, "learning_rate": 0.0001359720770209873, "loss": 0.5535, "step": 7295 }, { "epoch": 1.3014896084203014, "grad_norm": 0.5045909881591797, "learning_rate": 0.00013590973149722103, "loss": 0.737, "step": 7296 }, { "epoch": 1.3016680046383016, "grad_norm": 0.4710780680179596, "learning_rate": 0.00013584739493405546, "loss": 0.6419, "step": 7297 }, { "epoch": 1.3018464008563018, "grad_norm": 0.5229588747024536, "learning_rate": 0.00013578506733638622, "loss": 0.6623, "step": 7298 }, { "epoch": 1.302024797074302, "grad_norm": 0.48090076446533203, "learning_rate": 0.0001357227487091087, "loss": 0.645, "step": 7299 }, { "epoch": 1.3022031932923022, "grad_norm": 0.4656026363372803, "learning_rate": 0.0001356604390571174, "loss": 0.482, "step": 7300 }, { "epoch": 1.3023815895103024, "grad_norm": 0.4530593454837799, "learning_rate": 0.00013559813838530588, "loss": 0.4968, "step": 7301 }, { "epoch": 1.3025599857283026, "grad_norm": 0.5168888568878174, "learning_rate": 0.0001355358466985675, "loss": 0.6378, "step": 7302 }, { "epoch": 1.3027383819463028, "grad_norm": 0.45193690061569214, "learning_rate": 0.00013547356400179432, "loss": 0.4601, "step": 7303 }, { "epoch": 1.302916778164303, "grad_norm": 0.5073840618133545, "learning_rate": 0.00013541129029987826, "loss": 0.5461, "step": 7304 }, { "epoch": 1.3030951743823032, "grad_norm": 0.5524479150772095, "learning_rate": 0.00013534902559771, "loss": 0.6818, "step": 7305 }, { "epoch": 1.3032735706003034, "grad_norm": 0.5372066497802734, "learning_rate": 0.00013528676990018007, "loss": 0.7605, "step": 7306 }, { "epoch": 1.3034519668183036, "grad_norm": 0.5457733869552612, "learning_rate": 0.00013522452321217788, "loss": 0.7541, "step": 7307 }, { "epoch": 1.3036303630363038, "grad_norm": 0.5292448997497559, "learning_rate": 0.00013516228553859212, "loss": 0.6048, "step": 7308 }, { "epoch": 1.3038087592543037, "grad_norm": 0.45932427048683167, "learning_rate": 0.00013510005688431115, "loss": 0.5494, "step": 7309 }, { "epoch": 1.303987155472304, "grad_norm": 0.4594475030899048, "learning_rate": 0.00013503783725422216, "loss": 0.4971, "step": 7310 }, { "epoch": 1.3041655516903041, "grad_norm": 0.5574771165847778, "learning_rate": 0.00013497562665321206, "loss": 0.5786, "step": 7311 }, { "epoch": 1.3043439479083043, "grad_norm": 0.46712732315063477, "learning_rate": 0.00013491342508616667, "loss": 0.5883, "step": 7312 }, { "epoch": 1.3045223441263045, "grad_norm": 0.47527366876602173, "learning_rate": 0.00013485123255797132, "loss": 0.6923, "step": 7313 }, { "epoch": 1.3047007403443047, "grad_norm": 0.45830777287483215, "learning_rate": 0.0001347890490735107, "loss": 0.5369, "step": 7314 }, { "epoch": 1.304879136562305, "grad_norm": 0.5052610039710999, "learning_rate": 0.0001347268746376685, "loss": 0.5744, "step": 7315 }, { "epoch": 1.3050575327803051, "grad_norm": 0.4734193682670593, "learning_rate": 0.00013466470925532808, "loss": 0.5339, "step": 7316 }, { "epoch": 1.3052359289983053, "grad_norm": 0.4872669279575348, "learning_rate": 0.00013460255293137164, "loss": 0.5804, "step": 7317 }, { "epoch": 1.3054143252163053, "grad_norm": 0.5103761553764343, "learning_rate": 0.00013454040567068113, "loss": 0.6856, "step": 7318 }, { "epoch": 1.3055927214343055, "grad_norm": 0.5222267508506775, "learning_rate": 0.00013447826747813748, "loss": 0.7514, "step": 7319 }, { "epoch": 1.3057711176523057, "grad_norm": 0.4690699875354767, "learning_rate": 0.0001344161383586209, "loss": 0.6305, "step": 7320 }, { "epoch": 1.305949513870306, "grad_norm": 0.5072498917579651, "learning_rate": 0.00013435401831701115, "loss": 0.6352, "step": 7321 }, { "epoch": 1.306127910088306, "grad_norm": 0.4983615577220917, "learning_rate": 0.00013429190735818696, "loss": 0.5993, "step": 7322 }, { "epoch": 1.3063063063063063, "grad_norm": 0.5258157253265381, "learning_rate": 0.0001342298054870267, "loss": 0.7972, "step": 7323 }, { "epoch": 1.3064847025243065, "grad_norm": 0.4537825882434845, "learning_rate": 0.00013416771270840751, "loss": 0.5255, "step": 7324 }, { "epoch": 1.3066630987423067, "grad_norm": 0.47432446479797363, "learning_rate": 0.00013410562902720647, "loss": 0.4699, "step": 7325 }, { "epoch": 1.3068414949603069, "grad_norm": 0.5209951996803284, "learning_rate": 0.00013404355444829934, "loss": 0.6747, "step": 7326 }, { "epoch": 1.307019891178307, "grad_norm": 0.4715597927570343, "learning_rate": 0.00013398148897656164, "loss": 0.5054, "step": 7327 }, { "epoch": 1.3071982873963073, "grad_norm": 0.8090279698371887, "learning_rate": 0.00013391943261686782, "loss": 0.4708, "step": 7328 }, { "epoch": 1.3073766836143075, "grad_norm": 0.500850260257721, "learning_rate": 0.00013385738537409174, "loss": 0.6052, "step": 7329 }, { "epoch": 1.3075550798323077, "grad_norm": 0.5126155018806458, "learning_rate": 0.00013379534725310678, "loss": 0.5697, "step": 7330 }, { "epoch": 1.3077334760503077, "grad_norm": 0.5333918929100037, "learning_rate": 0.00013373331825878516, "loss": 0.695, "step": 7331 }, { "epoch": 1.3079118722683079, "grad_norm": 0.5330007076263428, "learning_rate": 0.00013367129839599872, "loss": 0.5267, "step": 7332 }, { "epoch": 1.308090268486308, "grad_norm": 0.5808795690536499, "learning_rate": 0.0001336092876696185, "loss": 0.8733, "step": 7333 }, { "epoch": 1.3082686647043082, "grad_norm": 0.5341413021087646, "learning_rate": 0.0001335472860845146, "loss": 0.778, "step": 7334 }, { "epoch": 1.3084470609223084, "grad_norm": 0.4854552447795868, "learning_rate": 0.00013348529364555685, "loss": 0.5773, "step": 7335 }, { "epoch": 1.3086254571403086, "grad_norm": 0.4257410168647766, "learning_rate": 0.0001334233103576139, "loss": 0.5018, "step": 7336 }, { "epoch": 1.3088038533583088, "grad_norm": 0.5281109809875488, "learning_rate": 0.0001333613362255541, "loss": 0.633, "step": 7337 }, { "epoch": 1.308982249576309, "grad_norm": 0.45057862997055054, "learning_rate": 0.00013329937125424466, "loss": 0.5499, "step": 7338 }, { "epoch": 1.3091606457943092, "grad_norm": 0.568513035774231, "learning_rate": 0.00013323741544855246, "loss": 0.7561, "step": 7339 }, { "epoch": 1.3093390420123092, "grad_norm": 0.4433366358280182, "learning_rate": 0.00013317546881334342, "loss": 0.4844, "step": 7340 }, { "epoch": 1.3095174382303094, "grad_norm": 0.4924166798591614, "learning_rate": 0.00013311353135348267, "loss": 0.5766, "step": 7341 }, { "epoch": 1.3096958344483096, "grad_norm": 0.531856894493103, "learning_rate": 0.00013305160307383495, "loss": 0.6314, "step": 7342 }, { "epoch": 1.3098742306663098, "grad_norm": 0.5930870175361633, "learning_rate": 0.00013298968397926398, "loss": 0.7782, "step": 7343 }, { "epoch": 1.31005262688431, "grad_norm": 0.5085564255714417, "learning_rate": 0.0001329277740746328, "loss": 0.6933, "step": 7344 }, { "epoch": 1.3102310231023102, "grad_norm": 0.5098043084144592, "learning_rate": 0.000132865873364804, "loss": 0.6053, "step": 7345 }, { "epoch": 1.3104094193203104, "grad_norm": 0.49799683690071106, "learning_rate": 0.00013280398185463898, "loss": 0.6819, "step": 7346 }, { "epoch": 1.3105878155383106, "grad_norm": 0.4547264277935028, "learning_rate": 0.00013274209954899888, "loss": 0.4916, "step": 7347 }, { "epoch": 1.3107662117563108, "grad_norm": 0.48524683713912964, "learning_rate": 0.00013268022645274375, "loss": 0.7242, "step": 7348 }, { "epoch": 1.310944607974311, "grad_norm": 0.4403549134731293, "learning_rate": 0.00013261836257073324, "loss": 0.5146, "step": 7349 }, { "epoch": 1.3111230041923112, "grad_norm": 0.4839078485965729, "learning_rate": 0.00013255650790782591, "loss": 0.5517, "step": 7350 }, { "epoch": 1.3113014004103114, "grad_norm": 0.4646989107131958, "learning_rate": 0.00013249466246888, "loss": 0.5391, "step": 7351 }, { "epoch": 1.3114797966283116, "grad_norm": 0.46415239572525024, "learning_rate": 0.00013243282625875267, "loss": 0.6404, "step": 7352 }, { "epoch": 1.3116581928463116, "grad_norm": 0.5185125470161438, "learning_rate": 0.00013237099928230066, "loss": 0.7389, "step": 7353 }, { "epoch": 1.3118365890643118, "grad_norm": 0.4275851547718048, "learning_rate": 0.0001323091815443797, "loss": 0.5303, "step": 7354 }, { "epoch": 1.312014985282312, "grad_norm": 0.41446274518966675, "learning_rate": 0.00013224737304984494, "loss": 0.4631, "step": 7355 }, { "epoch": 1.3121933815003122, "grad_norm": 0.4250105917453766, "learning_rate": 0.0001321855738035509, "loss": 0.483, "step": 7356 }, { "epoch": 1.3123717777183124, "grad_norm": 0.501487135887146, "learning_rate": 0.0001321237838103511, "loss": 0.6878, "step": 7357 }, { "epoch": 1.3125501739363126, "grad_norm": 0.5788127183914185, "learning_rate": 0.0001320620030750987, "loss": 0.5583, "step": 7358 }, { "epoch": 1.3127285701543128, "grad_norm": 0.5718269348144531, "learning_rate": 0.00013200023160264568, "loss": 0.7069, "step": 7359 }, { "epoch": 1.312906966372313, "grad_norm": 0.5326660871505737, "learning_rate": 0.00013193846939784374, "loss": 0.7465, "step": 7360 }, { "epoch": 1.3130853625903132, "grad_norm": 0.5201784372329712, "learning_rate": 0.00013187671646554367, "loss": 0.6788, "step": 7361 }, { "epoch": 1.3132637588083131, "grad_norm": 0.5949232578277588, "learning_rate": 0.0001318149728105954, "loss": 0.6459, "step": 7362 }, { "epoch": 1.3134421550263133, "grad_norm": 0.4743814766407013, "learning_rate": 0.00013175323843784837, "loss": 0.705, "step": 7363 }, { "epoch": 1.3136205512443135, "grad_norm": 0.4792519211769104, "learning_rate": 0.00013169151335215101, "loss": 0.6492, "step": 7364 }, { "epoch": 1.3137989474623137, "grad_norm": 0.4962617754936218, "learning_rate": 0.00013162979755835142, "loss": 0.5276, "step": 7365 }, { "epoch": 1.313977343680314, "grad_norm": 0.438672810792923, "learning_rate": 0.00013156809106129656, "loss": 0.4126, "step": 7366 }, { "epoch": 1.3141557398983141, "grad_norm": 0.43237635493278503, "learning_rate": 0.00013150639386583278, "loss": 0.5041, "step": 7367 }, { "epoch": 1.3143341361163143, "grad_norm": 0.4956110119819641, "learning_rate": 0.00013144470597680592, "loss": 0.6361, "step": 7368 }, { "epoch": 1.3145125323343145, "grad_norm": 0.4873587191104889, "learning_rate": 0.00013138302739906072, "loss": 0.7563, "step": 7369 }, { "epoch": 1.3146909285523147, "grad_norm": 0.4835222363471985, "learning_rate": 0.0001313213581374416, "loss": 0.5999, "step": 7370 }, { "epoch": 1.314869324770315, "grad_norm": 0.4809320867061615, "learning_rate": 0.00013125969819679188, "loss": 0.6086, "step": 7371 }, { "epoch": 1.3150477209883151, "grad_norm": 0.4588382840156555, "learning_rate": 0.00013119804758195442, "loss": 0.4699, "step": 7372 }, { "epoch": 1.3152261172063153, "grad_norm": 0.4489077031612396, "learning_rate": 0.00013113640629777113, "loss": 0.3956, "step": 7373 }, { "epoch": 1.3154045134243155, "grad_norm": 0.5158323049545288, "learning_rate": 0.0001310747743490833, "loss": 0.6929, "step": 7374 }, { "epoch": 1.3155829096423155, "grad_norm": 0.5298815965652466, "learning_rate": 0.0001310131517407316, "loss": 0.7762, "step": 7375 }, { "epoch": 1.3157613058603157, "grad_norm": 0.5620563626289368, "learning_rate": 0.0001309515384775557, "loss": 0.5016, "step": 7376 }, { "epoch": 1.315939702078316, "grad_norm": 0.5272053480148315, "learning_rate": 0.0001308899345643948, "loss": 0.8066, "step": 7377 }, { "epoch": 1.316118098296316, "grad_norm": 0.4727185368537903, "learning_rate": 0.00013082834000608724, "loss": 0.5287, "step": 7378 }, { "epoch": 1.3162964945143163, "grad_norm": 0.4828498959541321, "learning_rate": 0.00013076675480747042, "loss": 0.6108, "step": 7379 }, { "epoch": 1.3164748907323165, "grad_norm": 0.45956501364707947, "learning_rate": 0.00013070517897338147, "loss": 0.5884, "step": 7380 }, { "epoch": 1.3166532869503167, "grad_norm": 0.49907490611076355, "learning_rate": 0.00013064361250865637, "loss": 0.5318, "step": 7381 }, { "epoch": 1.316831683168317, "grad_norm": 0.48141568899154663, "learning_rate": 0.0001305820554181306, "loss": 0.7263, "step": 7382 }, { "epoch": 1.317010079386317, "grad_norm": 0.4059731960296631, "learning_rate": 0.0001305205077066388, "loss": 0.4103, "step": 7383 }, { "epoch": 1.317188475604317, "grad_norm": 0.48073357343673706, "learning_rate": 0.00013045896937901496, "loss": 0.5717, "step": 7384 }, { "epoch": 1.3173668718223173, "grad_norm": 0.4782210886478424, "learning_rate": 0.00013039744044009212, "loss": 0.6653, "step": 7385 }, { "epoch": 1.3175452680403175, "grad_norm": 0.5318115949630737, "learning_rate": 0.00013033592089470295, "loss": 0.5354, "step": 7386 }, { "epoch": 1.3177236642583177, "grad_norm": 0.5100518465042114, "learning_rate": 0.00013027441074767903, "loss": 0.6491, "step": 7387 }, { "epoch": 1.3179020604763179, "grad_norm": 0.4386556148529053, "learning_rate": 0.00013021291000385132, "loss": 0.4745, "step": 7388 }, { "epoch": 1.318080456694318, "grad_norm": 0.5141617655754089, "learning_rate": 0.0001301514186680502, "loss": 0.5839, "step": 7389 }, { "epoch": 1.3182588529123183, "grad_norm": 7.867745399475098, "learning_rate": 0.00013008993674510483, "loss": 0.5498, "step": 7390 }, { "epoch": 1.3184372491303185, "grad_norm": 0.5267961621284485, "learning_rate": 0.00013002846423984448, "loss": 0.6516, "step": 7391 }, { "epoch": 1.3186156453483187, "grad_norm": 0.47720450162887573, "learning_rate": 0.00012996700115709692, "loss": 0.5947, "step": 7392 }, { "epoch": 1.3187940415663189, "grad_norm": 0.514872133731842, "learning_rate": 0.00012990554750168931, "loss": 0.6416, "step": 7393 }, { "epoch": 1.318972437784319, "grad_norm": 0.443766713142395, "learning_rate": 0.00012984410327844843, "loss": 0.4777, "step": 7394 }, { "epoch": 1.3191508340023193, "grad_norm": 0.42529335618019104, "learning_rate": 0.00012978266849219985, "loss": 0.5089, "step": 7395 }, { "epoch": 1.3193292302203194, "grad_norm": 0.540155291557312, "learning_rate": 0.00012972124314776886, "loss": 0.7208, "step": 7396 }, { "epoch": 1.3195076264383194, "grad_norm": 0.4460678994655609, "learning_rate": 0.0001296598272499796, "loss": 0.5367, "step": 7397 }, { "epoch": 1.3196860226563196, "grad_norm": 0.5822709798812866, "learning_rate": 0.0001295984208036558, "loss": 0.5829, "step": 7398 }, { "epoch": 1.3198644188743198, "grad_norm": 0.4621860086917877, "learning_rate": 0.00012953702381362023, "loss": 0.5372, "step": 7399 }, { "epoch": 1.32004281509232, "grad_norm": 0.5000967979431152, "learning_rate": 0.00012947563628469487, "loss": 0.5573, "step": 7400 }, { "epoch": 1.3202212113103202, "grad_norm": 0.4751860797405243, "learning_rate": 0.00012941425822170124, "loss": 0.6386, "step": 7401 }, { "epoch": 1.3203996075283204, "grad_norm": 0.5089471340179443, "learning_rate": 0.0001293528896294598, "loss": 0.664, "step": 7402 }, { "epoch": 1.3205780037463206, "grad_norm": 0.5417841076850891, "learning_rate": 0.00012929153051279062, "loss": 0.5501, "step": 7403 }, { "epoch": 1.3207563999643208, "grad_norm": 0.5360578298568726, "learning_rate": 0.00012923018087651256, "loss": 0.6679, "step": 7404 }, { "epoch": 1.320934796182321, "grad_norm": 0.48479247093200684, "learning_rate": 0.0001291688407254441, "loss": 0.4484, "step": 7405 }, { "epoch": 1.321113192400321, "grad_norm": 0.5442363023757935, "learning_rate": 0.000129107510064403, "loss": 0.6294, "step": 7406 }, { "epoch": 1.3212915886183212, "grad_norm": 0.41899165511131287, "learning_rate": 0.00012904618889820595, "loss": 0.5342, "step": 7407 }, { "epoch": 1.3214699848363214, "grad_norm": 0.49729251861572266, "learning_rate": 0.0001289848772316693, "loss": 0.7296, "step": 7408 }, { "epoch": 1.3216483810543216, "grad_norm": 0.49834224581718445, "learning_rate": 0.00012892357506960817, "loss": 0.654, "step": 7409 }, { "epoch": 1.3218267772723218, "grad_norm": 0.47109490633010864, "learning_rate": 0.0001288622824168375, "loss": 0.6164, "step": 7410 }, { "epoch": 1.322005173490322, "grad_norm": 0.43739229440689087, "learning_rate": 0.0001288009992781709, "loss": 0.5467, "step": 7411 }, { "epoch": 1.3221835697083222, "grad_norm": 0.4927254617214203, "learning_rate": 0.00012873972565842173, "loss": 0.5437, "step": 7412 }, { "epoch": 1.3223619659263224, "grad_norm": 0.4604239761829376, "learning_rate": 0.00012867846156240238, "loss": 0.5555, "step": 7413 }, { "epoch": 1.3225403621443226, "grad_norm": 0.4850262999534607, "learning_rate": 0.00012861720699492435, "loss": 0.5776, "step": 7414 }, { "epoch": 1.3227187583623228, "grad_norm": 0.5016975998878479, "learning_rate": 0.00012855596196079873, "loss": 0.7419, "step": 7415 }, { "epoch": 1.322897154580323, "grad_norm": 0.49091964960098267, "learning_rate": 0.0001284947264648355, "loss": 0.7162, "step": 7416 }, { "epoch": 1.3230755507983232, "grad_norm": 0.4609431326389313, "learning_rate": 0.00012843350051184425, "loss": 0.5136, "step": 7417 }, { "epoch": 1.3232539470163234, "grad_norm": 0.4845166802406311, "learning_rate": 0.00012837228410663348, "loss": 0.5677, "step": 7418 }, { "epoch": 1.3234323432343233, "grad_norm": 0.5238844156265259, "learning_rate": 0.00012831107725401125, "loss": 0.6283, "step": 7419 }, { "epoch": 1.3236107394523235, "grad_norm": 0.5406588912010193, "learning_rate": 0.00012824987995878456, "loss": 0.5869, "step": 7420 }, { "epoch": 1.3237891356703237, "grad_norm": 0.526140570640564, "learning_rate": 0.0001281886922257599, "loss": 0.7051, "step": 7421 }, { "epoch": 1.323967531888324, "grad_norm": 0.4669652581214905, "learning_rate": 0.00012812751405974306, "loss": 0.5571, "step": 7422 }, { "epoch": 1.3241459281063241, "grad_norm": 0.47372832894325256, "learning_rate": 0.0001280663454655387, "loss": 0.5887, "step": 7423 }, { "epoch": 1.3243243243243243, "grad_norm": 0.4857545793056488, "learning_rate": 0.00012800518644795117, "loss": 0.6542, "step": 7424 }, { "epoch": 1.3245027205423245, "grad_norm": 0.5294866561889648, "learning_rate": 0.0001279440370117838, "loss": 0.6887, "step": 7425 }, { "epoch": 1.3246811167603247, "grad_norm": 0.5368157029151917, "learning_rate": 0.00012788289716183918, "loss": 0.6384, "step": 7426 }, { "epoch": 1.324859512978325, "grad_norm": 0.4165150225162506, "learning_rate": 0.00012782176690291936, "loss": 0.4424, "step": 7427 }, { "epoch": 1.325037909196325, "grad_norm": 0.5419753789901733, "learning_rate": 0.00012776064623982525, "loss": 0.65, "step": 7428 }, { "epoch": 1.3252163054143251, "grad_norm": 0.5392053127288818, "learning_rate": 0.0001276995351773575, "loss": 0.6592, "step": 7429 }, { "epoch": 1.3253947016323253, "grad_norm": 0.49515146017074585, "learning_rate": 0.00012763843372031554, "loss": 0.6608, "step": 7430 }, { "epoch": 1.3255730978503255, "grad_norm": 0.4864502549171448, "learning_rate": 0.00012757734187349843, "loss": 0.5878, "step": 7431 }, { "epoch": 1.3257514940683257, "grad_norm": 0.5112424492835999, "learning_rate": 0.0001275162596417041, "loss": 0.4266, "step": 7432 }, { "epoch": 1.325929890286326, "grad_norm": 0.5315030813217163, "learning_rate": 0.00012745518702973014, "loss": 0.7017, "step": 7433 }, { "epoch": 1.326108286504326, "grad_norm": 0.5505911111831665, "learning_rate": 0.00012739412404237305, "loss": 0.7733, "step": 7434 }, { "epoch": 1.3262866827223263, "grad_norm": 0.5162804126739502, "learning_rate": 0.00012733307068442862, "loss": 0.6944, "step": 7435 }, { "epoch": 1.3264650789403265, "grad_norm": 0.4766071140766144, "learning_rate": 0.000127272026960692, "loss": 0.573, "step": 7436 }, { "epoch": 1.3266434751583267, "grad_norm": 0.4825485944747925, "learning_rate": 0.00012721099287595766, "loss": 0.6408, "step": 7437 }, { "epoch": 1.326821871376327, "grad_norm": 0.40906190872192383, "learning_rate": 0.00012714996843501904, "loss": 0.507, "step": 7438 }, { "epoch": 1.327000267594327, "grad_norm": 0.5242511630058289, "learning_rate": 0.0001270889536426691, "loss": 0.7517, "step": 7439 }, { "epoch": 1.3271786638123273, "grad_norm": 0.5269128084182739, "learning_rate": 0.00012702794850369975, "loss": 0.7014, "step": 7440 }, { "epoch": 1.3273570600303275, "grad_norm": 0.5377137064933777, "learning_rate": 0.00012696695302290251, "loss": 0.7327, "step": 7441 }, { "epoch": 1.3275354562483275, "grad_norm": 0.4645613729953766, "learning_rate": 0.00012690596720506776, "loss": 0.5923, "step": 7442 }, { "epoch": 1.3277138524663277, "grad_norm": 0.5495232343673706, "learning_rate": 0.00012684499105498543, "loss": 0.7983, "step": 7443 }, { "epoch": 1.3278922486843279, "grad_norm": 0.5172943472862244, "learning_rate": 0.00012678402457744442, "loss": 0.5517, "step": 7444 }, { "epoch": 1.328070644902328, "grad_norm": 0.44186466932296753, "learning_rate": 0.0001267230677772332, "loss": 0.4636, "step": 7445 }, { "epoch": 1.3282490411203283, "grad_norm": 0.4601621925830841, "learning_rate": 0.00012666212065913922, "loss": 0.5381, "step": 7446 }, { "epoch": 1.3284274373383285, "grad_norm": 0.540649950504303, "learning_rate": 0.00012660118322794907, "loss": 0.584, "step": 7447 }, { "epoch": 1.3286058335563287, "grad_norm": 0.45804736018180847, "learning_rate": 0.000126540255488449, "loss": 0.5367, "step": 7448 }, { "epoch": 1.3287842297743289, "grad_norm": 0.46976906061172485, "learning_rate": 0.000126479337445424, "loss": 0.5498, "step": 7449 }, { "epoch": 1.3289626259923288, "grad_norm": 0.42822468280792236, "learning_rate": 0.0001264184291036588, "loss": 0.519, "step": 7450 }, { "epoch": 1.329141022210329, "grad_norm": 0.5430753827095032, "learning_rate": 0.00012635753046793692, "loss": 0.7222, "step": 7451 }, { "epoch": 1.3293194184283292, "grad_norm": 0.4692500829696655, "learning_rate": 0.00012629664154304137, "loss": 0.4665, "step": 7452 }, { "epoch": 1.3294978146463294, "grad_norm": 0.45048218965530396, "learning_rate": 0.00012623576233375449, "loss": 0.4468, "step": 7453 }, { "epoch": 1.3296762108643296, "grad_norm": 0.5404579043388367, "learning_rate": 0.00012617489284485746, "loss": 0.6637, "step": 7454 }, { "epoch": 1.3298546070823298, "grad_norm": 0.5273029208183289, "learning_rate": 0.00012611403308113113, "loss": 0.7473, "step": 7455 }, { "epoch": 1.33003300330033, "grad_norm": 0.4636683762073517, "learning_rate": 0.00012605318304735524, "loss": 0.4647, "step": 7456 }, { "epoch": 1.3302113995183302, "grad_norm": 0.5994560718536377, "learning_rate": 0.00012599234274830913, "loss": 0.7458, "step": 7457 }, { "epoch": 1.3303897957363304, "grad_norm": 0.4628642201423645, "learning_rate": 0.00012593151218877105, "loss": 0.523, "step": 7458 }, { "epoch": 1.3305681919543306, "grad_norm": 0.5867023468017578, "learning_rate": 0.00012587069137351853, "loss": 0.8639, "step": 7459 }, { "epoch": 1.3307465881723308, "grad_norm": 0.5695323944091797, "learning_rate": 0.00012580988030732858, "loss": 0.7051, "step": 7460 }, { "epoch": 1.330924984390331, "grad_norm": 0.5253010392189026, "learning_rate": 0.00012574907899497707, "loss": 0.7779, "step": 7461 }, { "epoch": 1.3311033806083312, "grad_norm": 0.4339081346988678, "learning_rate": 0.00012568828744123956, "loss": 0.4361, "step": 7462 }, { "epoch": 1.3312817768263314, "grad_norm": 0.4968469738960266, "learning_rate": 0.0001256275056508903, "loss": 0.6992, "step": 7463 }, { "epoch": 1.3314601730443314, "grad_norm": 0.4775252044200897, "learning_rate": 0.00012556673362870338, "loss": 0.582, "step": 7464 }, { "epoch": 1.3316385692623316, "grad_norm": 0.482811838388443, "learning_rate": 0.00012550597137945152, "loss": 0.6071, "step": 7465 }, { "epoch": 1.3318169654803318, "grad_norm": 0.4188464879989624, "learning_rate": 0.00012544521890790712, "loss": 0.4437, "step": 7466 }, { "epoch": 1.331995361698332, "grad_norm": 0.49079805612564087, "learning_rate": 0.0001253844762188417, "loss": 0.5477, "step": 7467 }, { "epoch": 1.3321737579163322, "grad_norm": 0.4884779751300812, "learning_rate": 0.00012532374331702584, "loss": 0.5643, "step": 7468 }, { "epoch": 1.3323521541343324, "grad_norm": 0.9924476742744446, "learning_rate": 0.00012526302020722958, "loss": 0.5498, "step": 7469 }, { "epoch": 1.3325305503523326, "grad_norm": 0.4724234640598297, "learning_rate": 0.00012520230689422196, "loss": 0.5189, "step": 7470 }, { "epoch": 1.3327089465703328, "grad_norm": 0.4852631390094757, "learning_rate": 0.00012514160338277154, "loss": 0.515, "step": 7471 }, { "epoch": 1.3328873427883328, "grad_norm": 0.4969404637813568, "learning_rate": 0.00012508090967764586, "loss": 0.6241, "step": 7472 }, { "epoch": 1.333065739006333, "grad_norm": 0.47249653935432434, "learning_rate": 0.00012502022578361166, "loss": 0.6354, "step": 7473 }, { "epoch": 1.3332441352243332, "grad_norm": 0.4671791195869446, "learning_rate": 0.00012495955170543528, "loss": 0.5399, "step": 7474 }, { "epoch": 1.3334225314423334, "grad_norm": 0.45596200227737427, "learning_rate": 0.00012489888744788178, "loss": 0.4377, "step": 7475 }, { "epoch": 1.3336009276603336, "grad_norm": 0.48205843567848206, "learning_rate": 0.00012483823301571593, "loss": 0.6145, "step": 7476 }, { "epoch": 1.3337793238783338, "grad_norm": 0.6052742004394531, "learning_rate": 0.0001247775884137013, "loss": 0.7105, "step": 7477 }, { "epoch": 1.333957720096334, "grad_norm": 0.4180543124675751, "learning_rate": 0.00012471695364660106, "loss": 0.6102, "step": 7478 }, { "epoch": 1.3341361163143342, "grad_norm": 0.5577853322029114, "learning_rate": 0.0001246563287191774, "loss": 0.5936, "step": 7479 }, { "epoch": 1.3343145125323344, "grad_norm": 0.5047107934951782, "learning_rate": 0.00012459571363619167, "loss": 0.5753, "step": 7480 }, { "epoch": 1.3344929087503345, "grad_norm": 0.4621174931526184, "learning_rate": 0.00012453510840240457, "loss": 0.5743, "step": 7481 }, { "epoch": 1.3346713049683347, "grad_norm": 0.456528902053833, "learning_rate": 0.00012447451302257607, "loss": 0.5022, "step": 7482 }, { "epoch": 1.334849701186335, "grad_norm": 0.43456900119781494, "learning_rate": 0.00012441392750146542, "loss": 0.4349, "step": 7483 }, { "epoch": 1.3350280974043351, "grad_norm": 0.46913912892341614, "learning_rate": 0.00012435335184383085, "loss": 0.709, "step": 7484 }, { "epoch": 1.3352064936223353, "grad_norm": 0.42998531460762024, "learning_rate": 0.00012429278605442988, "loss": 0.5149, "step": 7485 }, { "epoch": 1.3353848898403353, "grad_norm": 0.5419842600822449, "learning_rate": 0.00012423223013801945, "loss": 0.7627, "step": 7486 }, { "epoch": 1.3355632860583355, "grad_norm": 0.4358643889427185, "learning_rate": 0.00012417168409935547, "loss": 0.4494, "step": 7487 }, { "epoch": 1.3357416822763357, "grad_norm": 0.6511504650115967, "learning_rate": 0.00012411114794319336, "loss": 0.7345, "step": 7488 }, { "epoch": 1.335920078494336, "grad_norm": 0.40742728114128113, "learning_rate": 0.00012405062167428744, "loss": 0.4618, "step": 7489 }, { "epoch": 1.3360984747123361, "grad_norm": 0.4425670802593231, "learning_rate": 0.00012399010529739158, "loss": 0.5878, "step": 7490 }, { "epoch": 1.3362768709303363, "grad_norm": 0.4261229634284973, "learning_rate": 0.00012392959881725853, "loss": 0.4344, "step": 7491 }, { "epoch": 1.3364552671483365, "grad_norm": 0.5334789156913757, "learning_rate": 0.00012386910223864062, "loss": 0.6877, "step": 7492 }, { "epoch": 1.3366336633663367, "grad_norm": 0.4170753061771393, "learning_rate": 0.00012380861556628915, "loss": 0.4582, "step": 7493 }, { "epoch": 1.3368120595843367, "grad_norm": 0.4649193286895752, "learning_rate": 0.0001237481388049546, "loss": 0.5534, "step": 7494 }, { "epoch": 1.3369904558023369, "grad_norm": 0.4809529483318329, "learning_rate": 0.00012368767195938701, "loss": 0.4642, "step": 7495 }, { "epoch": 1.337168852020337, "grad_norm": 0.467174232006073, "learning_rate": 0.00012362721503433521, "loss": 0.5001, "step": 7496 }, { "epoch": 1.3373472482383373, "grad_norm": 0.5081356167793274, "learning_rate": 0.00012356676803454758, "loss": 0.6698, "step": 7497 }, { "epoch": 1.3375256444563375, "grad_norm": 0.5005205273628235, "learning_rate": 0.00012350633096477165, "loss": 0.6773, "step": 7498 }, { "epoch": 1.3377040406743377, "grad_norm": 0.5624291896820068, "learning_rate": 0.00012344590382975395, "loss": 0.7196, "step": 7499 }, { "epoch": 1.3378824368923379, "grad_norm": 0.5750653147697449, "learning_rate": 0.00012338548663424063, "loss": 0.7924, "step": 7500 }, { "epoch": 1.338060833110338, "grad_norm": 0.47942960262298584, "learning_rate": 0.00012332507938297657, "loss": 0.5147, "step": 7501 }, { "epoch": 1.3382392293283383, "grad_norm": 0.4365304112434387, "learning_rate": 0.0001232646820807064, "loss": 0.5417, "step": 7502 }, { "epoch": 1.3384176255463385, "grad_norm": 0.4866253435611725, "learning_rate": 0.0001232042947321734, "loss": 0.5912, "step": 7503 }, { "epoch": 1.3385960217643387, "grad_norm": 0.42192235589027405, "learning_rate": 0.00012314391734212068, "loss": 0.4446, "step": 7504 }, { "epoch": 1.3387744179823389, "grad_norm": 0.45548638701438904, "learning_rate": 0.00012308354991529008, "loss": 0.5527, "step": 7505 }, { "epoch": 1.338952814200339, "grad_norm": 0.4403938949108124, "learning_rate": 0.00012302319245642278, "loss": 0.4453, "step": 7506 }, { "epoch": 1.3391312104183393, "grad_norm": 0.4472365975379944, "learning_rate": 0.00012296284497025938, "loss": 0.485, "step": 7507 }, { "epoch": 1.3393096066363392, "grad_norm": 0.49241968989372253, "learning_rate": 0.00012290250746153935, "loss": 0.674, "step": 7508 }, { "epoch": 1.3394880028543394, "grad_norm": 0.5484983921051025, "learning_rate": 0.0001228421799350018, "loss": 0.6603, "step": 7509 }, { "epoch": 1.3396663990723396, "grad_norm": 0.666890025138855, "learning_rate": 0.00012278186239538463, "loss": 0.5482, "step": 7510 }, { "epoch": 1.3398447952903398, "grad_norm": 0.45667779445648193, "learning_rate": 0.00012272155484742534, "loss": 0.5867, "step": 7511 }, { "epoch": 1.34002319150834, "grad_norm": 0.4096013009548187, "learning_rate": 0.00012266125729586025, "loss": 0.4004, "step": 7512 }, { "epoch": 1.3402015877263402, "grad_norm": 0.4495585262775421, "learning_rate": 0.00012260096974542524, "loss": 0.4578, "step": 7513 }, { "epoch": 1.3403799839443404, "grad_norm": 0.49030259251594543, "learning_rate": 0.0001225406922008553, "loss": 0.7084, "step": 7514 }, { "epoch": 1.3405583801623406, "grad_norm": 0.49130570888519287, "learning_rate": 0.0001224804246668845, "loss": 0.6026, "step": 7515 }, { "epoch": 1.3407367763803406, "grad_norm": 0.4424048364162445, "learning_rate": 0.00012242016714824632, "loss": 0.4755, "step": 7516 }, { "epoch": 1.3409151725983408, "grad_norm": 0.5130663514137268, "learning_rate": 0.00012235991964967325, "loss": 0.6028, "step": 7517 }, { "epoch": 1.341093568816341, "grad_norm": 0.4313303232192993, "learning_rate": 0.0001222996821758972, "loss": 0.4998, "step": 7518 }, { "epoch": 1.3412719650343412, "grad_norm": 0.5465921759605408, "learning_rate": 0.0001222394547316492, "loss": 0.8042, "step": 7519 }, { "epoch": 1.3414503612523414, "grad_norm": 0.5101335048675537, "learning_rate": 0.00012217923732165938, "loss": 0.6261, "step": 7520 }, { "epoch": 1.3416287574703416, "grad_norm": 0.5270312428474426, "learning_rate": 0.00012211902995065728, "loss": 0.6911, "step": 7521 }, { "epoch": 1.3418071536883418, "grad_norm": 0.5207296013832092, "learning_rate": 0.0001220588326233715, "loss": 0.7429, "step": 7522 }, { "epoch": 1.341985549906342, "grad_norm": 0.4834536015987396, "learning_rate": 0.00012199864534453003, "loss": 0.594, "step": 7523 }, { "epoch": 1.3421639461243422, "grad_norm": 0.46801358461380005, "learning_rate": 0.00012193846811885978, "loss": 0.5481, "step": 7524 }, { "epoch": 1.3423423423423424, "grad_norm": 0.4802463948726654, "learning_rate": 0.00012187830095108721, "loss": 0.7895, "step": 7525 }, { "epoch": 1.3425207385603426, "grad_norm": 0.49674612283706665, "learning_rate": 0.00012181814384593776, "loss": 0.693, "step": 7526 }, { "epoch": 1.3426991347783428, "grad_norm": 0.44266417622566223, "learning_rate": 0.00012175799680813593, "loss": 0.4989, "step": 7527 }, { "epoch": 1.342877530996343, "grad_norm": 0.5586349964141846, "learning_rate": 0.00012169785984240605, "loss": 0.556, "step": 7528 }, { "epoch": 1.3430559272143432, "grad_norm": 0.485568106174469, "learning_rate": 0.00012163773295347095, "loss": 0.6456, "step": 7529 }, { "epoch": 1.3432343234323432, "grad_norm": 0.47074824571609497, "learning_rate": 0.00012157761614605314, "loss": 0.5116, "step": 7530 }, { "epoch": 1.3434127196503434, "grad_norm": 0.48266589641571045, "learning_rate": 0.0001215175094248741, "loss": 0.5583, "step": 7531 }, { "epoch": 1.3435911158683436, "grad_norm": 0.5991159677505493, "learning_rate": 0.0001214574127946545, "loss": 0.8355, "step": 7532 }, { "epoch": 1.3437695120863438, "grad_norm": 0.47576338052749634, "learning_rate": 0.00012139732626011446, "loss": 0.6401, "step": 7533 }, { "epoch": 1.343947908304344, "grad_norm": 0.5145847201347351, "learning_rate": 0.000121337249825973, "loss": 0.6782, "step": 7534 }, { "epoch": 1.3441263045223442, "grad_norm": 0.6812740564346313, "learning_rate": 0.00012127718349694863, "loss": 0.5941, "step": 7535 }, { "epoch": 1.3443047007403444, "grad_norm": 0.5664840936660767, "learning_rate": 0.00012121712727775882, "loss": 0.755, "step": 7536 }, { "epoch": 1.3444830969583446, "grad_norm": 0.43093249201774597, "learning_rate": 0.00012115708117312049, "loss": 0.4926, "step": 7537 }, { "epoch": 1.3446614931763445, "grad_norm": 0.4539382755756378, "learning_rate": 0.00012109704518774956, "loss": 0.5437, "step": 7538 }, { "epoch": 1.3448398893943447, "grad_norm": 0.46809741854667664, "learning_rate": 0.00012103701932636114, "loss": 0.4688, "step": 7539 }, { "epoch": 1.345018285612345, "grad_norm": 0.479373037815094, "learning_rate": 0.00012097700359366981, "loss": 0.6389, "step": 7540 }, { "epoch": 1.3451966818303451, "grad_norm": 0.3928679823875427, "learning_rate": 0.00012091699799438899, "loss": 0.36, "step": 7541 }, { "epoch": 1.3453750780483453, "grad_norm": 0.4709312915802002, "learning_rate": 0.00012085700253323173, "loss": 0.5524, "step": 7542 }, { "epoch": 1.3455534742663455, "grad_norm": 0.4242085814476013, "learning_rate": 0.0001207970172149098, "loss": 0.4975, "step": 7543 }, { "epoch": 1.3457318704843457, "grad_norm": 0.4750572741031647, "learning_rate": 0.00012073704204413452, "loss": 0.6045, "step": 7544 }, { "epoch": 1.345910266702346, "grad_norm": 0.5419875383377075, "learning_rate": 0.00012067707702561645, "loss": 0.7744, "step": 7545 }, { "epoch": 1.3460886629203461, "grad_norm": 0.5909712910652161, "learning_rate": 0.00012061712216406501, "loss": 0.8503, "step": 7546 }, { "epoch": 1.3462670591383463, "grad_norm": 0.4636007249355316, "learning_rate": 0.00012055717746418918, "loss": 0.5914, "step": 7547 }, { "epoch": 1.3464454553563465, "grad_norm": 0.5388915538787842, "learning_rate": 0.00012049724293069686, "loss": 0.7088, "step": 7548 }, { "epoch": 1.3466238515743467, "grad_norm": 0.5035228729248047, "learning_rate": 0.00012043731856829543, "loss": 0.6403, "step": 7549 }, { "epoch": 1.346802247792347, "grad_norm": 0.4420143961906433, "learning_rate": 0.00012037740438169118, "loss": 0.5323, "step": 7550 }, { "epoch": 1.3469806440103471, "grad_norm": 0.5462261438369751, "learning_rate": 0.00012031750037558986, "loss": 0.8891, "step": 7551 }, { "epoch": 1.347159040228347, "grad_norm": 0.45456668734550476, "learning_rate": 0.00012025760655469628, "loss": 0.4707, "step": 7552 }, { "epoch": 1.3473374364463473, "grad_norm": 0.4722398519515991, "learning_rate": 0.00012019772292371437, "loss": 0.5228, "step": 7553 }, { "epoch": 1.3475158326643475, "grad_norm": 0.53815096616745, "learning_rate": 0.0001201378494873475, "loss": 0.804, "step": 7554 }, { "epoch": 1.3476942288823477, "grad_norm": 0.48279356956481934, "learning_rate": 0.00012007798625029798, "loss": 0.7179, "step": 7555 }, { "epoch": 1.3478726251003479, "grad_norm": 1.062436819076538, "learning_rate": 0.0001200181332172676, "loss": 0.4516, "step": 7556 }, { "epoch": 1.348051021318348, "grad_norm": 0.44580498337745667, "learning_rate": 0.000119958290392957, "loss": 0.5279, "step": 7557 }, { "epoch": 1.3482294175363483, "grad_norm": 0.48104509711265564, "learning_rate": 0.00011989845778206629, "loss": 0.6148, "step": 7558 }, { "epoch": 1.3484078137543485, "grad_norm": 0.5047935843467712, "learning_rate": 0.00011983863538929485, "loss": 0.6991, "step": 7559 }, { "epoch": 1.3485862099723485, "grad_norm": 0.5424177646636963, "learning_rate": 0.00011977882321934086, "loss": 0.6587, "step": 7560 }, { "epoch": 1.3487646061903487, "grad_norm": 0.5127599239349365, "learning_rate": 0.00011971902127690215, "loss": 0.7465, "step": 7561 }, { "epoch": 1.3489430024083489, "grad_norm": 0.4546005427837372, "learning_rate": 0.00011965922956667535, "loss": 0.5514, "step": 7562 }, { "epoch": 1.349121398626349, "grad_norm": 0.49438631534576416, "learning_rate": 0.00011959944809335668, "loss": 0.6156, "step": 7563 }, { "epoch": 1.3492997948443493, "grad_norm": 0.5039214491844177, "learning_rate": 0.00011953967686164125, "loss": 0.532, "step": 7564 }, { "epoch": 1.3494781910623495, "grad_norm": 0.5272899270057678, "learning_rate": 0.00011947991587622334, "loss": 0.7789, "step": 7565 }, { "epoch": 1.3496565872803497, "grad_norm": 0.5394687652587891, "learning_rate": 0.00011942016514179677, "loss": 0.7146, "step": 7566 }, { "epoch": 1.3498349834983498, "grad_norm": 0.4609784185886383, "learning_rate": 0.00011936042466305413, "loss": 0.555, "step": 7567 }, { "epoch": 1.35001337971635, "grad_norm": 0.48477205634117126, "learning_rate": 0.00011930069444468764, "loss": 0.5976, "step": 7568 }, { "epoch": 1.3501917759343502, "grad_norm": 0.47605961561203003, "learning_rate": 0.00011924097449138824, "loss": 0.5233, "step": 7569 }, { "epoch": 1.3503701721523504, "grad_norm": 0.46146827936172485, "learning_rate": 0.00011918126480784655, "loss": 0.4737, "step": 7570 }, { "epoch": 1.3505485683703506, "grad_norm": 0.48921510577201843, "learning_rate": 0.0001191215653987519, "loss": 0.6749, "step": 7571 }, { "epoch": 1.3507269645883508, "grad_norm": 0.49213096499443054, "learning_rate": 0.0001190618762687933, "loss": 0.6015, "step": 7572 }, { "epoch": 1.350905360806351, "grad_norm": 0.5104438662528992, "learning_rate": 0.0001190021974226585, "loss": 0.5599, "step": 7573 }, { "epoch": 1.351083757024351, "grad_norm": 0.55939120054245, "learning_rate": 0.00011894252886503476, "loss": 0.6426, "step": 7574 }, { "epoch": 1.3512621532423512, "grad_norm": 0.5199212431907654, "learning_rate": 0.00011888287060060845, "loss": 0.7041, "step": 7575 }, { "epoch": 1.3514405494603514, "grad_norm": 0.48316359519958496, "learning_rate": 0.000118823222634065, "loss": 0.5074, "step": 7576 }, { "epoch": 1.3516189456783516, "grad_norm": 0.630157470703125, "learning_rate": 0.0001187635849700893, "loss": 0.5867, "step": 7577 }, { "epoch": 1.3517973418963518, "grad_norm": 0.48873627185821533, "learning_rate": 0.00011870395761336514, "loss": 0.5635, "step": 7578 }, { "epoch": 1.351975738114352, "grad_norm": 0.4779564142227173, "learning_rate": 0.00011864434056857554, "loss": 0.4933, "step": 7579 }, { "epoch": 1.3521541343323522, "grad_norm": 0.5071210861206055, "learning_rate": 0.00011858473384040302, "loss": 0.7165, "step": 7580 }, { "epoch": 1.3523325305503524, "grad_norm": 0.5788468718528748, "learning_rate": 0.00011852513743352885, "loss": 0.8357, "step": 7581 }, { "epoch": 1.3525109267683524, "grad_norm": 0.4292871356010437, "learning_rate": 0.0001184655513526339, "loss": 0.5256, "step": 7582 }, { "epoch": 1.3526893229863526, "grad_norm": 0.4763195216655731, "learning_rate": 0.00011840597560239785, "loss": 0.5758, "step": 7583 }, { "epoch": 1.3528677192043528, "grad_norm": 0.5240211486816406, "learning_rate": 0.00011834641018749994, "loss": 0.5393, "step": 7584 }, { "epoch": 1.353046115422353, "grad_norm": 0.46273085474967957, "learning_rate": 0.00011828685511261833, "loss": 0.4274, "step": 7585 }, { "epoch": 1.3532245116403532, "grad_norm": 0.5363104343414307, "learning_rate": 0.00011822731038243035, "loss": 0.6826, "step": 7586 }, { "epoch": 1.3534029078583534, "grad_norm": 0.46597224473953247, "learning_rate": 0.00011816777600161278, "loss": 0.5414, "step": 7587 }, { "epoch": 1.3535813040763536, "grad_norm": 0.48101913928985596, "learning_rate": 0.00011810825197484126, "loss": 0.4022, "step": 7588 }, { "epoch": 1.3537597002943538, "grad_norm": 0.5787967443466187, "learning_rate": 0.00011804873830679089, "loss": 0.7585, "step": 7589 }, { "epoch": 1.353938096512354, "grad_norm": 0.39563214778900146, "learning_rate": 0.0001179892350021359, "loss": 0.3329, "step": 7590 }, { "epoch": 1.3541164927303542, "grad_norm": 0.5252716541290283, "learning_rate": 0.00011792974206554949, "loss": 0.6866, "step": 7591 }, { "epoch": 1.3542948889483544, "grad_norm": 0.5136657953262329, "learning_rate": 0.00011787025950170441, "loss": 0.6072, "step": 7592 }, { "epoch": 1.3544732851663546, "grad_norm": 0.503588080406189, "learning_rate": 0.0001178107873152722, "loss": 0.6746, "step": 7593 }, { "epoch": 1.3546516813843548, "grad_norm": 0.5402643084526062, "learning_rate": 0.00011775132551092397, "loss": 0.73, "step": 7594 }, { "epoch": 1.354830077602355, "grad_norm": 0.4741210639476776, "learning_rate": 0.0001176918740933296, "loss": 0.5221, "step": 7595 }, { "epoch": 1.355008473820355, "grad_norm": 0.5205968618392944, "learning_rate": 0.00011763243306715862, "loss": 0.639, "step": 7596 }, { "epoch": 1.3551868700383551, "grad_norm": 0.5402557253837585, "learning_rate": 0.00011757300243707927, "loss": 0.7846, "step": 7597 }, { "epoch": 1.3553652662563553, "grad_norm": 0.46083173155784607, "learning_rate": 0.00011751358220775943, "loss": 0.5686, "step": 7598 }, { "epoch": 1.3555436624743555, "grad_norm": 0.5107517242431641, "learning_rate": 0.00011745417238386583, "loss": 0.639, "step": 7599 }, { "epoch": 1.3557220586923557, "grad_norm": 0.5665692687034607, "learning_rate": 0.0001173947729700644, "loss": 0.7715, "step": 7600 }, { "epoch": 1.355900454910356, "grad_norm": 0.5142179727554321, "learning_rate": 0.00011733538397102053, "loss": 0.5198, "step": 7601 }, { "epoch": 1.3560788511283561, "grad_norm": 0.49913138151168823, "learning_rate": 0.00011727600539139841, "loss": 0.6599, "step": 7602 }, { "epoch": 1.3562572473463563, "grad_norm": 0.4611426293849945, "learning_rate": 0.00011721663723586181, "loss": 0.5418, "step": 7603 }, { "epoch": 1.3564356435643563, "grad_norm": 0.49066436290740967, "learning_rate": 0.00011715727950907329, "loss": 0.6004, "step": 7604 }, { "epoch": 1.3566140397823565, "grad_norm": 0.4376862645149231, "learning_rate": 0.00011709793221569486, "loss": 0.5262, "step": 7605 }, { "epoch": 1.3567924360003567, "grad_norm": 0.46929794549942017, "learning_rate": 0.00011703859536038774, "loss": 0.4824, "step": 7606 }, { "epoch": 1.356970832218357, "grad_norm": 0.5407469868659973, "learning_rate": 0.00011697926894781205, "loss": 0.692, "step": 7607 }, { "epoch": 1.357149228436357, "grad_norm": 0.5103482007980347, "learning_rate": 0.00011691995298262739, "loss": 0.6721, "step": 7608 }, { "epoch": 1.3573276246543573, "grad_norm": 0.532581090927124, "learning_rate": 0.00011686064746949229, "loss": 0.7574, "step": 7609 }, { "epoch": 1.3575060208723575, "grad_norm": 0.4488193690776825, "learning_rate": 0.00011680135241306472, "loss": 0.5191, "step": 7610 }, { "epoch": 1.3576844170903577, "grad_norm": 0.4854279160499573, "learning_rate": 0.00011674206781800162, "loss": 0.5963, "step": 7611 }, { "epoch": 1.357862813308358, "grad_norm": 0.4275936186313629, "learning_rate": 0.00011668279368895907, "loss": 0.4465, "step": 7612 }, { "epoch": 1.358041209526358, "grad_norm": 0.5791702270507812, "learning_rate": 0.00011662353003059262, "loss": 0.7993, "step": 7613 }, { "epoch": 1.3582196057443583, "grad_norm": 0.5329555869102478, "learning_rate": 0.00011656427684755666, "loss": 0.6066, "step": 7614 }, { "epoch": 1.3583980019623585, "grad_norm": 0.4500406086444855, "learning_rate": 0.00011650503414450502, "loss": 0.5459, "step": 7615 }, { "epoch": 1.3585763981803587, "grad_norm": 0.5136424899101257, "learning_rate": 0.0001164458019260905, "loss": 0.6399, "step": 7616 }, { "epoch": 1.358754794398359, "grad_norm": 0.49857786297798157, "learning_rate": 0.0001163865801969653, "loss": 0.632, "step": 7617 }, { "epoch": 1.3589331906163589, "grad_norm": 0.529205858707428, "learning_rate": 0.00011632736896178059, "loss": 0.5917, "step": 7618 }, { "epoch": 1.359111586834359, "grad_norm": 0.6193312406539917, "learning_rate": 0.00011626816822518662, "loss": 0.693, "step": 7619 }, { "epoch": 1.3592899830523593, "grad_norm": 0.4899401068687439, "learning_rate": 0.00011620897799183336, "loss": 0.5771, "step": 7620 }, { "epoch": 1.3594683792703595, "grad_norm": 0.5664353966712952, "learning_rate": 0.0001161497982663693, "loss": 0.7672, "step": 7621 }, { "epoch": 1.3596467754883597, "grad_norm": 0.4969763159751892, "learning_rate": 0.00011609062905344256, "loss": 0.669, "step": 7622 }, { "epoch": 1.3598251717063599, "grad_norm": 0.48748770356178284, "learning_rate": 0.0001160314703577002, "loss": 0.6251, "step": 7623 }, { "epoch": 1.36000356792436, "grad_norm": 0.4871166944503784, "learning_rate": 0.00011597232218378842, "loss": 0.4868, "step": 7624 }, { "epoch": 1.3601819641423603, "grad_norm": 0.4607223868370056, "learning_rate": 0.00011591318453635286, "loss": 0.617, "step": 7625 }, { "epoch": 1.3603603603603602, "grad_norm": 0.5145749449729919, "learning_rate": 0.000115854057420038, "loss": 0.8115, "step": 7626 }, { "epoch": 1.3605387565783604, "grad_norm": 0.4471273124217987, "learning_rate": 0.00011579494083948783, "loss": 0.4159, "step": 7627 }, { "epoch": 1.3607171527963606, "grad_norm": 0.5210676193237305, "learning_rate": 0.00011573583479934516, "loss": 0.6254, "step": 7628 }, { "epoch": 1.3608955490143608, "grad_norm": 0.44996219873428345, "learning_rate": 0.00011567673930425232, "loss": 0.4922, "step": 7629 }, { "epoch": 1.361073945232361, "grad_norm": 0.5416077375411987, "learning_rate": 0.0001156176543588505, "loss": 0.6659, "step": 7630 }, { "epoch": 1.3612523414503612, "grad_norm": 0.5629211068153381, "learning_rate": 0.00011555857996778038, "loss": 0.6857, "step": 7631 }, { "epoch": 1.3614307376683614, "grad_norm": 0.552343487739563, "learning_rate": 0.00011549951613568152, "loss": 0.8255, "step": 7632 }, { "epoch": 1.3616091338863616, "grad_norm": 0.4920686185359955, "learning_rate": 0.0001154404628671927, "loss": 0.5705, "step": 7633 }, { "epoch": 1.3617875301043618, "grad_norm": 0.514735758304596, "learning_rate": 0.0001153814201669521, "loss": 0.5387, "step": 7634 }, { "epoch": 1.361965926322362, "grad_norm": 0.6460976600646973, "learning_rate": 0.00011532238803959666, "loss": 0.5714, "step": 7635 }, { "epoch": 1.3621443225403622, "grad_norm": 0.48432081937789917, "learning_rate": 0.00011526336648976307, "loss": 0.4946, "step": 7636 }, { "epoch": 1.3623227187583624, "grad_norm": 0.5579152703285217, "learning_rate": 0.00011520435552208672, "loss": 0.6566, "step": 7637 }, { "epoch": 1.3625011149763626, "grad_norm": 0.5772841572761536, "learning_rate": 0.00011514535514120217, "loss": 0.7587, "step": 7638 }, { "epoch": 1.3626795111943628, "grad_norm": 0.43424656987190247, "learning_rate": 0.00011508636535174349, "loss": 0.4863, "step": 7639 }, { "epoch": 1.3628579074123628, "grad_norm": 0.5111271739006042, "learning_rate": 0.00011502738615834351, "loss": 0.6528, "step": 7640 }, { "epoch": 1.363036303630363, "grad_norm": 0.4701317548751831, "learning_rate": 0.00011496841756563467, "loss": 0.4773, "step": 7641 }, { "epoch": 1.3632146998483632, "grad_norm": 0.5139216780662537, "learning_rate": 0.00011490945957824808, "loss": 0.647, "step": 7642 }, { "epoch": 1.3633930960663634, "grad_norm": 0.5112646222114563, "learning_rate": 0.00011485051220081449, "loss": 0.582, "step": 7643 }, { "epoch": 1.3635714922843636, "grad_norm": 0.5711879730224609, "learning_rate": 0.00011479157543796353, "loss": 0.7522, "step": 7644 }, { "epoch": 1.3637498885023638, "grad_norm": 0.4305424988269806, "learning_rate": 0.00011473264929432398, "loss": 0.4716, "step": 7645 }, { "epoch": 1.363928284720364, "grad_norm": 0.5155298113822937, "learning_rate": 0.000114673733774524, "loss": 0.6646, "step": 7646 }, { "epoch": 1.3641066809383642, "grad_norm": 0.5119175910949707, "learning_rate": 0.00011461482888319064, "loss": 0.6522, "step": 7647 }, { "epoch": 1.3642850771563642, "grad_norm": 0.5106381177902222, "learning_rate": 0.00011455593462495047, "loss": 0.7938, "step": 7648 }, { "epoch": 1.3644634733743644, "grad_norm": 0.44368186593055725, "learning_rate": 0.00011449705100442881, "loss": 0.4743, "step": 7649 }, { "epoch": 1.3646418695923646, "grad_norm": 0.4758698046207428, "learning_rate": 0.00011443817802625044, "loss": 0.5669, "step": 7650 }, { "epoch": 1.3648202658103648, "grad_norm": 0.41871505975723267, "learning_rate": 0.00011437931569503935, "loss": 0.4859, "step": 7651 }, { "epoch": 1.364998662028365, "grad_norm": 0.44119468331336975, "learning_rate": 0.00011432046401541835, "loss": 0.57, "step": 7652 }, { "epoch": 1.3651770582463651, "grad_norm": 0.5434176921844482, "learning_rate": 0.0001142616229920098, "loss": 0.7457, "step": 7653 }, { "epoch": 1.3653554544643653, "grad_norm": 0.4746476411819458, "learning_rate": 0.00011420279262943487, "loss": 0.5716, "step": 7654 }, { "epoch": 1.3655338506823655, "grad_norm": 0.4947410523891449, "learning_rate": 0.00011414397293231424, "loss": 0.6332, "step": 7655 }, { "epoch": 1.3657122469003657, "grad_norm": 0.4677318036556244, "learning_rate": 0.00011408516390526747, "loss": 0.5055, "step": 7656 }, { "epoch": 1.365890643118366, "grad_norm": 0.535660445690155, "learning_rate": 0.00011402636555291348, "loss": 0.8911, "step": 7657 }, { "epoch": 1.3660690393363661, "grad_norm": 0.4632878601551056, "learning_rate": 0.00011396757787987025, "loss": 0.5509, "step": 7658 }, { "epoch": 1.3662474355543663, "grad_norm": 0.5350466966629028, "learning_rate": 0.00011390880089075483, "loss": 0.7065, "step": 7659 }, { "epoch": 1.3664258317723665, "grad_norm": 0.5175812840461731, "learning_rate": 0.00011385003459018369, "loss": 0.7321, "step": 7660 }, { "epoch": 1.3666042279903667, "grad_norm": 0.4582364857196808, "learning_rate": 0.00011379127898277217, "loss": 0.5169, "step": 7661 }, { "epoch": 1.3667826242083667, "grad_norm": 0.45724961161613464, "learning_rate": 0.00011373253407313508, "loss": 0.5547, "step": 7662 }, { "epoch": 1.366961020426367, "grad_norm": 0.48377981781959534, "learning_rate": 0.00011367379986588603, "loss": 0.4886, "step": 7663 }, { "epoch": 1.3671394166443671, "grad_norm": 0.4916684627532959, "learning_rate": 0.00011361507636563817, "loss": 0.4752, "step": 7664 }, { "epoch": 1.3673178128623673, "grad_norm": 0.48477721214294434, "learning_rate": 0.00011355636357700342, "loss": 0.5644, "step": 7665 }, { "epoch": 1.3674962090803675, "grad_norm": 0.5753141641616821, "learning_rate": 0.00011349766150459314, "loss": 0.7344, "step": 7666 }, { "epoch": 1.3676746052983677, "grad_norm": 0.48007839918136597, "learning_rate": 0.0001134389701530179, "loss": 0.5605, "step": 7667 }, { "epoch": 1.367853001516368, "grad_norm": 0.46499067544937134, "learning_rate": 0.00011338028952688709, "loss": 0.5273, "step": 7668 }, { "epoch": 1.368031397734368, "grad_norm": 0.4519871473312378, "learning_rate": 0.00011332161963080961, "loss": 0.5694, "step": 7669 }, { "epoch": 1.368209793952368, "grad_norm": 0.4737212061882019, "learning_rate": 0.00011326296046939332, "loss": 0.5597, "step": 7670 }, { "epoch": 1.3683881901703683, "grad_norm": 0.4954215884208679, "learning_rate": 0.00011320431204724519, "loss": 0.6356, "step": 7671 }, { "epoch": 1.3685665863883685, "grad_norm": 0.4386094808578491, "learning_rate": 0.00011314567436897161, "loss": 0.6296, "step": 7672 }, { "epoch": 1.3687449826063687, "grad_norm": 0.7949509620666504, "learning_rate": 0.0001130870474391778, "loss": 0.6376, "step": 7673 }, { "epoch": 1.3689233788243689, "grad_norm": 0.47607821226119995, "learning_rate": 0.00011302843126246842, "loss": 0.5973, "step": 7674 }, { "epoch": 1.369101775042369, "grad_norm": 0.4358568489551544, "learning_rate": 0.00011296982584344704, "loss": 0.4679, "step": 7675 }, { "epoch": 1.3692801712603693, "grad_norm": 0.5030319690704346, "learning_rate": 0.00011291123118671665, "loss": 0.6087, "step": 7676 }, { "epoch": 1.3694585674783695, "grad_norm": 0.5743786096572876, "learning_rate": 0.00011285264729687908, "loss": 0.8441, "step": 7677 }, { "epoch": 1.3696369636963697, "grad_norm": 0.45528116822242737, "learning_rate": 0.00011279407417853569, "loss": 0.4794, "step": 7678 }, { "epoch": 1.3698153599143699, "grad_norm": 0.538870632648468, "learning_rate": 0.00011273551183628664, "loss": 0.6597, "step": 7679 }, { "epoch": 1.36999375613237, "grad_norm": 0.4873597025871277, "learning_rate": 0.00011267696027473132, "loss": 0.5062, "step": 7680 }, { "epoch": 1.3701721523503703, "grad_norm": 0.573042094707489, "learning_rate": 0.00011261841949846846, "loss": 0.6462, "step": 7681 }, { "epoch": 1.3703505485683705, "grad_norm": 0.4371779263019562, "learning_rate": 0.00011255988951209589, "loss": 0.4188, "step": 7682 }, { "epoch": 1.3705289447863707, "grad_norm": 0.5239207744598389, "learning_rate": 0.00011250137032021038, "loss": 0.6176, "step": 7683 }, { "epoch": 1.3707073410043706, "grad_norm": 0.4236181378364563, "learning_rate": 0.00011244286192740815, "loss": 0.4331, "step": 7684 }, { "epoch": 1.3708857372223708, "grad_norm": 0.5079860091209412, "learning_rate": 0.00011238436433828427, "loss": 0.5725, "step": 7685 }, { "epoch": 1.371064133440371, "grad_norm": 0.48492246866226196, "learning_rate": 0.00011232587755743332, "loss": 0.5257, "step": 7686 }, { "epoch": 1.3712425296583712, "grad_norm": 0.43290892243385315, "learning_rate": 0.00011226740158944856, "loss": 0.4729, "step": 7687 }, { "epoch": 1.3714209258763714, "grad_norm": 0.4931113123893738, "learning_rate": 0.00011220893643892291, "loss": 0.5433, "step": 7688 }, { "epoch": 1.3715993220943716, "grad_norm": 0.492724746465683, "learning_rate": 0.00011215048211044801, "loss": 0.5631, "step": 7689 }, { "epoch": 1.3717777183123718, "grad_norm": 0.48536619544029236, "learning_rate": 0.000112092038608615, "loss": 0.509, "step": 7690 }, { "epoch": 1.371956114530372, "grad_norm": 0.43689486384391785, "learning_rate": 0.00011203360593801396, "loss": 0.5927, "step": 7691 }, { "epoch": 1.372134510748372, "grad_norm": 0.40810397267341614, "learning_rate": 0.00011197518410323401, "loss": 0.4723, "step": 7692 }, { "epoch": 1.3723129069663722, "grad_norm": 0.4644933044910431, "learning_rate": 0.00011191677310886384, "loss": 0.562, "step": 7693 }, { "epoch": 1.3724913031843724, "grad_norm": 0.5269336104393005, "learning_rate": 0.00011185837295949075, "loss": 0.5993, "step": 7694 }, { "epoch": 1.3726696994023726, "grad_norm": 0.5395172238349915, "learning_rate": 0.00011179998365970174, "loss": 0.7452, "step": 7695 }, { "epoch": 1.3728480956203728, "grad_norm": 0.5476558804512024, "learning_rate": 0.00011174160521408241, "loss": 0.7672, "step": 7696 }, { "epoch": 1.373026491838373, "grad_norm": 0.5317317247390747, "learning_rate": 0.0001116832376272179, "loss": 0.679, "step": 7697 }, { "epoch": 1.3732048880563732, "grad_norm": 0.9869957566261292, "learning_rate": 0.00011162488090369252, "loss": 0.7226, "step": 7698 }, { "epoch": 1.3733832842743734, "grad_norm": 0.5336429476737976, "learning_rate": 0.00011156653504808934, "loss": 0.7258, "step": 7699 }, { "epoch": 1.3735616804923736, "grad_norm": 0.5142067670822144, "learning_rate": 0.00011150820006499101, "loss": 0.6718, "step": 7700 }, { "epoch": 1.3737400767103738, "grad_norm": 0.49160951375961304, "learning_rate": 0.00011144987595897896, "loss": 0.623, "step": 7701 }, { "epoch": 1.373918472928374, "grad_norm": 0.4981287717819214, "learning_rate": 0.00011139156273463411, "loss": 0.6447, "step": 7702 }, { "epoch": 1.3740968691463742, "grad_norm": 0.4330550730228424, "learning_rate": 0.0001113332603965363, "loss": 0.3944, "step": 7703 }, { "epoch": 1.3742752653643744, "grad_norm": 0.5066832900047302, "learning_rate": 0.00011127496894926442, "loss": 0.5891, "step": 7704 }, { "epoch": 1.3744536615823746, "grad_norm": 0.5171502232551575, "learning_rate": 0.00011121668839739691, "loss": 0.7084, "step": 7705 }, { "epoch": 1.3746320578003746, "grad_norm": 0.48667481541633606, "learning_rate": 0.00011115841874551084, "loss": 0.5493, "step": 7706 }, { "epoch": 1.3748104540183748, "grad_norm": 0.4992111623287201, "learning_rate": 0.00011110015999818293, "loss": 0.584, "step": 7707 }, { "epoch": 1.374988850236375, "grad_norm": 0.4618065357208252, "learning_rate": 0.00011104191215998857, "loss": 0.6038, "step": 7708 }, { "epoch": 1.3751672464543752, "grad_norm": 0.4726942777633667, "learning_rate": 0.00011098367523550273, "loss": 0.5524, "step": 7709 }, { "epoch": 1.3753456426723754, "grad_norm": 0.4904707372188568, "learning_rate": 0.00011092544922929914, "loss": 0.6269, "step": 7710 }, { "epoch": 1.3755240388903756, "grad_norm": 0.4519045054912567, "learning_rate": 0.0001108672341459509, "loss": 0.5587, "step": 7711 }, { "epoch": 1.3757024351083758, "grad_norm": 0.45719850063323975, "learning_rate": 0.00011080902999003032, "loss": 0.5617, "step": 7712 }, { "epoch": 1.375880831326376, "grad_norm": 0.4688970446586609, "learning_rate": 0.00011075083676610853, "loss": 0.4525, "step": 7713 }, { "epoch": 1.376059227544376, "grad_norm": 0.4638102650642395, "learning_rate": 0.00011069265447875617, "loss": 0.5887, "step": 7714 }, { "epoch": 1.3762376237623761, "grad_norm": 0.5201415419578552, "learning_rate": 0.0001106344831325427, "loss": 0.6049, "step": 7715 }, { "epoch": 1.3764160199803763, "grad_norm": 0.5137766003608704, "learning_rate": 0.00011057632273203708, "loss": 0.6497, "step": 7716 }, { "epoch": 1.3765944161983765, "grad_norm": 0.5084298849105835, "learning_rate": 0.00011051817328180702, "loss": 0.6516, "step": 7717 }, { "epoch": 1.3767728124163767, "grad_norm": 0.4721417725086212, "learning_rate": 0.00011046003478641955, "loss": 0.5664, "step": 7718 }, { "epoch": 1.376951208634377, "grad_norm": 0.4899318814277649, "learning_rate": 0.00011040190725044097, "loss": 0.6067, "step": 7719 }, { "epoch": 1.3771296048523771, "grad_norm": 0.48559266328811646, "learning_rate": 0.00011034379067843644, "loss": 0.5541, "step": 7720 }, { "epoch": 1.3773080010703773, "grad_norm": 0.46098917722702026, "learning_rate": 0.00011028568507497058, "loss": 0.5172, "step": 7721 }, { "epoch": 1.3774863972883775, "grad_norm": 0.5564523935317993, "learning_rate": 0.00011022759044460678, "loss": 0.7203, "step": 7722 }, { "epoch": 1.3776647935063777, "grad_norm": 0.5125877261161804, "learning_rate": 0.00011016950679190798, "loss": 0.5827, "step": 7723 }, { "epoch": 1.377843189724378, "grad_norm": 0.4653429090976715, "learning_rate": 0.00011011143412143596, "loss": 0.593, "step": 7724 }, { "epoch": 1.3780215859423781, "grad_norm": 0.44072821736335754, "learning_rate": 0.0001100533724377516, "loss": 0.5665, "step": 7725 }, { "epoch": 1.3781999821603783, "grad_norm": 0.48405829071998596, "learning_rate": 0.00010999532174541524, "loss": 0.6273, "step": 7726 }, { "epoch": 1.3783783783783785, "grad_norm": 0.5710448622703552, "learning_rate": 0.0001099372820489859, "loss": 0.6321, "step": 7727 }, { "epoch": 1.3785567745963785, "grad_norm": 0.47928139567375183, "learning_rate": 0.00010987925335302229, "loss": 0.6109, "step": 7728 }, { "epoch": 1.3787351708143787, "grad_norm": 0.47921472787857056, "learning_rate": 0.00010982123566208185, "loss": 0.5726, "step": 7729 }, { "epoch": 1.3789135670323789, "grad_norm": 0.4955821633338928, "learning_rate": 0.00010976322898072117, "loss": 0.5733, "step": 7730 }, { "epoch": 1.379091963250379, "grad_norm": 0.45048630237579346, "learning_rate": 0.00010970523331349619, "loss": 0.5002, "step": 7731 }, { "epoch": 1.3792703594683793, "grad_norm": 0.4999293088912964, "learning_rate": 0.00010964724866496173, "loss": 0.6949, "step": 7732 }, { "epoch": 1.3794487556863795, "grad_norm": 0.5359913110733032, "learning_rate": 0.00010958927503967206, "loss": 0.6405, "step": 7733 }, { "epoch": 1.3796271519043797, "grad_norm": 0.4533331096172333, "learning_rate": 0.0001095313124421802, "loss": 0.5499, "step": 7734 }, { "epoch": 1.3798055481223799, "grad_norm": 0.4622163772583008, "learning_rate": 0.00010947336087703872, "loss": 0.532, "step": 7735 }, { "epoch": 1.3799839443403799, "grad_norm": 0.4350248873233795, "learning_rate": 0.0001094154203487989, "loss": 0.4439, "step": 7736 }, { "epoch": 1.38016234055838, "grad_norm": 0.47929033637046814, "learning_rate": 0.00010935749086201158, "loss": 0.5798, "step": 7737 }, { "epoch": 1.3803407367763803, "grad_norm": 0.5002579689025879, "learning_rate": 0.00010929957242122637, "loss": 0.6427, "step": 7738 }, { "epoch": 1.3805191329943804, "grad_norm": 0.5057860612869263, "learning_rate": 0.00010924166503099211, "loss": 0.7614, "step": 7739 }, { "epoch": 1.3806975292123806, "grad_norm": 0.4656047224998474, "learning_rate": 0.00010918376869585702, "loss": 0.5467, "step": 7740 }, { "epoch": 1.3808759254303808, "grad_norm": 0.5039976835250854, "learning_rate": 0.00010912588342036802, "loss": 0.6859, "step": 7741 }, { "epoch": 1.381054321648381, "grad_norm": 0.6870278716087341, "learning_rate": 0.00010906800920907153, "loss": 0.5815, "step": 7742 }, { "epoch": 1.3812327178663812, "grad_norm": 0.43213722109794617, "learning_rate": 0.00010901014606651305, "loss": 0.3694, "step": 7743 }, { "epoch": 1.3814111140843814, "grad_norm": 0.5106995701789856, "learning_rate": 0.00010895229399723694, "loss": 0.5759, "step": 7744 }, { "epoch": 1.3815895103023816, "grad_norm": 0.45927223563194275, "learning_rate": 0.00010889445300578701, "loss": 0.5318, "step": 7745 }, { "epoch": 1.3817679065203818, "grad_norm": 0.5364645719528198, "learning_rate": 0.00010883662309670597, "loss": 0.6094, "step": 7746 }, { "epoch": 1.381946302738382, "grad_norm": 0.5728722214698792, "learning_rate": 0.00010877880427453588, "loss": 0.7887, "step": 7747 }, { "epoch": 1.3821246989563822, "grad_norm": 0.49186766147613525, "learning_rate": 0.00010872099654381762, "loss": 0.5248, "step": 7748 }, { "epoch": 1.3823030951743824, "grad_norm": 0.48165062069892883, "learning_rate": 0.00010866319990909163, "loss": 0.6226, "step": 7749 }, { "epoch": 1.3824814913923824, "grad_norm": 0.49330055713653564, "learning_rate": 0.00010860541437489705, "loss": 0.6703, "step": 7750 }, { "epoch": 1.3826598876103826, "grad_norm": 0.5186200141906738, "learning_rate": 0.00010854763994577232, "loss": 0.7396, "step": 7751 }, { "epoch": 1.3828382838283828, "grad_norm": 0.5332891941070557, "learning_rate": 0.00010848987662625515, "loss": 0.7148, "step": 7752 }, { "epoch": 1.383016680046383, "grad_norm": 0.5629260540008545, "learning_rate": 0.00010843212442088207, "loss": 0.7122, "step": 7753 }, { "epoch": 1.3831950762643832, "grad_norm": 0.44756463170051575, "learning_rate": 0.00010837438333418914, "loss": 0.5578, "step": 7754 }, { "epoch": 1.3833734724823834, "grad_norm": 0.48155879974365234, "learning_rate": 0.00010831665337071109, "loss": 0.5866, "step": 7755 }, { "epoch": 1.3835518687003836, "grad_norm": 0.5249548554420471, "learning_rate": 0.0001082589345349822, "loss": 0.6235, "step": 7756 }, { "epoch": 1.3837302649183838, "grad_norm": 0.4601089656352997, "learning_rate": 0.00010820122683153552, "loss": 0.4966, "step": 7757 }, { "epoch": 1.3839086611363838, "grad_norm": 0.4255712330341339, "learning_rate": 0.00010814353026490345, "loss": 0.3554, "step": 7758 }, { "epoch": 1.384087057354384, "grad_norm": 0.3792378902435303, "learning_rate": 0.00010808584483961755, "loss": 0.3656, "step": 7759 }, { "epoch": 1.3842654535723842, "grad_norm": 0.5113689303398132, "learning_rate": 0.00010802817056020825, "loss": 0.6168, "step": 7760 }, { "epoch": 1.3844438497903844, "grad_norm": 0.5441461205482483, "learning_rate": 0.00010797050743120542, "loss": 0.7846, "step": 7761 }, { "epoch": 1.3846222460083846, "grad_norm": 0.47966670989990234, "learning_rate": 0.00010791285545713783, "loss": 0.5587, "step": 7762 }, { "epoch": 1.3848006422263848, "grad_norm": 0.46844810247421265, "learning_rate": 0.00010785521464253334, "loss": 0.551, "step": 7763 }, { "epoch": 1.384979038444385, "grad_norm": 1.28788161277771, "learning_rate": 0.00010779758499191919, "loss": 0.7065, "step": 7764 }, { "epoch": 1.3851574346623852, "grad_norm": 0.5162654519081116, "learning_rate": 0.00010773996650982146, "loss": 0.6795, "step": 7765 }, { "epoch": 1.3853358308803854, "grad_norm": 0.5239789485931396, "learning_rate": 0.00010768235920076561, "loss": 0.7062, "step": 7766 }, { "epoch": 1.3855142270983856, "grad_norm": 0.4905536472797394, "learning_rate": 0.00010762476306927594, "loss": 0.5994, "step": 7767 }, { "epoch": 1.3856926233163858, "grad_norm": 0.5204237699508667, "learning_rate": 0.00010756717811987618, "loss": 0.5825, "step": 7768 }, { "epoch": 1.385871019534386, "grad_norm": 0.5072759389877319, "learning_rate": 0.0001075096043570889, "loss": 0.6562, "step": 7769 }, { "epoch": 1.3860494157523862, "grad_norm": 0.45566022396087646, "learning_rate": 0.00010745204178543605, "loss": 0.5398, "step": 7770 }, { "epoch": 1.3862278119703864, "grad_norm": 0.4731004238128662, "learning_rate": 0.00010739449040943849, "loss": 0.6673, "step": 7771 }, { "epoch": 1.3864062081883863, "grad_norm": 0.472790390253067, "learning_rate": 0.0001073369502336161, "loss": 0.499, "step": 7772 }, { "epoch": 1.3865846044063865, "grad_norm": 0.46505823731422424, "learning_rate": 0.00010727942126248843, "loss": 0.5368, "step": 7773 }, { "epoch": 1.3867630006243867, "grad_norm": 0.47594350576400757, "learning_rate": 0.0001072219035005735, "loss": 0.5684, "step": 7774 }, { "epoch": 1.386941396842387, "grad_norm": 0.49051812291145325, "learning_rate": 0.00010716439695238895, "loss": 0.5684, "step": 7775 }, { "epoch": 1.3871197930603871, "grad_norm": 0.5569090843200684, "learning_rate": 0.00010710690162245118, "loss": 0.666, "step": 7776 }, { "epoch": 1.3872981892783873, "grad_norm": 0.4677387475967407, "learning_rate": 0.00010704941751527578, "loss": 0.5223, "step": 7777 }, { "epoch": 1.3874765854963875, "grad_norm": 0.5612090229988098, "learning_rate": 0.00010699194463537767, "loss": 0.7503, "step": 7778 }, { "epoch": 1.3876549817143877, "grad_norm": 0.5161556601524353, "learning_rate": 0.00010693448298727062, "loss": 0.6457, "step": 7779 }, { "epoch": 1.3878333779323877, "grad_norm": 0.5419967174530029, "learning_rate": 0.0001068770325754678, "loss": 0.6822, "step": 7780 }, { "epoch": 1.388011774150388, "grad_norm": 0.4641594886779785, "learning_rate": 0.00010681959340448116, "loss": 0.4709, "step": 7781 }, { "epoch": 1.388190170368388, "grad_norm": 0.8507322669029236, "learning_rate": 0.00010676216547882214, "loss": 0.5894, "step": 7782 }, { "epoch": 1.3883685665863883, "grad_norm": 0.5100333094596863, "learning_rate": 0.00010670474880300098, "loss": 0.6258, "step": 7783 }, { "epoch": 1.3885469628043885, "grad_norm": 0.49771493673324585, "learning_rate": 0.00010664734338152712, "loss": 0.5853, "step": 7784 }, { "epoch": 1.3887253590223887, "grad_norm": 0.5230436325073242, "learning_rate": 0.0001065899492189093, "loss": 0.6821, "step": 7785 }, { "epoch": 1.388903755240389, "grad_norm": 0.5067762732505798, "learning_rate": 0.00010653256631965505, "loss": 0.7528, "step": 7786 }, { "epoch": 1.389082151458389, "grad_norm": 0.48270925879478455, "learning_rate": 0.0001064751946882714, "loss": 0.5972, "step": 7787 }, { "epoch": 1.3892605476763893, "grad_norm": 0.47450000047683716, "learning_rate": 0.0001064178343292641, "loss": 0.5595, "step": 7788 }, { "epoch": 1.3894389438943895, "grad_norm": 0.48282551765441895, "learning_rate": 0.00010636048524713832, "loss": 0.515, "step": 7789 }, { "epoch": 1.3896173401123897, "grad_norm": 0.5305141806602478, "learning_rate": 0.0001063031474463983, "loss": 0.5755, "step": 7790 }, { "epoch": 1.3897957363303899, "grad_norm": 0.48532387614250183, "learning_rate": 0.00010624582093154717, "loss": 0.5692, "step": 7791 }, { "epoch": 1.38997413254839, "grad_norm": 0.5484132170677185, "learning_rate": 0.00010618850570708746, "loss": 0.6092, "step": 7792 }, { "epoch": 1.3901525287663903, "grad_norm": 0.5588973760604858, "learning_rate": 0.00010613120177752056, "loss": 0.7572, "step": 7793 }, { "epoch": 1.3903309249843903, "grad_norm": 0.5241721272468567, "learning_rate": 0.00010607390914734721, "loss": 0.4951, "step": 7794 }, { "epoch": 1.3905093212023905, "grad_norm": 0.49718037247657776, "learning_rate": 0.00010601662782106705, "loss": 0.7169, "step": 7795 }, { "epoch": 1.3906877174203907, "grad_norm": 0.5253555774688721, "learning_rate": 0.00010595935780317906, "loss": 0.6851, "step": 7796 }, { "epoch": 1.3908661136383909, "grad_norm": 0.5283133387565613, "learning_rate": 0.00010590209909818113, "loss": 0.6305, "step": 7797 }, { "epoch": 1.391044509856391, "grad_norm": 0.5732675790786743, "learning_rate": 0.0001058448517105702, "loss": 0.781, "step": 7798 }, { "epoch": 1.3912229060743913, "grad_norm": 0.4708344638347626, "learning_rate": 0.0001057876156448427, "loss": 0.5672, "step": 7799 }, { "epoch": 1.3914013022923915, "grad_norm": 0.48784154653549194, "learning_rate": 0.00010573039090549374, "loss": 0.5962, "step": 7800 }, { "epoch": 1.3915796985103916, "grad_norm": 0.4613569974899292, "learning_rate": 0.00010567317749701785, "loss": 0.5355, "step": 7801 }, { "epoch": 1.3917580947283916, "grad_norm": 0.51911461353302, "learning_rate": 0.00010561597542390842, "loss": 0.609, "step": 7802 }, { "epoch": 1.3919364909463918, "grad_norm": 0.478140652179718, "learning_rate": 0.00010555878469065814, "loss": 0.5447, "step": 7803 }, { "epoch": 1.392114887164392, "grad_norm": 0.4372231662273407, "learning_rate": 0.00010550160530175883, "loss": 0.5127, "step": 7804 }, { "epoch": 1.3922932833823922, "grad_norm": 0.4996614456176758, "learning_rate": 0.00010544443726170119, "loss": 0.5593, "step": 7805 }, { "epoch": 1.3924716796003924, "grad_norm": 0.47102734446525574, "learning_rate": 0.00010538728057497532, "loss": 0.6029, "step": 7806 }, { "epoch": 1.3926500758183926, "grad_norm": 0.5277090072631836, "learning_rate": 0.0001053301352460701, "loss": 0.6538, "step": 7807 }, { "epoch": 1.3928284720363928, "grad_norm": 0.4949512183666229, "learning_rate": 0.0001052730012794739, "loss": 0.5816, "step": 7808 }, { "epoch": 1.393006868254393, "grad_norm": 0.4444587230682373, "learning_rate": 0.00010521587867967389, "loss": 0.4541, "step": 7809 }, { "epoch": 1.3931852644723932, "grad_norm": 0.46152183413505554, "learning_rate": 0.0001051587674511564, "loss": 0.4362, "step": 7810 }, { "epoch": 1.3933636606903934, "grad_norm": 0.4519818127155304, "learning_rate": 0.00010510166759840705, "loss": 0.4725, "step": 7811 }, { "epoch": 1.3935420569083936, "grad_norm": 0.48864850401878357, "learning_rate": 0.00010504457912591028, "loss": 0.4834, "step": 7812 }, { "epoch": 1.3937204531263938, "grad_norm": 0.5076178908348083, "learning_rate": 0.00010498750203815, "loss": 0.6161, "step": 7813 }, { "epoch": 1.393898849344394, "grad_norm": 0.4903450012207031, "learning_rate": 0.0001049304363396088, "loss": 0.5639, "step": 7814 }, { "epoch": 1.3940772455623942, "grad_norm": 0.44738972187042236, "learning_rate": 0.0001048733820347688, "loss": 0.4897, "step": 7815 }, { "epoch": 1.3942556417803942, "grad_norm": 0.5322644710540771, "learning_rate": 0.00010481633912811081, "loss": 0.7432, "step": 7816 }, { "epoch": 1.3944340379983944, "grad_norm": 0.5089389085769653, "learning_rate": 0.00010475930762411518, "loss": 0.5638, "step": 7817 }, { "epoch": 1.3946124342163946, "grad_norm": 0.5720763206481934, "learning_rate": 0.00010470228752726094, "loss": 0.6849, "step": 7818 }, { "epoch": 1.3947908304343948, "grad_norm": 0.5013535618782043, "learning_rate": 0.00010464527884202648, "loss": 0.6032, "step": 7819 }, { "epoch": 1.394969226652395, "grad_norm": 0.523189127445221, "learning_rate": 0.00010458828157288938, "loss": 0.7473, "step": 7820 }, { "epoch": 1.3951476228703952, "grad_norm": 0.42415139079093933, "learning_rate": 0.00010453129572432599, "loss": 0.4601, "step": 7821 }, { "epoch": 1.3953260190883954, "grad_norm": 0.4629247784614563, "learning_rate": 0.0001044743213008121, "loss": 0.4898, "step": 7822 }, { "epoch": 1.3955044153063956, "grad_norm": 0.7844071984291077, "learning_rate": 0.00010441735830682242, "loss": 0.8687, "step": 7823 }, { "epoch": 1.3956828115243956, "grad_norm": 0.46940067410469055, "learning_rate": 0.00010436040674683067, "loss": 0.5528, "step": 7824 }, { "epoch": 1.3958612077423957, "grad_norm": 0.49708932638168335, "learning_rate": 0.00010430346662530999, "loss": 0.6975, "step": 7825 }, { "epoch": 1.396039603960396, "grad_norm": 0.47257721424102783, "learning_rate": 0.00010424653794673222, "loss": 0.5658, "step": 7826 }, { "epoch": 1.3962180001783961, "grad_norm": 0.4764406383037567, "learning_rate": 0.00010418962071556876, "loss": 0.7406, "step": 7827 }, { "epoch": 1.3963963963963963, "grad_norm": 0.4551503360271454, "learning_rate": 0.00010413271493628965, "loss": 0.5595, "step": 7828 }, { "epoch": 1.3965747926143965, "grad_norm": 0.4807843267917633, "learning_rate": 0.00010407582061336443, "loss": 0.4765, "step": 7829 }, { "epoch": 1.3967531888323967, "grad_norm": 0.46619912981987, "learning_rate": 0.00010401893775126146, "loss": 0.5123, "step": 7830 }, { "epoch": 1.396931585050397, "grad_norm": 0.4833389222621918, "learning_rate": 0.00010396206635444819, "loss": 0.5746, "step": 7831 }, { "epoch": 1.3971099812683971, "grad_norm": 1.4866968393325806, "learning_rate": 0.00010390520642739149, "loss": 0.4599, "step": 7832 }, { "epoch": 1.3972883774863973, "grad_norm": 0.4797883927822113, "learning_rate": 0.00010384835797455691, "loss": 0.4975, "step": 7833 }, { "epoch": 1.3974667737043975, "grad_norm": 0.543132483959198, "learning_rate": 0.00010379152100040942, "loss": 0.7443, "step": 7834 }, { "epoch": 1.3976451699223977, "grad_norm": 0.49611344933509827, "learning_rate": 0.00010373469550941304, "loss": 0.5435, "step": 7835 }, { "epoch": 1.397823566140398, "grad_norm": 0.549901008605957, "learning_rate": 0.00010367788150603061, "loss": 0.8226, "step": 7836 }, { "epoch": 1.3980019623583981, "grad_norm": 0.48870500922203064, "learning_rate": 0.0001036210789947245, "loss": 0.6682, "step": 7837 }, { "epoch": 1.398180358576398, "grad_norm": 0.4649103879928589, "learning_rate": 0.00010356428797995579, "loss": 0.5182, "step": 7838 }, { "epoch": 1.3983587547943983, "grad_norm": 0.49515223503112793, "learning_rate": 0.00010350750846618495, "loss": 0.6104, "step": 7839 }, { "epoch": 1.3985371510123985, "grad_norm": 0.4876445531845093, "learning_rate": 0.00010345074045787128, "loss": 0.6187, "step": 7840 }, { "epoch": 1.3987155472303987, "grad_norm": 0.47437965869903564, "learning_rate": 0.00010339398395947347, "loss": 0.7047, "step": 7841 }, { "epoch": 1.398893943448399, "grad_norm": 0.4776071608066559, "learning_rate": 0.00010333723897544908, "loss": 0.655, "step": 7842 }, { "epoch": 1.399072339666399, "grad_norm": 0.48358532786369324, "learning_rate": 0.00010328050551025472, "loss": 0.6231, "step": 7843 }, { "epoch": 1.3992507358843993, "grad_norm": 0.5266225934028625, "learning_rate": 0.00010322378356834641, "loss": 0.7969, "step": 7844 }, { "epoch": 1.3994291321023995, "grad_norm": 0.42173731327056885, "learning_rate": 0.00010316707315417892, "loss": 0.4284, "step": 7845 }, { "epoch": 1.3996075283203995, "grad_norm": 0.47026675939559937, "learning_rate": 0.00010311037427220637, "loss": 0.5441, "step": 7846 }, { "epoch": 1.3997859245383997, "grad_norm": 0.4756429195404053, "learning_rate": 0.00010305368692688174, "loss": 0.4786, "step": 7847 }, { "epoch": 1.3999643207563999, "grad_norm": 0.48206183314323425, "learning_rate": 0.00010299701112265739, "loss": 0.623, "step": 7848 }, { "epoch": 1.4001427169744, "grad_norm": 0.47061610221862793, "learning_rate": 0.00010294034686398443, "loss": 0.5716, "step": 7849 }, { "epoch": 1.4003211131924003, "grad_norm": 0.46232593059539795, "learning_rate": 0.00010288369415531335, "loss": 0.5713, "step": 7850 }, { "epoch": 1.4004995094104005, "grad_norm": 0.415913850069046, "learning_rate": 0.00010282705300109372, "loss": 0.4381, "step": 7851 }, { "epoch": 1.4006779056284007, "grad_norm": 0.5227693319320679, "learning_rate": 0.00010277042340577388, "loss": 0.6813, "step": 7852 }, { "epoch": 1.4008563018464009, "grad_norm": 0.4625205099582672, "learning_rate": 0.00010271380537380176, "loss": 0.5038, "step": 7853 }, { "epoch": 1.401034698064401, "grad_norm": 0.405355840921402, "learning_rate": 0.00010265719890962388, "loss": 0.4174, "step": 7854 }, { "epoch": 1.4012130942824013, "grad_norm": 0.4783884584903717, "learning_rate": 0.00010260060401768628, "loss": 0.5663, "step": 7855 }, { "epoch": 1.4013914905004015, "grad_norm": 0.49855825304985046, "learning_rate": 0.00010254402070243383, "loss": 0.6415, "step": 7856 }, { "epoch": 1.4015698867184017, "grad_norm": 0.5020763278007507, "learning_rate": 0.00010248744896831044, "loss": 0.6289, "step": 7857 }, { "epoch": 1.4017482829364019, "grad_norm": 0.5697162747383118, "learning_rate": 0.0001024308888197594, "loss": 0.7057, "step": 7858 }, { "epoch": 1.401926679154402, "grad_norm": 0.5255553126335144, "learning_rate": 0.00010237434026122278, "loss": 0.699, "step": 7859 }, { "epoch": 1.402105075372402, "grad_norm": 0.48212558031082153, "learning_rate": 0.00010231780329714202, "loss": 0.4816, "step": 7860 }, { "epoch": 1.4022834715904022, "grad_norm": 0.55892014503479, "learning_rate": 0.00010226127793195736, "loss": 0.7858, "step": 7861 }, { "epoch": 1.4024618678084024, "grad_norm": 0.473724365234375, "learning_rate": 0.00010220476417010843, "loss": 0.7018, "step": 7862 }, { "epoch": 1.4026402640264026, "grad_norm": 0.5031070709228516, "learning_rate": 0.00010214826201603372, "loss": 0.5582, "step": 7863 }, { "epoch": 1.4028186602444028, "grad_norm": 0.5168371200561523, "learning_rate": 0.00010209177147417073, "loss": 0.6044, "step": 7864 }, { "epoch": 1.402997056462403, "grad_norm": 0.5146030783653259, "learning_rate": 0.00010203529254895652, "loss": 0.6976, "step": 7865 }, { "epoch": 1.4031754526804032, "grad_norm": 0.5497780442237854, "learning_rate": 0.00010197882524482669, "loss": 0.6612, "step": 7866 }, { "epoch": 1.4033538488984034, "grad_norm": 0.530221164226532, "learning_rate": 0.00010192236956621628, "loss": 0.6626, "step": 7867 }, { "epoch": 1.4035322451164034, "grad_norm": 0.5098912715911865, "learning_rate": 0.00010186592551755927, "loss": 0.6494, "step": 7868 }, { "epoch": 1.4037106413344036, "grad_norm": 0.44442427158355713, "learning_rate": 0.0001018094931032886, "loss": 0.4793, "step": 7869 }, { "epoch": 1.4038890375524038, "grad_norm": 0.5126553773880005, "learning_rate": 0.0001017530723278367, "loss": 0.6441, "step": 7870 }, { "epoch": 1.404067433770404, "grad_norm": 0.568909227848053, "learning_rate": 0.00010169666319563458, "loss": 0.8517, "step": 7871 }, { "epoch": 1.4042458299884042, "grad_norm": 0.500556230545044, "learning_rate": 0.00010164026571111284, "loss": 0.6562, "step": 7872 }, { "epoch": 1.4044242262064044, "grad_norm": 0.4273224472999573, "learning_rate": 0.00010158387987870065, "loss": 0.4493, "step": 7873 }, { "epoch": 1.4046026224244046, "grad_norm": 0.49929437041282654, "learning_rate": 0.00010152750570282679, "loss": 0.7073, "step": 7874 }, { "epoch": 1.4047810186424048, "grad_norm": 0.47794604301452637, "learning_rate": 0.00010147114318791864, "loss": 0.5762, "step": 7875 }, { "epoch": 1.404959414860405, "grad_norm": 0.4568422734737396, "learning_rate": 0.00010141479233840309, "loss": 0.5284, "step": 7876 }, { "epoch": 1.4051378110784052, "grad_norm": 0.5395530462265015, "learning_rate": 0.00010135845315870579, "loss": 0.7679, "step": 7877 }, { "epoch": 1.4053162072964054, "grad_norm": 0.5494503974914551, "learning_rate": 0.00010130212565325153, "loss": 0.7338, "step": 7878 }, { "epoch": 1.4054946035144056, "grad_norm": 0.4741472899913788, "learning_rate": 0.00010124580982646442, "loss": 0.5062, "step": 7879 }, { "epoch": 1.4056729997324058, "grad_norm": 0.47103115916252136, "learning_rate": 0.00010118950568276722, "loss": 0.548, "step": 7880 }, { "epoch": 1.405851395950406, "grad_norm": 0.48441192507743835, "learning_rate": 0.0001011332132265824, "loss": 0.6215, "step": 7881 }, { "epoch": 1.406029792168406, "grad_norm": 0.5685884356498718, "learning_rate": 0.0001010769324623309, "loss": 0.5983, "step": 7882 }, { "epoch": 1.4062081883864062, "grad_norm": 0.5327664613723755, "learning_rate": 0.00010102066339443299, "loss": 0.418, "step": 7883 }, { "epoch": 1.4063865846044064, "grad_norm": 0.4698556363582611, "learning_rate": 0.00010096440602730816, "loss": 0.6731, "step": 7884 }, { "epoch": 1.4065649808224066, "grad_norm": 0.46319758892059326, "learning_rate": 0.00010090816036537462, "loss": 0.4945, "step": 7885 }, { "epoch": 1.4067433770404068, "grad_norm": 0.5858322381973267, "learning_rate": 0.00010085192641305013, "loss": 0.7778, "step": 7886 }, { "epoch": 1.406921773258407, "grad_norm": 0.4609849750995636, "learning_rate": 0.00010079570417475106, "loss": 0.521, "step": 7887 }, { "epoch": 1.4071001694764071, "grad_norm": 0.6530886888504028, "learning_rate": 0.00010073949365489323, "loss": 0.3922, "step": 7888 }, { "epoch": 1.4072785656944073, "grad_norm": 0.7445523142814636, "learning_rate": 0.00010068329485789138, "loss": 0.6121, "step": 7889 }, { "epoch": 1.4074569619124073, "grad_norm": 0.5070978999137878, "learning_rate": 0.0001006271077881592, "loss": 0.5824, "step": 7890 }, { "epoch": 1.4076353581304075, "grad_norm": 0.46640315651893616, "learning_rate": 0.00010057093245010975, "loss": 0.5733, "step": 7891 }, { "epoch": 1.4078137543484077, "grad_norm": 0.5237272381782532, "learning_rate": 0.00010051476884815491, "loss": 0.5761, "step": 7892 }, { "epoch": 1.407992150566408, "grad_norm": 0.4710404872894287, "learning_rate": 0.00010045861698670589, "loss": 0.5382, "step": 7893 }, { "epoch": 1.4081705467844081, "grad_norm": 0.46644294261932373, "learning_rate": 0.00010040247687017263, "loss": 0.3815, "step": 7894 }, { "epoch": 1.4083489430024083, "grad_norm": 0.4705592393875122, "learning_rate": 0.00010034634850296445, "loss": 0.4517, "step": 7895 }, { "epoch": 1.4085273392204085, "grad_norm": 0.5408839583396912, "learning_rate": 0.00010029023188948976, "loss": 0.8073, "step": 7896 }, { "epoch": 1.4087057354384087, "grad_norm": 0.5179523825645447, "learning_rate": 0.00010023412703415574, "loss": 0.6676, "step": 7897 }, { "epoch": 1.408884131656409, "grad_norm": 0.49431368708610535, "learning_rate": 0.00010017803394136902, "loss": 0.4934, "step": 7898 }, { "epoch": 1.409062527874409, "grad_norm": 0.5114967226982117, "learning_rate": 0.00010012195261553494, "loss": 0.6566, "step": 7899 }, { "epoch": 1.4092409240924093, "grad_norm": 0.47235408425331116, "learning_rate": 0.00010006588306105832, "loss": 0.5071, "step": 7900 }, { "epoch": 1.4094193203104095, "grad_norm": 0.4692701995372772, "learning_rate": 0.00010000982528234262, "loss": 0.5268, "step": 7901 }, { "epoch": 1.4095977165284097, "grad_norm": 0.5551561117172241, "learning_rate": 9.995377928379079e-05, "loss": 0.6615, "step": 7902 }, { "epoch": 1.40977611274641, "grad_norm": 0.48021402955055237, "learning_rate": 9.989774506980457e-05, "loss": 0.601, "step": 7903 }, { "epoch": 1.4099545089644099, "grad_norm": 0.5263514518737793, "learning_rate": 9.984172264478475e-05, "loss": 0.6353, "step": 7904 }, { "epoch": 1.41013290518241, "grad_norm": 0.48175185918807983, "learning_rate": 9.978571201313153e-05, "loss": 0.5538, "step": 7905 }, { "epoch": 1.4103113014004103, "grad_norm": 0.47728538513183594, "learning_rate": 9.972971317924374e-05, "loss": 0.5215, "step": 7906 }, { "epoch": 1.4104896976184105, "grad_norm": 0.49182960391044617, "learning_rate": 9.967372614751971e-05, "loss": 0.4886, "step": 7907 }, { "epoch": 1.4106680938364107, "grad_norm": 0.532971203327179, "learning_rate": 9.961775092235642e-05, "loss": 0.6214, "step": 7908 }, { "epoch": 1.4108464900544109, "grad_norm": 0.4523656368255615, "learning_rate": 9.956178750815037e-05, "loss": 0.4945, "step": 7909 }, { "epoch": 1.411024886272411, "grad_norm": 0.49244529008865356, "learning_rate": 9.950583590929671e-05, "loss": 0.565, "step": 7910 }, { "epoch": 1.4112032824904113, "grad_norm": 0.4732704162597656, "learning_rate": 9.944989613018993e-05, "loss": 0.471, "step": 7911 }, { "epoch": 1.4113816787084112, "grad_norm": 0.5447127223014832, "learning_rate": 9.939396817522362e-05, "loss": 0.6094, "step": 7912 }, { "epoch": 1.4115600749264114, "grad_norm": 0.5061089992523193, "learning_rate": 9.933805204879013e-05, "loss": 0.553, "step": 7913 }, { "epoch": 1.4117384711444116, "grad_norm": 0.5438100695610046, "learning_rate": 9.928214775528127e-05, "loss": 0.6007, "step": 7914 }, { "epoch": 1.4119168673624118, "grad_norm": 0.49287474155426025, "learning_rate": 9.922625529908769e-05, "loss": 0.5443, "step": 7915 }, { "epoch": 1.412095263580412, "grad_norm": 0.5883775353431702, "learning_rate": 9.917037468459905e-05, "loss": 0.7577, "step": 7916 }, { "epoch": 1.4122736597984122, "grad_norm": 0.540412187576294, "learning_rate": 9.911450591620436e-05, "loss": 0.6534, "step": 7917 }, { "epoch": 1.4124520560164124, "grad_norm": 0.5728485584259033, "learning_rate": 9.905864899829135e-05, "loss": 0.7171, "step": 7918 }, { "epoch": 1.4126304522344126, "grad_norm": 0.4712778329849243, "learning_rate": 9.900280393524719e-05, "loss": 0.5482, "step": 7919 }, { "epoch": 1.4128088484524128, "grad_norm": 0.5077334642410278, "learning_rate": 9.894697073145773e-05, "loss": 0.7018, "step": 7920 }, { "epoch": 1.412987244670413, "grad_norm": 0.489620566368103, "learning_rate": 9.889114939130828e-05, "loss": 0.6211, "step": 7921 }, { "epoch": 1.4131656408884132, "grad_norm": 0.4577091634273529, "learning_rate": 9.883533991918291e-05, "loss": 0.5056, "step": 7922 }, { "epoch": 1.4133440371064134, "grad_norm": 0.5330988764762878, "learning_rate": 9.877954231946485e-05, "loss": 0.5441, "step": 7923 }, { "epoch": 1.4135224333244136, "grad_norm": 0.46641597151756287, "learning_rate": 9.872375659653652e-05, "loss": 0.5333, "step": 7924 }, { "epoch": 1.4137008295424138, "grad_norm": 0.47872334718704224, "learning_rate": 9.866798275477915e-05, "loss": 0.5553, "step": 7925 }, { "epoch": 1.4138792257604138, "grad_norm": 0.4439626634120941, "learning_rate": 9.861222079857332e-05, "loss": 0.4818, "step": 7926 }, { "epoch": 1.414057621978414, "grad_norm": 0.5242343544960022, "learning_rate": 9.85564707322986e-05, "loss": 0.6662, "step": 7927 }, { "epoch": 1.4142360181964142, "grad_norm": 0.47222810983657837, "learning_rate": 9.850073256033337e-05, "loss": 0.5828, "step": 7928 }, { "epoch": 1.4144144144144144, "grad_norm": 0.4517582952976227, "learning_rate": 9.844500628705555e-05, "loss": 0.464, "step": 7929 }, { "epoch": 1.4145928106324146, "grad_norm": 0.5746625065803528, "learning_rate": 9.83892919168416e-05, "loss": 0.7129, "step": 7930 }, { "epoch": 1.4147712068504148, "grad_norm": 0.41956639289855957, "learning_rate": 9.83335894540675e-05, "loss": 0.4671, "step": 7931 }, { "epoch": 1.414949603068415, "grad_norm": 0.46683749556541443, "learning_rate": 9.827789890310795e-05, "loss": 0.547, "step": 7932 }, { "epoch": 1.4151279992864152, "grad_norm": 0.5841799378395081, "learning_rate": 9.822222026833703e-05, "loss": 0.8423, "step": 7933 }, { "epoch": 1.4153063955044152, "grad_norm": 0.520228922367096, "learning_rate": 9.816655355412748e-05, "loss": 0.6169, "step": 7934 }, { "epoch": 1.4154847917224154, "grad_norm": 0.47443145513534546, "learning_rate": 9.81108987648516e-05, "loss": 0.725, "step": 7935 }, { "epoch": 1.4156631879404156, "grad_norm": 0.5204434990882874, "learning_rate": 9.805525590488037e-05, "loss": 0.7623, "step": 7936 }, { "epoch": 1.4158415841584158, "grad_norm": 0.4761342704296112, "learning_rate": 9.799962497858387e-05, "loss": 0.7241, "step": 7937 }, { "epoch": 1.416019980376416, "grad_norm": 0.4414876401424408, "learning_rate": 9.794400599033146e-05, "loss": 0.5095, "step": 7938 }, { "epoch": 1.4161983765944162, "grad_norm": 0.543932318687439, "learning_rate": 9.788839894449134e-05, "loss": 0.692, "step": 7939 }, { "epoch": 1.4163767728124164, "grad_norm": 0.5136645436286926, "learning_rate": 9.783280384543097e-05, "loss": 0.6467, "step": 7940 }, { "epoch": 1.4165551690304166, "grad_norm": 0.5013181567192078, "learning_rate": 9.777722069751663e-05, "loss": 0.6199, "step": 7941 }, { "epoch": 1.4167335652484168, "grad_norm": 0.4954874813556671, "learning_rate": 9.772164950511386e-05, "loss": 0.4462, "step": 7942 }, { "epoch": 1.416911961466417, "grad_norm": 0.42616045475006104, "learning_rate": 9.76660902725873e-05, "loss": 0.4236, "step": 7943 }, { "epoch": 1.4170903576844172, "grad_norm": 0.4762613773345947, "learning_rate": 9.761054300430036e-05, "loss": 0.6666, "step": 7944 }, { "epoch": 1.4172687539024174, "grad_norm": 0.43737682700157166, "learning_rate": 9.75550077046159e-05, "loss": 0.4711, "step": 7945 }, { "epoch": 1.4174471501204176, "grad_norm": 0.514951229095459, "learning_rate": 9.749948437789544e-05, "loss": 0.5771, "step": 7946 }, { "epoch": 1.4176255463384178, "grad_norm": 1.177257776260376, "learning_rate": 9.744397302849995e-05, "loss": 0.6234, "step": 7947 }, { "epoch": 1.4178039425564177, "grad_norm": 0.4536794424057007, "learning_rate": 9.738847366078912e-05, "loss": 0.6181, "step": 7948 }, { "epoch": 1.417982338774418, "grad_norm": 0.6050822138786316, "learning_rate": 9.733298627912185e-05, "loss": 0.6667, "step": 7949 }, { "epoch": 1.4181607349924181, "grad_norm": 0.5121973752975464, "learning_rate": 9.727751088785621e-05, "loss": 0.6663, "step": 7950 }, { "epoch": 1.4183391312104183, "grad_norm": 0.5665974020957947, "learning_rate": 9.722204749134908e-05, "loss": 0.634, "step": 7951 }, { "epoch": 1.4185175274284185, "grad_norm": 0.5509992241859436, "learning_rate": 9.716659609395665e-05, "loss": 0.6952, "step": 7952 }, { "epoch": 1.4186959236464187, "grad_norm": 0.4740985035896301, "learning_rate": 9.711115670003393e-05, "loss": 0.5432, "step": 7953 }, { "epoch": 1.418874319864419, "grad_norm": 0.4883805513381958, "learning_rate": 9.705572931393525e-05, "loss": 0.5195, "step": 7954 }, { "epoch": 1.4190527160824191, "grad_norm": 0.4700393080711365, "learning_rate": 9.700031394001366e-05, "loss": 0.5883, "step": 7955 }, { "epoch": 1.419231112300419, "grad_norm": 0.46181434392929077, "learning_rate": 9.694491058262162e-05, "loss": 0.5271, "step": 7956 }, { "epoch": 1.4194095085184193, "grad_norm": 0.48557132482528687, "learning_rate": 9.688951924611048e-05, "loss": 0.5342, "step": 7957 }, { "epoch": 1.4195879047364195, "grad_norm": 0.4598628878593445, "learning_rate": 9.683413993483053e-05, "loss": 0.4381, "step": 7958 }, { "epoch": 1.4197663009544197, "grad_norm": 0.7683700323104858, "learning_rate": 9.677877265313143e-05, "loss": 0.5352, "step": 7959 }, { "epoch": 1.41994469717242, "grad_norm": 0.4990261197090149, "learning_rate": 9.67234174053615e-05, "loss": 0.5855, "step": 7960 }, { "epoch": 1.42012309339042, "grad_norm": 0.46791011095046997, "learning_rate": 9.666807419586849e-05, "loss": 0.4852, "step": 7961 }, { "epoch": 1.4203014896084203, "grad_norm": 0.5144142508506775, "learning_rate": 9.661274302899891e-05, "loss": 0.5664, "step": 7962 }, { "epoch": 1.4204798858264205, "grad_norm": 0.4881432354450226, "learning_rate": 9.655742390909845e-05, "loss": 0.4927, "step": 7963 }, { "epoch": 1.4206582820444207, "grad_norm": 0.4858154356479645, "learning_rate": 9.650211684051193e-05, "loss": 0.5022, "step": 7964 }, { "epoch": 1.4208366782624209, "grad_norm": 0.4513051509857178, "learning_rate": 9.644682182758304e-05, "loss": 0.4369, "step": 7965 }, { "epoch": 1.421015074480421, "grad_norm": 0.6125142574310303, "learning_rate": 9.639153887465477e-05, "loss": 0.6945, "step": 7966 }, { "epoch": 1.4211934706984213, "grad_norm": 0.45510220527648926, "learning_rate": 9.633626798606885e-05, "loss": 0.5108, "step": 7967 }, { "epoch": 1.4213718669164215, "grad_norm": 0.5278647541999817, "learning_rate": 9.628100916616638e-05, "loss": 0.8597, "step": 7968 }, { "epoch": 1.4215502631344217, "grad_norm": 0.4560278654098511, "learning_rate": 9.622576241928733e-05, "loss": 0.5682, "step": 7969 }, { "epoch": 1.4217286593524217, "grad_norm": 0.49285340309143066, "learning_rate": 9.617052774977061e-05, "loss": 0.522, "step": 7970 }, { "epoch": 1.4219070555704219, "grad_norm": 0.4475046992301941, "learning_rate": 9.611530516195454e-05, "loss": 0.493, "step": 7971 }, { "epoch": 1.422085451788422, "grad_norm": 0.46044278144836426, "learning_rate": 9.606009466017602e-05, "loss": 0.5749, "step": 7972 }, { "epoch": 1.4222638480064222, "grad_norm": 0.53668612241745, "learning_rate": 9.600489624877157e-05, "loss": 0.562, "step": 7973 }, { "epoch": 1.4224422442244224, "grad_norm": 0.4615837335586548, "learning_rate": 9.59497099320763e-05, "loss": 0.5402, "step": 7974 }, { "epoch": 1.4226206404424226, "grad_norm": 0.531973659992218, "learning_rate": 9.589453571442444e-05, "loss": 0.7516, "step": 7975 }, { "epoch": 1.4227990366604228, "grad_norm": 0.5027945637702942, "learning_rate": 9.583937360014952e-05, "loss": 0.6668, "step": 7976 }, { "epoch": 1.422977432878423, "grad_norm": 0.49252915382385254, "learning_rate": 9.578422359358377e-05, "loss": 0.624, "step": 7977 }, { "epoch": 1.423155829096423, "grad_norm": 0.4632330536842346, "learning_rate": 9.572908569905883e-05, "loss": 0.6206, "step": 7978 }, { "epoch": 1.4233342253144232, "grad_norm": 0.46430516242980957, "learning_rate": 9.5673959920905e-05, "loss": 0.5605, "step": 7979 }, { "epoch": 1.4235126215324234, "grad_norm": 0.5031800270080566, "learning_rate": 9.561884626345205e-05, "loss": 0.541, "step": 7980 }, { "epoch": 1.4236910177504236, "grad_norm": 0.5108554363250732, "learning_rate": 9.556374473102839e-05, "loss": 0.5613, "step": 7981 }, { "epoch": 1.4238694139684238, "grad_norm": 0.5381075143814087, "learning_rate": 9.550865532796185e-05, "loss": 0.635, "step": 7982 }, { "epoch": 1.424047810186424, "grad_norm": 0.522135317325592, "learning_rate": 9.545357805857901e-05, "loss": 0.5884, "step": 7983 }, { "epoch": 1.4242262064044242, "grad_norm": 0.5213260650634766, "learning_rate": 9.539851292720562e-05, "loss": 0.6214, "step": 7984 }, { "epoch": 1.4244046026224244, "grad_norm": 0.4458264112472534, "learning_rate": 9.53434599381665e-05, "loss": 0.47, "step": 7985 }, { "epoch": 1.4245829988404246, "grad_norm": 0.543368935585022, "learning_rate": 9.528841909578545e-05, "loss": 0.6638, "step": 7986 }, { "epoch": 1.4247613950584248, "grad_norm": 0.5813699960708618, "learning_rate": 9.523339040438536e-05, "loss": 0.7685, "step": 7987 }, { "epoch": 1.424939791276425, "grad_norm": 0.5484562516212463, "learning_rate": 9.517837386828829e-05, "loss": 0.6551, "step": 7988 }, { "epoch": 1.4251181874944252, "grad_norm": 0.5792138576507568, "learning_rate": 9.512336949181502e-05, "loss": 0.6305, "step": 7989 }, { "epoch": 1.4252965837124254, "grad_norm": 0.5417539477348328, "learning_rate": 9.506837727928577e-05, "loss": 0.7499, "step": 7990 }, { "epoch": 1.4254749799304256, "grad_norm": 0.49307334423065186, "learning_rate": 9.501339723501937e-05, "loss": 0.5378, "step": 7991 }, { "epoch": 1.4256533761484256, "grad_norm": 0.44642525911331177, "learning_rate": 9.495842936333415e-05, "loss": 0.4978, "step": 7992 }, { "epoch": 1.4258317723664258, "grad_norm": 0.5179886817932129, "learning_rate": 9.49034736685471e-05, "loss": 0.6413, "step": 7993 }, { "epoch": 1.426010168584426, "grad_norm": 0.48655420541763306, "learning_rate": 9.484853015497458e-05, "loss": 0.5178, "step": 7994 }, { "epoch": 1.4261885648024262, "grad_norm": 0.4873103201389313, "learning_rate": 9.47935988269317e-05, "loss": 0.5396, "step": 7995 }, { "epoch": 1.4263669610204264, "grad_norm": 0.49392032623291016, "learning_rate": 9.47386796887327e-05, "loss": 0.5208, "step": 7996 }, { "epoch": 1.4265453572384266, "grad_norm": 0.5058830380439758, "learning_rate": 9.468377274469109e-05, "loss": 0.5961, "step": 7997 }, { "epoch": 1.4267237534564268, "grad_norm": 0.4766574800014496, "learning_rate": 9.462887799911904e-05, "loss": 0.3617, "step": 7998 }, { "epoch": 1.426902149674427, "grad_norm": 0.44295012950897217, "learning_rate": 9.457399545632814e-05, "loss": 0.5054, "step": 7999 }, { "epoch": 1.427080545892427, "grad_norm": 0.5294760465621948, "learning_rate": 9.451912512062863e-05, "loss": 0.6995, "step": 8000 }, { "epoch": 1.4272589421104271, "grad_norm": 0.46700701117515564, "learning_rate": 9.446426699633023e-05, "loss": 0.613, "step": 8001 }, { "epoch": 1.4274373383284273, "grad_norm": 0.5137273669242859, "learning_rate": 9.44094210877413e-05, "loss": 0.7029, "step": 8002 }, { "epoch": 1.4276157345464275, "grad_norm": 0.5057253837585449, "learning_rate": 9.435458739916946e-05, "loss": 0.6471, "step": 8003 }, { "epoch": 1.4277941307644277, "grad_norm": 0.5789628028869629, "learning_rate": 9.429976593492146e-05, "loss": 0.7758, "step": 8004 }, { "epoch": 1.427972526982428, "grad_norm": 0.5334276556968689, "learning_rate": 9.424495669930272e-05, "loss": 0.6382, "step": 8005 }, { "epoch": 1.4281509232004281, "grad_norm": 0.5051296949386597, "learning_rate": 9.419015969661814e-05, "loss": 0.6057, "step": 8006 }, { "epoch": 1.4283293194184283, "grad_norm": 0.5126160979270935, "learning_rate": 9.413537493117142e-05, "loss": 0.7181, "step": 8007 }, { "epoch": 1.4285077156364285, "grad_norm": 0.5015937089920044, "learning_rate": 9.408060240726515e-05, "loss": 0.5093, "step": 8008 }, { "epoch": 1.4286861118544287, "grad_norm": 0.5069268941879272, "learning_rate": 9.402584212920134e-05, "loss": 0.5187, "step": 8009 }, { "epoch": 1.428864508072429, "grad_norm": 0.5026482343673706, "learning_rate": 9.397109410128071e-05, "loss": 0.5594, "step": 8010 }, { "epoch": 1.4290429042904291, "grad_norm": 0.4498440623283386, "learning_rate": 9.391635832780329e-05, "loss": 0.4379, "step": 8011 }, { "epoch": 1.4292213005084293, "grad_norm": 0.494722843170166, "learning_rate": 9.386163481306784e-05, "loss": 0.5101, "step": 8012 }, { "epoch": 1.4293996967264295, "grad_norm": 0.4928201138973236, "learning_rate": 9.380692356137247e-05, "loss": 0.5785, "step": 8013 }, { "epoch": 1.4295780929444295, "grad_norm": 0.49480581283569336, "learning_rate": 9.375222457701401e-05, "loss": 0.6379, "step": 8014 }, { "epoch": 1.4297564891624297, "grad_norm": 0.46592390537261963, "learning_rate": 9.369753786428869e-05, "loss": 0.5308, "step": 8015 }, { "epoch": 1.42993488538043, "grad_norm": 0.5155026912689209, "learning_rate": 9.364286342749151e-05, "loss": 0.6286, "step": 8016 }, { "epoch": 1.43011328159843, "grad_norm": 0.4269189238548279, "learning_rate": 9.358820127091636e-05, "loss": 0.4702, "step": 8017 }, { "epoch": 1.4302916778164303, "grad_norm": 0.4979037344455719, "learning_rate": 9.353355139885672e-05, "loss": 0.5917, "step": 8018 }, { "epoch": 1.4304700740344305, "grad_norm": 0.43645283579826355, "learning_rate": 9.347891381560455e-05, "loss": 0.5333, "step": 8019 }, { "epoch": 1.4306484702524307, "grad_norm": 0.47989028692245483, "learning_rate": 9.342428852545123e-05, "loss": 0.6838, "step": 8020 }, { "epoch": 1.430826866470431, "grad_norm": 0.41572171449661255, "learning_rate": 9.336967553268691e-05, "loss": 0.3637, "step": 8021 }, { "epoch": 1.4310052626884309, "grad_norm": 0.5728147029876709, "learning_rate": 9.33150748416008e-05, "loss": 0.7151, "step": 8022 }, { "epoch": 1.431183658906431, "grad_norm": 0.48712360858917236, "learning_rate": 9.326048645648134e-05, "loss": 0.5196, "step": 8023 }, { "epoch": 1.4313620551244313, "grad_norm": 0.44054681062698364, "learning_rate": 9.320591038161574e-05, "loss": 0.4682, "step": 8024 }, { "epoch": 1.4315404513424315, "grad_norm": 0.48739486932754517, "learning_rate": 9.315134662129058e-05, "loss": 0.5445, "step": 8025 }, { "epoch": 1.4317188475604317, "grad_norm": 0.44029682874679565, "learning_rate": 9.309679517979102e-05, "loss": 0.4334, "step": 8026 }, { "epoch": 1.4318972437784319, "grad_norm": 0.47833380103111267, "learning_rate": 9.304225606140176e-05, "loss": 0.4728, "step": 8027 }, { "epoch": 1.432075639996432, "grad_norm": 0.5727766156196594, "learning_rate": 9.298772927040618e-05, "loss": 0.7175, "step": 8028 }, { "epoch": 1.4322540362144323, "grad_norm": 0.4518079161643982, "learning_rate": 9.293321481108668e-05, "loss": 0.5195, "step": 8029 }, { "epoch": 1.4324324324324325, "grad_norm": 0.526343584060669, "learning_rate": 9.2878712687725e-05, "loss": 0.588, "step": 8030 }, { "epoch": 1.4326108286504327, "grad_norm": 0.4975213408470154, "learning_rate": 9.282422290460149e-05, "loss": 0.6872, "step": 8031 }, { "epoch": 1.4327892248684329, "grad_norm": 0.5089072585105896, "learning_rate": 9.276974546599599e-05, "loss": 0.6198, "step": 8032 }, { "epoch": 1.432967621086433, "grad_norm": 0.4496549665927887, "learning_rate": 9.27152803761869e-05, "loss": 0.501, "step": 8033 }, { "epoch": 1.4331460173044333, "grad_norm": 0.5993954539299011, "learning_rate": 9.266082763945202e-05, "loss": 0.7204, "step": 8034 }, { "epoch": 1.4333244135224334, "grad_norm": 0.5009292960166931, "learning_rate": 9.260638726006812e-05, "loss": 0.5112, "step": 8035 }, { "epoch": 1.4335028097404334, "grad_norm": 0.49264848232269287, "learning_rate": 9.255195924231075e-05, "loss": 0.5595, "step": 8036 }, { "epoch": 1.4336812059584336, "grad_norm": 0.481372594833374, "learning_rate": 9.249754359045484e-05, "loss": 0.5221, "step": 8037 }, { "epoch": 1.4338596021764338, "grad_norm": 0.5572468638420105, "learning_rate": 9.244314030877398e-05, "loss": 0.7088, "step": 8038 }, { "epoch": 1.434037998394434, "grad_norm": 0.4595695436000824, "learning_rate": 9.238874940154116e-05, "loss": 0.4761, "step": 8039 }, { "epoch": 1.4342163946124342, "grad_norm": 0.43319255113601685, "learning_rate": 9.233437087302806e-05, "loss": 0.5051, "step": 8040 }, { "epoch": 1.4343947908304344, "grad_norm": 0.4756569266319275, "learning_rate": 9.228000472750569e-05, "loss": 0.5404, "step": 8041 }, { "epoch": 1.4345731870484346, "grad_norm": 0.5660195350646973, "learning_rate": 9.22256509692439e-05, "loss": 0.6584, "step": 8042 }, { "epoch": 1.4347515832664348, "grad_norm": 0.5442212224006653, "learning_rate": 9.21713096025115e-05, "loss": 0.7664, "step": 8043 }, { "epoch": 1.4349299794844348, "grad_norm": 0.4902845323085785, "learning_rate": 9.211698063157659e-05, "loss": 0.6131, "step": 8044 }, { "epoch": 1.435108375702435, "grad_norm": 0.47630754113197327, "learning_rate": 9.206266406070601e-05, "loss": 0.5899, "step": 8045 }, { "epoch": 1.4352867719204352, "grad_norm": 0.4589233696460724, "learning_rate": 9.200835989416589e-05, "loss": 0.5258, "step": 8046 }, { "epoch": 1.4354651681384354, "grad_norm": 0.5233177542686462, "learning_rate": 9.195406813622115e-05, "loss": 0.6126, "step": 8047 }, { "epoch": 1.4356435643564356, "grad_norm": 0.475238561630249, "learning_rate": 9.189978879113587e-05, "loss": 0.6048, "step": 8048 }, { "epoch": 1.4358219605744358, "grad_norm": 0.47865399718284607, "learning_rate": 9.184552186317321e-05, "loss": 0.6042, "step": 8049 }, { "epoch": 1.436000356792436, "grad_norm": 0.49072253704071045, "learning_rate": 9.179126735659513e-05, "loss": 0.4852, "step": 8050 }, { "epoch": 1.4361787530104362, "grad_norm": 0.47413280606269836, "learning_rate": 9.173702527566292e-05, "loss": 0.4936, "step": 8051 }, { "epoch": 1.4363571492284364, "grad_norm": 0.5141640901565552, "learning_rate": 9.168279562463655e-05, "loss": 0.6562, "step": 8052 }, { "epoch": 1.4365355454464366, "grad_norm": 0.5093448758125305, "learning_rate": 9.162857840777535e-05, "loss": 0.7238, "step": 8053 }, { "epoch": 1.4367139416644368, "grad_norm": 0.45524322986602783, "learning_rate": 9.157437362933749e-05, "loss": 0.5362, "step": 8054 }, { "epoch": 1.436892337882437, "grad_norm": 0.479604572057724, "learning_rate": 9.152018129358003e-05, "loss": 0.4765, "step": 8055 }, { "epoch": 1.4370707341004372, "grad_norm": 0.4629632532596588, "learning_rate": 9.146600140475944e-05, "loss": 0.5064, "step": 8056 }, { "epoch": 1.4372491303184374, "grad_norm": 0.595065176486969, "learning_rate": 9.141183396713077e-05, "loss": 0.7567, "step": 8057 }, { "epoch": 1.4374275265364373, "grad_norm": 0.47794783115386963, "learning_rate": 9.13576789849485e-05, "loss": 0.4937, "step": 8058 }, { "epoch": 1.4376059227544375, "grad_norm": 0.5082597732543945, "learning_rate": 9.130353646246578e-05, "loss": 0.7144, "step": 8059 }, { "epoch": 1.4377843189724377, "grad_norm": 0.46265289187431335, "learning_rate": 9.124940640393512e-05, "loss": 0.5559, "step": 8060 }, { "epoch": 1.437962715190438, "grad_norm": 0.5391396880149841, "learning_rate": 9.119528881360764e-05, "loss": 0.4127, "step": 8061 }, { "epoch": 1.4381411114084381, "grad_norm": 0.4582943618297577, "learning_rate": 9.114118369573393e-05, "loss": 0.3894, "step": 8062 }, { "epoch": 1.4383195076264383, "grad_norm": 0.460657000541687, "learning_rate": 9.108709105456323e-05, "loss": 0.4666, "step": 8063 }, { "epoch": 1.4384979038444385, "grad_norm": 0.5155767798423767, "learning_rate": 9.103301089434399e-05, "loss": 0.673, "step": 8064 }, { "epoch": 1.4386763000624387, "grad_norm": 0.41279855370521545, "learning_rate": 9.097894321932377e-05, "loss": 0.3958, "step": 8065 }, { "epoch": 1.4388546962804387, "grad_norm": 0.4912382662296295, "learning_rate": 9.09248880337489e-05, "loss": 0.6629, "step": 8066 }, { "epoch": 1.439033092498439, "grad_norm": 0.5284416675567627, "learning_rate": 9.087084534186476e-05, "loss": 0.6543, "step": 8067 }, { "epoch": 1.4392114887164391, "grad_norm": 0.49840492010116577, "learning_rate": 9.081681514791609e-05, "loss": 0.65, "step": 8068 }, { "epoch": 1.4393898849344393, "grad_norm": 0.4940994679927826, "learning_rate": 9.076279745614613e-05, "loss": 0.5973, "step": 8069 }, { "epoch": 1.4395682811524395, "grad_norm": 0.49154722690582275, "learning_rate": 9.070879227079765e-05, "loss": 0.6159, "step": 8070 }, { "epoch": 1.4397466773704397, "grad_norm": 0.4630894660949707, "learning_rate": 9.065479959611194e-05, "loss": 0.5687, "step": 8071 }, { "epoch": 1.43992507358844, "grad_norm": 0.7224597930908203, "learning_rate": 9.060081943632983e-05, "loss": 0.499, "step": 8072 }, { "epoch": 1.44010346980644, "grad_norm": 0.47265711426734924, "learning_rate": 9.054685179569066e-05, "loss": 0.5876, "step": 8073 }, { "epoch": 1.4402818660244403, "grad_norm": 0.5103265643119812, "learning_rate": 9.049289667843325e-05, "loss": 0.7843, "step": 8074 }, { "epoch": 1.4404602622424405, "grad_norm": 0.5575939416885376, "learning_rate": 9.043895408879505e-05, "loss": 0.7539, "step": 8075 }, { "epoch": 1.4406386584604407, "grad_norm": 0.46837523579597473, "learning_rate": 9.038502403101268e-05, "loss": 0.5082, "step": 8076 }, { "epoch": 1.440817054678441, "grad_norm": 0.5041645169258118, "learning_rate": 9.033110650932188e-05, "loss": 0.635, "step": 8077 }, { "epoch": 1.440995450896441, "grad_norm": 0.4759989380836487, "learning_rate": 9.027720152795721e-05, "loss": 0.4989, "step": 8078 }, { "epoch": 1.4411738471144413, "grad_norm": 0.49040788412094116, "learning_rate": 9.022330909115239e-05, "loss": 0.5498, "step": 8079 }, { "epoch": 1.4413522433324413, "grad_norm": 0.44871506094932556, "learning_rate": 9.01694292031402e-05, "loss": 0.5627, "step": 8080 }, { "epoch": 1.4415306395504415, "grad_norm": 0.45151621103286743, "learning_rate": 9.011556186815217e-05, "loss": 0.4521, "step": 8081 }, { "epoch": 1.4417090357684417, "grad_norm": 0.5003564953804016, "learning_rate": 9.006170709041922e-05, "loss": 0.5973, "step": 8082 }, { "epoch": 1.4418874319864419, "grad_norm": 0.559359073638916, "learning_rate": 9.000786487417084e-05, "loss": 0.65, "step": 8083 }, { "epoch": 1.442065828204442, "grad_norm": 0.48790860176086426, "learning_rate": 8.995403522363602e-05, "loss": 0.5723, "step": 8084 }, { "epoch": 1.4422442244224423, "grad_norm": 0.5341598391532898, "learning_rate": 8.99002181430423e-05, "loss": 0.7464, "step": 8085 }, { "epoch": 1.4424226206404425, "grad_norm": 0.5069512128829956, "learning_rate": 8.984641363661666e-05, "loss": 0.7237, "step": 8086 }, { "epoch": 1.4426010168584427, "grad_norm": 0.5093790888786316, "learning_rate": 8.979262170858474e-05, "loss": 0.6849, "step": 8087 }, { "epoch": 1.4427794130764429, "grad_norm": 0.4905139207839966, "learning_rate": 8.973884236317131e-05, "loss": 0.6719, "step": 8088 }, { "epoch": 1.4429578092944428, "grad_norm": 0.5070201754570007, "learning_rate": 8.968507560460029e-05, "loss": 0.6181, "step": 8089 }, { "epoch": 1.443136205512443, "grad_norm": 0.47273147106170654, "learning_rate": 8.963132143709437e-05, "loss": 0.5333, "step": 8090 }, { "epoch": 1.4433146017304432, "grad_norm": 0.5406144261360168, "learning_rate": 8.957757986487556e-05, "loss": 0.6466, "step": 8091 }, { "epoch": 1.4434929979484434, "grad_norm": 0.506829023361206, "learning_rate": 8.95238508921645e-05, "loss": 0.5386, "step": 8092 }, { "epoch": 1.4436713941664436, "grad_norm": 0.515042781829834, "learning_rate": 8.94701345231812e-05, "loss": 0.6152, "step": 8093 }, { "epoch": 1.4438497903844438, "grad_norm": 0.48475679755210876, "learning_rate": 8.941643076214436e-05, "loss": 0.6374, "step": 8094 }, { "epoch": 1.444028186602444, "grad_norm": 0.5149679780006409, "learning_rate": 8.936273961327198e-05, "loss": 0.6776, "step": 8095 }, { "epoch": 1.4442065828204442, "grad_norm": 0.610506534576416, "learning_rate": 8.930906108078096e-05, "loss": 0.5515, "step": 8096 }, { "epoch": 1.4443849790384444, "grad_norm": 0.4772319793701172, "learning_rate": 8.925539516888706e-05, "loss": 0.5777, "step": 8097 }, { "epoch": 1.4445633752564446, "grad_norm": 0.42948904633522034, "learning_rate": 8.920174188180533e-05, "loss": 0.4874, "step": 8098 }, { "epoch": 1.4447417714744448, "grad_norm": 0.44335153698921204, "learning_rate": 8.91481012237495e-05, "loss": 0.3909, "step": 8099 }, { "epoch": 1.444920167692445, "grad_norm": 0.5608864426612854, "learning_rate": 8.909447319893269e-05, "loss": 0.7402, "step": 8100 }, { "epoch": 1.4450985639104452, "grad_norm": 0.5135005116462708, "learning_rate": 8.904085781156671e-05, "loss": 0.6809, "step": 8101 }, { "epoch": 1.4452769601284452, "grad_norm": 0.5145747661590576, "learning_rate": 8.898725506586239e-05, "loss": 0.6553, "step": 8102 }, { "epoch": 1.4454553563464454, "grad_norm": 0.46137893199920654, "learning_rate": 8.893366496602984e-05, "loss": 0.4671, "step": 8103 }, { "epoch": 1.4456337525644456, "grad_norm": 0.47708660364151, "learning_rate": 8.888008751627788e-05, "loss": 0.4231, "step": 8104 }, { "epoch": 1.4458121487824458, "grad_norm": 0.5265963077545166, "learning_rate": 8.882652272081457e-05, "loss": 0.6085, "step": 8105 }, { "epoch": 1.445990545000446, "grad_norm": 0.5075244307518005, "learning_rate": 8.877297058384673e-05, "loss": 0.53, "step": 8106 }, { "epoch": 1.4461689412184462, "grad_norm": 0.504497230052948, "learning_rate": 8.871943110958048e-05, "loss": 0.5873, "step": 8107 }, { "epoch": 1.4463473374364464, "grad_norm": 0.5384894013404846, "learning_rate": 8.866590430222072e-05, "loss": 0.542, "step": 8108 }, { "epoch": 1.4465257336544466, "grad_norm": 0.4829096496105194, "learning_rate": 8.861239016597123e-05, "loss": 0.5085, "step": 8109 }, { "epoch": 1.4467041298724468, "grad_norm": 0.5008237361907959, "learning_rate": 8.855888870503535e-05, "loss": 0.5474, "step": 8110 }, { "epoch": 1.4468825260904468, "grad_norm": 0.4817045331001282, "learning_rate": 8.850539992361475e-05, "loss": 0.4752, "step": 8111 }, { "epoch": 1.447060922308447, "grad_norm": 0.5164246559143066, "learning_rate": 8.845192382591067e-05, "loss": 0.6815, "step": 8112 }, { "epoch": 1.4472393185264472, "grad_norm": 0.4688229262828827, "learning_rate": 8.839846041612295e-05, "loss": 0.5217, "step": 8113 }, { "epoch": 1.4474177147444474, "grad_norm": 0.4715273082256317, "learning_rate": 8.834500969845052e-05, "loss": 0.5708, "step": 8114 }, { "epoch": 1.4475961109624476, "grad_norm": 0.42476388812065125, "learning_rate": 8.829157167709157e-05, "loss": 0.4669, "step": 8115 }, { "epoch": 1.4477745071804478, "grad_norm": 0.5047100782394409, "learning_rate": 8.823814635624288e-05, "loss": 0.7172, "step": 8116 }, { "epoch": 1.447952903398448, "grad_norm": 0.49228885769844055, "learning_rate": 8.81847337401007e-05, "loss": 0.5684, "step": 8117 }, { "epoch": 1.4481312996164482, "grad_norm": 0.5029509663581848, "learning_rate": 8.813133383285977e-05, "loss": 0.6608, "step": 8118 }, { "epoch": 1.4483096958344484, "grad_norm": 0.5024793148040771, "learning_rate": 8.807794663871429e-05, "loss": 0.6344, "step": 8119 }, { "epoch": 1.4484880920524486, "grad_norm": 0.49262115359306335, "learning_rate": 8.802457216185717e-05, "loss": 0.6377, "step": 8120 }, { "epoch": 1.4486664882704487, "grad_norm": 0.43594643473625183, "learning_rate": 8.797121040648049e-05, "loss": 0.527, "step": 8121 }, { "epoch": 1.448844884488449, "grad_norm": 0.4278947114944458, "learning_rate": 8.791786137677524e-05, "loss": 0.5052, "step": 8122 }, { "epoch": 1.4490232807064491, "grad_norm": 0.4548480212688446, "learning_rate": 8.78645250769313e-05, "loss": 0.4939, "step": 8123 }, { "epoch": 1.4492016769244491, "grad_norm": 0.5257616639137268, "learning_rate": 8.781120151113788e-05, "loss": 0.6517, "step": 8124 }, { "epoch": 1.4493800731424493, "grad_norm": 0.5141300559043884, "learning_rate": 8.775789068358283e-05, "loss": 0.5612, "step": 8125 }, { "epoch": 1.4495584693604495, "grad_norm": 0.464578241109848, "learning_rate": 8.770459259845323e-05, "loss": 0.4524, "step": 8126 }, { "epoch": 1.4497368655784497, "grad_norm": 0.48500585556030273, "learning_rate": 8.765130725993514e-05, "loss": 0.6593, "step": 8127 }, { "epoch": 1.44991526179645, "grad_norm": 0.4844876825809479, "learning_rate": 8.759803467221348e-05, "loss": 0.589, "step": 8128 }, { "epoch": 1.4500936580144501, "grad_norm": 0.5171689391136169, "learning_rate": 8.754477483947232e-05, "loss": 0.6696, "step": 8129 }, { "epoch": 1.4502720542324503, "grad_norm": 0.5518413186073303, "learning_rate": 8.749152776589459e-05, "loss": 0.7856, "step": 8130 }, { "epoch": 1.4504504504504505, "grad_norm": 0.4898933470249176, "learning_rate": 8.74382934556624e-05, "loss": 0.6053, "step": 8131 }, { "epoch": 1.4506288466684507, "grad_norm": 0.4357226490974426, "learning_rate": 8.738507191295658e-05, "loss": 0.5202, "step": 8132 }, { "epoch": 1.4508072428864507, "grad_norm": 0.5392679572105408, "learning_rate": 8.733186314195734e-05, "loss": 0.6614, "step": 8133 }, { "epoch": 1.4509856391044509, "grad_norm": 0.566256582736969, "learning_rate": 8.72786671468436e-05, "loss": 0.7098, "step": 8134 }, { "epoch": 1.451164035322451, "grad_norm": 0.45817068219184875, "learning_rate": 8.722548393179319e-05, "loss": 0.5082, "step": 8135 }, { "epoch": 1.4513424315404513, "grad_norm": 0.4491539001464844, "learning_rate": 8.717231350098331e-05, "loss": 0.4243, "step": 8136 }, { "epoch": 1.4515208277584515, "grad_norm": 0.5039847493171692, "learning_rate": 8.711915585858979e-05, "loss": 0.5085, "step": 8137 }, { "epoch": 1.4516992239764517, "grad_norm": 0.5082625150680542, "learning_rate": 8.706601100878778e-05, "loss": 0.5393, "step": 8138 }, { "epoch": 1.4518776201944519, "grad_norm": 0.5001698732376099, "learning_rate": 8.701287895575102e-05, "loss": 0.6006, "step": 8139 }, { "epoch": 1.452056016412452, "grad_norm": 0.4417283535003662, "learning_rate": 8.695975970365264e-05, "loss": 0.6028, "step": 8140 }, { "epoch": 1.4522344126304523, "grad_norm": 0.46480488777160645, "learning_rate": 8.690665325666463e-05, "loss": 0.5018, "step": 8141 }, { "epoch": 1.4524128088484525, "grad_norm": 0.5661883354187012, "learning_rate": 8.685355961895783e-05, "loss": 0.7788, "step": 8142 }, { "epoch": 1.4525912050664527, "grad_norm": 0.46579429507255554, "learning_rate": 8.680047879470233e-05, "loss": 0.5646, "step": 8143 }, { "epoch": 1.4527696012844529, "grad_norm": 0.4672081768512726, "learning_rate": 8.67474107880669e-05, "loss": 0.5924, "step": 8144 }, { "epoch": 1.452947997502453, "grad_norm": 0.4720962941646576, "learning_rate": 8.669435560321968e-05, "loss": 0.4921, "step": 8145 }, { "epoch": 1.453126393720453, "grad_norm": 0.5264710783958435, "learning_rate": 8.664131324432745e-05, "loss": 0.6339, "step": 8146 }, { "epoch": 1.4533047899384532, "grad_norm": 0.4442679286003113, "learning_rate": 8.658828371555613e-05, "loss": 0.4405, "step": 8147 }, { "epoch": 1.4534831861564534, "grad_norm": 0.4961048364639282, "learning_rate": 8.653526702107075e-05, "loss": 0.5394, "step": 8148 }, { "epoch": 1.4536615823744536, "grad_norm": 0.5059047341346741, "learning_rate": 8.64822631650351e-05, "loss": 0.5187, "step": 8149 }, { "epoch": 1.4538399785924538, "grad_norm": 0.6599757671356201, "learning_rate": 8.64292721516122e-05, "loss": 0.6328, "step": 8150 }, { "epoch": 1.454018374810454, "grad_norm": 0.48593562841415405, "learning_rate": 8.637629398496377e-05, "loss": 0.5988, "step": 8151 }, { "epoch": 1.4541967710284542, "grad_norm": 0.5314595103263855, "learning_rate": 8.632332866925091e-05, "loss": 0.5746, "step": 8152 }, { "epoch": 1.4543751672464544, "grad_norm": 0.4576443135738373, "learning_rate": 8.627037620863328e-05, "loss": 0.4491, "step": 8153 }, { "epoch": 1.4545535634644546, "grad_norm": 0.4969932734966278, "learning_rate": 8.621743660726994e-05, "loss": 0.6862, "step": 8154 }, { "epoch": 1.4547319596824546, "grad_norm": 0.5143519043922424, "learning_rate": 8.616450986931857e-05, "loss": 0.4361, "step": 8155 }, { "epoch": 1.4549103559004548, "grad_norm": 0.498677134513855, "learning_rate": 8.611159599893609e-05, "loss": 0.5173, "step": 8156 }, { "epoch": 1.455088752118455, "grad_norm": 0.47454512119293213, "learning_rate": 8.605869500027838e-05, "loss": 0.6723, "step": 8157 }, { "epoch": 1.4552671483364552, "grad_norm": 0.46952202916145325, "learning_rate": 8.600580687750017e-05, "loss": 0.4888, "step": 8158 }, { "epoch": 1.4554455445544554, "grad_norm": 0.6815268993377686, "learning_rate": 8.59529316347554e-05, "loss": 0.5794, "step": 8159 }, { "epoch": 1.4556239407724556, "grad_norm": 0.5202351808547974, "learning_rate": 8.590006927619676e-05, "loss": 0.6447, "step": 8160 }, { "epoch": 1.4558023369904558, "grad_norm": 0.4786362051963806, "learning_rate": 8.584721980597599e-05, "loss": 0.5809, "step": 8161 }, { "epoch": 1.455980733208456, "grad_norm": 0.5115570425987244, "learning_rate": 8.579438322824403e-05, "loss": 0.6302, "step": 8162 }, { "epoch": 1.4561591294264562, "grad_norm": 0.4989685118198395, "learning_rate": 8.574155954715047e-05, "loss": 0.5402, "step": 8163 }, { "epoch": 1.4563375256444564, "grad_norm": 0.5098851919174194, "learning_rate": 8.568874876684418e-05, "loss": 0.6773, "step": 8164 }, { "epoch": 1.4565159218624566, "grad_norm": 0.5459485650062561, "learning_rate": 8.56359508914728e-05, "loss": 0.7716, "step": 8165 }, { "epoch": 1.4566943180804568, "grad_norm": 0.5095139145851135, "learning_rate": 8.55831659251832e-05, "loss": 0.7628, "step": 8166 }, { "epoch": 1.456872714298457, "grad_norm": 0.5003282427787781, "learning_rate": 8.553039387212097e-05, "loss": 0.5369, "step": 8167 }, { "epoch": 1.457051110516457, "grad_norm": 0.5022057890892029, "learning_rate": 8.547763473643074e-05, "loss": 0.5863, "step": 8168 }, { "epoch": 1.4572295067344572, "grad_norm": 0.4498676061630249, "learning_rate": 8.542488852225638e-05, "loss": 0.4954, "step": 8169 }, { "epoch": 1.4574079029524574, "grad_norm": 0.6011500358581543, "learning_rate": 8.537215523374037e-05, "loss": 0.7241, "step": 8170 }, { "epoch": 1.4575862991704576, "grad_norm": 0.48579999804496765, "learning_rate": 8.531943487502445e-05, "loss": 0.4106, "step": 8171 }, { "epoch": 1.4577646953884578, "grad_norm": 0.5279344320297241, "learning_rate": 8.52667274502493e-05, "loss": 0.5385, "step": 8172 }, { "epoch": 1.457943091606458, "grad_norm": 0.5536198616027832, "learning_rate": 8.521403296355443e-05, "loss": 0.7606, "step": 8173 }, { "epoch": 1.4581214878244582, "grad_norm": 0.5520446300506592, "learning_rate": 8.516135141907858e-05, "loss": 0.5819, "step": 8174 }, { "epoch": 1.4582998840424584, "grad_norm": 0.49602624773979187, "learning_rate": 8.510868282095916e-05, "loss": 0.6204, "step": 8175 }, { "epoch": 1.4584782802604586, "grad_norm": 0.4096802771091461, "learning_rate": 8.505602717333291e-05, "loss": 0.3461, "step": 8176 }, { "epoch": 1.4586566764784585, "grad_norm": 0.5201975107192993, "learning_rate": 8.500338448033524e-05, "loss": 0.5463, "step": 8177 }, { "epoch": 1.4588350726964587, "grad_norm": 0.45560964941978455, "learning_rate": 8.495075474610081e-05, "loss": 0.4585, "step": 8178 }, { "epoch": 1.459013468914459, "grad_norm": 0.4644407629966736, "learning_rate": 8.489813797476303e-05, "loss": 0.5049, "step": 8179 }, { "epoch": 1.4591918651324591, "grad_norm": 0.4632987678050995, "learning_rate": 8.484553417045448e-05, "loss": 0.4836, "step": 8180 }, { "epoch": 1.4593702613504593, "grad_norm": 0.5432924628257751, "learning_rate": 8.479294333730664e-05, "loss": 0.6128, "step": 8181 }, { "epoch": 1.4595486575684595, "grad_norm": 0.4162793755531311, "learning_rate": 8.474036547944985e-05, "loss": 0.4007, "step": 8182 }, { "epoch": 1.4597270537864597, "grad_norm": 0.49239131808280945, "learning_rate": 8.468780060101372e-05, "loss": 0.6786, "step": 8183 }, { "epoch": 1.45990545000446, "grad_norm": 0.5322994589805603, "learning_rate": 8.463524870612649e-05, "loss": 0.6764, "step": 8184 }, { "epoch": 1.4600838462224601, "grad_norm": 0.5249794125556946, "learning_rate": 8.458270979891578e-05, "loss": 0.5932, "step": 8185 }, { "epoch": 1.4602622424404603, "grad_norm": 0.5151904821395874, "learning_rate": 8.453018388350772e-05, "loss": 0.5464, "step": 8186 }, { "epoch": 1.4604406386584605, "grad_norm": 0.5483924746513367, "learning_rate": 8.447767096402787e-05, "loss": 0.6301, "step": 8187 }, { "epoch": 1.4606190348764607, "grad_norm": 0.4792023301124573, "learning_rate": 8.442517104460057e-05, "loss": 0.5793, "step": 8188 }, { "epoch": 1.460797431094461, "grad_norm": 0.5382273197174072, "learning_rate": 8.437268412934898e-05, "loss": 0.7128, "step": 8189 }, { "epoch": 1.460975827312461, "grad_norm": 0.5009669065475464, "learning_rate": 8.432021022239561e-05, "loss": 0.5538, "step": 8190 }, { "epoch": 1.461154223530461, "grad_norm": 0.4763016998767853, "learning_rate": 8.426774932786154e-05, "loss": 0.5062, "step": 8191 }, { "epoch": 1.4613326197484613, "grad_norm": 0.49051937460899353, "learning_rate": 8.421530144986722e-05, "loss": 0.6787, "step": 8192 }, { "epoch": 1.4615110159664615, "grad_norm": 0.48328647017478943, "learning_rate": 8.416286659253178e-05, "loss": 0.5788, "step": 8193 }, { "epoch": 1.4616894121844617, "grad_norm": 0.48622655868530273, "learning_rate": 8.411044475997331e-05, "loss": 0.6081, "step": 8194 }, { "epoch": 1.4618678084024619, "grad_norm": 0.45523685216903687, "learning_rate": 8.405803595630926e-05, "loss": 0.5096, "step": 8195 }, { "epoch": 1.462046204620462, "grad_norm": 0.5314157009124756, "learning_rate": 8.400564018565554e-05, "loss": 0.6395, "step": 8196 }, { "epoch": 1.4622246008384623, "grad_norm": 0.48079803586006165, "learning_rate": 8.395325745212747e-05, "loss": 0.5513, "step": 8197 }, { "epoch": 1.4624029970564625, "grad_norm": 0.4904074966907501, "learning_rate": 8.390088775983906e-05, "loss": 0.566, "step": 8198 }, { "epoch": 1.4625813932744625, "grad_norm": 0.48788025975227356, "learning_rate": 8.384853111290352e-05, "loss": 0.6323, "step": 8199 }, { "epoch": 1.4627597894924627, "grad_norm": 0.455021470785141, "learning_rate": 8.379618751543274e-05, "loss": 0.5147, "step": 8200 }, { "epoch": 1.4629381857104629, "grad_norm": 0.47679826617240906, "learning_rate": 8.374385697153791e-05, "loss": 0.6236, "step": 8201 }, { "epoch": 1.463116581928463, "grad_norm": 0.5282472968101501, "learning_rate": 8.369153948532907e-05, "loss": 0.6563, "step": 8202 }, { "epoch": 1.4632949781464633, "grad_norm": 0.5126857757568359, "learning_rate": 8.363923506091506e-05, "loss": 0.5623, "step": 8203 }, { "epoch": 1.4634733743644635, "grad_norm": 0.476513147354126, "learning_rate": 8.358694370240402e-05, "loss": 0.443, "step": 8204 }, { "epoch": 1.4636517705824637, "grad_norm": 0.5799571871757507, "learning_rate": 8.353466541390273e-05, "loss": 0.5796, "step": 8205 }, { "epoch": 1.4638301668004639, "grad_norm": 0.41350480914115906, "learning_rate": 8.348240019951728e-05, "loss": 0.3979, "step": 8206 }, { "epoch": 1.464008563018464, "grad_norm": 0.47292932868003845, "learning_rate": 8.343014806335245e-05, "loss": 0.4637, "step": 8207 }, { "epoch": 1.4641869592364642, "grad_norm": 0.5536729693412781, "learning_rate": 8.3377909009512e-05, "loss": 0.6503, "step": 8208 }, { "epoch": 1.4643653554544644, "grad_norm": 0.5005719065666199, "learning_rate": 8.3325683042099e-05, "loss": 0.4994, "step": 8209 }, { "epoch": 1.4645437516724646, "grad_norm": 0.47958654165267944, "learning_rate": 8.327347016521503e-05, "loss": 0.5329, "step": 8210 }, { "epoch": 1.4647221478904648, "grad_norm": 0.5664541125297546, "learning_rate": 8.322127038296104e-05, "loss": 0.766, "step": 8211 }, { "epoch": 1.4649005441084648, "grad_norm": 0.5163717865943909, "learning_rate": 8.316908369943663e-05, "loss": 0.5823, "step": 8212 }, { "epoch": 1.465078940326465, "grad_norm": 0.4811464548110962, "learning_rate": 8.311691011874067e-05, "loss": 0.55, "step": 8213 }, { "epoch": 1.4652573365444652, "grad_norm": 0.4442557990550995, "learning_rate": 8.306474964497076e-05, "loss": 0.4014, "step": 8214 }, { "epoch": 1.4654357327624654, "grad_norm": 0.5611222982406616, "learning_rate": 8.301260228222351e-05, "loss": 0.7519, "step": 8215 }, { "epoch": 1.4656141289804656, "grad_norm": 0.4814377427101135, "learning_rate": 8.29604680345947e-05, "loss": 0.5494, "step": 8216 }, { "epoch": 1.4657925251984658, "grad_norm": 0.42738616466522217, "learning_rate": 8.290834690617868e-05, "loss": 0.5058, "step": 8217 }, { "epoch": 1.465970921416466, "grad_norm": 0.6096622347831726, "learning_rate": 8.285623890106936e-05, "loss": 0.573, "step": 8218 }, { "epoch": 1.4661493176344662, "grad_norm": 0.43758609890937805, "learning_rate": 8.280414402335909e-05, "loss": 0.4614, "step": 8219 }, { "epoch": 1.4663277138524664, "grad_norm": 0.48316100239753723, "learning_rate": 8.275206227713936e-05, "loss": 0.5601, "step": 8220 }, { "epoch": 1.4665061100704664, "grad_norm": 0.5487798452377319, "learning_rate": 8.269999366650071e-05, "loss": 0.7356, "step": 8221 }, { "epoch": 1.4666845062884666, "grad_norm": 0.4505595862865448, "learning_rate": 8.264793819553252e-05, "loss": 0.4967, "step": 8222 }, { "epoch": 1.4668629025064668, "grad_norm": 0.46539705991744995, "learning_rate": 8.259589586832331e-05, "loss": 0.4694, "step": 8223 }, { "epoch": 1.467041298724467, "grad_norm": 0.5119168758392334, "learning_rate": 8.254386668896033e-05, "loss": 0.5304, "step": 8224 }, { "epoch": 1.4672196949424672, "grad_norm": 0.494151771068573, "learning_rate": 8.249185066153006e-05, "loss": 0.5928, "step": 8225 }, { "epoch": 1.4673980911604674, "grad_norm": 0.4593886137008667, "learning_rate": 8.243984779011779e-05, "loss": 0.5024, "step": 8226 }, { "epoch": 1.4675764873784676, "grad_norm": 0.46240317821502686, "learning_rate": 8.238785807880767e-05, "loss": 0.534, "step": 8227 }, { "epoch": 1.4677548835964678, "grad_norm": 0.49743345379829407, "learning_rate": 8.233588153168312e-05, "loss": 0.5577, "step": 8228 }, { "epoch": 1.467933279814468, "grad_norm": 0.43200600147247314, "learning_rate": 8.228391815282619e-05, "loss": 0.3676, "step": 8229 }, { "epoch": 1.4681116760324682, "grad_norm": 0.46943768858909607, "learning_rate": 8.223196794631826e-05, "loss": 0.4637, "step": 8230 }, { "epoch": 1.4682900722504684, "grad_norm": 0.5156434178352356, "learning_rate": 8.218003091623927e-05, "loss": 0.6567, "step": 8231 }, { "epoch": 1.4684684684684686, "grad_norm": 0.46780145168304443, "learning_rate": 8.212810706666846e-05, "loss": 0.5733, "step": 8232 }, { "epoch": 1.4686468646864688, "grad_norm": 0.5769681334495544, "learning_rate": 8.207619640168393e-05, "loss": 0.8275, "step": 8233 }, { "epoch": 1.4688252609044687, "grad_norm": 0.4969267249107361, "learning_rate": 8.202429892536261e-05, "loss": 0.5933, "step": 8234 }, { "epoch": 1.469003657122469, "grad_norm": 0.5537621378898621, "learning_rate": 8.197241464178065e-05, "loss": 0.7404, "step": 8235 }, { "epoch": 1.4691820533404691, "grad_norm": 0.5195968747138977, "learning_rate": 8.192054355501282e-05, "loss": 0.6725, "step": 8236 }, { "epoch": 1.4693604495584693, "grad_norm": 0.47820261120796204, "learning_rate": 8.18686856691333e-05, "loss": 0.6093, "step": 8237 }, { "epoch": 1.4695388457764695, "grad_norm": 0.45387136936187744, "learning_rate": 8.181684098821474e-05, "loss": 0.552, "step": 8238 }, { "epoch": 1.4697172419944697, "grad_norm": 0.49222466349601746, "learning_rate": 8.17650095163292e-05, "loss": 0.6341, "step": 8239 }, { "epoch": 1.46989563821247, "grad_norm": 0.47406187653541565, "learning_rate": 8.171319125754745e-05, "loss": 0.5789, "step": 8240 }, { "epoch": 1.4700740344304701, "grad_norm": 0.49534159898757935, "learning_rate": 8.16613862159391e-05, "loss": 0.5961, "step": 8241 }, { "epoch": 1.4702524306484703, "grad_norm": 0.5249592661857605, "learning_rate": 8.160959439557316e-05, "loss": 0.6454, "step": 8242 }, { "epoch": 1.4704308268664703, "grad_norm": 0.4379402697086334, "learning_rate": 8.155781580051714e-05, "loss": 0.4289, "step": 8243 }, { "epoch": 1.4706092230844705, "grad_norm": 0.4104413092136383, "learning_rate": 8.150605043483783e-05, "loss": 0.4201, "step": 8244 }, { "epoch": 1.4707876193024707, "grad_norm": 0.5442726612091064, "learning_rate": 8.145429830260073e-05, "loss": 0.7512, "step": 8245 }, { "epoch": 1.470966015520471, "grad_norm": 0.47776398062705994, "learning_rate": 8.140255940787059e-05, "loss": 0.5361, "step": 8246 }, { "epoch": 1.471144411738471, "grad_norm": 0.49612924456596375, "learning_rate": 8.13508337547108e-05, "loss": 0.6154, "step": 8247 }, { "epoch": 1.4713228079564713, "grad_norm": 0.45897966623306274, "learning_rate": 8.129912134718398e-05, "loss": 0.4906, "step": 8248 }, { "epoch": 1.4715012041744715, "grad_norm": 0.5512779355049133, "learning_rate": 8.124742218935164e-05, "loss": 0.739, "step": 8249 }, { "epoch": 1.4716796003924717, "grad_norm": 0.5072031617164612, "learning_rate": 8.119573628527404e-05, "loss": 0.5826, "step": 8250 }, { "epoch": 1.471857996610472, "grad_norm": 0.47292739152908325, "learning_rate": 8.114406363901078e-05, "loss": 0.4861, "step": 8251 }, { "epoch": 1.472036392828472, "grad_norm": 0.448589026927948, "learning_rate": 8.109240425462008e-05, "loss": 0.4831, "step": 8252 }, { "epoch": 1.4722147890464723, "grad_norm": 0.4658273756504059, "learning_rate": 8.104075813615918e-05, "loss": 0.5328, "step": 8253 }, { "epoch": 1.4723931852644725, "grad_norm": 0.5529447197914124, "learning_rate": 8.098912528768452e-05, "loss": 0.6661, "step": 8254 }, { "epoch": 1.4725715814824727, "grad_norm": 0.46816307306289673, "learning_rate": 8.093750571325112e-05, "loss": 0.4844, "step": 8255 }, { "epoch": 1.4727499777004727, "grad_norm": 0.5499048829078674, "learning_rate": 8.088589941691338e-05, "loss": 0.5624, "step": 8256 }, { "epoch": 1.4729283739184729, "grad_norm": 0.5012791752815247, "learning_rate": 8.083430640272424e-05, "loss": 0.5128, "step": 8257 }, { "epoch": 1.473106770136473, "grad_norm": 0.5420527458190918, "learning_rate": 8.078272667473593e-05, "loss": 0.5709, "step": 8258 }, { "epoch": 1.4732851663544733, "grad_norm": 0.4874211549758911, "learning_rate": 8.073116023699939e-05, "loss": 0.5259, "step": 8259 }, { "epoch": 1.4734635625724735, "grad_norm": 0.5653133988380432, "learning_rate": 8.067960709356478e-05, "loss": 0.6445, "step": 8260 }, { "epoch": 1.4736419587904737, "grad_norm": 0.4977559745311737, "learning_rate": 8.062806724848093e-05, "loss": 0.5627, "step": 8261 }, { "epoch": 1.4738203550084739, "grad_norm": 0.5231120586395264, "learning_rate": 8.057654070579573e-05, "loss": 0.5859, "step": 8262 }, { "epoch": 1.473998751226474, "grad_norm": 0.5144016146659851, "learning_rate": 8.052502746955612e-05, "loss": 0.6625, "step": 8263 }, { "epoch": 1.4741771474444743, "grad_norm": 0.5300917029380798, "learning_rate": 8.04735275438079e-05, "loss": 0.6414, "step": 8264 }, { "epoch": 1.4743555436624742, "grad_norm": 0.506809651851654, "learning_rate": 8.042204093259597e-05, "loss": 0.5545, "step": 8265 }, { "epoch": 1.4745339398804744, "grad_norm": 0.4742717742919922, "learning_rate": 8.037056763996398e-05, "loss": 0.5692, "step": 8266 }, { "epoch": 1.4747123360984746, "grad_norm": 0.477117657661438, "learning_rate": 8.031910766995451e-05, "loss": 0.4557, "step": 8267 }, { "epoch": 1.4748907323164748, "grad_norm": 0.4841883182525635, "learning_rate": 8.02676610266094e-05, "loss": 0.6635, "step": 8268 }, { "epoch": 1.475069128534475, "grad_norm": 0.4822656810283661, "learning_rate": 8.021622771396905e-05, "loss": 0.557, "step": 8269 }, { "epoch": 1.4752475247524752, "grad_norm": 0.49790987372398376, "learning_rate": 8.01648077360732e-05, "loss": 0.6972, "step": 8270 }, { "epoch": 1.4754259209704754, "grad_norm": 0.5201764702796936, "learning_rate": 8.01134010969602e-05, "loss": 0.6559, "step": 8271 }, { "epoch": 1.4756043171884756, "grad_norm": 0.4643043577671051, "learning_rate": 8.006200780066763e-05, "loss": 0.4562, "step": 8272 }, { "epoch": 1.4757827134064758, "grad_norm": 0.4936201870441437, "learning_rate": 8.001062785123184e-05, "loss": 0.519, "step": 8273 }, { "epoch": 1.475961109624476, "grad_norm": 0.5245823860168457, "learning_rate": 7.995926125268813e-05, "loss": 0.7527, "step": 8274 }, { "epoch": 1.4761395058424762, "grad_norm": 0.45136356353759766, "learning_rate": 7.99079080090709e-05, "loss": 0.4744, "step": 8275 }, { "epoch": 1.4763179020604764, "grad_norm": 0.5103075504302979, "learning_rate": 7.985656812441336e-05, "loss": 0.5456, "step": 8276 }, { "epoch": 1.4764962982784766, "grad_norm": 0.5783809423446655, "learning_rate": 7.980524160274776e-05, "loss": 0.6718, "step": 8277 }, { "epoch": 1.4766746944964766, "grad_norm": 0.41144779324531555, "learning_rate": 7.975392844810523e-05, "loss": 0.4706, "step": 8278 }, { "epoch": 1.4768530907144768, "grad_norm": 0.5507053732872009, "learning_rate": 7.970262866451583e-05, "loss": 0.6462, "step": 8279 }, { "epoch": 1.477031486932477, "grad_norm": 0.5084280967712402, "learning_rate": 7.965134225600881e-05, "loss": 0.5376, "step": 8280 }, { "epoch": 1.4772098831504772, "grad_norm": 0.46495214104652405, "learning_rate": 7.960006922661197e-05, "loss": 0.4167, "step": 8281 }, { "epoch": 1.4773882793684774, "grad_norm": 0.54592365026474, "learning_rate": 7.954880958035245e-05, "loss": 0.6447, "step": 8282 }, { "epoch": 1.4775666755864776, "grad_norm": 0.588589608669281, "learning_rate": 7.949756332125599e-05, "loss": 0.7427, "step": 8283 }, { "epoch": 1.4777450718044778, "grad_norm": 0.5428383350372314, "learning_rate": 7.944633045334763e-05, "loss": 0.628, "step": 8284 }, { "epoch": 1.477923468022478, "grad_norm": 0.45825469493865967, "learning_rate": 7.939511098065097e-05, "loss": 0.4774, "step": 8285 }, { "epoch": 1.4781018642404782, "grad_norm": 0.591995358467102, "learning_rate": 7.934390490718898e-05, "loss": 0.5751, "step": 8286 }, { "epoch": 1.4782802604584782, "grad_norm": 0.5302790403366089, "learning_rate": 7.929271223698326e-05, "loss": 0.5787, "step": 8287 }, { "epoch": 1.4784586566764784, "grad_norm": 0.5035462379455566, "learning_rate": 7.924153297405437e-05, "loss": 0.5414, "step": 8288 }, { "epoch": 1.4786370528944786, "grad_norm": 0.4451696574687958, "learning_rate": 7.919036712242205e-05, "loss": 0.4648, "step": 8289 }, { "epoch": 1.4788154491124788, "grad_norm": 0.5414475798606873, "learning_rate": 7.913921468610477e-05, "loss": 0.7727, "step": 8290 }, { "epoch": 1.478993845330479, "grad_norm": 0.44653692841529846, "learning_rate": 7.908807566912007e-05, "loss": 0.5023, "step": 8291 }, { "epoch": 1.4791722415484791, "grad_norm": 0.4529421925544739, "learning_rate": 7.903695007548432e-05, "loss": 0.534, "step": 8292 }, { "epoch": 1.4793506377664793, "grad_norm": 0.5018486380577087, "learning_rate": 7.898583790921291e-05, "loss": 0.514, "step": 8293 }, { "epoch": 1.4795290339844795, "grad_norm": 0.4705956280231476, "learning_rate": 7.893473917432029e-05, "loss": 0.5485, "step": 8294 }, { "epoch": 1.4797074302024797, "grad_norm": 0.4740748107433319, "learning_rate": 7.888365387481955e-05, "loss": 0.5393, "step": 8295 }, { "epoch": 1.47988582642048, "grad_norm": 0.5045658349990845, "learning_rate": 7.88325820147231e-05, "loss": 0.5581, "step": 8296 }, { "epoch": 1.4800642226384801, "grad_norm": 0.48949524760246277, "learning_rate": 7.87815235980419e-05, "loss": 0.5983, "step": 8297 }, { "epoch": 1.4802426188564803, "grad_norm": 0.47559529542922974, "learning_rate": 7.873047862878624e-05, "loss": 0.566, "step": 8298 }, { "epoch": 1.4804210150744805, "grad_norm": 0.49631282687187195, "learning_rate": 7.867944711096508e-05, "loss": 0.5443, "step": 8299 }, { "epoch": 1.4805994112924805, "grad_norm": 0.508677065372467, "learning_rate": 7.862842904858633e-05, "loss": 0.602, "step": 8300 }, { "epoch": 1.4807778075104807, "grad_norm": 0.5043870806694031, "learning_rate": 7.857742444565713e-05, "loss": 0.5086, "step": 8301 }, { "epoch": 1.480956203728481, "grad_norm": 0.5479679107666016, "learning_rate": 7.852643330618314e-05, "loss": 0.6697, "step": 8302 }, { "epoch": 1.4811345999464811, "grad_norm": 0.5437744855880737, "learning_rate": 7.847545563416936e-05, "loss": 0.6367, "step": 8303 }, { "epoch": 1.4813129961644813, "grad_norm": 0.5325936675071716, "learning_rate": 7.842449143361943e-05, "loss": 0.6722, "step": 8304 }, { "epoch": 1.4814913923824815, "grad_norm": 0.48976537585258484, "learning_rate": 7.837354070853616e-05, "loss": 0.6325, "step": 8305 }, { "epoch": 1.4816697886004817, "grad_norm": 0.5160368084907532, "learning_rate": 7.832260346292117e-05, "loss": 0.5519, "step": 8306 }, { "epoch": 1.481848184818482, "grad_norm": 0.49977096915245056, "learning_rate": 7.827167970077492e-05, "loss": 0.5907, "step": 8307 }, { "epoch": 1.482026581036482, "grad_norm": 0.47831645607948303, "learning_rate": 7.822076942609707e-05, "loss": 0.5587, "step": 8308 }, { "epoch": 1.482204977254482, "grad_norm": 0.496364951133728, "learning_rate": 7.816987264288606e-05, "loss": 0.5853, "step": 8309 }, { "epoch": 1.4823833734724823, "grad_norm": 0.45871299505233765, "learning_rate": 7.811898935513936e-05, "loss": 0.4607, "step": 8310 }, { "epoch": 1.4825617696904825, "grad_norm": 0.4363771677017212, "learning_rate": 7.806811956685331e-05, "loss": 0.5019, "step": 8311 }, { "epoch": 1.4827401659084827, "grad_norm": 0.5848436951637268, "learning_rate": 7.801726328202305e-05, "loss": 0.6789, "step": 8312 }, { "epoch": 1.4829185621264829, "grad_norm": 0.4620121121406555, "learning_rate": 7.796642050464303e-05, "loss": 0.5479, "step": 8313 }, { "epoch": 1.483096958344483, "grad_norm": 0.5338596701622009, "learning_rate": 7.79155912387062e-05, "loss": 0.5664, "step": 8314 }, { "epoch": 1.4832753545624833, "grad_norm": 0.4988861382007599, "learning_rate": 7.786477548820489e-05, "loss": 0.5782, "step": 8315 }, { "epoch": 1.4834537507804835, "grad_norm": 0.6170352101325989, "learning_rate": 7.781397325712994e-05, "loss": 0.5044, "step": 8316 }, { "epoch": 1.4836321469984837, "grad_norm": 0.5399267077445984, "learning_rate": 7.776318454947154e-05, "loss": 0.6209, "step": 8317 }, { "epoch": 1.4838105432164839, "grad_norm": 0.5562114715576172, "learning_rate": 7.771240936921839e-05, "loss": 0.7107, "step": 8318 }, { "epoch": 1.483988939434484, "grad_norm": 0.46948572993278503, "learning_rate": 7.766164772035855e-05, "loss": 0.5113, "step": 8319 }, { "epoch": 1.4841673356524843, "grad_norm": 0.5978415012359619, "learning_rate": 7.761089960687876e-05, "loss": 0.6767, "step": 8320 }, { "epoch": 1.4843457318704845, "grad_norm": 0.4891349673271179, "learning_rate": 7.756016503276464e-05, "loss": 0.5338, "step": 8321 }, { "epoch": 1.4845241280884847, "grad_norm": 0.45725369453430176, "learning_rate": 7.750944400200102e-05, "loss": 0.4976, "step": 8322 }, { "epoch": 1.4847025243064846, "grad_norm": 0.42975348234176636, "learning_rate": 7.745873651857138e-05, "loss": 0.4602, "step": 8323 }, { "epoch": 1.4848809205244848, "grad_norm": 0.4848777949810028, "learning_rate": 7.740804258645831e-05, "loss": 0.6199, "step": 8324 }, { "epoch": 1.485059316742485, "grad_norm": 0.4301760792732239, "learning_rate": 7.735736220964337e-05, "loss": 0.4882, "step": 8325 }, { "epoch": 1.4852377129604852, "grad_norm": 0.4948813021183014, "learning_rate": 7.730669539210686e-05, "loss": 0.6615, "step": 8326 }, { "epoch": 1.4854161091784854, "grad_norm": 0.4712623059749603, "learning_rate": 7.725604213782824e-05, "loss": 0.5958, "step": 8327 }, { "epoch": 1.4855945053964856, "grad_norm": 0.484377384185791, "learning_rate": 7.720540245078567e-05, "loss": 0.6291, "step": 8328 }, { "epoch": 1.4857729016144858, "grad_norm": 0.5532923340797424, "learning_rate": 7.715477633495649e-05, "loss": 0.712, "step": 8329 }, { "epoch": 1.485951297832486, "grad_norm": 0.4761124849319458, "learning_rate": 7.71041637943167e-05, "loss": 0.4219, "step": 8330 }, { "epoch": 1.486129694050486, "grad_norm": 0.4717256724834442, "learning_rate": 7.70535648328416e-05, "loss": 0.5188, "step": 8331 }, { "epoch": 1.4863080902684862, "grad_norm": 0.4593075215816498, "learning_rate": 7.700297945450507e-05, "loss": 0.4863, "step": 8332 }, { "epoch": 1.4864864864864864, "grad_norm": 0.4873063266277313, "learning_rate": 7.695240766328002e-05, "loss": 0.5419, "step": 8333 }, { "epoch": 1.4866648827044866, "grad_norm": 0.5233901143074036, "learning_rate": 7.690184946313846e-05, "loss": 0.5865, "step": 8334 }, { "epoch": 1.4868432789224868, "grad_norm": 0.5119859576225281, "learning_rate": 7.685130485805112e-05, "loss": 0.5873, "step": 8335 }, { "epoch": 1.487021675140487, "grad_norm": 0.4452389180660248, "learning_rate": 7.680077385198783e-05, "loss": 0.3941, "step": 8336 }, { "epoch": 1.4872000713584872, "grad_norm": 0.49985215067863464, "learning_rate": 7.675025644891714e-05, "loss": 0.586, "step": 8337 }, { "epoch": 1.4873784675764874, "grad_norm": 0.5451793074607849, "learning_rate": 7.669975265280688e-05, "loss": 0.597, "step": 8338 }, { "epoch": 1.4875568637944876, "grad_norm": 0.47509869933128357, "learning_rate": 7.664926246762335e-05, "loss": 0.5232, "step": 8339 }, { "epoch": 1.4877352600124878, "grad_norm": 0.44898343086242676, "learning_rate": 7.659878589733216e-05, "loss": 0.4391, "step": 8340 }, { "epoch": 1.487913656230488, "grad_norm": 0.5049835443496704, "learning_rate": 7.654832294589775e-05, "loss": 0.5392, "step": 8341 }, { "epoch": 1.4880920524484882, "grad_norm": 0.5432078242301941, "learning_rate": 7.649787361728338e-05, "loss": 0.694, "step": 8342 }, { "epoch": 1.4882704486664884, "grad_norm": 0.4939239025115967, "learning_rate": 7.644743791545141e-05, "loss": 0.4921, "step": 8343 }, { "epoch": 1.4884488448844886, "grad_norm": 0.43726587295532227, "learning_rate": 7.639701584436292e-05, "loss": 0.4466, "step": 8344 }, { "epoch": 1.4886272411024886, "grad_norm": 0.48199325799942017, "learning_rate": 7.634660740797817e-05, "loss": 0.5736, "step": 8345 }, { "epoch": 1.4888056373204888, "grad_norm": 0.4477761387825012, "learning_rate": 7.629621261025613e-05, "loss": 0.4244, "step": 8346 }, { "epoch": 1.488984033538489, "grad_norm": 0.564702570438385, "learning_rate": 7.624583145515474e-05, "loss": 0.7607, "step": 8347 }, { "epoch": 1.4891624297564892, "grad_norm": 0.4603510797023773, "learning_rate": 7.619546394663104e-05, "loss": 0.4789, "step": 8348 }, { "epoch": 1.4893408259744894, "grad_norm": 0.4973684549331665, "learning_rate": 7.614511008864073e-05, "loss": 0.5349, "step": 8349 }, { "epoch": 1.4895192221924896, "grad_norm": 0.4872196316719055, "learning_rate": 7.609476988513875e-05, "loss": 0.5265, "step": 8350 }, { "epoch": 1.4896976184104898, "grad_norm": 0.5209683775901794, "learning_rate": 7.604444334007862e-05, "loss": 0.6186, "step": 8351 }, { "epoch": 1.48987601462849, "grad_norm": 0.48873645067214966, "learning_rate": 7.599413045741313e-05, "loss": 0.5037, "step": 8352 }, { "epoch": 1.49005441084649, "grad_norm": 0.45188388228416443, "learning_rate": 7.594383124109375e-05, "loss": 0.4037, "step": 8353 }, { "epoch": 1.4902328070644901, "grad_norm": 0.5492420196533203, "learning_rate": 7.589354569507081e-05, "loss": 0.589, "step": 8354 }, { "epoch": 1.4904112032824903, "grad_norm": 0.532741904258728, "learning_rate": 7.584327382329401e-05, "loss": 0.6725, "step": 8355 }, { "epoch": 1.4905895995004905, "grad_norm": 0.5740191340446472, "learning_rate": 7.579301562971147e-05, "loss": 0.6417, "step": 8356 }, { "epoch": 1.4907679957184907, "grad_norm": 0.4454120099544525, "learning_rate": 7.57427711182706e-05, "loss": 0.397, "step": 8357 }, { "epoch": 1.490946391936491, "grad_norm": 0.49578696489334106, "learning_rate": 7.56925402929175e-05, "loss": 0.4834, "step": 8358 }, { "epoch": 1.4911247881544911, "grad_norm": 0.5603286623954773, "learning_rate": 7.564232315759718e-05, "loss": 0.4489, "step": 8359 }, { "epoch": 1.4913031843724913, "grad_norm": 0.5061764717102051, "learning_rate": 7.559211971625385e-05, "loss": 0.5227, "step": 8360 }, { "epoch": 1.4914815805904915, "grad_norm": 0.48216429352760315, "learning_rate": 7.554192997283033e-05, "loss": 0.605, "step": 8361 }, { "epoch": 1.4916599768084917, "grad_norm": 0.47625431418418884, "learning_rate": 7.549175393126861e-05, "loss": 0.4631, "step": 8362 }, { "epoch": 1.491838373026492, "grad_norm": 0.5075172781944275, "learning_rate": 7.544159159550937e-05, "loss": 0.5409, "step": 8363 }, { "epoch": 1.4920167692444921, "grad_norm": 0.5858179330825806, "learning_rate": 7.539144296949246e-05, "loss": 0.6648, "step": 8364 }, { "epoch": 1.4921951654624923, "grad_norm": 0.47895708680152893, "learning_rate": 7.534130805715644e-05, "loss": 0.5248, "step": 8365 }, { "epoch": 1.4923735616804925, "grad_norm": 0.4535577893257141, "learning_rate": 7.529118686243897e-05, "loss": 0.5158, "step": 8366 }, { "epoch": 1.4925519578984925, "grad_norm": 0.4920910894870758, "learning_rate": 7.524107938927652e-05, "loss": 0.5183, "step": 8367 }, { "epoch": 1.4927303541164927, "grad_norm": 0.49673569202423096, "learning_rate": 7.51909856416044e-05, "loss": 0.5476, "step": 8368 }, { "epoch": 1.4929087503344929, "grad_norm": 0.5008490681648254, "learning_rate": 7.514090562335712e-05, "loss": 0.5373, "step": 8369 }, { "epoch": 1.493087146552493, "grad_norm": 0.46472302079200745, "learning_rate": 7.50908393384678e-05, "loss": 0.4453, "step": 8370 }, { "epoch": 1.4932655427704933, "grad_norm": 0.4681701362133026, "learning_rate": 7.504078679086868e-05, "loss": 0.5433, "step": 8371 }, { "epoch": 1.4934439389884935, "grad_norm": 0.48634573817253113, "learning_rate": 7.499074798449095e-05, "loss": 0.4632, "step": 8372 }, { "epoch": 1.4936223352064937, "grad_norm": 0.5008024573326111, "learning_rate": 7.494072292326448e-05, "loss": 0.5638, "step": 8373 }, { "epoch": 1.4938007314244939, "grad_norm": 0.5322297811508179, "learning_rate": 7.48907116111184e-05, "loss": 0.642, "step": 8374 }, { "epoch": 1.4939791276424939, "grad_norm": 0.5250205993652344, "learning_rate": 7.484071405198037e-05, "loss": 0.6071, "step": 8375 }, { "epoch": 1.494157523860494, "grad_norm": 0.5197263360023499, "learning_rate": 7.479073024977736e-05, "loss": 0.6804, "step": 8376 }, { "epoch": 1.4943359200784943, "grad_norm": 0.4836525022983551, "learning_rate": 7.474076020843496e-05, "loss": 0.5058, "step": 8377 }, { "epoch": 1.4945143162964944, "grad_norm": 0.5223329663276672, "learning_rate": 7.469080393187786e-05, "loss": 0.6614, "step": 8378 }, { "epoch": 1.4946927125144946, "grad_norm": 0.4857519865036011, "learning_rate": 7.464086142402959e-05, "loss": 0.5892, "step": 8379 }, { "epoch": 1.4948711087324948, "grad_norm": 0.4820669889450073, "learning_rate": 7.459093268881254e-05, "loss": 0.5444, "step": 8380 }, { "epoch": 1.495049504950495, "grad_norm": 0.4609326720237732, "learning_rate": 7.45410177301482e-05, "loss": 0.4923, "step": 8381 }, { "epoch": 1.4952279011684952, "grad_norm": 0.47613245248794556, "learning_rate": 7.449111655195678e-05, "loss": 0.5083, "step": 8382 }, { "epoch": 1.4954062973864954, "grad_norm": 0.496552973985672, "learning_rate": 7.444122915815759e-05, "loss": 0.5721, "step": 8383 }, { "epoch": 1.4955846936044956, "grad_norm": 0.493670254945755, "learning_rate": 7.439135555266866e-05, "loss": 0.461, "step": 8384 }, { "epoch": 1.4957630898224958, "grad_norm": 0.5373759269714355, "learning_rate": 7.434149573940707e-05, "loss": 0.5685, "step": 8385 }, { "epoch": 1.495941486040496, "grad_norm": 0.5208349227905273, "learning_rate": 7.429164972228891e-05, "loss": 0.5965, "step": 8386 }, { "epoch": 1.4961198822584962, "grad_norm": 0.5484915971755981, "learning_rate": 7.424181750522887e-05, "loss": 0.5944, "step": 8387 }, { "epoch": 1.4962982784764964, "grad_norm": 0.5299747586250305, "learning_rate": 7.419199909214095e-05, "loss": 0.5318, "step": 8388 }, { "epoch": 1.4964766746944964, "grad_norm": 0.5350375175476074, "learning_rate": 7.414219448693769e-05, "loss": 0.6534, "step": 8389 }, { "epoch": 1.4966550709124966, "grad_norm": 0.5854753255844116, "learning_rate": 7.409240369353084e-05, "loss": 0.7582, "step": 8390 }, { "epoch": 1.4968334671304968, "grad_norm": 0.5602213144302368, "learning_rate": 7.404262671583092e-05, "loss": 0.583, "step": 8391 }, { "epoch": 1.497011863348497, "grad_norm": 0.4914671778678894, "learning_rate": 7.399286355774732e-05, "loss": 0.5397, "step": 8392 }, { "epoch": 1.4971902595664972, "grad_norm": 0.47588059306144714, "learning_rate": 7.394311422318853e-05, "loss": 0.4886, "step": 8393 }, { "epoch": 1.4973686557844974, "grad_norm": 0.5675820708274841, "learning_rate": 7.389337871606172e-05, "loss": 0.6568, "step": 8394 }, { "epoch": 1.4975470520024976, "grad_norm": 0.508738100528717, "learning_rate": 7.384365704027321e-05, "loss": 0.5592, "step": 8395 }, { "epoch": 1.4977254482204978, "grad_norm": 0.4801078140735626, "learning_rate": 7.379394919972804e-05, "loss": 0.4698, "step": 8396 }, { "epoch": 1.4979038444384978, "grad_norm": 0.48387011885643005, "learning_rate": 7.374425519833031e-05, "loss": 0.551, "step": 8397 }, { "epoch": 1.498082240656498, "grad_norm": 0.5585319399833679, "learning_rate": 7.369457503998286e-05, "loss": 0.7281, "step": 8398 }, { "epoch": 1.4982606368744982, "grad_norm": 0.5239479541778564, "learning_rate": 7.36449087285877e-05, "loss": 0.6676, "step": 8399 }, { "epoch": 1.4984390330924984, "grad_norm": 0.550262451171875, "learning_rate": 7.359525626804544e-05, "loss": 0.7478, "step": 8400 }, { "epoch": 1.4986174293104986, "grad_norm": 0.45570600032806396, "learning_rate": 7.354561766225584e-05, "loss": 0.5162, "step": 8401 }, { "epoch": 1.4987958255284988, "grad_norm": 0.43634846806526184, "learning_rate": 7.349599291511757e-05, "loss": 0.4103, "step": 8402 }, { "epoch": 1.498974221746499, "grad_norm": 0.5149981379508972, "learning_rate": 7.344638203052798e-05, "loss": 0.594, "step": 8403 }, { "epoch": 1.4991526179644992, "grad_norm": 0.4976111054420471, "learning_rate": 7.339678501238364e-05, "loss": 0.5622, "step": 8404 }, { "epoch": 1.4993310141824994, "grad_norm": 0.5391610860824585, "learning_rate": 7.334720186457982e-05, "loss": 0.6742, "step": 8405 }, { "epoch": 1.4995094104004996, "grad_norm": 0.4595610499382019, "learning_rate": 7.329763259101069e-05, "loss": 0.4807, "step": 8406 }, { "epoch": 1.4996878066184998, "grad_norm": 0.49623727798461914, "learning_rate": 7.32480771955695e-05, "loss": 0.6094, "step": 8407 }, { "epoch": 1.4998662028365, "grad_norm": 0.47364911437034607, "learning_rate": 7.319853568214818e-05, "loss": 0.5401, "step": 8408 }, { "epoch": 1.5000445990545002, "grad_norm": 0.4699852764606476, "learning_rate": 7.314900805463789e-05, "loss": 0.4449, "step": 8409 }, { "epoch": 1.5002229952725004, "grad_norm": 0.4462581276893616, "learning_rate": 7.30994943169283e-05, "loss": 0.4451, "step": 8410 }, { "epoch": 1.5004013914905006, "grad_norm": 0.5866194367408752, "learning_rate": 7.304999447290838e-05, "loss": 0.6961, "step": 8411 }, { "epoch": 1.5005797877085005, "grad_norm": 0.4362695515155792, "learning_rate": 7.300050852646578e-05, "loss": 0.3869, "step": 8412 }, { "epoch": 1.5007581839265007, "grad_norm": 0.6117103099822998, "learning_rate": 7.295103648148697e-05, "loss": 0.6624, "step": 8413 }, { "epoch": 1.500936580144501, "grad_norm": 0.49935221672058105, "learning_rate": 7.290157834185763e-05, "loss": 0.4923, "step": 8414 }, { "epoch": 1.5011149763625011, "grad_norm": 0.5208079218864441, "learning_rate": 7.285213411146205e-05, "loss": 0.4946, "step": 8415 }, { "epoch": 1.5012933725805013, "grad_norm": 0.6199221611022949, "learning_rate": 7.280270379418363e-05, "loss": 0.6581, "step": 8416 }, { "epoch": 1.5014717687985015, "grad_norm": 0.49500852823257446, "learning_rate": 7.275328739390465e-05, "loss": 0.4407, "step": 8417 }, { "epoch": 1.5016501650165015, "grad_norm": 0.4843553602695465, "learning_rate": 7.270388491450616e-05, "loss": 0.5689, "step": 8418 }, { "epoch": 1.5018285612345017, "grad_norm": 0.5777263045310974, "learning_rate": 7.265449635986831e-05, "loss": 0.5386, "step": 8419 }, { "epoch": 1.502006957452502, "grad_norm": 0.5656114220619202, "learning_rate": 7.260512173386993e-05, "loss": 0.6281, "step": 8420 }, { "epoch": 1.502185353670502, "grad_norm": 0.48054200410842896, "learning_rate": 7.255576104038902e-05, "loss": 0.5009, "step": 8421 }, { "epoch": 1.5023637498885023, "grad_norm": 0.47279128432273865, "learning_rate": 7.25064142833022e-05, "loss": 0.4902, "step": 8422 }, { "epoch": 1.5025421461065025, "grad_norm": 0.5290525555610657, "learning_rate": 7.24570814664853e-05, "loss": 0.4631, "step": 8423 }, { "epoch": 1.5027205423245027, "grad_norm": 0.46618005633354187, "learning_rate": 7.240776259381276e-05, "loss": 0.5708, "step": 8424 }, { "epoch": 1.502898938542503, "grad_norm": 0.4757915139198303, "learning_rate": 7.235845766915819e-05, "loss": 0.499, "step": 8425 }, { "epoch": 1.503077334760503, "grad_norm": 0.4191453754901886, "learning_rate": 7.23091666963939e-05, "loss": 0.3913, "step": 8426 }, { "epoch": 1.5032557309785033, "grad_norm": 0.4610666036605835, "learning_rate": 7.225988967939113e-05, "loss": 0.3548, "step": 8427 }, { "epoch": 1.5034341271965035, "grad_norm": 0.488862544298172, "learning_rate": 7.221062662202018e-05, "loss": 0.5241, "step": 8428 }, { "epoch": 1.5036125234145037, "grad_norm": 0.5716550946235657, "learning_rate": 7.216137752815005e-05, "loss": 0.7528, "step": 8429 }, { "epoch": 1.5037909196325039, "grad_norm": 0.5017661452293396, "learning_rate": 7.211214240164887e-05, "loss": 0.5129, "step": 8430 }, { "epoch": 1.503969315850504, "grad_norm": 0.4955231547355652, "learning_rate": 7.206292124638342e-05, "loss": 0.5957, "step": 8431 }, { "epoch": 1.5041477120685043, "grad_norm": 0.5126091837882996, "learning_rate": 7.201371406621954e-05, "loss": 0.6606, "step": 8432 }, { "epoch": 1.5043261082865045, "grad_norm": 0.4719708561897278, "learning_rate": 7.196452086502206e-05, "loss": 0.4889, "step": 8433 }, { "epoch": 1.5045045045045045, "grad_norm": 0.4715104401111603, "learning_rate": 7.191534164665439e-05, "loss": 0.4824, "step": 8434 }, { "epoch": 1.5046829007225047, "grad_norm": 0.49331334233283997, "learning_rate": 7.186617641497926e-05, "loss": 0.5115, "step": 8435 }, { "epoch": 1.5048612969405049, "grad_norm": 0.4969932436943054, "learning_rate": 7.181702517385788e-05, "loss": 0.5298, "step": 8436 }, { "epoch": 1.505039693158505, "grad_norm": 0.5154744386672974, "learning_rate": 7.176788792715074e-05, "loss": 0.5574, "step": 8437 }, { "epoch": 1.5052180893765053, "grad_norm": 0.5841989517211914, "learning_rate": 7.171876467871699e-05, "loss": 0.5517, "step": 8438 }, { "epoch": 1.5053964855945055, "grad_norm": 0.5319039225578308, "learning_rate": 7.166965543241466e-05, "loss": 0.7089, "step": 8439 }, { "epoch": 1.5055748818125054, "grad_norm": 0.5384335517883301, "learning_rate": 7.162056019210095e-05, "loss": 0.604, "step": 8440 }, { "epoch": 1.5057532780305056, "grad_norm": 0.48112642765045166, "learning_rate": 7.157147896163157e-05, "loss": 0.464, "step": 8441 }, { "epoch": 1.5059316742485058, "grad_norm": 0.519540011882782, "learning_rate": 7.152241174486154e-05, "loss": 0.5771, "step": 8442 }, { "epoch": 1.506110070466506, "grad_norm": 0.49812787771224976, "learning_rate": 7.147335854564444e-05, "loss": 0.5615, "step": 8443 }, { "epoch": 1.5062884666845062, "grad_norm": 0.4924754500389099, "learning_rate": 7.142431936783297e-05, "loss": 0.4846, "step": 8444 }, { "epoch": 1.5064668629025064, "grad_norm": 0.5007926225662231, "learning_rate": 7.137529421527852e-05, "loss": 0.5509, "step": 8445 }, { "epoch": 1.5066452591205066, "grad_norm": 0.4786141514778137, "learning_rate": 7.132628309183165e-05, "loss": 0.4272, "step": 8446 }, { "epoch": 1.5068236553385068, "grad_norm": 0.4895467758178711, "learning_rate": 7.127728600134164e-05, "loss": 0.4545, "step": 8447 }, { "epoch": 1.507002051556507, "grad_norm": 0.4716068506240845, "learning_rate": 7.122830294765664e-05, "loss": 0.4106, "step": 8448 }, { "epoch": 1.5071804477745072, "grad_norm": 0.5841825008392334, "learning_rate": 7.117933393462384e-05, "loss": 0.7319, "step": 8449 }, { "epoch": 1.5073588439925074, "grad_norm": 0.4899286925792694, "learning_rate": 7.113037896608923e-05, "loss": 0.4271, "step": 8450 }, { "epoch": 1.5075372402105076, "grad_norm": 0.5296789407730103, "learning_rate": 7.108143804589759e-05, "loss": 0.5624, "step": 8451 }, { "epoch": 1.5077156364285078, "grad_norm": 0.48504510521888733, "learning_rate": 7.10325111778929e-05, "loss": 0.5338, "step": 8452 }, { "epoch": 1.507894032646508, "grad_norm": 0.5016950368881226, "learning_rate": 7.098359836591764e-05, "loss": 0.5561, "step": 8453 }, { "epoch": 1.5080724288645082, "grad_norm": 0.5088602900505066, "learning_rate": 7.093469961381365e-05, "loss": 0.6215, "step": 8454 }, { "epoch": 1.5082508250825084, "grad_norm": 0.6125203371047974, "learning_rate": 7.088581492542121e-05, "loss": 0.6588, "step": 8455 }, { "epoch": 1.5084292213005084, "grad_norm": 0.8469102382659912, "learning_rate": 7.083694430457988e-05, "loss": 0.5289, "step": 8456 }, { "epoch": 1.5086076175185086, "grad_norm": 0.49744218587875366, "learning_rate": 7.078808775512774e-05, "loss": 0.6164, "step": 8457 }, { "epoch": 1.5087860137365088, "grad_norm": 0.5175665616989136, "learning_rate": 7.073924528090214e-05, "loss": 0.5147, "step": 8458 }, { "epoch": 1.508964409954509, "grad_norm": 0.492880254983902, "learning_rate": 7.06904168857391e-05, "loss": 0.5535, "step": 8459 }, { "epoch": 1.5091428061725092, "grad_norm": 0.5100347399711609, "learning_rate": 7.064160257347346e-05, "loss": 0.6079, "step": 8460 }, { "epoch": 1.5093212023905094, "grad_norm": 0.4987751543521881, "learning_rate": 7.059280234793927e-05, "loss": 0.5412, "step": 8461 }, { "epoch": 1.5094995986085094, "grad_norm": 0.4200221002101898, "learning_rate": 7.054401621296899e-05, "loss": 0.3935, "step": 8462 }, { "epoch": 1.5096779948265096, "grad_norm": 0.5284053087234497, "learning_rate": 7.049524417239465e-05, "loss": 0.6021, "step": 8463 }, { "epoch": 1.5098563910445097, "grad_norm": 0.5351274013519287, "learning_rate": 7.044648623004654e-05, "loss": 0.6754, "step": 8464 }, { "epoch": 1.51003478726251, "grad_norm": 0.5108939409255981, "learning_rate": 7.039774238975408e-05, "loss": 0.6219, "step": 8465 }, { "epoch": 1.5102131834805101, "grad_norm": 0.5333287119865417, "learning_rate": 7.034901265534571e-05, "loss": 0.581, "step": 8466 }, { "epoch": 1.5103915796985103, "grad_norm": 0.6006919145584106, "learning_rate": 7.030029703064849e-05, "loss": 0.6058, "step": 8467 }, { "epoch": 1.5105699759165105, "grad_norm": 0.4753846526145935, "learning_rate": 7.02515955194887e-05, "loss": 0.4262, "step": 8468 }, { "epoch": 1.5107483721345107, "grad_norm": 0.48568055033683777, "learning_rate": 7.020290812569119e-05, "loss": 0.4365, "step": 8469 }, { "epoch": 1.510926768352511, "grad_norm": 0.488115519285202, "learning_rate": 7.015423485307996e-05, "loss": 0.5195, "step": 8470 }, { "epoch": 1.5111051645705111, "grad_norm": 0.5295347571372986, "learning_rate": 7.010557570547774e-05, "loss": 0.5334, "step": 8471 }, { "epoch": 1.5112835607885113, "grad_norm": 0.49440649151802063, "learning_rate": 7.00569306867061e-05, "loss": 0.546, "step": 8472 }, { "epoch": 1.5114619570065115, "grad_norm": 0.5544924736022949, "learning_rate": 7.000829980058576e-05, "loss": 0.5707, "step": 8473 }, { "epoch": 1.5116403532245117, "grad_norm": 0.6297951936721802, "learning_rate": 6.995968305093603e-05, "loss": 0.5151, "step": 8474 }, { "epoch": 1.511818749442512, "grad_norm": 0.4706231653690338, "learning_rate": 6.991108044157537e-05, "loss": 0.5719, "step": 8475 }, { "epoch": 1.5119971456605121, "grad_norm": 0.5109243988990784, "learning_rate": 6.986249197632092e-05, "loss": 0.6104, "step": 8476 }, { "epoch": 1.5121755418785123, "grad_norm": 0.5036676526069641, "learning_rate": 6.981391765898881e-05, "loss": 0.6026, "step": 8477 }, { "epoch": 1.5123539380965123, "grad_norm": 0.5482192635536194, "learning_rate": 6.976535749339413e-05, "loss": 0.5634, "step": 8478 }, { "epoch": 1.5125323343145125, "grad_norm": 0.47447332739830017, "learning_rate": 6.971681148335066e-05, "loss": 0.4998, "step": 8479 }, { "epoch": 1.5127107305325127, "grad_norm": 0.46494314074516296, "learning_rate": 6.96682796326713e-05, "loss": 0.4863, "step": 8480 }, { "epoch": 1.512889126750513, "grad_norm": 0.525435745716095, "learning_rate": 6.961976194516759e-05, "loss": 0.5581, "step": 8481 }, { "epoch": 1.513067522968513, "grad_norm": 0.5048510432243347, "learning_rate": 6.95712584246502e-05, "loss": 0.5747, "step": 8482 }, { "epoch": 1.5132459191865133, "grad_norm": 0.4954485297203064, "learning_rate": 6.952276907492846e-05, "loss": 0.504, "step": 8483 }, { "epoch": 1.5134243154045133, "grad_norm": 0.5358535051345825, "learning_rate": 6.947429389981085e-05, "loss": 0.6579, "step": 8484 }, { "epoch": 1.5136027116225135, "grad_norm": 0.4358263611793518, "learning_rate": 6.942583290310453e-05, "loss": 0.4034, "step": 8485 }, { "epoch": 1.5137811078405137, "grad_norm": 0.43710726499557495, "learning_rate": 6.937738608861552e-05, "loss": 0.4148, "step": 8486 }, { "epoch": 1.5139595040585139, "grad_norm": 0.4802948832511902, "learning_rate": 6.932895346014893e-05, "loss": 0.4326, "step": 8487 }, { "epoch": 1.514137900276514, "grad_norm": 0.515270471572876, "learning_rate": 6.928053502150849e-05, "loss": 0.4963, "step": 8488 }, { "epoch": 1.5143162964945143, "grad_norm": 0.5241411924362183, "learning_rate": 6.923213077649718e-05, "loss": 0.5903, "step": 8489 }, { "epoch": 1.5144946927125145, "grad_norm": 0.5037186741828918, "learning_rate": 6.918374072891643e-05, "loss": 0.6109, "step": 8490 }, { "epoch": 1.5146730889305147, "grad_norm": 0.586534857749939, "learning_rate": 6.913536488256695e-05, "loss": 0.6432, "step": 8491 }, { "epoch": 1.5148514851485149, "grad_norm": 0.49035558104515076, "learning_rate": 6.9087003241248e-05, "loss": 0.4847, "step": 8492 }, { "epoch": 1.515029881366515, "grad_norm": 0.5941229462623596, "learning_rate": 6.903865580875795e-05, "loss": 0.7171, "step": 8493 }, { "epoch": 1.5152082775845153, "grad_norm": 0.4788690209388733, "learning_rate": 6.899032258889409e-05, "loss": 0.5057, "step": 8494 }, { "epoch": 1.5153866738025155, "grad_norm": 0.6357868909835815, "learning_rate": 6.894200358545233e-05, "loss": 0.7302, "step": 8495 }, { "epoch": 1.5155650700205157, "grad_norm": 0.5005565881729126, "learning_rate": 6.889369880222776e-05, "loss": 0.4858, "step": 8496 }, { "epoch": 1.5157434662385159, "grad_norm": 0.5777773857116699, "learning_rate": 6.884540824301416e-05, "loss": 0.6461, "step": 8497 }, { "epoch": 1.515921862456516, "grad_norm": 0.4599872827529907, "learning_rate": 6.879713191160417e-05, "loss": 0.5051, "step": 8498 }, { "epoch": 1.5161002586745163, "grad_norm": 0.44417500495910645, "learning_rate": 6.874886981178952e-05, "loss": 0.4822, "step": 8499 }, { "epoch": 1.5162786548925162, "grad_norm": 0.46541309356689453, "learning_rate": 6.870062194736057e-05, "loss": 0.4517, "step": 8500 }, { "epoch": 1.5164570511105164, "grad_norm": 0.6577228903770447, "learning_rate": 6.865238832210682e-05, "loss": 0.5325, "step": 8501 }, { "epoch": 1.5166354473285166, "grad_norm": 0.43800440430641174, "learning_rate": 6.860416893981638e-05, "loss": 0.4597, "step": 8502 }, { "epoch": 1.5168138435465168, "grad_norm": 0.46744707226753235, "learning_rate": 6.855596380427651e-05, "loss": 0.5437, "step": 8503 }, { "epoch": 1.516992239764517, "grad_norm": 0.48990002274513245, "learning_rate": 6.85077729192731e-05, "loss": 0.5202, "step": 8504 }, { "epoch": 1.5171706359825172, "grad_norm": 0.49149778485298157, "learning_rate": 6.845959628859119e-05, "loss": 0.5856, "step": 8505 }, { "epoch": 1.5173490322005172, "grad_norm": 0.5475661158561707, "learning_rate": 6.841143391601445e-05, "loss": 0.7378, "step": 8506 }, { "epoch": 1.5175274284185174, "grad_norm": 0.5574973821640015, "learning_rate": 6.836328580532547e-05, "loss": 0.5967, "step": 8507 }, { "epoch": 1.5177058246365176, "grad_norm": 0.522175133228302, "learning_rate": 6.831515196030588e-05, "loss": 0.6041, "step": 8508 }, { "epoch": 1.5178842208545178, "grad_norm": 0.5970126390457153, "learning_rate": 6.826703238473605e-05, "loss": 0.8074, "step": 8509 }, { "epoch": 1.518062617072518, "grad_norm": 0.5063888430595398, "learning_rate": 6.821892708239535e-05, "loss": 0.5915, "step": 8510 }, { "epoch": 1.5182410132905182, "grad_norm": 0.5437260866165161, "learning_rate": 6.817083605706193e-05, "loss": 0.6038, "step": 8511 }, { "epoch": 1.5184194095085184, "grad_norm": 0.49606233835220337, "learning_rate": 6.812275931251268e-05, "loss": 0.5352, "step": 8512 }, { "epoch": 1.5185978057265186, "grad_norm": 0.509730339050293, "learning_rate": 6.807469685252376e-05, "loss": 0.5743, "step": 8513 }, { "epoch": 1.5187762019445188, "grad_norm": 0.5043073296546936, "learning_rate": 6.802664868086978e-05, "loss": 0.496, "step": 8514 }, { "epoch": 1.518954598162519, "grad_norm": 0.5181002020835876, "learning_rate": 6.797861480132456e-05, "loss": 0.5839, "step": 8515 }, { "epoch": 1.5191329943805192, "grad_norm": 0.5447046756744385, "learning_rate": 6.793059521766054e-05, "loss": 0.6502, "step": 8516 }, { "epoch": 1.5193113905985194, "grad_norm": 0.5003826022148132, "learning_rate": 6.788258993364929e-05, "loss": 0.5199, "step": 8517 }, { "epoch": 1.5194897868165196, "grad_norm": 0.5281699299812317, "learning_rate": 6.783459895306107e-05, "loss": 0.6103, "step": 8518 }, { "epoch": 1.5196681830345198, "grad_norm": 0.4539282023906708, "learning_rate": 6.778662227966495e-05, "loss": 0.4092, "step": 8519 }, { "epoch": 1.51984657925252, "grad_norm": 0.49163827300071716, "learning_rate": 6.773865991722921e-05, "loss": 0.4445, "step": 8520 }, { "epoch": 1.5200249754705202, "grad_norm": 0.47441521286964417, "learning_rate": 6.76907118695206e-05, "loss": 0.4945, "step": 8521 }, { "epoch": 1.5202033716885202, "grad_norm": 0.6085272431373596, "learning_rate": 6.76427781403051e-05, "loss": 0.7252, "step": 8522 }, { "epoch": 1.5203817679065204, "grad_norm": 0.5102150440216064, "learning_rate": 6.759485873334725e-05, "loss": 0.4936, "step": 8523 }, { "epoch": 1.5205601641245206, "grad_norm": 0.48524045944213867, "learning_rate": 6.754695365241071e-05, "loss": 0.5631, "step": 8524 }, { "epoch": 1.5207385603425208, "grad_norm": 0.433088481426239, "learning_rate": 6.749906290125799e-05, "loss": 0.4493, "step": 8525 }, { "epoch": 1.520916956560521, "grad_norm": 0.5861080288887024, "learning_rate": 6.745118648365026e-05, "loss": 0.6544, "step": 8526 }, { "epoch": 1.5210953527785211, "grad_norm": 0.4654527008533478, "learning_rate": 6.740332440334784e-05, "loss": 0.4399, "step": 8527 }, { "epoch": 1.5212737489965211, "grad_norm": 0.5457956194877625, "learning_rate": 6.735547666410968e-05, "loss": 0.6563, "step": 8528 }, { "epoch": 1.5214521452145213, "grad_norm": 0.6288897395133972, "learning_rate": 6.730764326969388e-05, "loss": 0.7027, "step": 8529 }, { "epoch": 1.5216305414325215, "grad_norm": 0.5368294715881348, "learning_rate": 6.725982422385715e-05, "loss": 0.6109, "step": 8530 }, { "epoch": 1.5218089376505217, "grad_norm": 0.5222885012626648, "learning_rate": 6.721201953035511e-05, "loss": 0.5362, "step": 8531 }, { "epoch": 1.521987333868522, "grad_norm": 0.49758774042129517, "learning_rate": 6.716422919294247e-05, "loss": 0.4813, "step": 8532 }, { "epoch": 1.5221657300865221, "grad_norm": 0.5573477745056152, "learning_rate": 6.71164532153725e-05, "loss": 0.7868, "step": 8533 }, { "epoch": 1.5223441263045223, "grad_norm": 0.46816423535346985, "learning_rate": 6.706869160139767e-05, "loss": 0.4346, "step": 8534 }, { "epoch": 1.5225225225225225, "grad_norm": 0.521041750907898, "learning_rate": 6.702094435476902e-05, "loss": 0.5432, "step": 8535 }, { "epoch": 1.5227009187405227, "grad_norm": 0.49728789925575256, "learning_rate": 6.697321147923671e-05, "loss": 0.6175, "step": 8536 }, { "epoch": 1.522879314958523, "grad_norm": 0.5513471364974976, "learning_rate": 6.692549297854956e-05, "loss": 0.6744, "step": 8537 }, { "epoch": 1.523057711176523, "grad_norm": 1.1942397356033325, "learning_rate": 6.68777888564554e-05, "loss": 0.6059, "step": 8538 }, { "epoch": 1.5232361073945233, "grad_norm": 0.5422153472900391, "learning_rate": 6.683009911670095e-05, "loss": 0.5448, "step": 8539 }, { "epoch": 1.5234145036125235, "grad_norm": 0.5084717869758606, "learning_rate": 6.678242376303165e-05, "loss": 0.5318, "step": 8540 }, { "epoch": 1.5235928998305237, "grad_norm": 0.4946412444114685, "learning_rate": 6.673476279919202e-05, "loss": 0.4886, "step": 8541 }, { "epoch": 1.523771296048524, "grad_norm": 0.5950092673301697, "learning_rate": 6.668711622892515e-05, "loss": 0.7057, "step": 8542 }, { "epoch": 1.523949692266524, "grad_norm": 0.45473888516426086, "learning_rate": 6.663948405597339e-05, "loss": 0.4004, "step": 8543 }, { "epoch": 1.524128088484524, "grad_norm": 0.5750323534011841, "learning_rate": 6.659186628407762e-05, "loss": 0.6046, "step": 8544 }, { "epoch": 1.5243064847025243, "grad_norm": 0.5322027802467346, "learning_rate": 6.654426291697768e-05, "loss": 0.5181, "step": 8545 }, { "epoch": 1.5244848809205245, "grad_norm": 0.4996216893196106, "learning_rate": 6.649667395841247e-05, "loss": 0.5104, "step": 8546 }, { "epoch": 1.5246632771385247, "grad_norm": 0.5934137105941772, "learning_rate": 6.644909941211943e-05, "loss": 0.7318, "step": 8547 }, { "epoch": 1.5248416733565249, "grad_norm": 0.4300279915332794, "learning_rate": 6.640153928183523e-05, "loss": 0.3769, "step": 8548 }, { "epoch": 1.525020069574525, "grad_norm": 0.5084002614021301, "learning_rate": 6.635399357129501e-05, "loss": 0.4071, "step": 8549 }, { "epoch": 1.525198465792525, "grad_norm": 0.47641390562057495, "learning_rate": 6.630646228423323e-05, "loss": 0.6074, "step": 8550 }, { "epoch": 1.5253768620105252, "grad_norm": 0.5083361268043518, "learning_rate": 6.625894542438283e-05, "loss": 0.657, "step": 8551 }, { "epoch": 1.5255552582285254, "grad_norm": 0.5362805128097534, "learning_rate": 6.621144299547572e-05, "loss": 0.6543, "step": 8552 }, { "epoch": 1.5257336544465256, "grad_norm": 0.46762344241142273, "learning_rate": 6.616395500124276e-05, "loss": 0.4705, "step": 8553 }, { "epoch": 1.5259120506645258, "grad_norm": 0.41276419162750244, "learning_rate": 6.611648144541369e-05, "loss": 0.4374, "step": 8554 }, { "epoch": 1.526090446882526, "grad_norm": 0.5008599758148193, "learning_rate": 6.60690223317171e-05, "loss": 0.5399, "step": 8555 }, { "epoch": 1.5262688431005262, "grad_norm": 0.8646233677864075, "learning_rate": 6.602157766388034e-05, "loss": 0.4534, "step": 8556 }, { "epoch": 1.5264472393185264, "grad_norm": 0.4763386845588684, "learning_rate": 6.597414744562963e-05, "loss": 0.5278, "step": 8557 }, { "epoch": 1.5266256355365266, "grad_norm": 0.5339080691337585, "learning_rate": 6.592673168069027e-05, "loss": 0.555, "step": 8558 }, { "epoch": 1.5268040317545268, "grad_norm": 0.5438008904457092, "learning_rate": 6.587933037278609e-05, "loss": 0.6286, "step": 8559 }, { "epoch": 1.526982427972527, "grad_norm": 0.4510871469974518, "learning_rate": 6.583194352564017e-05, "loss": 0.4141, "step": 8560 }, { "epoch": 1.5271608241905272, "grad_norm": 0.5599578619003296, "learning_rate": 6.578457114297407e-05, "loss": 0.619, "step": 8561 }, { "epoch": 1.5273392204085274, "grad_norm": 0.5268929600715637, "learning_rate": 6.573721322850854e-05, "loss": 0.6176, "step": 8562 }, { "epoch": 1.5275176166265276, "grad_norm": 0.5147603154182434, "learning_rate": 6.568986978596291e-05, "loss": 0.5238, "step": 8563 }, { "epoch": 1.5276960128445278, "grad_norm": 0.5306607484817505, "learning_rate": 6.564254081905571e-05, "loss": 0.5801, "step": 8564 }, { "epoch": 1.527874409062528, "grad_norm": 0.5111916065216064, "learning_rate": 6.559522633150397e-05, "loss": 0.5071, "step": 8565 }, { "epoch": 1.528052805280528, "grad_norm": 0.48746898770332336, "learning_rate": 6.554792632702376e-05, "loss": 0.5161, "step": 8566 }, { "epoch": 1.5282312014985282, "grad_norm": 0.48005223274230957, "learning_rate": 6.55006408093301e-05, "loss": 0.4149, "step": 8567 }, { "epoch": 1.5284095977165284, "grad_norm": 0.5667544007301331, "learning_rate": 6.545336978213664e-05, "loss": 0.6735, "step": 8568 }, { "epoch": 1.5285879939345286, "grad_norm": 0.5892688035964966, "learning_rate": 6.54061132491561e-05, "loss": 0.5779, "step": 8569 }, { "epoch": 1.5287663901525288, "grad_norm": 0.4722636342048645, "learning_rate": 6.535887121410006e-05, "loss": 0.4264, "step": 8570 }, { "epoch": 1.528944786370529, "grad_norm": 0.5367898941040039, "learning_rate": 6.531164368067874e-05, "loss": 0.5761, "step": 8571 }, { "epoch": 1.529123182588529, "grad_norm": 0.5178304314613342, "learning_rate": 6.526443065260154e-05, "loss": 0.5354, "step": 8572 }, { "epoch": 1.5293015788065292, "grad_norm": 0.4545149803161621, "learning_rate": 6.521723213357635e-05, "loss": 0.3896, "step": 8573 }, { "epoch": 1.5294799750245294, "grad_norm": 0.5091972947120667, "learning_rate": 6.517004812731034e-05, "loss": 0.6283, "step": 8574 }, { "epoch": 1.5296583712425296, "grad_norm": 0.5601230263710022, "learning_rate": 6.512287863750912e-05, "loss": 0.6376, "step": 8575 }, { "epoch": 1.5298367674605298, "grad_norm": 0.44082170724868774, "learning_rate": 6.507572366787753e-05, "loss": 0.412, "step": 8576 }, { "epoch": 1.53001516367853, "grad_norm": 0.6141606569290161, "learning_rate": 6.502858322211902e-05, "loss": 0.7171, "step": 8577 }, { "epoch": 1.5301935598965302, "grad_norm": 0.5599898099899292, "learning_rate": 6.498145730393592e-05, "loss": 0.6881, "step": 8578 }, { "epoch": 1.5303719561145304, "grad_norm": 0.5843207836151123, "learning_rate": 6.493434591702962e-05, "loss": 0.7688, "step": 8579 }, { "epoch": 1.5305503523325306, "grad_norm": 0.4394163191318512, "learning_rate": 6.488724906510008e-05, "loss": 0.4879, "step": 8580 }, { "epoch": 1.5307287485505308, "grad_norm": 0.49832600355148315, "learning_rate": 6.484016675184639e-05, "loss": 0.5856, "step": 8581 }, { "epoch": 1.530907144768531, "grad_norm": 0.5916674137115479, "learning_rate": 6.479309898096627e-05, "loss": 0.5649, "step": 8582 }, { "epoch": 1.5310855409865312, "grad_norm": 0.4655032455921173, "learning_rate": 6.474604575615653e-05, "loss": 0.5289, "step": 8583 }, { "epoch": 1.5312639372045314, "grad_norm": 0.48161542415618896, "learning_rate": 6.469900708111254e-05, "loss": 0.4909, "step": 8584 }, { "epoch": 1.5314423334225316, "grad_norm": 0.564775288105011, "learning_rate": 6.465198295952881e-05, "loss": 0.5441, "step": 8585 }, { "epoch": 1.5316207296405318, "grad_norm": 0.4698559641838074, "learning_rate": 6.460497339509864e-05, "loss": 0.487, "step": 8586 }, { "epoch": 1.531799125858532, "grad_norm": 0.45102497935295105, "learning_rate": 6.4557978391514e-05, "loss": 0.5289, "step": 8587 }, { "epoch": 1.531977522076532, "grad_norm": 0.5399623513221741, "learning_rate": 6.451099795246604e-05, "loss": 0.7627, "step": 8588 }, { "epoch": 1.5321559182945321, "grad_norm": 0.5812605023384094, "learning_rate": 6.446403208164436e-05, "loss": 0.7045, "step": 8589 }, { "epoch": 1.5323343145125323, "grad_norm": 0.5008137822151184, "learning_rate": 6.441708078273787e-05, "loss": 0.4697, "step": 8590 }, { "epoch": 1.5325127107305325, "grad_norm": 0.5091111660003662, "learning_rate": 6.437014405943397e-05, "loss": 0.4132, "step": 8591 }, { "epoch": 1.5326911069485327, "grad_norm": 0.6219770312309265, "learning_rate": 6.432322191541901e-05, "loss": 0.5969, "step": 8592 }, { "epoch": 1.532869503166533, "grad_norm": 0.5382397770881653, "learning_rate": 6.427631435437836e-05, "loss": 0.5745, "step": 8593 }, { "epoch": 1.533047899384533, "grad_norm": 0.5575965642929077, "learning_rate": 6.422942137999598e-05, "loss": 0.4856, "step": 8594 }, { "epoch": 1.533226295602533, "grad_norm": 0.5452744960784912, "learning_rate": 6.418254299595499e-05, "loss": 0.5125, "step": 8595 }, { "epoch": 1.5334046918205333, "grad_norm": 0.5527557134628296, "learning_rate": 6.413567920593705e-05, "loss": 0.6013, "step": 8596 }, { "epoch": 1.5335830880385335, "grad_norm": 0.47276362776756287, "learning_rate": 6.408883001362292e-05, "loss": 0.4906, "step": 8597 }, { "epoch": 1.5337614842565337, "grad_norm": 0.5150408148765564, "learning_rate": 6.404199542269213e-05, "loss": 0.5248, "step": 8598 }, { "epoch": 1.533939880474534, "grad_norm": 0.4958198070526123, "learning_rate": 6.399517543682278e-05, "loss": 0.5716, "step": 8599 }, { "epoch": 1.534118276692534, "grad_norm": 0.49585381150245667, "learning_rate": 6.39483700596925e-05, "loss": 0.6034, "step": 8600 }, { "epoch": 1.5342966729105343, "grad_norm": 0.5567946434020996, "learning_rate": 6.390157929497708e-05, "loss": 0.6809, "step": 8601 }, { "epoch": 1.5344750691285345, "grad_norm": 0.541259765625, "learning_rate": 6.385480314635162e-05, "loss": 0.6461, "step": 8602 }, { "epoch": 1.5346534653465347, "grad_norm": 0.5411567091941833, "learning_rate": 6.380804161748982e-05, "loss": 0.6509, "step": 8603 }, { "epoch": 1.5348318615645349, "grad_norm": 0.4732004404067993, "learning_rate": 6.376129471206422e-05, "loss": 0.4766, "step": 8604 }, { "epoch": 1.535010257782535, "grad_norm": 0.45597603917121887, "learning_rate": 6.371456243374646e-05, "loss": 0.5052, "step": 8605 }, { "epoch": 1.5351886540005353, "grad_norm": 0.49953046441078186, "learning_rate": 6.366784478620674e-05, "loss": 0.6363, "step": 8606 }, { "epoch": 1.5353670502185355, "grad_norm": 0.4946024715900421, "learning_rate": 6.362114177311437e-05, "loss": 0.5352, "step": 8607 }, { "epoch": 1.5355454464365357, "grad_norm": 0.4868670403957367, "learning_rate": 6.357445339813726e-05, "loss": 0.4525, "step": 8608 }, { "epoch": 1.5357238426545359, "grad_norm": 0.5404412150382996, "learning_rate": 6.352777966494242e-05, "loss": 0.6858, "step": 8609 }, { "epoch": 1.5359022388725359, "grad_norm": 0.5547458529472351, "learning_rate": 6.348112057719551e-05, "loss": 0.6672, "step": 8610 }, { "epoch": 1.536080635090536, "grad_norm": 0.5338977575302124, "learning_rate": 6.343447613856108e-05, "loss": 0.694, "step": 8611 }, { "epoch": 1.5362590313085362, "grad_norm": 0.5286486744880676, "learning_rate": 6.338784635270264e-05, "loss": 0.6343, "step": 8612 }, { "epoch": 1.5364374275265364, "grad_norm": 0.49329471588134766, "learning_rate": 6.334123122328239e-05, "loss": 0.5588, "step": 8613 }, { "epoch": 1.5366158237445366, "grad_norm": 0.5088777542114258, "learning_rate": 6.32946307539616e-05, "loss": 0.5553, "step": 8614 }, { "epoch": 1.5367942199625368, "grad_norm": 0.627946674823761, "learning_rate": 6.324804494840008e-05, "loss": 0.4839, "step": 8615 }, { "epoch": 1.5369726161805368, "grad_norm": 0.5603138208389282, "learning_rate": 6.320147381025673e-05, "loss": 0.7505, "step": 8616 }, { "epoch": 1.537151012398537, "grad_norm": 0.4293627440929413, "learning_rate": 6.315491734318934e-05, "loss": 0.3965, "step": 8617 }, { "epoch": 1.5373294086165372, "grad_norm": 0.5595969557762146, "learning_rate": 6.310837555085424e-05, "loss": 0.716, "step": 8618 }, { "epoch": 1.5375078048345374, "grad_norm": 0.4659646153450012, "learning_rate": 6.306184843690699e-05, "loss": 0.4463, "step": 8619 }, { "epoch": 1.5376862010525376, "grad_norm": 0.5158681869506836, "learning_rate": 6.301533600500165e-05, "loss": 0.5118, "step": 8620 }, { "epoch": 1.5378645972705378, "grad_norm": 0.4700511693954468, "learning_rate": 6.296883825879141e-05, "loss": 0.4758, "step": 8621 }, { "epoch": 1.538042993488538, "grad_norm": 0.5160353183746338, "learning_rate": 6.292235520192807e-05, "loss": 0.6518, "step": 8622 }, { "epoch": 1.5382213897065382, "grad_norm": 0.4958648085594177, "learning_rate": 6.28758868380625e-05, "loss": 0.4795, "step": 8623 }, { "epoch": 1.5383997859245384, "grad_norm": 0.42343762516975403, "learning_rate": 6.282943317084428e-05, "loss": 0.3622, "step": 8624 }, { "epoch": 1.5385781821425386, "grad_norm": 0.5087177753448486, "learning_rate": 6.278299420392173e-05, "loss": 0.4692, "step": 8625 }, { "epoch": 1.5387565783605388, "grad_norm": 0.44395479559898376, "learning_rate": 6.273656994094232e-05, "loss": 0.4136, "step": 8626 }, { "epoch": 1.538934974578539, "grad_norm": 0.5368765592575073, "learning_rate": 6.269016038555206e-05, "loss": 0.648, "step": 8627 }, { "epoch": 1.5391133707965392, "grad_norm": 0.6212941408157349, "learning_rate": 6.264376554139608e-05, "loss": 0.7711, "step": 8628 }, { "epoch": 1.5392917670145394, "grad_norm": 0.5054884552955627, "learning_rate": 6.259738541211804e-05, "loss": 0.5336, "step": 8629 }, { "epoch": 1.5394701632325396, "grad_norm": 0.5286135673522949, "learning_rate": 6.255102000136073e-05, "loss": 0.5894, "step": 8630 }, { "epoch": 1.5396485594505398, "grad_norm": 0.581322193145752, "learning_rate": 6.250466931276569e-05, "loss": 0.732, "step": 8631 }, { "epoch": 1.5398269556685398, "grad_norm": 0.492714524269104, "learning_rate": 6.245833334997317e-05, "loss": 0.583, "step": 8632 }, { "epoch": 1.54000535188654, "grad_norm": 0.49149906635284424, "learning_rate": 6.241201211662254e-05, "loss": 0.496, "step": 8633 }, { "epoch": 1.5401837481045402, "grad_norm": 0.45817720890045166, "learning_rate": 6.236570561635163e-05, "loss": 0.5408, "step": 8634 }, { "epoch": 1.5403621443225404, "grad_norm": 0.5533788204193115, "learning_rate": 6.231941385279757e-05, "loss": 0.6682, "step": 8635 }, { "epoch": 1.5405405405405406, "grad_norm": 0.5434373021125793, "learning_rate": 6.227313682959596e-05, "loss": 0.6231, "step": 8636 }, { "epoch": 1.5407189367585408, "grad_norm": 0.45322054624557495, "learning_rate": 6.222687455038134e-05, "loss": 0.3839, "step": 8637 }, { "epoch": 1.5408973329765407, "grad_norm": 0.5391322374343872, "learning_rate": 6.218062701878724e-05, "loss": 0.4676, "step": 8638 }, { "epoch": 1.541075729194541, "grad_norm": 0.4535292685031891, "learning_rate": 6.213439423844583e-05, "loss": 0.4573, "step": 8639 }, { "epoch": 1.5412541254125411, "grad_norm": 0.46426570415496826, "learning_rate": 6.208817621298829e-05, "loss": 0.4326, "step": 8640 }, { "epoch": 1.5414325216305413, "grad_norm": 0.6217679977416992, "learning_rate": 6.204197294604446e-05, "loss": 0.6326, "step": 8641 }, { "epoch": 1.5416109178485415, "grad_norm": 0.4808856248855591, "learning_rate": 6.199578444124329e-05, "loss": 0.4614, "step": 8642 }, { "epoch": 1.5417893140665417, "grad_norm": 0.5089523792266846, "learning_rate": 6.194961070221219e-05, "loss": 0.5047, "step": 8643 }, { "epoch": 1.541967710284542, "grad_norm": 0.43264392018318176, "learning_rate": 6.190345173257786e-05, "loss": 0.3809, "step": 8644 }, { "epoch": 1.5421461065025421, "grad_norm": 0.450720876455307, "learning_rate": 6.185730753596539e-05, "loss": 0.4279, "step": 8645 }, { "epoch": 1.5423245027205423, "grad_norm": 0.4698851704597473, "learning_rate": 6.181117811599901e-05, "loss": 0.4536, "step": 8646 }, { "epoch": 1.5425028989385425, "grad_norm": 0.46005532145500183, "learning_rate": 6.176506347630181e-05, "loss": 0.4388, "step": 8647 }, { "epoch": 1.5426812951565427, "grad_norm": 0.5724323391914368, "learning_rate": 6.171896362049542e-05, "loss": 0.6823, "step": 8648 }, { "epoch": 1.542859691374543, "grad_norm": 0.47760656476020813, "learning_rate": 6.167287855220072e-05, "loss": 0.4862, "step": 8649 }, { "epoch": 1.5430380875925431, "grad_norm": 0.533450186252594, "learning_rate": 6.162680827503705e-05, "loss": 0.606, "step": 8650 }, { "epoch": 1.5432164838105433, "grad_norm": 0.5100364089012146, "learning_rate": 6.158075279262273e-05, "loss": 0.5607, "step": 8651 }, { "epoch": 1.5433948800285435, "grad_norm": 0.5601518154144287, "learning_rate": 6.153471210857511e-05, "loss": 0.6058, "step": 8652 }, { "epoch": 1.5435732762465437, "grad_norm": 0.4649224579334259, "learning_rate": 6.148868622650999e-05, "loss": 0.461, "step": 8653 }, { "epoch": 1.5437516724645437, "grad_norm": 0.510765016078949, "learning_rate": 6.144267515004243e-05, "loss": 0.5787, "step": 8654 }, { "epoch": 1.543930068682544, "grad_norm": 0.5434867143630981, "learning_rate": 6.139667888278594e-05, "loss": 0.5901, "step": 8655 }, { "epoch": 1.544108464900544, "grad_norm": 0.5194826722145081, "learning_rate": 6.13506974283532e-05, "loss": 0.5809, "step": 8656 }, { "epoch": 1.5442868611185443, "grad_norm": 0.42716190218925476, "learning_rate": 6.130473079035548e-05, "loss": 0.4636, "step": 8657 }, { "epoch": 1.5444652573365445, "grad_norm": 0.4728177785873413, "learning_rate": 6.125877897240295e-05, "loss": 0.4347, "step": 8658 }, { "epoch": 1.5446436535545447, "grad_norm": 0.4928957223892212, "learning_rate": 6.121284197810476e-05, "loss": 0.5965, "step": 8659 }, { "epoch": 1.5448220497725447, "grad_norm": 0.5190236568450928, "learning_rate": 6.116691981106868e-05, "loss": 0.5734, "step": 8660 }, { "epoch": 1.5450004459905449, "grad_norm": 0.4811258018016815, "learning_rate": 6.11210124749014e-05, "loss": 0.5823, "step": 8661 }, { "epoch": 1.545178842208545, "grad_norm": 0.5223371982574463, "learning_rate": 6.107511997320863e-05, "loss": 0.52, "step": 8662 }, { "epoch": 1.5453572384265453, "grad_norm": 0.5431803464889526, "learning_rate": 6.102924230959456e-05, "loss": 0.6178, "step": 8663 }, { "epoch": 1.5455356346445455, "grad_norm": 0.5262665152549744, "learning_rate": 6.0983379487662555e-05, "loss": 0.5756, "step": 8664 }, { "epoch": 1.5457140308625457, "grad_norm": 0.5077388882637024, "learning_rate": 6.09375315110145e-05, "loss": 0.5741, "step": 8665 }, { "epoch": 1.5458924270805459, "grad_norm": 0.5087509155273438, "learning_rate": 6.089169838325143e-05, "loss": 0.5392, "step": 8666 }, { "epoch": 1.546070823298546, "grad_norm": 0.4039164185523987, "learning_rate": 6.084588010797293e-05, "loss": 0.3906, "step": 8667 }, { "epoch": 1.5462492195165463, "grad_norm": 0.4832989573478699, "learning_rate": 6.0800076688777684e-05, "loss": 0.5393, "step": 8668 }, { "epoch": 1.5464276157345465, "grad_norm": 0.4995843470096588, "learning_rate": 6.0754288129262895e-05, "loss": 0.6855, "step": 8669 }, { "epoch": 1.5466060119525467, "grad_norm": 0.47281551361083984, "learning_rate": 6.070851443302497e-05, "loss": 0.489, "step": 8670 }, { "epoch": 1.5467844081705469, "grad_norm": 0.4991108477115631, "learning_rate": 6.066275560365886e-05, "loss": 0.5685, "step": 8671 }, { "epoch": 1.546962804388547, "grad_norm": 0.5392157435417175, "learning_rate": 6.0617011644758385e-05, "loss": 0.7395, "step": 8672 }, { "epoch": 1.5471412006065473, "grad_norm": 0.6251504421234131, "learning_rate": 6.057128255991637e-05, "loss": 0.777, "step": 8673 }, { "epoch": 1.5473195968245474, "grad_norm": 0.4212150573730469, "learning_rate": 6.052556835272424e-05, "loss": 0.3672, "step": 8674 }, { "epoch": 1.5474979930425476, "grad_norm": 0.3978899419307709, "learning_rate": 6.047986902677252e-05, "loss": 0.352, "step": 8675 }, { "epoch": 1.5476763892605476, "grad_norm": 0.5088796615600586, "learning_rate": 6.0434184585650256e-05, "loss": 0.5676, "step": 8676 }, { "epoch": 1.5478547854785478, "grad_norm": 0.42953309416770935, "learning_rate": 6.038851503294554e-05, "loss": 0.3769, "step": 8677 }, { "epoch": 1.548033181696548, "grad_norm": 0.49997836351394653, "learning_rate": 6.0342860372245344e-05, "loss": 0.6421, "step": 8678 }, { "epoch": 1.5482115779145482, "grad_norm": 0.5206395387649536, "learning_rate": 6.029722060713519e-05, "loss": 0.521, "step": 8679 }, { "epoch": 1.5483899741325484, "grad_norm": 0.4601840376853943, "learning_rate": 6.025159574119979e-05, "loss": 0.5562, "step": 8680 }, { "epoch": 1.5485683703505486, "grad_norm": 0.5508368015289307, "learning_rate": 6.0205985778022305e-05, "loss": 0.5545, "step": 8681 }, { "epoch": 1.5487467665685486, "grad_norm": 0.5534743070602417, "learning_rate": 6.016039072118512e-05, "loss": 0.5463, "step": 8682 }, { "epoch": 1.5489251627865488, "grad_norm": 0.4522656798362732, "learning_rate": 6.011481057426915e-05, "loss": 0.4339, "step": 8683 }, { "epoch": 1.549103559004549, "grad_norm": 0.44534674286842346, "learning_rate": 6.006924534085414e-05, "loss": 0.4096, "step": 8684 }, { "epoch": 1.5492819552225492, "grad_norm": 0.4928872883319855, "learning_rate": 6.002369502451899e-05, "loss": 0.5002, "step": 8685 }, { "epoch": 1.5494603514405494, "grad_norm": 0.509283185005188, "learning_rate": 5.997815962884098e-05, "loss": 0.4871, "step": 8686 }, { "epoch": 1.5496387476585496, "grad_norm": 0.5575897097587585, "learning_rate": 5.993263915739661e-05, "loss": 0.4546, "step": 8687 }, { "epoch": 1.5498171438765498, "grad_norm": 0.49267205595970154, "learning_rate": 5.988713361376089e-05, "loss": 0.5014, "step": 8688 }, { "epoch": 1.54999554009455, "grad_norm": 0.4820170998573303, "learning_rate": 5.984164300150796e-05, "loss": 0.4648, "step": 8689 }, { "epoch": 1.5501739363125502, "grad_norm": 0.5354559421539307, "learning_rate": 5.9796167324210505e-05, "loss": 0.6559, "step": 8690 }, { "epoch": 1.5503523325305504, "grad_norm": 0.5515713095664978, "learning_rate": 5.975070658544021e-05, "loss": 0.5613, "step": 8691 }, { "epoch": 1.5505307287485506, "grad_norm": 0.5345812439918518, "learning_rate": 5.9705260788767594e-05, "loss": 0.6878, "step": 8692 }, { "epoch": 1.5507091249665508, "grad_norm": 0.48849523067474365, "learning_rate": 5.9659829937761865e-05, "loss": 0.4398, "step": 8693 }, { "epoch": 1.550887521184551, "grad_norm": 0.5398678183555603, "learning_rate": 5.9614414035991244e-05, "loss": 0.6064, "step": 8694 }, { "epoch": 1.5510659174025512, "grad_norm": 0.47150692343711853, "learning_rate": 5.9569013087022614e-05, "loss": 0.5028, "step": 8695 }, { "epoch": 1.5512443136205514, "grad_norm": 0.4753498435020447, "learning_rate": 5.9523627094421664e-05, "loss": 0.4533, "step": 8696 }, { "epoch": 1.5514227098385516, "grad_norm": 0.5937864184379578, "learning_rate": 5.947825606175317e-05, "loss": 0.6236, "step": 8697 }, { "epoch": 1.5516011060565515, "grad_norm": 0.5065819025039673, "learning_rate": 5.943289999258036e-05, "loss": 0.5351, "step": 8698 }, { "epoch": 1.5517795022745517, "grad_norm": 0.5110275149345398, "learning_rate": 5.938755889046565e-05, "loss": 0.5605, "step": 8699 }, { "epoch": 1.551957898492552, "grad_norm": 0.5337404608726501, "learning_rate": 5.934223275896999e-05, "loss": 0.6949, "step": 8700 }, { "epoch": 1.5521362947105521, "grad_norm": 0.5709247589111328, "learning_rate": 5.92969216016534e-05, "loss": 0.7602, "step": 8701 }, { "epoch": 1.5523146909285523, "grad_norm": 0.4845641255378723, "learning_rate": 5.9251625422074406e-05, "loss": 0.4694, "step": 8702 }, { "epoch": 1.5524930871465525, "grad_norm": 0.5533928871154785, "learning_rate": 5.920634422379079e-05, "loss": 0.6047, "step": 8703 }, { "epoch": 1.5526714833645525, "grad_norm": 0.5261991024017334, "learning_rate": 5.916107801035875e-05, "loss": 0.6173, "step": 8704 }, { "epoch": 1.5528498795825527, "grad_norm": 0.5172926783561707, "learning_rate": 5.9115826785333473e-05, "loss": 0.5984, "step": 8705 }, { "epoch": 1.553028275800553, "grad_norm": 0.47893017530441284, "learning_rate": 5.907059055226907e-05, "loss": 0.4444, "step": 8706 }, { "epoch": 1.5532066720185531, "grad_norm": 0.5027623772621155, "learning_rate": 5.902536931471819e-05, "loss": 0.4835, "step": 8707 }, { "epoch": 1.5533850682365533, "grad_norm": 0.566230297088623, "learning_rate": 5.898016307623275e-05, "loss": 0.5732, "step": 8708 }, { "epoch": 1.5535634644545535, "grad_norm": 0.49636390805244446, "learning_rate": 5.8934971840363116e-05, "loss": 0.5499, "step": 8709 }, { "epoch": 1.5537418606725537, "grad_norm": 0.5405829548835754, "learning_rate": 5.888979561065849e-05, "loss": 0.5984, "step": 8710 }, { "epoch": 1.553920256890554, "grad_norm": 0.4457230865955353, "learning_rate": 5.8844634390667176e-05, "loss": 0.4734, "step": 8711 }, { "epoch": 1.554098653108554, "grad_norm": 0.5478752851486206, "learning_rate": 5.879948818393591e-05, "loss": 0.5332, "step": 8712 }, { "epoch": 1.5542770493265543, "grad_norm": 0.49561017751693726, "learning_rate": 5.8754356994010634e-05, "loss": 0.5383, "step": 8713 }, { "epoch": 1.5544554455445545, "grad_norm": 0.571239709854126, "learning_rate": 5.8709240824435795e-05, "loss": 0.4595, "step": 8714 }, { "epoch": 1.5546338417625547, "grad_norm": 0.48616328835487366, "learning_rate": 5.8664139678754944e-05, "loss": 0.5232, "step": 8715 }, { "epoch": 1.554812237980555, "grad_norm": 0.5562140345573425, "learning_rate": 5.861905356051023e-05, "loss": 0.6471, "step": 8716 }, { "epoch": 1.554990634198555, "grad_norm": 0.516631007194519, "learning_rate": 5.857398247324261e-05, "loss": 0.572, "step": 8717 }, { "epoch": 1.5551690304165553, "grad_norm": 0.4869072139263153, "learning_rate": 5.85289264204921e-05, "loss": 0.5075, "step": 8718 }, { "epoch": 1.5553474266345555, "grad_norm": 0.5700942873954773, "learning_rate": 5.8483885405797246e-05, "loss": 0.6769, "step": 8719 }, { "epoch": 1.5555258228525555, "grad_norm": 1.086318850517273, "learning_rate": 5.843885943269567e-05, "loss": 0.4651, "step": 8720 }, { "epoch": 1.5557042190705557, "grad_norm": 0.45219793915748596, "learning_rate": 5.83938485047236e-05, "loss": 0.4308, "step": 8721 }, { "epoch": 1.5558826152885559, "grad_norm": 0.5690205097198486, "learning_rate": 5.834885262541617e-05, "loss": 0.628, "step": 8722 }, { "epoch": 1.556061011506556, "grad_norm": 0.5013719797134399, "learning_rate": 5.830387179830748e-05, "loss": 0.5243, "step": 8723 }, { "epoch": 1.5562394077245563, "grad_norm": 0.4831756055355072, "learning_rate": 5.825890602693013e-05, "loss": 0.4328, "step": 8724 }, { "epoch": 1.5564178039425565, "grad_norm": 0.511838436126709, "learning_rate": 5.8213955314815853e-05, "loss": 0.4934, "step": 8725 }, { "epoch": 1.5565962001605564, "grad_norm": 0.5139269232749939, "learning_rate": 5.816901966549495e-05, "loss": 0.4621, "step": 8726 }, { "epoch": 1.5567745963785566, "grad_norm": 0.4796489477157593, "learning_rate": 5.8124099082496745e-05, "loss": 0.5047, "step": 8727 }, { "epoch": 1.5569529925965568, "grad_norm": 0.47193264961242676, "learning_rate": 5.807919356934915e-05, "loss": 0.467, "step": 8728 }, { "epoch": 1.557131388814557, "grad_norm": 0.570898711681366, "learning_rate": 5.8034303129579164e-05, "loss": 0.7563, "step": 8729 }, { "epoch": 1.5573097850325572, "grad_norm": 0.4879952073097229, "learning_rate": 5.798942776671243e-05, "loss": 0.4973, "step": 8730 }, { "epoch": 1.5574881812505574, "grad_norm": 0.44711223244667053, "learning_rate": 5.794456748427332e-05, "loss": 0.3708, "step": 8731 }, { "epoch": 1.5576665774685576, "grad_norm": 0.6115127801895142, "learning_rate": 5.78997222857853e-05, "loss": 0.6516, "step": 8732 }, { "epoch": 1.5578449736865578, "grad_norm": 0.5244799852371216, "learning_rate": 5.785489217477036e-05, "loss": 0.507, "step": 8733 }, { "epoch": 1.558023369904558, "grad_norm": 0.5613646507263184, "learning_rate": 5.7810077154749566e-05, "loss": 0.6944, "step": 8734 }, { "epoch": 1.5582017661225582, "grad_norm": 0.49775010347366333, "learning_rate": 5.7765277229242546e-05, "loss": 0.4435, "step": 8735 }, { "epoch": 1.5583801623405584, "grad_norm": 0.46436411142349243, "learning_rate": 5.772049240176799e-05, "loss": 0.4435, "step": 8736 }, { "epoch": 1.5585585585585586, "grad_norm": 0.5458922982215881, "learning_rate": 5.7675722675843144e-05, "loss": 0.5705, "step": 8737 }, { "epoch": 1.5587369547765588, "grad_norm": 0.5422109365463257, "learning_rate": 5.763096805498427e-05, "loss": 0.6135, "step": 8738 }, { "epoch": 1.558915350994559, "grad_norm": 0.574105441570282, "learning_rate": 5.758622854270648e-05, "loss": 0.7073, "step": 8739 }, { "epoch": 1.5590937472125592, "grad_norm": 0.5504136085510254, "learning_rate": 5.7541504142523406e-05, "loss": 0.6011, "step": 8740 }, { "epoch": 1.5592721434305594, "grad_norm": 0.5172324180603027, "learning_rate": 5.7496794857947846e-05, "loss": 0.4303, "step": 8741 }, { "epoch": 1.5594505396485594, "grad_norm": 0.5464860200881958, "learning_rate": 5.745210069249118e-05, "loss": 0.6365, "step": 8742 }, { "epoch": 1.5596289358665596, "grad_norm": 0.47427821159362793, "learning_rate": 5.740742164966362e-05, "loss": 0.4361, "step": 8743 }, { "epoch": 1.5598073320845598, "grad_norm": 1.116687536239624, "learning_rate": 5.736275773297431e-05, "loss": 0.699, "step": 8744 }, { "epoch": 1.55998572830256, "grad_norm": 0.4698363244533539, "learning_rate": 5.731810894593106e-05, "loss": 0.5028, "step": 8745 }, { "epoch": 1.5601641245205602, "grad_norm": 0.47105473279953003, "learning_rate": 5.727347529204069e-05, "loss": 0.5059, "step": 8746 }, { "epoch": 1.5603425207385604, "grad_norm": 0.5197162628173828, "learning_rate": 5.722885677480857e-05, "loss": 0.5229, "step": 8747 }, { "epoch": 1.5605209169565604, "grad_norm": 0.5358749032020569, "learning_rate": 5.718425339773914e-05, "loss": 0.5431, "step": 8748 }, { "epoch": 1.5606993131745606, "grad_norm": 0.6025269627571106, "learning_rate": 5.713966516433541e-05, "loss": 0.791, "step": 8749 }, { "epoch": 1.5608777093925608, "grad_norm": 0.502884030342102, "learning_rate": 5.709509207809946e-05, "loss": 0.5499, "step": 8750 }, { "epoch": 1.561056105610561, "grad_norm": 0.49932992458343506, "learning_rate": 5.705053414253195e-05, "loss": 0.5878, "step": 8751 }, { "epoch": 1.5612345018285612, "grad_norm": 0.4696747362613678, "learning_rate": 5.700599136113238e-05, "loss": 0.4294, "step": 8752 }, { "epoch": 1.5614128980465614, "grad_norm": 0.4963851273059845, "learning_rate": 5.6961463737399215e-05, "loss": 0.5904, "step": 8753 }, { "epoch": 1.5615912942645616, "grad_norm": 0.542914867401123, "learning_rate": 5.691695127482968e-05, "loss": 0.7059, "step": 8754 }, { "epoch": 1.5617696904825618, "grad_norm": 0.44281649589538574, "learning_rate": 5.687245397691962e-05, "loss": 0.416, "step": 8755 }, { "epoch": 1.561948086700562, "grad_norm": 0.48408737778663635, "learning_rate": 5.682797184716401e-05, "loss": 0.5626, "step": 8756 }, { "epoch": 1.5621264829185622, "grad_norm": 0.4773915410041809, "learning_rate": 5.6783504889056285e-05, "loss": 0.5477, "step": 8757 }, { "epoch": 1.5623048791365624, "grad_norm": 0.5725618600845337, "learning_rate": 5.6739053106088984e-05, "loss": 0.5617, "step": 8758 }, { "epoch": 1.5624832753545626, "grad_norm": 0.4656837582588196, "learning_rate": 5.6694616501753256e-05, "loss": 0.4798, "step": 8759 }, { "epoch": 1.5626616715725627, "grad_norm": 0.4770435094833374, "learning_rate": 5.6650195079539194e-05, "loss": 0.5398, "step": 8760 }, { "epoch": 1.562840067790563, "grad_norm": 0.47299638390541077, "learning_rate": 5.6605788842935544e-05, "loss": 0.471, "step": 8761 }, { "epoch": 1.5630184640085631, "grad_norm": 0.4295462369918823, "learning_rate": 5.6561397795430096e-05, "loss": 0.4549, "step": 8762 }, { "epoch": 1.5631968602265633, "grad_norm": 0.5424062609672546, "learning_rate": 5.6517021940509225e-05, "loss": 0.649, "step": 8763 }, { "epoch": 1.5633752564445633, "grad_norm": 0.4859166145324707, "learning_rate": 5.6472661281658125e-05, "loss": 0.5242, "step": 8764 }, { "epoch": 1.5635536526625635, "grad_norm": 0.5274991393089294, "learning_rate": 5.642831582236096e-05, "loss": 0.5842, "step": 8765 }, { "epoch": 1.5637320488805637, "grad_norm": 0.5063260197639465, "learning_rate": 5.6383985566100525e-05, "loss": 0.4953, "step": 8766 }, { "epoch": 1.563910445098564, "grad_norm": 0.5385729074478149, "learning_rate": 5.6339670516358633e-05, "loss": 0.6145, "step": 8767 }, { "epoch": 1.5640888413165641, "grad_norm": 0.429574579000473, "learning_rate": 5.6295370676615584e-05, "loss": 0.3678, "step": 8768 }, { "epoch": 1.5642672375345643, "grad_norm": 0.45450228452682495, "learning_rate": 5.625108605035076e-05, "loss": 0.4379, "step": 8769 }, { "epoch": 1.5644456337525643, "grad_norm": 0.5811430215835571, "learning_rate": 5.620681664104235e-05, "loss": 0.6731, "step": 8770 }, { "epoch": 1.5646240299705645, "grad_norm": 0.5694655179977417, "learning_rate": 5.6162562452167085e-05, "loss": 0.6028, "step": 8771 }, { "epoch": 1.5648024261885647, "grad_norm": 0.5351256728172302, "learning_rate": 5.6118323487200806e-05, "loss": 0.4978, "step": 8772 }, { "epoch": 1.5649808224065649, "grad_norm": 0.43898749351501465, "learning_rate": 5.6074099749617914e-05, "loss": 0.4938, "step": 8773 }, { "epoch": 1.565159218624565, "grad_norm": 0.5240178108215332, "learning_rate": 5.602989124289185e-05, "loss": 0.4156, "step": 8774 }, { "epoch": 1.5653376148425653, "grad_norm": 0.4986781179904938, "learning_rate": 5.598569797049466e-05, "loss": 0.4061, "step": 8775 }, { "epoch": 1.5655160110605655, "grad_norm": 0.6685777306556702, "learning_rate": 5.5941519935897164e-05, "loss": 0.8649, "step": 8776 }, { "epoch": 1.5656944072785657, "grad_norm": 0.46365004777908325, "learning_rate": 5.589735714256927e-05, "loss": 0.4269, "step": 8777 }, { "epoch": 1.5658728034965659, "grad_norm": 0.5603156089782715, "learning_rate": 5.5853209593979354e-05, "loss": 0.7361, "step": 8778 }, { "epoch": 1.566051199714566, "grad_norm": 0.5568613409996033, "learning_rate": 5.580907729359486e-05, "loss": 0.5341, "step": 8779 }, { "epoch": 1.5662295959325663, "grad_norm": 0.5259753465652466, "learning_rate": 5.5764960244881815e-05, "loss": 0.511, "step": 8780 }, { "epoch": 1.5664079921505665, "grad_norm": 0.5009199976921082, "learning_rate": 5.5720858451305255e-05, "loss": 0.5754, "step": 8781 }, { "epoch": 1.5665863883685667, "grad_norm": 0.48167309165000916, "learning_rate": 5.567677191632883e-05, "loss": 0.4789, "step": 8782 }, { "epoch": 1.5667647845865669, "grad_norm": 0.5015170574188232, "learning_rate": 5.563270064341508e-05, "loss": 0.5136, "step": 8783 }, { "epoch": 1.566943180804567, "grad_norm": 0.5342589616775513, "learning_rate": 5.558864463602548e-05, "loss": 0.6359, "step": 8784 }, { "epoch": 1.5671215770225673, "grad_norm": 0.5268881916999817, "learning_rate": 5.5544603897619976e-05, "loss": 0.5012, "step": 8785 }, { "epoch": 1.5672999732405672, "grad_norm": 0.5006667375564575, "learning_rate": 5.5500578431657675e-05, "loss": 0.5217, "step": 8786 }, { "epoch": 1.5674783694585674, "grad_norm": 0.49677345156669617, "learning_rate": 5.545656824159617e-05, "loss": 0.5327, "step": 8787 }, { "epoch": 1.5676567656765676, "grad_norm": 0.5768446326255798, "learning_rate": 5.5412573330892165e-05, "loss": 0.6573, "step": 8788 }, { "epoch": 1.5678351618945678, "grad_norm": 0.48240020871162415, "learning_rate": 5.53685937030009e-05, "loss": 0.5009, "step": 8789 }, { "epoch": 1.568013558112568, "grad_norm": 0.4898053705692291, "learning_rate": 5.532462936137647e-05, "loss": 0.5342, "step": 8790 }, { "epoch": 1.5681919543305682, "grad_norm": 0.5353338122367859, "learning_rate": 5.528068030947192e-05, "loss": 0.5924, "step": 8791 }, { "epoch": 1.5683703505485682, "grad_norm": 0.47058504819869995, "learning_rate": 5.52367465507389e-05, "loss": 0.4101, "step": 8792 }, { "epoch": 1.5685487467665684, "grad_norm": 0.47491228580474854, "learning_rate": 5.519282808862805e-05, "loss": 0.4776, "step": 8793 }, { "epoch": 1.5687271429845686, "grad_norm": 0.47631198167800903, "learning_rate": 5.5148924926588574e-05, "loss": 0.4497, "step": 8794 }, { "epoch": 1.5689055392025688, "grad_norm": 0.4845799207687378, "learning_rate": 5.510503706806877e-05, "loss": 0.5219, "step": 8795 }, { "epoch": 1.569083935420569, "grad_norm": 0.5517118573188782, "learning_rate": 5.506116451651547e-05, "loss": 0.5573, "step": 8796 }, { "epoch": 1.5692623316385692, "grad_norm": 0.529208242893219, "learning_rate": 5.501730727537435e-05, "loss": 0.5191, "step": 8797 }, { "epoch": 1.5694407278565694, "grad_norm": 0.48718565702438354, "learning_rate": 5.497346534809011e-05, "loss": 0.4791, "step": 8798 }, { "epoch": 1.5696191240745696, "grad_norm": 0.5780670642852783, "learning_rate": 5.4929638738105805e-05, "loss": 0.5571, "step": 8799 }, { "epoch": 1.5697975202925698, "grad_norm": 0.5497144460678101, "learning_rate": 5.488582744886386e-05, "loss": 0.6058, "step": 8800 }, { "epoch": 1.56997591651057, "grad_norm": 0.5026035904884338, "learning_rate": 5.484203148380509e-05, "loss": 0.436, "step": 8801 }, { "epoch": 1.5701543127285702, "grad_norm": 0.4526662528514862, "learning_rate": 5.479825084636911e-05, "loss": 0.4857, "step": 8802 }, { "epoch": 1.5703327089465704, "grad_norm": 0.5049575567245483, "learning_rate": 5.475448553999454e-05, "loss": 0.5069, "step": 8803 }, { "epoch": 1.5705111051645706, "grad_norm": 0.503604531288147, "learning_rate": 5.471073556811862e-05, "loss": 0.5236, "step": 8804 }, { "epoch": 1.5706895013825708, "grad_norm": 0.5270532369613647, "learning_rate": 5.4667000934177564e-05, "loss": 0.5193, "step": 8805 }, { "epoch": 1.570867897600571, "grad_norm": 0.49302002787590027, "learning_rate": 5.4623281641606096e-05, "loss": 0.4832, "step": 8806 }, { "epoch": 1.5710462938185712, "grad_norm": 0.4588510990142822, "learning_rate": 5.457957769383812e-05, "loss": 0.4937, "step": 8807 }, { "epoch": 1.5712246900365712, "grad_norm": 0.4889686703681946, "learning_rate": 5.453588909430593e-05, "loss": 0.4553, "step": 8808 }, { "epoch": 1.5714030862545714, "grad_norm": 0.536016047000885, "learning_rate": 5.4492215846440953e-05, "loss": 0.6425, "step": 8809 }, { "epoch": 1.5715814824725716, "grad_norm": 0.4554092288017273, "learning_rate": 5.4448557953673204e-05, "loss": 0.434, "step": 8810 }, { "epoch": 1.5717598786905718, "grad_norm": 0.4832884669303894, "learning_rate": 5.440491541943152e-05, "loss": 0.4417, "step": 8811 }, { "epoch": 1.571938274908572, "grad_norm": 0.5221408009529114, "learning_rate": 5.4361288247143646e-05, "loss": 0.5256, "step": 8812 }, { "epoch": 1.5721166711265722, "grad_norm": 0.4202393889427185, "learning_rate": 5.4317676440235967e-05, "loss": 0.3462, "step": 8813 }, { "epoch": 1.5722950673445721, "grad_norm": 0.4951157867908478, "learning_rate": 5.427408000213374e-05, "loss": 0.4871, "step": 8814 }, { "epoch": 1.5724734635625723, "grad_norm": 0.4902489483356476, "learning_rate": 5.423049893626114e-05, "loss": 0.5418, "step": 8815 }, { "epoch": 1.5726518597805725, "grad_norm": 0.5259934067726135, "learning_rate": 5.418693324604082e-05, "loss": 0.5426, "step": 8816 }, { "epoch": 1.5728302559985727, "grad_norm": 0.5680667757987976, "learning_rate": 5.414338293489457e-05, "loss": 0.6104, "step": 8817 }, { "epoch": 1.573008652216573, "grad_norm": 0.5012747645378113, "learning_rate": 5.409984800624265e-05, "loss": 0.5192, "step": 8818 }, { "epoch": 1.5731870484345731, "grad_norm": 0.48108309507369995, "learning_rate": 5.4056328463504475e-05, "loss": 0.5373, "step": 8819 }, { "epoch": 1.5733654446525733, "grad_norm": 0.5479810237884521, "learning_rate": 5.401282431009785e-05, "loss": 0.6286, "step": 8820 }, { "epoch": 1.5735438408705735, "grad_norm": 0.5330629944801331, "learning_rate": 5.3969335549439726e-05, "loss": 0.5358, "step": 8821 }, { "epoch": 1.5737222370885737, "grad_norm": 0.5190926194190979, "learning_rate": 5.392586218494563e-05, "loss": 0.6252, "step": 8822 }, { "epoch": 1.573900633306574, "grad_norm": 0.5403081178665161, "learning_rate": 5.388240422002991e-05, "loss": 0.5921, "step": 8823 }, { "epoch": 1.5740790295245741, "grad_norm": 0.5377727746963501, "learning_rate": 5.383896165810578e-05, "loss": 0.4856, "step": 8824 }, { "epoch": 1.5742574257425743, "grad_norm": 0.513603925704956, "learning_rate": 5.3795534502585146e-05, "loss": 0.5591, "step": 8825 }, { "epoch": 1.5744358219605745, "grad_norm": 0.5131911039352417, "learning_rate": 5.375212275687888e-05, "loss": 0.4422, "step": 8826 }, { "epoch": 1.5746142181785747, "grad_norm": 0.519425630569458, "learning_rate": 5.3708726424396365e-05, "loss": 0.5824, "step": 8827 }, { "epoch": 1.574792614396575, "grad_norm": 0.47717246413230896, "learning_rate": 5.366534550854607e-05, "loss": 0.4575, "step": 8828 }, { "epoch": 1.5749710106145751, "grad_norm": 0.563754677772522, "learning_rate": 5.3621980012734965e-05, "loss": 0.5664, "step": 8829 }, { "epoch": 1.575149406832575, "grad_norm": 0.5223338603973389, "learning_rate": 5.357862994036905e-05, "loss": 0.498, "step": 8830 }, { "epoch": 1.5753278030505753, "grad_norm": 0.5984678268432617, "learning_rate": 5.35352952948531e-05, "loss": 0.6791, "step": 8831 }, { "epoch": 1.5755061992685755, "grad_norm": 0.554014265537262, "learning_rate": 5.349197607959042e-05, "loss": 0.6039, "step": 8832 }, { "epoch": 1.5756845954865757, "grad_norm": 0.561259388923645, "learning_rate": 5.3448672297983445e-05, "loss": 0.5535, "step": 8833 }, { "epoch": 1.5758629917045759, "grad_norm": 0.49919480085372925, "learning_rate": 5.3405383953433196e-05, "loss": 0.4131, "step": 8834 }, { "epoch": 1.576041387922576, "grad_norm": 0.5846698880195618, "learning_rate": 5.336211104933938e-05, "loss": 0.6158, "step": 8835 }, { "epoch": 1.576219784140576, "grad_norm": 0.5364250540733337, "learning_rate": 5.3318853589100824e-05, "loss": 0.6105, "step": 8836 }, { "epoch": 1.5763981803585763, "grad_norm": 0.5680550932884216, "learning_rate": 5.3275611576114825e-05, "loss": 0.7472, "step": 8837 }, { "epoch": 1.5765765765765765, "grad_norm": 0.4823981821537018, "learning_rate": 5.323238501377767e-05, "loss": 0.4895, "step": 8838 }, { "epoch": 1.5767549727945767, "grad_norm": 0.6050514578819275, "learning_rate": 5.318917390548428e-05, "loss": 0.7538, "step": 8839 }, { "epoch": 1.5769333690125769, "grad_norm": 0.4770009517669678, "learning_rate": 5.314597825462852e-05, "loss": 0.4793, "step": 8840 }, { "epoch": 1.577111765230577, "grad_norm": 0.4747209846973419, "learning_rate": 5.310279806460286e-05, "loss": 0.4359, "step": 8841 }, { "epoch": 1.5772901614485773, "grad_norm": 0.44378408789634705, "learning_rate": 5.305963333879879e-05, "loss": 0.3823, "step": 8842 }, { "epoch": 1.5774685576665775, "grad_norm": 0.4758981466293335, "learning_rate": 5.301648408060633e-05, "loss": 0.6459, "step": 8843 }, { "epoch": 1.5776469538845777, "grad_norm": 0.5262596011161804, "learning_rate": 5.297335029341433e-05, "loss": 0.5195, "step": 8844 }, { "epoch": 1.5778253501025779, "grad_norm": 0.4246023893356323, "learning_rate": 5.2930231980610756e-05, "loss": 0.3224, "step": 8845 }, { "epoch": 1.578003746320578, "grad_norm": 0.502919614315033, "learning_rate": 5.288712914558189e-05, "loss": 0.5362, "step": 8846 }, { "epoch": 1.5781821425385782, "grad_norm": 0.5143377184867859, "learning_rate": 5.284404179171312e-05, "loss": 0.5843, "step": 8847 }, { "epoch": 1.5783605387565784, "grad_norm": 0.48670274019241333, "learning_rate": 5.2800969922388474e-05, "loss": 0.4592, "step": 8848 }, { "epoch": 1.5785389349745786, "grad_norm": 0.45652881264686584, "learning_rate": 5.2757913540990715e-05, "loss": 0.42, "step": 8849 }, { "epoch": 1.5787173311925788, "grad_norm": 0.5632809996604919, "learning_rate": 5.271487265090164e-05, "loss": 0.7437, "step": 8850 }, { "epoch": 1.578895727410579, "grad_norm": 0.5193389654159546, "learning_rate": 5.267184725550148e-05, "loss": 0.6235, "step": 8851 }, { "epoch": 1.5790741236285792, "grad_norm": 0.45982155203819275, "learning_rate": 5.262883735816959e-05, "loss": 0.4623, "step": 8852 }, { "epoch": 1.5792525198465792, "grad_norm": 0.5154650211334229, "learning_rate": 5.25858429622838e-05, "loss": 0.537, "step": 8853 }, { "epoch": 1.5794309160645794, "grad_norm": 0.46093055605888367, "learning_rate": 5.254286407122103e-05, "loss": 0.4579, "step": 8854 }, { "epoch": 1.5796093122825796, "grad_norm": 0.4196965992450714, "learning_rate": 5.249990068835675e-05, "loss": 0.3861, "step": 8855 }, { "epoch": 1.5797877085005798, "grad_norm": 0.5842517614364624, "learning_rate": 5.24569528170652e-05, "loss": 0.68, "step": 8856 }, { "epoch": 1.57996610471858, "grad_norm": 0.5660757422447205, "learning_rate": 5.2414020460719636e-05, "loss": 0.5804, "step": 8857 }, { "epoch": 1.58014450093658, "grad_norm": 0.49591630697250366, "learning_rate": 5.237110362269182e-05, "loss": 0.5518, "step": 8858 }, { "epoch": 1.5803228971545802, "grad_norm": 0.5173941850662231, "learning_rate": 5.232820230635255e-05, "loss": 0.4721, "step": 8859 }, { "epoch": 1.5805012933725804, "grad_norm": 0.538420557975769, "learning_rate": 5.228531651507112e-05, "loss": 0.5329, "step": 8860 }, { "epoch": 1.5806796895905806, "grad_norm": 0.5405678749084473, "learning_rate": 5.2242446252215856e-05, "loss": 0.652, "step": 8861 }, { "epoch": 1.5808580858085808, "grad_norm": 0.5772570967674255, "learning_rate": 5.219959152115381e-05, "loss": 0.459, "step": 8862 }, { "epoch": 1.581036482026581, "grad_norm": 0.5803507566452026, "learning_rate": 5.215675232525069e-05, "loss": 0.6247, "step": 8863 }, { "epoch": 1.5812148782445812, "grad_norm": 0.49514850974082947, "learning_rate": 5.211392866787115e-05, "loss": 0.4549, "step": 8864 }, { "epoch": 1.5813932744625814, "grad_norm": 0.5379515290260315, "learning_rate": 5.207112055237842e-05, "loss": 0.6696, "step": 8865 }, { "epoch": 1.5815716706805816, "grad_norm": 0.5178011059761047, "learning_rate": 5.20283279821348e-05, "loss": 0.5623, "step": 8866 }, { "epoch": 1.5817500668985818, "grad_norm": 0.4703742563724518, "learning_rate": 5.198555096050103e-05, "loss": 0.5029, "step": 8867 }, { "epoch": 1.581928463116582, "grad_norm": 0.5060920119285583, "learning_rate": 5.194278949083695e-05, "loss": 0.5898, "step": 8868 }, { "epoch": 1.5821068593345822, "grad_norm": 0.4721207618713379, "learning_rate": 5.1900043576500936e-05, "loss": 0.3526, "step": 8869 }, { "epoch": 1.5822852555525824, "grad_norm": 0.46023473143577576, "learning_rate": 5.185731322085019e-05, "loss": 0.5206, "step": 8870 }, { "epoch": 1.5824636517705826, "grad_norm": 0.46701812744140625, "learning_rate": 5.181459842724087e-05, "loss": 0.4646, "step": 8871 }, { "epoch": 1.5826420479885828, "grad_norm": 0.5196385383605957, "learning_rate": 5.177189919902761e-05, "loss": 0.4874, "step": 8872 }, { "epoch": 1.582820444206583, "grad_norm": 0.5126696228981018, "learning_rate": 5.172921553956417e-05, "loss": 0.525, "step": 8873 }, { "epoch": 1.5829988404245832, "grad_norm": 0.5253236889839172, "learning_rate": 5.168654745220275e-05, "loss": 0.5603, "step": 8874 }, { "epoch": 1.5831772366425831, "grad_norm": 0.5543179512023926, "learning_rate": 5.164389494029456e-05, "loss": 0.6045, "step": 8875 }, { "epoch": 1.5833556328605833, "grad_norm": 0.4786223769187927, "learning_rate": 5.160125800718956e-05, "loss": 0.4731, "step": 8876 }, { "epoch": 1.5835340290785835, "grad_norm": 0.5591602921485901, "learning_rate": 5.1558636656236306e-05, "loss": 0.6183, "step": 8877 }, { "epoch": 1.5837124252965837, "grad_norm": 0.6518511772155762, "learning_rate": 5.151603089078241e-05, "loss": 0.5388, "step": 8878 }, { "epoch": 1.583890821514584, "grad_norm": 0.5040208101272583, "learning_rate": 5.147344071417398e-05, "loss": 0.4481, "step": 8879 }, { "epoch": 1.584069217732584, "grad_norm": 0.533556342124939, "learning_rate": 5.143086612975611e-05, "loss": 0.5437, "step": 8880 }, { "epoch": 1.584247613950584, "grad_norm": 0.4733051657676697, "learning_rate": 5.1388307140872614e-05, "loss": 0.51, "step": 8881 }, { "epoch": 1.5844260101685843, "grad_norm": 0.5339987277984619, "learning_rate": 5.134576375086591e-05, "loss": 0.5314, "step": 8882 }, { "epoch": 1.5846044063865845, "grad_norm": 0.477483332157135, "learning_rate": 5.130323596307751e-05, "loss": 0.3984, "step": 8883 }, { "epoch": 1.5847828026045847, "grad_norm": 0.509962260723114, "learning_rate": 5.126072378084737e-05, "loss": 0.461, "step": 8884 }, { "epoch": 1.584961198822585, "grad_norm": 0.5546766519546509, "learning_rate": 5.121822720751454e-05, "loss": 0.6239, "step": 8885 }, { "epoch": 1.585139595040585, "grad_norm": 0.5097823739051819, "learning_rate": 5.117574624641652e-05, "loss": 0.4881, "step": 8886 }, { "epoch": 1.5853179912585853, "grad_norm": 0.43369320034980774, "learning_rate": 5.1133280900889924e-05, "loss": 0.3753, "step": 8887 }, { "epoch": 1.5854963874765855, "grad_norm": 0.523571252822876, "learning_rate": 5.109083117426977e-05, "loss": 0.6094, "step": 8888 }, { "epoch": 1.5856747836945857, "grad_norm": 0.5631095170974731, "learning_rate": 5.104839706989023e-05, "loss": 0.5408, "step": 8889 }, { "epoch": 1.585853179912586, "grad_norm": 0.4018095135688782, "learning_rate": 5.100597859108388e-05, "loss": 0.3599, "step": 8890 }, { "epoch": 1.586031576130586, "grad_norm": 0.5422604084014893, "learning_rate": 5.096357574118235e-05, "loss": 0.4818, "step": 8891 }, { "epoch": 1.5862099723485863, "grad_norm": 0.4900394082069397, "learning_rate": 5.092118852351599e-05, "loss": 0.4453, "step": 8892 }, { "epoch": 1.5863883685665865, "grad_norm": 0.5061340928077698, "learning_rate": 5.0878816941413744e-05, "loss": 0.5332, "step": 8893 }, { "epoch": 1.5865667647845867, "grad_norm": 0.4389035999774933, "learning_rate": 5.0836460998203606e-05, "loss": 0.361, "step": 8894 }, { "epoch": 1.586745161002587, "grad_norm": 0.533852756023407, "learning_rate": 5.0794120697212094e-05, "loss": 0.5116, "step": 8895 }, { "epoch": 1.586923557220587, "grad_norm": 0.5132311582565308, "learning_rate": 5.075179604176458e-05, "loss": 0.5874, "step": 8896 }, { "epoch": 1.587101953438587, "grad_norm": 0.49478766322135925, "learning_rate": 5.07094870351853e-05, "loss": 0.4595, "step": 8897 }, { "epoch": 1.5872803496565873, "grad_norm": 0.5392327904701233, "learning_rate": 5.066719368079708e-05, "loss": 0.7202, "step": 8898 }, { "epoch": 1.5874587458745875, "grad_norm": 0.5095738768577576, "learning_rate": 5.062491598192179e-05, "loss": 0.5124, "step": 8899 }, { "epoch": 1.5876371420925877, "grad_norm": 0.5276995301246643, "learning_rate": 5.058265394187969e-05, "loss": 0.6014, "step": 8900 }, { "epoch": 1.5878155383105879, "grad_norm": 0.5482483506202698, "learning_rate": 5.054040756399023e-05, "loss": 0.7651, "step": 8901 }, { "epoch": 1.5879939345285878, "grad_norm": 0.46668532490730286, "learning_rate": 5.049817685157132e-05, "loss": 0.4828, "step": 8902 }, { "epoch": 1.588172330746588, "grad_norm": 0.6453546285629272, "learning_rate": 5.045596180793968e-05, "loss": 0.7255, "step": 8903 }, { "epoch": 1.5883507269645882, "grad_norm": 0.47187116742134094, "learning_rate": 5.041376243641099e-05, "loss": 0.4884, "step": 8904 }, { "epoch": 1.5885291231825884, "grad_norm": 0.4838643968105316, "learning_rate": 5.037157874029946e-05, "loss": 0.5405, "step": 8905 }, { "epoch": 1.5887075194005886, "grad_norm": 0.5167852640151978, "learning_rate": 5.032941072291822e-05, "loss": 0.6482, "step": 8906 }, { "epoch": 1.5888859156185888, "grad_norm": 0.44085460901260376, "learning_rate": 5.028725838757919e-05, "loss": 0.4037, "step": 8907 }, { "epoch": 1.589064311836589, "grad_norm": 0.5068758726119995, "learning_rate": 5.024512173759288e-05, "loss": 0.5228, "step": 8908 }, { "epoch": 1.5892427080545892, "grad_norm": 0.4763888418674469, "learning_rate": 5.020300077626883e-05, "loss": 0.4412, "step": 8909 }, { "epoch": 1.5894211042725894, "grad_norm": 0.5120984315872192, "learning_rate": 5.016089550691505e-05, "loss": 0.5497, "step": 8910 }, { "epoch": 1.5895995004905896, "grad_norm": 0.5709746479988098, "learning_rate": 5.0118805932838604e-05, "loss": 0.5747, "step": 8911 }, { "epoch": 1.5897778967085898, "grad_norm": 0.5391095280647278, "learning_rate": 5.0076732057345034e-05, "loss": 0.547, "step": 8912 }, { "epoch": 1.58995629292659, "grad_norm": 0.5848898887634277, "learning_rate": 5.0034673883738974e-05, "loss": 0.5088, "step": 8913 }, { "epoch": 1.5901346891445902, "grad_norm": 0.4863363206386566, "learning_rate": 4.999263141532359e-05, "loss": 0.4024, "step": 8914 }, { "epoch": 1.5903130853625904, "grad_norm": 0.5417057871818542, "learning_rate": 4.99506046554008e-05, "loss": 0.5766, "step": 8915 }, { "epoch": 1.5904914815805906, "grad_norm": 0.5208100080490112, "learning_rate": 4.990859360727148e-05, "loss": 0.617, "step": 8916 }, { "epoch": 1.5906698777985908, "grad_norm": 0.5628762245178223, "learning_rate": 4.9866598274235064e-05, "loss": 0.6443, "step": 8917 }, { "epoch": 1.590848274016591, "grad_norm": 0.5478858351707458, "learning_rate": 4.9824618659589957e-05, "loss": 0.7009, "step": 8918 }, { "epoch": 1.591026670234591, "grad_norm": 0.5053479075431824, "learning_rate": 4.9782654766633076e-05, "loss": 0.4551, "step": 8919 }, { "epoch": 1.5912050664525912, "grad_norm": 0.46800491213798523, "learning_rate": 4.9740706598660396e-05, "loss": 0.4591, "step": 8920 }, { "epoch": 1.5913834626705914, "grad_norm": 0.47018712759017944, "learning_rate": 4.9698774158966395e-05, "loss": 0.4372, "step": 8921 }, { "epoch": 1.5915618588885916, "grad_norm": 0.4378306567668915, "learning_rate": 4.965685745084447e-05, "loss": 0.4064, "step": 8922 }, { "epoch": 1.5917402551065918, "grad_norm": 0.4751421809196472, "learning_rate": 4.961495647758679e-05, "loss": 0.5386, "step": 8923 }, { "epoch": 1.5919186513245918, "grad_norm": 0.4474140703678131, "learning_rate": 4.957307124248417e-05, "loss": 0.5067, "step": 8924 }, { "epoch": 1.592097047542592, "grad_norm": 0.5166023969650269, "learning_rate": 4.9531201748826335e-05, "loss": 0.5903, "step": 8925 }, { "epoch": 1.5922754437605922, "grad_norm": 0.5033789277076721, "learning_rate": 4.9489347999901567e-05, "loss": 0.5299, "step": 8926 }, { "epoch": 1.5924538399785924, "grad_norm": 0.4894052743911743, "learning_rate": 4.944750999899719e-05, "loss": 0.4912, "step": 8927 }, { "epoch": 1.5926322361965926, "grad_norm": 0.484478235244751, "learning_rate": 4.9405687749399076e-05, "loss": 0.5635, "step": 8928 }, { "epoch": 1.5928106324145928, "grad_norm": 0.47258055210113525, "learning_rate": 4.936388125439184e-05, "loss": 0.4479, "step": 8929 }, { "epoch": 1.592989028632593, "grad_norm": 0.4957214593887329, "learning_rate": 4.932209051725914e-05, "loss": 0.554, "step": 8930 }, { "epoch": 1.5931674248505932, "grad_norm": 0.5196431279182434, "learning_rate": 4.9280315541282985e-05, "loss": 0.5595, "step": 8931 }, { "epoch": 1.5933458210685933, "grad_norm": 0.4598284065723419, "learning_rate": 4.923855632974455e-05, "loss": 0.421, "step": 8932 }, { "epoch": 1.5935242172865935, "grad_norm": 0.42510855197906494, "learning_rate": 4.919681288592345e-05, "loss": 0.3646, "step": 8933 }, { "epoch": 1.5937026135045937, "grad_norm": 0.5351304411888123, "learning_rate": 4.91550852130983e-05, "loss": 0.5159, "step": 8934 }, { "epoch": 1.593881009722594, "grad_norm": 0.499664306640625, "learning_rate": 4.911337331454635e-05, "loss": 0.4688, "step": 8935 }, { "epoch": 1.5940594059405941, "grad_norm": 0.47697949409484863, "learning_rate": 4.907167719354347e-05, "loss": 0.4619, "step": 8936 }, { "epoch": 1.5942378021585943, "grad_norm": 0.48516783118247986, "learning_rate": 4.9029996853364734e-05, "loss": 0.5222, "step": 8937 }, { "epoch": 1.5944161983765945, "grad_norm": 0.4567139446735382, "learning_rate": 4.898833229728347e-05, "loss": 0.4139, "step": 8938 }, { "epoch": 1.5945945945945947, "grad_norm": 0.5564625859260559, "learning_rate": 4.894668352857218e-05, "loss": 0.5797, "step": 8939 }, { "epoch": 1.594772990812595, "grad_norm": 0.5010836124420166, "learning_rate": 4.890505055050182e-05, "loss": 0.5232, "step": 8940 }, { "epoch": 1.594951387030595, "grad_norm": 0.5170525908470154, "learning_rate": 4.886343336634222e-05, "loss": 0.5822, "step": 8941 }, { "epoch": 1.5951297832485951, "grad_norm": 0.5084660649299622, "learning_rate": 4.8821831979362044e-05, "loss": 0.5954, "step": 8942 }, { "epoch": 1.5953081794665953, "grad_norm": 0.49296459555625916, "learning_rate": 4.878024639282855e-05, "loss": 0.5069, "step": 8943 }, { "epoch": 1.5954865756845955, "grad_norm": 0.515155017375946, "learning_rate": 4.8738676610008e-05, "loss": 0.5101, "step": 8944 }, { "epoch": 1.5956649719025957, "grad_norm": 0.3873237073421478, "learning_rate": 4.869712263416509e-05, "loss": 0.2959, "step": 8945 }, { "epoch": 1.5958433681205957, "grad_norm": 0.4798237681388855, "learning_rate": 4.865558446856361e-05, "loss": 0.5004, "step": 8946 }, { "epoch": 1.5960217643385959, "grad_norm": 0.5282852053642273, "learning_rate": 4.8614062116465826e-05, "loss": 0.5694, "step": 8947 }, { "epoch": 1.596200160556596, "grad_norm": 0.43830955028533936, "learning_rate": 4.8572555581132995e-05, "loss": 0.4124, "step": 8948 }, { "epoch": 1.5963785567745963, "grad_norm": 0.5239266753196716, "learning_rate": 4.853106486582498e-05, "loss": 0.6721, "step": 8949 }, { "epoch": 1.5965569529925965, "grad_norm": 0.46557673811912537, "learning_rate": 4.8489589973800346e-05, "loss": 0.4868, "step": 8950 }, { "epoch": 1.5967353492105967, "grad_norm": 0.5202271342277527, "learning_rate": 4.844813090831668e-05, "loss": 0.734, "step": 8951 }, { "epoch": 1.5969137454285969, "grad_norm": 0.4756893515586853, "learning_rate": 4.840668767262993e-05, "loss": 0.5069, "step": 8952 }, { "epoch": 1.597092141646597, "grad_norm": 0.40755748748779297, "learning_rate": 4.836526026999532e-05, "loss": 0.3434, "step": 8953 }, { "epoch": 1.5972705378645973, "grad_norm": 0.5153047442436218, "learning_rate": 4.8323848703666405e-05, "loss": 0.4607, "step": 8954 }, { "epoch": 1.5974489340825975, "grad_norm": 0.5465795397758484, "learning_rate": 4.8282452976895566e-05, "loss": 0.5989, "step": 8955 }, { "epoch": 1.5976273303005977, "grad_norm": 0.47764524817466736, "learning_rate": 4.8241073092934104e-05, "loss": 0.4355, "step": 8956 }, { "epoch": 1.5978057265185979, "grad_norm": 0.5183848142623901, "learning_rate": 4.8199709055031876e-05, "loss": 0.4979, "step": 8957 }, { "epoch": 1.597984122736598, "grad_norm": 0.55250483751297, "learning_rate": 4.815836086643774e-05, "loss": 0.5273, "step": 8958 }, { "epoch": 1.5981625189545983, "grad_norm": 0.4400150775909424, "learning_rate": 4.811702853039901e-05, "loss": 0.4164, "step": 8959 }, { "epoch": 1.5983409151725985, "grad_norm": 0.5529983639717102, "learning_rate": 4.807571205016206e-05, "loss": 0.6202, "step": 8960 }, { "epoch": 1.5985193113905987, "grad_norm": 0.5263171792030334, "learning_rate": 4.803441142897178e-05, "loss": 0.5444, "step": 8961 }, { "epoch": 1.5986977076085989, "grad_norm": 0.4600481688976288, "learning_rate": 4.799312667007183e-05, "loss": 0.4249, "step": 8962 }, { "epoch": 1.5988761038265988, "grad_norm": 0.5363233089447021, "learning_rate": 4.795185777670485e-05, "loss": 0.4851, "step": 8963 }, { "epoch": 1.599054500044599, "grad_norm": 0.478712797164917, "learning_rate": 4.791060475211198e-05, "loss": 0.3852, "step": 8964 }, { "epoch": 1.5992328962625992, "grad_norm": 0.5279228687286377, "learning_rate": 4.7869367599533284e-05, "loss": 0.6256, "step": 8965 }, { "epoch": 1.5994112924805994, "grad_norm": 0.4477089047431946, "learning_rate": 4.782814632220742e-05, "loss": 0.4195, "step": 8966 }, { "epoch": 1.5995896886985996, "grad_norm": 0.5009279847145081, "learning_rate": 4.778694092337194e-05, "loss": 0.5, "step": 8967 }, { "epoch": 1.5997680849165996, "grad_norm": 0.47871533036231995, "learning_rate": 4.7745751406263163e-05, "loss": 0.5258, "step": 8968 }, { "epoch": 1.5999464811345998, "grad_norm": 0.47330227494239807, "learning_rate": 4.770457777411597e-05, "loss": 0.4042, "step": 8969 }, { "epoch": 1.6001248773526, "grad_norm": 0.4700087308883667, "learning_rate": 4.766342003016424e-05, "loss": 0.462, "step": 8970 }, { "epoch": 1.6003032735706002, "grad_norm": 0.5046184659004211, "learning_rate": 4.7622278177640366e-05, "loss": 0.4632, "step": 8971 }, { "epoch": 1.6004816697886004, "grad_norm": 0.5220001935958862, "learning_rate": 4.758115221977574e-05, "loss": 0.531, "step": 8972 }, { "epoch": 1.6006600660066006, "grad_norm": 0.5016428232192993, "learning_rate": 4.7540042159800264e-05, "loss": 0.5008, "step": 8973 }, { "epoch": 1.6008384622246008, "grad_norm": 0.4888211786746979, "learning_rate": 4.7498948000942814e-05, "loss": 0.4499, "step": 8974 }, { "epoch": 1.601016858442601, "grad_norm": 0.5536308288574219, "learning_rate": 4.745786974643082e-05, "loss": 0.5715, "step": 8975 }, { "epoch": 1.6011952546606012, "grad_norm": 0.48591530323028564, "learning_rate": 4.741680739949053e-05, "loss": 0.4089, "step": 8976 }, { "epoch": 1.6013736508786014, "grad_norm": 0.6232868432998657, "learning_rate": 4.7375760963347056e-05, "loss": 0.5869, "step": 8977 }, { "epoch": 1.6015520470966016, "grad_norm": 0.5616008043289185, "learning_rate": 4.733473044122408e-05, "loss": 0.5639, "step": 8978 }, { "epoch": 1.6017304433146018, "grad_norm": 0.5132565498352051, "learning_rate": 4.72937158363442e-05, "loss": 0.53, "step": 8979 }, { "epoch": 1.601908839532602, "grad_norm": 0.5778293609619141, "learning_rate": 4.725271715192861e-05, "loss": 0.7178, "step": 8980 }, { "epoch": 1.6020872357506022, "grad_norm": 0.5367584228515625, "learning_rate": 4.721173439119742e-05, "loss": 0.5345, "step": 8981 }, { "epoch": 1.6022656319686024, "grad_norm": 0.4915335476398468, "learning_rate": 4.7170767557369264e-05, "loss": 0.5284, "step": 8982 }, { "epoch": 1.6024440281866026, "grad_norm": 0.524190366268158, "learning_rate": 4.712981665366176e-05, "loss": 0.5688, "step": 8983 }, { "epoch": 1.6026224244046028, "grad_norm": 0.5471283793449402, "learning_rate": 4.70888816832912e-05, "loss": 0.5799, "step": 8984 }, { "epoch": 1.6028008206226028, "grad_norm": 0.5151530504226685, "learning_rate": 4.7047962649472504e-05, "loss": 0.5424, "step": 8985 }, { "epoch": 1.602979216840603, "grad_norm": 0.5410858988761902, "learning_rate": 4.7007059555419535e-05, "loss": 0.624, "step": 8986 }, { "epoch": 1.6031576130586032, "grad_norm": 0.4905013144016266, "learning_rate": 4.696617240434475e-05, "loss": 0.4588, "step": 8987 }, { "epoch": 1.6033360092766034, "grad_norm": 0.4888882637023926, "learning_rate": 4.692530119945937e-05, "loss": 0.4577, "step": 8988 }, { "epoch": 1.6035144054946036, "grad_norm": 0.4958007335662842, "learning_rate": 4.688444594397351e-05, "loss": 0.519, "step": 8989 }, { "epoch": 1.6036928017126035, "grad_norm": 0.5347129106521606, "learning_rate": 4.684360664109577e-05, "loss": 0.687, "step": 8990 }, { "epoch": 1.6038711979306037, "grad_norm": 0.5105026364326477, "learning_rate": 4.680278329403381e-05, "loss": 0.5317, "step": 8991 }, { "epoch": 1.604049594148604, "grad_norm": 0.4922098219394684, "learning_rate": 4.676197590599377e-05, "loss": 0.482, "step": 8992 }, { "epoch": 1.6042279903666041, "grad_norm": 0.44692763686180115, "learning_rate": 4.672118448018073e-05, "loss": 0.4144, "step": 8993 }, { "epoch": 1.6044063865846043, "grad_norm": 0.5374802947044373, "learning_rate": 4.6680409019798364e-05, "loss": 0.6081, "step": 8994 }, { "epoch": 1.6045847828026045, "grad_norm": 0.54610675573349, "learning_rate": 4.6639649528049135e-05, "loss": 0.6023, "step": 8995 }, { "epoch": 1.6047631790206047, "grad_norm": 0.4369504451751709, "learning_rate": 4.659890600813438e-05, "loss": 0.357, "step": 8996 }, { "epoch": 1.604941575238605, "grad_norm": 0.47287872433662415, "learning_rate": 4.6558178463253944e-05, "loss": 0.4559, "step": 8997 }, { "epoch": 1.6051199714566051, "grad_norm": 0.4916176497936249, "learning_rate": 4.651746689660663e-05, "loss": 0.5313, "step": 8998 }, { "epoch": 1.6052983676746053, "grad_norm": 0.5396872758865356, "learning_rate": 4.647677131138997e-05, "loss": 0.5313, "step": 8999 }, { "epoch": 1.6054767638926055, "grad_norm": 0.5110781788825989, "learning_rate": 4.643609171080001e-05, "loss": 0.5488, "step": 9000 }, { "epoch": 1.6056551601106057, "grad_norm": 0.5322538018226624, "learning_rate": 4.63954280980319e-05, "loss": 0.5717, "step": 9001 }, { "epoch": 1.605833556328606, "grad_norm": 0.49910300970077515, "learning_rate": 4.63547804762792e-05, "loss": 0.5186, "step": 9002 }, { "epoch": 1.6060119525466061, "grad_norm": 0.48461076617240906, "learning_rate": 4.631414884873444e-05, "loss": 0.5344, "step": 9003 }, { "epoch": 1.6061903487646063, "grad_norm": 0.626358151435852, "learning_rate": 4.627353321858874e-05, "loss": 0.7774, "step": 9004 }, { "epoch": 1.6063687449826065, "grad_norm": 0.6041339039802551, "learning_rate": 4.6232933589032105e-05, "loss": 0.6707, "step": 9005 }, { "epoch": 1.6065471412006067, "grad_norm": 0.4728602468967438, "learning_rate": 4.619234996325314e-05, "loss": 0.4132, "step": 9006 }, { "epoch": 1.6067255374186067, "grad_norm": 0.5530493259429932, "learning_rate": 4.6151782344439366e-05, "loss": 0.5413, "step": 9007 }, { "epoch": 1.6069039336366069, "grad_norm": 0.5874399542808533, "learning_rate": 4.611123073577686e-05, "loss": 0.7141, "step": 9008 }, { "epoch": 1.607082329854607, "grad_norm": 0.5239754319190979, "learning_rate": 4.607069514045051e-05, "loss": 0.6276, "step": 9009 }, { "epoch": 1.6072607260726073, "grad_norm": 0.5062006115913391, "learning_rate": 4.603017556164407e-05, "loss": 0.4942, "step": 9010 }, { "epoch": 1.6074391222906075, "grad_norm": 0.49783483147621155, "learning_rate": 4.5989672002539785e-05, "loss": 0.4906, "step": 9011 }, { "epoch": 1.6076175185086075, "grad_norm": 0.5406367182731628, "learning_rate": 4.594918446631896e-05, "loss": 0.6172, "step": 9012 }, { "epoch": 1.6077959147266077, "grad_norm": 0.48416727781295776, "learning_rate": 4.590871295616128e-05, "loss": 0.5055, "step": 9013 }, { "epoch": 1.6079743109446079, "grad_norm": 0.5911693572998047, "learning_rate": 4.586825747524548e-05, "loss": 0.7576, "step": 9014 }, { "epoch": 1.608152707162608, "grad_norm": 0.5087306499481201, "learning_rate": 4.582781802674896e-05, "loss": 0.4797, "step": 9015 }, { "epoch": 1.6083311033806083, "grad_norm": 0.4570036232471466, "learning_rate": 4.578739461384765e-05, "loss": 0.5285, "step": 9016 }, { "epoch": 1.6085094995986084, "grad_norm": 0.46012383699417114, "learning_rate": 4.57469872397166e-05, "loss": 0.4309, "step": 9017 }, { "epoch": 1.6086878958166086, "grad_norm": 0.4837871789932251, "learning_rate": 4.570659590752918e-05, "loss": 0.5151, "step": 9018 }, { "epoch": 1.6088662920346088, "grad_norm": 0.5085347890853882, "learning_rate": 4.566622062045786e-05, "loss": 0.6172, "step": 9019 }, { "epoch": 1.609044688252609, "grad_norm": 0.4480245113372803, "learning_rate": 4.562586138167368e-05, "loss": 0.3621, "step": 9020 }, { "epoch": 1.6092230844706092, "grad_norm": 0.5031635761260986, "learning_rate": 4.558551819434631e-05, "loss": 0.5342, "step": 9021 }, { "epoch": 1.6094014806886094, "grad_norm": 0.5782181024551392, "learning_rate": 4.554519106164442e-05, "loss": 0.551, "step": 9022 }, { "epoch": 1.6095798769066096, "grad_norm": 0.5325855612754822, "learning_rate": 4.550487998673519e-05, "loss": 0.5286, "step": 9023 }, { "epoch": 1.6097582731246098, "grad_norm": 0.4554942548274994, "learning_rate": 4.5464584972784774e-05, "loss": 0.453, "step": 9024 }, { "epoch": 1.60993666934261, "grad_norm": 0.5228267312049866, "learning_rate": 4.5424306022957745e-05, "loss": 0.6083, "step": 9025 }, { "epoch": 1.6101150655606102, "grad_norm": 0.5161296725273132, "learning_rate": 4.538404314041775e-05, "loss": 0.5022, "step": 9026 }, { "epoch": 1.6102934617786104, "grad_norm": 0.5143990516662598, "learning_rate": 4.534379632832691e-05, "loss": 0.4937, "step": 9027 }, { "epoch": 1.6104718579966106, "grad_norm": 0.483243852853775, "learning_rate": 4.530356558984622e-05, "loss": 0.4821, "step": 9028 }, { "epoch": 1.6106502542146106, "grad_norm": 0.5501946210861206, "learning_rate": 4.5263350928135465e-05, "loss": 0.6569, "step": 9029 }, { "epoch": 1.6108286504326108, "grad_norm": 0.5732588171958923, "learning_rate": 4.5223152346352964e-05, "loss": 0.7411, "step": 9030 }, { "epoch": 1.611007046650611, "grad_norm": 0.4644021987915039, "learning_rate": 4.518296984765599e-05, "loss": 0.506, "step": 9031 }, { "epoch": 1.6111854428686112, "grad_norm": 0.5346475839614868, "learning_rate": 4.514280343520041e-05, "loss": 0.5926, "step": 9032 }, { "epoch": 1.6113638390866114, "grad_norm": 0.4996803104877472, "learning_rate": 4.510265311214093e-05, "loss": 0.4819, "step": 9033 }, { "epoch": 1.6115422353046114, "grad_norm": 0.5373480319976807, "learning_rate": 4.50625188816309e-05, "loss": 0.5144, "step": 9034 }, { "epoch": 1.6117206315226116, "grad_norm": 0.5674717426300049, "learning_rate": 4.5022400746822374e-05, "loss": 0.5336, "step": 9035 }, { "epoch": 1.6118990277406118, "grad_norm": 0.531956672668457, "learning_rate": 4.498229871086637e-05, "loss": 0.4986, "step": 9036 }, { "epoch": 1.612077423958612, "grad_norm": 0.5014679431915283, "learning_rate": 4.4942212776912325e-05, "loss": 0.5051, "step": 9037 }, { "epoch": 1.6122558201766122, "grad_norm": 0.5485700368881226, "learning_rate": 4.4902142948108684e-05, "loss": 0.502, "step": 9038 }, { "epoch": 1.6124342163946124, "grad_norm": 0.5426976084709167, "learning_rate": 4.486208922760243e-05, "loss": 0.5571, "step": 9039 }, { "epoch": 1.6126126126126126, "grad_norm": 0.49919578433036804, "learning_rate": 4.48220516185395e-05, "loss": 0.4595, "step": 9040 }, { "epoch": 1.6127910088306128, "grad_norm": 0.43158578872680664, "learning_rate": 4.4782030124064314e-05, "loss": 0.3117, "step": 9041 }, { "epoch": 1.612969405048613, "grad_norm": 0.5657439231872559, "learning_rate": 4.474202474732011e-05, "loss": 0.5668, "step": 9042 }, { "epoch": 1.6131478012666132, "grad_norm": 0.5453231334686279, "learning_rate": 4.470203549144902e-05, "loss": 0.6741, "step": 9043 }, { "epoch": 1.6133261974846134, "grad_norm": 0.5499635338783264, "learning_rate": 4.4662062359591585e-05, "loss": 0.719, "step": 9044 }, { "epoch": 1.6135045937026136, "grad_norm": 0.43844422698020935, "learning_rate": 4.4622105354887534e-05, "loss": 0.3209, "step": 9045 }, { "epoch": 1.6136829899206138, "grad_norm": 0.5634275078773499, "learning_rate": 4.458216448047494e-05, "loss": 0.5132, "step": 9046 }, { "epoch": 1.613861386138614, "grad_norm": 0.4690277874469757, "learning_rate": 4.4542239739490705e-05, "loss": 0.3584, "step": 9047 }, { "epoch": 1.6140397823566142, "grad_norm": 0.4696029722690582, "learning_rate": 4.45023311350706e-05, "loss": 0.4606, "step": 9048 }, { "epoch": 1.6142181785746144, "grad_norm": 0.4961758553981781, "learning_rate": 4.446243867034891e-05, "loss": 0.5177, "step": 9049 }, { "epoch": 1.6143965747926146, "grad_norm": 0.5290566682815552, "learning_rate": 4.44225623484589e-05, "loss": 0.5366, "step": 9050 }, { "epoch": 1.6145749710106145, "grad_norm": 0.5504355430603027, "learning_rate": 4.438270217253232e-05, "loss": 0.6723, "step": 9051 }, { "epoch": 1.6147533672286147, "grad_norm": 0.49992886185646057, "learning_rate": 4.434285814569988e-05, "loss": 0.5295, "step": 9052 }, { "epoch": 1.614931763446615, "grad_norm": 0.5225392580032349, "learning_rate": 4.430303027109081e-05, "loss": 0.575, "step": 9053 }, { "epoch": 1.6151101596646151, "grad_norm": 0.5753360390663147, "learning_rate": 4.4263218551833294e-05, "loss": 0.6629, "step": 9054 }, { "epoch": 1.6152885558826153, "grad_norm": 0.4710274040699005, "learning_rate": 4.422342299105403e-05, "loss": 0.5181, "step": 9055 }, { "epoch": 1.6154669521006153, "grad_norm": 0.5554249286651611, "learning_rate": 4.4183643591878515e-05, "loss": 0.6106, "step": 9056 }, { "epoch": 1.6156453483186155, "grad_norm": 0.5206986665725708, "learning_rate": 4.414388035743114e-05, "loss": 0.5476, "step": 9057 }, { "epoch": 1.6158237445366157, "grad_norm": 0.5251834392547607, "learning_rate": 4.410413329083473e-05, "loss": 0.6463, "step": 9058 }, { "epoch": 1.616002140754616, "grad_norm": 0.49586114287376404, "learning_rate": 4.4064402395211116e-05, "loss": 0.427, "step": 9059 }, { "epoch": 1.616180536972616, "grad_norm": 0.4815883934497833, "learning_rate": 4.402468767368076e-05, "loss": 0.4655, "step": 9060 }, { "epoch": 1.6163589331906163, "grad_norm": 0.5105904936790466, "learning_rate": 4.3984989129362744e-05, "loss": 0.4924, "step": 9061 }, { "epoch": 1.6165373294086165, "grad_norm": 0.47116348147392273, "learning_rate": 4.3945306765375086e-05, "loss": 0.4197, "step": 9062 }, { "epoch": 1.6167157256266167, "grad_norm": 0.516294538974762, "learning_rate": 4.390564058483429e-05, "loss": 0.5377, "step": 9063 }, { "epoch": 1.616894121844617, "grad_norm": 0.4575921595096588, "learning_rate": 4.3865990590855885e-05, "loss": 0.5038, "step": 9064 }, { "epoch": 1.617072518062617, "grad_norm": 0.5077314376831055, "learning_rate": 4.3826356786553776e-05, "loss": 0.4385, "step": 9065 }, { "epoch": 1.6172509142806173, "grad_norm": 0.4583306908607483, "learning_rate": 4.378673917504094e-05, "loss": 0.4456, "step": 9066 }, { "epoch": 1.6174293104986175, "grad_norm": 0.5505391955375671, "learning_rate": 4.37471377594289e-05, "loss": 0.5579, "step": 9067 }, { "epoch": 1.6176077067166177, "grad_norm": 0.501794159412384, "learning_rate": 4.3707552542827824e-05, "loss": 0.4491, "step": 9068 }, { "epoch": 1.6177861029346179, "grad_norm": 0.4820462465286255, "learning_rate": 4.366798352834686e-05, "loss": 0.4494, "step": 9069 }, { "epoch": 1.617964499152618, "grad_norm": 0.6349626183509827, "learning_rate": 4.3628430719093614e-05, "loss": 0.6564, "step": 9070 }, { "epoch": 1.6181428953706183, "grad_norm": 0.577896773815155, "learning_rate": 4.3588894118174685e-05, "loss": 0.6188, "step": 9071 }, { "epoch": 1.6183212915886185, "grad_norm": 0.5295639634132385, "learning_rate": 4.3549373728695105e-05, "loss": 0.5548, "step": 9072 }, { "epoch": 1.6184996878066185, "grad_norm": 0.5311222076416016, "learning_rate": 4.350986955375893e-05, "loss": 0.4302, "step": 9073 }, { "epoch": 1.6186780840246187, "grad_norm": 0.4720657467842102, "learning_rate": 4.3470381596468714e-05, "loss": 0.4666, "step": 9074 }, { "epoch": 1.6188564802426189, "grad_norm": 0.4858959913253784, "learning_rate": 4.3430909859925814e-05, "loss": 0.5341, "step": 9075 }, { "epoch": 1.619034876460619, "grad_norm": 0.5353506207466125, "learning_rate": 4.339145434723044e-05, "loss": 0.5534, "step": 9076 }, { "epoch": 1.6192132726786193, "grad_norm": 0.4330075681209564, "learning_rate": 4.335201506148126e-05, "loss": 0.363, "step": 9077 }, { "epoch": 1.6193916688966192, "grad_norm": 0.5149824023246765, "learning_rate": 4.3312592005775946e-05, "loss": 0.5245, "step": 9078 }, { "epoch": 1.6195700651146194, "grad_norm": 0.49177101254463196, "learning_rate": 4.327318518321074e-05, "loss": 0.5551, "step": 9079 }, { "epoch": 1.6197484613326196, "grad_norm": 0.47272542119026184, "learning_rate": 4.323379459688051e-05, "loss": 0.4035, "step": 9080 }, { "epoch": 1.6199268575506198, "grad_norm": 0.509996235370636, "learning_rate": 4.319442024987916e-05, "loss": 0.5317, "step": 9081 }, { "epoch": 1.62010525376862, "grad_norm": 0.5650163888931274, "learning_rate": 4.315506214529899e-05, "loss": 0.7105, "step": 9082 }, { "epoch": 1.6202836499866202, "grad_norm": 0.5097303986549377, "learning_rate": 4.3115720286231257e-05, "loss": 0.4972, "step": 9083 }, { "epoch": 1.6204620462046204, "grad_norm": 0.5151503086090088, "learning_rate": 4.3076394675765796e-05, "loss": 0.5673, "step": 9084 }, { "epoch": 1.6206404424226206, "grad_norm": 0.5100242495536804, "learning_rate": 4.30370853169913e-05, "loss": 0.4747, "step": 9085 }, { "epoch": 1.6208188386406208, "grad_norm": 0.5815212726593018, "learning_rate": 4.299779221299499e-05, "loss": 0.6796, "step": 9086 }, { "epoch": 1.620997234858621, "grad_norm": 0.5294939875602722, "learning_rate": 4.2958515366863075e-05, "loss": 0.5649, "step": 9087 }, { "epoch": 1.6211756310766212, "grad_norm": 0.45899614691734314, "learning_rate": 4.291925478168024e-05, "loss": 0.4118, "step": 9088 }, { "epoch": 1.6213540272946214, "grad_norm": 0.5030704736709595, "learning_rate": 4.288001046052992e-05, "loss": 0.5717, "step": 9089 }, { "epoch": 1.6215324235126216, "grad_norm": 0.4515380561351776, "learning_rate": 4.284078240649458e-05, "loss": 0.4641, "step": 9090 }, { "epoch": 1.6217108197306218, "grad_norm": 0.5059680938720703, "learning_rate": 4.280157062265497e-05, "loss": 0.4881, "step": 9091 }, { "epoch": 1.621889215948622, "grad_norm": 0.4840905964374542, "learning_rate": 4.2762375112090886e-05, "loss": 0.4164, "step": 9092 }, { "epoch": 1.6220676121666222, "grad_norm": 0.5307683348655701, "learning_rate": 4.2723195877880706e-05, "loss": 0.5169, "step": 9093 }, { "epoch": 1.6222460083846224, "grad_norm": 0.6119624376296997, "learning_rate": 4.268403292310144e-05, "loss": 0.4207, "step": 9094 }, { "epoch": 1.6224244046026224, "grad_norm": 0.4793750047683716, "learning_rate": 4.264488625082907e-05, "loss": 0.4304, "step": 9095 }, { "epoch": 1.6226028008206226, "grad_norm": 0.5105946660041809, "learning_rate": 4.260575586413806e-05, "loss": 0.4994, "step": 9096 }, { "epoch": 1.6227811970386228, "grad_norm": 0.5425340533256531, "learning_rate": 4.256664176610178e-05, "loss": 0.5135, "step": 9097 }, { "epoch": 1.622959593256623, "grad_norm": 0.5028733611106873, "learning_rate": 4.252754395979216e-05, "loss": 0.4335, "step": 9098 }, { "epoch": 1.6231379894746232, "grad_norm": 0.48356130719184875, "learning_rate": 4.2488462448280005e-05, "loss": 0.3819, "step": 9099 }, { "epoch": 1.6233163856926232, "grad_norm": 0.5290387868881226, "learning_rate": 4.244939723463467e-05, "loss": 0.4932, "step": 9100 }, { "epoch": 1.6234947819106234, "grad_norm": 0.4586488604545593, "learning_rate": 4.241034832192434e-05, "loss": 0.3835, "step": 9101 }, { "epoch": 1.6236731781286236, "grad_norm": 0.5234144926071167, "learning_rate": 4.237131571321598e-05, "loss": 0.5544, "step": 9102 }, { "epoch": 1.6238515743466237, "grad_norm": 0.46121421456336975, "learning_rate": 4.233229941157504e-05, "loss": 0.3899, "step": 9103 }, { "epoch": 1.624029970564624, "grad_norm": 0.500857412815094, "learning_rate": 4.229329942006604e-05, "loss": 0.6343, "step": 9104 }, { "epoch": 1.6242083667826241, "grad_norm": 0.47724878787994385, "learning_rate": 4.225431574175184e-05, "loss": 0.4782, "step": 9105 }, { "epoch": 1.6243867630006243, "grad_norm": 0.46206358075141907, "learning_rate": 4.221534837969429e-05, "loss": 0.44, "step": 9106 }, { "epoch": 1.6245651592186245, "grad_norm": 0.5365794897079468, "learning_rate": 4.217639733695391e-05, "loss": 0.5337, "step": 9107 }, { "epoch": 1.6247435554366247, "grad_norm": 0.49346688389778137, "learning_rate": 4.21374626165898e-05, "loss": 0.4771, "step": 9108 }, { "epoch": 1.624921951654625, "grad_norm": 0.5585914254188538, "learning_rate": 4.209854422165998e-05, "loss": 0.5715, "step": 9109 }, { "epoch": 1.6251003478726251, "grad_norm": 0.49180254340171814, "learning_rate": 4.205964215522096e-05, "loss": 0.5383, "step": 9110 }, { "epoch": 1.6252787440906253, "grad_norm": 0.5028634667396545, "learning_rate": 4.202075642032824e-05, "loss": 0.5635, "step": 9111 }, { "epoch": 1.6254571403086255, "grad_norm": 0.583336353302002, "learning_rate": 4.198188702003575e-05, "loss": 0.6608, "step": 9112 }, { "epoch": 1.6256355365266257, "grad_norm": 0.5647164583206177, "learning_rate": 4.194303395739638e-05, "loss": 0.7343, "step": 9113 }, { "epoch": 1.625813932744626, "grad_norm": 0.4452681839466095, "learning_rate": 4.19041972354616e-05, "loss": 0.4388, "step": 9114 }, { "epoch": 1.6259923289626261, "grad_norm": 0.3759744167327881, "learning_rate": 4.186537685728156e-05, "loss": 0.2556, "step": 9115 }, { "epoch": 1.6261707251806263, "grad_norm": 0.6420390605926514, "learning_rate": 4.1826572825905296e-05, "loss": 0.8147, "step": 9116 }, { "epoch": 1.6263491213986263, "grad_norm": 0.4719489812850952, "learning_rate": 4.178778514438036e-05, "loss": 0.501, "step": 9117 }, { "epoch": 1.6265275176166265, "grad_norm": 0.5943500995635986, "learning_rate": 4.174901381575327e-05, "loss": 0.5954, "step": 9118 }, { "epoch": 1.6267059138346267, "grad_norm": 0.45145153999328613, "learning_rate": 4.171025884306892e-05, "loss": 0.3752, "step": 9119 }, { "epoch": 1.626884310052627, "grad_norm": 0.49264243245124817, "learning_rate": 4.1671520229371234e-05, "loss": 0.3958, "step": 9120 }, { "epoch": 1.627062706270627, "grad_norm": 0.5612894296646118, "learning_rate": 4.163279797770275e-05, "loss": 0.6676, "step": 9121 }, { "epoch": 1.627241102488627, "grad_norm": 0.6251209378242493, "learning_rate": 4.1594092091104594e-05, "loss": 0.7134, "step": 9122 }, { "epoch": 1.6274194987066273, "grad_norm": 0.49562308192253113, "learning_rate": 4.155540257261681e-05, "loss": 0.5082, "step": 9123 }, { "epoch": 1.6275978949246275, "grad_norm": 0.6067000031471252, "learning_rate": 4.1516729425277924e-05, "loss": 0.6371, "step": 9124 }, { "epoch": 1.6277762911426277, "grad_norm": 0.46219685673713684, "learning_rate": 4.14780726521255e-05, "loss": 0.4726, "step": 9125 }, { "epoch": 1.6279546873606279, "grad_norm": 0.5683661103248596, "learning_rate": 4.143943225619548e-05, "loss": 0.6561, "step": 9126 }, { "epoch": 1.628133083578628, "grad_norm": 0.5201235413551331, "learning_rate": 4.140080824052264e-05, "loss": 0.6107, "step": 9127 }, { "epoch": 1.6283114797966283, "grad_norm": 0.5444372296333313, "learning_rate": 4.1362200608140635e-05, "loss": 0.6235, "step": 9128 }, { "epoch": 1.6284898760146285, "grad_norm": 0.5139498114585876, "learning_rate": 4.132360936208154e-05, "loss": 0.4917, "step": 9129 }, { "epoch": 1.6286682722326287, "grad_norm": 0.43743810057640076, "learning_rate": 4.1285034505376436e-05, "loss": 0.4096, "step": 9130 }, { "epoch": 1.6288466684506289, "grad_norm": 0.5461446046829224, "learning_rate": 4.124647604105483e-05, "loss": 0.6073, "step": 9131 }, { "epoch": 1.629025064668629, "grad_norm": 0.5800282955169678, "learning_rate": 4.120793397214523e-05, "loss": 0.6349, "step": 9132 }, { "epoch": 1.6292034608866293, "grad_norm": 0.6056262850761414, "learning_rate": 4.1169408301674566e-05, "loss": 0.5903, "step": 9133 }, { "epoch": 1.6293818571046295, "grad_norm": 0.47324472665786743, "learning_rate": 4.113089903266879e-05, "loss": 0.4385, "step": 9134 }, { "epoch": 1.6295602533226297, "grad_norm": 0.5741338133811951, "learning_rate": 4.109240616815227e-05, "loss": 0.6246, "step": 9135 }, { "epoch": 1.6297386495406299, "grad_norm": 0.5018966794013977, "learning_rate": 4.105392971114824e-05, "loss": 0.5335, "step": 9136 }, { "epoch": 1.62991704575863, "grad_norm": 0.5375863909721375, "learning_rate": 4.101546966467873e-05, "loss": 0.6031, "step": 9137 }, { "epoch": 1.6300954419766303, "grad_norm": 0.5858981013298035, "learning_rate": 4.0977026031764286e-05, "loss": 0.4568, "step": 9138 }, { "epoch": 1.6302738381946302, "grad_norm": 0.43643826246261597, "learning_rate": 4.093859881542422e-05, "loss": 0.3848, "step": 9139 }, { "epoch": 1.6304522344126304, "grad_norm": 0.5104368329048157, "learning_rate": 4.09001880186767e-05, "loss": 0.5939, "step": 9140 }, { "epoch": 1.6306306306306306, "grad_norm": 0.5159505605697632, "learning_rate": 4.0861793644538374e-05, "loss": 0.55, "step": 9141 }, { "epoch": 1.6308090268486308, "grad_norm": 0.5293989777565002, "learning_rate": 4.082341569602482e-05, "loss": 0.5429, "step": 9142 }, { "epoch": 1.630987423066631, "grad_norm": 0.42517781257629395, "learning_rate": 4.0785054176150135e-05, "loss": 0.4012, "step": 9143 }, { "epoch": 1.631165819284631, "grad_norm": 0.5382733941078186, "learning_rate": 4.07467090879273e-05, "loss": 0.5455, "step": 9144 }, { "epoch": 1.6313442155026312, "grad_norm": 0.503835141658783, "learning_rate": 4.0708380434367864e-05, "loss": 0.5749, "step": 9145 }, { "epoch": 1.6315226117206314, "grad_norm": 0.44363367557525635, "learning_rate": 4.067006821848218e-05, "loss": 0.4532, "step": 9146 }, { "epoch": 1.6317010079386316, "grad_norm": 0.4841151833534241, "learning_rate": 4.063177244327929e-05, "loss": 0.4812, "step": 9147 }, { "epoch": 1.6318794041566318, "grad_norm": 0.4346253573894501, "learning_rate": 4.059349311176683e-05, "loss": 0.4208, "step": 9148 }, { "epoch": 1.632057800374632, "grad_norm": 0.46320968866348267, "learning_rate": 4.055523022695135e-05, "loss": 0.5005, "step": 9149 }, { "epoch": 1.6322361965926322, "grad_norm": 0.4925728440284729, "learning_rate": 4.051698379183791e-05, "loss": 0.5086, "step": 9150 }, { "epoch": 1.6324145928106324, "grad_norm": 0.4870889484882355, "learning_rate": 4.047875380943039e-05, "loss": 0.4466, "step": 9151 }, { "epoch": 1.6325929890286326, "grad_norm": 0.4800761342048645, "learning_rate": 4.0440540282731476e-05, "loss": 0.4516, "step": 9152 }, { "epoch": 1.6327713852466328, "grad_norm": 0.5413798689842224, "learning_rate": 4.040234321474226e-05, "loss": 0.455, "step": 9153 }, { "epoch": 1.632949781464633, "grad_norm": 0.4921496510505676, "learning_rate": 4.0364162608462904e-05, "loss": 0.4303, "step": 9154 }, { "epoch": 1.6331281776826332, "grad_norm": 0.5225081443786621, "learning_rate": 4.0325998466891914e-05, "loss": 0.4553, "step": 9155 }, { "epoch": 1.6333065739006334, "grad_norm": 0.4533441364765167, "learning_rate": 4.0287850793026825e-05, "loss": 0.4079, "step": 9156 }, { "epoch": 1.6334849701186336, "grad_norm": 0.521223247051239, "learning_rate": 4.024971958986365e-05, "loss": 0.5097, "step": 9157 }, { "epoch": 1.6336633663366338, "grad_norm": 0.4792344272136688, "learning_rate": 4.0211604860397295e-05, "loss": 0.4567, "step": 9158 }, { "epoch": 1.633841762554634, "grad_norm": 0.5261356830596924, "learning_rate": 4.0173506607621227e-05, "loss": 0.5597, "step": 9159 }, { "epoch": 1.6340201587726342, "grad_norm": 0.5096288919448853, "learning_rate": 4.013542483452759e-05, "loss": 0.3727, "step": 9160 }, { "epoch": 1.6341985549906342, "grad_norm": 0.5453589558601379, "learning_rate": 4.0097359544107424e-05, "loss": 0.5976, "step": 9161 }, { "epoch": 1.6343769512086344, "grad_norm": 0.5257166028022766, "learning_rate": 4.005931073935024e-05, "loss": 0.4802, "step": 9162 }, { "epoch": 1.6345553474266346, "grad_norm": 0.4476458728313446, "learning_rate": 4.002127842324452e-05, "loss": 0.4914, "step": 9163 }, { "epoch": 1.6347337436446348, "grad_norm": 0.4358140230178833, "learning_rate": 3.998326259877716e-05, "loss": 0.3802, "step": 9164 }, { "epoch": 1.634912139862635, "grad_norm": 0.4720216989517212, "learning_rate": 3.994526326893405e-05, "loss": 0.4643, "step": 9165 }, { "epoch": 1.635090536080635, "grad_norm": 0.5170959234237671, "learning_rate": 3.990728043669953e-05, "loss": 0.6022, "step": 9166 }, { "epoch": 1.6352689322986351, "grad_norm": 0.49744245409965515, "learning_rate": 3.986931410505676e-05, "loss": 0.4991, "step": 9167 }, { "epoch": 1.6354473285166353, "grad_norm": 0.5377562642097473, "learning_rate": 3.9831364276987717e-05, "loss": 0.5476, "step": 9168 }, { "epoch": 1.6356257247346355, "grad_norm": 0.5449107885360718, "learning_rate": 3.97934309554728e-05, "loss": 0.5108, "step": 9169 }, { "epoch": 1.6358041209526357, "grad_norm": 0.49150487780570984, "learning_rate": 3.9755514143491434e-05, "loss": 0.3777, "step": 9170 }, { "epoch": 1.635982517170636, "grad_norm": 0.5345403552055359, "learning_rate": 3.971761384402145e-05, "loss": 0.5278, "step": 9171 }, { "epoch": 1.6361609133886361, "grad_norm": 0.5451551079750061, "learning_rate": 3.9679730060039634e-05, "loss": 0.4898, "step": 9172 }, { "epoch": 1.6363393096066363, "grad_norm": 0.5874769687652588, "learning_rate": 3.964186279452131e-05, "loss": 0.6324, "step": 9173 }, { "epoch": 1.6365177058246365, "grad_norm": 0.47333958745002747, "learning_rate": 3.960401205044051e-05, "loss": 0.4932, "step": 9174 }, { "epoch": 1.6366961020426367, "grad_norm": 0.4722568094730377, "learning_rate": 3.95661778307701e-05, "loss": 0.3842, "step": 9175 }, { "epoch": 1.636874498260637, "grad_norm": 0.55815190076828, "learning_rate": 3.952836013848149e-05, "loss": 0.668, "step": 9176 }, { "epoch": 1.637052894478637, "grad_norm": 0.5843459963798523, "learning_rate": 3.9490558976544965e-05, "loss": 0.6534, "step": 9177 }, { "epoch": 1.6372312906966373, "grad_norm": 0.44996100664138794, "learning_rate": 3.9452774347929264e-05, "loss": 0.3837, "step": 9178 }, { "epoch": 1.6374096869146375, "grad_norm": 0.46350741386413574, "learning_rate": 3.9415006255602123e-05, "loss": 0.4841, "step": 9179 }, { "epoch": 1.6375880831326377, "grad_norm": 0.5201796889305115, "learning_rate": 3.9377254702529784e-05, "loss": 0.5497, "step": 9180 }, { "epoch": 1.637766479350638, "grad_norm": 0.5969486236572266, "learning_rate": 3.933951969167709e-05, "loss": 0.6576, "step": 9181 }, { "epoch": 1.637944875568638, "grad_norm": 0.5387462377548218, "learning_rate": 3.9301801226008014e-05, "loss": 0.7601, "step": 9182 }, { "epoch": 1.638123271786638, "grad_norm": 0.6476037502288818, "learning_rate": 3.926409930848471e-05, "loss": 0.7146, "step": 9183 }, { "epoch": 1.6383016680046383, "grad_norm": 0.48097339272499084, "learning_rate": 3.922641394206844e-05, "loss": 0.4778, "step": 9184 }, { "epoch": 1.6384800642226385, "grad_norm": 0.46164533495903015, "learning_rate": 3.91887451297189e-05, "loss": 0.3921, "step": 9185 }, { "epoch": 1.6386584604406387, "grad_norm": 0.4835575520992279, "learning_rate": 3.915109287439453e-05, "loss": 0.4748, "step": 9186 }, { "epoch": 1.6388368566586389, "grad_norm": 0.48624852299690247, "learning_rate": 3.911345717905268e-05, "loss": 0.4619, "step": 9187 }, { "epoch": 1.6390152528766389, "grad_norm": 0.5269492864608765, "learning_rate": 3.907583804664908e-05, "loss": 0.4905, "step": 9188 }, { "epoch": 1.639193649094639, "grad_norm": 0.538076639175415, "learning_rate": 3.9038235480138435e-05, "loss": 0.6448, "step": 9189 }, { "epoch": 1.6393720453126392, "grad_norm": 0.5943297147750854, "learning_rate": 3.9000649482473946e-05, "loss": 0.6616, "step": 9190 }, { "epoch": 1.6395504415306394, "grad_norm": 0.509810745716095, "learning_rate": 3.8963080056607705e-05, "loss": 0.5005, "step": 9191 }, { "epoch": 1.6397288377486396, "grad_norm": 0.5434547662734985, "learning_rate": 3.892552720549028e-05, "loss": 0.5415, "step": 9192 }, { "epoch": 1.6399072339666398, "grad_norm": 0.47341305017471313, "learning_rate": 3.8887990932071156e-05, "loss": 0.4871, "step": 9193 }, { "epoch": 1.64008563018464, "grad_norm": 0.5444488525390625, "learning_rate": 3.885047123929841e-05, "loss": 0.6654, "step": 9194 }, { "epoch": 1.6402640264026402, "grad_norm": 0.49865496158599854, "learning_rate": 3.8812968130118696e-05, "loss": 0.5161, "step": 9195 }, { "epoch": 1.6404424226206404, "grad_norm": 0.6822000741958618, "learning_rate": 3.877548160747768e-05, "loss": 0.7265, "step": 9196 }, { "epoch": 1.6406208188386406, "grad_norm": 0.5185505151748657, "learning_rate": 3.873801167431928e-05, "loss": 0.4423, "step": 9197 }, { "epoch": 1.6407992150566408, "grad_norm": 0.5268929600715637, "learning_rate": 3.8700558333586686e-05, "loss": 0.563, "step": 9198 }, { "epoch": 1.640977611274641, "grad_norm": 0.5534122586250305, "learning_rate": 3.86631215882213e-05, "loss": 0.6052, "step": 9199 }, { "epoch": 1.6411560074926412, "grad_norm": 0.4893801212310791, "learning_rate": 3.862570144116334e-05, "loss": 0.5551, "step": 9200 }, { "epoch": 1.6413344037106414, "grad_norm": 0.5151475667953491, "learning_rate": 3.858829789535187e-05, "loss": 0.5263, "step": 9201 }, { "epoch": 1.6415127999286416, "grad_norm": 0.5644826292991638, "learning_rate": 3.8550910953724456e-05, "loss": 0.5625, "step": 9202 }, { "epoch": 1.6416911961466418, "grad_norm": 0.5256550312042236, "learning_rate": 3.851354061921758e-05, "loss": 0.6462, "step": 9203 }, { "epoch": 1.641869592364642, "grad_norm": 0.47919219732284546, "learning_rate": 3.847618689476612e-05, "loss": 0.4722, "step": 9204 }, { "epoch": 1.642047988582642, "grad_norm": 0.5013111233711243, "learning_rate": 3.8438849783304e-05, "loss": 0.5176, "step": 9205 }, { "epoch": 1.6422263848006422, "grad_norm": 0.5357193350791931, "learning_rate": 3.840152928776358e-05, "loss": 0.6217, "step": 9206 }, { "epoch": 1.6424047810186424, "grad_norm": 0.5302641987800598, "learning_rate": 3.836422541107593e-05, "loss": 0.5448, "step": 9207 }, { "epoch": 1.6425831772366426, "grad_norm": 0.5999414324760437, "learning_rate": 3.832693815617097e-05, "loss": 0.5155, "step": 9208 }, { "epoch": 1.6427615734546428, "grad_norm": 0.5149564146995544, "learning_rate": 3.828966752597718e-05, "loss": 0.5502, "step": 9209 }, { "epoch": 1.6429399696726428, "grad_norm": 0.46984735131263733, "learning_rate": 3.8252413523421816e-05, "loss": 0.3823, "step": 9210 }, { "epoch": 1.643118365890643, "grad_norm": 0.4093347191810608, "learning_rate": 3.821517615143075e-05, "loss": 0.3818, "step": 9211 }, { "epoch": 1.6432967621086432, "grad_norm": 0.5009578466415405, "learning_rate": 3.817795541292859e-05, "loss": 0.5433, "step": 9212 }, { "epoch": 1.6434751583266434, "grad_norm": 0.5088503360748291, "learning_rate": 3.8140751310838715e-05, "loss": 0.4853, "step": 9213 }, { "epoch": 1.6436535545446436, "grad_norm": 0.42338132858276367, "learning_rate": 3.810356384808303e-05, "loss": 0.3726, "step": 9214 }, { "epoch": 1.6438319507626438, "grad_norm": 0.5196266770362854, "learning_rate": 3.806639302758227e-05, "loss": 0.4542, "step": 9215 }, { "epoch": 1.644010346980644, "grad_norm": 0.5105109214782715, "learning_rate": 3.802923885225576e-05, "loss": 0.4412, "step": 9216 }, { "epoch": 1.6441887431986442, "grad_norm": 0.5296480655670166, "learning_rate": 3.7992101325021674e-05, "loss": 0.5869, "step": 9217 }, { "epoch": 1.6443671394166444, "grad_norm": 0.5003146529197693, "learning_rate": 3.7954980448796724e-05, "loss": 0.4731, "step": 9218 }, { "epoch": 1.6445455356346446, "grad_norm": 0.5123296976089478, "learning_rate": 3.7917876226496284e-05, "loss": 0.5295, "step": 9219 }, { "epoch": 1.6447239318526448, "grad_norm": 0.45916229486465454, "learning_rate": 3.788078866103467e-05, "loss": 0.4446, "step": 9220 }, { "epoch": 1.644902328070645, "grad_norm": 0.5705180168151855, "learning_rate": 3.7843717755324525e-05, "loss": 0.5378, "step": 9221 }, { "epoch": 1.6450807242886452, "grad_norm": 0.5570235848426819, "learning_rate": 3.78066635122776e-05, "loss": 0.5277, "step": 9222 }, { "epoch": 1.6452591205066454, "grad_norm": 0.5336431860923767, "learning_rate": 3.7769625934803904e-05, "loss": 0.5871, "step": 9223 }, { "epoch": 1.6454375167246456, "grad_norm": 0.4929081201553345, "learning_rate": 3.773260502581255e-05, "loss": 0.5053, "step": 9224 }, { "epoch": 1.6456159129426458, "grad_norm": 0.4710312783718109, "learning_rate": 3.7695600788210967e-05, "loss": 0.5122, "step": 9225 }, { "epoch": 1.645794309160646, "grad_norm": 0.44707542657852173, "learning_rate": 3.7658613224905606e-05, "loss": 0.3666, "step": 9226 }, { "epoch": 1.645972705378646, "grad_norm": 0.6010311245918274, "learning_rate": 3.7621642338801335e-05, "loss": 0.5016, "step": 9227 }, { "epoch": 1.6461511015966461, "grad_norm": 0.5563137531280518, "learning_rate": 3.758468813280186e-05, "loss": 0.5181, "step": 9228 }, { "epoch": 1.6463294978146463, "grad_norm": 0.4832812547683716, "learning_rate": 3.754775060980964e-05, "loss": 0.5077, "step": 9229 }, { "epoch": 1.6465078940326465, "grad_norm": 0.5060386657714844, "learning_rate": 3.75108297727256e-05, "loss": 0.5059, "step": 9230 }, { "epoch": 1.6466862902506467, "grad_norm": 0.4577668309211731, "learning_rate": 3.7473925624449625e-05, "loss": 0.4391, "step": 9231 }, { "epoch": 1.6468646864686467, "grad_norm": 0.5205204486846924, "learning_rate": 3.743703816788005e-05, "loss": 0.614, "step": 9232 }, { "epoch": 1.647043082686647, "grad_norm": 0.550428569316864, "learning_rate": 3.740016740591398e-05, "loss": 0.558, "step": 9233 }, { "epoch": 1.647221478904647, "grad_norm": 0.5250325798988342, "learning_rate": 3.736331334144733e-05, "loss": 0.4774, "step": 9234 }, { "epoch": 1.6473998751226473, "grad_norm": 0.5304805040359497, "learning_rate": 3.7326475977374486e-05, "loss": 0.5603, "step": 9235 }, { "epoch": 1.6475782713406475, "grad_norm": 0.5167137384414673, "learning_rate": 3.728965531658876e-05, "loss": 0.5417, "step": 9236 }, { "epoch": 1.6477566675586477, "grad_norm": 0.4665099084377289, "learning_rate": 3.725285136198189e-05, "loss": 0.5014, "step": 9237 }, { "epoch": 1.647935063776648, "grad_norm": 0.5300410985946655, "learning_rate": 3.72160641164446e-05, "loss": 0.5706, "step": 9238 }, { "epoch": 1.648113459994648, "grad_norm": 0.5009768009185791, "learning_rate": 3.7179293582866064e-05, "loss": 0.4469, "step": 9239 }, { "epoch": 1.6482918562126483, "grad_norm": 0.4268481433391571, "learning_rate": 3.714253976413418e-05, "loss": 0.3472, "step": 9240 }, { "epoch": 1.6484702524306485, "grad_norm": 0.5363745093345642, "learning_rate": 3.710580266313565e-05, "loss": 0.6022, "step": 9241 }, { "epoch": 1.6486486486486487, "grad_norm": 0.5669705271720886, "learning_rate": 3.706908228275571e-05, "loss": 0.6687, "step": 9242 }, { "epoch": 1.6488270448666489, "grad_norm": 0.5033018589019775, "learning_rate": 3.703237862587844e-05, "loss": 0.4968, "step": 9243 }, { "epoch": 1.649005441084649, "grad_norm": 0.491277813911438, "learning_rate": 3.699569169538655e-05, "loss": 0.4729, "step": 9244 }, { "epoch": 1.6491838373026493, "grad_norm": 0.43952322006225586, "learning_rate": 3.695902149416133e-05, "loss": 0.3538, "step": 9245 }, { "epoch": 1.6493622335206495, "grad_norm": 0.6005352735519409, "learning_rate": 3.692236802508292e-05, "loss": 0.6145, "step": 9246 }, { "epoch": 1.6495406297386497, "grad_norm": 0.566465437412262, "learning_rate": 3.688573129102999e-05, "loss": 0.5943, "step": 9247 }, { "epoch": 1.6497190259566499, "grad_norm": 0.4471827745437622, "learning_rate": 3.6849111294880056e-05, "loss": 0.4295, "step": 9248 }, { "epoch": 1.6498974221746499, "grad_norm": 0.4851609766483307, "learning_rate": 3.681250803950914e-05, "loss": 0.4421, "step": 9249 }, { "epoch": 1.65007581839265, "grad_norm": 0.5344951152801514, "learning_rate": 3.6775921527792164e-05, "loss": 0.483, "step": 9250 }, { "epoch": 1.6502542146106502, "grad_norm": 0.5168811082839966, "learning_rate": 3.673935176260249e-05, "loss": 0.4603, "step": 9251 }, { "epoch": 1.6504326108286504, "grad_norm": 0.5323466062545776, "learning_rate": 3.67027987468124e-05, "loss": 0.5537, "step": 9252 }, { "epoch": 1.6506110070466506, "grad_norm": 0.46962469816207886, "learning_rate": 3.6666262483292715e-05, "loss": 0.3963, "step": 9253 }, { "epoch": 1.6507894032646506, "grad_norm": 0.5902261734008789, "learning_rate": 3.662974297491292e-05, "loss": 0.6236, "step": 9254 }, { "epoch": 1.6509677994826508, "grad_norm": 0.5006431937217712, "learning_rate": 3.6593240224541357e-05, "loss": 0.4853, "step": 9255 }, { "epoch": 1.651146195700651, "grad_norm": 0.6096543669700623, "learning_rate": 3.6556754235044815e-05, "loss": 0.7908, "step": 9256 }, { "epoch": 1.6513245919186512, "grad_norm": 0.4698275625705719, "learning_rate": 3.6520285009289e-05, "loss": 0.4004, "step": 9257 }, { "epoch": 1.6515029881366514, "grad_norm": 0.49396297335624695, "learning_rate": 3.648383255013804e-05, "loss": 0.5341, "step": 9258 }, { "epoch": 1.6516813843546516, "grad_norm": 0.49175316095352173, "learning_rate": 3.644739686045503e-05, "loss": 0.4549, "step": 9259 }, { "epoch": 1.6518597805726518, "grad_norm": 0.533931314945221, "learning_rate": 3.6410977943101606e-05, "loss": 0.5214, "step": 9260 }, { "epoch": 1.652038176790652, "grad_norm": 0.5131736397743225, "learning_rate": 3.6374575800938004e-05, "loss": 0.546, "step": 9261 }, { "epoch": 1.6522165730086522, "grad_norm": 0.4640941321849823, "learning_rate": 3.633819043682338e-05, "loss": 0.4511, "step": 9262 }, { "epoch": 1.6523949692266524, "grad_norm": 0.506545901298523, "learning_rate": 3.6301821853615216e-05, "loss": 0.4355, "step": 9263 }, { "epoch": 1.6525733654446526, "grad_norm": 0.5474511981010437, "learning_rate": 3.6265470054170107e-05, "loss": 0.5829, "step": 9264 }, { "epoch": 1.6527517616626528, "grad_norm": 0.5812618732452393, "learning_rate": 3.622913504134298e-05, "loss": 0.5626, "step": 9265 }, { "epoch": 1.652930157880653, "grad_norm": 0.5066866874694824, "learning_rate": 3.619281681798756e-05, "loss": 0.5741, "step": 9266 }, { "epoch": 1.6531085540986532, "grad_norm": 0.48909926414489746, "learning_rate": 3.615651538695633e-05, "loss": 0.4439, "step": 9267 }, { "epoch": 1.6532869503166534, "grad_norm": 0.5382171869277954, "learning_rate": 3.6120230751100295e-05, "loss": 0.5164, "step": 9268 }, { "epoch": 1.6534653465346536, "grad_norm": 0.5178452730178833, "learning_rate": 3.608396291326938e-05, "loss": 0.5316, "step": 9269 }, { "epoch": 1.6536437427526538, "grad_norm": 0.5068187713623047, "learning_rate": 3.6047711876311895e-05, "loss": 0.4757, "step": 9270 }, { "epoch": 1.6538221389706538, "grad_norm": 0.5174327492713928, "learning_rate": 3.601147764307511e-05, "loss": 0.4613, "step": 9271 }, { "epoch": 1.654000535188654, "grad_norm": 0.4983610212802887, "learning_rate": 3.597526021640471e-05, "loss": 0.4841, "step": 9272 }, { "epoch": 1.6541789314066542, "grad_norm": 0.5520349740982056, "learning_rate": 3.593905959914528e-05, "loss": 0.6076, "step": 9273 }, { "epoch": 1.6543573276246544, "grad_norm": 0.5307412147521973, "learning_rate": 3.590287579414006e-05, "loss": 0.5639, "step": 9274 }, { "epoch": 1.6545357238426546, "grad_norm": 0.5179523825645447, "learning_rate": 3.586670880423079e-05, "loss": 0.5217, "step": 9275 }, { "epoch": 1.6547141200606548, "grad_norm": 0.5160864591598511, "learning_rate": 3.5830558632258095e-05, "loss": 0.597, "step": 9276 }, { "epoch": 1.6548925162786547, "grad_norm": 0.5735654830932617, "learning_rate": 3.579442528106111e-05, "loss": 0.6853, "step": 9277 }, { "epoch": 1.655070912496655, "grad_norm": 0.5181892514228821, "learning_rate": 3.5758308753477855e-05, "loss": 0.4761, "step": 9278 }, { "epoch": 1.6552493087146551, "grad_norm": 0.4839540421962738, "learning_rate": 3.5722209052344826e-05, "loss": 0.444, "step": 9279 }, { "epoch": 1.6554277049326553, "grad_norm": 0.4774402678012848, "learning_rate": 3.5686126180497214e-05, "loss": 0.4561, "step": 9280 }, { "epoch": 1.6556061011506555, "grad_norm": 0.5063000321388245, "learning_rate": 3.56500601407691e-05, "loss": 0.4304, "step": 9281 }, { "epoch": 1.6557844973686557, "grad_norm": 0.39081576466560364, "learning_rate": 3.561401093599295e-05, "loss": 0.318, "step": 9282 }, { "epoch": 1.655962893586656, "grad_norm": 0.5337099432945251, "learning_rate": 3.557797856900022e-05, "loss": 0.3636, "step": 9283 }, { "epoch": 1.6561412898046561, "grad_norm": 0.5275738835334778, "learning_rate": 3.554196304262067e-05, "loss": 0.4887, "step": 9284 }, { "epoch": 1.6563196860226563, "grad_norm": 0.5422784686088562, "learning_rate": 3.5505964359683146e-05, "loss": 0.5377, "step": 9285 }, { "epoch": 1.6564980822406565, "grad_norm": 0.48426032066345215, "learning_rate": 3.546998252301487e-05, "loss": 0.4597, "step": 9286 }, { "epoch": 1.6566764784586567, "grad_norm": 0.5601605176925659, "learning_rate": 3.543401753544179e-05, "loss": 0.6027, "step": 9287 }, { "epoch": 1.656854874676657, "grad_norm": 0.4989088773727417, "learning_rate": 3.539806939978868e-05, "loss": 0.4151, "step": 9288 }, { "epoch": 1.6570332708946571, "grad_norm": 0.5130565762519836, "learning_rate": 3.536213811887876e-05, "loss": 0.4985, "step": 9289 }, { "epoch": 1.6572116671126573, "grad_norm": 0.4882428050041199, "learning_rate": 3.532622369553423e-05, "loss": 0.5041, "step": 9290 }, { "epoch": 1.6573900633306575, "grad_norm": 0.5201172828674316, "learning_rate": 3.529032613257574e-05, "loss": 0.5467, "step": 9291 }, { "epoch": 1.6575684595486577, "grad_norm": 0.5011335015296936, "learning_rate": 3.525444543282255e-05, "loss": 0.4953, "step": 9292 }, { "epoch": 1.6577468557666577, "grad_norm": 0.4961317777633667, "learning_rate": 3.521858159909289e-05, "loss": 0.4933, "step": 9293 }, { "epoch": 1.657925251984658, "grad_norm": 0.5875723958015442, "learning_rate": 3.518273463420332e-05, "loss": 0.5181, "step": 9294 }, { "epoch": 1.658103648202658, "grad_norm": 0.4927757680416107, "learning_rate": 3.5146904540969414e-05, "loss": 0.3826, "step": 9295 }, { "epoch": 1.6582820444206583, "grad_norm": 0.5020849108695984, "learning_rate": 3.511109132220508e-05, "loss": 0.4191, "step": 9296 }, { "epoch": 1.6584604406386585, "grad_norm": 0.44826123118400574, "learning_rate": 3.507529498072323e-05, "loss": 0.3812, "step": 9297 }, { "epoch": 1.6586388368566587, "grad_norm": 0.5366451144218445, "learning_rate": 3.5039515519335236e-05, "loss": 0.4824, "step": 9298 }, { "epoch": 1.6588172330746587, "grad_norm": 0.5516796112060547, "learning_rate": 3.500375294085112e-05, "loss": 0.5474, "step": 9299 }, { "epoch": 1.6589956292926589, "grad_norm": 0.5130575895309448, "learning_rate": 3.4968007248079776e-05, "loss": 0.4346, "step": 9300 }, { "epoch": 1.659174025510659, "grad_norm": 0.6171131134033203, "learning_rate": 3.493227844382857e-05, "loss": 0.6627, "step": 9301 }, { "epoch": 1.6593524217286593, "grad_norm": 0.5802091956138611, "learning_rate": 3.48965665309037e-05, "loss": 0.5415, "step": 9302 }, { "epoch": 1.6595308179466595, "grad_norm": 0.5305059552192688, "learning_rate": 3.48608715121099e-05, "loss": 0.5481, "step": 9303 }, { "epoch": 1.6597092141646597, "grad_norm": 0.5003054738044739, "learning_rate": 3.4825193390250645e-05, "loss": 0.511, "step": 9304 }, { "epoch": 1.6598876103826599, "grad_norm": 0.5035673379898071, "learning_rate": 3.478953216812816e-05, "loss": 0.5759, "step": 9305 }, { "epoch": 1.66006600660066, "grad_norm": 0.5683543682098389, "learning_rate": 3.4753887848543163e-05, "loss": 0.602, "step": 9306 }, { "epoch": 1.6602444028186603, "grad_norm": 0.4327681362628937, "learning_rate": 3.471826043429524e-05, "loss": 0.3409, "step": 9307 }, { "epoch": 1.6604227990366605, "grad_norm": 0.5470651388168335, "learning_rate": 3.468264992818246e-05, "loss": 0.5559, "step": 9308 }, { "epoch": 1.6606011952546607, "grad_norm": 0.49540919065475464, "learning_rate": 3.464705633300172e-05, "loss": 0.4914, "step": 9309 }, { "epoch": 1.6607795914726609, "grad_norm": 0.45050835609436035, "learning_rate": 3.461147965154846e-05, "loss": 0.4139, "step": 9310 }, { "epoch": 1.660957987690661, "grad_norm": 0.5284720659255981, "learning_rate": 3.457591988661696e-05, "loss": 0.4919, "step": 9311 }, { "epoch": 1.6611363839086613, "grad_norm": 0.4902994930744171, "learning_rate": 3.4540377040999995e-05, "loss": 0.4287, "step": 9312 }, { "epoch": 1.6613147801266615, "grad_norm": 0.4877963364124298, "learning_rate": 3.450485111748905e-05, "loss": 0.4539, "step": 9313 }, { "epoch": 1.6614931763446616, "grad_norm": 0.5532664656639099, "learning_rate": 3.446934211887443e-05, "loss": 0.4228, "step": 9314 }, { "epoch": 1.6616715725626616, "grad_norm": 0.5521690249443054, "learning_rate": 3.443385004794486e-05, "loss": 0.5662, "step": 9315 }, { "epoch": 1.6618499687806618, "grad_norm": 0.6033298969268799, "learning_rate": 3.439837490748798e-05, "loss": 0.6267, "step": 9316 }, { "epoch": 1.662028364998662, "grad_norm": 0.5335450172424316, "learning_rate": 3.436291670028993e-05, "loss": 0.5272, "step": 9317 }, { "epoch": 1.6622067612166622, "grad_norm": 0.5540075302124023, "learning_rate": 3.432747542913564e-05, "loss": 0.4624, "step": 9318 }, { "epoch": 1.6623851574346624, "grad_norm": 0.5262585282325745, "learning_rate": 3.429205109680858e-05, "loss": 0.526, "step": 9319 }, { "epoch": 1.6625635536526626, "grad_norm": 0.5248980522155762, "learning_rate": 3.425664370609099e-05, "loss": 0.5116, "step": 9320 }, { "epoch": 1.6627419498706626, "grad_norm": 0.547280490398407, "learning_rate": 3.422125325976383e-05, "loss": 0.561, "step": 9321 }, { "epoch": 1.6629203460886628, "grad_norm": 0.4935222268104553, "learning_rate": 3.418587976060653e-05, "loss": 0.4185, "step": 9322 }, { "epoch": 1.663098742306663, "grad_norm": 0.5188893675804138, "learning_rate": 3.415052321139739e-05, "loss": 0.5019, "step": 9323 }, { "epoch": 1.6632771385246632, "grad_norm": 0.4804520905017853, "learning_rate": 3.411518361491328e-05, "loss": 0.4503, "step": 9324 }, { "epoch": 1.6634555347426634, "grad_norm": 0.5874333381652832, "learning_rate": 3.407986097392971e-05, "loss": 0.5774, "step": 9325 }, { "epoch": 1.6636339309606636, "grad_norm": 0.49579691886901855, "learning_rate": 3.404455529122097e-05, "loss": 0.4188, "step": 9326 }, { "epoch": 1.6638123271786638, "grad_norm": 0.45815184712409973, "learning_rate": 3.400926656955988e-05, "loss": 0.4121, "step": 9327 }, { "epoch": 1.663990723396664, "grad_norm": 0.4780590534210205, "learning_rate": 3.397399481171812e-05, "loss": 0.4392, "step": 9328 }, { "epoch": 1.6641691196146642, "grad_norm": 0.4702582359313965, "learning_rate": 3.393874002046576e-05, "loss": 0.4654, "step": 9329 }, { "epoch": 1.6643475158326644, "grad_norm": 0.4759620428085327, "learning_rate": 3.3903502198571855e-05, "loss": 0.4035, "step": 9330 }, { "epoch": 1.6645259120506646, "grad_norm": 0.5115009546279907, "learning_rate": 3.386828134880382e-05, "loss": 0.5284, "step": 9331 }, { "epoch": 1.6647043082686648, "grad_norm": 0.5846856832504272, "learning_rate": 3.383307747392802e-05, "loss": 0.6325, "step": 9332 }, { "epoch": 1.664882704486665, "grad_norm": 0.6280512809753418, "learning_rate": 3.379789057670929e-05, "loss": 0.5222, "step": 9333 }, { "epoch": 1.6650611007046652, "grad_norm": 0.5682896375656128, "learning_rate": 3.376272065991115e-05, "loss": 0.5462, "step": 9334 }, { "epoch": 1.6652394969226654, "grad_norm": 0.5903425216674805, "learning_rate": 3.372756772629587e-05, "loss": 0.8133, "step": 9335 }, { "epoch": 1.6654178931406656, "grad_norm": 0.5490643978118896, "learning_rate": 3.369243177862436e-05, "loss": 0.6145, "step": 9336 }, { "epoch": 1.6655962893586655, "grad_norm": 0.5370810031890869, "learning_rate": 3.3657312819656226e-05, "loss": 0.5671, "step": 9337 }, { "epoch": 1.6657746855766657, "grad_norm": 0.5224997997283936, "learning_rate": 3.362221085214964e-05, "loss": 0.5352, "step": 9338 }, { "epoch": 1.665953081794666, "grad_norm": 0.5418823957443237, "learning_rate": 3.358712587886143e-05, "loss": 0.5853, "step": 9339 }, { "epoch": 1.6661314780126661, "grad_norm": 0.5162733197212219, "learning_rate": 3.3552057902547286e-05, "loss": 0.4823, "step": 9340 }, { "epoch": 1.6663098742306663, "grad_norm": 0.4687744081020355, "learning_rate": 3.351700692596132e-05, "loss": 0.4868, "step": 9341 }, { "epoch": 1.6664882704486665, "grad_norm": 0.5440129637718201, "learning_rate": 3.348197295185654e-05, "loss": 0.5426, "step": 9342 }, { "epoch": 1.6666666666666665, "grad_norm": 0.4801231920719147, "learning_rate": 3.344695598298436e-05, "loss": 0.5322, "step": 9343 }, { "epoch": 1.6668450628846667, "grad_norm": 0.4614768624305725, "learning_rate": 3.341195602209512e-05, "loss": 0.3709, "step": 9344 }, { "epoch": 1.667023459102667, "grad_norm": 0.4808447062969208, "learning_rate": 3.3376973071937656e-05, "loss": 0.4343, "step": 9345 }, { "epoch": 1.6672018553206671, "grad_norm": 0.42641395330429077, "learning_rate": 3.3342007135259425e-05, "loss": 0.442, "step": 9346 }, { "epoch": 1.6673802515386673, "grad_norm": 0.4673963189125061, "learning_rate": 3.3307058214806814e-05, "loss": 0.4625, "step": 9347 }, { "epoch": 1.6675586477566675, "grad_norm": 0.4446796178817749, "learning_rate": 3.327212631332452e-05, "loss": 0.3887, "step": 9348 }, { "epoch": 1.6677370439746677, "grad_norm": 0.42202529311180115, "learning_rate": 3.323721143355621e-05, "loss": 0.3426, "step": 9349 }, { "epoch": 1.667915440192668, "grad_norm": 0.5200551748275757, "learning_rate": 3.320231357824399e-05, "loss": 0.5275, "step": 9350 }, { "epoch": 1.668093836410668, "grad_norm": 0.4585854411125183, "learning_rate": 3.3167432750128764e-05, "loss": 0.4564, "step": 9351 }, { "epoch": 1.6682722326286683, "grad_norm": 0.5158877968788147, "learning_rate": 3.313256895195013e-05, "loss": 0.4869, "step": 9352 }, { "epoch": 1.6684506288466685, "grad_norm": 0.4725387692451477, "learning_rate": 3.3097722186446135e-05, "loss": 0.5002, "step": 9353 }, { "epoch": 1.6686290250646687, "grad_norm": 0.44371485710144043, "learning_rate": 3.306289245635374e-05, "loss": 0.398, "step": 9354 }, { "epoch": 1.668807421282669, "grad_norm": 0.5774705410003662, "learning_rate": 3.3028079764408386e-05, "loss": 0.5725, "step": 9355 }, { "epoch": 1.668985817500669, "grad_norm": 0.5633199214935303, "learning_rate": 3.2993284113344315e-05, "loss": 0.5249, "step": 9356 }, { "epoch": 1.6691642137186693, "grad_norm": 0.4772298336029053, "learning_rate": 3.295850550589427e-05, "loss": 0.3945, "step": 9357 }, { "epoch": 1.6693426099366695, "grad_norm": 0.4757455587387085, "learning_rate": 3.292374394478986e-05, "loss": 0.4001, "step": 9358 }, { "epoch": 1.6695210061546695, "grad_norm": 0.49446776509284973, "learning_rate": 3.288899943276119e-05, "loss": 0.3805, "step": 9359 }, { "epoch": 1.6696994023726697, "grad_norm": 0.4915049970149994, "learning_rate": 3.285427197253704e-05, "loss": 0.4954, "step": 9360 }, { "epoch": 1.6698777985906699, "grad_norm": 0.5779103636741638, "learning_rate": 3.281956156684496e-05, "loss": 0.514, "step": 9361 }, { "epoch": 1.67005619480867, "grad_norm": 0.5662655830383301, "learning_rate": 3.278486821841098e-05, "loss": 0.5022, "step": 9362 }, { "epoch": 1.6702345910266703, "grad_norm": 0.5742835998535156, "learning_rate": 3.275019192996004e-05, "loss": 0.6153, "step": 9363 }, { "epoch": 1.6704129872446705, "grad_norm": 0.517959713935852, "learning_rate": 3.271553270421551e-05, "loss": 0.4259, "step": 9364 }, { "epoch": 1.6705913834626704, "grad_norm": 0.5644903779029846, "learning_rate": 3.26808905438995e-05, "loss": 0.6748, "step": 9365 }, { "epoch": 1.6707697796806706, "grad_norm": 0.5745575428009033, "learning_rate": 3.264626545173291e-05, "loss": 0.6036, "step": 9366 }, { "epoch": 1.6709481758986708, "grad_norm": 0.5247329473495483, "learning_rate": 3.261165743043501e-05, "loss": 0.5762, "step": 9367 }, { "epoch": 1.671126572116671, "grad_norm": 0.5291442275047302, "learning_rate": 3.2577066482724074e-05, "loss": 0.5571, "step": 9368 }, { "epoch": 1.6713049683346712, "grad_norm": 0.5165200233459473, "learning_rate": 3.2542492611316696e-05, "loss": 0.6014, "step": 9369 }, { "epoch": 1.6714833645526714, "grad_norm": 0.5730751156806946, "learning_rate": 3.250793581892844e-05, "loss": 0.7261, "step": 9370 }, { "epoch": 1.6716617607706716, "grad_norm": 0.5317413806915283, "learning_rate": 3.2473396108273297e-05, "loss": 0.4659, "step": 9371 }, { "epoch": 1.6718401569886718, "grad_norm": 0.5281661152839661, "learning_rate": 3.243887348206395e-05, "loss": 0.5902, "step": 9372 }, { "epoch": 1.672018553206672, "grad_norm": 0.4783737361431122, "learning_rate": 3.240436794301194e-05, "loss": 0.5077, "step": 9373 }, { "epoch": 1.6721969494246722, "grad_norm": 0.5345422029495239, "learning_rate": 3.2369879493827167e-05, "loss": 0.6897, "step": 9374 }, { "epoch": 1.6723753456426724, "grad_norm": 0.4720827639102936, "learning_rate": 3.233540813721844e-05, "loss": 0.3754, "step": 9375 }, { "epoch": 1.6725537418606726, "grad_norm": 0.4707276225090027, "learning_rate": 3.2300953875893046e-05, "loss": 0.4113, "step": 9376 }, { "epoch": 1.6727321380786728, "grad_norm": 0.5764208436012268, "learning_rate": 3.226651671255712e-05, "loss": 0.5561, "step": 9377 }, { "epoch": 1.672910534296673, "grad_norm": 0.4676361680030823, "learning_rate": 3.2232096649915196e-05, "loss": 0.4113, "step": 9378 }, { "epoch": 1.6730889305146732, "grad_norm": 0.5252630710601807, "learning_rate": 3.219769369067077e-05, "loss": 0.5728, "step": 9379 }, { "epoch": 1.6732673267326734, "grad_norm": 0.4781394302845001, "learning_rate": 3.216330783752569e-05, "loss": 0.2909, "step": 9380 }, { "epoch": 1.6734457229506734, "grad_norm": 0.47112560272216797, "learning_rate": 3.2128939093180655e-05, "loss": 0.4191, "step": 9381 }, { "epoch": 1.6736241191686736, "grad_norm": 0.5391256809234619, "learning_rate": 3.209458746033506e-05, "loss": 0.7091, "step": 9382 }, { "epoch": 1.6738025153866738, "grad_norm": 0.49783357977867126, "learning_rate": 3.206025294168677e-05, "loss": 0.5378, "step": 9383 }, { "epoch": 1.673980911604674, "grad_norm": 0.5017059445381165, "learning_rate": 3.202593553993238e-05, "loss": 0.4537, "step": 9384 }, { "epoch": 1.6741593078226742, "grad_norm": 0.5273604989051819, "learning_rate": 3.1991635257767274e-05, "loss": 0.4771, "step": 9385 }, { "epoch": 1.6743377040406744, "grad_norm": 0.5304795503616333, "learning_rate": 3.195735209788528e-05, "loss": 0.3931, "step": 9386 }, { "epoch": 1.6745161002586744, "grad_norm": 0.5918444395065308, "learning_rate": 3.1923086062979056e-05, "loss": 0.7699, "step": 9387 }, { "epoch": 1.6746944964766746, "grad_norm": 0.5880241990089417, "learning_rate": 3.188883715573976e-05, "loss": 0.6155, "step": 9388 }, { "epoch": 1.6748728926946748, "grad_norm": 0.4606783092021942, "learning_rate": 3.185460537885737e-05, "loss": 0.4407, "step": 9389 }, { "epoch": 1.675051288912675, "grad_norm": 0.44340142607688904, "learning_rate": 3.182039073502035e-05, "loss": 0.3536, "step": 9390 }, { "epoch": 1.6752296851306752, "grad_norm": 0.5192257761955261, "learning_rate": 3.1786193226916005e-05, "loss": 0.5362, "step": 9391 }, { "epoch": 1.6754080813486754, "grad_norm": 0.6925177574157715, "learning_rate": 3.175201285723017e-05, "loss": 0.7919, "step": 9392 }, { "epoch": 1.6755864775666756, "grad_norm": 0.557155966758728, "learning_rate": 3.171784962864724e-05, "loss": 0.4573, "step": 9393 }, { "epoch": 1.6757648737846758, "grad_norm": 0.5083150267601013, "learning_rate": 3.1683703543850526e-05, "loss": 0.5892, "step": 9394 }, { "epoch": 1.675943270002676, "grad_norm": 0.46054404973983765, "learning_rate": 3.164957460552173e-05, "loss": 0.3862, "step": 9395 }, { "epoch": 1.6761216662206762, "grad_norm": 0.5175045728683472, "learning_rate": 3.16154628163414e-05, "loss": 0.5591, "step": 9396 }, { "epoch": 1.6763000624386764, "grad_norm": 0.4765566885471344, "learning_rate": 3.1581368178988654e-05, "loss": 0.405, "step": 9397 }, { "epoch": 1.6764784586566766, "grad_norm": 0.5212094783782959, "learning_rate": 3.154729069614123e-05, "loss": 0.4395, "step": 9398 }, { "epoch": 1.6766568548746767, "grad_norm": 0.5103222727775574, "learning_rate": 3.1513230370475654e-05, "loss": 0.4392, "step": 9399 }, { "epoch": 1.676835251092677, "grad_norm": 0.46660804748535156, "learning_rate": 3.147918720466689e-05, "loss": 0.4229, "step": 9400 }, { "epoch": 1.6770136473106771, "grad_norm": 0.500568687915802, "learning_rate": 3.1445161201388766e-05, "loss": 0.4259, "step": 9401 }, { "epoch": 1.6771920435286773, "grad_norm": 0.5917816162109375, "learning_rate": 3.141115236331357e-05, "loss": 0.7201, "step": 9402 }, { "epoch": 1.6773704397466773, "grad_norm": 0.49264368414878845, "learning_rate": 3.137716069311247e-05, "loss": 0.4191, "step": 9403 }, { "epoch": 1.6775488359646775, "grad_norm": 0.5691975355148315, "learning_rate": 3.134318619345508e-05, "loss": 0.6493, "step": 9404 }, { "epoch": 1.6777272321826777, "grad_norm": 0.5436373353004456, "learning_rate": 3.130922886700968e-05, "loss": 0.5603, "step": 9405 }, { "epoch": 1.677905628400678, "grad_norm": 0.47983697056770325, "learning_rate": 3.127528871644342e-05, "loss": 0.4834, "step": 9406 }, { "epoch": 1.6780840246186781, "grad_norm": 0.5476971864700317, "learning_rate": 3.12413657444218e-05, "loss": 0.5386, "step": 9407 }, { "epoch": 1.6782624208366783, "grad_norm": 0.5388808846473694, "learning_rate": 3.120745995360921e-05, "loss": 0.483, "step": 9408 }, { "epoch": 1.6784408170546783, "grad_norm": 0.5438225865364075, "learning_rate": 3.117357134666851e-05, "loss": 0.5662, "step": 9409 }, { "epoch": 1.6786192132726785, "grad_norm": 0.5238845348358154, "learning_rate": 3.113969992626142e-05, "loss": 0.5429, "step": 9410 }, { "epoch": 1.6787976094906787, "grad_norm": 0.5313870906829834, "learning_rate": 3.110584569504804e-05, "loss": 0.5065, "step": 9411 }, { "epoch": 1.6789760057086789, "grad_norm": 0.5582557916641235, "learning_rate": 3.1072008655687376e-05, "loss": 0.5529, "step": 9412 }, { "epoch": 1.679154401926679, "grad_norm": 0.552903950214386, "learning_rate": 3.103818881083695e-05, "loss": 0.553, "step": 9413 }, { "epoch": 1.6793327981446793, "grad_norm": 0.5650268793106079, "learning_rate": 3.100438616315293e-05, "loss": 0.6105, "step": 9414 }, { "epoch": 1.6795111943626795, "grad_norm": 0.4516279995441437, "learning_rate": 3.0970600715290204e-05, "loss": 0.3535, "step": 9415 }, { "epoch": 1.6796895905806797, "grad_norm": 0.4835292100906372, "learning_rate": 3.0936832469902226e-05, "loss": 0.4125, "step": 9416 }, { "epoch": 1.6798679867986799, "grad_norm": 0.6102882623672485, "learning_rate": 3.0903081429641186e-05, "loss": 0.5305, "step": 9417 }, { "epoch": 1.68004638301668, "grad_norm": 0.485422283411026, "learning_rate": 3.086934759715784e-05, "loss": 0.4507, "step": 9418 }, { "epoch": 1.6802247792346803, "grad_norm": 0.4775411784648895, "learning_rate": 3.0835630975101587e-05, "loss": 0.4746, "step": 9419 }, { "epoch": 1.6804031754526805, "grad_norm": 0.4786146283149719, "learning_rate": 3.08019315661206e-05, "loss": 0.5359, "step": 9420 }, { "epoch": 1.6805815716706807, "grad_norm": 0.5550956130027771, "learning_rate": 3.076824937286155e-05, "loss": 0.4877, "step": 9421 }, { "epoch": 1.6807599678886809, "grad_norm": 0.5382834076881409, "learning_rate": 3.073458439796989e-05, "loss": 0.6299, "step": 9422 }, { "epoch": 1.680938364106681, "grad_norm": 0.5830110311508179, "learning_rate": 3.0700936644089574e-05, "loss": 0.7122, "step": 9423 }, { "epoch": 1.6811167603246813, "grad_norm": 0.506211519241333, "learning_rate": 3.0667306113863366e-05, "loss": 0.3911, "step": 9424 }, { "epoch": 1.6812951565426812, "grad_norm": 0.46619656682014465, "learning_rate": 3.0633692809932554e-05, "loss": 0.4391, "step": 9425 }, { "epoch": 1.6814735527606814, "grad_norm": 0.4801982343196869, "learning_rate": 3.060009673493702e-05, "loss": 0.4515, "step": 9426 }, { "epoch": 1.6816519489786816, "grad_norm": 0.5707588791847229, "learning_rate": 3.0566517891515546e-05, "loss": 0.645, "step": 9427 }, { "epoch": 1.6818303451966818, "grad_norm": 0.5557790994644165, "learning_rate": 3.0532956282305294e-05, "loss": 0.6412, "step": 9428 }, { "epoch": 1.682008741414682, "grad_norm": 0.5159487128257751, "learning_rate": 3.0499411909942265e-05, "loss": 0.5476, "step": 9429 }, { "epoch": 1.6821871376326822, "grad_norm": 0.4673673212528229, "learning_rate": 3.0465884777060943e-05, "loss": 0.4216, "step": 9430 }, { "epoch": 1.6823655338506822, "grad_norm": 0.484874963760376, "learning_rate": 3.0432374886294523e-05, "loss": 0.5434, "step": 9431 }, { "epoch": 1.6825439300686824, "grad_norm": 0.7181788682937622, "learning_rate": 3.0398882240274955e-05, "loss": 0.8602, "step": 9432 }, { "epoch": 1.6827223262866826, "grad_norm": 0.4546528458595276, "learning_rate": 3.036540684163261e-05, "loss": 0.3598, "step": 9433 }, { "epoch": 1.6829007225046828, "grad_norm": 0.4792789816856384, "learning_rate": 3.033194869299674e-05, "loss": 0.4134, "step": 9434 }, { "epoch": 1.683079118722683, "grad_norm": 0.4788531959056854, "learning_rate": 3.0298507796995056e-05, "loss": 0.4332, "step": 9435 }, { "epoch": 1.6832575149406832, "grad_norm": 0.6334043145179749, "learning_rate": 3.0265084156254064e-05, "loss": 0.4675, "step": 9436 }, { "epoch": 1.6834359111586834, "grad_norm": 0.48785632848739624, "learning_rate": 3.0231677773398748e-05, "loss": 0.4613, "step": 9437 }, { "epoch": 1.6836143073766836, "grad_norm": 0.5417238473892212, "learning_rate": 3.019828865105295e-05, "loss": 0.6667, "step": 9438 }, { "epoch": 1.6837927035946838, "grad_norm": 0.46144363284111023, "learning_rate": 3.016491679183897e-05, "loss": 0.3897, "step": 9439 }, { "epoch": 1.683971099812684, "grad_norm": 0.4795248508453369, "learning_rate": 3.013156219837776e-05, "loss": 0.4471, "step": 9440 }, { "epoch": 1.6841494960306842, "grad_norm": 0.5149518251419067, "learning_rate": 3.0098224873289086e-05, "loss": 0.5248, "step": 9441 }, { "epoch": 1.6843278922486844, "grad_norm": 0.5572359561920166, "learning_rate": 3.0064904819191162e-05, "loss": 0.6678, "step": 9442 }, { "epoch": 1.6845062884666846, "grad_norm": 0.5138469934463501, "learning_rate": 3.003160203870095e-05, "loss": 0.5152, "step": 9443 }, { "epoch": 1.6846846846846848, "grad_norm": 0.5161378383636475, "learning_rate": 2.9998316534434135e-05, "loss": 0.4903, "step": 9444 }, { "epoch": 1.684863080902685, "grad_norm": 0.49434447288513184, "learning_rate": 2.996504830900479e-05, "loss": 0.5285, "step": 9445 }, { "epoch": 1.6850414771206852, "grad_norm": 0.5236574411392212, "learning_rate": 2.9931797365025937e-05, "loss": 0.5159, "step": 9446 }, { "epoch": 1.6852198733386852, "grad_norm": 0.544776201248169, "learning_rate": 2.9898563705108932e-05, "loss": 0.6266, "step": 9447 }, { "epoch": 1.6853982695566854, "grad_norm": 0.5385255217552185, "learning_rate": 2.9865347331864106e-05, "loss": 0.5478, "step": 9448 }, { "epoch": 1.6855766657746856, "grad_norm": 0.5094165205955505, "learning_rate": 2.9832148247900092e-05, "loss": 0.5402, "step": 9449 }, { "epoch": 1.6857550619926858, "grad_norm": 0.5184796452522278, "learning_rate": 2.9798966455824473e-05, "loss": 0.4279, "step": 9450 }, { "epoch": 1.685933458210686, "grad_norm": 0.5272186398506165, "learning_rate": 2.9765801958243245e-05, "loss": 0.4309, "step": 9451 }, { "epoch": 1.6861118544286862, "grad_norm": 0.49219128489494324, "learning_rate": 2.973265475776113e-05, "loss": 0.4853, "step": 9452 }, { "epoch": 1.6862902506466861, "grad_norm": 0.46345192193984985, "learning_rate": 2.9699524856981574e-05, "loss": 0.5184, "step": 9453 }, { "epoch": 1.6864686468646863, "grad_norm": 0.528198778629303, "learning_rate": 2.966641225850647e-05, "loss": 0.572, "step": 9454 }, { "epoch": 1.6866470430826865, "grad_norm": 0.5292263627052307, "learning_rate": 2.963331696493657e-05, "loss": 0.4926, "step": 9455 }, { "epoch": 1.6868254393006867, "grad_norm": 0.48872697353363037, "learning_rate": 2.9600238978871097e-05, "loss": 0.3942, "step": 9456 }, { "epoch": 1.687003835518687, "grad_norm": 0.5367798805236816, "learning_rate": 2.9567178302908005e-05, "loss": 0.5765, "step": 9457 }, { "epoch": 1.6871822317366871, "grad_norm": 0.48006823658943176, "learning_rate": 2.953413493964391e-05, "loss": 0.4903, "step": 9458 }, { "epoch": 1.6873606279546873, "grad_norm": 0.4655410349369049, "learning_rate": 2.9501108891673928e-05, "loss": 0.3782, "step": 9459 }, { "epoch": 1.6875390241726875, "grad_norm": 0.49426302313804626, "learning_rate": 2.9468100161592043e-05, "loss": 0.5576, "step": 9460 }, { "epoch": 1.6877174203906877, "grad_norm": 0.4561796188354492, "learning_rate": 2.9435108751990596e-05, "loss": 0.4239, "step": 9461 }, { "epoch": 1.687895816608688, "grad_norm": 0.4534534513950348, "learning_rate": 2.940213466546085e-05, "loss": 0.3932, "step": 9462 }, { "epoch": 1.6880742128266881, "grad_norm": 0.5681625604629517, "learning_rate": 2.936917790459251e-05, "loss": 0.5814, "step": 9463 }, { "epoch": 1.6882526090446883, "grad_norm": 0.5786123275756836, "learning_rate": 2.9336238471973947e-05, "loss": 0.7368, "step": 9464 }, { "epoch": 1.6884310052626885, "grad_norm": 0.6435004472732544, "learning_rate": 2.930331637019229e-05, "loss": 0.6626, "step": 9465 }, { "epoch": 1.6886094014806887, "grad_norm": 0.4304559528827667, "learning_rate": 2.9270411601833162e-05, "loss": 0.3071, "step": 9466 }, { "epoch": 1.688787797698689, "grad_norm": 0.5037026405334473, "learning_rate": 2.9237524169480974e-05, "loss": 0.4809, "step": 9467 }, { "epoch": 1.6889661939166891, "grad_norm": 0.5338348150253296, "learning_rate": 2.9204654075718568e-05, "loss": 0.5474, "step": 9468 }, { "epoch": 1.689144590134689, "grad_norm": 0.44183140993118286, "learning_rate": 2.917180132312766e-05, "loss": 0.3869, "step": 9469 }, { "epoch": 1.6893229863526893, "grad_norm": 0.5616282224655151, "learning_rate": 2.913896591428841e-05, "loss": 0.5424, "step": 9470 }, { "epoch": 1.6895013825706895, "grad_norm": 0.5086612701416016, "learning_rate": 2.9106147851779785e-05, "loss": 0.5467, "step": 9471 }, { "epoch": 1.6896797787886897, "grad_norm": 0.44916772842407227, "learning_rate": 2.907334713817919e-05, "loss": 0.3491, "step": 9472 }, { "epoch": 1.6898581750066899, "grad_norm": 0.5420513153076172, "learning_rate": 2.9040563776062845e-05, "loss": 0.5353, "step": 9473 }, { "epoch": 1.69003657122469, "grad_norm": 0.4886205196380615, "learning_rate": 2.9007797768005606e-05, "loss": 0.4478, "step": 9474 }, { "epoch": 1.69021496744269, "grad_norm": 0.47686657309532166, "learning_rate": 2.897504911658075e-05, "loss": 0.4001, "step": 9475 }, { "epoch": 1.6903933636606903, "grad_norm": 0.5385986566543579, "learning_rate": 2.8942317824360493e-05, "loss": 0.6524, "step": 9476 }, { "epoch": 1.6905717598786905, "grad_norm": 0.5010984539985657, "learning_rate": 2.890960389391545e-05, "loss": 0.4801, "step": 9477 }, { "epoch": 1.6907501560966907, "grad_norm": 0.6029075384140015, "learning_rate": 2.887690732781492e-05, "loss": 0.6388, "step": 9478 }, { "epoch": 1.6909285523146909, "grad_norm": 0.47980138659477234, "learning_rate": 2.8844228128627e-05, "loss": 0.4344, "step": 9479 }, { "epoch": 1.691106948532691, "grad_norm": 0.5094255208969116, "learning_rate": 2.881156629891815e-05, "loss": 0.5046, "step": 9480 }, { "epoch": 1.6912853447506913, "grad_norm": 0.4894050061702728, "learning_rate": 2.8778921841253774e-05, "loss": 0.4561, "step": 9481 }, { "epoch": 1.6914637409686915, "grad_norm": 0.531238317489624, "learning_rate": 2.8746294758197622e-05, "loss": 0.5447, "step": 9482 }, { "epoch": 1.6916421371866917, "grad_norm": 0.5232047438621521, "learning_rate": 2.871368505231234e-05, "loss": 0.474, "step": 9483 }, { "epoch": 1.6918205334046919, "grad_norm": 0.4739258885383606, "learning_rate": 2.8681092726158957e-05, "loss": 0.4283, "step": 9484 }, { "epoch": 1.691998929622692, "grad_norm": 0.4757033884525299, "learning_rate": 2.8648517782297294e-05, "loss": 0.3738, "step": 9485 }, { "epoch": 1.6921773258406922, "grad_norm": 0.5776329636573792, "learning_rate": 2.861596022328583e-05, "loss": 0.7618, "step": 9486 }, { "epoch": 1.6923557220586924, "grad_norm": 0.5738844275474548, "learning_rate": 2.8583420051681545e-05, "loss": 0.678, "step": 9487 }, { "epoch": 1.6925341182766926, "grad_norm": 0.5281713604927063, "learning_rate": 2.8550897270040148e-05, "loss": 0.4907, "step": 9488 }, { "epoch": 1.6927125144946928, "grad_norm": 0.5512872338294983, "learning_rate": 2.851839188091604e-05, "loss": 0.6453, "step": 9489 }, { "epoch": 1.692890910712693, "grad_norm": 0.5240409970283508, "learning_rate": 2.8485903886862093e-05, "loss": 0.443, "step": 9490 }, { "epoch": 1.693069306930693, "grad_norm": 0.6053004860877991, "learning_rate": 2.845343329042996e-05, "loss": 0.568, "step": 9491 }, { "epoch": 1.6932477031486932, "grad_norm": 0.6063686013221741, "learning_rate": 2.842098009416977e-05, "loss": 0.5555, "step": 9492 }, { "epoch": 1.6934260993666934, "grad_norm": 0.5201789736747742, "learning_rate": 2.8388544300630538e-05, "loss": 0.515, "step": 9493 }, { "epoch": 1.6936044955846936, "grad_norm": 0.47415268421173096, "learning_rate": 2.8356125912359587e-05, "loss": 0.3263, "step": 9494 }, { "epoch": 1.6937828918026938, "grad_norm": 0.4585654139518738, "learning_rate": 2.832372493190319e-05, "loss": 0.3953, "step": 9495 }, { "epoch": 1.693961288020694, "grad_norm": 0.552780032157898, "learning_rate": 2.8291341361805973e-05, "loss": 0.5802, "step": 9496 }, { "epoch": 1.694139684238694, "grad_norm": 0.5424774885177612, "learning_rate": 2.8258975204611488e-05, "loss": 0.5196, "step": 9497 }, { "epoch": 1.6943180804566942, "grad_norm": 0.5058227777481079, "learning_rate": 2.8226626462861645e-05, "loss": 0.4721, "step": 9498 }, { "epoch": 1.6944964766746944, "grad_norm": 0.4840177297592163, "learning_rate": 2.819429513909705e-05, "loss": 0.5075, "step": 9499 }, { "epoch": 1.6946748728926946, "grad_norm": 0.47189828753471375, "learning_rate": 2.816198123585714e-05, "loss": 0.3598, "step": 9500 }, { "epoch": 1.6948532691106948, "grad_norm": 0.46200263500213623, "learning_rate": 2.81296847556797e-05, "loss": 0.4265, "step": 9501 }, { "epoch": 1.695031665328695, "grad_norm": 0.5357261300086975, "learning_rate": 2.809740570110142e-05, "loss": 0.4951, "step": 9502 }, { "epoch": 1.6952100615466952, "grad_norm": 0.5782269835472107, "learning_rate": 2.8065144074657322e-05, "loss": 0.586, "step": 9503 }, { "epoch": 1.6953884577646954, "grad_norm": 0.5674153566360474, "learning_rate": 2.8032899878881303e-05, "loss": 0.5702, "step": 9504 }, { "epoch": 1.6955668539826956, "grad_norm": 0.5033431649208069, "learning_rate": 2.8000673116305857e-05, "loss": 0.4803, "step": 9505 }, { "epoch": 1.6957452502006958, "grad_norm": 0.5172526836395264, "learning_rate": 2.7968463789461968e-05, "loss": 0.5727, "step": 9506 }, { "epoch": 1.695923646418696, "grad_norm": 0.5547386407852173, "learning_rate": 2.793627190087944e-05, "loss": 0.5522, "step": 9507 }, { "epoch": 1.6961020426366962, "grad_norm": 0.5478581786155701, "learning_rate": 2.7904097453086503e-05, "loss": 0.5495, "step": 9508 }, { "epoch": 1.6962804388546964, "grad_norm": 0.5385379791259766, "learning_rate": 2.7871940448610244e-05, "loss": 0.4659, "step": 9509 }, { "epoch": 1.6964588350726966, "grad_norm": 0.4548676609992981, "learning_rate": 2.7839800889976173e-05, "loss": 0.4397, "step": 9510 }, { "epoch": 1.6966372312906968, "grad_norm": 0.5475732684135437, "learning_rate": 2.7807678779708466e-05, "loss": 0.6873, "step": 9511 }, { "epoch": 1.696815627508697, "grad_norm": 0.49754053354263306, "learning_rate": 2.7775574120330132e-05, "loss": 0.4393, "step": 9512 }, { "epoch": 1.696994023726697, "grad_norm": 0.5146554708480835, "learning_rate": 2.7743486914362513e-05, "loss": 0.4761, "step": 9513 }, { "epoch": 1.6971724199446971, "grad_norm": 0.5417225956916809, "learning_rate": 2.771141716432585e-05, "loss": 0.5846, "step": 9514 }, { "epoch": 1.6973508161626973, "grad_norm": 0.5172662734985352, "learning_rate": 2.7679364872738753e-05, "loss": 0.4245, "step": 9515 }, { "epoch": 1.6975292123806975, "grad_norm": 0.5381408929824829, "learning_rate": 2.764733004211875e-05, "loss": 0.4776, "step": 9516 }, { "epoch": 1.6977076085986977, "grad_norm": 0.4274336099624634, "learning_rate": 2.7615312674981686e-05, "loss": 0.3263, "step": 9517 }, { "epoch": 1.697886004816698, "grad_norm": 0.5179285407066345, "learning_rate": 2.7583312773842267e-05, "loss": 0.5205, "step": 9518 }, { "epoch": 1.698064401034698, "grad_norm": 0.5593109726905823, "learning_rate": 2.7551330341213793e-05, "loss": 0.568, "step": 9519 }, { "epoch": 1.698242797252698, "grad_norm": 0.6157004833221436, "learning_rate": 2.7519365379608058e-05, "loss": 0.7443, "step": 9520 }, { "epoch": 1.6984211934706983, "grad_norm": 0.5052160620689392, "learning_rate": 2.7487417891535693e-05, "loss": 0.4443, "step": 9521 }, { "epoch": 1.6985995896886985, "grad_norm": 0.569677472114563, "learning_rate": 2.7455487879505747e-05, "loss": 0.5609, "step": 9522 }, { "epoch": 1.6987779859066987, "grad_norm": 0.45186012983322144, "learning_rate": 2.742357534602599e-05, "loss": 0.3359, "step": 9523 }, { "epoch": 1.698956382124699, "grad_norm": 0.5323873162269592, "learning_rate": 2.7391680293602866e-05, "loss": 0.5906, "step": 9524 }, { "epoch": 1.699134778342699, "grad_norm": 0.47437727451324463, "learning_rate": 2.7359802724741367e-05, "loss": 0.4004, "step": 9525 }, { "epoch": 1.6993131745606993, "grad_norm": 0.47239458560943604, "learning_rate": 2.7327942641945157e-05, "loss": 0.4497, "step": 9526 }, { "epoch": 1.6994915707786995, "grad_norm": 0.5352083444595337, "learning_rate": 2.7296100047716488e-05, "loss": 0.5579, "step": 9527 }, { "epoch": 1.6996699669966997, "grad_norm": 0.530881404876709, "learning_rate": 2.7264274944556328e-05, "loss": 0.5424, "step": 9528 }, { "epoch": 1.6998483632147, "grad_norm": 0.5889961123466492, "learning_rate": 2.7232467334964095e-05, "loss": 0.7217, "step": 9529 }, { "epoch": 1.7000267594327, "grad_norm": 0.5244048833847046, "learning_rate": 2.7200677221438098e-05, "loss": 0.5383, "step": 9530 }, { "epoch": 1.7002051556507003, "grad_norm": 0.5565990209579468, "learning_rate": 2.7168904606475005e-05, "loss": 0.642, "step": 9531 }, { "epoch": 1.7003835518687005, "grad_norm": 0.4610227644443512, "learning_rate": 2.7137149492570207e-05, "loss": 0.3604, "step": 9532 }, { "epoch": 1.7005619480867007, "grad_norm": 0.4633643329143524, "learning_rate": 2.7105411882217852e-05, "loss": 0.3937, "step": 9533 }, { "epoch": 1.700740344304701, "grad_norm": 0.5488377213478088, "learning_rate": 2.7073691777910387e-05, "loss": 0.5148, "step": 9534 }, { "epoch": 1.7009187405227009, "grad_norm": 0.5138922929763794, "learning_rate": 2.7041989182139375e-05, "loss": 0.4672, "step": 9535 }, { "epoch": 1.701097136740701, "grad_norm": 0.5196233987808228, "learning_rate": 2.7010304097394578e-05, "loss": 0.4599, "step": 9536 }, { "epoch": 1.7012755329587013, "grad_norm": 0.45630475878715515, "learning_rate": 2.6978636526164474e-05, "loss": 0.3845, "step": 9537 }, { "epoch": 1.7014539291767015, "grad_norm": 0.5754191279411316, "learning_rate": 2.6946986470936352e-05, "loss": 0.7097, "step": 9538 }, { "epoch": 1.7016323253947017, "grad_norm": 0.5594825148582458, "learning_rate": 2.6915353934195864e-05, "loss": 0.6275, "step": 9539 }, { "epoch": 1.7018107216127019, "grad_norm": 0.5201573967933655, "learning_rate": 2.688373891842752e-05, "loss": 0.5501, "step": 9540 }, { "epoch": 1.7019891178307018, "grad_norm": 0.5017893314361572, "learning_rate": 2.685214142611428e-05, "loss": 0.4429, "step": 9541 }, { "epoch": 1.702167514048702, "grad_norm": 0.5738409161567688, "learning_rate": 2.682056145973785e-05, "loss": 0.6522, "step": 9542 }, { "epoch": 1.7023459102667022, "grad_norm": 0.5462552905082703, "learning_rate": 2.6788999021778506e-05, "loss": 0.7407, "step": 9543 }, { "epoch": 1.7025243064847024, "grad_norm": 0.5017895102500916, "learning_rate": 2.6757454114715058e-05, "loss": 0.4998, "step": 9544 }, { "epoch": 1.7027027027027026, "grad_norm": 0.47052887082099915, "learning_rate": 2.6725926741025143e-05, "loss": 0.3548, "step": 9545 }, { "epoch": 1.7028810989207028, "grad_norm": 0.5245179533958435, "learning_rate": 2.6694416903184805e-05, "loss": 0.5281, "step": 9546 }, { "epoch": 1.703059495138703, "grad_norm": 0.4874159097671509, "learning_rate": 2.6662924603668927e-05, "loss": 0.443, "step": 9547 }, { "epoch": 1.7032378913567032, "grad_norm": 0.5207834243774414, "learning_rate": 2.6631449844950806e-05, "loss": 0.4949, "step": 9548 }, { "epoch": 1.7034162875747034, "grad_norm": 0.43551138043403625, "learning_rate": 2.6599992629502466e-05, "loss": 0.3538, "step": 9549 }, { "epoch": 1.7035946837927036, "grad_norm": 0.49214231967926025, "learning_rate": 2.656855295979463e-05, "loss": 0.4541, "step": 9550 }, { "epoch": 1.7037730800107038, "grad_norm": 0.4991733729839325, "learning_rate": 2.6537130838296452e-05, "loss": 0.3988, "step": 9551 }, { "epoch": 1.703951476228704, "grad_norm": 0.5545747876167297, "learning_rate": 2.650572626747588e-05, "loss": 0.5933, "step": 9552 }, { "epoch": 1.7041298724467042, "grad_norm": 0.542052686214447, "learning_rate": 2.6474339249799362e-05, "loss": 0.5359, "step": 9553 }, { "epoch": 1.7043082686647044, "grad_norm": 0.5299772620201111, "learning_rate": 2.6442969787732085e-05, "loss": 0.5399, "step": 9554 }, { "epoch": 1.7044866648827046, "grad_norm": 0.490618497133255, "learning_rate": 2.641161788373772e-05, "loss": 0.4699, "step": 9555 }, { "epoch": 1.7046650611007048, "grad_norm": 0.5627853870391846, "learning_rate": 2.6380283540278714e-05, "loss": 0.6102, "step": 9556 }, { "epoch": 1.7048434573187048, "grad_norm": 0.6248000860214233, "learning_rate": 2.634896675981599e-05, "loss": 0.5612, "step": 9557 }, { "epoch": 1.705021853536705, "grad_norm": 0.5680274367332458, "learning_rate": 2.6317667544809132e-05, "loss": 0.5528, "step": 9558 }, { "epoch": 1.7052002497547052, "grad_norm": 0.5207681059837341, "learning_rate": 2.628638589771648e-05, "loss": 0.526, "step": 9559 }, { "epoch": 1.7053786459727054, "grad_norm": 0.4698508381843567, "learning_rate": 2.6255121820994737e-05, "loss": 0.4154, "step": 9560 }, { "epoch": 1.7055570421907056, "grad_norm": 0.45155656337738037, "learning_rate": 2.6223875317099492e-05, "loss": 0.4199, "step": 9561 }, { "epoch": 1.7057354384087058, "grad_norm": 0.5513625741004944, "learning_rate": 2.6192646388484732e-05, "loss": 0.5584, "step": 9562 }, { "epoch": 1.7059138346267058, "grad_norm": 0.4611358642578125, "learning_rate": 2.6161435037603264e-05, "loss": 0.3435, "step": 9563 }, { "epoch": 1.706092230844706, "grad_norm": 0.5059071183204651, "learning_rate": 2.61302412669063e-05, "loss": 0.4466, "step": 9564 }, { "epoch": 1.7062706270627062, "grad_norm": 0.5149518251419067, "learning_rate": 2.609906507884388e-05, "loss": 0.4098, "step": 9565 }, { "epoch": 1.7064490232807064, "grad_norm": 0.5157696604728699, "learning_rate": 2.606790647586457e-05, "loss": 0.4633, "step": 9566 }, { "epoch": 1.7066274194987066, "grad_norm": 0.47338274121284485, "learning_rate": 2.6036765460415447e-05, "loss": 0.4056, "step": 9567 }, { "epoch": 1.7068058157167068, "grad_norm": 0.49794062972068787, "learning_rate": 2.6005642034942463e-05, "loss": 0.5416, "step": 9568 }, { "epoch": 1.706984211934707, "grad_norm": 0.6232036352157593, "learning_rate": 2.597453620188997e-05, "loss": 0.6416, "step": 9569 }, { "epoch": 1.7071626081527072, "grad_norm": 0.5632308125495911, "learning_rate": 2.5943447963700933e-05, "loss": 0.5564, "step": 9570 }, { "epoch": 1.7073410043707073, "grad_norm": 0.5408989191055298, "learning_rate": 2.591237732281712e-05, "loss": 0.5595, "step": 9571 }, { "epoch": 1.7075194005887075, "grad_norm": 0.5331925749778748, "learning_rate": 2.588132428167869e-05, "loss": 0.567, "step": 9572 }, { "epoch": 1.7076977968067077, "grad_norm": 0.4525754749774933, "learning_rate": 2.5850288842724696e-05, "loss": 0.3582, "step": 9573 }, { "epoch": 1.707876193024708, "grad_norm": 0.4124913513660431, "learning_rate": 2.5819271008392486e-05, "loss": 0.3726, "step": 9574 }, { "epoch": 1.7080545892427081, "grad_norm": 0.45310550928115845, "learning_rate": 2.578827078111831e-05, "loss": 0.4381, "step": 9575 }, { "epoch": 1.7082329854607083, "grad_norm": 0.45527383685112, "learning_rate": 2.5757288163336808e-05, "loss": 0.4271, "step": 9576 }, { "epoch": 1.7084113816787085, "grad_norm": 0.5173305869102478, "learning_rate": 2.572632315748144e-05, "loss": 0.5474, "step": 9577 }, { "epoch": 1.7085897778967087, "grad_norm": 0.4721916913986206, "learning_rate": 2.569537576598416e-05, "loss": 0.37, "step": 9578 }, { "epoch": 1.7087681741147087, "grad_norm": 0.503395676612854, "learning_rate": 2.5664445991275486e-05, "loss": 0.5476, "step": 9579 }, { "epoch": 1.708946570332709, "grad_norm": 0.4126671254634857, "learning_rate": 2.563353383578468e-05, "loss": 0.3101, "step": 9580 }, { "epoch": 1.7091249665507091, "grad_norm": 0.546122133731842, "learning_rate": 2.560263930193957e-05, "loss": 0.4748, "step": 9581 }, { "epoch": 1.7093033627687093, "grad_norm": 0.4883154332637787, "learning_rate": 2.557176239216666e-05, "loss": 0.465, "step": 9582 }, { "epoch": 1.7094817589867095, "grad_norm": 0.5338440537452698, "learning_rate": 2.554090310889093e-05, "loss": 0.4543, "step": 9583 }, { "epoch": 1.7096601552047097, "grad_norm": 0.49619346857070923, "learning_rate": 2.5510061454536048e-05, "loss": 0.4506, "step": 9584 }, { "epoch": 1.7098385514227097, "grad_norm": 0.5309063196182251, "learning_rate": 2.5479237431524387e-05, "loss": 0.6047, "step": 9585 }, { "epoch": 1.7100169476407099, "grad_norm": 0.4647756516933441, "learning_rate": 2.5448431042276732e-05, "loss": 0.3435, "step": 9586 }, { "epoch": 1.71019534385871, "grad_norm": 0.5021915435791016, "learning_rate": 2.5417642289212756e-05, "loss": 0.5529, "step": 9587 }, { "epoch": 1.7103737400767103, "grad_norm": 0.5375409126281738, "learning_rate": 2.5386871174750415e-05, "loss": 0.5268, "step": 9588 }, { "epoch": 1.7105521362947105, "grad_norm": 0.5530412793159485, "learning_rate": 2.5356117701306637e-05, "loss": 0.4953, "step": 9589 }, { "epoch": 1.7107305325127107, "grad_norm": 0.5221925973892212, "learning_rate": 2.532538187129668e-05, "loss": 0.536, "step": 9590 }, { "epoch": 1.7109089287307109, "grad_norm": 0.46529194712638855, "learning_rate": 2.5294663687134512e-05, "loss": 0.436, "step": 9591 }, { "epoch": 1.711087324948711, "grad_norm": 0.5522665977478027, "learning_rate": 2.5263963151232806e-05, "loss": 0.4987, "step": 9592 }, { "epoch": 1.7112657211667113, "grad_norm": 0.5286535024642944, "learning_rate": 2.523328026600266e-05, "loss": 0.5313, "step": 9593 }, { "epoch": 1.7114441173847115, "grad_norm": 0.476859986782074, "learning_rate": 2.5202615033854016e-05, "loss": 0.4428, "step": 9594 }, { "epoch": 1.7116225136027117, "grad_norm": 0.5317862629890442, "learning_rate": 2.5171967457195217e-05, "loss": 0.5632, "step": 9595 }, { "epoch": 1.7118009098207119, "grad_norm": 0.5589151978492737, "learning_rate": 2.5141337538433312e-05, "loss": 0.6869, "step": 9596 }, { "epoch": 1.711979306038712, "grad_norm": 0.4832451343536377, "learning_rate": 2.5110725279974074e-05, "loss": 0.4022, "step": 9597 }, { "epoch": 1.7121577022567123, "grad_norm": 0.45813506841659546, "learning_rate": 2.508013068422163e-05, "loss": 0.3612, "step": 9598 }, { "epoch": 1.7123360984747125, "grad_norm": 0.4622073471546173, "learning_rate": 2.504955375357895e-05, "loss": 0.3288, "step": 9599 }, { "epoch": 1.7125144946927127, "grad_norm": 0.5384669899940491, "learning_rate": 2.5018994490447505e-05, "loss": 0.4338, "step": 9600 }, { "epoch": 1.7126928909107126, "grad_norm": 0.5117572546005249, "learning_rate": 2.4988452897227454e-05, "loss": 0.4504, "step": 9601 }, { "epoch": 1.7128712871287128, "grad_norm": 0.5058663487434387, "learning_rate": 2.495792897631749e-05, "loss": 0.41, "step": 9602 }, { "epoch": 1.713049683346713, "grad_norm": 0.481413871049881, "learning_rate": 2.492742273011489e-05, "loss": 0.4433, "step": 9603 }, { "epoch": 1.7132280795647132, "grad_norm": 0.5749334692955017, "learning_rate": 2.4896934161015683e-05, "loss": 0.542, "step": 9604 }, { "epoch": 1.7134064757827134, "grad_norm": 0.4859156012535095, "learning_rate": 2.486646327141437e-05, "loss": 0.4481, "step": 9605 }, { "epoch": 1.7135848720007136, "grad_norm": 0.5681634545326233, "learning_rate": 2.4836010063704174e-05, "loss": 0.5799, "step": 9606 }, { "epoch": 1.7137632682187136, "grad_norm": 0.5673406720161438, "learning_rate": 2.4805574540276822e-05, "loss": 0.659, "step": 9607 }, { "epoch": 1.7139416644367138, "grad_norm": 0.5422623753547668, "learning_rate": 2.477515670352279e-05, "loss": 0.4897, "step": 9608 }, { "epoch": 1.714120060654714, "grad_norm": 0.4804922044277191, "learning_rate": 2.4744756555830972e-05, "loss": 0.4234, "step": 9609 }, { "epoch": 1.7142984568727142, "grad_norm": 0.5365844368934631, "learning_rate": 2.4714374099589042e-05, "loss": 0.5374, "step": 9610 }, { "epoch": 1.7144768530907144, "grad_norm": 0.5775295495986938, "learning_rate": 2.468400933718326e-05, "loss": 0.5003, "step": 9611 }, { "epoch": 1.7146552493087146, "grad_norm": 0.5075268149375916, "learning_rate": 2.4653662270998383e-05, "loss": 0.4016, "step": 9612 }, { "epoch": 1.7148336455267148, "grad_norm": 0.5659419298171997, "learning_rate": 2.462333290341795e-05, "loss": 0.6249, "step": 9613 }, { "epoch": 1.715012041744715, "grad_norm": 0.5081014633178711, "learning_rate": 2.4593021236823914e-05, "loss": 0.394, "step": 9614 }, { "epoch": 1.7151904379627152, "grad_norm": 0.5532371401786804, "learning_rate": 2.456272727359704e-05, "loss": 0.606, "step": 9615 }, { "epoch": 1.7153688341807154, "grad_norm": 0.5273371934890747, "learning_rate": 2.4532451016116535e-05, "loss": 0.6084, "step": 9616 }, { "epoch": 1.7155472303987156, "grad_norm": 0.5197193622589111, "learning_rate": 2.4502192466760276e-05, "loss": 0.5743, "step": 9617 }, { "epoch": 1.7157256266167158, "grad_norm": 0.5154910683631897, "learning_rate": 2.4471951627904804e-05, "loss": 0.5972, "step": 9618 }, { "epoch": 1.715904022834716, "grad_norm": 0.5434054732322693, "learning_rate": 2.4441728501925165e-05, "loss": 0.5537, "step": 9619 }, { "epoch": 1.7160824190527162, "grad_norm": 0.5460578799247742, "learning_rate": 2.4411523091195153e-05, "loss": 0.6007, "step": 9620 }, { "epoch": 1.7162608152707164, "grad_norm": 0.4608527421951294, "learning_rate": 2.4381335398086985e-05, "loss": 0.3695, "step": 9621 }, { "epoch": 1.7164392114887166, "grad_norm": 0.6361474990844727, "learning_rate": 2.4351165424971706e-05, "loss": 0.7393, "step": 9622 }, { "epoch": 1.7166176077067166, "grad_norm": 0.5027257204055786, "learning_rate": 2.4321013174218782e-05, "loss": 0.4491, "step": 9623 }, { "epoch": 1.7167960039247168, "grad_norm": 0.6172974705696106, "learning_rate": 2.4290878648196318e-05, "loss": 0.6406, "step": 9624 }, { "epoch": 1.716974400142717, "grad_norm": 0.48582372069358826, "learning_rate": 2.4260761849271116e-05, "loss": 0.5167, "step": 9625 }, { "epoch": 1.7171527963607172, "grad_norm": 0.5107640624046326, "learning_rate": 2.4230662779808538e-05, "loss": 0.4779, "step": 9626 }, { "epoch": 1.7173311925787174, "grad_norm": 0.591314971446991, "learning_rate": 2.4200581442172576e-05, "loss": 0.6189, "step": 9627 }, { "epoch": 1.7175095887967176, "grad_norm": 0.5267913937568665, "learning_rate": 2.4170517838725816e-05, "loss": 0.4974, "step": 9628 }, { "epoch": 1.7176879850147175, "grad_norm": 0.5801319479942322, "learning_rate": 2.4140471971829338e-05, "loss": 0.5212, "step": 9629 }, { "epoch": 1.7178663812327177, "grad_norm": 0.5426841974258423, "learning_rate": 2.4110443843843034e-05, "loss": 0.4802, "step": 9630 }, { "epoch": 1.718044777450718, "grad_norm": 0.5340462327003479, "learning_rate": 2.408043345712521e-05, "loss": 0.5782, "step": 9631 }, { "epoch": 1.7182231736687181, "grad_norm": 0.5407845973968506, "learning_rate": 2.4050440814032986e-05, "loss": 0.5252, "step": 9632 }, { "epoch": 1.7184015698867183, "grad_norm": 0.5170387625694275, "learning_rate": 2.4020465916921862e-05, "loss": 0.5455, "step": 9633 }, { "epoch": 1.7185799661047185, "grad_norm": 0.45365792512893677, "learning_rate": 2.3990508768146125e-05, "loss": 0.3647, "step": 9634 }, { "epoch": 1.7187583623227187, "grad_norm": 0.5068604946136475, "learning_rate": 2.3960569370058528e-05, "loss": 0.5216, "step": 9635 }, { "epoch": 1.718936758540719, "grad_norm": 0.6078252792358398, "learning_rate": 2.3930647725010607e-05, "loss": 0.7029, "step": 9636 }, { "epoch": 1.7191151547587191, "grad_norm": 0.4622660279273987, "learning_rate": 2.3900743835352316e-05, "loss": 0.4294, "step": 9637 }, { "epoch": 1.7192935509767193, "grad_norm": 0.45116907358169556, "learning_rate": 2.387085770343225e-05, "loss": 0.3998, "step": 9638 }, { "epoch": 1.7194719471947195, "grad_norm": 0.4865681231021881, "learning_rate": 2.3840989331597757e-05, "loss": 0.424, "step": 9639 }, { "epoch": 1.7196503434127197, "grad_norm": 0.5520277619361877, "learning_rate": 2.3811138722194593e-05, "loss": 0.5527, "step": 9640 }, { "epoch": 1.71982873963072, "grad_norm": 0.4993618130683899, "learning_rate": 2.378130587756727e-05, "loss": 0.4283, "step": 9641 }, { "epoch": 1.7200071358487201, "grad_norm": 0.5111703276634216, "learning_rate": 2.3751490800058865e-05, "loss": 0.4524, "step": 9642 }, { "epoch": 1.7201855320667203, "grad_norm": 0.554426372051239, "learning_rate": 2.3721693492010977e-05, "loss": 0.5797, "step": 9643 }, { "epoch": 1.7203639282847205, "grad_norm": 0.4680947959423065, "learning_rate": 2.369191395576395e-05, "loss": 0.3494, "step": 9644 }, { "epoch": 1.7205423245027205, "grad_norm": 0.5217585563659668, "learning_rate": 2.3662152193656554e-05, "loss": 0.6172, "step": 9645 }, { "epoch": 1.7207207207207207, "grad_norm": 0.5365320444107056, "learning_rate": 2.3632408208026395e-05, "loss": 0.5242, "step": 9646 }, { "epoch": 1.7208991169387209, "grad_norm": 0.6032186150550842, "learning_rate": 2.3602682001209408e-05, "loss": 0.6657, "step": 9647 }, { "epoch": 1.721077513156721, "grad_norm": 0.5880352258682251, "learning_rate": 2.3572973575540418e-05, "loss": 0.665, "step": 9648 }, { "epoch": 1.7212559093747213, "grad_norm": 0.5372354388237, "learning_rate": 2.3543282933352645e-05, "loss": 0.4893, "step": 9649 }, { "epoch": 1.7214343055927215, "grad_norm": 0.44951343536376953, "learning_rate": 2.3513610076977916e-05, "loss": 0.3493, "step": 9650 }, { "epoch": 1.7216127018107215, "grad_norm": 0.49766871333122253, "learning_rate": 2.348395500874684e-05, "loss": 0.4823, "step": 9651 }, { "epoch": 1.7217910980287217, "grad_norm": 0.5321714282035828, "learning_rate": 2.345431773098841e-05, "loss": 0.5048, "step": 9652 }, { "epoch": 1.7219694942467219, "grad_norm": 0.5095254182815552, "learning_rate": 2.342469824603044e-05, "loss": 0.4675, "step": 9653 }, { "epoch": 1.722147890464722, "grad_norm": 0.48094990849494934, "learning_rate": 2.339509655619909e-05, "loss": 0.4484, "step": 9654 }, { "epoch": 1.7223262866827223, "grad_norm": 0.5008235573768616, "learning_rate": 2.336551266381942e-05, "loss": 0.3872, "step": 9655 }, { "epoch": 1.7225046829007225, "grad_norm": 0.552046537399292, "learning_rate": 2.3335946571214795e-05, "loss": 0.6718, "step": 9656 }, { "epoch": 1.7226830791187226, "grad_norm": 0.5397679805755615, "learning_rate": 2.330639828070738e-05, "loss": 0.4931, "step": 9657 }, { "epoch": 1.7228614753367228, "grad_norm": 0.5258644819259644, "learning_rate": 2.3276867794617936e-05, "loss": 0.3977, "step": 9658 }, { "epoch": 1.723039871554723, "grad_norm": 0.5579936504364014, "learning_rate": 2.3247355115265684e-05, "loss": 0.5178, "step": 9659 }, { "epoch": 1.7232182677727232, "grad_norm": 0.53934246301651, "learning_rate": 2.3217860244968638e-05, "loss": 0.5224, "step": 9660 }, { "epoch": 1.7233966639907234, "grad_norm": 0.5465741157531738, "learning_rate": 2.3188383186043187e-05, "loss": 0.5329, "step": 9661 }, { "epoch": 1.7235750602087236, "grad_norm": 0.6120946407318115, "learning_rate": 2.3158923940804572e-05, "loss": 0.7921, "step": 9662 }, { "epoch": 1.7237534564267238, "grad_norm": 0.5330802798271179, "learning_rate": 2.3129482511566463e-05, "loss": 0.5436, "step": 9663 }, { "epoch": 1.723931852644724, "grad_norm": 0.4863676428794861, "learning_rate": 2.3100058900641125e-05, "loss": 0.4046, "step": 9664 }, { "epoch": 1.7241102488627242, "grad_norm": 0.43972915410995483, "learning_rate": 2.3070653110339567e-05, "loss": 0.4352, "step": 9665 }, { "epoch": 1.7242886450807244, "grad_norm": 0.5199171304702759, "learning_rate": 2.3041265142971196e-05, "loss": 0.486, "step": 9666 }, { "epoch": 1.7244670412987244, "grad_norm": 0.4947983920574188, "learning_rate": 2.3011895000844247e-05, "loss": 0.4544, "step": 9667 }, { "epoch": 1.7246454375167246, "grad_norm": 0.565777599811554, "learning_rate": 2.298254268626532e-05, "loss": 0.5195, "step": 9668 }, { "epoch": 1.7248238337347248, "grad_norm": 0.4736662805080414, "learning_rate": 2.2953208201539873e-05, "loss": 0.3791, "step": 9669 }, { "epoch": 1.725002229952725, "grad_norm": 0.5035544633865356, "learning_rate": 2.292389154897173e-05, "loss": 0.488, "step": 9670 }, { "epoch": 1.7251806261707252, "grad_norm": 0.6010121703147888, "learning_rate": 2.2894592730863335e-05, "loss": 0.6792, "step": 9671 }, { "epoch": 1.7253590223887254, "grad_norm": 0.47702497243881226, "learning_rate": 2.2865311749515978e-05, "loss": 0.3775, "step": 9672 }, { "epoch": 1.7255374186067254, "grad_norm": 0.5288291573524475, "learning_rate": 2.2836048607229264e-05, "loss": 0.4574, "step": 9673 }, { "epoch": 1.7257158148247256, "grad_norm": 0.5164685845375061, "learning_rate": 2.2806803306301583e-05, "loss": 0.4143, "step": 9674 }, { "epoch": 1.7258942110427258, "grad_norm": 0.5557328462600708, "learning_rate": 2.2777575849029785e-05, "loss": 0.5724, "step": 9675 }, { "epoch": 1.726072607260726, "grad_norm": 0.5354199409484863, "learning_rate": 2.2748366237709372e-05, "loss": 0.499, "step": 9676 }, { "epoch": 1.7262510034787262, "grad_norm": 0.5438181757926941, "learning_rate": 2.271917447463451e-05, "loss": 0.6275, "step": 9677 }, { "epoch": 1.7264293996967264, "grad_norm": 0.5154425501823425, "learning_rate": 2.2690000562097858e-05, "loss": 0.4563, "step": 9678 }, { "epoch": 1.7266077959147266, "grad_norm": 0.58868008852005, "learning_rate": 2.2660844502390754e-05, "loss": 0.632, "step": 9679 }, { "epoch": 1.7267861921327268, "grad_norm": 0.541050136089325, "learning_rate": 2.263170629780306e-05, "loss": 0.59, "step": 9680 }, { "epoch": 1.726964588350727, "grad_norm": 0.5016245245933533, "learning_rate": 2.2602585950623367e-05, "loss": 0.4542, "step": 9681 }, { "epoch": 1.7271429845687272, "grad_norm": 0.5481529235839844, "learning_rate": 2.25734834631387e-05, "loss": 0.6274, "step": 9682 }, { "epoch": 1.7273213807867274, "grad_norm": 0.5226790308952332, "learning_rate": 2.2544398837634732e-05, "loss": 0.4746, "step": 9683 }, { "epoch": 1.7274997770047276, "grad_norm": 0.5335855484008789, "learning_rate": 2.2515332076395862e-05, "loss": 0.4868, "step": 9684 }, { "epoch": 1.7276781732227278, "grad_norm": 0.5473877787590027, "learning_rate": 2.2486283181704842e-05, "loss": 0.4419, "step": 9685 }, { "epoch": 1.727856569440728, "grad_norm": 0.5562229752540588, "learning_rate": 2.2457252155843293e-05, "loss": 0.6055, "step": 9686 }, { "epoch": 1.7280349656587282, "grad_norm": 0.5625514984130859, "learning_rate": 2.24282390010912e-05, "loss": 0.5543, "step": 9687 }, { "epoch": 1.7282133618767284, "grad_norm": 0.5255547165870667, "learning_rate": 2.2399243719727265e-05, "loss": 0.5099, "step": 9688 }, { "epoch": 1.7283917580947283, "grad_norm": 0.6275233626365662, "learning_rate": 2.237026631402883e-05, "loss": 0.5615, "step": 9689 }, { "epoch": 1.7285701543127285, "grad_norm": 0.5601621866226196, "learning_rate": 2.234130678627169e-05, "loss": 0.5324, "step": 9690 }, { "epoch": 1.7287485505307287, "grad_norm": 0.5600411891937256, "learning_rate": 2.231236513873039e-05, "loss": 0.5877, "step": 9691 }, { "epoch": 1.728926946748729, "grad_norm": 0.5259904861450195, "learning_rate": 2.228344137367791e-05, "loss": 0.5547, "step": 9692 }, { "epoch": 1.7291053429667291, "grad_norm": 0.4963001012802124, "learning_rate": 2.225453549338599e-05, "loss": 0.4832, "step": 9693 }, { "epoch": 1.7292837391847293, "grad_norm": 0.534983217716217, "learning_rate": 2.222564750012479e-05, "loss": 0.5112, "step": 9694 }, { "epoch": 1.7294621354027293, "grad_norm": 0.47947463393211365, "learning_rate": 2.219677739616327e-05, "loss": 0.4251, "step": 9695 }, { "epoch": 1.7296405316207295, "grad_norm": 0.5085344314575195, "learning_rate": 2.216792518376884e-05, "loss": 0.4215, "step": 9696 }, { "epoch": 1.7298189278387297, "grad_norm": 0.48446446657180786, "learning_rate": 2.213909086520746e-05, "loss": 0.443, "step": 9697 }, { "epoch": 1.72999732405673, "grad_norm": 0.4940386116504669, "learning_rate": 2.2110274442743853e-05, "loss": 0.4204, "step": 9698 }, { "epoch": 1.73017572027473, "grad_norm": 0.5226745009422302, "learning_rate": 2.2081475918641208e-05, "loss": 0.5349, "step": 9699 }, { "epoch": 1.7303541164927303, "grad_norm": 0.47551804780960083, "learning_rate": 2.2052695295161407e-05, "loss": 0.4946, "step": 9700 }, { "epoch": 1.7305325127107305, "grad_norm": 0.5348621606826782, "learning_rate": 2.2023932574564754e-05, "loss": 0.5195, "step": 9701 }, { "epoch": 1.7307109089287307, "grad_norm": 0.5494500398635864, "learning_rate": 2.199518775911036e-05, "loss": 0.5932, "step": 9702 }, { "epoch": 1.730889305146731, "grad_norm": 0.5604888796806335, "learning_rate": 2.196646085105583e-05, "loss": 0.6247, "step": 9703 }, { "epoch": 1.731067701364731, "grad_norm": 0.5602217316627502, "learning_rate": 2.1937751852657285e-05, "loss": 0.7553, "step": 9704 }, { "epoch": 1.7312460975827313, "grad_norm": 0.5081519484519958, "learning_rate": 2.190906076616961e-05, "loss": 0.4189, "step": 9705 }, { "epoch": 1.7314244938007315, "grad_norm": 0.5131904482841492, "learning_rate": 2.188038759384611e-05, "loss": 0.4722, "step": 9706 }, { "epoch": 1.7316028900187317, "grad_norm": 0.47898441553115845, "learning_rate": 2.1851732337938855e-05, "loss": 0.393, "step": 9707 }, { "epoch": 1.7317812862367319, "grad_norm": 0.4974140524864197, "learning_rate": 2.1823095000698346e-05, "loss": 0.5102, "step": 9708 }, { "epoch": 1.731959682454732, "grad_norm": 0.5431427955627441, "learning_rate": 2.1794475584373723e-05, "loss": 0.4054, "step": 9709 }, { "epoch": 1.7321380786727323, "grad_norm": 0.5118406414985657, "learning_rate": 2.1765874091212834e-05, "loss": 0.5844, "step": 9710 }, { "epoch": 1.7323164748907325, "grad_norm": 0.5889133214950562, "learning_rate": 2.1737290523461932e-05, "loss": 0.662, "step": 9711 }, { "epoch": 1.7324948711087325, "grad_norm": 0.5474311113357544, "learning_rate": 2.170872488336606e-05, "loss": 0.5537, "step": 9712 }, { "epoch": 1.7326732673267327, "grad_norm": 0.5133411884307861, "learning_rate": 2.1680177173168615e-05, "loss": 0.5039, "step": 9713 }, { "epoch": 1.7328516635447329, "grad_norm": 0.5092921257019043, "learning_rate": 2.1651647395111884e-05, "loss": 0.4727, "step": 9714 }, { "epoch": 1.733030059762733, "grad_norm": 0.521981954574585, "learning_rate": 2.1623135551436442e-05, "loss": 0.5204, "step": 9715 }, { "epoch": 1.7332084559807333, "grad_norm": 0.48075124621391296, "learning_rate": 2.1594641644381684e-05, "loss": 0.4316, "step": 9716 }, { "epoch": 1.7333868521987332, "grad_norm": 0.5798913836479187, "learning_rate": 2.156616567618544e-05, "loss": 0.6439, "step": 9717 }, { "epoch": 1.7335652484167334, "grad_norm": 0.42159613966941833, "learning_rate": 2.153770764908425e-05, "loss": 0.3031, "step": 9718 }, { "epoch": 1.7337436446347336, "grad_norm": 0.7180142998695374, "learning_rate": 2.150926756531324e-05, "loss": 0.4069, "step": 9719 }, { "epoch": 1.7339220408527338, "grad_norm": 0.4658437669277191, "learning_rate": 2.148084542710596e-05, "loss": 0.4876, "step": 9720 }, { "epoch": 1.734100437070734, "grad_norm": 0.48459357023239136, "learning_rate": 2.1452441236694792e-05, "loss": 0.4056, "step": 9721 }, { "epoch": 1.7342788332887342, "grad_norm": 0.5127313733100891, "learning_rate": 2.142405499631056e-05, "loss": 0.4503, "step": 9722 }, { "epoch": 1.7344572295067344, "grad_norm": 0.47491639852523804, "learning_rate": 2.139568670818262e-05, "loss": 0.4279, "step": 9723 }, { "epoch": 1.7346356257247346, "grad_norm": 0.5612772107124329, "learning_rate": 2.136733637453911e-05, "loss": 0.6575, "step": 9724 }, { "epoch": 1.7348140219427348, "grad_norm": 0.566277027130127, "learning_rate": 2.1339003997606577e-05, "loss": 0.628, "step": 9725 }, { "epoch": 1.734992418160735, "grad_norm": 0.5447231531143188, "learning_rate": 2.1310689579610327e-05, "loss": 0.5066, "step": 9726 }, { "epoch": 1.7351708143787352, "grad_norm": 0.463008850812912, "learning_rate": 2.1282393122774053e-05, "loss": 0.3353, "step": 9727 }, { "epoch": 1.7353492105967354, "grad_norm": 0.4631538689136505, "learning_rate": 2.1254114629320227e-05, "loss": 0.3234, "step": 9728 }, { "epoch": 1.7355276068147356, "grad_norm": 0.5900436043739319, "learning_rate": 2.1225854101469794e-05, "loss": 0.5786, "step": 9729 }, { "epoch": 1.7357060030327358, "grad_norm": 0.4738953113555908, "learning_rate": 2.1197611541442313e-05, "loss": 0.4288, "step": 9730 }, { "epoch": 1.735884399250736, "grad_norm": 0.4944877028465271, "learning_rate": 2.116938695145598e-05, "loss": 0.3708, "step": 9731 }, { "epoch": 1.7360627954687362, "grad_norm": 0.49892377853393555, "learning_rate": 2.11411803337275e-05, "loss": 0.4571, "step": 9732 }, { "epoch": 1.7362411916867364, "grad_norm": 0.5799189209938049, "learning_rate": 2.1112991690472234e-05, "loss": 0.5227, "step": 9733 }, { "epoch": 1.7364195879047364, "grad_norm": 0.5338386297225952, "learning_rate": 2.1084821023904133e-05, "loss": 0.4744, "step": 9734 }, { "epoch": 1.7365979841227366, "grad_norm": 1.029298186302185, "learning_rate": 2.1056668336235624e-05, "loss": 0.5359, "step": 9735 }, { "epoch": 1.7367763803407368, "grad_norm": 0.520614504814148, "learning_rate": 2.1028533629677937e-05, "loss": 0.5175, "step": 9736 }, { "epoch": 1.736954776558737, "grad_norm": 0.5271267890930176, "learning_rate": 2.1000416906440613e-05, "loss": 0.5389, "step": 9737 }, { "epoch": 1.7371331727767372, "grad_norm": 0.5082148313522339, "learning_rate": 2.0972318168732048e-05, "loss": 0.4963, "step": 9738 }, { "epoch": 1.7373115689947372, "grad_norm": 0.42362144589424133, "learning_rate": 2.094423741875903e-05, "loss": 0.4004, "step": 9739 }, { "epoch": 1.7374899652127374, "grad_norm": 0.5850268602371216, "learning_rate": 2.0916174658727054e-05, "loss": 0.5616, "step": 9740 }, { "epoch": 1.7376683614307376, "grad_norm": 0.5065614581108093, "learning_rate": 2.0888129890840102e-05, "loss": 0.4137, "step": 9741 }, { "epoch": 1.7378467576487377, "grad_norm": 0.535125732421875, "learning_rate": 2.0860103117300882e-05, "loss": 0.5246, "step": 9742 }, { "epoch": 1.738025153866738, "grad_norm": 0.49829819798469543, "learning_rate": 2.0832094340310554e-05, "loss": 0.3795, "step": 9743 }, { "epoch": 1.7382035500847381, "grad_norm": 0.5445170402526855, "learning_rate": 2.0804103562068883e-05, "loss": 0.5326, "step": 9744 }, { "epoch": 1.7383819463027383, "grad_norm": 0.5501541495323181, "learning_rate": 2.0776130784774333e-05, "loss": 0.5399, "step": 9745 }, { "epoch": 1.7385603425207385, "grad_norm": 0.48159241676330566, "learning_rate": 2.0748176010623758e-05, "loss": 0.4521, "step": 9746 }, { "epoch": 1.7387387387387387, "grad_norm": 0.5003437399864197, "learning_rate": 2.0720239241812848e-05, "loss": 0.4789, "step": 9747 }, { "epoch": 1.738917134956739, "grad_norm": 0.4868369400501251, "learning_rate": 2.0692320480535624e-05, "loss": 0.3965, "step": 9748 }, { "epoch": 1.7390955311747391, "grad_norm": 0.5282782316207886, "learning_rate": 2.066441972898489e-05, "loss": 0.4808, "step": 9749 }, { "epoch": 1.7392739273927393, "grad_norm": 0.4921809434890747, "learning_rate": 2.0636536989351972e-05, "loss": 0.458, "step": 9750 }, { "epoch": 1.7394523236107395, "grad_norm": 0.5522050857543945, "learning_rate": 2.0608672263826705e-05, "loss": 0.5707, "step": 9751 }, { "epoch": 1.7396307198287397, "grad_norm": 0.6109182834625244, "learning_rate": 2.0580825554597644e-05, "loss": 0.773, "step": 9752 }, { "epoch": 1.73980911604674, "grad_norm": 0.5634390115737915, "learning_rate": 2.055299686385176e-05, "loss": 0.6855, "step": 9753 }, { "epoch": 1.7399875122647401, "grad_norm": 0.47607871890068054, "learning_rate": 2.0525186193774802e-05, "loss": 0.4555, "step": 9754 }, { "epoch": 1.7401659084827403, "grad_norm": 0.5108838677406311, "learning_rate": 2.0497393546551003e-05, "loss": 0.4981, "step": 9755 }, { "epoch": 1.7403443047007403, "grad_norm": 0.522916316986084, "learning_rate": 2.046961892436308e-05, "loss": 0.5068, "step": 9756 }, { "epoch": 1.7405227009187405, "grad_norm": 0.49157410860061646, "learning_rate": 2.0441862329392547e-05, "loss": 0.4636, "step": 9757 }, { "epoch": 1.7407010971367407, "grad_norm": 0.5237782597541809, "learning_rate": 2.0414123763819348e-05, "loss": 0.4787, "step": 9758 }, { "epoch": 1.740879493354741, "grad_norm": 0.521073043346405, "learning_rate": 2.0386403229822102e-05, "loss": 0.4384, "step": 9759 }, { "epoch": 1.741057889572741, "grad_norm": 0.5777454376220703, "learning_rate": 2.035870072957788e-05, "loss": 0.6771, "step": 9760 }, { "epoch": 1.741236285790741, "grad_norm": 0.5693924427032471, "learning_rate": 2.0331016265262543e-05, "loss": 0.6439, "step": 9761 }, { "epoch": 1.7414146820087413, "grad_norm": 0.5041927099227905, "learning_rate": 2.030334983905027e-05, "loss": 0.4557, "step": 9762 }, { "epoch": 1.7415930782267415, "grad_norm": 0.5025537014007568, "learning_rate": 2.0275701453114108e-05, "loss": 0.4614, "step": 9763 }, { "epoch": 1.7417714744447417, "grad_norm": 0.5347465872764587, "learning_rate": 2.024807110962551e-05, "loss": 0.5502, "step": 9764 }, { "epoch": 1.7419498706627419, "grad_norm": 0.4626553952693939, "learning_rate": 2.0220458810754488e-05, "loss": 0.3479, "step": 9765 }, { "epoch": 1.742128266880742, "grad_norm": 0.4479665458202362, "learning_rate": 2.019286455866981e-05, "loss": 0.4113, "step": 9766 }, { "epoch": 1.7423066630987423, "grad_norm": 0.5242836475372314, "learning_rate": 2.0165288355538656e-05, "loss": 0.522, "step": 9767 }, { "epoch": 1.7424850593167425, "grad_norm": 0.4955342411994934, "learning_rate": 2.01377302035268e-05, "loss": 0.3913, "step": 9768 }, { "epoch": 1.7426634555347427, "grad_norm": 0.5188047289848328, "learning_rate": 2.0110190104798727e-05, "loss": 0.4209, "step": 9769 }, { "epoch": 1.7428418517527429, "grad_norm": 0.47911545634269714, "learning_rate": 2.0082668061517373e-05, "loss": 0.3811, "step": 9770 }, { "epoch": 1.743020247970743, "grad_norm": 0.5450549125671387, "learning_rate": 2.0055164075844345e-05, "loss": 0.4722, "step": 9771 }, { "epoch": 1.7431986441887433, "grad_norm": 0.5390350818634033, "learning_rate": 2.0027678149939747e-05, "loss": 0.558, "step": 9772 }, { "epoch": 1.7433770404067435, "grad_norm": 0.5970989465713501, "learning_rate": 2.0000210285962385e-05, "loss": 0.5316, "step": 9773 }, { "epoch": 1.7435554366247437, "grad_norm": 0.5830510258674622, "learning_rate": 1.9972760486069498e-05, "loss": 0.6375, "step": 9774 }, { "epoch": 1.7437338328427439, "grad_norm": 0.5821729898452759, "learning_rate": 1.9945328752417057e-05, "loss": 0.5732, "step": 9775 }, { "epoch": 1.743912229060744, "grad_norm": 0.5588756203651428, "learning_rate": 1.9917915087159482e-05, "loss": 0.5578, "step": 9776 }, { "epoch": 1.7440906252787443, "grad_norm": 0.5491783618927002, "learning_rate": 1.9890519492449798e-05, "loss": 0.5814, "step": 9777 }, { "epoch": 1.7442690214967442, "grad_norm": 0.5677632093429565, "learning_rate": 1.9863141970439758e-05, "loss": 0.5373, "step": 9778 }, { "epoch": 1.7444474177147444, "grad_norm": 0.4712635576725006, "learning_rate": 1.983578252327939e-05, "loss": 0.3357, "step": 9779 }, { "epoch": 1.7446258139327446, "grad_norm": 0.5437815189361572, "learning_rate": 1.9808441153117723e-05, "loss": 0.5909, "step": 9780 }, { "epoch": 1.7448042101507448, "grad_norm": 0.49370652437210083, "learning_rate": 1.9781117862102045e-05, "loss": 0.5412, "step": 9781 }, { "epoch": 1.744982606368745, "grad_norm": 0.5577530860900879, "learning_rate": 1.9753812652378217e-05, "loss": 0.672, "step": 9782 }, { "epoch": 1.745161002586745, "grad_norm": 0.5543391108512878, "learning_rate": 1.9726525526090917e-05, "loss": 0.5612, "step": 9783 }, { "epoch": 1.7453393988047452, "grad_norm": 0.4798511564731598, "learning_rate": 1.9699256485383177e-05, "loss": 0.4455, "step": 9784 }, { "epoch": 1.7455177950227454, "grad_norm": 0.5388596057891846, "learning_rate": 1.967200553239676e-05, "loss": 0.4951, "step": 9785 }, { "epoch": 1.7456961912407456, "grad_norm": 0.4370432496070862, "learning_rate": 1.9644772669271894e-05, "loss": 0.3437, "step": 9786 }, { "epoch": 1.7458745874587458, "grad_norm": 0.5266339182853699, "learning_rate": 1.9617557898147454e-05, "loss": 0.5436, "step": 9787 }, { "epoch": 1.746052983676746, "grad_norm": 0.4976480305194855, "learning_rate": 1.9590361221160897e-05, "loss": 0.396, "step": 9788 }, { "epoch": 1.7462313798947462, "grad_norm": 0.5532608032226562, "learning_rate": 1.956318264044818e-05, "loss": 0.5835, "step": 9789 }, { "epoch": 1.7464097761127464, "grad_norm": 0.45472416281700134, "learning_rate": 1.9536022158143956e-05, "loss": 0.3592, "step": 9790 }, { "epoch": 1.7465881723307466, "grad_norm": 0.5193656086921692, "learning_rate": 1.9508879776381355e-05, "loss": 0.4665, "step": 9791 }, { "epoch": 1.7467665685487468, "grad_norm": 0.5724708437919617, "learning_rate": 1.948175549729217e-05, "loss": 0.5152, "step": 9792 }, { "epoch": 1.746944964766747, "grad_norm": 0.5208200216293335, "learning_rate": 1.945464932300667e-05, "loss": 0.6308, "step": 9793 }, { "epoch": 1.7471233609847472, "grad_norm": 0.4895492494106293, "learning_rate": 1.9427561255653815e-05, "loss": 0.5369, "step": 9794 }, { "epoch": 1.7473017572027474, "grad_norm": 0.46594980359077454, "learning_rate": 1.94004912973611e-05, "loss": 0.4012, "step": 9795 }, { "epoch": 1.7474801534207476, "grad_norm": 0.5352393984794617, "learning_rate": 1.937343945025455e-05, "loss": 0.5697, "step": 9796 }, { "epoch": 1.7476585496387478, "grad_norm": 0.5204436182975769, "learning_rate": 1.934640571645882e-05, "loss": 0.564, "step": 9797 }, { "epoch": 1.747836945856748, "grad_norm": 0.5590188503265381, "learning_rate": 1.9319390098097108e-05, "loss": 0.5922, "step": 9798 }, { "epoch": 1.7480153420747482, "grad_norm": 0.5261103510856628, "learning_rate": 1.9292392597291293e-05, "loss": 0.3612, "step": 9799 }, { "epoch": 1.7481937382927482, "grad_norm": 0.4883411228656769, "learning_rate": 1.9265413216161598e-05, "loss": 0.4208, "step": 9800 }, { "epoch": 1.7483721345107484, "grad_norm": 0.5407469272613525, "learning_rate": 1.9238451956827135e-05, "loss": 0.5281, "step": 9801 }, { "epoch": 1.7485505307287486, "grad_norm": 0.5203365087509155, "learning_rate": 1.921150882140532e-05, "loss": 0.4411, "step": 9802 }, { "epoch": 1.7487289269467488, "grad_norm": 0.5493040084838867, "learning_rate": 1.9184583812012268e-05, "loss": 0.5184, "step": 9803 }, { "epoch": 1.748907323164749, "grad_norm": 0.41875502467155457, "learning_rate": 1.9157676930762702e-05, "loss": 0.3174, "step": 9804 }, { "epoch": 1.749085719382749, "grad_norm": 0.5331047177314758, "learning_rate": 1.913078817976982e-05, "loss": 0.5325, "step": 9805 }, { "epoch": 1.7492641156007491, "grad_norm": 0.5197538137435913, "learning_rate": 1.9103917561145516e-05, "loss": 0.4778, "step": 9806 }, { "epoch": 1.7494425118187493, "grad_norm": 0.5139304995536804, "learning_rate": 1.9077065077000104e-05, "loss": 0.5117, "step": 9807 }, { "epoch": 1.7496209080367495, "grad_norm": 0.540160596370697, "learning_rate": 1.9050230729442702e-05, "loss": 0.4415, "step": 9808 }, { "epoch": 1.7497993042547497, "grad_norm": 0.5008589029312134, "learning_rate": 1.9023414520580733e-05, "loss": 0.4228, "step": 9809 }, { "epoch": 1.74997770047275, "grad_norm": 0.5717454552650452, "learning_rate": 1.8996616452520404e-05, "loss": 0.5889, "step": 9810 }, { "epoch": 1.7501560966907501, "grad_norm": 0.5014664530754089, "learning_rate": 1.8969836527366448e-05, "loss": 0.4555, "step": 9811 }, { "epoch": 1.7503344929087503, "grad_norm": 0.5449042320251465, "learning_rate": 1.8943074747222068e-05, "loss": 0.5205, "step": 9812 }, { "epoch": 1.7505128891267505, "grad_norm": 0.49682438373565674, "learning_rate": 1.8916331114189195e-05, "loss": 0.4651, "step": 9813 }, { "epoch": 1.7506912853447507, "grad_norm": 0.5355203151702881, "learning_rate": 1.8889605630368235e-05, "loss": 0.5275, "step": 9814 }, { "epoch": 1.750869681562751, "grad_norm": 0.5955283045768738, "learning_rate": 1.8862898297858173e-05, "loss": 0.6346, "step": 9815 }, { "epoch": 1.751048077780751, "grad_norm": 0.5433852672576904, "learning_rate": 1.8836209118756637e-05, "loss": 0.5114, "step": 9816 }, { "epoch": 1.7512264739987513, "grad_norm": 0.5149011611938477, "learning_rate": 1.8809538095159727e-05, "loss": 0.4793, "step": 9817 }, { "epoch": 1.7514048702167515, "grad_norm": 0.4862879514694214, "learning_rate": 1.8782885229162245e-05, "loss": 0.3931, "step": 9818 }, { "epoch": 1.7515832664347517, "grad_norm": 0.6665529012680054, "learning_rate": 1.8756250522857397e-05, "loss": 0.8526, "step": 9819 }, { "epoch": 1.751761662652752, "grad_norm": 0.5120258927345276, "learning_rate": 1.8729633978337184e-05, "loss": 0.5787, "step": 9820 }, { "epoch": 1.751940058870752, "grad_norm": 0.5091425776481628, "learning_rate": 1.870303559769196e-05, "loss": 0.4539, "step": 9821 }, { "epoch": 1.752118455088752, "grad_norm": 0.5244202017784119, "learning_rate": 1.86764553830108e-05, "loss": 0.5742, "step": 9822 }, { "epoch": 1.7522968513067523, "grad_norm": 0.4686015546321869, "learning_rate": 1.8649893336381313e-05, "loss": 0.4521, "step": 9823 }, { "epoch": 1.7524752475247525, "grad_norm": 0.5084050893783569, "learning_rate": 1.8623349459889582e-05, "loss": 0.487, "step": 9824 }, { "epoch": 1.7526536437427527, "grad_norm": 0.4738958477973938, "learning_rate": 1.859682375562044e-05, "loss": 0.3566, "step": 9825 }, { "epoch": 1.7528320399607529, "grad_norm": 0.5699481964111328, "learning_rate": 1.857031622565722e-05, "loss": 0.5028, "step": 9826 }, { "epoch": 1.7530104361787529, "grad_norm": 0.4751138985157013, "learning_rate": 1.85438268720817e-05, "loss": 0.3851, "step": 9827 }, { "epoch": 1.753188832396753, "grad_norm": 0.5180573463439941, "learning_rate": 1.851735569697449e-05, "loss": 0.4933, "step": 9828 }, { "epoch": 1.7533672286147532, "grad_norm": 0.5232348442077637, "learning_rate": 1.849090270241449e-05, "loss": 0.4913, "step": 9829 }, { "epoch": 1.7535456248327534, "grad_norm": 0.43816691637039185, "learning_rate": 1.8464467890479397e-05, "loss": 0.3977, "step": 9830 }, { "epoch": 1.7537240210507536, "grad_norm": 0.45382770895957947, "learning_rate": 1.8438051263245326e-05, "loss": 0.3, "step": 9831 }, { "epoch": 1.7539024172687538, "grad_norm": 0.5431753396987915, "learning_rate": 1.8411652822787118e-05, "loss": 0.6459, "step": 9832 }, { "epoch": 1.754080813486754, "grad_norm": 0.4964623749256134, "learning_rate": 1.8385272571177974e-05, "loss": 0.5321, "step": 9833 }, { "epoch": 1.7542592097047542, "grad_norm": 0.5045402646064758, "learning_rate": 1.8358910510489907e-05, "loss": 0.4846, "step": 9834 }, { "epoch": 1.7544376059227544, "grad_norm": 0.4275517761707306, "learning_rate": 1.8332566642793312e-05, "loss": 0.3162, "step": 9835 }, { "epoch": 1.7546160021407546, "grad_norm": 0.5664304494857788, "learning_rate": 1.8306240970157206e-05, "loss": 0.5896, "step": 9836 }, { "epoch": 1.7547943983587548, "grad_norm": 0.5230024456977844, "learning_rate": 1.8279933494649265e-05, "loss": 0.414, "step": 9837 }, { "epoch": 1.754972794576755, "grad_norm": 0.5773944854736328, "learning_rate": 1.8253644218335584e-05, "loss": 0.557, "step": 9838 }, { "epoch": 1.7551511907947552, "grad_norm": 0.5354689359664917, "learning_rate": 1.8227373143281017e-05, "loss": 0.516, "step": 9839 }, { "epoch": 1.7553295870127554, "grad_norm": 0.47697752714157104, "learning_rate": 1.820112027154877e-05, "loss": 0.4222, "step": 9840 }, { "epoch": 1.7555079832307556, "grad_norm": 0.5502009391784668, "learning_rate": 1.8174885605200782e-05, "loss": 0.4705, "step": 9841 }, { "epoch": 1.7556863794487558, "grad_norm": 0.4850895404815674, "learning_rate": 1.8148669146297565e-05, "loss": 0.4318, "step": 9842 }, { "epoch": 1.755864775666756, "grad_norm": 0.5271044969558716, "learning_rate": 1.8122470896898057e-05, "loss": 0.4357, "step": 9843 }, { "epoch": 1.756043171884756, "grad_norm": 0.5360130071640015, "learning_rate": 1.809629085905992e-05, "loss": 0.4743, "step": 9844 }, { "epoch": 1.7562215681027562, "grad_norm": 0.5907223224639893, "learning_rate": 1.807012903483929e-05, "loss": 0.4996, "step": 9845 }, { "epoch": 1.7563999643207564, "grad_norm": 0.5198745131492615, "learning_rate": 1.8043985426290958e-05, "loss": 0.5376, "step": 9846 }, { "epoch": 1.7565783605387566, "grad_norm": 0.7061692476272583, "learning_rate": 1.801786003546818e-05, "loss": 0.5927, "step": 9847 }, { "epoch": 1.7567567567567568, "grad_norm": 0.4907400906085968, "learning_rate": 1.799175286442281e-05, "loss": 0.4041, "step": 9848 }, { "epoch": 1.7569351529747568, "grad_norm": 0.4804750382900238, "learning_rate": 1.7965663915205376e-05, "loss": 0.4552, "step": 9849 }, { "epoch": 1.757113549192757, "grad_norm": 0.5161197781562805, "learning_rate": 1.7939593189864794e-05, "loss": 0.4615, "step": 9850 }, { "epoch": 1.7572919454107572, "grad_norm": 0.5448468327522278, "learning_rate": 1.791354069044876e-05, "loss": 0.4771, "step": 9851 }, { "epoch": 1.7574703416287574, "grad_norm": 0.5089532136917114, "learning_rate": 1.7887506419003303e-05, "loss": 0.4607, "step": 9852 }, { "epoch": 1.7576487378467576, "grad_norm": 0.49231788516044617, "learning_rate": 1.786149037757326e-05, "loss": 0.5082, "step": 9853 }, { "epoch": 1.7578271340647578, "grad_norm": 0.5540265440940857, "learning_rate": 1.78354925682018e-05, "loss": 0.5348, "step": 9854 }, { "epoch": 1.758005530282758, "grad_norm": 0.6183173060417175, "learning_rate": 1.7809512992930875e-05, "loss": 0.6396, "step": 9855 }, { "epoch": 1.7581839265007582, "grad_norm": 0.4565494656562805, "learning_rate": 1.778355165380091e-05, "loss": 0.407, "step": 9856 }, { "epoch": 1.7583623227187584, "grad_norm": 0.5190179347991943, "learning_rate": 1.7757608552850828e-05, "loss": 0.499, "step": 9857 }, { "epoch": 1.7585407189367586, "grad_norm": 0.5426142811775208, "learning_rate": 1.7731683692118277e-05, "loss": 0.5268, "step": 9858 }, { "epoch": 1.7587191151547588, "grad_norm": 0.5767265558242798, "learning_rate": 1.770577707363927e-05, "loss": 0.4803, "step": 9859 }, { "epoch": 1.758897511372759, "grad_norm": 0.5167482495307922, "learning_rate": 1.7679888699448644e-05, "loss": 0.5015, "step": 9860 }, { "epoch": 1.7590759075907592, "grad_norm": 0.4636717736721039, "learning_rate": 1.7654018571579554e-05, "loss": 0.3958, "step": 9861 }, { "epoch": 1.7592543038087594, "grad_norm": 0.504102349281311, "learning_rate": 1.7628166692063823e-05, "loss": 0.3926, "step": 9862 }, { "epoch": 1.7594327000267596, "grad_norm": 0.5384790301322937, "learning_rate": 1.7602333062931935e-05, "loss": 0.4652, "step": 9863 }, { "epoch": 1.7596110962447598, "grad_norm": 0.48373332619667053, "learning_rate": 1.757651768621274e-05, "loss": 0.3906, "step": 9864 }, { "epoch": 1.75978949246276, "grad_norm": 0.4555506110191345, "learning_rate": 1.755072056393389e-05, "loss": 0.3741, "step": 9865 }, { "epoch": 1.75996788868076, "grad_norm": 0.6488031148910522, "learning_rate": 1.7524941698121354e-05, "loss": 0.7711, "step": 9866 }, { "epoch": 1.7601462848987601, "grad_norm": 0.5170810222625732, "learning_rate": 1.7499181090799928e-05, "loss": 0.4957, "step": 9867 }, { "epoch": 1.7603246811167603, "grad_norm": 0.5525889992713928, "learning_rate": 1.7473438743992736e-05, "loss": 0.6491, "step": 9868 }, { "epoch": 1.7605030773347605, "grad_norm": 0.4711996018886566, "learning_rate": 1.7447714659721586e-05, "loss": 0.4518, "step": 9869 }, { "epoch": 1.7606814735527607, "grad_norm": 0.43584319949150085, "learning_rate": 1.742200884000686e-05, "loss": 0.3699, "step": 9870 }, { "epoch": 1.7608598697707607, "grad_norm": 0.49232321977615356, "learning_rate": 1.7396321286867412e-05, "loss": 0.4172, "step": 9871 }, { "epoch": 1.761038265988761, "grad_norm": 0.6322119235992432, "learning_rate": 1.737065200232088e-05, "loss": 0.5614, "step": 9872 }, { "epoch": 1.761216662206761, "grad_norm": 0.46853572130203247, "learning_rate": 1.7345000988383208e-05, "loss": 0.3119, "step": 9873 }, { "epoch": 1.7613950584247613, "grad_norm": 0.5115934610366821, "learning_rate": 1.7319368247069005e-05, "loss": 0.5075, "step": 9874 }, { "epoch": 1.7615734546427615, "grad_norm": 0.524381697177887, "learning_rate": 1.729375378039155e-05, "loss": 0.4831, "step": 9875 }, { "epoch": 1.7617518508607617, "grad_norm": 0.5672167539596558, "learning_rate": 1.7268157590362487e-05, "loss": 0.5661, "step": 9876 }, { "epoch": 1.761930247078762, "grad_norm": 0.5367761850357056, "learning_rate": 1.7242579678992204e-05, "loss": 0.5691, "step": 9877 }, { "epoch": 1.762108643296762, "grad_norm": 0.5382869243621826, "learning_rate": 1.721702004828951e-05, "loss": 0.4331, "step": 9878 }, { "epoch": 1.7622870395147623, "grad_norm": 0.4912665784358978, "learning_rate": 1.719147870026194e-05, "loss": 0.4589, "step": 9879 }, { "epoch": 1.7624654357327625, "grad_norm": 0.45093002915382385, "learning_rate": 1.716595563691539e-05, "loss": 0.3703, "step": 9880 }, { "epoch": 1.7626438319507627, "grad_norm": 0.614578127861023, "learning_rate": 1.7140450860254535e-05, "loss": 0.5906, "step": 9881 }, { "epoch": 1.7628222281687629, "grad_norm": 0.45444604754447937, "learning_rate": 1.7114964372282466e-05, "loss": 0.4004, "step": 9882 }, { "epoch": 1.763000624386763, "grad_norm": 0.5366213321685791, "learning_rate": 1.708949617500083e-05, "loss": 0.4764, "step": 9883 }, { "epoch": 1.7631790206047633, "grad_norm": 0.48068365454673767, "learning_rate": 1.7064046270409973e-05, "loss": 0.447, "step": 9884 }, { "epoch": 1.7633574168227635, "grad_norm": 0.5525119304656982, "learning_rate": 1.7038614660508657e-05, "loss": 0.6492, "step": 9885 }, { "epoch": 1.7635358130407637, "grad_norm": 0.5292766094207764, "learning_rate": 1.7013201347294284e-05, "loss": 0.4177, "step": 9886 }, { "epoch": 1.7637142092587639, "grad_norm": 0.48070111870765686, "learning_rate": 1.698780633276284e-05, "loss": 0.3513, "step": 9887 }, { "epoch": 1.7638926054767639, "grad_norm": 0.4753524363040924, "learning_rate": 1.6962429618908785e-05, "loss": 0.4376, "step": 9888 }, { "epoch": 1.764071001694764, "grad_norm": 0.5445675849914551, "learning_rate": 1.693707120772528e-05, "loss": 0.5026, "step": 9889 }, { "epoch": 1.7642493979127643, "grad_norm": 0.5858935713768005, "learning_rate": 1.6911731101203863e-05, "loss": 0.7673, "step": 9890 }, { "epoch": 1.7644277941307644, "grad_norm": 0.5799805521965027, "learning_rate": 1.688640930133481e-05, "loss": 0.6229, "step": 9891 }, { "epoch": 1.7646061903487646, "grad_norm": 0.5168468952178955, "learning_rate": 1.6861105810106804e-05, "loss": 0.4652, "step": 9892 }, { "epoch": 1.7647845865667646, "grad_norm": 0.5596296787261963, "learning_rate": 1.683582062950728e-05, "loss": 0.6326, "step": 9893 }, { "epoch": 1.7649629827847648, "grad_norm": 0.5667998194694519, "learning_rate": 1.6810553761522047e-05, "loss": 0.5322, "step": 9894 }, { "epoch": 1.765141379002765, "grad_norm": 0.5282207131385803, "learning_rate": 1.6785305208135538e-05, "loss": 0.4871, "step": 9895 }, { "epoch": 1.7653197752207652, "grad_norm": 0.5011943578720093, "learning_rate": 1.6760074971330863e-05, "loss": 0.4561, "step": 9896 }, { "epoch": 1.7654981714387654, "grad_norm": 0.49850714206695557, "learning_rate": 1.6734863053089467e-05, "loss": 0.4113, "step": 9897 }, { "epoch": 1.7656765676567656, "grad_norm": 0.5289611220359802, "learning_rate": 1.670966945539157e-05, "loss": 0.5378, "step": 9898 }, { "epoch": 1.7658549638747658, "grad_norm": 0.5059836506843567, "learning_rate": 1.6684494180215837e-05, "loss": 0.4601, "step": 9899 }, { "epoch": 1.766033360092766, "grad_norm": 0.577877402305603, "learning_rate": 1.6659337229539525e-05, "loss": 0.6335, "step": 9900 }, { "epoch": 1.7662117563107662, "grad_norm": 0.454922080039978, "learning_rate": 1.6634198605338437e-05, "loss": 0.3668, "step": 9901 }, { "epoch": 1.7663901525287664, "grad_norm": 0.5340895652770996, "learning_rate": 1.6609078309586967e-05, "loss": 0.4897, "step": 9902 }, { "epoch": 1.7665685487467666, "grad_norm": 0.5871651768684387, "learning_rate": 1.6583976344258097e-05, "loss": 0.7127, "step": 9903 }, { "epoch": 1.7667469449647668, "grad_norm": 0.48758915066719055, "learning_rate": 1.6558892711323215e-05, "loss": 0.4231, "step": 9904 }, { "epoch": 1.766925341182767, "grad_norm": 0.5873585343360901, "learning_rate": 1.65338274127525e-05, "loss": 0.6313, "step": 9905 }, { "epoch": 1.7671037374007672, "grad_norm": 0.5112351775169373, "learning_rate": 1.6508780450514516e-05, "loss": 0.4986, "step": 9906 }, { "epoch": 1.7672821336187674, "grad_norm": 0.5402355194091797, "learning_rate": 1.6483751826576382e-05, "loss": 0.6041, "step": 9907 }, { "epoch": 1.7674605298367676, "grad_norm": 0.925947368144989, "learning_rate": 1.6458741542903942e-05, "loss": 0.4839, "step": 9908 }, { "epoch": 1.7676389260547678, "grad_norm": 0.5026435256004333, "learning_rate": 1.6433749601461378e-05, "loss": 0.4032, "step": 9909 }, { "epoch": 1.7678173222727678, "grad_norm": 0.5951597690582275, "learning_rate": 1.6408776004211674e-05, "loss": 0.5571, "step": 9910 }, { "epoch": 1.767995718490768, "grad_norm": 0.4633539915084839, "learning_rate": 1.6383820753116118e-05, "loss": 0.4171, "step": 9911 }, { "epoch": 1.7681741147087682, "grad_norm": 0.5063800811767578, "learning_rate": 1.6358883850134816e-05, "loss": 0.3949, "step": 9912 }, { "epoch": 1.7683525109267684, "grad_norm": 0.5754553079605103, "learning_rate": 1.6333965297226166e-05, "loss": 0.6068, "step": 9913 }, { "epoch": 1.7685309071447686, "grad_norm": 0.477913498878479, "learning_rate": 1.6309065096347386e-05, "loss": 0.4111, "step": 9914 }, { "epoch": 1.7687093033627685, "grad_norm": 0.49996158480644226, "learning_rate": 1.6284183249454048e-05, "loss": 0.3918, "step": 9915 }, { "epoch": 1.7688876995807687, "grad_norm": 0.6070882678031921, "learning_rate": 1.6259319758500312e-05, "loss": 0.6542, "step": 9916 }, { "epoch": 1.769066095798769, "grad_norm": 0.5135297179222107, "learning_rate": 1.6234474625439117e-05, "loss": 0.4433, "step": 9917 }, { "epoch": 1.7692444920167691, "grad_norm": 0.48473936319351196, "learning_rate": 1.620964785222162e-05, "loss": 0.5045, "step": 9918 }, { "epoch": 1.7694228882347693, "grad_norm": 0.4841310679912567, "learning_rate": 1.618483944079782e-05, "loss": 0.3964, "step": 9919 }, { "epoch": 1.7696012844527695, "grad_norm": 0.54714035987854, "learning_rate": 1.6160049393116104e-05, "loss": 0.5497, "step": 9920 }, { "epoch": 1.7697796806707697, "grad_norm": 0.49543190002441406, "learning_rate": 1.6135277711123443e-05, "loss": 0.5597, "step": 9921 }, { "epoch": 1.76995807688877, "grad_norm": 0.5189563035964966, "learning_rate": 1.6110524396765496e-05, "loss": 0.4857, "step": 9922 }, { "epoch": 1.7701364731067701, "grad_norm": 0.5028755068778992, "learning_rate": 1.6085789451986245e-05, "loss": 0.4835, "step": 9923 }, { "epoch": 1.7703148693247703, "grad_norm": 0.5071883201599121, "learning_rate": 1.606107287872846e-05, "loss": 0.5195, "step": 9924 }, { "epoch": 1.7704932655427705, "grad_norm": 0.5900061130523682, "learning_rate": 1.603637467893332e-05, "loss": 0.6589, "step": 9925 }, { "epoch": 1.7706716617607707, "grad_norm": 0.644641637802124, "learning_rate": 1.6011694854540683e-05, "loss": 0.4762, "step": 9926 }, { "epoch": 1.770850057978771, "grad_norm": 0.543786883354187, "learning_rate": 1.5987033407488806e-05, "loss": 0.5394, "step": 9927 }, { "epoch": 1.7710284541967711, "grad_norm": 0.5951870083808899, "learning_rate": 1.5962390339714613e-05, "loss": 0.6289, "step": 9928 }, { "epoch": 1.7712068504147713, "grad_norm": 0.5071197748184204, "learning_rate": 1.593776565315358e-05, "loss": 0.4177, "step": 9929 }, { "epoch": 1.7713852466327715, "grad_norm": 0.4966924786567688, "learning_rate": 1.591315934973969e-05, "loss": 0.4219, "step": 9930 }, { "epoch": 1.7715636428507717, "grad_norm": 0.4919562339782715, "learning_rate": 1.588857143140554e-05, "loss": 0.4038, "step": 9931 }, { "epoch": 1.7717420390687717, "grad_norm": 0.59256911277771, "learning_rate": 1.5864001900082247e-05, "loss": 0.5511, "step": 9932 }, { "epoch": 1.771920435286772, "grad_norm": 0.5095023512840271, "learning_rate": 1.5839450757699465e-05, "loss": 0.5277, "step": 9933 }, { "epoch": 1.772098831504772, "grad_norm": 0.5355534553527832, "learning_rate": 1.581491800618548e-05, "loss": 0.4272, "step": 9934 }, { "epoch": 1.7722772277227723, "grad_norm": 0.5533571839332581, "learning_rate": 1.5790403647467033e-05, "loss": 0.5586, "step": 9935 }, { "epoch": 1.7724556239407725, "grad_norm": 0.45834532380104065, "learning_rate": 1.5765907683469527e-05, "loss": 0.4001, "step": 9936 }, { "epoch": 1.7726340201587725, "grad_norm": 0.5351871848106384, "learning_rate": 1.5741430116116813e-05, "loss": 0.5495, "step": 9937 }, { "epoch": 1.7728124163767727, "grad_norm": 0.48931190371513367, "learning_rate": 1.5716970947331376e-05, "loss": 0.4277, "step": 9938 }, { "epoch": 1.7729908125947729, "grad_norm": 0.48030897974967957, "learning_rate": 1.5692530179034215e-05, "loss": 0.4325, "step": 9939 }, { "epoch": 1.773169208812773, "grad_norm": 0.5139449238777161, "learning_rate": 1.5668107813144927e-05, "loss": 0.5081, "step": 9940 }, { "epoch": 1.7733476050307733, "grad_norm": 0.5078691840171814, "learning_rate": 1.564370385158159e-05, "loss": 0.3923, "step": 9941 }, { "epoch": 1.7735260012487735, "grad_norm": 0.4742141664028168, "learning_rate": 1.5619318296260897e-05, "loss": 0.4197, "step": 9942 }, { "epoch": 1.7737043974667737, "grad_norm": 0.5286515355110168, "learning_rate": 1.5594951149098092e-05, "loss": 0.4823, "step": 9943 }, { "epoch": 1.7738827936847739, "grad_norm": 0.481815904378891, "learning_rate": 1.5570602412006944e-05, "loss": 0.4204, "step": 9944 }, { "epoch": 1.774061189902774, "grad_norm": 0.6102718114852905, "learning_rate": 1.554627208689982e-05, "loss": 0.6736, "step": 9945 }, { "epoch": 1.7742395861207743, "grad_norm": 0.534480094909668, "learning_rate": 1.552196017568755e-05, "loss": 0.4361, "step": 9946 }, { "epoch": 1.7744179823387745, "grad_norm": 0.4984724521636963, "learning_rate": 1.549766668027963e-05, "loss": 0.4064, "step": 9947 }, { "epoch": 1.7745963785567747, "grad_norm": 0.5009212493896484, "learning_rate": 1.5473391602584096e-05, "loss": 0.437, "step": 9948 }, { "epoch": 1.7747747747747749, "grad_norm": 0.5126849412918091, "learning_rate": 1.544913494450742e-05, "loss": 0.4619, "step": 9949 }, { "epoch": 1.774953170992775, "grad_norm": 0.5047542452812195, "learning_rate": 1.5424896707954773e-05, "loss": 0.451, "step": 9950 }, { "epoch": 1.7751315672107753, "grad_norm": 0.5584752559661865, "learning_rate": 1.5400676894829767e-05, "loss": 0.5518, "step": 9951 }, { "epoch": 1.7753099634287755, "grad_norm": 0.5159489512443542, "learning_rate": 1.5376475507034694e-05, "loss": 0.4518, "step": 9952 }, { "epoch": 1.7754883596467756, "grad_norm": 0.589447021484375, "learning_rate": 1.535229254647025e-05, "loss": 0.5093, "step": 9953 }, { "epoch": 1.7756667558647756, "grad_norm": 0.5014997720718384, "learning_rate": 1.5328128015035746e-05, "loss": 0.3893, "step": 9954 }, { "epoch": 1.7758451520827758, "grad_norm": 0.5109811425209045, "learning_rate": 1.5303981914629117e-05, "loss": 0.4326, "step": 9955 }, { "epoch": 1.776023548300776, "grad_norm": 0.5431187152862549, "learning_rate": 1.52798542471467e-05, "loss": 0.4706, "step": 9956 }, { "epoch": 1.7762019445187762, "grad_norm": 0.6107049584388733, "learning_rate": 1.5255745014483569e-05, "loss": 0.4079, "step": 9957 }, { "epoch": 1.7763803407367764, "grad_norm": 0.5987387299537659, "learning_rate": 1.5231654218533175e-05, "loss": 0.5319, "step": 9958 }, { "epoch": 1.7765587369547764, "grad_norm": 0.5544418096542358, "learning_rate": 1.5207581861187647e-05, "loss": 0.4959, "step": 9959 }, { "epoch": 1.7767371331727766, "grad_norm": 0.6012864112854004, "learning_rate": 1.5183527944337583e-05, "loss": 0.5722, "step": 9960 }, { "epoch": 1.7769155293907768, "grad_norm": 0.5390208959579468, "learning_rate": 1.5159492469872221e-05, "loss": 0.6233, "step": 9961 }, { "epoch": 1.777093925608777, "grad_norm": 0.5232547521591187, "learning_rate": 1.513547543967922e-05, "loss": 0.5397, "step": 9962 }, { "epoch": 1.7772723218267772, "grad_norm": 0.508708655834198, "learning_rate": 1.5111476855644901e-05, "loss": 0.4217, "step": 9963 }, { "epoch": 1.7774507180447774, "grad_norm": 0.49109214544296265, "learning_rate": 1.5087496719654149e-05, "loss": 0.4588, "step": 9964 }, { "epoch": 1.7776291142627776, "grad_norm": 0.4775922894477844, "learning_rate": 1.5063535033590287e-05, "loss": 0.4091, "step": 9965 }, { "epoch": 1.7778075104807778, "grad_norm": 0.4845198392868042, "learning_rate": 1.5039591799335312e-05, "loss": 0.374, "step": 9966 }, { "epoch": 1.777985906698778, "grad_norm": 0.48541465401649475, "learning_rate": 1.5015667018769692e-05, "loss": 0.5168, "step": 9967 }, { "epoch": 1.7781643029167782, "grad_norm": 0.5408685803413391, "learning_rate": 1.4991760693772422e-05, "loss": 0.617, "step": 9968 }, { "epoch": 1.7783426991347784, "grad_norm": 0.48128771781921387, "learning_rate": 1.4967872826221168e-05, "loss": 0.4295, "step": 9969 }, { "epoch": 1.7785210953527786, "grad_norm": 0.49828970432281494, "learning_rate": 1.4944003417992014e-05, "loss": 0.4724, "step": 9970 }, { "epoch": 1.7786994915707788, "grad_norm": 0.487578809261322, "learning_rate": 1.4920152470959707e-05, "loss": 0.4936, "step": 9971 }, { "epoch": 1.778877887788779, "grad_norm": 0.47483158111572266, "learning_rate": 1.489631998699742e-05, "loss": 0.4905, "step": 9972 }, { "epoch": 1.7790562840067792, "grad_norm": 0.4644508957862854, "learning_rate": 1.487250596797704e-05, "loss": 0.4285, "step": 9973 }, { "epoch": 1.7792346802247794, "grad_norm": 0.5382997989654541, "learning_rate": 1.4848710415768824e-05, "loss": 0.5722, "step": 9974 }, { "epoch": 1.7794130764427796, "grad_norm": 0.5405296087265015, "learning_rate": 1.4824933332241692e-05, "loss": 0.4465, "step": 9975 }, { "epoch": 1.7795914726607795, "grad_norm": 0.526763916015625, "learning_rate": 1.4801174719263122e-05, "loss": 0.5126, "step": 9976 }, { "epoch": 1.7797698688787797, "grad_norm": 0.4779605567455292, "learning_rate": 1.477743457869904e-05, "loss": 0.3936, "step": 9977 }, { "epoch": 1.77994826509678, "grad_norm": 0.5108462572097778, "learning_rate": 1.4753712912414035e-05, "loss": 0.4744, "step": 9978 }, { "epoch": 1.7801266613147801, "grad_norm": 0.5295320749282837, "learning_rate": 1.4730009722271204e-05, "loss": 0.5429, "step": 9979 }, { "epoch": 1.7803050575327803, "grad_norm": 0.5670889019966125, "learning_rate": 1.4706325010132137e-05, "loss": 0.5606, "step": 9980 }, { "epoch": 1.7804834537507803, "grad_norm": 0.5170961022377014, "learning_rate": 1.4682658777857072e-05, "loss": 0.5119, "step": 9981 }, { "epoch": 1.7806618499687805, "grad_norm": 0.4892038106918335, "learning_rate": 1.4659011027304686e-05, "loss": 0.4014, "step": 9982 }, { "epoch": 1.7808402461867807, "grad_norm": 0.46245473623275757, "learning_rate": 1.4635381760332356e-05, "loss": 0.3665, "step": 9983 }, { "epoch": 1.781018642404781, "grad_norm": 0.5037046074867249, "learning_rate": 1.461177097879579e-05, "loss": 0.388, "step": 9984 }, { "epoch": 1.7811970386227811, "grad_norm": 0.4744492471218109, "learning_rate": 1.458817868454948e-05, "loss": 0.3254, "step": 9985 }, { "epoch": 1.7813754348407813, "grad_norm": 0.4990524649620056, "learning_rate": 1.456460487944633e-05, "loss": 0.4834, "step": 9986 }, { "epoch": 1.7815538310587815, "grad_norm": 0.631146252155304, "learning_rate": 1.4541049565337749e-05, "loss": 0.5804, "step": 9987 }, { "epoch": 1.7817322272767817, "grad_norm": 0.5024986863136292, "learning_rate": 1.451751274407384e-05, "loss": 0.5254, "step": 9988 }, { "epoch": 1.781910623494782, "grad_norm": 0.5565928816795349, "learning_rate": 1.4493994417503127e-05, "loss": 0.4387, "step": 9989 }, { "epoch": 1.782089019712782, "grad_norm": 0.5144158005714417, "learning_rate": 1.4470494587472765e-05, "loss": 0.4451, "step": 9990 }, { "epoch": 1.7822674159307823, "grad_norm": 0.5400242805480957, "learning_rate": 1.4447013255828368e-05, "loss": 0.5332, "step": 9991 }, { "epoch": 1.7824458121487825, "grad_norm": 0.5203432440757751, "learning_rate": 1.4423550424414234e-05, "loss": 0.4738, "step": 9992 }, { "epoch": 1.7826242083667827, "grad_norm": 0.5619713068008423, "learning_rate": 1.4400106095073029e-05, "loss": 0.5508, "step": 9993 }, { "epoch": 1.782802604584783, "grad_norm": 0.5214311480522156, "learning_rate": 1.4376680269646086e-05, "loss": 0.4902, "step": 9994 }, { "epoch": 1.782981000802783, "grad_norm": 0.4371115267276764, "learning_rate": 1.4353272949973322e-05, "loss": 0.3828, "step": 9995 }, { "epoch": 1.7831593970207833, "grad_norm": 0.5262027382850647, "learning_rate": 1.4329884137893074e-05, "loss": 0.5594, "step": 9996 }, { "epoch": 1.7833377932387835, "grad_norm": 0.5280055403709412, "learning_rate": 1.430651383524234e-05, "loss": 0.454, "step": 9997 }, { "epoch": 1.7835161894567835, "grad_norm": 0.5150504112243652, "learning_rate": 1.4283162043856546e-05, "loss": 0.5101, "step": 9998 }, { "epoch": 1.7836945856747837, "grad_norm": 0.5318194627761841, "learning_rate": 1.4259828765569777e-05, "loss": 0.4295, "step": 9999 }, { "epoch": 1.7838729818927839, "grad_norm": 0.5350292921066284, "learning_rate": 1.423651400221465e-05, "loss": 0.4757, "step": 10000 }, { "epoch": 1.784051378110784, "grad_norm": 0.5203887224197388, "learning_rate": 1.4213217755622205e-05, "loss": 0.5545, "step": 10001 }, { "epoch": 1.7842297743287843, "grad_norm": 0.48459410667419434, "learning_rate": 1.4189940027622194e-05, "loss": 0.4402, "step": 10002 }, { "epoch": 1.7844081705467842, "grad_norm": 0.5484018921852112, "learning_rate": 1.41666808200428e-05, "loss": 0.5066, "step": 10003 }, { "epoch": 1.7845865667647844, "grad_norm": 0.6117169260978699, "learning_rate": 1.4143440134710833e-05, "loss": 0.7196, "step": 10004 }, { "epoch": 1.7847649629827846, "grad_norm": 0.5282416939735413, "learning_rate": 1.4120217973451533e-05, "loss": 0.4336, "step": 10005 }, { "epoch": 1.7849433592007848, "grad_norm": 0.5563318729400635, "learning_rate": 1.4097014338088855e-05, "loss": 0.5367, "step": 10006 }, { "epoch": 1.785121755418785, "grad_norm": 0.6051769256591797, "learning_rate": 1.4073829230445173e-05, "loss": 0.6119, "step": 10007 }, { "epoch": 1.7853001516367852, "grad_norm": 0.5828459858894348, "learning_rate": 1.4050662652341312e-05, "loss": 0.6749, "step": 10008 }, { "epoch": 1.7854785478547854, "grad_norm": 0.5630053281784058, "learning_rate": 1.4027514605596952e-05, "loss": 0.5675, "step": 10009 }, { "epoch": 1.7856569440727856, "grad_norm": 0.613758385181427, "learning_rate": 1.4004385092030031e-05, "loss": 0.5525, "step": 10010 }, { "epoch": 1.7858353402907858, "grad_norm": 0.4936037063598633, "learning_rate": 1.3981274113457148e-05, "loss": 0.4094, "step": 10011 }, { "epoch": 1.786013736508786, "grad_norm": 0.5002244114875793, "learning_rate": 1.3958181671693466e-05, "loss": 0.4153, "step": 10012 }, { "epoch": 1.7861921327267862, "grad_norm": 0.5575555562973022, "learning_rate": 1.3935107768552557e-05, "loss": 0.6104, "step": 10013 }, { "epoch": 1.7863705289447864, "grad_norm": 0.5280625224113464, "learning_rate": 1.3912052405846754e-05, "loss": 0.565, "step": 10014 }, { "epoch": 1.7865489251627866, "grad_norm": 0.505584716796875, "learning_rate": 1.3889015585386689e-05, "loss": 0.4413, "step": 10015 }, { "epoch": 1.7867273213807868, "grad_norm": 0.4564724862575531, "learning_rate": 1.3865997308981804e-05, "loss": 0.3544, "step": 10016 }, { "epoch": 1.786905717598787, "grad_norm": 0.6348497271537781, "learning_rate": 1.3842997578439819e-05, "loss": 0.6875, "step": 10017 }, { "epoch": 1.7870841138167872, "grad_norm": 0.5605414509773254, "learning_rate": 1.3820016395567209e-05, "loss": 0.5125, "step": 10018 }, { "epoch": 1.7872625100347874, "grad_norm": 0.4736771285533905, "learning_rate": 1.379705376216886e-05, "loss": 0.4449, "step": 10019 }, { "epoch": 1.7874409062527874, "grad_norm": 0.4950930178165436, "learning_rate": 1.3774109680048274e-05, "loss": 0.4283, "step": 10020 }, { "epoch": 1.7876193024707876, "grad_norm": 0.4793688654899597, "learning_rate": 1.3751184151007485e-05, "loss": 0.4695, "step": 10021 }, { "epoch": 1.7877976986887878, "grad_norm": 0.4778786599636078, "learning_rate": 1.3728277176846965e-05, "loss": 0.4909, "step": 10022 }, { "epoch": 1.787976094906788, "grad_norm": 0.5141474008560181, "learning_rate": 1.3705388759365945e-05, "loss": 0.4603, "step": 10023 }, { "epoch": 1.7881544911247882, "grad_norm": 0.4962654411792755, "learning_rate": 1.3682518900361902e-05, "loss": 0.4038, "step": 10024 }, { "epoch": 1.7883328873427882, "grad_norm": 0.44499000906944275, "learning_rate": 1.3659667601631231e-05, "loss": 0.3354, "step": 10025 }, { "epoch": 1.7885112835607884, "grad_norm": 0.5469255447387695, "learning_rate": 1.3636834864968556e-05, "loss": 0.5772, "step": 10026 }, { "epoch": 1.7886896797787886, "grad_norm": 0.5044131278991699, "learning_rate": 1.3614020692167107e-05, "loss": 0.4198, "step": 10027 }, { "epoch": 1.7888680759967888, "grad_norm": 0.5094908475875854, "learning_rate": 1.3591225085018782e-05, "loss": 0.5186, "step": 10028 }, { "epoch": 1.789046472214789, "grad_norm": 0.4827946424484253, "learning_rate": 1.3568448045313874e-05, "loss": 0.4773, "step": 10029 }, { "epoch": 1.7892248684327892, "grad_norm": 0.5509223341941833, "learning_rate": 1.354568957484134e-05, "loss": 0.5378, "step": 10030 }, { "epoch": 1.7894032646507894, "grad_norm": 0.49218085408210754, "learning_rate": 1.3522949675388557e-05, "loss": 0.4328, "step": 10031 }, { "epoch": 1.7895816608687896, "grad_norm": 0.5489743947982788, "learning_rate": 1.3500228348741594e-05, "loss": 0.5715, "step": 10032 }, { "epoch": 1.7897600570867898, "grad_norm": 0.5286942720413208, "learning_rate": 1.3477525596684914e-05, "loss": 0.4155, "step": 10033 }, { "epoch": 1.78993845330479, "grad_norm": 0.5898469090461731, "learning_rate": 1.3454841421001562e-05, "loss": 0.6253, "step": 10034 }, { "epoch": 1.7901168495227902, "grad_norm": 0.4971984326839447, "learning_rate": 1.3432175823473197e-05, "loss": 0.401, "step": 10035 }, { "epoch": 1.7902952457407904, "grad_norm": 0.5394257307052612, "learning_rate": 1.3409528805879895e-05, "loss": 0.5684, "step": 10036 }, { "epoch": 1.7904736419587906, "grad_norm": 0.47669845819473267, "learning_rate": 1.3386900370000455e-05, "loss": 0.4986, "step": 10037 }, { "epoch": 1.7906520381767908, "grad_norm": 0.4922144114971161, "learning_rate": 1.3364290517611982e-05, "loss": 0.4396, "step": 10038 }, { "epoch": 1.790830434394791, "grad_norm": 0.5299371480941772, "learning_rate": 1.334169925049028e-05, "loss": 0.4327, "step": 10039 }, { "epoch": 1.7910088306127911, "grad_norm": 0.5726783871650696, "learning_rate": 1.3319126570409734e-05, "loss": 0.5291, "step": 10040 }, { "epoch": 1.7911872268307913, "grad_norm": 0.5141592621803284, "learning_rate": 1.3296572479143093e-05, "loss": 0.4491, "step": 10041 }, { "epoch": 1.7913656230487913, "grad_norm": 0.5207017660140991, "learning_rate": 1.3274036978461829e-05, "loss": 0.4182, "step": 10042 }, { "epoch": 1.7915440192667915, "grad_norm": 0.5985919833183289, "learning_rate": 1.3251520070135803e-05, "loss": 0.6607, "step": 10043 }, { "epoch": 1.7917224154847917, "grad_norm": 0.5005565285682678, "learning_rate": 1.3229021755933546e-05, "loss": 0.4086, "step": 10044 }, { "epoch": 1.791900811702792, "grad_norm": 0.5269946455955505, "learning_rate": 1.3206542037621978e-05, "loss": 0.4658, "step": 10045 }, { "epoch": 1.7920792079207921, "grad_norm": 0.5867735147476196, "learning_rate": 1.318408091696674e-05, "loss": 0.6541, "step": 10046 }, { "epoch": 1.792257604138792, "grad_norm": 0.491451621055603, "learning_rate": 1.3161638395731867e-05, "loss": 0.3777, "step": 10047 }, { "epoch": 1.7924360003567923, "grad_norm": 0.5263445377349854, "learning_rate": 1.3139214475679977e-05, "loss": 0.4627, "step": 10048 }, { "epoch": 1.7926143965747925, "grad_norm": 0.4960615038871765, "learning_rate": 1.3116809158572273e-05, "loss": 0.4039, "step": 10049 }, { "epoch": 1.7927927927927927, "grad_norm": 0.5893881916999817, "learning_rate": 1.3094422446168403e-05, "loss": 0.6089, "step": 10050 }, { "epoch": 1.7929711890107929, "grad_norm": 0.636078953742981, "learning_rate": 1.3072054340226708e-05, "loss": 0.5522, "step": 10051 }, { "epoch": 1.793149585228793, "grad_norm": 0.6112774610519409, "learning_rate": 1.304970484250384e-05, "loss": 0.6133, "step": 10052 }, { "epoch": 1.7933279814467933, "grad_norm": 0.5139406323432922, "learning_rate": 1.3027373954755229e-05, "loss": 0.3866, "step": 10053 }, { "epoch": 1.7935063776647935, "grad_norm": 0.5027743577957153, "learning_rate": 1.3005061678734665e-05, "loss": 0.4824, "step": 10054 }, { "epoch": 1.7936847738827937, "grad_norm": 0.45598676800727844, "learning_rate": 1.2982768016194551e-05, "loss": 0.3925, "step": 10055 }, { "epoch": 1.7938631701007939, "grad_norm": 0.5171281099319458, "learning_rate": 1.2960492968885906e-05, "loss": 0.499, "step": 10056 }, { "epoch": 1.794041566318794, "grad_norm": 0.5457966923713684, "learning_rate": 1.2938236538558079e-05, "loss": 0.4644, "step": 10057 }, { "epoch": 1.7942199625367943, "grad_norm": 0.5165960788726807, "learning_rate": 1.2915998726959172e-05, "loss": 0.5193, "step": 10058 }, { "epoch": 1.7943983587547945, "grad_norm": 0.5514568090438843, "learning_rate": 1.2893779535835703e-05, "loss": 0.4803, "step": 10059 }, { "epoch": 1.7945767549727947, "grad_norm": 0.47774699330329895, "learning_rate": 1.2871578966932723e-05, "loss": 0.3729, "step": 10060 }, { "epoch": 1.7947551511907949, "grad_norm": 0.5756377577781677, "learning_rate": 1.2849397021993947e-05, "loss": 0.554, "step": 10061 }, { "epoch": 1.794933547408795, "grad_norm": 0.5572470426559448, "learning_rate": 1.2827233702761398e-05, "loss": 0.4625, "step": 10062 }, { "epoch": 1.7951119436267953, "grad_norm": 0.508305549621582, "learning_rate": 1.2805089010975906e-05, "loss": 0.4027, "step": 10063 }, { "epoch": 1.7952903398447952, "grad_norm": 0.5296847224235535, "learning_rate": 1.2782962948376608e-05, "loss": 0.5182, "step": 10064 }, { "epoch": 1.7954687360627954, "grad_norm": 0.5482270121574402, "learning_rate": 1.2760855516701364e-05, "loss": 0.6118, "step": 10065 }, { "epoch": 1.7956471322807956, "grad_norm": 0.6912332773208618, "learning_rate": 1.2738766717686396e-05, "loss": 0.5096, "step": 10066 }, { "epoch": 1.7958255284987958, "grad_norm": 0.5811026692390442, "learning_rate": 1.271669655306662e-05, "loss": 0.6013, "step": 10067 }, { "epoch": 1.796003924716796, "grad_norm": 0.5055823922157288, "learning_rate": 1.2694645024575374e-05, "loss": 0.4401, "step": 10068 }, { "epoch": 1.796182320934796, "grad_norm": 0.6301665306091309, "learning_rate": 1.2672612133944578e-05, "loss": 0.5362, "step": 10069 }, { "epoch": 1.7963607171527962, "grad_norm": 0.4803159534931183, "learning_rate": 1.265059788290468e-05, "loss": 0.3668, "step": 10070 }, { "epoch": 1.7965391133707964, "grad_norm": 0.4969823360443115, "learning_rate": 1.2628602273184714e-05, "loss": 0.4755, "step": 10071 }, { "epoch": 1.7967175095887966, "grad_norm": 0.5457087755203247, "learning_rate": 1.2606625306512159e-05, "loss": 0.5974, "step": 10072 }, { "epoch": 1.7968959058067968, "grad_norm": 0.6440889835357666, "learning_rate": 1.2584666984613107e-05, "loss": 0.6619, "step": 10073 }, { "epoch": 1.797074302024797, "grad_norm": 0.5694631338119507, "learning_rate": 1.2562727309212125e-05, "loss": 0.5553, "step": 10074 }, { "epoch": 1.7972526982427972, "grad_norm": 0.582349956035614, "learning_rate": 1.2540806282032385e-05, "loss": 0.5732, "step": 10075 }, { "epoch": 1.7974310944607974, "grad_norm": 0.539436936378479, "learning_rate": 1.2518903904795515e-05, "loss": 0.4747, "step": 10076 }, { "epoch": 1.7976094906787976, "grad_norm": 0.5120388865470886, "learning_rate": 1.2497020179221747e-05, "loss": 0.5279, "step": 10077 }, { "epoch": 1.7977878868967978, "grad_norm": 0.5143983364105225, "learning_rate": 1.247515510702979e-05, "loss": 0.453, "step": 10078 }, { "epoch": 1.797966283114798, "grad_norm": 0.46396833658218384, "learning_rate": 1.2453308689936965e-05, "loss": 0.3753, "step": 10079 }, { "epoch": 1.7981446793327982, "grad_norm": 0.5515454411506653, "learning_rate": 1.2431480929659066e-05, "loss": 0.5563, "step": 10080 }, { "epoch": 1.7983230755507984, "grad_norm": 0.5875598788261414, "learning_rate": 1.2409671827910363e-05, "loss": 0.5579, "step": 10081 }, { "epoch": 1.7985014717687986, "grad_norm": 0.43758925795555115, "learning_rate": 1.2387881386403844e-05, "loss": 0.3783, "step": 10082 }, { "epoch": 1.7986798679867988, "grad_norm": 0.4410098195075989, "learning_rate": 1.2366109606850834e-05, "loss": 0.336, "step": 10083 }, { "epoch": 1.798858264204799, "grad_norm": 0.5217193365097046, "learning_rate": 1.2344356490961328e-05, "loss": 0.4554, "step": 10084 }, { "epoch": 1.7990366604227992, "grad_norm": 0.5786857604980469, "learning_rate": 1.2322622040443793e-05, "loss": 0.5736, "step": 10085 }, { "epoch": 1.7992150566407992, "grad_norm": 0.5340781807899475, "learning_rate": 1.2300906257005196e-05, "loss": 0.5068, "step": 10086 }, { "epoch": 1.7993934528587994, "grad_norm": 0.5103334784507751, "learning_rate": 1.22792091423512e-05, "loss": 0.4845, "step": 10087 }, { "epoch": 1.7995718490767996, "grad_norm": 0.5319827795028687, "learning_rate": 1.2257530698185776e-05, "loss": 0.5371, "step": 10088 }, { "epoch": 1.7997502452947998, "grad_norm": 0.5461127161979675, "learning_rate": 1.2235870926211617e-05, "loss": 0.5464, "step": 10089 }, { "epoch": 1.7999286415128, "grad_norm": 0.6167702078819275, "learning_rate": 1.2214229828129808e-05, "loss": 0.5783, "step": 10090 }, { "epoch": 1.8001070377308, "grad_norm": 0.538575291633606, "learning_rate": 1.2192607405640072e-05, "loss": 0.5085, "step": 10091 }, { "epoch": 1.8002854339488001, "grad_norm": 0.4790734648704529, "learning_rate": 1.2171003660440633e-05, "loss": 0.3925, "step": 10092 }, { "epoch": 1.8004638301668003, "grad_norm": 0.5150331854820251, "learning_rate": 1.214941859422819e-05, "loss": 0.5111, "step": 10093 }, { "epoch": 1.8006422263848005, "grad_norm": 0.5167433023452759, "learning_rate": 1.2127852208698081e-05, "loss": 0.4739, "step": 10094 }, { "epoch": 1.8008206226028007, "grad_norm": 0.48076915740966797, "learning_rate": 1.2106304505544063e-05, "loss": 0.402, "step": 10095 }, { "epoch": 1.800999018820801, "grad_norm": 0.5094106197357178, "learning_rate": 1.208477548645856e-05, "loss": 0.562, "step": 10096 }, { "epoch": 1.8011774150388011, "grad_norm": 0.4915674030780792, "learning_rate": 1.2063265153132359e-05, "loss": 0.4165, "step": 10097 }, { "epoch": 1.8013558112568013, "grad_norm": 0.4733083248138428, "learning_rate": 1.2041773507254966e-05, "loss": 0.3459, "step": 10098 }, { "epoch": 1.8015342074748015, "grad_norm": 0.5540547966957092, "learning_rate": 1.202030055051423e-05, "loss": 0.4423, "step": 10099 }, { "epoch": 1.8017126036928017, "grad_norm": 0.5337905883789062, "learning_rate": 1.1998846284596687e-05, "loss": 0.5369, "step": 10100 }, { "epoch": 1.801890999910802, "grad_norm": 0.5766735672950745, "learning_rate": 1.1977410711187381e-05, "loss": 0.574, "step": 10101 }, { "epoch": 1.8020693961288021, "grad_norm": 0.6233359575271606, "learning_rate": 1.1955993831969769e-05, "loss": 0.6326, "step": 10102 }, { "epoch": 1.8022477923468023, "grad_norm": 0.538557231426239, "learning_rate": 1.1934595648625978e-05, "loss": 0.51, "step": 10103 }, { "epoch": 1.8024261885648025, "grad_norm": 0.5933203101158142, "learning_rate": 1.1913216162836582e-05, "loss": 0.6019, "step": 10104 }, { "epoch": 1.8026045847828027, "grad_norm": 0.5655239224433899, "learning_rate": 1.1891855376280764e-05, "loss": 0.5871, "step": 10105 }, { "epoch": 1.802782981000803, "grad_norm": 0.4605049788951874, "learning_rate": 1.1870513290636154e-05, "loss": 0.3936, "step": 10106 }, { "epoch": 1.8029613772188031, "grad_norm": 0.4446168541908264, "learning_rate": 1.1849189907578938e-05, "loss": 0.3525, "step": 10107 }, { "epoch": 1.803139773436803, "grad_norm": 0.5252434611320496, "learning_rate": 1.1827885228783863e-05, "loss": 0.5285, "step": 10108 }, { "epoch": 1.8033181696548033, "grad_norm": 0.49297425150871277, "learning_rate": 1.1806599255924172e-05, "loss": 0.4882, "step": 10109 }, { "epoch": 1.8034965658728035, "grad_norm": 0.5996000170707703, "learning_rate": 1.1785331990671722e-05, "loss": 0.5251, "step": 10110 }, { "epoch": 1.8036749620908037, "grad_norm": 0.5449775457382202, "learning_rate": 1.1764083434696732e-05, "loss": 0.5504, "step": 10111 }, { "epoch": 1.803853358308804, "grad_norm": 0.5374523997306824, "learning_rate": 1.1742853589668145e-05, "loss": 0.5502, "step": 10112 }, { "epoch": 1.8040317545268039, "grad_norm": 0.5069290399551392, "learning_rate": 1.1721642457253323e-05, "loss": 0.475, "step": 10113 }, { "epoch": 1.804210150744804, "grad_norm": 0.5023181438446045, "learning_rate": 1.1700450039118127e-05, "loss": 0.4728, "step": 10114 }, { "epoch": 1.8043885469628043, "grad_norm": 0.44995036721229553, "learning_rate": 1.1679276336927058e-05, "loss": 0.3337, "step": 10115 }, { "epoch": 1.8045669431808045, "grad_norm": 0.5543752312660217, "learning_rate": 1.1658121352342982e-05, "loss": 0.5126, "step": 10116 }, { "epoch": 1.8047453393988047, "grad_norm": 0.5311068296432495, "learning_rate": 1.1636985087027597e-05, "loss": 0.5781, "step": 10117 }, { "epoch": 1.8049237356168049, "grad_norm": 0.5337813496589661, "learning_rate": 1.1615867542640795e-05, "loss": 0.4699, "step": 10118 }, { "epoch": 1.805102131834805, "grad_norm": 0.5538474321365356, "learning_rate": 1.1594768720841142e-05, "loss": 0.616, "step": 10119 }, { "epoch": 1.8052805280528053, "grad_norm": 0.5138535499572754, "learning_rate": 1.157368862328581e-05, "loss": 0.5966, "step": 10120 }, { "epoch": 1.8054589242708055, "grad_norm": 0.5369413495063782, "learning_rate": 1.1552627251630338e-05, "loss": 0.6007, "step": 10121 }, { "epoch": 1.8056373204888057, "grad_norm": 0.4994553029537201, "learning_rate": 1.1531584607528928e-05, "loss": 0.3921, "step": 10122 }, { "epoch": 1.8058157167068059, "grad_norm": 0.6017369031906128, "learning_rate": 1.1510560692634203e-05, "loss": 0.553, "step": 10123 }, { "epoch": 1.805994112924806, "grad_norm": 0.5081920027732849, "learning_rate": 1.1489555508597455e-05, "loss": 0.3915, "step": 10124 }, { "epoch": 1.8061725091428062, "grad_norm": 0.5011181235313416, "learning_rate": 1.1468569057068363e-05, "loss": 0.4489, "step": 10125 }, { "epoch": 1.8063509053608064, "grad_norm": 0.5123955607414246, "learning_rate": 1.1447601339695218e-05, "loss": 0.4911, "step": 10126 }, { "epoch": 1.8065293015788066, "grad_norm": 0.6243327856063843, "learning_rate": 1.1426652358124817e-05, "loss": 0.7168, "step": 10127 }, { "epoch": 1.8067076977968068, "grad_norm": 0.525888204574585, "learning_rate": 1.1405722114002425e-05, "loss": 0.512, "step": 10128 }, { "epoch": 1.806886094014807, "grad_norm": 0.5606545209884644, "learning_rate": 1.1384810608972007e-05, "loss": 0.5632, "step": 10129 }, { "epoch": 1.807064490232807, "grad_norm": 0.5493068695068359, "learning_rate": 1.1363917844675803e-05, "loss": 0.4691, "step": 10130 }, { "epoch": 1.8072428864508072, "grad_norm": 0.6075304746627808, "learning_rate": 1.1343043822754834e-05, "loss": 0.7009, "step": 10131 }, { "epoch": 1.8074212826688074, "grad_norm": 0.49980399012565613, "learning_rate": 1.132218854484851e-05, "loss": 0.4691, "step": 10132 }, { "epoch": 1.8075996788868076, "grad_norm": 0.5099700093269348, "learning_rate": 1.1301352012594774e-05, "loss": 0.4577, "step": 10133 }, { "epoch": 1.8077780751048078, "grad_norm": 0.5869147777557373, "learning_rate": 1.1280534227630173e-05, "loss": 0.6752, "step": 10134 }, { "epoch": 1.807956471322808, "grad_norm": 0.5993606448173523, "learning_rate": 1.1259735191589626e-05, "loss": 0.6543, "step": 10135 }, { "epoch": 1.808134867540808, "grad_norm": 0.5113434195518494, "learning_rate": 1.123895490610677e-05, "loss": 0.5406, "step": 10136 }, { "epoch": 1.8083132637588082, "grad_norm": 0.5298689007759094, "learning_rate": 1.1218193372813628e-05, "loss": 0.4952, "step": 10137 }, { "epoch": 1.8084916599768084, "grad_norm": 0.5689482092857361, "learning_rate": 1.1197450593340875e-05, "loss": 0.543, "step": 10138 }, { "epoch": 1.8086700561948086, "grad_norm": 0.46555694937705994, "learning_rate": 1.1176726569317563e-05, "loss": 0.4123, "step": 10139 }, { "epoch": 1.8088484524128088, "grad_norm": 0.4243280589580536, "learning_rate": 1.1156021302371338e-05, "loss": 0.4059, "step": 10140 }, { "epoch": 1.809026848630809, "grad_norm": 0.6901927590370178, "learning_rate": 1.1135334794128455e-05, "loss": 0.6079, "step": 10141 }, { "epoch": 1.8092052448488092, "grad_norm": 0.5594136714935303, "learning_rate": 1.1114667046213555e-05, "loss": 0.5835, "step": 10142 }, { "epoch": 1.8093836410668094, "grad_norm": 0.4830605983734131, "learning_rate": 1.1094018060249928e-05, "loss": 0.3979, "step": 10143 }, { "epoch": 1.8095620372848096, "grad_norm": 0.5415348410606384, "learning_rate": 1.10733878378593e-05, "loss": 0.5458, "step": 10144 }, { "epoch": 1.8097404335028098, "grad_norm": 0.5037901401519775, "learning_rate": 1.1052776380661988e-05, "loss": 0.4389, "step": 10145 }, { "epoch": 1.80991882972081, "grad_norm": 0.47683677077293396, "learning_rate": 1.1032183690276754e-05, "loss": 0.4552, "step": 10146 }, { "epoch": 1.8100972259388102, "grad_norm": 0.5881403684616089, "learning_rate": 1.1011609768320995e-05, "loss": 0.617, "step": 10147 }, { "epoch": 1.8102756221568104, "grad_norm": 0.5397078394889832, "learning_rate": 1.099105461641059e-05, "loss": 0.5371, "step": 10148 }, { "epoch": 1.8104540183748106, "grad_norm": 0.5096381306648254, "learning_rate": 1.0970518236159882e-05, "loss": 0.4908, "step": 10149 }, { "epoch": 1.8106324145928108, "grad_norm": 0.5621026754379272, "learning_rate": 1.0950000629181806e-05, "loss": 0.5941, "step": 10150 }, { "epoch": 1.810810810810811, "grad_norm": 0.501204252243042, "learning_rate": 1.0929501797087848e-05, "loss": 0.4206, "step": 10151 }, { "epoch": 1.810989207028811, "grad_norm": 0.48253610730171204, "learning_rate": 1.0909021741487862e-05, "loss": 0.4153, "step": 10152 }, { "epoch": 1.8111676032468111, "grad_norm": 0.60355144739151, "learning_rate": 1.0888560463990476e-05, "loss": 0.518, "step": 10153 }, { "epoch": 1.8113459994648113, "grad_norm": 0.4504523277282715, "learning_rate": 1.086811796620263e-05, "loss": 0.3618, "step": 10154 }, { "epoch": 1.8115243956828115, "grad_norm": 0.5853896737098694, "learning_rate": 1.0847694249729922e-05, "loss": 0.6146, "step": 10155 }, { "epoch": 1.8117027919008117, "grad_norm": 0.5694953203201294, "learning_rate": 1.0827289316176353e-05, "loss": 0.5908, "step": 10156 }, { "epoch": 1.811881188118812, "grad_norm": 0.5596148371696472, "learning_rate": 1.0806903167144583e-05, "loss": 0.583, "step": 10157 }, { "epoch": 1.812059584336812, "grad_norm": 0.4649914801120758, "learning_rate": 1.0786535804235693e-05, "loss": 0.4772, "step": 10158 }, { "epoch": 1.8122379805548121, "grad_norm": 0.5577880144119263, "learning_rate": 1.0766187229049345e-05, "loss": 0.5872, "step": 10159 }, { "epoch": 1.8124163767728123, "grad_norm": 0.6191621422767639, "learning_rate": 1.0745857443183737e-05, "loss": 0.6917, "step": 10160 }, { "epoch": 1.8125947729908125, "grad_norm": 0.6169114112854004, "learning_rate": 1.0725546448235424e-05, "loss": 0.7141, "step": 10161 }, { "epoch": 1.8127731692088127, "grad_norm": 0.5002092719078064, "learning_rate": 1.0705254245799823e-05, "loss": 0.435, "step": 10162 }, { "epoch": 1.812951565426813, "grad_norm": 0.48867136240005493, "learning_rate": 1.068498083747052e-05, "loss": 0.4144, "step": 10163 }, { "epoch": 1.813129961644813, "grad_norm": 0.5426896214485168, "learning_rate": 1.0664726224839882e-05, "loss": 0.4681, "step": 10164 }, { "epoch": 1.8133083578628133, "grad_norm": 0.5177076458930969, "learning_rate": 1.0644490409498636e-05, "loss": 0.4917, "step": 10165 }, { "epoch": 1.8134867540808135, "grad_norm": 0.5665025115013123, "learning_rate": 1.0624273393036093e-05, "loss": 0.5453, "step": 10166 }, { "epoch": 1.8136651502988137, "grad_norm": 0.5852944254875183, "learning_rate": 1.0604075177040151e-05, "loss": 0.607, "step": 10167 }, { "epoch": 1.813843546516814, "grad_norm": 0.5647523999214172, "learning_rate": 1.0583895763097068e-05, "loss": 0.6147, "step": 10168 }, { "epoch": 1.814021942734814, "grad_norm": 0.4771578907966614, "learning_rate": 1.0563735152791826e-05, "loss": 0.3808, "step": 10169 }, { "epoch": 1.8142003389528143, "grad_norm": 0.49404028058052063, "learning_rate": 1.0543593347707742e-05, "loss": 0.4938, "step": 10170 }, { "epoch": 1.8143787351708145, "grad_norm": 0.5241032242774963, "learning_rate": 1.0523470349426856e-05, "loss": 0.4203, "step": 10171 }, { "epoch": 1.8145571313888147, "grad_norm": 0.4687019884586334, "learning_rate": 1.0503366159529515e-05, "loss": 0.4542, "step": 10172 }, { "epoch": 1.814735527606815, "grad_norm": 0.5116732120513916, "learning_rate": 1.0483280779594707e-05, "loss": 0.4266, "step": 10173 }, { "epoch": 1.8149139238248149, "grad_norm": 0.593781054019928, "learning_rate": 1.0463214211200001e-05, "loss": 0.5538, "step": 10174 }, { "epoch": 1.815092320042815, "grad_norm": 0.5247955918312073, "learning_rate": 1.0443166455921332e-05, "loss": 0.4487, "step": 10175 }, { "epoch": 1.8152707162608153, "grad_norm": 0.5464117527008057, "learning_rate": 1.042313751533333e-05, "loss": 0.527, "step": 10176 }, { "epoch": 1.8154491124788155, "grad_norm": 0.5567406415939331, "learning_rate": 1.040312739100896e-05, "loss": 0.6, "step": 10177 }, { "epoch": 1.8156275086968157, "grad_norm": 0.5131311416625977, "learning_rate": 1.038313608451985e-05, "loss": 0.4397, "step": 10178 }, { "epoch": 1.8158059049148159, "grad_norm": 0.45174816250801086, "learning_rate": 1.0363163597436165e-05, "loss": 0.389, "step": 10179 }, { "epoch": 1.8159843011328158, "grad_norm": 0.47523677349090576, "learning_rate": 1.0343209931326453e-05, "loss": 0.3687, "step": 10180 }, { "epoch": 1.816162697350816, "grad_norm": 0.49906423687934875, "learning_rate": 1.0323275087757905e-05, "loss": 0.4489, "step": 10181 }, { "epoch": 1.8163410935688162, "grad_norm": 0.4328477382659912, "learning_rate": 1.0303359068296187e-05, "loss": 0.307, "step": 10182 }, { "epoch": 1.8165194897868164, "grad_norm": 0.48864254355430603, "learning_rate": 1.0283461874505545e-05, "loss": 0.4578, "step": 10183 }, { "epoch": 1.8166978860048166, "grad_norm": 0.5428241491317749, "learning_rate": 1.0263583507948592e-05, "loss": 0.5653, "step": 10184 }, { "epoch": 1.8168762822228168, "grad_norm": 0.7537945508956909, "learning_rate": 1.0243723970186663e-05, "loss": 0.4563, "step": 10185 }, { "epoch": 1.817054678440817, "grad_norm": 1.7523473501205444, "learning_rate": 1.0223883262779455e-05, "loss": 0.3763, "step": 10186 }, { "epoch": 1.8172330746588172, "grad_norm": 0.5494785308837891, "learning_rate": 1.0204061387285274e-05, "loss": 0.5098, "step": 10187 }, { "epoch": 1.8174114708768174, "grad_norm": 0.5195872187614441, "learning_rate": 1.0184258345260933e-05, "loss": 0.476, "step": 10188 }, { "epoch": 1.8175898670948176, "grad_norm": 0.674954891204834, "learning_rate": 1.0164474138261714e-05, "loss": 0.447, "step": 10189 }, { "epoch": 1.8177682633128178, "grad_norm": 0.5051981806755066, "learning_rate": 1.0144708767841514e-05, "loss": 0.4994, "step": 10190 }, { "epoch": 1.817946659530818, "grad_norm": 0.5411693453788757, "learning_rate": 1.0124962235552647e-05, "loss": 0.504, "step": 10191 }, { "epoch": 1.8181250557488182, "grad_norm": 0.5327993631362915, "learning_rate": 1.0105234542946013e-05, "loss": 0.5145, "step": 10192 }, { "epoch": 1.8183034519668184, "grad_norm": 0.5137004256248474, "learning_rate": 1.0085525691571063e-05, "loss": 0.5566, "step": 10193 }, { "epoch": 1.8184818481848186, "grad_norm": 0.4950316250324249, "learning_rate": 1.0065835682975644e-05, "loss": 0.4582, "step": 10194 }, { "epoch": 1.8186602444028188, "grad_norm": 0.6043919324874878, "learning_rate": 1.0046164518706269e-05, "loss": 0.6262, "step": 10195 }, { "epoch": 1.8188386406208188, "grad_norm": 0.48688915371894836, "learning_rate": 1.0026512200307841e-05, "loss": 0.4427, "step": 10196 }, { "epoch": 1.819017036838819, "grad_norm": 0.5831645727157593, "learning_rate": 1.0006878729323905e-05, "loss": 0.6149, "step": 10197 }, { "epoch": 1.8191954330568192, "grad_norm": 0.5436547994613647, "learning_rate": 9.987264107296445e-06, "loss": 0.541, "step": 10198 }, { "epoch": 1.8193738292748194, "grad_norm": 0.4676104784011841, "learning_rate": 9.967668335765927e-06, "loss": 0.448, "step": 10199 }, { "epoch": 1.8195522254928196, "grad_norm": 0.5124318599700928, "learning_rate": 9.948091416271482e-06, "loss": 0.4321, "step": 10200 }, { "epoch": 1.8197306217108198, "grad_norm": 0.5362337231636047, "learning_rate": 9.928533350350627e-06, "loss": 0.4066, "step": 10201 }, { "epoch": 1.8199090179288198, "grad_norm": 0.4841192066669464, "learning_rate": 9.908994139539467e-06, "loss": 0.4793, "step": 10202 }, { "epoch": 1.82008741414682, "grad_norm": 0.577235996723175, "learning_rate": 9.889473785372554e-06, "loss": 0.5232, "step": 10203 }, { "epoch": 1.8202658103648202, "grad_norm": 0.5391908288002014, "learning_rate": 9.869972289383078e-06, "loss": 0.4879, "step": 10204 }, { "epoch": 1.8204442065828204, "grad_norm": 0.5783559679985046, "learning_rate": 9.85048965310259e-06, "loss": 0.5618, "step": 10205 }, { "epoch": 1.8206226028008206, "grad_norm": 0.47223010659217834, "learning_rate": 9.831025878061366e-06, "loss": 0.4077, "step": 10206 }, { "epoch": 1.8208009990188208, "grad_norm": 0.5232735872268677, "learning_rate": 9.811580965787964e-06, "loss": 0.5183, "step": 10207 }, { "epoch": 1.820979395236821, "grad_norm": 0.5101804137229919, "learning_rate": 9.792154917809631e-06, "loss": 0.4681, "step": 10208 }, { "epoch": 1.8211577914548212, "grad_norm": 0.5707786679267883, "learning_rate": 9.772747735652122e-06, "loss": 0.6249, "step": 10209 }, { "epoch": 1.8213361876728213, "grad_norm": 0.5304009318351746, "learning_rate": 9.753359420839631e-06, "loss": 0.4669, "step": 10210 }, { "epoch": 1.8215145838908215, "grad_norm": 0.555030107498169, "learning_rate": 9.733989974894858e-06, "loss": 0.5323, "step": 10211 }, { "epoch": 1.8216929801088217, "grad_norm": 0.5161325931549072, "learning_rate": 9.71463939933917e-06, "loss": 0.4242, "step": 10212 }, { "epoch": 1.821871376326822, "grad_norm": 0.4960889518260956, "learning_rate": 9.69530769569224e-06, "loss": 0.4857, "step": 10213 }, { "epoch": 1.8220497725448221, "grad_norm": 0.5438405871391296, "learning_rate": 9.675994865472492e-06, "loss": 0.5828, "step": 10214 }, { "epoch": 1.8222281687628223, "grad_norm": 0.5128703713417053, "learning_rate": 9.656700910196631e-06, "loss": 0.4462, "step": 10215 }, { "epoch": 1.8224065649808225, "grad_norm": 0.506435751914978, "learning_rate": 9.637425831380109e-06, "loss": 0.4737, "step": 10216 }, { "epoch": 1.8225849611988227, "grad_norm": 0.5082954168319702, "learning_rate": 9.618169630536688e-06, "loss": 0.5197, "step": 10217 }, { "epoch": 1.8227633574168227, "grad_norm": 0.5157825946807861, "learning_rate": 9.598932309178798e-06, "loss": 0.4332, "step": 10218 }, { "epoch": 1.822941753634823, "grad_norm": 0.5034956932067871, "learning_rate": 9.579713868817313e-06, "loss": 0.4036, "step": 10219 }, { "epoch": 1.8231201498528231, "grad_norm": 0.566260576248169, "learning_rate": 9.560514310961637e-06, "loss": 0.6078, "step": 10220 }, { "epoch": 1.8232985460708233, "grad_norm": 0.5446032881736755, "learning_rate": 9.541333637119704e-06, "loss": 0.449, "step": 10221 }, { "epoch": 1.8234769422888235, "grad_norm": 0.5551034808158875, "learning_rate": 9.522171848797917e-06, "loss": 0.4385, "step": 10222 }, { "epoch": 1.8236553385068237, "grad_norm": 0.5290305018424988, "learning_rate": 9.503028947501269e-06, "loss": 0.3873, "step": 10223 }, { "epoch": 1.8238337347248237, "grad_norm": 0.5309109091758728, "learning_rate": 9.483904934733278e-06, "loss": 0.5638, "step": 10224 }, { "epoch": 1.8240121309428239, "grad_norm": 0.5156473517417908, "learning_rate": 9.464799811995855e-06, "loss": 0.4597, "step": 10225 }, { "epoch": 1.824190527160824, "grad_norm": 0.5108242630958557, "learning_rate": 9.44571358078955e-06, "loss": 0.4872, "step": 10226 }, { "epoch": 1.8243689233788243, "grad_norm": 0.45744287967681885, "learning_rate": 9.426646242613385e-06, "loss": 0.3599, "step": 10227 }, { "epoch": 1.8245473195968245, "grad_norm": 0.5380007028579712, "learning_rate": 9.407597798964911e-06, "loss": 0.4486, "step": 10228 }, { "epoch": 1.8247257158148247, "grad_norm": 0.5029316544532776, "learning_rate": 9.388568251340157e-06, "loss": 0.4662, "step": 10229 }, { "epoch": 1.8249041120328249, "grad_norm": 0.5693724751472473, "learning_rate": 9.369557601233703e-06, "loss": 0.4787, "step": 10230 }, { "epoch": 1.825082508250825, "grad_norm": 0.49826717376708984, "learning_rate": 9.350565850138688e-06, "loss": 0.4471, "step": 10231 }, { "epoch": 1.8252609044688253, "grad_norm": 0.6438023447990417, "learning_rate": 9.331592999546612e-06, "loss": 0.6015, "step": 10232 }, { "epoch": 1.8254393006868255, "grad_norm": 0.6153572797775269, "learning_rate": 9.312639050947702e-06, "loss": 0.7253, "step": 10233 }, { "epoch": 1.8256176969048257, "grad_norm": 0.508865237236023, "learning_rate": 9.293704005830488e-06, "loss": 0.4195, "step": 10234 }, { "epoch": 1.8257960931228259, "grad_norm": 0.533497154712677, "learning_rate": 9.274787865682227e-06, "loss": 0.4233, "step": 10235 }, { "epoch": 1.825974489340826, "grad_norm": 0.5289534330368042, "learning_rate": 9.255890631988505e-06, "loss": 0.5847, "step": 10236 }, { "epoch": 1.8261528855588263, "grad_norm": 0.5981556177139282, "learning_rate": 9.237012306233555e-06, "loss": 0.7135, "step": 10237 }, { "epoch": 1.8263312817768265, "grad_norm": 0.5175567269325256, "learning_rate": 9.21815288990005e-06, "loss": 0.4407, "step": 10238 }, { "epoch": 1.8265096779948267, "grad_norm": 0.4687216877937317, "learning_rate": 9.199312384469166e-06, "loss": 0.408, "step": 10239 }, { "epoch": 1.8266880742128266, "grad_norm": 0.5143327713012695, "learning_rate": 9.180490791420693e-06, "loss": 0.5345, "step": 10240 }, { "epoch": 1.8268664704308268, "grad_norm": 0.5955470204353333, "learning_rate": 9.161688112232836e-06, "loss": 0.6327, "step": 10241 }, { "epoch": 1.827044866648827, "grad_norm": 0.482693612575531, "learning_rate": 9.142904348382359e-06, "loss": 0.39, "step": 10242 }, { "epoch": 1.8272232628668272, "grad_norm": 0.6863899230957031, "learning_rate": 9.124139501344496e-06, "loss": 0.6892, "step": 10243 }, { "epoch": 1.8274016590848274, "grad_norm": 0.49588871002197266, "learning_rate": 9.105393572593102e-06, "loss": 0.3929, "step": 10244 }, { "epoch": 1.8275800553028276, "grad_norm": 0.5905642509460449, "learning_rate": 9.086666563600437e-06, "loss": 0.7023, "step": 10245 }, { "epoch": 1.8277584515208276, "grad_norm": 0.5300746560096741, "learning_rate": 9.067958475837274e-06, "loss": 0.5067, "step": 10246 }, { "epoch": 1.8279368477388278, "grad_norm": 0.5370733141899109, "learning_rate": 9.04926931077299e-06, "loss": 0.5826, "step": 10247 }, { "epoch": 1.828115243956828, "grad_norm": 0.5125030279159546, "learning_rate": 9.030599069875383e-06, "loss": 0.4693, "step": 10248 }, { "epoch": 1.8282936401748282, "grad_norm": 0.48797720670700073, "learning_rate": 9.011947754610839e-06, "loss": 0.4299, "step": 10249 }, { "epoch": 1.8284720363928284, "grad_norm": 0.5317216515541077, "learning_rate": 8.99331536644421e-06, "loss": 0.4746, "step": 10250 }, { "epoch": 1.8286504326108286, "grad_norm": 0.5748763680458069, "learning_rate": 8.974701906838884e-06, "loss": 0.51, "step": 10251 }, { "epoch": 1.8288288288288288, "grad_norm": 0.573381781578064, "learning_rate": 8.956107377256772e-06, "loss": 0.6049, "step": 10252 }, { "epoch": 1.829007225046829, "grad_norm": 0.4922345280647278, "learning_rate": 8.937531779158181e-06, "loss": 0.371, "step": 10253 }, { "epoch": 1.8291856212648292, "grad_norm": 0.494431734085083, "learning_rate": 8.918975114002192e-06, "loss": 0.4184, "step": 10254 }, { "epoch": 1.8293640174828294, "grad_norm": 0.5176249146461487, "learning_rate": 8.900437383246084e-06, "loss": 0.45, "step": 10255 }, { "epoch": 1.8295424137008296, "grad_norm": 0.4918590188026428, "learning_rate": 8.881918588345917e-06, "loss": 0.5091, "step": 10256 }, { "epoch": 1.8297208099188298, "grad_norm": 0.5216599702835083, "learning_rate": 8.863418730756106e-06, "loss": 0.4921, "step": 10257 }, { "epoch": 1.82989920613683, "grad_norm": 0.45257261395454407, "learning_rate": 8.844937811929605e-06, "loss": 0.3359, "step": 10258 }, { "epoch": 1.8300776023548302, "grad_norm": 0.5172678828239441, "learning_rate": 8.826475833317914e-06, "loss": 0.5071, "step": 10259 }, { "epoch": 1.8302559985728304, "grad_norm": 0.5505502223968506, "learning_rate": 8.808032796371018e-06, "loss": 0.6637, "step": 10260 }, { "epoch": 1.8304343947908306, "grad_norm": 0.48044058680534363, "learning_rate": 8.78960870253745e-06, "loss": 0.4304, "step": 10261 }, { "epoch": 1.8306127910088306, "grad_norm": 0.4884861707687378, "learning_rate": 8.77120355326419e-06, "loss": 0.3665, "step": 10262 }, { "epoch": 1.8307911872268308, "grad_norm": 0.4926454424858093, "learning_rate": 8.752817349996806e-06, "loss": 0.3983, "step": 10263 }, { "epoch": 1.830969583444831, "grad_norm": 0.5212196111679077, "learning_rate": 8.734450094179309e-06, "loss": 0.5036, "step": 10264 }, { "epoch": 1.8311479796628312, "grad_norm": 0.4858270287513733, "learning_rate": 8.716101787254321e-06, "loss": 0.467, "step": 10265 }, { "epoch": 1.8313263758808314, "grad_norm": 0.5602033138275146, "learning_rate": 8.697772430662858e-06, "loss": 0.5441, "step": 10266 }, { "epoch": 1.8315047720988316, "grad_norm": 0.5834726095199585, "learning_rate": 8.679462025844464e-06, "loss": 0.6227, "step": 10267 }, { "epoch": 1.8316831683168315, "grad_norm": 0.5684033632278442, "learning_rate": 8.66117057423732e-06, "loss": 0.57, "step": 10268 }, { "epoch": 1.8318615645348317, "grad_norm": 0.4957300126552582, "learning_rate": 8.642898077277944e-06, "loss": 0.4303, "step": 10269 }, { "epoch": 1.832039960752832, "grad_norm": 0.5206865668296814, "learning_rate": 8.624644536401521e-06, "loss": 0.5213, "step": 10270 }, { "epoch": 1.8322183569708321, "grad_norm": 0.5535826683044434, "learning_rate": 8.606409953041627e-06, "loss": 0.5366, "step": 10271 }, { "epoch": 1.8323967531888323, "grad_norm": 0.5583673715591431, "learning_rate": 8.588194328630422e-06, "loss": 0.5131, "step": 10272 }, { "epoch": 1.8325751494068325, "grad_norm": 0.45444154739379883, "learning_rate": 8.569997664598567e-06, "loss": 0.2948, "step": 10273 }, { "epoch": 1.8327535456248327, "grad_norm": 0.5591797232627869, "learning_rate": 8.55181996237514e-06, "loss": 0.546, "step": 10274 }, { "epoch": 1.832931941842833, "grad_norm": 0.6100978255271912, "learning_rate": 8.533661223387946e-06, "loss": 0.4099, "step": 10275 }, { "epoch": 1.8331103380608331, "grad_norm": 0.5809532403945923, "learning_rate": 8.515521449063036e-06, "loss": 0.5211, "step": 10276 }, { "epoch": 1.8332887342788333, "grad_norm": 0.5336730480194092, "learning_rate": 8.497400640825186e-06, "loss": 0.5875, "step": 10277 }, { "epoch": 1.8334671304968335, "grad_norm": 0.5325056910514832, "learning_rate": 8.47929880009754e-06, "loss": 0.4842, "step": 10278 }, { "epoch": 1.8336455267148337, "grad_norm": 0.528160810470581, "learning_rate": 8.461215928301819e-06, "loss": 0.5506, "step": 10279 }, { "epoch": 1.833823922932834, "grad_norm": 0.5754840970039368, "learning_rate": 8.443152026858303e-06, "loss": 0.4679, "step": 10280 }, { "epoch": 1.8340023191508341, "grad_norm": 0.5175067782402039, "learning_rate": 8.425107097185636e-06, "loss": 0.5501, "step": 10281 }, { "epoch": 1.8341807153688343, "grad_norm": 0.47843798995018005, "learning_rate": 8.407081140701128e-06, "loss": 0.3725, "step": 10282 }, { "epoch": 1.8343591115868345, "grad_norm": 0.5563110709190369, "learning_rate": 8.389074158820453e-06, "loss": 0.5422, "step": 10283 }, { "epoch": 1.8345375078048345, "grad_norm": 0.5997392535209656, "learning_rate": 8.371086152957952e-06, "loss": 0.6135, "step": 10284 }, { "epoch": 1.8347159040228347, "grad_norm": 0.5224518775939941, "learning_rate": 8.353117124526382e-06, "loss": 0.4452, "step": 10285 }, { "epoch": 1.8348943002408349, "grad_norm": 0.5603434443473816, "learning_rate": 8.33516707493695e-06, "loss": 0.5088, "step": 10286 }, { "epoch": 1.835072696458835, "grad_norm": 0.5151491761207581, "learning_rate": 8.317236005599554e-06, "loss": 0.5069, "step": 10287 }, { "epoch": 1.8352510926768353, "grad_norm": 0.46216633915901184, "learning_rate": 8.299323917922402e-06, "loss": 0.3475, "step": 10288 }, { "epoch": 1.8354294888948355, "grad_norm": 0.5850088000297546, "learning_rate": 8.281430813312368e-06, "loss": 0.6099, "step": 10289 }, { "epoch": 1.8356078851128355, "grad_norm": 0.510653018951416, "learning_rate": 8.263556693174745e-06, "loss": 0.4093, "step": 10290 }, { "epoch": 1.8357862813308357, "grad_norm": 0.6367735862731934, "learning_rate": 8.245701558913327e-06, "loss": 0.5877, "step": 10291 }, { "epoch": 1.8359646775488359, "grad_norm": 0.5245457887649536, "learning_rate": 8.227865411930492e-06, "loss": 0.4942, "step": 10292 }, { "epoch": 1.836143073766836, "grad_norm": 0.5388858318328857, "learning_rate": 8.210048253627034e-06, "loss": 0.5077, "step": 10293 }, { "epoch": 1.8363214699848363, "grad_norm": 0.4671160876750946, "learning_rate": 8.192250085402364e-06, "loss": 0.4215, "step": 10294 }, { "epoch": 1.8364998662028365, "grad_norm": 0.5302553772926331, "learning_rate": 8.174470908654309e-06, "loss": 0.4563, "step": 10295 }, { "epoch": 1.8366782624208366, "grad_norm": 0.5391623377799988, "learning_rate": 8.156710724779249e-06, "loss": 0.5735, "step": 10296 }, { "epoch": 1.8368566586388368, "grad_norm": 0.5140558481216431, "learning_rate": 8.138969535172014e-06, "loss": 0.4466, "step": 10297 }, { "epoch": 1.837035054856837, "grad_norm": 0.497837096452713, "learning_rate": 8.121247341226074e-06, "loss": 0.4269, "step": 10298 }, { "epoch": 1.8372134510748372, "grad_norm": 0.49367204308509827, "learning_rate": 8.103544144333259e-06, "loss": 0.4433, "step": 10299 }, { "epoch": 1.8373918472928374, "grad_norm": 0.5372571349143982, "learning_rate": 8.085859945883984e-06, "loss": 0.5503, "step": 10300 }, { "epoch": 1.8375702435108376, "grad_norm": 0.5628436207771301, "learning_rate": 8.068194747267193e-06, "loss": 0.5568, "step": 10301 }, { "epoch": 1.8377486397288378, "grad_norm": 0.5379055142402649, "learning_rate": 8.050548549870252e-06, "loss": 0.5478, "step": 10302 }, { "epoch": 1.837927035946838, "grad_norm": 1.1105915307998657, "learning_rate": 8.032921355079132e-06, "loss": 0.4982, "step": 10303 }, { "epoch": 1.8381054321648382, "grad_norm": 0.4970918893814087, "learning_rate": 8.015313164278227e-06, "loss": 0.5111, "step": 10304 }, { "epoch": 1.8382838283828384, "grad_norm": 0.4896716773509979, "learning_rate": 7.997723978850486e-06, "loss": 0.3981, "step": 10305 }, { "epoch": 1.8384622246008384, "grad_norm": 0.5600361824035645, "learning_rate": 7.980153800177387e-06, "loss": 0.4834, "step": 10306 }, { "epoch": 1.8386406208188386, "grad_norm": 0.560856819152832, "learning_rate": 7.962602629638827e-06, "loss": 0.6468, "step": 10307 }, { "epoch": 1.8388190170368388, "grad_norm": 0.46853670477867126, "learning_rate": 7.945070468613313e-06, "loss": 0.4358, "step": 10308 }, { "epoch": 1.838997413254839, "grad_norm": 0.4796409606933594, "learning_rate": 7.9275573184778e-06, "loss": 0.4112, "step": 10309 }, { "epoch": 1.8391758094728392, "grad_norm": 0.5527827143669128, "learning_rate": 7.910063180607775e-06, "loss": 0.63, "step": 10310 }, { "epoch": 1.8393542056908394, "grad_norm": 0.5861801505088806, "learning_rate": 7.892588056377214e-06, "loss": 0.6435, "step": 10311 }, { "epoch": 1.8395326019088394, "grad_norm": 0.5089249610900879, "learning_rate": 7.875131947158554e-06, "loss": 0.4791, "step": 10312 }, { "epoch": 1.8397109981268396, "grad_norm": 0.5814892053604126, "learning_rate": 7.857694854322888e-06, "loss": 0.5377, "step": 10313 }, { "epoch": 1.8398893943448398, "grad_norm": 0.516287088394165, "learning_rate": 7.840276779239625e-06, "loss": 0.3958, "step": 10314 }, { "epoch": 1.84006779056284, "grad_norm": 0.5036550164222717, "learning_rate": 7.822877723276834e-06, "loss": 0.518, "step": 10315 }, { "epoch": 1.8402461867808402, "grad_norm": 0.4933060109615326, "learning_rate": 7.805497687801006e-06, "loss": 0.4199, "step": 10316 }, { "epoch": 1.8404245829988404, "grad_norm": 0.4610743522644043, "learning_rate": 7.78813667417716e-06, "loss": 0.3651, "step": 10317 }, { "epoch": 1.8406029792168406, "grad_norm": 0.5514711737632751, "learning_rate": 7.770794683768845e-06, "loss": 0.4906, "step": 10318 }, { "epoch": 1.8407813754348408, "grad_norm": 0.6051323413848877, "learning_rate": 7.753471717938054e-06, "loss": 0.6172, "step": 10319 }, { "epoch": 1.840959771652841, "grad_norm": 0.4640166461467743, "learning_rate": 7.736167778045367e-06, "loss": 0.4343, "step": 10320 }, { "epoch": 1.8411381678708412, "grad_norm": 0.4809337556362152, "learning_rate": 7.718882865449806e-06, "loss": 0.3689, "step": 10321 }, { "epoch": 1.8413165640888414, "grad_norm": 0.5500307083129883, "learning_rate": 7.701616981508924e-06, "loss": 0.5312, "step": 10322 }, { "epoch": 1.8414949603068416, "grad_norm": 0.5196850299835205, "learning_rate": 7.684370127578749e-06, "loss": 0.4428, "step": 10323 }, { "epoch": 1.8416733565248418, "grad_norm": 0.5277876853942871, "learning_rate": 7.66714230501392e-06, "loss": 0.5147, "step": 10324 }, { "epoch": 1.841851752742842, "grad_norm": 0.4931204915046692, "learning_rate": 7.649933515167407e-06, "loss": 0.4189, "step": 10325 }, { "epoch": 1.8420301489608422, "grad_norm": 0.5108685493469238, "learning_rate": 7.632743759390826e-06, "loss": 0.5112, "step": 10326 }, { "epoch": 1.8422085451788424, "grad_norm": 0.5715261697769165, "learning_rate": 7.61557303903429e-06, "loss": 0.5237, "step": 10327 }, { "epoch": 1.8423869413968423, "grad_norm": 0.5731346607208252, "learning_rate": 7.5984213554462775e-06, "loss": 0.6463, "step": 10328 }, { "epoch": 1.8425653376148425, "grad_norm": 0.4316110610961914, "learning_rate": 7.581288709973988e-06, "loss": 0.3569, "step": 10329 }, { "epoch": 1.8427437338328427, "grad_norm": 0.512586772441864, "learning_rate": 7.564175103962956e-06, "loss": 0.4411, "step": 10330 }, { "epoch": 1.842922130050843, "grad_norm": 0.5421055555343628, "learning_rate": 7.547080538757245e-06, "loss": 0.4933, "step": 10331 }, { "epoch": 1.8431005262688431, "grad_norm": 0.5586483478546143, "learning_rate": 7.5300050156995315e-06, "loss": 0.5065, "step": 10332 }, { "epoch": 1.8432789224868433, "grad_norm": 0.6223009824752808, "learning_rate": 7.5129485361308534e-06, "loss": 0.4358, "step": 10333 }, { "epoch": 1.8434573187048433, "grad_norm": 0.5178797841072083, "learning_rate": 7.49591110139089e-06, "loss": 0.4661, "step": 10334 }, { "epoch": 1.8436357149228435, "grad_norm": 0.5489305257797241, "learning_rate": 7.478892712817681e-06, "loss": 0.5131, "step": 10335 }, { "epoch": 1.8438141111408437, "grad_norm": 0.5495460033416748, "learning_rate": 7.4618933717478796e-06, "loss": 0.6006, "step": 10336 }, { "epoch": 1.843992507358844, "grad_norm": 0.4898929297924042, "learning_rate": 7.444913079516613e-06, "loss": 0.3961, "step": 10337 }, { "epoch": 1.844170903576844, "grad_norm": 0.48516249656677246, "learning_rate": 7.42795183745748e-06, "loss": 0.3852, "step": 10338 }, { "epoch": 1.8443492997948443, "grad_norm": 0.6237598061561584, "learning_rate": 7.411009646902639e-06, "loss": 0.6107, "step": 10339 }, { "epoch": 1.8445276960128445, "grad_norm": 0.49778953194618225, "learning_rate": 7.394086509182663e-06, "loss": 0.3913, "step": 10340 }, { "epoch": 1.8447060922308447, "grad_norm": 0.5452266931533813, "learning_rate": 7.377182425626766e-06, "loss": 0.5521, "step": 10341 }, { "epoch": 1.844884488448845, "grad_norm": 0.6213571429252625, "learning_rate": 7.360297397562527e-06, "loss": 0.6084, "step": 10342 }, { "epoch": 1.845062884666845, "grad_norm": 0.5283211469650269, "learning_rate": 7.34343142631616e-06, "loss": 0.5813, "step": 10343 }, { "epoch": 1.8452412808848453, "grad_norm": 0.45486849546432495, "learning_rate": 7.32658451321222e-06, "loss": 0.2873, "step": 10344 }, { "epoch": 1.8454196771028455, "grad_norm": 0.544691264629364, "learning_rate": 7.3097566595738965e-06, "loss": 0.5815, "step": 10345 }, { "epoch": 1.8455980733208457, "grad_norm": 0.6090754866600037, "learning_rate": 7.292947866722882e-06, "loss": 0.567, "step": 10346 }, { "epoch": 1.8457764695388459, "grad_norm": 0.5459246635437012, "learning_rate": 7.276158135979288e-06, "loss": 0.6544, "step": 10347 }, { "epoch": 1.845954865756846, "grad_norm": 0.54819256067276, "learning_rate": 7.259387468661782e-06, "loss": 0.5531, "step": 10348 }, { "epoch": 1.8461332619748463, "grad_norm": 0.5134831070899963, "learning_rate": 7.242635866087505e-06, "loss": 0.4531, "step": 10349 }, { "epoch": 1.8463116581928463, "grad_norm": 0.5044652223587036, "learning_rate": 7.225903329572181e-06, "loss": 0.4553, "step": 10350 }, { "epoch": 1.8464900544108465, "grad_norm": 0.4595658779144287, "learning_rate": 7.209189860429899e-06, "loss": 0.34, "step": 10351 }, { "epoch": 1.8466684506288467, "grad_norm": 0.5554966330528259, "learning_rate": 7.1924954599733864e-06, "loss": 0.4627, "step": 10352 }, { "epoch": 1.8468468468468469, "grad_norm": 0.49979686737060547, "learning_rate": 7.175820129513788e-06, "loss": 0.4567, "step": 10353 }, { "epoch": 1.847025243064847, "grad_norm": 0.527464747428894, "learning_rate": 7.159163870360752e-06, "loss": 0.4319, "step": 10354 }, { "epoch": 1.8472036392828473, "grad_norm": 0.5300561785697937, "learning_rate": 7.142526683822537e-06, "loss": 0.4145, "step": 10355 }, { "epoch": 1.8473820355008472, "grad_norm": 0.5047762393951416, "learning_rate": 7.125908571205708e-06, "loss": 0.411, "step": 10356 }, { "epoch": 1.8475604317188474, "grad_norm": 0.5059321522712708, "learning_rate": 7.109309533815556e-06, "loss": 0.4051, "step": 10357 }, { "epoch": 1.8477388279368476, "grad_norm": 0.5237871408462524, "learning_rate": 7.092729572955675e-06, "loss": 0.4884, "step": 10358 }, { "epoch": 1.8479172241548478, "grad_norm": 0.4973069131374359, "learning_rate": 7.076168689928275e-06, "loss": 0.3889, "step": 10359 }, { "epoch": 1.848095620372848, "grad_norm": 0.5443868637084961, "learning_rate": 7.059626886034093e-06, "loss": 0.4669, "step": 10360 }, { "epoch": 1.8482740165908482, "grad_norm": 0.5395627021789551, "learning_rate": 7.0431041625722e-06, "loss": 0.4496, "step": 10361 }, { "epoch": 1.8484524128088484, "grad_norm": 0.5488491654396057, "learning_rate": 7.026600520840393e-06, "loss": 0.5293, "step": 10362 }, { "epoch": 1.8486308090268486, "grad_norm": 0.5110378265380859, "learning_rate": 7.010115962134855e-06, "loss": 0.4904, "step": 10363 }, { "epoch": 1.8488092052448488, "grad_norm": 0.5103933215141296, "learning_rate": 6.993650487750192e-06, "loss": 0.5069, "step": 10364 }, { "epoch": 1.848987601462849, "grad_norm": 0.5104700326919556, "learning_rate": 6.9772040989796725e-06, "loss": 0.468, "step": 10365 }, { "epoch": 1.8491659976808492, "grad_norm": 0.4703051745891571, "learning_rate": 6.960776797114931e-06, "loss": 0.4197, "step": 10366 }, { "epoch": 1.8493443938988494, "grad_norm": 0.5158678889274597, "learning_rate": 6.944368583446242e-06, "loss": 0.4147, "step": 10367 }, { "epoch": 1.8495227901168496, "grad_norm": 0.5142411589622498, "learning_rate": 6.927979459262212e-06, "loss": 0.4338, "step": 10368 }, { "epoch": 1.8497011863348498, "grad_norm": 0.4859728515148163, "learning_rate": 6.9116094258500905e-06, "loss": 0.38, "step": 10369 }, { "epoch": 1.84987958255285, "grad_norm": 0.5308949947357178, "learning_rate": 6.895258484495515e-06, "loss": 0.5024, "step": 10370 }, { "epoch": 1.8500579787708502, "grad_norm": 0.46590912342071533, "learning_rate": 6.878926636482791e-06, "loss": 0.3446, "step": 10371 }, { "epoch": 1.8502363749888502, "grad_norm": 0.5768349170684814, "learning_rate": 6.862613883094504e-06, "loss": 0.593, "step": 10372 }, { "epoch": 1.8504147712068504, "grad_norm": 0.6099775433540344, "learning_rate": 6.84632022561188e-06, "loss": 0.5706, "step": 10373 }, { "epoch": 1.8505931674248506, "grad_norm": 0.6040098071098328, "learning_rate": 6.830045665314672e-06, "loss": 0.6825, "step": 10374 }, { "epoch": 1.8507715636428508, "grad_norm": 0.4853060841560364, "learning_rate": 6.813790203480996e-06, "loss": 0.4622, "step": 10375 }, { "epoch": 1.850949959860851, "grad_norm": 0.5379960536956787, "learning_rate": 6.7975538413875825e-06, "loss": 0.5277, "step": 10376 }, { "epoch": 1.8511283560788512, "grad_norm": 0.6292576193809509, "learning_rate": 6.781336580309661e-06, "loss": 0.6256, "step": 10377 }, { "epoch": 1.8513067522968512, "grad_norm": 0.5415869355201721, "learning_rate": 6.765138421520878e-06, "loss": 0.3965, "step": 10378 }, { "epoch": 1.8514851485148514, "grad_norm": 0.5575872659683228, "learning_rate": 6.748959366293467e-06, "loss": 0.5549, "step": 10379 }, { "epoch": 1.8516635447328516, "grad_norm": 0.5051519870758057, "learning_rate": 6.732799415898078e-06, "loss": 0.4057, "step": 10380 }, { "epoch": 1.8518419409508518, "grad_norm": 0.48078250885009766, "learning_rate": 6.716658571603973e-06, "loss": 0.3774, "step": 10381 }, { "epoch": 1.852020337168852, "grad_norm": 0.6021243333816528, "learning_rate": 6.7005368346787775e-06, "loss": 0.5807, "step": 10382 }, { "epoch": 1.8521987333868521, "grad_norm": 0.5509424209594727, "learning_rate": 6.6844342063887565e-06, "loss": 0.5441, "step": 10383 }, { "epoch": 1.8523771296048523, "grad_norm": 0.5380014181137085, "learning_rate": 6.6683506879985645e-06, "loss": 0.583, "step": 10384 }, { "epoch": 1.8525555258228525, "grad_norm": 0.5455968976020813, "learning_rate": 6.652286280771358e-06, "loss": 0.4776, "step": 10385 }, { "epoch": 1.8527339220408527, "grad_norm": 0.5762505531311035, "learning_rate": 6.636240985968906e-06, "loss": 0.5963, "step": 10386 }, { "epoch": 1.852912318258853, "grad_norm": 0.5499910712242126, "learning_rate": 6.620214804851338e-06, "loss": 0.5036, "step": 10387 }, { "epoch": 1.8530907144768531, "grad_norm": 0.46007874608039856, "learning_rate": 6.60420773867737e-06, "loss": 0.3352, "step": 10388 }, { "epoch": 1.8532691106948533, "grad_norm": 0.5644509792327881, "learning_rate": 6.588219788704164e-06, "loss": 0.5467, "step": 10389 }, { "epoch": 1.8534475069128535, "grad_norm": 0.5105949640274048, "learning_rate": 6.572250956187465e-06, "loss": 0.4846, "step": 10390 }, { "epoch": 1.8536259031308537, "grad_norm": 0.505388617515564, "learning_rate": 6.556301242381379e-06, "loss": 0.4123, "step": 10391 }, { "epoch": 1.853804299348854, "grad_norm": 0.49062561988830566, "learning_rate": 6.540370648538657e-06, "loss": 0.3651, "step": 10392 }, { "epoch": 1.8539826955668541, "grad_norm": 0.5342191457748413, "learning_rate": 6.524459175910464e-06, "loss": 0.5833, "step": 10393 }, { "epoch": 1.854161091784854, "grad_norm": 0.5109807848930359, "learning_rate": 6.508566825746437e-06, "loss": 0.402, "step": 10394 }, { "epoch": 1.8543394880028543, "grad_norm": 0.5528099536895752, "learning_rate": 6.49269359929483e-06, "loss": 0.5504, "step": 10395 }, { "epoch": 1.8545178842208545, "grad_norm": 0.479790061712265, "learning_rate": 6.476839497802256e-06, "loss": 0.3927, "step": 10396 }, { "epoch": 1.8546962804388547, "grad_norm": 0.5817665457725525, "learning_rate": 6.461004522513913e-06, "loss": 0.5102, "step": 10397 }, { "epoch": 1.854874676656855, "grad_norm": 0.5208722352981567, "learning_rate": 6.445188674673474e-06, "loss": 0.489, "step": 10398 }, { "epoch": 1.855053072874855, "grad_norm": 0.4927591383457184, "learning_rate": 6.429391955523112e-06, "loss": 0.469, "step": 10399 }, { "epoch": 1.855231469092855, "grad_norm": 0.5992792844772339, "learning_rate": 6.413614366303472e-06, "loss": 0.5953, "step": 10400 }, { "epoch": 1.8554098653108553, "grad_norm": 0.5083011984825134, "learning_rate": 6.397855908253758e-06, "loss": 0.5346, "step": 10401 }, { "epoch": 1.8555882615288555, "grad_norm": 0.557491660118103, "learning_rate": 6.382116582611591e-06, "loss": 0.6326, "step": 10402 }, { "epoch": 1.8557666577468557, "grad_norm": 0.5369499325752258, "learning_rate": 6.36639639061315e-06, "loss": 0.5539, "step": 10403 }, { "epoch": 1.8559450539648559, "grad_norm": 0.5842743515968323, "learning_rate": 6.350695333493112e-06, "loss": 0.5387, "step": 10404 }, { "epoch": 1.856123450182856, "grad_norm": 0.5030590295791626, "learning_rate": 6.33501341248463e-06, "loss": 0.4925, "step": 10405 }, { "epoch": 1.8563018464008563, "grad_norm": 0.5017548203468323, "learning_rate": 6.319350628819304e-06, "loss": 0.4771, "step": 10406 }, { "epoch": 1.8564802426188565, "grad_norm": 0.5642099380493164, "learning_rate": 6.303706983727286e-06, "loss": 0.5488, "step": 10407 }, { "epoch": 1.8566586388368567, "grad_norm": 0.5220343470573425, "learning_rate": 6.28808247843729e-06, "loss": 0.5252, "step": 10408 }, { "epoch": 1.8568370350548569, "grad_norm": 0.5058274865150452, "learning_rate": 6.272477114176417e-06, "loss": 0.4143, "step": 10409 }, { "epoch": 1.857015431272857, "grad_norm": 0.5488356351852417, "learning_rate": 6.2568908921703245e-06, "loss": 0.5446, "step": 10410 }, { "epoch": 1.8571938274908573, "grad_norm": 0.5424718856811523, "learning_rate": 6.241323813643091e-06, "loss": 0.4847, "step": 10411 }, { "epoch": 1.8573722237088575, "grad_norm": 0.5151358246803284, "learning_rate": 6.22577587981743e-06, "loss": 0.4357, "step": 10412 }, { "epoch": 1.8575506199268577, "grad_norm": 0.5469593405723572, "learning_rate": 6.210247091914395e-06, "loss": 0.4538, "step": 10413 }, { "epoch": 1.8577290161448579, "grad_norm": 0.5348469614982605, "learning_rate": 6.194737451153648e-06, "loss": 0.5542, "step": 10414 }, { "epoch": 1.857907412362858, "grad_norm": 0.6335954070091248, "learning_rate": 6.179246958753298e-06, "loss": 0.57, "step": 10415 }, { "epoch": 1.858085808580858, "grad_norm": 0.5610979795455933, "learning_rate": 6.163775615929984e-06, "loss": 0.585, "step": 10416 }, { "epoch": 1.8582642047988582, "grad_norm": 0.4940567910671234, "learning_rate": 6.148323423898816e-06, "loss": 0.4222, "step": 10417 }, { "epoch": 1.8584426010168584, "grad_norm": 0.5531600117683411, "learning_rate": 6.132890383873352e-06, "loss": 0.4795, "step": 10418 }, { "epoch": 1.8586209972348586, "grad_norm": 0.5417333841323853, "learning_rate": 6.11747649706576e-06, "loss": 0.5194, "step": 10419 }, { "epoch": 1.8587993934528588, "grad_norm": 0.5340380668640137, "learning_rate": 6.1020817646866014e-06, "loss": 0.534, "step": 10420 }, { "epoch": 1.858977789670859, "grad_norm": 0.5391452312469482, "learning_rate": 6.08670618794499e-06, "loss": 0.4972, "step": 10421 }, { "epoch": 1.859156185888859, "grad_norm": 0.4881535470485687, "learning_rate": 6.07134976804849e-06, "loss": 0.3307, "step": 10422 }, { "epoch": 1.8593345821068592, "grad_norm": 0.4824979305267334, "learning_rate": 6.056012506203218e-06, "loss": 0.4743, "step": 10423 }, { "epoch": 1.8595129783248594, "grad_norm": 0.5851719975471497, "learning_rate": 6.040694403613767e-06, "loss": 0.5364, "step": 10424 }, { "epoch": 1.8596913745428596, "grad_norm": 0.5611156821250916, "learning_rate": 6.025395461483174e-06, "loss": 0.5917, "step": 10425 }, { "epoch": 1.8598697707608598, "grad_norm": 0.5232062935829163, "learning_rate": 6.010115681013034e-06, "loss": 0.381, "step": 10426 }, { "epoch": 1.86004816697886, "grad_norm": 0.5068747401237488, "learning_rate": 5.994855063403415e-06, "loss": 0.453, "step": 10427 }, { "epoch": 1.8602265631968602, "grad_norm": 0.5189545154571533, "learning_rate": 5.979613609852885e-06, "loss": 0.3813, "step": 10428 }, { "epoch": 1.8604049594148604, "grad_norm": 0.5572920441627502, "learning_rate": 5.96439132155846e-06, "loss": 0.597, "step": 10429 }, { "epoch": 1.8605833556328606, "grad_norm": 0.6158145666122437, "learning_rate": 5.949188199715766e-06, "loss": 0.6569, "step": 10430 }, { "epoch": 1.8607617518508608, "grad_norm": 0.5191422700881958, "learning_rate": 5.934004245518793e-06, "loss": 0.401, "step": 10431 }, { "epoch": 1.860940148068861, "grad_norm": 0.5226636528968811, "learning_rate": 5.918839460160086e-06, "loss": 0.4098, "step": 10432 }, { "epoch": 1.8611185442868612, "grad_norm": 0.5896599888801575, "learning_rate": 5.903693844830693e-06, "loss": 0.634, "step": 10433 }, { "epoch": 1.8612969405048614, "grad_norm": 0.5435173511505127, "learning_rate": 5.888567400720135e-06, "loss": 0.4192, "step": 10434 }, { "epoch": 1.8614753367228616, "grad_norm": 0.5945151448249817, "learning_rate": 5.8734601290164615e-06, "loss": 0.5485, "step": 10435 }, { "epoch": 1.8616537329408618, "grad_norm": 0.5320826172828674, "learning_rate": 5.858372030906167e-06, "loss": 0.5008, "step": 10436 }, { "epoch": 1.861832129158862, "grad_norm": 0.569065272808075, "learning_rate": 5.843303107574249e-06, "loss": 0.5214, "step": 10437 }, { "epoch": 1.862010525376862, "grad_norm": 0.5648977756500244, "learning_rate": 5.828253360204261e-06, "loss": 0.6008, "step": 10438 }, { "epoch": 1.8621889215948622, "grad_norm": 0.5260228514671326, "learning_rate": 5.813222789978173e-06, "loss": 0.4568, "step": 10439 }, { "epoch": 1.8623673178128624, "grad_norm": 0.4943072497844696, "learning_rate": 5.798211398076486e-06, "loss": 0.411, "step": 10440 }, { "epoch": 1.8625457140308626, "grad_norm": 0.5590642094612122, "learning_rate": 5.783219185678173e-06, "loss": 0.4807, "step": 10441 }, { "epoch": 1.8627241102488628, "grad_norm": 0.490158349275589, "learning_rate": 5.768246153960766e-06, "loss": 0.4316, "step": 10442 }, { "epoch": 1.862902506466863, "grad_norm": 0.5113316774368286, "learning_rate": 5.7532923041001825e-06, "loss": 0.3966, "step": 10443 }, { "epoch": 1.863080902684863, "grad_norm": 0.5623192191123962, "learning_rate": 5.73835763727093e-06, "loss": 0.5611, "step": 10444 }, { "epoch": 1.8632592989028631, "grad_norm": 0.5246427059173584, "learning_rate": 5.723442154645931e-06, "loss": 0.4583, "step": 10445 }, { "epoch": 1.8634376951208633, "grad_norm": 0.5438576936721802, "learning_rate": 5.708545857396663e-06, "loss": 0.4384, "step": 10446 }, { "epoch": 1.8636160913388635, "grad_norm": 0.4891131818294525, "learning_rate": 5.693668746693109e-06, "loss": 0.4494, "step": 10447 }, { "epoch": 1.8637944875568637, "grad_norm": 0.504887044429779, "learning_rate": 5.678810823703639e-06, "loss": 0.414, "step": 10448 }, { "epoch": 1.863972883774864, "grad_norm": 0.5959895253181458, "learning_rate": 5.663972089595265e-06, "loss": 0.6671, "step": 10449 }, { "epoch": 1.8641512799928641, "grad_norm": 0.45866143703460693, "learning_rate": 5.649152545533331e-06, "loss": 0.347, "step": 10450 }, { "epoch": 1.8643296762108643, "grad_norm": 0.5574930310249329, "learning_rate": 5.634352192681852e-06, "loss": 0.6087, "step": 10451 }, { "epoch": 1.8645080724288645, "grad_norm": 0.5660715103149414, "learning_rate": 5.619571032203147e-06, "loss": 0.5658, "step": 10452 }, { "epoch": 1.8646864686468647, "grad_norm": 0.5469009280204773, "learning_rate": 5.604809065258176e-06, "loss": 0.5725, "step": 10453 }, { "epoch": 1.864864864864865, "grad_norm": 0.5283670425415039, "learning_rate": 5.590066293006374e-06, "loss": 0.4507, "step": 10454 }, { "epoch": 1.8650432610828651, "grad_norm": 0.5461406111717224, "learning_rate": 5.5753427166055635e-06, "loss": 0.5125, "step": 10455 }, { "epoch": 1.8652216573008653, "grad_norm": 0.5304574370384216, "learning_rate": 5.560638337212126e-06, "loss": 0.504, "step": 10456 }, { "epoch": 1.8654000535188655, "grad_norm": 0.5879049301147461, "learning_rate": 5.545953155980998e-06, "loss": 0.5871, "step": 10457 }, { "epoch": 1.8655784497368657, "grad_norm": 0.4767381250858307, "learning_rate": 5.531287174065508e-06, "loss": 0.4185, "step": 10458 }, { "epoch": 1.865756845954866, "grad_norm": 0.5429925918579102, "learning_rate": 5.516640392617511e-06, "loss": 0.5435, "step": 10459 }, { "epoch": 1.8659352421728659, "grad_norm": 0.5666899085044861, "learning_rate": 5.502012812787366e-06, "loss": 0.4597, "step": 10460 }, { "epoch": 1.866113638390866, "grad_norm": 0.4842609167098999, "learning_rate": 5.4874044357239305e-06, "loss": 0.4877, "step": 10461 }, { "epoch": 1.8662920346088663, "grad_norm": 0.5365511178970337, "learning_rate": 5.4728152625745094e-06, "loss": 0.5841, "step": 10462 }, { "epoch": 1.8664704308268665, "grad_norm": 0.4799635112285614, "learning_rate": 5.458245294484964e-06, "loss": 0.4223, "step": 10463 }, { "epoch": 1.8666488270448667, "grad_norm": 0.5694946050643921, "learning_rate": 5.443694532599602e-06, "loss": 0.6436, "step": 10464 }, { "epoch": 1.8668272232628669, "grad_norm": 0.5620405673980713, "learning_rate": 5.429162978061203e-06, "loss": 0.5037, "step": 10465 }, { "epoch": 1.8670056194808669, "grad_norm": 0.5125569105148315, "learning_rate": 5.414650632011131e-06, "loss": 0.5161, "step": 10466 }, { "epoch": 1.867184015698867, "grad_norm": 0.5853519439697266, "learning_rate": 5.400157495589114e-06, "loss": 0.6433, "step": 10467 }, { "epoch": 1.8673624119168672, "grad_norm": 0.46554625034332275, "learning_rate": 5.385683569933464e-06, "loss": 0.3893, "step": 10468 }, { "epoch": 1.8675408081348674, "grad_norm": 0.547414243221283, "learning_rate": 5.371228856180993e-06, "loss": 0.4324, "step": 10469 }, { "epoch": 1.8677192043528676, "grad_norm": 0.6247468590736389, "learning_rate": 5.356793355466933e-06, "loss": 0.729, "step": 10470 }, { "epoch": 1.8678976005708678, "grad_norm": 0.5195764303207397, "learning_rate": 5.342377068925041e-06, "loss": 0.4048, "step": 10471 }, { "epoch": 1.868075996788868, "grad_norm": 0.4851647615432739, "learning_rate": 5.327979997687554e-06, "loss": 0.342, "step": 10472 }, { "epoch": 1.8682543930068682, "grad_norm": 0.5746437907218933, "learning_rate": 5.313602142885232e-06, "loss": 0.4786, "step": 10473 }, { "epoch": 1.8684327892248684, "grad_norm": 0.6188861131668091, "learning_rate": 5.299243505647283e-06, "loss": 0.6486, "step": 10474 }, { "epoch": 1.8686111854428686, "grad_norm": 0.48796316981315613, "learning_rate": 5.2849040871015e-06, "loss": 0.3936, "step": 10475 }, { "epoch": 1.8687895816608688, "grad_norm": 0.5425949692726135, "learning_rate": 5.270583888374009e-06, "loss": 0.5362, "step": 10476 }, { "epoch": 1.868967977878869, "grad_norm": 0.5946719646453857, "learning_rate": 5.256282910589521e-06, "loss": 0.5137, "step": 10477 }, { "epoch": 1.8691463740968692, "grad_norm": 0.5937526822090149, "learning_rate": 5.242001154871306e-06, "loss": 0.5385, "step": 10478 }, { "epoch": 1.8693247703148694, "grad_norm": 0.5041108727455139, "learning_rate": 5.227738622340938e-06, "loss": 0.4807, "step": 10479 }, { "epoch": 1.8695031665328696, "grad_norm": 0.45467957854270935, "learning_rate": 5.213495314118688e-06, "loss": 0.3457, "step": 10480 }, { "epoch": 1.8696815627508698, "grad_norm": 0.5164430141448975, "learning_rate": 5.199271231323133e-06, "loss": 0.4477, "step": 10481 }, { "epoch": 1.8698599589688698, "grad_norm": 0.5357136726379395, "learning_rate": 5.185066375071518e-06, "loss": 0.4991, "step": 10482 }, { "epoch": 1.87003835518687, "grad_norm": 0.4412253797054291, "learning_rate": 5.170880746479395e-06, "loss": 0.3304, "step": 10483 }, { "epoch": 1.8702167514048702, "grad_norm": 0.4293851852416992, "learning_rate": 5.156714346660957e-06, "loss": 0.3241, "step": 10484 }, { "epoch": 1.8703951476228704, "grad_norm": 0.5267345309257507, "learning_rate": 5.142567176728813e-06, "loss": 0.5351, "step": 10485 }, { "epoch": 1.8705735438408706, "grad_norm": 0.5791170001029968, "learning_rate": 5.128439237794047e-06, "loss": 0.4566, "step": 10486 }, { "epoch": 1.8707519400588708, "grad_norm": 0.4116140305995941, "learning_rate": 5.114330530966326e-06, "loss": 0.2923, "step": 10487 }, { "epoch": 1.8709303362768708, "grad_norm": 0.4969407618045807, "learning_rate": 5.100241057353683e-06, "loss": 0.4083, "step": 10488 }, { "epoch": 1.871108732494871, "grad_norm": 0.6181281208992004, "learning_rate": 5.08617081806273e-06, "loss": 0.6443, "step": 10489 }, { "epoch": 1.8712871287128712, "grad_norm": 0.5408806204795837, "learning_rate": 5.072119814198528e-06, "loss": 0.4914, "step": 10490 }, { "epoch": 1.8714655249308714, "grad_norm": 0.4671943783760071, "learning_rate": 5.058088046864611e-06, "loss": 0.3844, "step": 10491 }, { "epoch": 1.8716439211488716, "grad_norm": 0.5404434204101562, "learning_rate": 5.044075517163071e-06, "loss": 0.4863, "step": 10492 }, { "epoch": 1.8718223173668718, "grad_norm": 0.5988584756851196, "learning_rate": 5.030082226194415e-06, "loss": 0.5877, "step": 10493 }, { "epoch": 1.872000713584872, "grad_norm": 0.5672785639762878, "learning_rate": 5.01610817505771e-06, "loss": 0.4992, "step": 10494 }, { "epoch": 1.8721791098028722, "grad_norm": 0.5256212949752808, "learning_rate": 5.002153364850409e-06, "loss": 0.4614, "step": 10495 }, { "epoch": 1.8723575060208724, "grad_norm": 0.47382408380508423, "learning_rate": 4.9882177966685814e-06, "loss": 0.3875, "step": 10496 }, { "epoch": 1.8725359022388726, "grad_norm": 0.5471598505973816, "learning_rate": 4.974301471606685e-06, "loss": 0.4792, "step": 10497 }, { "epoch": 1.8727142984568728, "grad_norm": 0.514196515083313, "learning_rate": 4.96040439075765e-06, "loss": 0.5545, "step": 10498 }, { "epoch": 1.872892694674873, "grad_norm": 0.5053452849388123, "learning_rate": 4.946526555213077e-06, "loss": 0.4108, "step": 10499 }, { "epoch": 1.8730710908928732, "grad_norm": 0.6003931164741516, "learning_rate": 4.9326679660628145e-06, "loss": 0.5834, "step": 10500 }, { "epoch": 1.8732494871108734, "grad_norm": 0.559822678565979, "learning_rate": 4.918828624395383e-06, "loss": 0.517, "step": 10501 }, { "epoch": 1.8734278833288736, "grad_norm": 0.5788659453392029, "learning_rate": 4.905008531297661e-06, "loss": 0.5528, "step": 10502 }, { "epoch": 1.8736062795468738, "grad_norm": 0.4903022050857544, "learning_rate": 4.891207687855115e-06, "loss": 0.4657, "step": 10503 }, { "epoch": 1.8737846757648737, "grad_norm": 0.529029369354248, "learning_rate": 4.877426095151627e-06, "loss": 0.4519, "step": 10504 }, { "epoch": 1.873963071982874, "grad_norm": 0.48290225863456726, "learning_rate": 4.863663754269609e-06, "loss": 0.4803, "step": 10505 }, { "epoch": 1.8741414682008741, "grad_norm": 0.5195373296737671, "learning_rate": 4.849920666289947e-06, "loss": 0.3987, "step": 10506 }, { "epoch": 1.8743198644188743, "grad_norm": 0.537219226360321, "learning_rate": 4.836196832292e-06, "loss": 0.5085, "step": 10507 }, { "epoch": 1.8744982606368745, "grad_norm": 0.4911697208881378, "learning_rate": 4.8224922533536834e-06, "loss": 0.4654, "step": 10508 }, { "epoch": 1.8746766568548747, "grad_norm": 0.5637452006340027, "learning_rate": 4.8088069305513015e-06, "loss": 0.431, "step": 10509 }, { "epoch": 1.8748550530728747, "grad_norm": 0.4910629391670227, "learning_rate": 4.795140864959718e-06, "loss": 0.3722, "step": 10510 }, { "epoch": 1.875033449290875, "grad_norm": 0.5422062873840332, "learning_rate": 4.781494057652269e-06, "loss": 0.5239, "step": 10511 }, { "epoch": 1.875211845508875, "grad_norm": 0.5688182711601257, "learning_rate": 4.767866509700708e-06, "loss": 0.4991, "step": 10512 }, { "epoch": 1.8753902417268753, "grad_norm": 0.5716597437858582, "learning_rate": 4.754258222175428e-06, "loss": 0.5913, "step": 10513 }, { "epoch": 1.8755686379448755, "grad_norm": 0.5582311749458313, "learning_rate": 4.740669196145131e-06, "loss": 0.5269, "step": 10514 }, { "epoch": 1.8757470341628757, "grad_norm": 0.5111207365989685, "learning_rate": 4.727099432677129e-06, "loss": 0.4636, "step": 10515 }, { "epoch": 1.875925430380876, "grad_norm": 0.5934797525405884, "learning_rate": 4.713548932837208e-06, "loss": 0.5653, "step": 10516 }, { "epoch": 1.876103826598876, "grad_norm": 0.602021336555481, "learning_rate": 4.700017697689574e-06, "loss": 0.6561, "step": 10517 }, { "epoch": 1.8762822228168763, "grad_norm": 0.5633701086044312, "learning_rate": 4.686505728297013e-06, "loss": 0.5126, "step": 10518 }, { "epoch": 1.8764606190348765, "grad_norm": 0.5579110383987427, "learning_rate": 4.6730130257207345e-06, "loss": 0.5513, "step": 10519 }, { "epoch": 1.8766390152528767, "grad_norm": 0.5543150901794434, "learning_rate": 4.659539591020417e-06, "loss": 0.5097, "step": 10520 }, { "epoch": 1.8768174114708769, "grad_norm": 0.4759593904018402, "learning_rate": 4.646085425254298e-06, "loss": 0.4002, "step": 10521 }, { "epoch": 1.876995807688877, "grad_norm": 0.6420212388038635, "learning_rate": 4.632650529479032e-06, "loss": 0.6743, "step": 10522 }, { "epoch": 1.8771742039068773, "grad_norm": 0.483822762966156, "learning_rate": 4.61923490474983e-06, "loss": 0.4431, "step": 10523 }, { "epoch": 1.8773526001248775, "grad_norm": 0.5354387760162354, "learning_rate": 4.605838552120295e-06, "loss": 0.4565, "step": 10524 }, { "epoch": 1.8775309963428777, "grad_norm": 0.5774738192558289, "learning_rate": 4.592461472642611e-06, "loss": 0.635, "step": 10525 }, { "epoch": 1.8777093925608777, "grad_norm": 0.4764508605003357, "learning_rate": 4.579103667367385e-06, "loss": 0.357, "step": 10526 }, { "epoch": 1.8778877887788779, "grad_norm": 0.5291125774383545, "learning_rate": 4.565765137343775e-06, "loss": 0.4858, "step": 10527 }, { "epoch": 1.878066184996878, "grad_norm": 0.5577571392059326, "learning_rate": 4.552445883619305e-06, "loss": 0.4481, "step": 10528 }, { "epoch": 1.8782445812148783, "grad_norm": 0.489429771900177, "learning_rate": 4.539145907240139e-06, "loss": 0.4101, "step": 10529 }, { "epoch": 1.8784229774328784, "grad_norm": 0.49116069078445435, "learning_rate": 4.525865209250829e-06, "loss": 0.3959, "step": 10530 }, { "epoch": 1.8786013736508786, "grad_norm": 0.5530174970626831, "learning_rate": 4.512603790694403e-06, "loss": 0.6012, "step": 10531 }, { "epoch": 1.8787797698688786, "grad_norm": 0.5700563788414001, "learning_rate": 4.499361652612444e-06, "loss": 0.5586, "step": 10532 }, { "epoch": 1.8789581660868788, "grad_norm": 0.562254011631012, "learning_rate": 4.486138796044981e-06, "loss": 0.4436, "step": 10533 }, { "epoch": 1.879136562304879, "grad_norm": 0.4984350800514221, "learning_rate": 4.472935222030544e-06, "loss": 0.4187, "step": 10534 }, { "epoch": 1.8793149585228792, "grad_norm": 0.6067535281181335, "learning_rate": 4.459750931606083e-06, "loss": 0.6892, "step": 10535 }, { "epoch": 1.8794933547408794, "grad_norm": 0.5302973389625549, "learning_rate": 4.446585925807129e-06, "loss": 0.4837, "step": 10536 }, { "epoch": 1.8796717509588796, "grad_norm": 0.466941773891449, "learning_rate": 4.43344020566766e-06, "loss": 0.393, "step": 10537 }, { "epoch": 1.8798501471768798, "grad_norm": 0.4711865186691284, "learning_rate": 4.420313772220103e-06, "loss": 0.4012, "step": 10538 }, { "epoch": 1.88002854339488, "grad_norm": 0.574239194393158, "learning_rate": 4.4072066264954355e-06, "loss": 0.5279, "step": 10539 }, { "epoch": 1.8802069396128802, "grad_norm": 0.5025227069854736, "learning_rate": 4.394118769523059e-06, "loss": 0.4958, "step": 10540 }, { "epoch": 1.8803853358308804, "grad_norm": 0.47873345017433167, "learning_rate": 4.381050202330927e-06, "loss": 0.3807, "step": 10541 }, { "epoch": 1.8805637320488806, "grad_norm": 0.5627789497375488, "learning_rate": 4.368000925945386e-06, "loss": 0.5274, "step": 10542 }, { "epoch": 1.8807421282668808, "grad_norm": 0.5269836187362671, "learning_rate": 4.3549709413913675e-06, "loss": 0.4638, "step": 10543 }, { "epoch": 1.880920524484881, "grad_norm": 0.5568525791168213, "learning_rate": 4.34196024969219e-06, "loss": 0.4697, "step": 10544 }, { "epoch": 1.8810989207028812, "grad_norm": 0.4760674834251404, "learning_rate": 4.328968851869758e-06, "loss": 0.4065, "step": 10545 }, { "epoch": 1.8812773169208814, "grad_norm": 0.5974029898643494, "learning_rate": 4.3159967489443955e-06, "loss": 0.6253, "step": 10546 }, { "epoch": 1.8814557131388816, "grad_norm": 0.49886420369148254, "learning_rate": 4.3030439419349255e-06, "loss": 0.4455, "step": 10547 }, { "epoch": 1.8816341093568816, "grad_norm": 0.5672736167907715, "learning_rate": 4.290110431858646e-06, "loss": 0.4771, "step": 10548 }, { "epoch": 1.8818125055748818, "grad_norm": 0.5267834067344666, "learning_rate": 4.277196219731383e-06, "loss": 0.5392, "step": 10549 }, { "epoch": 1.881990901792882, "grad_norm": 0.5531571507453918, "learning_rate": 4.264301306567353e-06, "loss": 0.6175, "step": 10550 }, { "epoch": 1.8821692980108822, "grad_norm": 0.5343933701515198, "learning_rate": 4.251425693379357e-06, "loss": 0.5271, "step": 10551 }, { "epoch": 1.8823476942288824, "grad_norm": 0.5798811316490173, "learning_rate": 4.238569381178642e-06, "loss": 0.6559, "step": 10552 }, { "epoch": 1.8825260904468826, "grad_norm": 0.5240869522094727, "learning_rate": 4.225732370974928e-06, "loss": 0.4699, "step": 10553 }, { "epoch": 1.8827044866648825, "grad_norm": 0.5387951731681824, "learning_rate": 4.212914663776407e-06, "loss": 0.4743, "step": 10554 }, { "epoch": 1.8828828828828827, "grad_norm": 0.5581191778182983, "learning_rate": 4.200116260589831e-06, "loss": 0.4566, "step": 10555 }, { "epoch": 1.883061279100883, "grad_norm": 0.5226394534111023, "learning_rate": 4.1873371624203406e-06, "loss": 0.4776, "step": 10556 }, { "epoch": 1.8832396753188831, "grad_norm": 0.5839105844497681, "learning_rate": 4.174577370271576e-06, "loss": 0.5059, "step": 10557 }, { "epoch": 1.8834180715368833, "grad_norm": 0.5159413814544678, "learning_rate": 4.161836885145765e-06, "loss": 0.4771, "step": 10558 }, { "epoch": 1.8835964677548835, "grad_norm": 0.5594849586486816, "learning_rate": 4.149115708043438e-06, "loss": 0.5485, "step": 10559 }, { "epoch": 1.8837748639728837, "grad_norm": 0.5460865497589111, "learning_rate": 4.136413839963799e-06, "loss": 0.4878, "step": 10560 }, { "epoch": 1.883953260190884, "grad_norm": 0.5533373355865479, "learning_rate": 4.123731281904408e-06, "loss": 0.4856, "step": 10561 }, { "epoch": 1.8841316564088841, "grad_norm": 0.6217470169067383, "learning_rate": 4.111068034861359e-06, "loss": 0.6534, "step": 10562 }, { "epoch": 1.8843100526268843, "grad_norm": 0.5605418682098389, "learning_rate": 4.098424099829218e-06, "loss": 0.5772, "step": 10563 }, { "epoch": 1.8844884488448845, "grad_norm": 0.6147893667221069, "learning_rate": 4.085799477800995e-06, "loss": 0.739, "step": 10564 }, { "epoch": 1.8846668450628847, "grad_norm": 0.5503641963005066, "learning_rate": 4.073194169768285e-06, "loss": 0.5422, "step": 10565 }, { "epoch": 1.884845241280885, "grad_norm": 0.5137640833854675, "learning_rate": 4.0606081767210755e-06, "loss": 0.417, "step": 10566 }, { "epoch": 1.8850236374988851, "grad_norm": 0.46685469150543213, "learning_rate": 4.048041499647853e-06, "loss": 0.3761, "step": 10567 }, { "epoch": 1.8852020337168853, "grad_norm": 0.4922786056995392, "learning_rate": 4.035494139535606e-06, "loss": 0.4437, "step": 10568 }, { "epoch": 1.8853804299348855, "grad_norm": 0.5689695477485657, "learning_rate": 4.0229660973698235e-06, "loss": 0.5262, "step": 10569 }, { "epoch": 1.8855588261528857, "grad_norm": 0.5004351139068604, "learning_rate": 4.010457374134441e-06, "loss": 0.389, "step": 10570 }, { "epoch": 1.8857372223708857, "grad_norm": 0.5173558592796326, "learning_rate": 3.997967970811839e-06, "loss": 0.5562, "step": 10571 }, { "epoch": 1.885915618588886, "grad_norm": 0.48343634605407715, "learning_rate": 3.985497888382983e-06, "loss": 0.4431, "step": 10572 }, { "epoch": 1.886094014806886, "grad_norm": 0.5336623787879944, "learning_rate": 3.973047127827256e-06, "loss": 0.4717, "step": 10573 }, { "epoch": 1.8862724110248863, "grad_norm": 0.5859199166297913, "learning_rate": 3.960615690122543e-06, "loss": 0.5706, "step": 10574 }, { "epoch": 1.8864508072428865, "grad_norm": 0.5274314880371094, "learning_rate": 3.948203576245174e-06, "loss": 0.5479, "step": 10575 }, { "epoch": 1.8866292034608865, "grad_norm": 0.5471269488334656, "learning_rate": 3.935810787170036e-06, "loss": 0.5647, "step": 10576 }, { "epoch": 1.8868075996788867, "grad_norm": 0.48193615674972534, "learning_rate": 3.923437323870405e-06, "loss": 0.4555, "step": 10577 }, { "epoch": 1.8869859958968869, "grad_norm": 0.574404776096344, "learning_rate": 3.911083187318115e-06, "loss": 0.6331, "step": 10578 }, { "epoch": 1.887164392114887, "grad_norm": 0.5169478058815002, "learning_rate": 3.898748378483474e-06, "loss": 0.5231, "step": 10579 }, { "epoch": 1.8873427883328873, "grad_norm": 0.56248939037323, "learning_rate": 3.886432898335207e-06, "loss": 0.6228, "step": 10580 }, { "epoch": 1.8875211845508875, "grad_norm": 0.516521692276001, "learning_rate": 3.874136747840623e-06, "loss": 0.4946, "step": 10581 }, { "epoch": 1.8876995807688877, "grad_norm": 0.5308520793914795, "learning_rate": 3.861859927965394e-06, "loss": 0.5038, "step": 10582 }, { "epoch": 1.8878779769868879, "grad_norm": 0.5017712712287903, "learning_rate": 3.849602439673749e-06, "loss": 0.4414, "step": 10583 }, { "epoch": 1.888056373204888, "grad_norm": 0.5355345010757446, "learning_rate": 3.837364283928446e-06, "loss": 0.4509, "step": 10584 }, { "epoch": 1.8882347694228883, "grad_norm": 0.5393890142440796, "learning_rate": 3.825145461690577e-06, "loss": 0.6249, "step": 10585 }, { "epoch": 1.8884131656408885, "grad_norm": 0.5255075693130493, "learning_rate": 3.8129459739198737e-06, "loss": 0.5318, "step": 10586 }, { "epoch": 1.8885915618588887, "grad_norm": 0.5181835293769836, "learning_rate": 3.800765821574431e-06, "loss": 0.5787, "step": 10587 }, { "epoch": 1.8887699580768889, "grad_norm": 0.5990747213363647, "learning_rate": 3.7886050056109287e-06, "loss": 0.5349, "step": 10588 }, { "epoch": 1.888948354294889, "grad_norm": 0.49007025361061096, "learning_rate": 3.7764635269843804e-06, "loss": 0.3994, "step": 10589 }, { "epoch": 1.8891267505128893, "grad_norm": 0.45775508880615234, "learning_rate": 3.7643413866484678e-06, "loss": 0.325, "step": 10590 }, { "epoch": 1.8893051467308895, "grad_norm": 0.5210441946983337, "learning_rate": 3.7522385855552067e-06, "loss": 0.5073, "step": 10591 }, { "epoch": 1.8894835429488896, "grad_norm": 0.5008862018585205, "learning_rate": 3.7401551246551703e-06, "loss": 0.4124, "step": 10592 }, { "epoch": 1.8896619391668896, "grad_norm": 0.5803366899490356, "learning_rate": 3.728091004897377e-06, "loss": 0.5425, "step": 10593 }, { "epoch": 1.8898403353848898, "grad_norm": 0.48922333121299744, "learning_rate": 3.7160462272293195e-06, "loss": 0.461, "step": 10594 }, { "epoch": 1.89001873160289, "grad_norm": 0.6023316979408264, "learning_rate": 3.704020792597018e-06, "loss": 0.7902, "step": 10595 }, { "epoch": 1.8901971278208902, "grad_norm": 0.5757283568382263, "learning_rate": 3.69201470194494e-06, "loss": 0.5757, "step": 10596 }, { "epoch": 1.8903755240388904, "grad_norm": 0.5134090185165405, "learning_rate": 3.6800279562160257e-06, "loss": 0.5602, "step": 10597 }, { "epoch": 1.8905539202568904, "grad_norm": 0.49674350023269653, "learning_rate": 3.6680605563517153e-06, "loss": 0.4284, "step": 10598 }, { "epoch": 1.8907323164748906, "grad_norm": 0.5035196542739868, "learning_rate": 3.6561125032918975e-06, "loss": 0.4645, "step": 10599 }, { "epoch": 1.8909107126928908, "grad_norm": 0.5700138807296753, "learning_rate": 3.6441837979750427e-06, "loss": 0.6076, "step": 10600 }, { "epoch": 1.891089108910891, "grad_norm": 0.5219821333885193, "learning_rate": 3.632274441337957e-06, "loss": 0.4652, "step": 10601 }, { "epoch": 1.8912675051288912, "grad_norm": 0.5027645826339722, "learning_rate": 3.620384434316004e-06, "loss": 0.4392, "step": 10602 }, { "epoch": 1.8914459013468914, "grad_norm": 0.4853323996067047, "learning_rate": 3.608513777843048e-06, "loss": 0.4215, "step": 10603 }, { "epoch": 1.8916242975648916, "grad_norm": 0.5268037915229797, "learning_rate": 3.59666247285137e-06, "loss": 0.5088, "step": 10604 }, { "epoch": 1.8918026937828918, "grad_norm": 0.5432520508766174, "learning_rate": 3.584830520271809e-06, "loss": 0.4998, "step": 10605 }, { "epoch": 1.891981090000892, "grad_norm": 0.6279560327529907, "learning_rate": 3.5730179210335946e-06, "loss": 0.5981, "step": 10606 }, { "epoch": 1.8921594862188922, "grad_norm": 0.5216326713562012, "learning_rate": 3.5612246760645118e-06, "loss": 0.4698, "step": 10607 }, { "epoch": 1.8923378824368924, "grad_norm": 0.5420396327972412, "learning_rate": 3.54945078629082e-06, "loss": 0.5827, "step": 10608 }, { "epoch": 1.8925162786548926, "grad_norm": 0.5069664716720581, "learning_rate": 3.5376962526371682e-06, "loss": 0.4561, "step": 10609 }, { "epoch": 1.8926946748728928, "grad_norm": 0.4788225293159485, "learning_rate": 3.525961076026818e-06, "loss": 0.4292, "step": 10610 }, { "epoch": 1.892873071090893, "grad_norm": 0.5387194156646729, "learning_rate": 3.514245257381421e-06, "loss": 0.5415, "step": 10611 }, { "epoch": 1.8930514673088932, "grad_norm": 0.5902103185653687, "learning_rate": 3.50254879762113e-06, "loss": 0.6707, "step": 10612 }, { "epoch": 1.8932298635268934, "grad_norm": 0.4795367121696472, "learning_rate": 3.490871697664544e-06, "loss": 0.3785, "step": 10613 }, { "epoch": 1.8934082597448936, "grad_norm": 0.5698316097259521, "learning_rate": 3.4792139584288728e-06, "loss": 0.5774, "step": 10614 }, { "epoch": 1.8935866559628936, "grad_norm": 0.530458927154541, "learning_rate": 3.4675755808296073e-06, "loss": 0.5076, "step": 10615 }, { "epoch": 1.8937650521808937, "grad_norm": 0.5088088512420654, "learning_rate": 3.4559565657808766e-06, "loss": 0.4608, "step": 10616 }, { "epoch": 1.893943448398894, "grad_norm": 0.46197885274887085, "learning_rate": 3.444356914195229e-06, "loss": 0.355, "step": 10617 }, { "epoch": 1.8941218446168941, "grad_norm": 0.5059583783149719, "learning_rate": 3.432776626983658e-06, "loss": 0.4677, "step": 10618 }, { "epoch": 1.8943002408348943, "grad_norm": 0.519899308681488, "learning_rate": 3.421215705055741e-06, "loss": 0.485, "step": 10619 }, { "epoch": 1.8944786370528943, "grad_norm": 0.544313907623291, "learning_rate": 3.4096741493194194e-06, "loss": 0.5031, "step": 10620 }, { "epoch": 1.8946570332708945, "grad_norm": 0.5371688008308411, "learning_rate": 3.3981519606811616e-06, "loss": 0.5519, "step": 10621 }, { "epoch": 1.8948354294888947, "grad_norm": 0.47906023263931274, "learning_rate": 3.3866491400459387e-06, "loss": 0.4269, "step": 10622 }, { "epoch": 1.895013825706895, "grad_norm": 0.5047534704208374, "learning_rate": 3.3751656883171668e-06, "loss": 0.4814, "step": 10623 }, { "epoch": 1.8951922219248951, "grad_norm": 0.5101923942565918, "learning_rate": 3.363701606396735e-06, "loss": 0.4778, "step": 10624 }, { "epoch": 1.8953706181428953, "grad_norm": 0.46724262833595276, "learning_rate": 3.352256895185063e-06, "loss": 0.3659, "step": 10625 }, { "epoch": 1.8955490143608955, "grad_norm": 0.5907710194587708, "learning_rate": 3.3408315555809863e-06, "loss": 0.4965, "step": 10626 }, { "epoch": 1.8957274105788957, "grad_norm": 0.5587756037712097, "learning_rate": 3.3294255884818435e-06, "loss": 0.5713, "step": 10627 }, { "epoch": 1.895905806796896, "grad_norm": 0.5773804783821106, "learning_rate": 3.318038994783501e-06, "loss": 0.5113, "step": 10628 }, { "epoch": 1.896084203014896, "grad_norm": 0.5329833626747131, "learning_rate": 3.306671775380188e-06, "loss": 0.5319, "step": 10629 }, { "epoch": 1.8962625992328963, "grad_norm": 0.541481077671051, "learning_rate": 3.2953239311647175e-06, "loss": 0.3925, "step": 10630 }, { "epoch": 1.8964409954508965, "grad_norm": 0.5258289575576782, "learning_rate": 3.2839954630283497e-06, "loss": 0.4991, "step": 10631 }, { "epoch": 1.8966193916688967, "grad_norm": 0.6551702618598938, "learning_rate": 3.272686371860789e-06, "loss": 0.6916, "step": 10632 }, { "epoch": 1.896797787886897, "grad_norm": 0.5645005702972412, "learning_rate": 3.2613966585502976e-06, "loss": 0.5999, "step": 10633 }, { "epoch": 1.896976184104897, "grad_norm": 0.7276445627212524, "learning_rate": 3.2501263239834987e-06, "loss": 0.4629, "step": 10634 }, { "epoch": 1.8971545803228973, "grad_norm": 0.5128322839736938, "learning_rate": 3.2388753690456296e-06, "loss": 0.5149, "step": 10635 }, { "epoch": 1.8973329765408975, "grad_norm": 0.4655255377292633, "learning_rate": 3.2276437946202607e-06, "loss": 0.3665, "step": 10636 }, { "epoch": 1.8975113727588975, "grad_norm": 0.5978597402572632, "learning_rate": 3.2164316015895476e-06, "loss": 0.6666, "step": 10637 }, { "epoch": 1.8976897689768977, "grad_norm": 0.5160049796104431, "learning_rate": 3.205238790834147e-06, "loss": 0.4178, "step": 10638 }, { "epoch": 1.8978681651948979, "grad_norm": 0.5093833804130554, "learning_rate": 3.194065363233051e-06, "loss": 0.5388, "step": 10639 }, { "epoch": 1.898046561412898, "grad_norm": 0.5614250898361206, "learning_rate": 3.1829113196638614e-06, "loss": 0.4887, "step": 10640 }, { "epoch": 1.8982249576308983, "grad_norm": 0.7175207734107971, "learning_rate": 3.171776661002601e-06, "loss": 0.4711, "step": 10641 }, { "epoch": 1.8984033538488982, "grad_norm": 0.5213533043861389, "learning_rate": 3.1606613881237924e-06, "loss": 0.4632, "step": 10642 }, { "epoch": 1.8985817500668984, "grad_norm": 0.5014880895614624, "learning_rate": 3.1495655019004032e-06, "loss": 0.4144, "step": 10643 }, { "epoch": 1.8987601462848986, "grad_norm": 0.5747901201248169, "learning_rate": 3.138489003203904e-06, "loss": 0.6008, "step": 10644 }, { "epoch": 1.8989385425028988, "grad_norm": 0.5374071598052979, "learning_rate": 3.1274318929042644e-06, "loss": 0.435, "step": 10645 }, { "epoch": 1.899116938720899, "grad_norm": 0.457996129989624, "learning_rate": 3.116394171869874e-06, "loss": 0.4792, "step": 10646 }, { "epoch": 1.8992953349388992, "grad_norm": 0.4922611713409424, "learning_rate": 3.105375840967678e-06, "loss": 0.3421, "step": 10647 }, { "epoch": 1.8994737311568994, "grad_norm": 0.5241499543190002, "learning_rate": 3.0943769010629565e-06, "loss": 0.5663, "step": 10648 }, { "epoch": 1.8996521273748996, "grad_norm": 0.4872463345527649, "learning_rate": 3.0833973530196846e-06, "loss": 0.3615, "step": 10649 }, { "epoch": 1.8998305235928998, "grad_norm": 0.5093347430229187, "learning_rate": 3.07243719770009e-06, "loss": 0.477, "step": 10650 }, { "epoch": 1.9000089198109, "grad_norm": 0.48740455508232117, "learning_rate": 3.0614964359650112e-06, "loss": 0.4094, "step": 10651 }, { "epoch": 1.9001873160289002, "grad_norm": 0.46468567848205566, "learning_rate": 3.0505750686737332e-06, "loss": 0.3435, "step": 10652 }, { "epoch": 1.9003657122469004, "grad_norm": 0.49886494874954224, "learning_rate": 3.0396730966840423e-06, "loss": 0.4067, "step": 10653 }, { "epoch": 1.9005441084649006, "grad_norm": 0.5205758213996887, "learning_rate": 3.0287905208521427e-06, "loss": 0.3478, "step": 10654 }, { "epoch": 1.9007225046829008, "grad_norm": 0.5665127635002136, "learning_rate": 3.017927342032767e-06, "loss": 0.5246, "step": 10655 }, { "epoch": 1.900900900900901, "grad_norm": 0.5088105797767639, "learning_rate": 3.007083561079066e-06, "loss": 0.3591, "step": 10656 }, { "epoch": 1.9010792971189012, "grad_norm": 0.49926015734672546, "learning_rate": 2.9962591788427473e-06, "loss": 0.4791, "step": 10657 }, { "epoch": 1.9012576933369014, "grad_norm": 0.5580708384513855, "learning_rate": 2.985454196173937e-06, "loss": 0.4928, "step": 10658 }, { "epoch": 1.9014360895549014, "grad_norm": 0.5072367787361145, "learning_rate": 2.974668613921261e-06, "loss": 0.4386, "step": 10659 }, { "epoch": 1.9016144857729016, "grad_norm": 0.5622830986976624, "learning_rate": 2.963902432931792e-06, "loss": 0.4739, "step": 10660 }, { "epoch": 1.9017928819909018, "grad_norm": 0.5466598272323608, "learning_rate": 2.9531556540511593e-06, "loss": 0.5038, "step": 10661 }, { "epoch": 1.901971278208902, "grad_norm": 0.5235751271247864, "learning_rate": 2.942428278123327e-06, "loss": 0.5349, "step": 10662 }, { "epoch": 1.9021496744269022, "grad_norm": 0.5213757753372192, "learning_rate": 2.9317203059908703e-06, "loss": 0.4482, "step": 10663 }, { "epoch": 1.9023280706449022, "grad_norm": 0.537200391292572, "learning_rate": 2.921031738494784e-06, "loss": 0.473, "step": 10664 }, { "epoch": 1.9025064668629024, "grad_norm": 0.5032278895378113, "learning_rate": 2.9103625764745344e-06, "loss": 0.4604, "step": 10665 }, { "epoch": 1.9026848630809026, "grad_norm": 0.5438551902770996, "learning_rate": 2.899712820768091e-06, "loss": 0.5475, "step": 10666 }, { "epoch": 1.9028632592989028, "grad_norm": 0.5733838677406311, "learning_rate": 2.8890824722118403e-06, "loss": 0.4181, "step": 10667 }, { "epoch": 1.903041655516903, "grad_norm": 0.5377230644226074, "learning_rate": 2.878471531640725e-06, "loss": 0.4601, "step": 10668 }, { "epoch": 1.9032200517349032, "grad_norm": 0.5529724955558777, "learning_rate": 2.8678799998881345e-06, "loss": 0.4844, "step": 10669 }, { "epoch": 1.9033984479529034, "grad_norm": 0.5532283186912537, "learning_rate": 2.8573078777858753e-06, "loss": 0.5625, "step": 10670 }, { "epoch": 1.9035768441709036, "grad_norm": 0.5340506434440613, "learning_rate": 2.846755166164311e-06, "loss": 0.4384, "step": 10671 }, { "epoch": 1.9037552403889038, "grad_norm": 0.520977258682251, "learning_rate": 2.836221865852223e-06, "loss": 0.4364, "step": 10672 }, { "epoch": 1.903933636606904, "grad_norm": 0.47905829548835754, "learning_rate": 2.8257079776769224e-06, "loss": 0.415, "step": 10673 }, { "epoch": 1.9041120328249042, "grad_norm": 0.540252149105072, "learning_rate": 2.8152135024641366e-06, "loss": 0.5036, "step": 10674 }, { "epoch": 1.9042904290429044, "grad_norm": 0.4911253750324249, "learning_rate": 2.8047384410381237e-06, "loss": 0.4359, "step": 10675 }, { "epoch": 1.9044688252609046, "grad_norm": 0.5184650421142578, "learning_rate": 2.7942827942215585e-06, "loss": 0.4715, "step": 10676 }, { "epoch": 1.9046472214789048, "grad_norm": 0.4889717102050781, "learning_rate": 2.783846562835646e-06, "loss": 0.3955, "step": 10677 }, { "epoch": 1.904825617696905, "grad_norm": 0.5702592134475708, "learning_rate": 2.7734297477000626e-06, "loss": 0.5709, "step": 10678 }, { "epoch": 1.9050040139149051, "grad_norm": 0.5835392475128174, "learning_rate": 2.7630323496328767e-06, "loss": 0.4535, "step": 10679 }, { "epoch": 1.9051824101329053, "grad_norm": 0.5429273843765259, "learning_rate": 2.752654369450769e-06, "loss": 0.5521, "step": 10680 }, { "epoch": 1.9053608063509053, "grad_norm": 0.5422603487968445, "learning_rate": 2.742295807968781e-06, "loss": 0.5759, "step": 10681 }, { "epoch": 1.9055392025689055, "grad_norm": 0.5456951856613159, "learning_rate": 2.731956666000457e-06, "loss": 0.4278, "step": 10682 }, { "epoch": 1.9057175987869057, "grad_norm": 0.5333420038223267, "learning_rate": 2.7216369443578694e-06, "loss": 0.5113, "step": 10683 }, { "epoch": 1.905895995004906, "grad_norm": 0.6626531481742859, "learning_rate": 2.7113366438515085e-06, "loss": 0.6753, "step": 10684 }, { "epoch": 1.9060743912229061, "grad_norm": 0.5112046003341675, "learning_rate": 2.701055765290339e-06, "loss": 0.4424, "step": 10685 }, { "epoch": 1.906252787440906, "grad_norm": 0.48551252484321594, "learning_rate": 2.6907943094818255e-06, "loss": 0.4418, "step": 10686 }, { "epoch": 1.9064311836589063, "grad_norm": 0.5245196223258972, "learning_rate": 2.680552277231907e-06, "loss": 0.4654, "step": 10687 }, { "epoch": 1.9066095798769065, "grad_norm": 0.5276603102684021, "learning_rate": 2.6703296693449953e-06, "loss": 0.4482, "step": 10688 }, { "epoch": 1.9067879760949067, "grad_norm": 0.5473247766494751, "learning_rate": 2.6601264866239485e-06, "loss": 0.5329, "step": 10689 }, { "epoch": 1.9069663723129069, "grad_norm": 0.6117315292358398, "learning_rate": 2.649942729870125e-06, "loss": 0.5775, "step": 10690 }, { "epoch": 1.907144768530907, "grad_norm": 0.5208797454833984, "learning_rate": 2.639778399883358e-06, "loss": 0.5727, "step": 10691 }, { "epoch": 1.9073231647489073, "grad_norm": 0.5800744295120239, "learning_rate": 2.6296334974619528e-06, "loss": 0.6938, "step": 10692 }, { "epoch": 1.9075015609669075, "grad_norm": 0.49815651774406433, "learning_rate": 2.6195080234026613e-06, "loss": 0.4241, "step": 10693 }, { "epoch": 1.9076799571849077, "grad_norm": 0.515434980392456, "learning_rate": 2.6094019785007914e-06, "loss": 0.4804, "step": 10694 }, { "epoch": 1.9078583534029079, "grad_norm": 0.5178319215774536, "learning_rate": 2.5993153635500143e-06, "loss": 0.3885, "step": 10695 }, { "epoch": 1.908036749620908, "grad_norm": 0.48950934410095215, "learning_rate": 2.589248179342529e-06, "loss": 0.395, "step": 10696 }, { "epoch": 1.9082151458389083, "grad_norm": 0.5800197124481201, "learning_rate": 2.5792004266690094e-06, "loss": 0.6178, "step": 10697 }, { "epoch": 1.9083935420569085, "grad_norm": 0.6045670509338379, "learning_rate": 2.569172106318629e-06, "loss": 0.511, "step": 10698 }, { "epoch": 1.9085719382749087, "grad_norm": 0.4140417277812958, "learning_rate": 2.5591632190790084e-06, "loss": 0.2933, "step": 10699 }, { "epoch": 1.9087503344929089, "grad_norm": 0.5823186635971069, "learning_rate": 2.5491737657362123e-06, "loss": 0.4683, "step": 10700 }, { "epoch": 1.908928730710909, "grad_norm": 0.5681852102279663, "learning_rate": 2.5392037470748365e-06, "loss": 0.5934, "step": 10701 }, { "epoch": 1.9091071269289093, "grad_norm": 0.5418094992637634, "learning_rate": 2.5292531638778926e-06, "loss": 0.5774, "step": 10702 }, { "epoch": 1.9092855231469092, "grad_norm": 0.47225964069366455, "learning_rate": 2.519322016926895e-06, "loss": 0.3773, "step": 10703 }, { "epoch": 1.9094639193649094, "grad_norm": 0.5559024810791016, "learning_rate": 2.5094103070018858e-06, "loss": 0.3676, "step": 10704 }, { "epoch": 1.9096423155829096, "grad_norm": 0.5408554077148438, "learning_rate": 2.499518034881271e-06, "loss": 0.3963, "step": 10705 }, { "epoch": 1.9098207118009098, "grad_norm": 0.4998513162136078, "learning_rate": 2.4896452013420114e-06, "loss": 0.3837, "step": 10706 }, { "epoch": 1.90999910801891, "grad_norm": 0.5326787233352661, "learning_rate": 2.4797918071594594e-06, "loss": 0.5206, "step": 10707 }, { "epoch": 1.91017750423691, "grad_norm": 0.5712464451789856, "learning_rate": 2.469957853107607e-06, "loss": 0.5556, "step": 10708 }, { "epoch": 1.9103559004549102, "grad_norm": 0.5410659909248352, "learning_rate": 2.460143339958726e-06, "loss": 0.4366, "step": 10709 }, { "epoch": 1.9105342966729104, "grad_norm": 0.5795556902885437, "learning_rate": 2.4503482684836154e-06, "loss": 0.5793, "step": 10710 }, { "epoch": 1.9107126928909106, "grad_norm": 0.6724327802658081, "learning_rate": 2.440572639451688e-06, "loss": 0.6335, "step": 10711 }, { "epoch": 1.9108910891089108, "grad_norm": 0.44440463185310364, "learning_rate": 2.4308164536306075e-06, "loss": 0.3103, "step": 10712 }, { "epoch": 1.911069485326911, "grad_norm": 0.49097147583961487, "learning_rate": 2.421079711786678e-06, "loss": 0.3918, "step": 10713 }, { "epoch": 1.9112478815449112, "grad_norm": 0.5756399035453796, "learning_rate": 2.4113624146846205e-06, "loss": 0.669, "step": 10714 }, { "epoch": 1.9114262777629114, "grad_norm": 0.5272496938705444, "learning_rate": 2.4016645630875744e-06, "loss": 0.4483, "step": 10715 }, { "epoch": 1.9116046739809116, "grad_norm": 0.561396062374115, "learning_rate": 2.391986157757292e-06, "loss": 0.6147, "step": 10716 }, { "epoch": 1.9117830701989118, "grad_norm": 0.6622545123100281, "learning_rate": 2.382327199453832e-06, "loss": 0.4827, "step": 10717 }, { "epoch": 1.911961466416912, "grad_norm": 0.5634920597076416, "learning_rate": 2.372687688935837e-06, "loss": 0.5104, "step": 10718 }, { "epoch": 1.9121398626349122, "grad_norm": 0.5393690466880798, "learning_rate": 2.363067626960397e-06, "loss": 0.4723, "step": 10719 }, { "epoch": 1.9123182588529124, "grad_norm": 0.501996636390686, "learning_rate": 2.353467014283073e-06, "loss": 0.4674, "step": 10720 }, { "epoch": 1.9124966550709126, "grad_norm": 0.5242270827293396, "learning_rate": 2.3438858516578733e-06, "loss": 0.4293, "step": 10721 }, { "epoch": 1.9126750512889128, "grad_norm": 0.5785874128341675, "learning_rate": 2.3343241398372796e-06, "loss": 0.5556, "step": 10722 }, { "epoch": 1.912853447506913, "grad_norm": 0.49174314737319946, "learning_rate": 2.324781879572302e-06, "loss": 0.3852, "step": 10723 }, { "epoch": 1.9130318437249132, "grad_norm": 0.56098473072052, "learning_rate": 2.3152590716123688e-06, "loss": 0.5528, "step": 10724 }, { "epoch": 1.9132102399429132, "grad_norm": 0.5013071894645691, "learning_rate": 2.3057557167054377e-06, "loss": 0.4798, "step": 10725 }, { "epoch": 1.9133886361609134, "grad_norm": 0.5687433481216431, "learning_rate": 2.2962718155978e-06, "loss": 0.5398, "step": 10726 }, { "epoch": 1.9135670323789136, "grad_norm": 0.5144028067588806, "learning_rate": 2.286807369034416e-06, "loss": 0.3893, "step": 10727 }, { "epoch": 1.9137454285969138, "grad_norm": 0.48423799872398376, "learning_rate": 2.2773623777585796e-06, "loss": 0.5236, "step": 10728 }, { "epoch": 1.913923824814914, "grad_norm": 0.5465452671051025, "learning_rate": 2.267936842512086e-06, "loss": 0.5458, "step": 10729 }, { "epoch": 1.914102221032914, "grad_norm": 0.4663107991218567, "learning_rate": 2.2585307640352326e-06, "loss": 0.4132, "step": 10730 }, { "epoch": 1.9142806172509141, "grad_norm": 0.5530097484588623, "learning_rate": 2.2491441430667614e-06, "loss": 0.6104, "step": 10731 }, { "epoch": 1.9144590134689143, "grad_norm": 0.5279607176780701, "learning_rate": 2.239776980343916e-06, "loss": 0.4833, "step": 10732 }, { "epoch": 1.9146374096869145, "grad_norm": 0.554326057434082, "learning_rate": 2.2304292766023304e-06, "loss": 0.5667, "step": 10733 }, { "epoch": 1.9148158059049147, "grad_norm": 0.5720328688621521, "learning_rate": 2.2211010325762227e-06, "loss": 0.5472, "step": 10734 }, { "epoch": 1.914994202122915, "grad_norm": 0.6049355864524841, "learning_rate": 2.211792248998229e-06, "loss": 0.7378, "step": 10735 }, { "epoch": 1.9151725983409151, "grad_norm": 0.5384811758995056, "learning_rate": 2.2025029265994036e-06, "loss": 0.588, "step": 10736 }, { "epoch": 1.9153509945589153, "grad_norm": 0.5090281963348389, "learning_rate": 2.1932330661093845e-06, "loss": 0.5302, "step": 10737 }, { "epoch": 1.9155293907769155, "grad_norm": 0.5153109431266785, "learning_rate": 2.183982668256201e-06, "loss": 0.5014, "step": 10738 }, { "epoch": 1.9157077869949157, "grad_norm": 0.5636624693870544, "learning_rate": 2.1747517337663836e-06, "loss": 0.6512, "step": 10739 }, { "epoch": 1.915886183212916, "grad_norm": 0.5594334006309509, "learning_rate": 2.1655402633648792e-06, "loss": 0.5636, "step": 10740 }, { "epoch": 1.9160645794309161, "grad_norm": 0.5760837197303772, "learning_rate": 2.1563482577752482e-06, "loss": 0.5173, "step": 10741 }, { "epoch": 1.9162429756489163, "grad_norm": 0.5558215379714966, "learning_rate": 2.1471757177193295e-06, "loss": 0.486, "step": 10742 }, { "epoch": 1.9164213718669165, "grad_norm": 0.4847973883152008, "learning_rate": 2.1380226439175478e-06, "loss": 0.3862, "step": 10743 }, { "epoch": 1.9165997680849167, "grad_norm": 0.548984944820404, "learning_rate": 2.1288890370888546e-06, "loss": 0.546, "step": 10744 }, { "epoch": 1.916778164302917, "grad_norm": 0.5168630480766296, "learning_rate": 2.119774897950538e-06, "loss": 0.5107, "step": 10745 }, { "epoch": 1.9169565605209171, "grad_norm": 0.5281267762184143, "learning_rate": 2.1106802272184146e-06, "loss": 0.4927, "step": 10746 }, { "epoch": 1.917134956738917, "grad_norm": 0.5126060843467712, "learning_rate": 2.1016050256068007e-06, "loss": 0.3642, "step": 10747 }, { "epoch": 1.9173133529569173, "grad_norm": 0.5802842378616333, "learning_rate": 2.092549293828433e-06, "loss": 0.5518, "step": 10748 }, { "epoch": 1.9174917491749175, "grad_norm": 0.5918566584587097, "learning_rate": 2.0835130325946026e-06, "loss": 0.5455, "step": 10749 }, { "epoch": 1.9176701453929177, "grad_norm": 0.5723239779472351, "learning_rate": 2.0744962426149374e-06, "loss": 0.5907, "step": 10750 }, { "epoch": 1.917848541610918, "grad_norm": 0.4773622751235962, "learning_rate": 2.0654989245976473e-06, "loss": 0.4396, "step": 10751 }, { "epoch": 1.9180269378289179, "grad_norm": 0.5784128308296204, "learning_rate": 2.0565210792493903e-06, "loss": 0.6173, "step": 10752 }, { "epoch": 1.918205334046918, "grad_norm": 0.5458529591560364, "learning_rate": 2.047562707275269e-06, "loss": 0.5314, "step": 10753 }, { "epoch": 1.9183837302649183, "grad_norm": 0.4943179488182068, "learning_rate": 2.038623809378859e-06, "loss": 0.3984, "step": 10754 }, { "epoch": 1.9185621264829185, "grad_norm": 0.6198917031288147, "learning_rate": 2.029704386262238e-06, "loss": 0.7195, "step": 10755 }, { "epoch": 1.9187405227009187, "grad_norm": 0.5691092610359192, "learning_rate": 2.020804438625928e-06, "loss": 0.6608, "step": 10756 }, { "epoch": 1.9189189189189189, "grad_norm": 0.6087114214897156, "learning_rate": 2.011923967168899e-06, "loss": 0.7876, "step": 10757 }, { "epoch": 1.919097315136919, "grad_norm": 0.5224511027336121, "learning_rate": 2.0030629725886763e-06, "loss": 0.418, "step": 10758 }, { "epoch": 1.9192757113549193, "grad_norm": 0.5266249775886536, "learning_rate": 1.9942214555811187e-06, "loss": 0.4646, "step": 10759 }, { "epoch": 1.9194541075729195, "grad_norm": 0.5271503925323486, "learning_rate": 1.9853994168407273e-06, "loss": 0.4795, "step": 10760 }, { "epoch": 1.9196325037909197, "grad_norm": 0.5206458568572998, "learning_rate": 1.9765968570603365e-06, "loss": 0.449, "step": 10761 }, { "epoch": 1.9198109000089199, "grad_norm": 0.5074752569198608, "learning_rate": 1.9678137769312543e-06, "loss": 0.4446, "step": 10762 }, { "epoch": 1.91998929622692, "grad_norm": 0.4914679229259491, "learning_rate": 1.9590501771433735e-06, "loss": 0.4738, "step": 10763 }, { "epoch": 1.9201676924449202, "grad_norm": 0.5547556281089783, "learning_rate": 1.9503060583849485e-06, "loss": 0.5181, "step": 10764 }, { "epoch": 1.9203460886629204, "grad_norm": 0.45473459362983704, "learning_rate": 1.941581421342764e-06, "loss": 0.3413, "step": 10765 }, { "epoch": 1.9205244848809206, "grad_norm": 0.5488914251327515, "learning_rate": 1.932876266701994e-06, "loss": 0.5253, "step": 10766 }, { "epoch": 1.9207028810989208, "grad_norm": 0.5307526588439941, "learning_rate": 1.924190595146369e-06, "loss": 0.5097, "step": 10767 }, { "epoch": 1.920881277316921, "grad_norm": 0.530610203742981, "learning_rate": 1.9155244073580936e-06, "loss": 0.4198, "step": 10768 }, { "epoch": 1.921059673534921, "grad_norm": 0.48655280470848083, "learning_rate": 1.9068777040177342e-06, "loss": 0.4815, "step": 10769 }, { "epoch": 1.9212380697529212, "grad_norm": 0.5858240127563477, "learning_rate": 1.8982504858044703e-06, "loss": 0.5908, "step": 10770 }, { "epoch": 1.9214164659709214, "grad_norm": 0.5390949249267578, "learning_rate": 1.889642753395815e-06, "loss": 0.5308, "step": 10771 }, { "epoch": 1.9215948621889216, "grad_norm": 0.4654117524623871, "learning_rate": 1.8810545074678664e-06, "loss": 0.3985, "step": 10772 }, { "epoch": 1.9217732584069218, "grad_norm": 0.570939838886261, "learning_rate": 1.872485748695113e-06, "loss": 0.5217, "step": 10773 }, { "epoch": 1.9219516546249218, "grad_norm": 0.5508378148078918, "learning_rate": 1.8639364777505442e-06, "loss": 0.5657, "step": 10774 }, { "epoch": 1.922130050842922, "grad_norm": 0.49652281403541565, "learning_rate": 1.8554066953056502e-06, "loss": 0.4462, "step": 10775 }, { "epoch": 1.9223084470609222, "grad_norm": 0.5410851836204529, "learning_rate": 1.8468964020302847e-06, "loss": 0.5473, "step": 10776 }, { "epoch": 1.9224868432789224, "grad_norm": 0.5557335615158081, "learning_rate": 1.8384055985929405e-06, "loss": 0.6211, "step": 10777 }, { "epoch": 1.9226652394969226, "grad_norm": 0.49317964911460876, "learning_rate": 1.8299342856603895e-06, "loss": 0.4219, "step": 10778 }, { "epoch": 1.9228436357149228, "grad_norm": 0.5330275297164917, "learning_rate": 1.8214824638980166e-06, "loss": 0.4602, "step": 10779 }, { "epoch": 1.923022031932923, "grad_norm": 0.5371294617652893, "learning_rate": 1.8130501339696237e-06, "loss": 0.4941, "step": 10780 }, { "epoch": 1.9232004281509232, "grad_norm": 0.5647792220115662, "learning_rate": 1.8046372965374592e-06, "loss": 0.5879, "step": 10781 }, { "epoch": 1.9233788243689234, "grad_norm": 0.5888603329658508, "learning_rate": 1.796243952262272e-06, "loss": 0.6028, "step": 10782 }, { "epoch": 1.9235572205869236, "grad_norm": 0.5580531358718872, "learning_rate": 1.787870101803285e-06, "loss": 0.5612, "step": 10783 }, { "epoch": 1.9237356168049238, "grad_norm": 0.5175449848175049, "learning_rate": 1.7795157458181389e-06, "loss": 0.4524, "step": 10784 }, { "epoch": 1.923914013022924, "grad_norm": 0.5534098148345947, "learning_rate": 1.7711808849630307e-06, "loss": 0.5132, "step": 10785 }, { "epoch": 1.9240924092409242, "grad_norm": 0.5773023962974548, "learning_rate": 1.7628655198925481e-06, "loss": 0.5995, "step": 10786 }, { "epoch": 1.9242708054589244, "grad_norm": 0.4896835386753082, "learning_rate": 1.7545696512597797e-06, "loss": 0.4349, "step": 10787 }, { "epoch": 1.9244492016769246, "grad_norm": 0.4547155797481537, "learning_rate": 1.7462932797163156e-06, "loss": 0.3644, "step": 10788 }, { "epoch": 1.9246275978949248, "grad_norm": 0.5199649333953857, "learning_rate": 1.738036405912108e-06, "loss": 0.5188, "step": 10789 }, { "epoch": 1.924805994112925, "grad_norm": 0.5165232419967651, "learning_rate": 1.7297990304956934e-06, "loss": 0.4517, "step": 10790 }, { "epoch": 1.924984390330925, "grad_norm": 0.5304819345474243, "learning_rate": 1.721581154114027e-06, "loss": 0.4473, "step": 10791 }, { "epoch": 1.9251627865489251, "grad_norm": 0.582391619682312, "learning_rate": 1.7133827774125365e-06, "loss": 0.5413, "step": 10792 }, { "epoch": 1.9253411827669253, "grad_norm": 0.5489014983177185, "learning_rate": 1.7052039010350962e-06, "loss": 0.5362, "step": 10793 }, { "epoch": 1.9255195789849255, "grad_norm": 0.560737669467926, "learning_rate": 1.6970445256241363e-06, "loss": 0.5321, "step": 10794 }, { "epoch": 1.9256979752029257, "grad_norm": 0.5768745541572571, "learning_rate": 1.6889046518203943e-06, "loss": 0.5531, "step": 10795 }, { "epoch": 1.9258763714209257, "grad_norm": 0.4823215901851654, "learning_rate": 1.6807842802632756e-06, "loss": 0.4796, "step": 10796 }, { "epoch": 1.926054767638926, "grad_norm": 0.5584346055984497, "learning_rate": 1.6726834115904643e-06, "loss": 0.5695, "step": 10797 }, { "epoch": 1.9262331638569261, "grad_norm": 0.6224119067192078, "learning_rate": 1.6646020464382294e-06, "loss": 0.59, "step": 10798 }, { "epoch": 1.9264115600749263, "grad_norm": 0.5279660820960999, "learning_rate": 1.6565401854413132e-06, "loss": 0.5258, "step": 10799 }, { "epoch": 1.9265899562929265, "grad_norm": 0.529108464717865, "learning_rate": 1.648497829232848e-06, "loss": 0.4812, "step": 10800 }, { "epoch": 1.9267683525109267, "grad_norm": 0.6457714438438416, "learning_rate": 1.6404749784444673e-06, "loss": 0.5662, "step": 10801 }, { "epoch": 1.926946748728927, "grad_norm": 0.5989267230033875, "learning_rate": 1.632471633706334e-06, "loss": 0.5611, "step": 10802 }, { "epoch": 1.927125144946927, "grad_norm": 0.4550947844982147, "learning_rate": 1.6244877956469728e-06, "loss": 0.3234, "step": 10803 }, { "epoch": 1.9273035411649273, "grad_norm": 0.49629008769989014, "learning_rate": 1.6165234648934657e-06, "loss": 0.4037, "step": 10804 }, { "epoch": 1.9274819373829275, "grad_norm": 0.44870060682296753, "learning_rate": 1.6085786420713123e-06, "loss": 0.2973, "step": 10805 }, { "epoch": 1.9276603336009277, "grad_norm": 0.5849354267120361, "learning_rate": 1.6006533278045131e-06, "loss": 0.6132, "step": 10806 }, { "epoch": 1.927838729818928, "grad_norm": 0.4785587787628174, "learning_rate": 1.5927475227155152e-06, "loss": 0.3464, "step": 10807 }, { "epoch": 1.928017126036928, "grad_norm": 0.4874267876148224, "learning_rate": 1.5848612274252105e-06, "loss": 0.4023, "step": 10808 }, { "epoch": 1.9281955222549283, "grad_norm": 0.48776915669441223, "learning_rate": 1.5769944425530202e-06, "loss": 0.3752, "step": 10809 }, { "epoch": 1.9283739184729285, "grad_norm": 0.612381100654602, "learning_rate": 1.5691471687167558e-06, "loss": 0.5973, "step": 10810 }, { "epoch": 1.9285523146909287, "grad_norm": 0.5721031427383423, "learning_rate": 1.561319406532785e-06, "loss": 0.5869, "step": 10811 }, { "epoch": 1.928730710908929, "grad_norm": 0.524873673915863, "learning_rate": 1.5535111566158667e-06, "loss": 0.4592, "step": 10812 }, { "epoch": 1.9289091071269289, "grad_norm": 0.5512157678604126, "learning_rate": 1.5457224195792873e-06, "loss": 0.5546, "step": 10813 }, { "epoch": 1.929087503344929, "grad_norm": 0.562592625617981, "learning_rate": 1.5379531960347247e-06, "loss": 0.4041, "step": 10814 }, { "epoch": 1.9292658995629293, "grad_norm": 0.5566669702529907, "learning_rate": 1.530203486592413e-06, "loss": 0.4858, "step": 10815 }, { "epoch": 1.9294442957809295, "grad_norm": 0.5376592874526978, "learning_rate": 1.5224732918609762e-06, "loss": 0.5448, "step": 10816 }, { "epoch": 1.9296226919989297, "grad_norm": 0.46925434470176697, "learning_rate": 1.5147626124475955e-06, "loss": 0.4111, "step": 10817 }, { "epoch": 1.9298010882169296, "grad_norm": 0.5781516432762146, "learning_rate": 1.5070714489577864e-06, "loss": 0.471, "step": 10818 }, { "epoch": 1.9299794844349298, "grad_norm": 0.7423487305641174, "learning_rate": 1.4993998019956767e-06, "loss": 0.4972, "step": 10819 }, { "epoch": 1.93015788065293, "grad_norm": 0.487798273563385, "learning_rate": 1.4917476721637569e-06, "loss": 0.4267, "step": 10820 }, { "epoch": 1.9303362768709302, "grad_norm": 0.47426673769950867, "learning_rate": 1.4841150600630183e-06, "loss": 0.3958, "step": 10821 }, { "epoch": 1.9305146730889304, "grad_norm": 0.4360140562057495, "learning_rate": 1.4765019662929812e-06, "loss": 0.3328, "step": 10822 }, { "epoch": 1.9306930693069306, "grad_norm": 0.49829891324043274, "learning_rate": 1.468908391451501e-06, "loss": 0.3546, "step": 10823 }, { "epoch": 1.9308714655249308, "grad_norm": 0.5402979254722595, "learning_rate": 1.4613343361349897e-06, "loss": 0.5048, "step": 10824 }, { "epoch": 1.931049861742931, "grad_norm": 0.49174410104751587, "learning_rate": 1.4537798009383596e-06, "loss": 0.4204, "step": 10825 }, { "epoch": 1.9312282579609312, "grad_norm": 0.5174482464790344, "learning_rate": 1.4462447864548866e-06, "loss": 0.4889, "step": 10826 }, { "epoch": 1.9314066541789314, "grad_norm": 0.5101271271705627, "learning_rate": 1.4387292932764029e-06, "loss": 0.447, "step": 10827 }, { "epoch": 1.9315850503969316, "grad_norm": 0.5471371412277222, "learning_rate": 1.4312333219931307e-06, "loss": 0.4726, "step": 10828 }, { "epoch": 1.9317634466149318, "grad_norm": 0.5995847582817078, "learning_rate": 1.4237568731938488e-06, "loss": 0.5217, "step": 10829 }, { "epoch": 1.931941842832932, "grad_norm": 0.5153140425682068, "learning_rate": 1.4162999474657267e-06, "loss": 0.4917, "step": 10830 }, { "epoch": 1.9321202390509322, "grad_norm": 0.4703426957130432, "learning_rate": 1.4088625453944348e-06, "loss": 0.4425, "step": 10831 }, { "epoch": 1.9322986352689324, "grad_norm": 0.5764087438583374, "learning_rate": 1.401444667564089e-06, "loss": 0.553, "step": 10832 }, { "epoch": 1.9324770314869326, "grad_norm": 0.7190420627593994, "learning_rate": 1.3940463145573068e-06, "loss": 0.5883, "step": 10833 }, { "epoch": 1.9326554277049328, "grad_norm": 0.5590774416923523, "learning_rate": 1.3866674869551232e-06, "loss": 0.5026, "step": 10834 }, { "epoch": 1.9328338239229328, "grad_norm": 0.5439345240592957, "learning_rate": 1.3793081853371026e-06, "loss": 0.5682, "step": 10835 }, { "epoch": 1.933012220140933, "grad_norm": 0.52024245262146, "learning_rate": 1.3719684102812547e-06, "loss": 0.428, "step": 10836 }, { "epoch": 1.9331906163589332, "grad_norm": 0.5542047023773193, "learning_rate": 1.3646481623639794e-06, "loss": 0.5323, "step": 10837 }, { "epoch": 1.9333690125769334, "grad_norm": 0.5421310663223267, "learning_rate": 1.3573474421602617e-06, "loss": 0.4396, "step": 10838 }, { "epoch": 1.9335474087949336, "grad_norm": 0.4774705767631531, "learning_rate": 1.3500662502434758e-06, "loss": 0.4457, "step": 10839 }, { "epoch": 1.9337258050129336, "grad_norm": 0.5488468408584595, "learning_rate": 1.34280458718547e-06, "loss": 0.5401, "step": 10840 }, { "epoch": 1.9339042012309338, "grad_norm": 0.4551815986633301, "learning_rate": 1.3355624535565936e-06, "loss": 0.3325, "step": 10841 }, { "epoch": 1.934082597448934, "grad_norm": 0.5352870225906372, "learning_rate": 1.3283398499256138e-06, "loss": 0.5896, "step": 10842 }, { "epoch": 1.9342609936669342, "grad_norm": 0.5666049718856812, "learning_rate": 1.3211367768598548e-06, "loss": 0.5675, "step": 10843 }, { "epoch": 1.9344393898849344, "grad_norm": 0.5233065485954285, "learning_rate": 1.3139532349249473e-06, "loss": 0.5271, "step": 10844 }, { "epoch": 1.9346177861029346, "grad_norm": 0.47093701362609863, "learning_rate": 1.3067892246851897e-06, "loss": 0.3446, "step": 10845 }, { "epoch": 1.9347961823209348, "grad_norm": 0.5583943724632263, "learning_rate": 1.2996447467031326e-06, "loss": 0.5945, "step": 10846 }, { "epoch": 1.934974578538935, "grad_norm": 0.4825393259525299, "learning_rate": 1.292519801539993e-06, "loss": 0.369, "step": 10847 }, { "epoch": 1.9351529747569352, "grad_norm": 0.6124469637870789, "learning_rate": 1.2854143897552961e-06, "loss": 0.5456, "step": 10848 }, { "epoch": 1.9353313709749353, "grad_norm": 0.4597680866718292, "learning_rate": 1.278328511907123e-06, "loss": 0.3464, "step": 10849 }, { "epoch": 1.9355097671929355, "grad_norm": 0.529517650604248, "learning_rate": 1.2712621685520287e-06, "loss": 0.4432, "step": 10850 }, { "epoch": 1.9356881634109357, "grad_norm": 0.52036052942276, "learning_rate": 1.2642153602449303e-06, "loss": 0.488, "step": 10851 }, { "epoch": 1.935866559628936, "grad_norm": 0.5084417462348938, "learning_rate": 1.2571880875393293e-06, "loss": 0.4408, "step": 10852 }, { "epoch": 1.9360449558469361, "grad_norm": 0.5051507353782654, "learning_rate": 1.2501803509871457e-06, "loss": 0.4353, "step": 10853 }, { "epoch": 1.9362233520649363, "grad_norm": 0.4483596980571747, "learning_rate": 1.2431921511387167e-06, "loss": 0.3902, "step": 10854 }, { "epoch": 1.9364017482829365, "grad_norm": 0.5327636003494263, "learning_rate": 1.2362234885429646e-06, "loss": 0.5355, "step": 10855 }, { "epoch": 1.9365801445009367, "grad_norm": 0.5151218175888062, "learning_rate": 1.229274363747146e-06, "loss": 0.5091, "step": 10856 }, { "epoch": 1.9367585407189367, "grad_norm": 0.4956924319267273, "learning_rate": 1.2223447772970742e-06, "loss": 0.4205, "step": 10857 }, { "epoch": 1.936936936936937, "grad_norm": 0.4500170052051544, "learning_rate": 1.2154347297369806e-06, "loss": 0.3676, "step": 10858 }, { "epoch": 1.9371153331549371, "grad_norm": 0.44707950949668884, "learning_rate": 1.2085442216095977e-06, "loss": 0.3637, "step": 10859 }, { "epoch": 1.9372937293729373, "grad_norm": 0.5490984916687012, "learning_rate": 1.201673253456076e-06, "loss": 0.498, "step": 10860 }, { "epoch": 1.9374721255909375, "grad_norm": 0.5158944725990295, "learning_rate": 1.194821825816067e-06, "loss": 0.4, "step": 10861 }, { "epoch": 1.9376505218089375, "grad_norm": 0.519051194190979, "learning_rate": 1.1879899392276961e-06, "loss": 0.4514, "step": 10862 }, { "epoch": 1.9378289180269377, "grad_norm": 0.5193673968315125, "learning_rate": 1.1811775942275061e-06, "loss": 0.5199, "step": 10863 }, { "epoch": 1.9380073142449379, "grad_norm": 0.600447952747345, "learning_rate": 1.174384791350569e-06, "loss": 0.6023, "step": 10864 }, { "epoch": 1.938185710462938, "grad_norm": 0.5528981685638428, "learning_rate": 1.1676115311303747e-06, "loss": 0.4367, "step": 10865 }, { "epoch": 1.9383641066809383, "grad_norm": 0.5125691890716553, "learning_rate": 1.160857814098859e-06, "loss": 0.4637, "step": 10866 }, { "epoch": 1.9385425028989385, "grad_norm": 0.5695079565048218, "learning_rate": 1.154123640786514e-06, "loss": 0.573, "step": 10867 }, { "epoch": 1.9387208991169387, "grad_norm": 0.5509238243103027, "learning_rate": 1.1474090117221947e-06, "loss": 0.5835, "step": 10868 }, { "epoch": 1.9388992953349389, "grad_norm": 0.5293681025505066, "learning_rate": 1.1407139274333124e-06, "loss": 0.5052, "step": 10869 }, { "epoch": 1.939077691552939, "grad_norm": 0.5039731860160828, "learning_rate": 1.1340383884456407e-06, "loss": 0.4013, "step": 10870 }, { "epoch": 1.9392560877709393, "grad_norm": 0.5404682159423828, "learning_rate": 1.1273823952835106e-06, "loss": 0.5339, "step": 10871 }, { "epoch": 1.9394344839889395, "grad_norm": 0.49914366006851196, "learning_rate": 1.1207459484696424e-06, "loss": 0.5104, "step": 10872 }, { "epoch": 1.9396128802069397, "grad_norm": 0.5963449478149414, "learning_rate": 1.1141290485253141e-06, "loss": 0.5313, "step": 10873 }, { "epoch": 1.9397912764249399, "grad_norm": 0.5142577886581421, "learning_rate": 1.107531695970193e-06, "loss": 0.4771, "step": 10874 }, { "epoch": 1.93996967264294, "grad_norm": 0.49809107184410095, "learning_rate": 1.1009538913223927e-06, "loss": 0.4144, "step": 10875 }, { "epoch": 1.9401480688609403, "grad_norm": 0.5837324261665344, "learning_rate": 1.0943956350985828e-06, "loss": 0.6171, "step": 10876 }, { "epoch": 1.9403264650789405, "grad_norm": 0.48720839619636536, "learning_rate": 1.0878569278138239e-06, "loss": 0.331, "step": 10877 }, { "epoch": 1.9405048612969407, "grad_norm": 0.5243699550628662, "learning_rate": 1.0813377699816773e-06, "loss": 0.4904, "step": 10878 }, { "epoch": 1.9406832575149406, "grad_norm": 0.564947783946991, "learning_rate": 1.07483816211415e-06, "loss": 0.6612, "step": 10879 }, { "epoch": 1.9408616537329408, "grad_norm": 0.5145286321640015, "learning_rate": 1.068358104721695e-06, "loss": 0.5127, "step": 10880 }, { "epoch": 1.941040049950941, "grad_norm": 0.5018771290779114, "learning_rate": 1.0618975983132662e-06, "loss": 0.4624, "step": 10881 }, { "epoch": 1.9412184461689412, "grad_norm": 0.490432471036911, "learning_rate": 1.055456643396291e-06, "loss": 0.4464, "step": 10882 }, { "epoch": 1.9413968423869414, "grad_norm": 0.45111989974975586, "learning_rate": 1.0490352404766146e-06, "loss": 0.3621, "step": 10883 }, { "epoch": 1.9415752386049414, "grad_norm": 0.5172296762466431, "learning_rate": 1.042633390058556e-06, "loss": 0.4011, "step": 10884 }, { "epoch": 1.9417536348229416, "grad_norm": 0.5376213192939758, "learning_rate": 1.036251092644963e-06, "loss": 0.5768, "step": 10885 }, { "epoch": 1.9419320310409418, "grad_norm": 0.5566000938415527, "learning_rate": 1.0298883487370736e-06, "loss": 0.5342, "step": 10886 }, { "epoch": 1.942110427258942, "grad_norm": 0.5264571309089661, "learning_rate": 1.023545158834599e-06, "loss": 0.4918, "step": 10887 }, { "epoch": 1.9422888234769422, "grad_norm": 0.4640972316265106, "learning_rate": 1.0172215234357519e-06, "loss": 0.3828, "step": 10888 }, { "epoch": 1.9424672196949424, "grad_norm": 0.5521480441093445, "learning_rate": 1.0109174430371905e-06, "loss": 0.4931, "step": 10889 }, { "epoch": 1.9426456159129426, "grad_norm": 0.48122310638427734, "learning_rate": 1.0046329181340187e-06, "loss": 0.3782, "step": 10890 }, { "epoch": 1.9428240121309428, "grad_norm": 0.5191644430160522, "learning_rate": 9.983679492198139e-07, "loss": 0.4283, "step": 10891 }, { "epoch": 1.943002408348943, "grad_norm": 0.48529642820358276, "learning_rate": 9.92122536786627e-07, "loss": 0.3826, "step": 10892 }, { "epoch": 1.9431808045669432, "grad_norm": 0.5107811689376831, "learning_rate": 9.858966813250102e-07, "loss": 0.497, "step": 10893 }, { "epoch": 1.9433592007849434, "grad_norm": 0.4362351596355438, "learning_rate": 9.796903833239057e-07, "loss": 0.3437, "step": 10894 }, { "epoch": 1.9435375970029436, "grad_norm": 0.5843741297721863, "learning_rate": 9.73503643270729e-07, "loss": 0.6999, "step": 10895 }, { "epoch": 1.9437159932209438, "grad_norm": 0.49152183532714844, "learning_rate": 9.673364616514247e-07, "loss": 0.4538, "step": 10896 }, { "epoch": 1.943894389438944, "grad_norm": 0.6454296112060547, "learning_rate": 9.611888389503553e-07, "loss": 0.5617, "step": 10897 }, { "epoch": 1.9440727856569442, "grad_norm": 0.5631362795829773, "learning_rate": 9.55060775650357e-07, "loss": 0.502, "step": 10898 }, { "epoch": 1.9442511818749444, "grad_norm": 0.5519803762435913, "learning_rate": 9.489522722326838e-07, "loss": 0.6406, "step": 10899 }, { "epoch": 1.9444295780929446, "grad_norm": 0.5168196558952332, "learning_rate": 9.42863329177146e-07, "loss": 0.3918, "step": 10900 }, { "epoch": 1.9446079743109446, "grad_norm": 0.5088583827018738, "learning_rate": 9.367939469619169e-07, "loss": 0.3789, "step": 10901 }, { "epoch": 1.9447863705289448, "grad_norm": 0.5261327624320984, "learning_rate": 9.307441260637261e-07, "loss": 0.5678, "step": 10902 }, { "epoch": 1.944964766746945, "grad_norm": 0.5280705094337463, "learning_rate": 9.247138669577215e-07, "loss": 0.4663, "step": 10903 }, { "epoch": 1.9451431629649452, "grad_norm": 0.5464169383049011, "learning_rate": 9.187031701174963e-07, "loss": 0.5453, "step": 10904 }, { "epoch": 1.9453215591829454, "grad_norm": 0.587669312953949, "learning_rate": 9.127120360151175e-07, "loss": 0.5612, "step": 10905 }, { "epoch": 1.9454999554009453, "grad_norm": 0.512403130531311, "learning_rate": 9.067404651211808e-07, "loss": 0.4129, "step": 10906 }, { "epoch": 1.9456783516189455, "grad_norm": 0.6009140014648438, "learning_rate": 9.007884579046444e-07, "loss": 0.5581, "step": 10907 }, { "epoch": 1.9458567478369457, "grad_norm": 0.5417647957801819, "learning_rate": 8.948560148329676e-07, "loss": 0.5139, "step": 10908 }, { "epoch": 1.946035144054946, "grad_norm": 0.5130847692489624, "learning_rate": 8.889431363721112e-07, "loss": 0.4498, "step": 10909 }, { "epoch": 1.9462135402729461, "grad_norm": 0.5233637094497681, "learning_rate": 8.830498229864537e-07, "loss": 0.4592, "step": 10910 }, { "epoch": 1.9463919364909463, "grad_norm": 0.5232832431793213, "learning_rate": 8.771760751388746e-07, "loss": 0.44, "step": 10911 }, { "epoch": 1.9465703327089465, "grad_norm": 0.47777479887008667, "learning_rate": 8.713218932906719e-07, "loss": 0.4183, "step": 10912 }, { "epoch": 1.9467487289269467, "grad_norm": 0.5267250537872314, "learning_rate": 8.654872779016443e-07, "loss": 0.4499, "step": 10913 }, { "epoch": 1.946927125144947, "grad_norm": 0.5199578404426575, "learning_rate": 8.596722294300364e-07, "loss": 0.4039, "step": 10914 }, { "epoch": 1.9471055213629471, "grad_norm": 0.4668145477771759, "learning_rate": 8.538767483325383e-07, "loss": 0.3787, "step": 10915 }, { "epoch": 1.9472839175809473, "grad_norm": 0.5497430562973022, "learning_rate": 8.481008350643693e-07, "loss": 0.5505, "step": 10916 }, { "epoch": 1.9474623137989475, "grad_norm": 0.5314509272575378, "learning_rate": 8.423444900791111e-07, "loss": 0.4944, "step": 10917 }, { "epoch": 1.9476407100169477, "grad_norm": 0.46878379583358765, "learning_rate": 8.366077138289296e-07, "loss": 0.3517, "step": 10918 }, { "epoch": 1.947819106234948, "grad_norm": 0.4314865469932556, "learning_rate": 8.308905067643536e-07, "loss": 0.3136, "step": 10919 }, { "epoch": 1.9479975024529481, "grad_norm": 0.5057112574577332, "learning_rate": 8.251928693343846e-07, "loss": 0.5543, "step": 10920 }, { "epoch": 1.9481758986709483, "grad_norm": 0.5008751153945923, "learning_rate": 8.19514801986554e-07, "loss": 0.4436, "step": 10921 }, { "epoch": 1.9483542948889485, "grad_norm": 0.49664172530174255, "learning_rate": 8.138563051667824e-07, "loss": 0.3823, "step": 10922 }, { "epoch": 1.9485326911069485, "grad_norm": 0.9543033838272095, "learning_rate": 8.082173793195479e-07, "loss": 0.448, "step": 10923 }, { "epoch": 1.9487110873249487, "grad_norm": 0.49950093030929565, "learning_rate": 8.025980248876352e-07, "loss": 0.3344, "step": 10924 }, { "epoch": 1.9488894835429489, "grad_norm": 0.5399156808853149, "learning_rate": 7.969982423124689e-07, "loss": 0.5778, "step": 10925 }, { "epoch": 1.949067879760949, "grad_norm": 0.5570793151855469, "learning_rate": 7.914180320338082e-07, "loss": 0.511, "step": 10926 }, { "epoch": 1.9492462759789493, "grad_norm": 0.5615115761756897, "learning_rate": 7.858573944899139e-07, "loss": 0.5364, "step": 10927 }, { "epoch": 1.9494246721969493, "grad_norm": 0.5255836844444275, "learning_rate": 7.803163301175753e-07, "loss": 0.5906, "step": 10928 }, { "epoch": 1.9496030684149495, "grad_norm": 0.5021457672119141, "learning_rate": 7.747948393519167e-07, "loss": 0.4357, "step": 10929 }, { "epoch": 1.9497814646329497, "grad_norm": 0.5015770196914673, "learning_rate": 7.692929226266188e-07, "loss": 0.4276, "step": 10930 }, { "epoch": 1.9499598608509499, "grad_norm": 0.5243127346038818, "learning_rate": 7.638105803738083e-07, "loss": 0.5425, "step": 10931 }, { "epoch": 1.95013825706895, "grad_norm": 0.47330746054649353, "learning_rate": 7.583478130240851e-07, "loss": 0.3797, "step": 10932 }, { "epoch": 1.9503166532869503, "grad_norm": 0.49246537685394287, "learning_rate": 7.529046210064394e-07, "loss": 0.435, "step": 10933 }, { "epoch": 1.9504950495049505, "grad_norm": 0.5126248598098755, "learning_rate": 7.474810047484182e-07, "loss": 0.4749, "step": 10934 }, { "epoch": 1.9506734457229506, "grad_norm": 0.5344600081443787, "learning_rate": 7.420769646759584e-07, "loss": 0.4556, "step": 10935 }, { "epoch": 1.9508518419409508, "grad_norm": 0.5256651043891907, "learning_rate": 7.366925012135262e-07, "loss": 0.5272, "step": 10936 }, { "epoch": 1.951030238158951, "grad_norm": 0.49008166790008545, "learning_rate": 7.313276147840053e-07, "loss": 0.4495, "step": 10937 }, { "epoch": 1.9512086343769512, "grad_norm": 0.4859989285469055, "learning_rate": 7.259823058087256e-07, "loss": 0.3527, "step": 10938 }, { "epoch": 1.9513870305949514, "grad_norm": 0.4771299958229065, "learning_rate": 7.206565747075178e-07, "loss": 0.4565, "step": 10939 }, { "epoch": 1.9515654268129516, "grad_norm": 0.49878621101379395, "learning_rate": 7.153504218986862e-07, "loss": 0.39, "step": 10940 }, { "epoch": 1.9517438230309518, "grad_norm": 0.6032863855361938, "learning_rate": 7.10063847798953e-07, "loss": 0.4377, "step": 10941 }, { "epoch": 1.951922219248952, "grad_norm": 0.5695745348930359, "learning_rate": 7.047968528235416e-07, "loss": 0.5126, "step": 10942 }, { "epoch": 1.9521006154669522, "grad_norm": 0.567709743976593, "learning_rate": 6.995494373860656e-07, "loss": 0.5469, "step": 10943 }, { "epoch": 1.9522790116849524, "grad_norm": 0.516247034072876, "learning_rate": 6.943216018987508e-07, "loss": 0.437, "step": 10944 }, { "epoch": 1.9524574079029524, "grad_norm": 0.5342845916748047, "learning_rate": 6.891133467721022e-07, "loss": 0.4906, "step": 10945 }, { "epoch": 1.9526358041209526, "grad_norm": 0.5445780158042908, "learning_rate": 6.839246724151815e-07, "loss": 0.4014, "step": 10946 }, { "epoch": 1.9528142003389528, "grad_norm": 0.5402600169181824, "learning_rate": 6.787555792355793e-07, "loss": 0.5584, "step": 10947 }, { "epoch": 1.952992596556953, "grad_norm": 0.5336621403694153, "learning_rate": 6.736060676391653e-07, "loss": 0.435, "step": 10948 }, { "epoch": 1.9531709927749532, "grad_norm": 0.6429981589317322, "learning_rate": 6.684761380304772e-07, "loss": 0.4705, "step": 10949 }, { "epoch": 1.9533493889929532, "grad_norm": 0.4981113374233246, "learning_rate": 6.633657908123592e-07, "loss": 0.484, "step": 10950 }, { "epoch": 1.9535277852109534, "grad_norm": 0.5144602656364441, "learning_rate": 6.582750263862125e-07, "loss": 0.5402, "step": 10951 }, { "epoch": 1.9537061814289536, "grad_norm": 0.5602140426635742, "learning_rate": 6.532038451518286e-07, "loss": 0.498, "step": 10952 }, { "epoch": 1.9538845776469538, "grad_norm": 0.5775460600852966, "learning_rate": 6.481522475075274e-07, "loss": 0.5634, "step": 10953 }, { "epoch": 1.954062973864954, "grad_norm": 0.4967274069786072, "learning_rate": 6.431202338500475e-07, "loss": 0.3654, "step": 10954 }, { "epoch": 1.9542413700829542, "grad_norm": 0.5827997326850891, "learning_rate": 6.381078045745725e-07, "loss": 0.6665, "step": 10955 }, { "epoch": 1.9544197663009544, "grad_norm": 0.5111951231956482, "learning_rate": 6.331149600748154e-07, "loss": 0.4386, "step": 10956 }, { "epoch": 1.9545981625189546, "grad_norm": 0.4768127202987671, "learning_rate": 6.281417007429069e-07, "loss": 0.4033, "step": 10957 }, { "epoch": 1.9547765587369548, "grad_norm": 0.5813576579093933, "learning_rate": 6.231880269694235e-07, "loss": 0.7205, "step": 10958 }, { "epoch": 1.954954954954955, "grad_norm": 0.4979632496833801, "learning_rate": 6.182539391434428e-07, "loss": 0.3926, "step": 10959 }, { "epoch": 1.9551333511729552, "grad_norm": 0.4942147433757782, "learning_rate": 6.133394376524604e-07, "loss": 0.4401, "step": 10960 }, { "epoch": 1.9553117473909554, "grad_norm": 0.5690099000930786, "learning_rate": 6.084445228825009e-07, "loss": 0.5989, "step": 10961 }, { "epoch": 1.9554901436089556, "grad_norm": 0.5280899405479431, "learning_rate": 6.035691952179789e-07, "loss": 0.4661, "step": 10962 }, { "epoch": 1.9556685398269558, "grad_norm": 0.49601706862449646, "learning_rate": 5.987134550418105e-07, "loss": 0.4591, "step": 10963 }, { "epoch": 1.955846936044956, "grad_norm": 0.5549279451370239, "learning_rate": 5.938773027353572e-07, "loss": 0.4277, "step": 10964 }, { "epoch": 1.9560253322629562, "grad_norm": 0.6416307687759399, "learning_rate": 5.890607386784818e-07, "loss": 0.6958, "step": 10965 }, { "epoch": 1.9562037284809564, "grad_norm": 0.5070635080337524, "learning_rate": 5.842637632494097e-07, "loss": 0.4569, "step": 10966 }, { "epoch": 1.9563821246989563, "grad_norm": 0.5254069566726685, "learning_rate": 5.794863768249503e-07, "loss": 0.5185, "step": 10967 }, { "epoch": 1.9565605209169565, "grad_norm": 0.48455336689949036, "learning_rate": 5.747285797802759e-07, "loss": 0.403, "step": 10968 }, { "epoch": 1.9567389171349567, "grad_norm": 0.574587345123291, "learning_rate": 5.699903724891153e-07, "loss": 0.7152, "step": 10969 }, { "epoch": 1.956917313352957, "grad_norm": 0.5534584522247314, "learning_rate": 5.652717553235597e-07, "loss": 0.5475, "step": 10970 }, { "epoch": 1.9570957095709571, "grad_norm": 0.5564422011375427, "learning_rate": 5.605727286542017e-07, "loss": 0.544, "step": 10971 }, { "epoch": 1.957274105788957, "grad_norm": 0.5110240578651428, "learning_rate": 5.558932928501347e-07, "loss": 0.4253, "step": 10972 }, { "epoch": 1.9574525020069573, "grad_norm": 0.5717257857322693, "learning_rate": 5.512334482788428e-07, "loss": 0.685, "step": 10973 }, { "epoch": 1.9576308982249575, "grad_norm": 0.5493577718734741, "learning_rate": 5.465931953063663e-07, "loss": 0.459, "step": 10974 }, { "epoch": 1.9578092944429577, "grad_norm": 0.5325579643249512, "learning_rate": 5.419725342970805e-07, "loss": 0.4834, "step": 10975 }, { "epoch": 1.957987690660958, "grad_norm": 0.5637785792350769, "learning_rate": 5.37371465613945e-07, "loss": 0.5612, "step": 10976 }, { "epoch": 1.958166086878958, "grad_norm": 0.5901339054107666, "learning_rate": 5.32789989618282e-07, "loss": 0.6958, "step": 10977 }, { "epoch": 1.9583444830969583, "grad_norm": 0.49308058619499207, "learning_rate": 5.282281066699701e-07, "loss": 0.4053, "step": 10978 }, { "epoch": 1.9585228793149585, "grad_norm": 0.5078552961349487, "learning_rate": 5.236858171272229e-07, "loss": 0.433, "step": 10979 }, { "epoch": 1.9587012755329587, "grad_norm": 0.5317704677581787, "learning_rate": 5.191631213468661e-07, "loss": 0.4935, "step": 10980 }, { "epoch": 1.958879671750959, "grad_norm": 0.606121301651001, "learning_rate": 5.1466001968406e-07, "loss": 0.6044, "step": 10981 }, { "epoch": 1.959058067968959, "grad_norm": 0.49278080463409424, "learning_rate": 5.101765124925217e-07, "loss": 0.3807, "step": 10982 }, { "epoch": 1.9592364641869593, "grad_norm": 0.5795280933380127, "learning_rate": 5.057126001243306e-07, "loss": 0.5853, "step": 10983 }, { "epoch": 1.9594148604049595, "grad_norm": 0.5039269924163818, "learning_rate": 5.01268282930123e-07, "loss": 0.4857, "step": 10984 }, { "epoch": 1.9595932566229597, "grad_norm": 0.5751108527183533, "learning_rate": 4.968435612588973e-07, "loss": 0.5788, "step": 10985 }, { "epoch": 1.9597716528409599, "grad_norm": 0.5224045515060425, "learning_rate": 4.924384354582645e-07, "loss": 0.505, "step": 10986 }, { "epoch": 1.95995004905896, "grad_norm": 0.6626994609832764, "learning_rate": 4.880529058741146e-07, "loss": 0.586, "step": 10987 }, { "epoch": 1.9601284452769603, "grad_norm": 0.571682333946228, "learning_rate": 4.836869728508941e-07, "loss": 0.4711, "step": 10988 }, { "epoch": 1.9603068414949603, "grad_norm": 0.5564180016517639, "learning_rate": 4.793406367315512e-07, "loss": 0.5446, "step": 10989 }, { "epoch": 1.9604852377129605, "grad_norm": 0.5148413777351379, "learning_rate": 4.750138978574237e-07, "loss": 0.5736, "step": 10990 }, { "epoch": 1.9606636339309607, "grad_norm": 0.5848867297172546, "learning_rate": 4.7070675656832316e-07, "loss": 0.5588, "step": 10991 }, { "epoch": 1.9608420301489609, "grad_norm": 0.5551292896270752, "learning_rate": 4.6641921320253465e-07, "loss": 0.5043, "step": 10992 }, { "epoch": 1.961020426366961, "grad_norm": 0.543438732624054, "learning_rate": 4.6215126809678875e-07, "loss": 0.4619, "step": 10993 }, { "epoch": 1.961198822584961, "grad_norm": 0.4963773190975189, "learning_rate": 4.579029215862895e-07, "loss": 0.4392, "step": 10994 }, { "epoch": 1.9613772188029612, "grad_norm": 0.5400619506835938, "learning_rate": 4.5367417400471454e-07, "loss": 0.6237, "step": 10995 }, { "epoch": 1.9615556150209614, "grad_norm": 0.5189566612243652, "learning_rate": 4.49465025684187e-07, "loss": 0.416, "step": 10996 }, { "epoch": 1.9617340112389616, "grad_norm": 0.5119401216506958, "learning_rate": 4.452754769553036e-07, "loss": 0.4979, "step": 10997 }, { "epoch": 1.9619124074569618, "grad_norm": 0.4972620904445648, "learning_rate": 4.4110552814707884e-07, "loss": 0.4132, "step": 10998 }, { "epoch": 1.962090803674962, "grad_norm": 0.6130009889602661, "learning_rate": 4.3695517958702856e-07, "loss": 0.6817, "step": 10999 }, { "epoch": 1.9622691998929622, "grad_norm": 0.5110456347465515, "learning_rate": 4.328244316011143e-07, "loss": 0.3981, "step": 11000 }, { "epoch": 1.9624475961109624, "grad_norm": 0.5126025676727295, "learning_rate": 4.287132845137709e-07, "loss": 0.4441, "step": 11001 }, { "epoch": 1.9626259923289626, "grad_norm": 0.5288227796554565, "learning_rate": 4.246217386479068e-07, "loss": 0.4437, "step": 11002 }, { "epoch": 1.9628043885469628, "grad_norm": 0.5382393002510071, "learning_rate": 4.2054979432482044e-07, "loss": 0.571, "step": 11003 }, { "epoch": 1.962982784764963, "grad_norm": 0.4760775566101074, "learning_rate": 4.164974518643672e-07, "loss": 0.4336, "step": 11004 }, { "epoch": 1.9631611809829632, "grad_norm": 0.5154813528060913, "learning_rate": 4.1246471158482015e-07, "loss": 0.4456, "step": 11005 }, { "epoch": 1.9633395772009634, "grad_norm": 0.5530751943588257, "learning_rate": 4.0845157380287047e-07, "loss": 0.5216, "step": 11006 }, { "epoch": 1.9635179734189636, "grad_norm": 0.5821408033370972, "learning_rate": 4.044580388337105e-07, "loss": 0.6407, "step": 11007 }, { "epoch": 1.9636963696369638, "grad_norm": 0.5298261642456055, "learning_rate": 4.0048410699103365e-07, "loss": 0.5755, "step": 11008 }, { "epoch": 1.963874765854964, "grad_norm": 0.5816518068313599, "learning_rate": 3.9652977858692375e-07, "loss": 0.6441, "step": 11009 }, { "epoch": 1.9640531620729642, "grad_norm": 0.5004158020019531, "learning_rate": 3.9259505393193785e-07, "loss": 0.4389, "step": 11010 }, { "epoch": 1.9642315582909642, "grad_norm": 0.4865829348564148, "learning_rate": 3.886799333351343e-07, "loss": 0.4019, "step": 11011 }, { "epoch": 1.9644099545089644, "grad_norm": 0.48600196838378906, "learning_rate": 3.847844171039616e-07, "loss": 0.3581, "step": 11012 }, { "epoch": 1.9645883507269646, "grad_norm": 0.4642302393913269, "learning_rate": 3.809085055444528e-07, "loss": 0.3521, "step": 11013 }, { "epoch": 1.9647667469449648, "grad_norm": 0.45133063197135925, "learning_rate": 3.770521989609199e-07, "loss": 0.3936, "step": 11014 }, { "epoch": 1.964945143162965, "grad_norm": 0.4991499185562134, "learning_rate": 3.7321549765631514e-07, "loss": 0.3983, "step": 11015 }, { "epoch": 1.9651235393809652, "grad_norm": 0.526542067527771, "learning_rate": 3.6939840193195296e-07, "loss": 0.4563, "step": 11016 }, { "epoch": 1.9653019355989652, "grad_norm": 0.5464054346084595, "learning_rate": 3.656009120875936e-07, "loss": 0.4337, "step": 11017 }, { "epoch": 1.9654803318169654, "grad_norm": 0.43672722578048706, "learning_rate": 3.618230284215263e-07, "loss": 0.2778, "step": 11018 }, { "epoch": 1.9656587280349656, "grad_norm": 0.6000391244888306, "learning_rate": 3.580647512304303e-07, "loss": 0.5887, "step": 11019 }, { "epoch": 1.9658371242529658, "grad_norm": 0.5648049712181091, "learning_rate": 3.543260808095139e-07, "loss": 0.623, "step": 11020 }, { "epoch": 1.966015520470966, "grad_norm": 0.4601522386074066, "learning_rate": 3.5060701745240344e-07, "loss": 0.3363, "step": 11021 }, { "epoch": 1.9661939166889661, "grad_norm": 0.48688170313835144, "learning_rate": 3.4690756145117074e-07, "loss": 0.431, "step": 11022 }, { "epoch": 1.9663723129069663, "grad_norm": 0.5293704271316528, "learning_rate": 3.43227713096389e-07, "loss": 0.5696, "step": 11023 }, { "epoch": 1.9665507091249665, "grad_norm": 0.511298418045044, "learning_rate": 3.39567472677077e-07, "loss": 0.4419, "step": 11024 }, { "epoch": 1.9667291053429667, "grad_norm": 0.580195963382721, "learning_rate": 3.3592684048067144e-07, "loss": 0.6206, "step": 11025 }, { "epoch": 1.966907501560967, "grad_norm": 0.5301962494850159, "learning_rate": 3.3230581679316584e-07, "loss": 0.4893, "step": 11026 }, { "epoch": 1.9670858977789671, "grad_norm": 0.48305174708366394, "learning_rate": 3.287044018988883e-07, "loss": 0.4104, "step": 11027 }, { "epoch": 1.9672642939969673, "grad_norm": 0.5521623492240906, "learning_rate": 3.2512259608075134e-07, "loss": 0.5381, "step": 11028 }, { "epoch": 1.9674426902149675, "grad_norm": 0.5551265478134155, "learning_rate": 3.2156039962003e-07, "loss": 0.5689, "step": 11029 }, { "epoch": 1.9676210864329677, "grad_norm": 0.5386922359466553, "learning_rate": 3.180178127965283e-07, "loss": 0.5749, "step": 11030 }, { "epoch": 1.967799482650968, "grad_norm": 0.5408132672309875, "learning_rate": 3.144948358884403e-07, "loss": 0.4842, "step": 11031 }, { "epoch": 1.9679778788689681, "grad_norm": 0.5893284678459167, "learning_rate": 3.109914691724891e-07, "loss": 0.6408, "step": 11032 }, { "epoch": 1.968156275086968, "grad_norm": 0.5585829019546509, "learning_rate": 3.0750771292381573e-07, "loss": 0.5564, "step": 11033 }, { "epoch": 1.9683346713049683, "grad_norm": 0.544394850730896, "learning_rate": 3.0404356741603467e-07, "loss": 0.4695, "step": 11034 }, { "epoch": 1.9685130675229685, "grad_norm": 0.4653702676296234, "learning_rate": 3.0059903292120605e-07, "loss": 0.3943, "step": 11035 }, { "epoch": 1.9686914637409687, "grad_norm": 0.5404059886932373, "learning_rate": 2.971741097098912e-07, "loss": 0.5084, "step": 11036 }, { "epoch": 1.968869859958969, "grad_norm": 0.5264713168144226, "learning_rate": 2.9376879805106947e-07, "loss": 0.4465, "step": 11037 }, { "epoch": 1.969048256176969, "grad_norm": 0.48295858502388, "learning_rate": 2.903830982121658e-07, "loss": 0.3769, "step": 11038 }, { "epoch": 1.969226652394969, "grad_norm": 0.5343292355537415, "learning_rate": 2.870170104591341e-07, "loss": 0.533, "step": 11039 }, { "epoch": 1.9694050486129693, "grad_norm": 0.5259873270988464, "learning_rate": 2.8367053505631846e-07, "loss": 0.4229, "step": 11040 }, { "epoch": 1.9695834448309695, "grad_norm": 0.46989312767982483, "learning_rate": 2.803436722665642e-07, "loss": 0.4079, "step": 11041 }, { "epoch": 1.9697618410489697, "grad_norm": 0.50926274061203, "learning_rate": 2.770364223511623e-07, "loss": 0.494, "step": 11042 }, { "epoch": 1.9699402372669699, "grad_norm": 0.756600022315979, "learning_rate": 2.737487855698495e-07, "loss": 0.5463, "step": 11043 }, { "epoch": 1.97011863348497, "grad_norm": 0.47964367270469666, "learning_rate": 2.7048076218083585e-07, "loss": 0.3886, "step": 11044 }, { "epoch": 1.9702970297029703, "grad_norm": 0.5303892493247986, "learning_rate": 2.672323524408049e-07, "loss": 0.5722, "step": 11045 }, { "epoch": 1.9704754259209705, "grad_norm": 0.4234614670276642, "learning_rate": 2.6400355660488594e-07, "loss": 0.2525, "step": 11046 }, { "epoch": 1.9706538221389707, "grad_norm": 0.5104755163192749, "learning_rate": 2.607943749266262e-07, "loss": 0.4572, "step": 11047 }, { "epoch": 1.9708322183569709, "grad_norm": 0.5379624366760254, "learning_rate": 2.5760480765812946e-07, "loss": 0.5121, "step": 11048 }, { "epoch": 1.971010614574971, "grad_norm": 0.5435539484024048, "learning_rate": 2.544348550498621e-07, "loss": 0.5189, "step": 11049 }, { "epoch": 1.9711890107929713, "grad_norm": 0.6087327599525452, "learning_rate": 2.512845173508194e-07, "loss": 0.5903, "step": 11050 }, { "epoch": 1.9713674070109715, "grad_norm": 0.5233466029167175, "learning_rate": 2.481537948084145e-07, "loss": 0.5574, "step": 11051 }, { "epoch": 1.9715458032289717, "grad_norm": 0.4774353802204132, "learning_rate": 2.4504268766853413e-07, "loss": 0.4658, "step": 11052 }, { "epoch": 1.9717241994469719, "grad_norm": 0.5861150622367859, "learning_rate": 2.4195119617551054e-07, "loss": 0.4947, "step": 11053 }, { "epoch": 1.971902595664972, "grad_norm": 0.4522158205509186, "learning_rate": 2.388793205721773e-07, "loss": 0.2857, "step": 11054 }, { "epoch": 1.972080991882972, "grad_norm": 0.4695200026035309, "learning_rate": 2.358270610997859e-07, "loss": 0.4349, "step": 11055 }, { "epoch": 1.9722593881009722, "grad_norm": 0.5388724207878113, "learning_rate": 2.3279441799803347e-07, "loss": 0.5289, "step": 11056 }, { "epoch": 1.9724377843189724, "grad_norm": 0.5425492525100708, "learning_rate": 2.297813915051461e-07, "loss": 0.4849, "step": 11057 }, { "epoch": 1.9726161805369726, "grad_norm": 0.5144857168197632, "learning_rate": 2.2678798185771233e-07, "loss": 0.4728, "step": 11058 }, { "epoch": 1.9727945767549728, "grad_norm": 0.5156108736991882, "learning_rate": 2.2381418929090515e-07, "loss": 0.4917, "step": 11059 }, { "epoch": 1.972972972972973, "grad_norm": 0.4618896543979645, "learning_rate": 2.208600140382322e-07, "loss": 0.3896, "step": 11060 }, { "epoch": 1.973151369190973, "grad_norm": 0.484020471572876, "learning_rate": 2.1792545633170236e-07, "loss": 0.4211, "step": 11061 }, { "epoch": 1.9733297654089732, "grad_norm": 0.5049816370010376, "learning_rate": 2.1501051640182566e-07, "loss": 0.4561, "step": 11062 }, { "epoch": 1.9735081616269734, "grad_norm": 0.5276315808296204, "learning_rate": 2.121151944775579e-07, "loss": 0.4653, "step": 11063 }, { "epoch": 1.9736865578449736, "grad_norm": 0.45448240637779236, "learning_rate": 2.0923949078624493e-07, "loss": 0.3645, "step": 11064 }, { "epoch": 1.9738649540629738, "grad_norm": 0.5729450583457947, "learning_rate": 2.0638340555376168e-07, "loss": 0.5265, "step": 11065 }, { "epoch": 1.974043350280974, "grad_norm": 0.47857052087783813, "learning_rate": 2.0354693900445643e-07, "loss": 0.3919, "step": 11066 }, { "epoch": 1.9742217464989742, "grad_norm": 0.5773352980613708, "learning_rate": 2.0073009136106768e-07, "loss": 0.4904, "step": 11067 }, { "epoch": 1.9744001427169744, "grad_norm": 0.47996997833251953, "learning_rate": 1.9793286284483514e-07, "loss": 0.4176, "step": 11068 }, { "epoch": 1.9745785389349746, "grad_norm": 0.45277294516563416, "learning_rate": 1.9515525367547192e-07, "loss": 0.3085, "step": 11069 }, { "epoch": 1.9747569351529748, "grad_norm": 0.49644288420677185, "learning_rate": 1.9239726407110913e-07, "loss": 0.4873, "step": 11070 }, { "epoch": 1.974935331370975, "grad_norm": 0.5908966064453125, "learning_rate": 1.8965889424835126e-07, "loss": 0.6073, "step": 11071 }, { "epoch": 1.9751137275889752, "grad_norm": 0.487192839384079, "learning_rate": 1.86940144422304e-07, "loss": 0.4247, "step": 11072 }, { "epoch": 1.9752921238069754, "grad_norm": 0.5491554141044617, "learning_rate": 1.8424101480646326e-07, "loss": 0.4814, "step": 11073 }, { "epoch": 1.9754705200249756, "grad_norm": 0.5391243696212769, "learning_rate": 1.8156150561282614e-07, "loss": 0.4927, "step": 11074 }, { "epoch": 1.9756489162429758, "grad_norm": 0.5700384974479675, "learning_rate": 1.7890161705183538e-07, "loss": 0.6383, "step": 11075 }, { "epoch": 1.975827312460976, "grad_norm": 0.5709267258644104, "learning_rate": 1.7626134933243498e-07, "loss": 0.5058, "step": 11076 }, { "epoch": 1.976005708678976, "grad_norm": 0.5328453779220581, "learning_rate": 1.7364070266193133e-07, "loss": 0.4764, "step": 11077 }, { "epoch": 1.9761841048969762, "grad_norm": 0.5492943525314331, "learning_rate": 1.7103967724618753e-07, "loss": 0.5105, "step": 11078 }, { "epoch": 1.9763625011149764, "grad_norm": 0.6113517880439758, "learning_rate": 1.684582732894846e-07, "loss": 0.53, "step": 11079 }, { "epoch": 1.9765408973329766, "grad_norm": 0.6604119539260864, "learning_rate": 1.658964909945493e-07, "loss": 0.5217, "step": 11080 }, { "epoch": 1.9767192935509768, "grad_norm": 0.5008206367492676, "learning_rate": 1.6335433056258176e-07, "loss": 0.4225, "step": 11081 }, { "epoch": 1.976897689768977, "grad_norm": 0.5335331559181213, "learning_rate": 1.608317921932556e-07, "loss": 0.4722, "step": 11082 }, { "epoch": 1.977076085986977, "grad_norm": 0.5418149828910828, "learning_rate": 1.5832887608471792e-07, "loss": 0.4254, "step": 11083 }, { "epoch": 1.9772544822049771, "grad_norm": 0.5159653425216675, "learning_rate": 1.5584558243347813e-07, "loss": 0.4687, "step": 11084 }, { "epoch": 1.9774328784229773, "grad_norm": 0.5415779948234558, "learning_rate": 1.5338191143463022e-07, "loss": 0.4405, "step": 11085 }, { "epoch": 1.9776112746409775, "grad_norm": 0.6081345677375793, "learning_rate": 1.5093786328163052e-07, "loss": 0.4643, "step": 11086 }, { "epoch": 1.9777896708589777, "grad_norm": 0.5560696721076965, "learning_rate": 1.4851343816646434e-07, "loss": 0.6136, "step": 11087 }, { "epoch": 1.977968067076978, "grad_norm": 0.521943986415863, "learning_rate": 1.4610863627953496e-07, "loss": 0.5582, "step": 11088 }, { "epoch": 1.9781464632949781, "grad_norm": 0.5156327486038208, "learning_rate": 1.4372345780971907e-07, "loss": 0.4443, "step": 11089 }, { "epoch": 1.9783248595129783, "grad_norm": 0.5138960480690002, "learning_rate": 1.4135790294433904e-07, "loss": 0.4079, "step": 11090 }, { "epoch": 1.9785032557309785, "grad_norm": 0.503109335899353, "learning_rate": 1.3901197186919067e-07, "loss": 0.4661, "step": 11091 }, { "epoch": 1.9786816519489787, "grad_norm": 0.601678192615509, "learning_rate": 1.3668566476848775e-07, "loss": 0.6345, "step": 11092 }, { "epoch": 1.978860048166979, "grad_norm": 0.5237042903900146, "learning_rate": 1.3437898182500075e-07, "loss": 0.504, "step": 11093 }, { "epoch": 1.9790384443849791, "grad_norm": 0.4751412868499756, "learning_rate": 1.3209192321986252e-07, "loss": 0.3427, "step": 11094 }, { "epoch": 1.9792168406029793, "grad_norm": 0.4530685245990753, "learning_rate": 1.298244891326794e-07, "loss": 0.3887, "step": 11095 }, { "epoch": 1.9793952368209795, "grad_norm": 0.5451707243919373, "learning_rate": 1.2757667974155895e-07, "loss": 0.627, "step": 11096 }, { "epoch": 1.9795736330389797, "grad_norm": 0.5313323140144348, "learning_rate": 1.253484952230266e-07, "loss": 0.4812, "step": 11097 }, { "epoch": 1.97975202925698, "grad_norm": 0.5562189221382141, "learning_rate": 1.2313993575210901e-07, "loss": 0.5047, "step": 11098 }, { "epoch": 1.9799304254749799, "grad_norm": 0.622951865196228, "learning_rate": 1.209510015022508e-07, "loss": 0.8287, "step": 11099 }, { "epoch": 1.98010882169298, "grad_norm": 0.5174034833908081, "learning_rate": 1.1878169264536997e-07, "loss": 0.4743, "step": 11100 }, { "epoch": 1.9802872179109803, "grad_norm": 0.5302353501319885, "learning_rate": 1.1663200935183028e-07, "loss": 0.5506, "step": 11101 }, { "epoch": 1.9804656141289805, "grad_norm": 0.6083629727363586, "learning_rate": 1.145019517904966e-07, "loss": 0.6897, "step": 11102 }, { "epoch": 1.9806440103469807, "grad_norm": 0.591913640499115, "learning_rate": 1.1239152012865183e-07, "loss": 0.6142, "step": 11103 }, { "epoch": 1.9808224065649809, "grad_norm": 0.49568477272987366, "learning_rate": 1.103007145320245e-07, "loss": 0.4795, "step": 11104 }, { "epoch": 1.9810008027829809, "grad_norm": 0.6029313802719116, "learning_rate": 1.0822953516484436e-07, "loss": 0.627, "step": 11105 }, { "epoch": 1.981179199000981, "grad_norm": 0.5010586977005005, "learning_rate": 1.061779821897868e-07, "loss": 0.4972, "step": 11106 }, { "epoch": 1.9813575952189812, "grad_norm": 0.4987967312335968, "learning_rate": 1.0414605576797298e-07, "loss": 0.4968, "step": 11107 }, { "epoch": 1.9815359914369814, "grad_norm": 0.6345359683036804, "learning_rate": 1.0213375605896969e-07, "loss": 0.6357, "step": 11108 }, { "epoch": 1.9817143876549816, "grad_norm": 0.5932630896568298, "learning_rate": 1.0014108322084492e-07, "loss": 0.6181, "step": 11109 }, { "epoch": 1.9818927838729818, "grad_norm": 0.48465976119041443, "learning_rate": 9.816803741011237e-08, "loss": 0.4411, "step": 11110 }, { "epoch": 1.982071180090982, "grad_norm": 0.5137790441513062, "learning_rate": 9.621461878173143e-08, "loss": 0.4103, "step": 11111 }, { "epoch": 1.9822495763089822, "grad_norm": 0.5033047199249268, "learning_rate": 9.428082748910716e-08, "loss": 0.4768, "step": 11112 }, { "epoch": 1.9824279725269824, "grad_norm": 0.713068425655365, "learning_rate": 9.236666368411806e-08, "loss": 0.4846, "step": 11113 }, { "epoch": 1.9826063687449826, "grad_norm": 0.5444625616073608, "learning_rate": 9.047212751708834e-08, "loss": 0.5477, "step": 11114 }, { "epoch": 1.9827847649629828, "grad_norm": 0.49971529841423035, "learning_rate": 8.859721913684337e-08, "loss": 0.5182, "step": 11115 }, { "epoch": 1.982963161180983, "grad_norm": 0.5162731409072876, "learning_rate": 8.674193869065428e-08, "loss": 0.4779, "step": 11116 }, { "epoch": 1.9831415573989832, "grad_norm": 0.5822873115539551, "learning_rate": 8.49062863241823e-08, "loss": 0.6038, "step": 11117 }, { "epoch": 1.9833199536169834, "grad_norm": 0.5523039698600769, "learning_rate": 8.309026218161764e-08, "loss": 0.5589, "step": 11118 }, { "epoch": 1.9834983498349836, "grad_norm": 0.49146732687950134, "learning_rate": 8.129386640562398e-08, "loss": 0.4082, "step": 11119 }, { "epoch": 1.9836767460529838, "grad_norm": 0.5370675325393677, "learning_rate": 7.951709913722738e-08, "loss": 0.5361, "step": 11120 }, { "epoch": 1.9838551422709838, "grad_norm": 0.5307615399360657, "learning_rate": 7.77599605160384e-08, "loss": 0.4153, "step": 11121 }, { "epoch": 1.984033538488984, "grad_norm": 0.49418723583221436, "learning_rate": 7.602245068003e-08, "loss": 0.4469, "step": 11122 }, { "epoch": 1.9842119347069842, "grad_norm": 0.4878632724285126, "learning_rate": 7.430456976564859e-08, "loss": 0.4843, "step": 11123 }, { "epoch": 1.9843903309249844, "grad_norm": 0.45123180747032166, "learning_rate": 7.260631790784178e-08, "loss": 0.4106, "step": 11124 }, { "epoch": 1.9845687271429846, "grad_norm": 0.4762474000453949, "learning_rate": 7.09276952399751e-08, "loss": 0.349, "step": 11125 }, { "epoch": 1.9847471233609848, "grad_norm": 0.547519862651825, "learning_rate": 6.926870189391532e-08, "loss": 0.4515, "step": 11126 }, { "epoch": 1.9849255195789848, "grad_norm": 0.6240761876106262, "learning_rate": 6.762933799991933e-08, "loss": 0.6499, "step": 11127 }, { "epoch": 1.985103915796985, "grad_norm": 0.5219815969467163, "learning_rate": 6.6009603686773e-08, "loss": 0.5382, "step": 11128 }, { "epoch": 1.9852823120149852, "grad_norm": 0.5049837231636047, "learning_rate": 6.440949908168014e-08, "loss": 0.4419, "step": 11129 }, { "epoch": 1.9854607082329854, "grad_norm": 0.5972961783409119, "learning_rate": 6.282902431029025e-08, "loss": 0.6477, "step": 11130 }, { "epoch": 1.9856391044509856, "grad_norm": 0.5097864270210266, "learning_rate": 6.126817949678176e-08, "loss": 0.4747, "step": 11131 }, { "epoch": 1.9858175006689858, "grad_norm": 0.5179688930511475, "learning_rate": 5.972696476369555e-08, "loss": 0.4006, "step": 11132 }, { "epoch": 1.985995896886986, "grad_norm": 0.5777585506439209, "learning_rate": 5.8205380232073666e-08, "loss": 0.6213, "step": 11133 }, { "epoch": 1.9861742931049862, "grad_norm": 0.5305327773094177, "learning_rate": 5.6703426021487146e-08, "loss": 0.4677, "step": 11134 }, { "epoch": 1.9863526893229864, "grad_norm": 0.5374904870986938, "learning_rate": 5.522110224981391e-08, "loss": 0.492, "step": 11135 }, { "epoch": 1.9865310855409866, "grad_norm": 0.5351753830909729, "learning_rate": 5.375840903354412e-08, "loss": 0.5088, "step": 11136 }, { "epoch": 1.9867094817589868, "grad_norm": 0.513270378112793, "learning_rate": 5.2315346487530334e-08, "loss": 0.4432, "step": 11137 }, { "epoch": 1.986887877976987, "grad_norm": 0.4735656678676605, "learning_rate": 5.089191472507082e-08, "loss": 0.4919, "step": 11138 }, { "epoch": 1.9870662741949872, "grad_norm": 0.5290320515632629, "learning_rate": 4.94881138580483e-08, "loss": 0.4908, "step": 11139 }, { "epoch": 1.9872446704129874, "grad_norm": 0.5589138865470886, "learning_rate": 4.8103943996624654e-08, "loss": 0.5107, "step": 11140 }, { "epoch": 1.9874230666309876, "grad_norm": 0.5663526058197021, "learning_rate": 4.673940524957398e-08, "loss": 0.5412, "step": 11141 }, { "epoch": 1.9876014628489878, "grad_norm": 0.5641487240791321, "learning_rate": 4.539449772406057e-08, "loss": 0.5338, "step": 11142 }, { "epoch": 1.9877798590669877, "grad_norm": 0.5815507769584656, "learning_rate": 4.406922152566661e-08, "loss": 0.6252, "step": 11143 }, { "epoch": 1.987958255284988, "grad_norm": 0.49604350328445435, "learning_rate": 4.276357675853104e-08, "loss": 0.4181, "step": 11144 }, { "epoch": 1.9881366515029881, "grad_norm": 0.5443685054779053, "learning_rate": 4.1477563525182945e-08, "loss": 0.4321, "step": 11145 }, { "epoch": 1.9883150477209883, "grad_norm": 0.532589316368103, "learning_rate": 4.021118192662487e-08, "loss": 0.5099, "step": 11146 }, { "epoch": 1.9884934439389885, "grad_norm": 0.5429589152336121, "learning_rate": 3.8964432062305046e-08, "loss": 0.5191, "step": 11147 }, { "epoch": 1.9886718401569887, "grad_norm": 0.6189258098602295, "learning_rate": 3.773731403014513e-08, "loss": 0.4195, "step": 11148 }, { "epoch": 1.9888502363749887, "grad_norm": 0.5345398187637329, "learning_rate": 3.652982792654025e-08, "loss": 0.4932, "step": 11149 }, { "epoch": 1.989028632592989, "grad_norm": 0.5087316036224365, "learning_rate": 3.534197384630344e-08, "loss": 0.4493, "step": 11150 }, { "epoch": 1.989207028810989, "grad_norm": 0.7846596240997314, "learning_rate": 3.417375188274896e-08, "loss": 0.4459, "step": 11151 }, { "epoch": 1.9893854250289893, "grad_norm": 0.5246078968048096, "learning_rate": 3.302516212763673e-08, "loss": 0.4156, "step": 11152 }, { "epoch": 1.9895638212469895, "grad_norm": 0.5263677835464478, "learning_rate": 3.1896204671144625e-08, "loss": 0.5612, "step": 11153 }, { "epoch": 1.9897422174649897, "grad_norm": 0.5278795957565308, "learning_rate": 3.07868796019517e-08, "loss": 0.474, "step": 11154 }, { "epoch": 1.98992061368299, "grad_norm": 0.5510560274124146, "learning_rate": 2.9697187007182715e-08, "loss": 0.5469, "step": 11155 }, { "epoch": 1.99009900990099, "grad_norm": 0.47594988346099854, "learning_rate": 2.8627126972435857e-08, "loss": 0.4399, "step": 11156 }, { "epoch": 1.9902774061189903, "grad_norm": 0.5883272290229797, "learning_rate": 2.757669958172726e-08, "loss": 0.6997, "step": 11157 }, { "epoch": 1.9904558023369905, "grad_norm": 0.5753530859947205, "learning_rate": 2.654590491757425e-08, "loss": 0.5808, "step": 11158 }, { "epoch": 1.9906341985549907, "grad_norm": 0.45445308089256287, "learning_rate": 2.5534743060939836e-08, "loss": 0.3831, "step": 11159 }, { "epoch": 1.9908125947729909, "grad_norm": 0.5948703289031982, "learning_rate": 2.4543214091232723e-08, "loss": 0.5709, "step": 11160 }, { "epoch": 1.990990990990991, "grad_norm": 0.4452953338623047, "learning_rate": 2.357131808633506e-08, "loss": 0.3258, "step": 11161 }, { "epoch": 1.9911693872089913, "grad_norm": 0.485113263130188, "learning_rate": 2.2619055122574674e-08, "loss": 0.3959, "step": 11162 }, { "epoch": 1.9913477834269915, "grad_norm": 0.48504024744033813, "learning_rate": 2.168642527475284e-08, "loss": 0.4657, "step": 11163 }, { "epoch": 1.9915261796449917, "grad_norm": 0.557052731513977, "learning_rate": 2.0773428616088773e-08, "loss": 0.5872, "step": 11164 }, { "epoch": 1.9917045758629917, "grad_norm": 0.5224217176437378, "learning_rate": 1.9880065218302877e-08, "loss": 0.5756, "step": 11165 }, { "epoch": 1.9918829720809919, "grad_norm": 0.5543699264526367, "learning_rate": 1.900633515156125e-08, "loss": 0.5897, "step": 11166 }, { "epoch": 1.992061368298992, "grad_norm": 0.5769318342208862, "learning_rate": 1.815223848447567e-08, "loss": 0.5921, "step": 11167 }, { "epoch": 1.9922397645169923, "grad_norm": 0.519289493560791, "learning_rate": 1.731777528415912e-08, "loss": 0.5332, "step": 11168 }, { "epoch": 1.9924181607349924, "grad_norm": 0.4625517725944519, "learning_rate": 1.650294561611476e-08, "loss": 0.3986, "step": 11169 }, { "epoch": 1.9925965569529926, "grad_norm": 0.7756746411323547, "learning_rate": 1.5707749544374705e-08, "loss": 0.4403, "step": 11170 }, { "epoch": 1.9927749531709926, "grad_norm": 0.5089825987815857, "learning_rate": 1.4932187131333485e-08, "loss": 0.4378, "step": 11171 }, { "epoch": 1.9929533493889928, "grad_norm": 0.5764268040657043, "learning_rate": 1.4176258437970102e-08, "loss": 0.5419, "step": 11172 }, { "epoch": 1.993131745606993, "grad_norm": 0.5734716653823853, "learning_rate": 1.3439963523625976e-08, "loss": 0.5719, "step": 11173 }, { "epoch": 1.9933101418249932, "grad_norm": 0.5470873117446899, "learning_rate": 1.2723302446115969e-08, "loss": 0.4679, "step": 11174 }, { "epoch": 1.9934885380429934, "grad_norm": 0.5397417545318604, "learning_rate": 1.2026275261756148e-08, "loss": 0.6085, "step": 11175 }, { "epoch": 1.9936669342609936, "grad_norm": 0.5251044034957886, "learning_rate": 1.1348882025252749e-08, "loss": 0.4125, "step": 11176 }, { "epoch": 1.9938453304789938, "grad_norm": 0.5922536253929138, "learning_rate": 1.0691122789840969e-08, "loss": 0.5207, "step": 11177 }, { "epoch": 1.994023726696994, "grad_norm": 0.5680334568023682, "learning_rate": 1.005299760717393e-08, "loss": 0.5015, "step": 11178 }, { "epoch": 1.9942021229149942, "grad_norm": 0.4970249831676483, "learning_rate": 9.434506527378206e-09, "loss": 0.3887, "step": 11179 }, { "epoch": 1.9943805191329944, "grad_norm": 0.46873077750205994, "learning_rate": 8.835649598998297e-09, "loss": 0.4312, "step": 11180 }, { "epoch": 1.9945589153509946, "grad_norm": 0.5385526418685913, "learning_rate": 8.256426869079903e-09, "loss": 0.4843, "step": 11181 }, { "epoch": 1.9947373115689948, "grad_norm": 0.520590603351593, "learning_rate": 7.696838383114412e-09, "loss": 0.5141, "step": 11182 }, { "epoch": 1.994915707786995, "grad_norm": 0.568450927734375, "learning_rate": 7.156884185094414e-09, "loss": 0.5097, "step": 11183 }, { "epoch": 1.9950941040049952, "grad_norm": 0.5293620228767395, "learning_rate": 6.636564317374916e-09, "loss": 0.4605, "step": 11184 }, { "epoch": 1.9952725002229954, "grad_norm": 0.5584262013435364, "learning_rate": 6.13587882083988e-09, "loss": 0.5948, "step": 11185 }, { "epoch": 1.9954508964409956, "grad_norm": 0.5276932120323181, "learning_rate": 5.654827734791201e-09, "loss": 0.4695, "step": 11186 }, { "epoch": 1.9956292926589956, "grad_norm": 0.5315849781036377, "learning_rate": 5.19341109705973e-09, "loss": 0.4228, "step": 11187 }, { "epoch": 1.9958076888769958, "grad_norm": 0.5869420170783997, "learning_rate": 4.751628943838737e-09, "loss": 0.5691, "step": 11188 }, { "epoch": 1.995986085094996, "grad_norm": 0.5161817669868469, "learning_rate": 4.329481309850447e-09, "loss": 0.4109, "step": 11189 }, { "epoch": 1.9961644813129962, "grad_norm": 0.5489330887794495, "learning_rate": 3.926968228262773e-09, "loss": 0.5284, "step": 11190 }, { "epoch": 1.9963428775309964, "grad_norm": 0.49122124910354614, "learning_rate": 3.5440897306338037e-09, "loss": 0.386, "step": 11191 }, { "epoch": 1.9965212737489966, "grad_norm": 0.4992600679397583, "learning_rate": 3.1808458470783395e-09, "loss": 0.5033, "step": 11192 }, { "epoch": 1.9966996699669965, "grad_norm": 0.4739280045032501, "learning_rate": 2.837236606129112e-09, "loss": 0.3841, "step": 11193 }, { "epoch": 1.9968780661849967, "grad_norm": 0.5642136931419373, "learning_rate": 2.5132620347645407e-09, "loss": 0.5123, "step": 11194 }, { "epoch": 1.997056462402997, "grad_norm": 0.4581596553325653, "learning_rate": 2.2089221584087328e-09, "loss": 0.3491, "step": 11195 }, { "epoch": 1.9972348586209971, "grad_norm": 0.5008403062820435, "learning_rate": 1.9242170010147497e-09, "loss": 0.4724, "step": 11196 }, { "epoch": 1.9974132548389973, "grad_norm": 0.5869413614273071, "learning_rate": 1.6591465848703192e-09, "loss": 0.5462, "step": 11197 }, { "epoch": 1.9975916510569975, "grad_norm": 0.48155996203422546, "learning_rate": 1.4137109308476338e-09, "loss": 0.3609, "step": 11198 }, { "epoch": 1.9977700472749977, "grad_norm": 0.5589842796325684, "learning_rate": 1.187910058209063e-09, "loss": 0.5697, "step": 11199 }, { "epoch": 1.997948443492998, "grad_norm": 0.5098150968551636, "learning_rate": 9.817439847181753e-10, "loss": 0.3743, "step": 11200 }, { "epoch": 1.9981268397109981, "grad_norm": 0.47274699807167053, "learning_rate": 7.952127265009601e-10, "loss": 0.2905, "step": 11201 }, { "epoch": 1.9983052359289983, "grad_norm": 0.48699966073036194, "learning_rate": 6.283162982678725e-10, "loss": 0.5163, "step": 11202 }, { "epoch": 1.9984836321469985, "grad_norm": 0.5384899973869324, "learning_rate": 4.810547130917886e-10, "loss": 0.5131, "step": 11203 }, { "epoch": 1.9986620283649987, "grad_norm": 0.5598852038383484, "learning_rate": 3.534279825467834e-10, "loss": 0.6017, "step": 11204 }, { "epoch": 1.998840424582999, "grad_norm": 0.513744592666626, "learning_rate": 2.454361166526198e-10, "loss": 0.4185, "step": 11205 }, { "epoch": 1.9990188208009991, "grad_norm": 0.5312053561210632, "learning_rate": 1.570791239025038e-10, "loss": 0.4817, "step": 11206 }, { "epoch": 1.9991972170189993, "grad_norm": 0.48965707421302795, "learning_rate": 8.835701123532935e-11, "loss": 0.4257, "step": 11207 }, { "epoch": 1.9993756132369995, "grad_norm": 0.6015014052391052, "learning_rate": 3.9269784063433736e-11, "loss": 0.7296, "step": 11208 }, { "epoch": 1.9995540094549995, "grad_norm": 0.4663368761539459, "learning_rate": 9.817446217086356e-12, "loss": 0.3784, "step": 11209 }, { "epoch": 1.9997324056729997, "grad_norm": 0.471098393201828, "learning_rate": 0.0, "loss": 0.4405, "step": 11210 } ], "logging_steps": 1, "max_steps": 11210, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.680882782008115e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }