diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -3,25340 +3,6342 @@ "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, - "global_step": 18090, + "global_step": 4523, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.0002763957987838585, - "grad_norm": 15.539673805236816, + "epoch": 0.001105460977227504, + "grad_norm": 2.395341396331787, "learning_rate": 5.000000000000001e-07, - "loss": 11.227, + "loss": 4.6826, "step": 5 }, { - "epoch": 0.000552791597567717, - "grad_norm": 16.613014221191406, + "epoch": 0.002210921954455008, + "grad_norm": 2.2102696895599365, "learning_rate": 1.0000000000000002e-06, - "loss": 11.1624, + "loss": 4.4984, "step": 10 }, { - "epoch": 0.0008291873963515755, - "grad_norm": 16.244783401489258, + "epoch": 0.0033163829316825116, + "grad_norm": 2.5083913803100586, "learning_rate": 1.5e-06, - "loss": 11.0487, + "loss": 4.5731, "step": 15 }, { - "epoch": 0.001105583195135434, - "grad_norm": 12.021380424499512, + "epoch": 0.004421843908910016, + "grad_norm": 2.1317508220672607, "learning_rate": 2.0000000000000003e-06, - "loss": 10.8436, + "loss": 4.5149, "step": 20 }, { - "epoch": 0.0013819789939192924, - "grad_norm": 11.764349937438965, + "epoch": 0.0055273048861375195, + "grad_norm": 2.2241172790527344, "learning_rate": 2.5e-06, - "loss": 10.637, + "loss": 4.4769, "step": 25 }, { - "epoch": 0.001658374792703151, - "grad_norm": 8.930374145507812, + "epoch": 0.006632765863365023, + "grad_norm": 2.1349635124206543, "learning_rate": 3e-06, - "loss": 10.4451, + "loss": 4.5924, "step": 30 }, { - "epoch": 0.0019347705914870095, - "grad_norm": 6.179296016693115, + "epoch": 0.007738226840592527, + "grad_norm": 2.366008758544922, "learning_rate": 3.5000000000000004e-06, - "loss": 10.2835, + "loss": 4.5941, "step": 35 }, { - "epoch": 0.002211166390270868, - "grad_norm": 5.235034942626953, + "epoch": 0.008843687817820032, + "grad_norm": 2.4122307300567627, "learning_rate": 4.000000000000001e-06, - "loss": 10.1911, + "loss": 4.4631, "step": 40 }, { - "epoch": 0.0024875621890547263, - "grad_norm": 4.519562244415283, + "epoch": 0.009949148795047534, + "grad_norm": 2.023873805999756, "learning_rate": 4.5e-06, - "loss": 10.1875, + "loss": 4.5361, "step": 45 }, { - "epoch": 0.002763957987838585, - "grad_norm": 4.634215354919434, + "epoch": 0.011054609772275039, + "grad_norm": 2.2571287155151367, "learning_rate": 5e-06, - "loss": 9.9655, + "loss": 4.5539, "step": 50 }, { - "epoch": 0.0030403537866224434, - "grad_norm": 4.817412853240967, + "epoch": 0.012160070749502542, + "grad_norm": 2.1223011016845703, "learning_rate": 5.500000000000001e-06, - "loss": 9.9965, + "loss": 4.4691, "step": 55 }, { - "epoch": 0.003316749585406302, - "grad_norm": 4.490515232086182, + "epoch": 0.013265531726730046, + "grad_norm": 2.125227451324463, "learning_rate": 6e-06, - "loss": 9.9253, + "loss": 4.4713, "step": 60 }, { - "epoch": 0.0035931453841901604, - "grad_norm": 3.931199789047241, + "epoch": 0.014370992703957551, + "grad_norm": 2.272958278656006, "learning_rate": 6.5000000000000004e-06, - "loss": 9.9455, + "loss": 4.5511, "step": 65 }, { - "epoch": 0.003869541182974019, - "grad_norm": 4.179060935974121, + "epoch": 0.015476453681185054, + "grad_norm": 2.1949267387390137, "learning_rate": 7.000000000000001e-06, - "loss": 9.7904, + "loss": 4.4926, "step": 70 }, { - "epoch": 0.0041459369817578775, - "grad_norm": 4.031132698059082, + "epoch": 0.016581914658412557, + "grad_norm": 2.155870199203491, "learning_rate": 7.5e-06, - "loss": 9.7358, + "loss": 4.6169, "step": 75 }, { - "epoch": 0.004422332780541736, - "grad_norm": 3.7969465255737305, + "epoch": 0.017687375635640063, + "grad_norm": 2.253253698348999, "learning_rate": 8.000000000000001e-06, - "loss": 9.7051, + "loss": 4.5112, "step": 80 }, { - "epoch": 0.0046987285793255945, - "grad_norm": 4.057478904724121, + "epoch": 0.018792836612867566, + "grad_norm": 2.2717690467834473, "learning_rate": 8.500000000000002e-06, - "loss": 9.6854, + "loss": 4.5192, "step": 85 }, { - "epoch": 0.004975124378109453, - "grad_norm": 4.059418201446533, + "epoch": 0.01989829759009507, + "grad_norm": 2.4532206058502197, "learning_rate": 9e-06, - "loss": 9.6271, + "loss": 4.5964, "step": 90 }, { - "epoch": 0.005251520176893312, - "grad_norm": 4.01811408996582, + "epoch": 0.021003758567322575, + "grad_norm": 2.420793056488037, "learning_rate": 9.5e-06, - "loss": 9.614, + "loss": 4.5466, "step": 95 }, { - "epoch": 0.00552791597567717, - "grad_norm": 4.239936828613281, + "epoch": 0.022109219544550078, + "grad_norm": 2.122037649154663, "learning_rate": 1e-05, - "loss": 9.423, + "loss": 4.3679, "step": 100 }, { - "epoch": 0.005804311774461028, - "grad_norm": 3.8491415977478027, + "epoch": 0.02321468052177758, + "grad_norm": 2.5729713439941406, "learning_rate": 1.05e-05, - "loss": 9.5076, + "loss": 4.4623, "step": 105 }, { - "epoch": 0.006080707573244887, - "grad_norm": 3.6000020503997803, + "epoch": 0.024320141499005084, + "grad_norm": 2.364610195159912, "learning_rate": 1.1000000000000001e-05, - "loss": 9.5322, + "loss": 4.3146, "step": 110 }, { - "epoch": 0.006357103372028745, - "grad_norm": 5.558045387268066, + "epoch": 0.02542560247623259, + "grad_norm": 2.4596588611602783, "learning_rate": 1.1500000000000002e-05, - "loss": 9.2759, + "loss": 4.4393, "step": 115 }, { - "epoch": 0.006633499170812604, - "grad_norm": 4.2432637214660645, + "epoch": 0.026531063453460093, + "grad_norm": 2.401916265487671, "learning_rate": 1.2e-05, - "loss": 9.3174, + "loss": 4.3398, "step": 120 }, { - "epoch": 0.006909894969596462, - "grad_norm": 3.7061004638671875, + "epoch": 0.027636524430687596, + "grad_norm": 2.1921896934509277, "learning_rate": 1.25e-05, - "loss": 9.3099, + "loss": 4.3521, "step": 125 }, { - "epoch": 0.007186290768380321, - "grad_norm": 3.863173007965088, + "epoch": 0.028741985407915102, + "grad_norm": 2.193477153778076, "learning_rate": 1.3000000000000001e-05, - "loss": 9.0892, + "loss": 4.5969, "step": 130 }, { - "epoch": 0.007462686567164179, - "grad_norm": 4.161376953125, + "epoch": 0.029847446385142605, + "grad_norm": 2.442413568496704, "learning_rate": 1.3500000000000001e-05, - "loss": 9.0976, + "loss": 4.5048, "step": 135 }, { - "epoch": 0.007739082365948038, - "grad_norm": 3.59145188331604, + "epoch": 0.030952907362370108, + "grad_norm": 2.314326047897339, "learning_rate": 1.4000000000000001e-05, - "loss": 9.2131, + "loss": 4.5778, "step": 140 }, { - "epoch": 0.008015478164731896, - "grad_norm": 3.4447615146636963, + "epoch": 0.03205836833959761, + "grad_norm": 2.1947810649871826, "learning_rate": 1.45e-05, - "loss": 8.9092, + "loss": 4.648, "step": 145 }, { - "epoch": 0.008291873963515755, - "grad_norm": 3.227489471435547, + "epoch": 0.033163829316825114, + "grad_norm": 2.268144369125366, "learning_rate": 1.5e-05, - "loss": 8.927, + "loss": 4.5153, "step": 150 }, { - "epoch": 0.008568269762299612, - "grad_norm": 3.0332329273223877, + "epoch": 0.03426929029405262, + "grad_norm": 2.311060905456543, "learning_rate": 1.55e-05, - "loss": 8.8161, + "loss": 4.6327, "step": 155 }, { - "epoch": 0.008844665561083471, - "grad_norm": 4.128123760223389, + "epoch": 0.035374751271280126, + "grad_norm": 2.2926158905029297, "learning_rate": 1.6000000000000003e-05, - "loss": 8.8824, + "loss": 4.3874, "step": 160 }, { - "epoch": 0.00912106135986733, - "grad_norm": 3.798156499862671, + "epoch": 0.03648021224850763, + "grad_norm": 2.1994051933288574, "learning_rate": 1.65e-05, - "loss": 8.7461, + "loss": 4.4664, "step": 165 }, { - "epoch": 0.009397457158651189, - "grad_norm": 4.008655548095703, + "epoch": 0.03758567322573513, + "grad_norm": 2.3250937461853027, "learning_rate": 1.7000000000000003e-05, - "loss": 8.5756, + "loss": 4.4617, "step": 170 }, { - "epoch": 0.009673852957435046, - "grad_norm": 3.501603603363037, + "epoch": 0.038691134202962635, + "grad_norm": 2.3668570518493652, "learning_rate": 1.75e-05, - "loss": 8.7104, + "loss": 4.2885, "step": 175 }, { - "epoch": 0.009950248756218905, - "grad_norm": 3.0541610717773438, + "epoch": 0.03979659518019014, + "grad_norm": 2.1874375343322754, "learning_rate": 1.8e-05, - "loss": 8.6184, + "loss": 4.4582, "step": 180 }, { - "epoch": 0.010226644555002764, - "grad_norm": 3.2599024772644043, + "epoch": 0.04090205615741764, + "grad_norm": 2.365499973297119, "learning_rate": 1.85e-05, - "loss": 8.571, + "loss": 4.4125, "step": 185 }, { - "epoch": 0.010503040353786623, - "grad_norm": 2.8244316577911377, + "epoch": 0.04200751713464515, + "grad_norm": 2.272512435913086, "learning_rate": 1.9e-05, - "loss": 8.5187, + "loss": 4.3934, "step": 190 }, { - "epoch": 0.01077943615257048, - "grad_norm": 3.0465221405029297, + "epoch": 0.04311297811187265, + "grad_norm": 2.343959093093872, "learning_rate": 1.9500000000000003e-05, - "loss": 8.5168, + "loss": 4.5141, "step": 195 }, { - "epoch": 0.01105583195135434, - "grad_norm": 3.2354133129119873, + "epoch": 0.044218439089100156, + "grad_norm": 2.5027010440826416, "learning_rate": 2e-05, - "loss": 8.2329, + "loss": 4.3837, "step": 200 }, { - "epoch": 0.011332227750138198, - "grad_norm": 2.9300291538238525, + "epoch": 0.04532390006632766, + "grad_norm": 2.2487406730651855, "learning_rate": 2.05e-05, - "loss": 8.3095, + "loss": 4.5956, "step": 205 }, { - "epoch": 0.011608623548922056, - "grad_norm": 3.2358179092407227, + "epoch": 0.04642936104355516, + "grad_norm": 2.242449998855591, "learning_rate": 2.1e-05, - "loss": 8.3495, + "loss": 4.6371, "step": 210 }, { - "epoch": 0.011885019347705915, - "grad_norm": 3.006814479827881, + "epoch": 0.047534822020782665, + "grad_norm": 2.183947801589966, "learning_rate": 2.15e-05, - "loss": 8.068, + "loss": 4.314, "step": 215 }, { - "epoch": 0.012161415146489774, - "grad_norm": 3.092186689376831, + "epoch": 0.04864028299801017, + "grad_norm": 2.2349512577056885, "learning_rate": 2.2000000000000003e-05, - "loss": 8.0844, + "loss": 4.4111, "step": 220 }, { - "epoch": 0.012437810945273632, - "grad_norm": 2.7867186069488525, + "epoch": 0.04974574397523768, + "grad_norm": 2.2303688526153564, "learning_rate": 2.25e-05, - "loss": 7.7844, + "loss": 4.3312, "step": 225 }, { - "epoch": 0.01271420674405749, - "grad_norm": 3.151169538497925, + "epoch": 0.05085120495246518, + "grad_norm": 2.2685301303863525, "learning_rate": 2.3000000000000003e-05, - "loss": 8.0737, + "loss": 4.4618, "step": 230 }, { - "epoch": 0.012990602542841349, - "grad_norm": 2.7488925457000732, + "epoch": 0.05195666592969268, + "grad_norm": 2.287493944168091, "learning_rate": 2.35e-05, - "loss": 8.0835, + "loss": 4.4438, "step": 235 }, { - "epoch": 0.013266998341625208, - "grad_norm": 2.673896312713623, + "epoch": 0.053062126906920186, + "grad_norm": 2.350281238555908, "learning_rate": 2.4e-05, - "loss": 7.9807, + "loss": 4.3578, "step": 240 }, { - "epoch": 0.013543394140409067, - "grad_norm": 3.0482277870178223, + "epoch": 0.05416758788414769, + "grad_norm": 2.4053986072540283, "learning_rate": 2.45e-05, - "loss": 7.7995, + "loss": 4.4378, "step": 245 }, { - "epoch": 0.013819789939192924, - "grad_norm": 2.6868321895599365, + "epoch": 0.05527304886137519, + "grad_norm": 2.5036556720733643, "learning_rate": 2.5e-05, - "loss": 7.9454, + "loss": 4.4291, "step": 250 }, { - "epoch": 0.014096185737976783, - "grad_norm": 2.6580042839050293, + "epoch": 0.056378509838602694, + "grad_norm": 2.125025987625122, "learning_rate": 2.5500000000000003e-05, - "loss": 7.7582, + "loss": 4.3374, "step": 255 }, { - "epoch": 0.014372581536760642, - "grad_norm": 2.2857472896575928, + "epoch": 0.057483970815830204, + "grad_norm": 2.461651563644409, "learning_rate": 2.6000000000000002e-05, - "loss": 7.784, + "loss": 4.5828, "step": 260 }, { - "epoch": 0.014648977335544499, - "grad_norm": 2.3364319801330566, + "epoch": 0.05858943179305771, + "grad_norm": 2.3358347415924072, "learning_rate": 2.6500000000000004e-05, - "loss": 7.8675, + "loss": 4.4477, "step": 265 }, { - "epoch": 0.014925373134328358, - "grad_norm": 2.896152973175049, + "epoch": 0.05969489277028521, + "grad_norm": 2.2937681674957275, "learning_rate": 2.7000000000000002e-05, - "loss": 7.6433, + "loss": 4.4706, "step": 270 }, { - "epoch": 0.015201768933112217, - "grad_norm": 3.6049489974975586, + "epoch": 0.06080035374751271, + "grad_norm": 2.173781633377075, "learning_rate": 2.7500000000000004e-05, - "loss": 7.4608, + "loss": 4.3496, "step": 275 }, { - "epoch": 0.015478164731896076, - "grad_norm": 2.9428746700286865, + "epoch": 0.061905814724740216, + "grad_norm": 2.373222827911377, "learning_rate": 2.8000000000000003e-05, - "loss": 7.4888, + "loss": 4.5112, "step": 280 }, { - "epoch": 0.015754560530679935, - "grad_norm": 3.2422735691070557, + "epoch": 0.06301127570196773, + "grad_norm": 2.2600908279418945, "learning_rate": 2.8499999999999998e-05, - "loss": 7.4838, + "loss": 4.4922, "step": 285 }, { - "epoch": 0.016030956329463792, - "grad_norm": 2.6515588760375977, + "epoch": 0.06411673667919522, + "grad_norm": 2.241600275039673, "learning_rate": 2.9e-05, - "loss": 7.4845, + "loss": 4.4719, "step": 290 }, { - "epoch": 0.01630735212824765, - "grad_norm": 2.2527542114257812, + "epoch": 0.06522219765642273, + "grad_norm": 2.2558817863464355, "learning_rate": 2.95e-05, - "loss": 7.679, + "loss": 4.5555, "step": 295 }, { - "epoch": 0.01658374792703151, - "grad_norm": 2.841431140899658, + "epoch": 0.06632765863365023, + "grad_norm": 2.5431759357452393, "learning_rate": 3e-05, - "loss": 7.5644, + "loss": 4.3901, "step": 300 }, { - "epoch": 0.016860143725815367, - "grad_norm": 3.071385383605957, + "epoch": 0.06743311961087774, + "grad_norm": 2.1968157291412354, "learning_rate": 3.05e-05, - "loss": 7.3476, + "loss": 4.3923, "step": 305 }, { - "epoch": 0.017136539524599224, - "grad_norm": 2.392310380935669, + "epoch": 0.06853858058810525, + "grad_norm": 2.1212503910064697, "learning_rate": 3.1e-05, - "loss": 7.4821, + "loss": 4.3401, "step": 310 }, { - "epoch": 0.017412935323383085, - "grad_norm": 2.3450920581817627, + "epoch": 0.06964404156533274, + "grad_norm": 2.430278778076172, "learning_rate": 3.15e-05, - "loss": 7.6444, + "loss": 4.609, "step": 315 }, { - "epoch": 0.017689331122166942, - "grad_norm": 2.5122437477111816, + "epoch": 0.07074950254256025, + "grad_norm": 2.741177797317505, "learning_rate": 3.2000000000000005e-05, - "loss": 7.3695, + "loss": 4.6077, "step": 320 }, { - "epoch": 0.017965726920950803, - "grad_norm": 3.0981411933898926, + "epoch": 0.07185496351978775, + "grad_norm": 2.3611228466033936, "learning_rate": 3.2500000000000004e-05, - "loss": 7.2349, + "loss": 4.3933, "step": 325 }, { - "epoch": 0.01824212271973466, - "grad_norm": 2.4662463665008545, + "epoch": 0.07296042449701526, + "grad_norm": 2.265152931213379, "learning_rate": 3.3e-05, - "loss": 7.4194, + "loss": 4.23, "step": 330 }, { - "epoch": 0.018518518518518517, - "grad_norm": 2.5492987632751465, + "epoch": 0.07406588547424275, + "grad_norm": 2.3802292346954346, "learning_rate": 3.35e-05, - "loss": 7.2674, + "loss": 4.3844, "step": 335 }, { - "epoch": 0.018794914317302378, - "grad_norm": 2.4681410789489746, + "epoch": 0.07517134645147026, + "grad_norm": 2.5243539810180664, "learning_rate": 3.4000000000000007e-05, - "loss": 7.1909, + "loss": 4.4695, "step": 340 }, { - "epoch": 0.019071310116086235, - "grad_norm": 2.5183534622192383, + "epoch": 0.07627680742869777, + "grad_norm": 2.522508144378662, "learning_rate": 3.45e-05, - "loss": 7.3093, + "loss": 4.3575, "step": 345 }, { - "epoch": 0.019347705914870093, - "grad_norm": 2.3964405059814453, + "epoch": 0.07738226840592527, + "grad_norm": 2.1260106563568115, "learning_rate": 3.5e-05, - "loss": 7.3742, + "loss": 4.4234, "step": 350 }, { - "epoch": 0.019624101713653953, - "grad_norm": 2.961442470550537, + "epoch": 0.07848772938315278, + "grad_norm": 2.445948839187622, "learning_rate": 3.55e-05, - "loss": 7.4874, + "loss": 4.3261, "step": 355 }, { - "epoch": 0.01990049751243781, - "grad_norm": 2.420823812484741, + "epoch": 0.07959319036038028, + "grad_norm": 2.191976308822632, "learning_rate": 3.6e-05, - "loss": 7.3551, + "loss": 4.3506, "step": 360 }, { - "epoch": 0.020176893311221668, - "grad_norm": 2.382369041442871, + "epoch": 0.08069865133760779, + "grad_norm": 2.582002878189087, "learning_rate": 3.65e-05, - "loss": 7.2977, + "loss": 4.379, "step": 365 }, { - "epoch": 0.02045328911000553, - "grad_norm": 2.913240671157837, + "epoch": 0.08180411231483528, + "grad_norm": 2.395965099334717, "learning_rate": 3.7e-05, - "loss": 7.3576, + "loss": 4.4162, "step": 370 }, { - "epoch": 0.020729684908789386, - "grad_norm": 2.3969368934631348, + "epoch": 0.08290957329206279, + "grad_norm": 2.313727617263794, "learning_rate": 3.7500000000000003e-05, - "loss": 7.096, + "loss": 4.4531, "step": 375 }, { - "epoch": 0.021006080707573246, - "grad_norm": 3.0736069679260254, + "epoch": 0.0840150342692903, + "grad_norm": 2.2551207542419434, "learning_rate": 3.8e-05, - "loss": 7.1848, + "loss": 4.3614, "step": 380 }, { - "epoch": 0.021282476506357104, - "grad_norm": 2.2005414962768555, + "epoch": 0.0851204952465178, + "grad_norm": 2.3821234703063965, "learning_rate": 3.85e-05, - "loss": 7.082, + "loss": 4.2572, "step": 385 }, { - "epoch": 0.02155887230514096, - "grad_norm": 3.3333756923675537, + "epoch": 0.0862259562237453, + "grad_norm": 2.212198495864868, "learning_rate": 3.9000000000000006e-05, - "loss": 7.1535, + "loss": 4.6126, "step": 390 }, { - "epoch": 0.02183526810392482, - "grad_norm": 3.055220127105713, + "epoch": 0.0873314172009728, + "grad_norm": 2.08597993850708, "learning_rate": 3.9500000000000005e-05, - "loss": 6.8787, + "loss": 4.4071, "step": 395 }, { - "epoch": 0.02211166390270868, - "grad_norm": 2.4127659797668457, + "epoch": 0.08843687817820031, + "grad_norm": 2.2900874614715576, "learning_rate": 4e-05, - "loss": 6.9886, + "loss": 4.4119, "step": 400 }, { - "epoch": 0.022388059701492536, - "grad_norm": 2.8047995567321777, + "epoch": 0.08954233915542781, + "grad_norm": 2.6229662895202637, "learning_rate": 4.05e-05, - "loss": 7.0981, + "loss": 4.5127, "step": 405 }, { - "epoch": 0.022664455500276397, - "grad_norm": 2.9609084129333496, + "epoch": 0.09064780013265532, + "grad_norm": 2.313673496246338, "learning_rate": 4.1e-05, - "loss": 6.9062, + "loss": 4.5682, "step": 410 }, { - "epoch": 0.022940851299060254, - "grad_norm": 3.0030362606048584, + "epoch": 0.09175326110988283, + "grad_norm": 2.5078179836273193, "learning_rate": 4.15e-05, - "loss": 7.1212, + "loss": 4.3089, "step": 415 }, { - "epoch": 0.02321724709784411, - "grad_norm": 3.864861011505127, + "epoch": 0.09285872208711032, + "grad_norm": 2.276742696762085, "learning_rate": 4.2e-05, - "loss": 7.2378, + "loss": 4.4486, "step": 420 }, { - "epoch": 0.023493642896627972, - "grad_norm": 2.880798816680908, + "epoch": 0.09396418306433783, + "grad_norm": 2.0925698280334473, "learning_rate": 4.25e-05, - "loss": 7.073, + "loss": 4.2959, "step": 425 }, { - "epoch": 0.02377003869541183, - "grad_norm": 2.903695821762085, + "epoch": 0.09506964404156533, + "grad_norm": 2.5252251625061035, "learning_rate": 4.3e-05, - "loss": 7.0507, + "loss": 4.4345, "step": 430 }, { - "epoch": 0.02404643449419569, - "grad_norm": 3.056842088699341, + "epoch": 0.09617510501879284, + "grad_norm": 2.374155282974243, "learning_rate": 4.35e-05, - "loss": 6.843, + "loss": 4.3959, "step": 435 }, { - "epoch": 0.024322830292979547, - "grad_norm": 2.9644432067871094, + "epoch": 0.09728056599602034, + "grad_norm": 2.4412851333618164, "learning_rate": 4.4000000000000006e-05, - "loss": 7.0322, + "loss": 4.3762, "step": 440 }, { - "epoch": 0.024599226091763404, - "grad_norm": 2.783032178878784, + "epoch": 0.09838602697324784, + "grad_norm": 2.302851676940918, "learning_rate": 4.4500000000000004e-05, - "loss": 7.0572, + "loss": 4.5145, "step": 445 }, { - "epoch": 0.024875621890547265, - "grad_norm": 2.3176393508911133, + "epoch": 0.09949148795047535, + "grad_norm": 2.3877639770507812, "learning_rate": 4.5e-05, - "loss": 6.8992, + "loss": 4.3736, "step": 450 }, { - "epoch": 0.025152017689331122, - "grad_norm": 2.587258815765381, + "epoch": 0.10059694892770285, + "grad_norm": 2.413830280303955, "learning_rate": 4.55e-05, - "loss": 6.9982, + "loss": 4.377, "step": 455 }, { - "epoch": 0.02542841348811498, - "grad_norm": 3.1604034900665283, + "epoch": 0.10170240990493036, + "grad_norm": 2.5087687969207764, "learning_rate": 4.600000000000001e-05, - "loss": 6.8609, + "loss": 4.4129, "step": 460 }, { - "epoch": 0.02570480928689884, - "grad_norm": 3.7053439617156982, + "epoch": 0.10280787088215786, + "grad_norm": 2.449108600616455, "learning_rate": 4.6500000000000005e-05, - "loss": 7.4181, + "loss": 4.4224, "step": 465 }, { - "epoch": 0.025981205085682697, - "grad_norm": 2.896162986755371, + "epoch": 0.10391333185938537, + "grad_norm": 2.255720376968384, "learning_rate": 4.7e-05, - "loss": 6.7783, + "loss": 4.3521, "step": 470 }, { - "epoch": 0.026257600884466555, - "grad_norm": 2.4549975395202637, + "epoch": 0.10501879283661286, + "grad_norm": 2.461012601852417, "learning_rate": 4.75e-05, - "loss": 6.8819, + "loss": 4.3307, "step": 475 }, { - "epoch": 0.026533996683250415, - "grad_norm": 2.6171348094940186, + "epoch": 0.10612425381384037, + "grad_norm": 2.6323764324188232, "learning_rate": 4.8e-05, - "loss": 7.0355, + "loss": 4.4938, "step": 480 }, { - "epoch": 0.026810392482034272, - "grad_norm": 2.6561520099639893, + "epoch": 0.10722971479106788, + "grad_norm": 2.4425625801086426, "learning_rate": 4.85e-05, - "loss": 6.8347, + "loss": 4.5443, "step": 485 }, { - "epoch": 0.027086788280818133, - "grad_norm": 2.9669582843780518, + "epoch": 0.10833517576829538, + "grad_norm": 2.523211717605591, "learning_rate": 4.9e-05, - "loss": 6.7402, + "loss": 4.3785, "step": 490 }, { - "epoch": 0.02736318407960199, - "grad_norm": 2.837339162826538, + "epoch": 0.10944063674552289, + "grad_norm": 2.4563889503479004, "learning_rate": 4.9500000000000004e-05, - "loss": 6.9979, + "loss": 4.4033, "step": 495 }, { - "epoch": 0.027639579878385848, - "grad_norm": 2.7479424476623535, + "epoch": 0.11054609772275038, + "grad_norm": 2.4348998069763184, "learning_rate": 5e-05, - "loss": 6.8928, + "loss": 4.3472, "step": 500 }, { - "epoch": 0.02791597567716971, - "grad_norm": 3.010474920272827, - "learning_rate": 4.9985787379192726e-05, - "loss": 7.0164, + "epoch": 0.1116515586999779, + "grad_norm": 2.430751323699951, + "learning_rate": 4.993785732040766e-05, + "loss": 4.5632, "step": 505 }, { - "epoch": 0.028192371475953566, - "grad_norm": 2.812869071960449, - "learning_rate": 4.997157475838545e-05, - "loss": 6.9878, + "epoch": 0.11275701967720539, + "grad_norm": 2.5139589309692383, + "learning_rate": 4.9875714640815315e-05, + "loss": 4.3744, "step": 510 }, { - "epoch": 0.028468767274737423, - "grad_norm": 2.1367461681365967, - "learning_rate": 4.9957362137578174e-05, - "loss": 7.0706, + "epoch": 0.1138624806544329, + "grad_norm": 2.3713083267211914, + "learning_rate": 4.981357196122297e-05, + "loss": 4.4622, "step": 515 }, { - "epoch": 0.028745163073521283, - "grad_norm": 2.864809989929199, - "learning_rate": 4.994314951677089e-05, - "loss": 6.9731, + "epoch": 0.11496794163166041, + "grad_norm": 2.348144769668579, + "learning_rate": 4.975142928163063e-05, + "loss": 4.4778, "step": 520 }, { - "epoch": 0.02902155887230514, - "grad_norm": 2.8308370113372803, - "learning_rate": 4.9928936895963616e-05, - "loss": 6.8747, + "epoch": 0.1160734026088879, + "grad_norm": 2.4068586826324463, + "learning_rate": 4.968928660203828e-05, + "loss": 4.4139, "step": 525 }, { - "epoch": 0.029297954671088998, - "grad_norm": 3.112752676010132, - "learning_rate": 4.9914724275156346e-05, - "loss": 6.858, + "epoch": 0.11717886358611541, + "grad_norm": 2.3237857818603516, + "learning_rate": 4.962714392244594e-05, + "loss": 4.3972, "step": 530 }, { - "epoch": 0.02957435046987286, - "grad_norm": 2.711458683013916, - "learning_rate": 4.9900511654349063e-05, - "loss": 7.0695, + "epoch": 0.11828432456334291, + "grad_norm": 2.39794659614563, + "learning_rate": 4.9565001242853596e-05, + "loss": 4.2682, "step": 535 }, { - "epoch": 0.029850746268656716, - "grad_norm": 3.2045886516571045, - "learning_rate": 4.988629903354179e-05, - "loss": 7.0511, + "epoch": 0.11938978554057042, + "grad_norm": 2.433943748474121, + "learning_rate": 4.950285856326125e-05, + "loss": 4.6123, "step": 540 }, { - "epoch": 0.030127142067440577, - "grad_norm": 2.655226707458496, - "learning_rate": 4.987208641273451e-05, - "loss": 7.1596, + "epoch": 0.12049524651779792, + "grad_norm": 2.3196094036102295, + "learning_rate": 4.944071588366891e-05, + "loss": 4.4894, "step": 545 }, { - "epoch": 0.030403537866224434, - "grad_norm": 3.0165657997131348, - "learning_rate": 4.9857873791927235e-05, - "loss": 7.2406, + "epoch": 0.12160070749502543, + "grad_norm": 2.388373851776123, + "learning_rate": 4.9378573204076564e-05, + "loss": 4.4735, "step": 550 }, { - "epoch": 0.03067993366500829, - "grad_norm": 3.2889368534088135, - "learning_rate": 4.984366117111995e-05, - "loss": 7.1024, + "epoch": 0.12270616847225294, + "grad_norm": 2.471214532852173, + "learning_rate": 4.931643052448422e-05, + "loss": 4.3991, "step": 555 }, { - "epoch": 0.03095632946379215, - "grad_norm": 3.375981569290161, - "learning_rate": 4.9829448550312677e-05, - "loss": 6.8454, + "epoch": 0.12381162944948043, + "grad_norm": 2.5611140727996826, + "learning_rate": 4.925428784489187e-05, + "loss": 4.3706, "step": 560 }, { - "epoch": 0.03123272526257601, - "grad_norm": 3.135676383972168, - "learning_rate": 4.981523592950541e-05, - "loss": 7.3552, + "epoch": 0.12491709042670794, + "grad_norm": 2.365116834640503, + "learning_rate": 4.919214516529953e-05, + "loss": 4.3329, "step": 565 }, { - "epoch": 0.03150912106135987, - "grad_norm": 3.073650598526001, - "learning_rate": 4.9801023308698125e-05, - "loss": 7.1633, + "epoch": 0.12602255140393545, + "grad_norm": 2.5337095260620117, + "learning_rate": 4.913000248570719e-05, + "loss": 4.3502, "step": 570 }, { - "epoch": 0.03178551686014373, - "grad_norm": 2.4921205043792725, - "learning_rate": 4.978681068789085e-05, - "loss": 7.0104, + "epoch": 0.12712801238116295, + "grad_norm": 2.3166821002960205, + "learning_rate": 4.906785980611484e-05, + "loss": 4.4643, "step": 575 }, { - "epoch": 0.032061912658927584, - "grad_norm": 2.602283477783203, - "learning_rate": 4.977259806708357e-05, - "loss": 6.7988, + "epoch": 0.12823347335839044, + "grad_norm": 2.7000489234924316, + "learning_rate": 4.90057171265225e-05, + "loss": 4.2057, "step": 580 }, { - "epoch": 0.03233830845771144, - "grad_norm": 2.6181817054748535, - "learning_rate": 4.9758385446276296e-05, - "loss": 6.9489, + "epoch": 0.12933893433561794, + "grad_norm": 2.541940450668335, + "learning_rate": 4.894357444693015e-05, + "loss": 4.3045, "step": 585 }, { - "epoch": 0.0326147042564953, - "grad_norm": 2.7632694244384766, - "learning_rate": 4.974417282546902e-05, - "loss": 6.8527, + "epoch": 0.13044439531284546, + "grad_norm": 2.4047327041625977, + "learning_rate": 4.888143176733781e-05, + "loss": 4.2141, "step": 590 }, { - "epoch": 0.03289110005527916, - "grad_norm": 2.530285120010376, - "learning_rate": 4.972996020466174e-05, - "loss": 6.902, + "epoch": 0.13154985629007296, + "grad_norm": 2.3533935546875, + "learning_rate": 4.881928908774547e-05, + "loss": 4.3564, "step": 595 }, { - "epoch": 0.03316749585406302, - "grad_norm": 2.339693784713745, - "learning_rate": 4.971574758385447e-05, - "loss": 6.7824, + "epoch": 0.13265531726730045, + "grad_norm": 2.469710350036621, + "learning_rate": 4.875714640815312e-05, + "loss": 4.3387, "step": 600 }, { - "epoch": 0.03344389165284688, - "grad_norm": 2.539701223373413, - "learning_rate": 4.9701534963047186e-05, - "loss": 6.7379, + "epoch": 0.13376077824452798, + "grad_norm": 2.4111387729644775, + "learning_rate": 4.8695003728560775e-05, + "loss": 4.3647, "step": 605 }, { - "epoch": 0.033720287451630734, - "grad_norm": 2.586127758026123, - "learning_rate": 4.968732234223991e-05, - "loss": 7.1041, + "epoch": 0.13486623922175547, + "grad_norm": 2.5026888847351074, + "learning_rate": 4.863286104896843e-05, + "loss": 4.4231, "step": 610 }, { - "epoch": 0.03399668325041459, - "grad_norm": 2.9324231147766113, - "learning_rate": 4.9673109721432634e-05, - "loss": 6.778, + "epoch": 0.13597170019898297, + "grad_norm": 2.4435007572174072, + "learning_rate": 4.857071836937609e-05, + "loss": 4.45, "step": 615 }, { - "epoch": 0.03427307904919845, - "grad_norm": 2.5098533630371094, - "learning_rate": 4.965889710062536e-05, - "loss": 6.8565, + "epoch": 0.1370771611762105, + "grad_norm": 2.518418550491333, + "learning_rate": 4.850857568978375e-05, + "loss": 4.4376, "step": 620 }, { - "epoch": 0.03454947484798231, - "grad_norm": 3.0486905574798584, - "learning_rate": 4.964468447981808e-05, - "loss": 6.6843, + "epoch": 0.138182622153438, + "grad_norm": 2.4196436405181885, + "learning_rate": 4.84464330101914e-05, + "loss": 4.585, "step": 625 }, { - "epoch": 0.03482587064676617, - "grad_norm": 2.632387161254883, - "learning_rate": 4.96304718590108e-05, - "loss": 6.7764, + "epoch": 0.13928808313066549, + "grad_norm": 2.3251471519470215, + "learning_rate": 4.8384290330599056e-05, + "loss": 4.387, "step": 630 }, { - "epoch": 0.03510226644555003, - "grad_norm": 2.5123403072357178, - "learning_rate": 4.961625923820353e-05, - "loss": 6.7861, + "epoch": 0.14039354410789298, + "grad_norm": 2.599461078643799, + "learning_rate": 4.832214765100672e-05, + "loss": 4.4816, "step": 635 }, { - "epoch": 0.035378662244333885, - "grad_norm": 2.678485631942749, - "learning_rate": 4.9602046617396253e-05, - "loss": 6.7484, + "epoch": 0.1414990050851205, + "grad_norm": 2.5266942977905273, + "learning_rate": 4.826000497141437e-05, + "loss": 4.4737, "step": 640 }, { - "epoch": 0.03565505804311774, - "grad_norm": 2.7005465030670166, - "learning_rate": 4.958783399658897e-05, - "loss": 6.7331, + "epoch": 0.142604466062348, + "grad_norm": 2.3561177253723145, + "learning_rate": 4.8197862291822025e-05, + "loss": 4.355, "step": 645 }, { - "epoch": 0.035931453841901606, - "grad_norm": 2.8947432041168213, - "learning_rate": 4.95736213757817e-05, - "loss": 6.7002, + "epoch": 0.1437099270395755, + "grad_norm": 2.291571855545044, + "learning_rate": 4.813571961222968e-05, + "loss": 4.4803, "step": 650 }, { - "epoch": 0.03620784964068546, - "grad_norm": 2.788076162338257, - "learning_rate": 4.955940875497442e-05, - "loss": 6.9371, + "epoch": 0.14481538801680302, + "grad_norm": 2.5574657917022705, + "learning_rate": 4.807357693263734e-05, + "loss": 4.3132, "step": 655 }, { - "epoch": 0.03648424543946932, - "grad_norm": 2.3308234214782715, - "learning_rate": 4.954519613416714e-05, - "loss": 6.7643, + "epoch": 0.14592084899403052, + "grad_norm": 2.816318988800049, + "learning_rate": 4.801143425304499e-05, + "loss": 4.4246, "step": 660 }, { - "epoch": 0.03676064123825318, - "grad_norm": 2.0851082801818848, - "learning_rate": 4.953098351335986e-05, - "loss": 6.5921, + "epoch": 0.147026309971258, + "grad_norm": 2.3737952709198, + "learning_rate": 4.794929157345265e-05, + "loss": 4.5105, "step": 665 }, { - "epoch": 0.037037037037037035, - "grad_norm": 2.5905802249908447, - "learning_rate": 4.951677089255259e-05, - "loss": 6.7768, + "epoch": 0.1481317709484855, + "grad_norm": 2.4100232124328613, + "learning_rate": 4.7887148893860305e-05, + "loss": 4.5111, "step": 670 }, { - "epoch": 0.03731343283582089, - "grad_norm": 2.8827717304229736, - "learning_rate": 4.9502558271745315e-05, - "loss": 6.7569, + "epoch": 0.14923723192571303, + "grad_norm": 2.36722731590271, + "learning_rate": 4.782500621426796e-05, + "loss": 4.3462, "step": 675 }, { - "epoch": 0.037589828634604756, - "grad_norm": 4.079367160797119, - "learning_rate": 4.948834565093803e-05, - "loss": 6.8087, + "epoch": 0.15034269290294053, + "grad_norm": 2.747675657272339, + "learning_rate": 4.776286353467562e-05, + "loss": 4.518, "step": 680 }, { - "epoch": 0.037866224433388614, - "grad_norm": 2.45166277885437, - "learning_rate": 4.947413303013076e-05, - "loss": 6.7949, + "epoch": 0.15144815388016802, + "grad_norm": 2.8760783672332764, + "learning_rate": 4.7700720855083274e-05, + "loss": 4.544, "step": 685 }, { - "epoch": 0.03814262023217247, - "grad_norm": 3.1089086532592773, - "learning_rate": 4.945992040932348e-05, - "loss": 6.5695, + "epoch": 0.15255361485739555, + "grad_norm": 2.1986746788024902, + "learning_rate": 4.763857817549093e-05, + "loss": 4.4212, "step": 690 }, { - "epoch": 0.03841901603095633, - "grad_norm": 2.3953475952148438, - "learning_rate": 4.9445707788516204e-05, - "loss": 6.7555, + "epoch": 0.15365907583462304, + "grad_norm": 2.2483763694763184, + "learning_rate": 4.7576435495898586e-05, + "loss": 4.4373, "step": 695 }, { - "epoch": 0.038695411829740185, - "grad_norm": 3.1515231132507324, - "learning_rate": 4.943149516770893e-05, - "loss": 6.7233, + "epoch": 0.15476453681185054, + "grad_norm": 2.5549709796905518, + "learning_rate": 4.751429281630624e-05, + "loss": 4.4253, "step": 700 }, { - "epoch": 0.03897180762852405, - "grad_norm": 2.5655906200408936, - "learning_rate": 4.941728254690165e-05, - "loss": 6.8136, + "epoch": 0.15586999778907804, + "grad_norm": 2.2713725566864014, + "learning_rate": 4.74521501367139e-05, + "loss": 4.2794, "step": 705 }, { - "epoch": 0.03924820342730791, - "grad_norm": 2.360464096069336, - "learning_rate": 4.9403069926094376e-05, - "loss": 6.4072, + "epoch": 0.15697545876630556, + "grad_norm": 2.340376615524292, + "learning_rate": 4.7390007457121555e-05, + "loss": 4.5125, "step": 710 }, { - "epoch": 0.039524599226091764, - "grad_norm": 2.64111590385437, - "learning_rate": 4.938885730528709e-05, - "loss": 6.9907, + "epoch": 0.15808091974353305, + "grad_norm": 2.421940803527832, + "learning_rate": 4.7327864777529204e-05, + "loss": 4.2371, "step": 715 }, { - "epoch": 0.03980099502487562, - "grad_norm": 2.419311761856079, - "learning_rate": 4.9374644684479824e-05, - "loss": 6.6928, + "epoch": 0.15918638072076055, + "grad_norm": 2.4546539783477783, + "learning_rate": 4.726572209793687e-05, + "loss": 4.4549, "step": 720 }, { - "epoch": 0.04007739082365948, - "grad_norm": 3.4732820987701416, - "learning_rate": 4.936043206367255e-05, - "loss": 6.7389, + "epoch": 0.16029184169798807, + "grad_norm": 2.427361011505127, + "learning_rate": 4.720357941834452e-05, + "loss": 4.4328, "step": 725 }, { - "epoch": 0.040353786622443336, - "grad_norm": 2.9171254634857178, - "learning_rate": 4.9346219442865265e-05, - "loss": 6.7079, + "epoch": 0.16139730267521557, + "grad_norm": 2.4004828929901123, + "learning_rate": 4.714143673875217e-05, + "loss": 4.2623, "step": 730 }, { - "epoch": 0.0406301824212272, - "grad_norm": 3.3903751373291016, - "learning_rate": 4.933200682205799e-05, - "loss": 6.4443, + "epoch": 0.16250276365244307, + "grad_norm": 2.3959038257598877, + "learning_rate": 4.7079294059159836e-05, + "loss": 4.2597, "step": 735 }, { - "epoch": 0.04090657822001106, - "grad_norm": 3.01202130317688, - "learning_rate": 4.931779420125071e-05, - "loss": 7.024, + "epoch": 0.16360822462967056, + "grad_norm": 2.257460594177246, + "learning_rate": 4.7017151379567485e-05, + "loss": 4.382, "step": 740 }, { - "epoch": 0.041182974018794914, - "grad_norm": 2.3956549167633057, - "learning_rate": 4.930358158044344e-05, - "loss": 6.4869, + "epoch": 0.16471368560689809, + "grad_norm": 2.546736478805542, + "learning_rate": 4.695500869997515e-05, + "loss": 4.4304, "step": 745 }, { - "epoch": 0.04145936981757877, - "grad_norm": 2.6889407634735107, - "learning_rate": 4.928936895963616e-05, - "loss": 6.5392, + "epoch": 0.16581914658412558, + "grad_norm": 2.665574789047241, + "learning_rate": 4.6892866020382804e-05, + "loss": 4.443, "step": 750 }, { - "epoch": 0.04173576561636263, - "grad_norm": 2.521833896636963, - "learning_rate": 4.9275156338828885e-05, - "loss": 6.5671, + "epoch": 0.16692460756135308, + "grad_norm": 2.587796926498413, + "learning_rate": 4.6830723340790454e-05, + "loss": 4.3819, "step": 755 }, { - "epoch": 0.04201216141514649, - "grad_norm": 2.741121530532837, - "learning_rate": 4.926094371802161e-05, - "loss": 6.6626, + "epoch": 0.1680300685385806, + "grad_norm": 2.6442179679870605, + "learning_rate": 4.6768580661198117e-05, + "loss": 4.3984, "step": 760 }, { - "epoch": 0.04228855721393035, - "grad_norm": 2.8125641345977783, - "learning_rate": 4.9246731097214326e-05, - "loss": 6.961, + "epoch": 0.1691355295158081, + "grad_norm": 2.596620798110962, + "learning_rate": 4.670643798160577e-05, + "loss": 4.2336, "step": 765 }, { - "epoch": 0.04256495301271421, - "grad_norm": 2.6478195190429688, - "learning_rate": 4.923251847640705e-05, - "loss": 6.6519, + "epoch": 0.1702409904930356, + "grad_norm": 2.4057729244232178, + "learning_rate": 4.664429530201342e-05, + "loss": 4.3909, "step": 770 }, { - "epoch": 0.042841348811498065, - "grad_norm": 2.3772130012512207, - "learning_rate": 4.9218305855599774e-05, - "loss": 6.5116, + "epoch": 0.1713464514702631, + "grad_norm": 2.406342029571533, + "learning_rate": 4.6582152622421085e-05, + "loss": 4.3091, "step": 775 }, { - "epoch": 0.04311774461028192, - "grad_norm": 2.921616554260254, - "learning_rate": 4.92040932347925e-05, - "loss": 6.7481, + "epoch": 0.1724519124474906, + "grad_norm": 2.4423723220825195, + "learning_rate": 4.6520009942828734e-05, + "loss": 4.3409, "step": 780 }, { - "epoch": 0.04339414040906578, - "grad_norm": 2.6683406829833984, - "learning_rate": 4.918988061398522e-05, - "loss": 6.5871, + "epoch": 0.1735573734247181, + "grad_norm": 2.342496633529663, + "learning_rate": 4.645786726323639e-05, + "loss": 4.3805, "step": 785 }, { - "epoch": 0.04367053620784964, - "grad_norm": 2.636958360671997, - "learning_rate": 4.9175667993177946e-05, - "loss": 6.353, + "epoch": 0.1746628344019456, + "grad_norm": 2.482818365097046, + "learning_rate": 4.6395724583644054e-05, + "loss": 4.323, "step": 790 }, { - "epoch": 0.0439469320066335, - "grad_norm": 3.1544365882873535, - "learning_rate": 4.916145537237067e-05, - "loss": 6.7493, + "epoch": 0.17576829537917313, + "grad_norm": 2.6542818546295166, + "learning_rate": 4.63335819040517e-05, + "loss": 4.3603, "step": 795 }, { - "epoch": 0.04422332780541736, - "grad_norm": 3.5060462951660156, - "learning_rate": 4.914724275156339e-05, - "loss": 6.4544, + "epoch": 0.17687375635640062, + "grad_norm": 2.499776840209961, + "learning_rate": 4.627143922445936e-05, + "loss": 4.4008, "step": 800 }, { - "epoch": 0.044499723604201215, - "grad_norm": 2.6805920600891113, - "learning_rate": 4.913303013075611e-05, - "loss": 6.736, + "epoch": 0.17797921733362812, + "grad_norm": 2.341139316558838, + "learning_rate": 4.6209296544867015e-05, + "loss": 4.3715, "step": 805 }, { - "epoch": 0.04477611940298507, - "grad_norm": 2.880324125289917, - "learning_rate": 4.911881750994884e-05, - "loss": 6.8972, + "epoch": 0.17908467831085562, + "grad_norm": 2.29777455329895, + "learning_rate": 4.614715386527467e-05, + "loss": 4.4741, "step": 810 }, { - "epoch": 0.045052515201768936, - "grad_norm": 2.333005428314209, - "learning_rate": 4.910460488914156e-05, - "loss": 6.7566, + "epoch": 0.18019013928808314, + "grad_norm": 2.515763282775879, + "learning_rate": 4.608501118568233e-05, + "loss": 4.3415, "step": 815 }, { - "epoch": 0.04532891100055279, - "grad_norm": 2.3347644805908203, - "learning_rate": 4.909039226833428e-05, - "loss": 6.4314, + "epoch": 0.18129560026531064, + "grad_norm": 2.4565176963806152, + "learning_rate": 4.6022868506089984e-05, + "loss": 4.2374, "step": 820 }, { - "epoch": 0.04560530679933665, - "grad_norm": 3.022331476211548, - "learning_rate": 4.907617964752701e-05, - "loss": 6.773, + "epoch": 0.18240106124253813, + "grad_norm": 2.6354682445526123, + "learning_rate": 4.596072582649764e-05, + "loss": 4.4921, "step": 825 }, { - "epoch": 0.04588170259812051, - "grad_norm": 2.466000556945801, - "learning_rate": 4.906196702671973e-05, - "loss": 6.6999, + "epoch": 0.18350652221976566, + "grad_norm": 2.610104560852051, + "learning_rate": 4.5898583146905296e-05, + "loss": 4.4667, "step": 830 }, { - "epoch": 0.046158098396904365, - "grad_norm": 2.828768730163574, - "learning_rate": 4.9047754405912455e-05, - "loss": 6.92, + "epoch": 0.18461198319699315, + "grad_norm": 2.362448215484619, + "learning_rate": 4.583644046731295e-05, + "loss": 4.3195, "step": 835 }, { - "epoch": 0.04643449419568822, - "grad_norm": 2.3771440982818604, - "learning_rate": 4.903354178510517e-05, - "loss": 6.8686, + "epoch": 0.18571744417422065, + "grad_norm": 2.380387306213379, + "learning_rate": 4.577429778772061e-05, + "loss": 4.4707, "step": 840 }, { - "epoch": 0.046710889994472087, - "grad_norm": 2.7735869884490967, - "learning_rate": 4.90193291642979e-05, - "loss": 6.3304, + "epoch": 0.18682290515144814, + "grad_norm": 2.4917492866516113, + "learning_rate": 4.5712155108128265e-05, + "loss": 4.5139, "step": 845 }, { - "epoch": 0.046987285793255944, - "grad_norm": 2.560903310775757, - "learning_rate": 4.900511654349062e-05, - "loss": 6.5665, + "epoch": 0.18792836612867567, + "grad_norm": 2.3864855766296387, + "learning_rate": 4.565001242853592e-05, + "loss": 4.5081, "step": 850 }, { - "epoch": 0.0472636815920398, - "grad_norm": 2.7346339225769043, - "learning_rate": 4.8990903922683344e-05, - "loss": 6.4082, + "epoch": 0.18903382710590316, + "grad_norm": 2.3583791255950928, + "learning_rate": 4.558786974894358e-05, + "loss": 4.3965, "step": 855 }, { - "epoch": 0.04754007739082366, - "grad_norm": 2.8105099201202393, - "learning_rate": 4.897669130187607e-05, - "loss": 6.55, + "epoch": 0.19013928808313066, + "grad_norm": 2.506446599960327, + "learning_rate": 4.552572706935123e-05, + "loss": 4.4016, "step": 860 }, { - "epoch": 0.047816473189607515, - "grad_norm": 3.484123468399048, - "learning_rate": 4.896247868106879e-05, - "loss": 6.313, + "epoch": 0.19124474906035818, + "grad_norm": 2.2975127696990967, + "learning_rate": 4.546358438975889e-05, + "loss": 4.4601, "step": 865 }, { - "epoch": 0.04809286898839138, - "grad_norm": 2.645644426345825, - "learning_rate": 4.8948266060261516e-05, - "loss": 6.3278, + "epoch": 0.19235021003758568, + "grad_norm": 2.433366537094116, + "learning_rate": 4.5401441710166546e-05, + "loss": 4.2386, "step": 870 }, { - "epoch": 0.04836926478717524, - "grad_norm": 2.5998237133026123, - "learning_rate": 4.893405343945423e-05, - "loss": 6.6541, + "epoch": 0.19345567101481317, + "grad_norm": 2.3259806632995605, + "learning_rate": 4.53392990305742e-05, + "loss": 4.3704, "step": 875 }, { - "epoch": 0.048645660585959094, - "grad_norm": 2.4943838119506836, - "learning_rate": 4.8919840818646964e-05, - "loss": 6.6822, + "epoch": 0.19456113199204067, + "grad_norm": 2.514643907546997, + "learning_rate": 4.527715635098186e-05, + "loss": 4.4008, "step": 880 }, { - "epoch": 0.04892205638474295, - "grad_norm": 3.156522035598755, - "learning_rate": 4.890562819783968e-05, - "loss": 6.5528, + "epoch": 0.1956665929692682, + "grad_norm": 2.3121140003204346, + "learning_rate": 4.5215013671389514e-05, + "loss": 4.2446, "step": 885 }, { - "epoch": 0.04919845218352681, - "grad_norm": 3.012643337249756, - "learning_rate": 4.8891415577032405e-05, - "loss": 6.4346, + "epoch": 0.1967720539464957, + "grad_norm": 2.412771224975586, + "learning_rate": 4.515287099179717e-05, + "loss": 4.4833, "step": 890 }, { - "epoch": 0.049474847982310666, - "grad_norm": 2.3925185203552246, - "learning_rate": 4.887720295622513e-05, - "loss": 6.7773, + "epoch": 0.19787751492372319, + "grad_norm": 2.4728493690490723, + "learning_rate": 4.509072831220482e-05, + "loss": 4.2572, "step": 895 }, { - "epoch": 0.04975124378109453, - "grad_norm": 2.846290349960327, - "learning_rate": 4.886299033541785e-05, - "loss": 6.3221, + "epoch": 0.1989829759009507, + "grad_norm": 2.3301310539245605, + "learning_rate": 4.502858563261248e-05, + "loss": 4.3495, "step": 900 }, { - "epoch": 0.05002763957987839, - "grad_norm": 2.715515375137329, - "learning_rate": 4.884877771461058e-05, - "loss": 6.4721, + "epoch": 0.2000884368781782, + "grad_norm": 2.5001354217529297, + "learning_rate": 4.496644295302014e-05, + "loss": 4.3298, "step": 905 }, { - "epoch": 0.050304035378662244, - "grad_norm": 2.458165407180786, - "learning_rate": 4.8834565093803294e-05, - "loss": 6.7841, + "epoch": 0.2011938978554057, + "grad_norm": 2.338364601135254, + "learning_rate": 4.490430027342779e-05, + "loss": 4.3985, "step": 910 }, { - "epoch": 0.0505804311774461, - "grad_norm": 3.0126917362213135, - "learning_rate": 4.8820352472996025e-05, - "loss": 6.5939, + "epoch": 0.2022993588326332, + "grad_norm": 2.30706524848938, + "learning_rate": 4.484215759383545e-05, + "loss": 4.3349, "step": 915 }, { - "epoch": 0.05085682697622996, - "grad_norm": 2.5035128593444824, - "learning_rate": 4.880613985218875e-05, - "loss": 6.4, + "epoch": 0.20340481980986072, + "grad_norm": 2.396179437637329, + "learning_rate": 4.478001491424311e-05, + "loss": 4.3986, "step": 920 }, { - "epoch": 0.05113322277501382, - "grad_norm": 3.497965097427368, - "learning_rate": 4.8791927231381466e-05, - "loss": 6.7437, + "epoch": 0.20451028078708822, + "grad_norm": 2.477341890335083, + "learning_rate": 4.471787223465076e-05, + "loss": 4.3669, "step": 925 }, { - "epoch": 0.05140961857379768, - "grad_norm": 2.88411021232605, - "learning_rate": 4.877771461057419e-05, - "loss": 6.4189, + "epoch": 0.2056157417643157, + "grad_norm": 2.5613510608673096, + "learning_rate": 4.465572955505842e-05, + "loss": 4.3262, "step": 930 }, { - "epoch": 0.05168601437258154, - "grad_norm": 2.9255616664886475, - "learning_rate": 4.8763501989766914e-05, - "loss": 6.7637, + "epoch": 0.20672120274154324, + "grad_norm": 2.5783421993255615, + "learning_rate": 4.459358687546607e-05, + "loss": 4.3136, "step": 935 }, { - "epoch": 0.051962410171365395, - "grad_norm": 3.0357139110565186, - "learning_rate": 4.874928936895964e-05, - "loss": 6.4721, + "epoch": 0.20782666371877073, + "grad_norm": 2.4187774658203125, + "learning_rate": 4.4531444195873725e-05, + "loss": 4.3181, "step": 940 }, { - "epoch": 0.05223880597014925, - "grad_norm": 2.340343713760376, - "learning_rate": 4.873507674815236e-05, - "loss": 6.3291, + "epoch": 0.20893212469599823, + "grad_norm": 2.5953481197357178, + "learning_rate": 4.446930151628139e-05, + "loss": 4.5064, "step": 945 }, { - "epoch": 0.05251520176893311, - "grad_norm": 2.570775270462036, - "learning_rate": 4.8720864127345086e-05, - "loss": 6.2555, + "epoch": 0.21003758567322572, + "grad_norm": 2.513113260269165, + "learning_rate": 4.440715883668904e-05, + "loss": 4.3423, "step": 950 }, { - "epoch": 0.05279159756771697, - "grad_norm": 2.6438934803009033, - "learning_rate": 4.870665150653781e-05, - "loss": 6.6452, + "epoch": 0.21114304665045325, + "grad_norm": 2.44311261177063, + "learning_rate": 4.4345016157096694e-05, + "loss": 4.376, "step": 955 }, { - "epoch": 0.05306799336650083, - "grad_norm": 3.2586843967437744, - "learning_rate": 4.869243888573053e-05, - "loss": 6.6964, + "epoch": 0.21224850762768074, + "grad_norm": 2.427305221557617, + "learning_rate": 4.428287347750435e-05, + "loss": 4.3677, "step": 960 }, { - "epoch": 0.05334438916528469, - "grad_norm": 2.744736433029175, - "learning_rate": 4.867822626492326e-05, - "loss": 6.3793, + "epoch": 0.21335396860490824, + "grad_norm": 2.3907687664031982, + "learning_rate": 4.4220730797912006e-05, + "loss": 4.3134, "step": 965 }, { - "epoch": 0.053620784964068545, - "grad_norm": 2.517343044281006, - "learning_rate": 4.866401364411598e-05, - "loss": 6.6729, + "epoch": 0.21445942958213576, + "grad_norm": 2.348848819732666, + "learning_rate": 4.415858811831967e-05, + "loss": 4.5477, "step": 970 }, { - "epoch": 0.0538971807628524, - "grad_norm": 2.725949287414551, - "learning_rate": 4.86498010233087e-05, - "loss": 6.341, + "epoch": 0.21556489055936326, + "grad_norm": 2.830244302749634, + "learning_rate": 4.409644543872732e-05, + "loss": 4.4073, "step": 975 }, { - "epoch": 0.054173576561636266, - "grad_norm": 2.8635339736938477, - "learning_rate": 4.863558840250142e-05, - "loss": 6.4309, + "epoch": 0.21667035153659076, + "grad_norm": 2.5423595905303955, + "learning_rate": 4.4034302759134975e-05, + "loss": 4.4871, "step": 980 }, { - "epoch": 0.054449972360420124, - "grad_norm": 2.70915150642395, - "learning_rate": 4.862137578169415e-05, - "loss": 6.3596, + "epoch": 0.21777581251381825, + "grad_norm": 2.4786319732666016, + "learning_rate": 4.397216007954264e-05, + "loss": 4.4376, "step": 985 }, { - "epoch": 0.05472636815920398, - "grad_norm": 3.162316083908081, - "learning_rate": 4.860716316088687e-05, - "loss": 6.6671, + "epoch": 0.21888127349104577, + "grad_norm": 2.5218095779418945, + "learning_rate": 4.391001739995029e-05, + "loss": 4.5045, "step": 990 }, { - "epoch": 0.05500276395798784, - "grad_norm": 2.084820508956909, - "learning_rate": 4.859295054007959e-05, - "loss": 6.5709, + "epoch": 0.21998673446827327, + "grad_norm": 2.492645740509033, + "learning_rate": 4.384787472035794e-05, + "loss": 4.3694, "step": 995 }, { - "epoch": 0.055279159756771695, - "grad_norm": 2.8458499908447266, - "learning_rate": 4.857873791927232e-05, - "loss": 6.4753, + "epoch": 0.22109219544550077, + "grad_norm": 2.3848962783813477, + "learning_rate": 4.37857320407656e-05, + "loss": 4.2347, "step": 1000 }, { - "epoch": 0.05555555555555555, - "grad_norm": 3.0729644298553467, - "learning_rate": 4.856452529846504e-05, - "loss": 6.5452, + "epoch": 0.2221976564227283, + "grad_norm": 2.4425323009490967, + "learning_rate": 4.3723589361173255e-05, + "loss": 4.4254, "step": 1005 }, { - "epoch": 0.05583195135433942, - "grad_norm": 2.171372890472412, - "learning_rate": 4.855031267765776e-05, - "loss": 6.2128, + "epoch": 0.2233031173999558, + "grad_norm": 2.4466652870178223, + "learning_rate": 4.366144668158091e-05, + "loss": 4.3855, "step": 1010 }, { - "epoch": 0.056108347153123274, - "grad_norm": 2.624799966812134, - "learning_rate": 4.8536100056850484e-05, - "loss": 6.4788, + "epoch": 0.22440857837718328, + "grad_norm": 2.5668978691101074, + "learning_rate": 4.359930400198857e-05, + "loss": 4.1885, "step": 1015 }, { - "epoch": 0.05638474295190713, - "grad_norm": 2.8430840969085693, - "learning_rate": 4.852188743604321e-05, - "loss": 6.3949, + "epoch": 0.22551403935441078, + "grad_norm": 2.1038079261779785, + "learning_rate": 4.3537161322396224e-05, + "loss": 4.3574, "step": 1020 }, { - "epoch": 0.05666113875069099, - "grad_norm": 3.119079351425171, - "learning_rate": 4.850767481523593e-05, - "loss": 6.6569, + "epoch": 0.2266195003316383, + "grad_norm": 2.3468997478485107, + "learning_rate": 4.347501864280388e-05, + "loss": 4.3984, "step": 1025 }, { - "epoch": 0.056937534549474846, - "grad_norm": 2.7463550567626953, - "learning_rate": 4.8493462194428656e-05, - "loss": 6.5224, + "epoch": 0.2277249613088658, + "grad_norm": 2.2503867149353027, + "learning_rate": 4.3412875963211536e-05, + "loss": 4.3913, "step": 1030 }, { - "epoch": 0.05721393034825871, - "grad_norm": 2.7679648399353027, - "learning_rate": 4.847924957362138e-05, - "loss": 6.5559, + "epoch": 0.2288304222860933, + "grad_norm": 2.508117437362671, + "learning_rate": 4.335073328361919e-05, + "loss": 4.4638, "step": 1035 }, { - "epoch": 0.05749032614704257, - "grad_norm": 3.098196268081665, - "learning_rate": 4.8465036952814104e-05, - "loss": 6.4804, + "epoch": 0.22993588326332082, + "grad_norm": 2.503089666366577, + "learning_rate": 4.328859060402685e-05, + "loss": 4.2682, "step": 1040 }, { - "epoch": 0.057766721945826424, - "grad_norm": 2.649479866027832, - "learning_rate": 4.845082433200682e-05, - "loss": 6.553, + "epoch": 0.2310413442405483, + "grad_norm": 2.4912095069885254, + "learning_rate": 4.3226447924434505e-05, + "loss": 4.4836, "step": 1045 }, { - "epoch": 0.05804311774461028, - "grad_norm": 3.0194578170776367, - "learning_rate": 4.8436611711199545e-05, - "loss": 6.2782, + "epoch": 0.2321468052177758, + "grad_norm": 2.383793354034424, + "learning_rate": 4.3164305244842154e-05, + "loss": 4.4063, "step": 1050 }, { - "epoch": 0.05831951354339414, - "grad_norm": 2.331939220428467, - "learning_rate": 4.8422399090392276e-05, - "loss": 6.4653, + "epoch": 0.2332522661950033, + "grad_norm": 2.299375534057617, + "learning_rate": 4.310216256524982e-05, + "loss": 4.3989, "step": 1055 }, { - "epoch": 0.058595909342177996, - "grad_norm": 2.84271502494812, - "learning_rate": 4.840818646958499e-05, - "loss": 6.5789, + "epoch": 0.23435772717223083, + "grad_norm": 2.432926893234253, + "learning_rate": 4.304001988565747e-05, + "loss": 4.3972, "step": 1060 }, { - "epoch": 0.05887230514096186, - "grad_norm": 2.8978166580200195, - "learning_rate": 4.839397384877772e-05, - "loss": 6.495, + "epoch": 0.23546318814945832, + "grad_norm": 2.6002376079559326, + "learning_rate": 4.297787720606512e-05, + "loss": 4.166, "step": 1065 }, { - "epoch": 0.05914870093974572, - "grad_norm": 2.4874818325042725, - "learning_rate": 4.837976122797044e-05, - "loss": 6.5368, + "epoch": 0.23656864912668582, + "grad_norm": 2.76485013961792, + "learning_rate": 4.2915734526472786e-05, + "loss": 4.4923, "step": 1070 }, { - "epoch": 0.059425096738529574, - "grad_norm": 3.212247848510742, - "learning_rate": 4.8365548607163165e-05, - "loss": 6.3398, + "epoch": 0.23767411010391334, + "grad_norm": 2.4608538150787354, + "learning_rate": 4.285359184688044e-05, + "loss": 4.4156, "step": 1075 }, { - "epoch": 0.05970149253731343, - "grad_norm": 2.3944737911224365, - "learning_rate": 4.835133598635589e-05, - "loss": 6.6459, + "epoch": 0.23877957108114084, + "grad_norm": 2.5879130363464355, + "learning_rate": 4.279144916728809e-05, + "loss": 4.2349, "step": 1080 }, { - "epoch": 0.05997788833609729, - "grad_norm": 3.152069091796875, - "learning_rate": 4.8337123365548606e-05, - "loss": 6.2656, + "epoch": 0.23988503205836834, + "grad_norm": 2.4327921867370605, + "learning_rate": 4.2729306487695754e-05, + "loss": 4.2487, "step": 1085 }, { - "epoch": 0.06025428413488115, - "grad_norm": 3.0014097690582275, - "learning_rate": 4.832291074474134e-05, - "loss": 6.5379, + "epoch": 0.24099049303559583, + "grad_norm": 2.4870424270629883, + "learning_rate": 4.2667163808103404e-05, + "loss": 4.499, "step": 1090 }, { - "epoch": 0.06053067993366501, - "grad_norm": 2.4241628646850586, - "learning_rate": 4.8308698123934054e-05, - "loss": 6.518, + "epoch": 0.24209595401282336, + "grad_norm": 2.573253631591797, + "learning_rate": 4.2605021128511067e-05, + "loss": 4.2689, "step": 1095 }, { - "epoch": 0.06080707573244887, - "grad_norm": 2.194061040878296, - "learning_rate": 4.829448550312678e-05, - "loss": 6.2291, + "epoch": 0.24320141499005085, + "grad_norm": 2.4426496028900146, + "learning_rate": 4.254287844891872e-05, + "loss": 4.3502, "step": 1100 }, { - "epoch": 0.061083471531232725, - "grad_norm": 2.6672093868255615, - "learning_rate": 4.82802728823195e-05, - "loss": 6.2079, + "epoch": 0.24430687596727835, + "grad_norm": 2.2450709342956543, + "learning_rate": 4.248073576932637e-05, + "loss": 4.3314, "step": 1105 }, { - "epoch": 0.06135986733001658, - "grad_norm": 2.9733340740203857, - "learning_rate": 4.8266060261512226e-05, - "loss": 6.7986, + "epoch": 0.24541233694450587, + "grad_norm": 2.6109743118286133, + "learning_rate": 4.2418593089734035e-05, + "loss": 4.305, "step": 1110 }, { - "epoch": 0.06163626312880044, - "grad_norm": 2.6432945728302, - "learning_rate": 4.825184764070495e-05, - "loss": 6.3901, + "epoch": 0.24651779792173337, + "grad_norm": 2.626323938369751, + "learning_rate": 4.2356450410141684e-05, + "loss": 4.2716, "step": 1115 }, { - "epoch": 0.0619126589275843, - "grad_norm": 2.304767370223999, - "learning_rate": 4.823763501989767e-05, - "loss": 6.3117, + "epoch": 0.24762325889896086, + "grad_norm": 2.320756673812866, + "learning_rate": 4.229430773054934e-05, + "loss": 4.4438, "step": 1120 }, { - "epoch": 0.06218905472636816, - "grad_norm": 2.7202680110931396, - "learning_rate": 4.82234223990904e-05, - "loss": 6.5007, + "epoch": 0.24872871987618836, + "grad_norm": 2.481062650680542, + "learning_rate": 4.2232165050957004e-05, + "loss": 4.4925, "step": 1125 }, { - "epoch": 0.06246545052515202, - "grad_norm": 2.201432943344116, - "learning_rate": 4.8209209778283115e-05, - "loss": 6.3724, + "epoch": 0.24983418085341588, + "grad_norm": 2.521596908569336, + "learning_rate": 4.217002237136465e-05, + "loss": 4.4221, "step": 1130 }, { - "epoch": 0.06274184632393588, - "grad_norm": 2.9685938358306885, - "learning_rate": 4.819499715747584e-05, - "loss": 6.4615, + "epoch": 0.2509396418306434, + "grad_norm": 2.361933469772339, + "learning_rate": 4.210787969177231e-05, + "loss": 4.3693, "step": 1135 }, { - "epoch": 0.06301824212271974, - "grad_norm": 2.2716915607452393, - "learning_rate": 4.818078453666856e-05, - "loss": 6.4481, + "epoch": 0.2520451028078709, + "grad_norm": 2.357417106628418, + "learning_rate": 4.204573701217997e-05, + "loss": 4.4775, "step": 1140 }, { - "epoch": 0.0632946379215036, - "grad_norm": 2.6046628952026367, - "learning_rate": 4.816657191586129e-05, - "loss": 6.4018, + "epoch": 0.25315056378509837, + "grad_norm": 2.688908576965332, + "learning_rate": 4.198359433258762e-05, + "loss": 4.29, "step": 1145 }, { - "epoch": 0.06357103372028745, - "grad_norm": 3.6056673526763916, - "learning_rate": 4.815235929505401e-05, - "loss": 6.4416, + "epoch": 0.2542560247623259, + "grad_norm": 2.2829039096832275, + "learning_rate": 4.192145165299528e-05, + "loss": 4.401, "step": 1150 }, { - "epoch": 0.06384742951907131, - "grad_norm": 3.628232002258301, - "learning_rate": 4.813814667424673e-05, - "loss": 6.5809, + "epoch": 0.2553614857395534, + "grad_norm": 2.6343767642974854, + "learning_rate": 4.1859308973402934e-05, + "loss": 4.4336, "step": 1155 }, { - "epoch": 0.06412382531785517, - "grad_norm": 2.789034128189087, - "learning_rate": 4.812393405343946e-05, - "loss": 6.3966, + "epoch": 0.2564669467167809, + "grad_norm": 2.3044660091400146, + "learning_rate": 4.179716629381059e-05, + "loss": 4.3832, "step": 1160 }, { - "epoch": 0.06440022111663903, - "grad_norm": 2.2794246673583984, - "learning_rate": 4.810972143263218e-05, - "loss": 6.3319, + "epoch": 0.2575724076940084, + "grad_norm": 2.5719525814056396, + "learning_rate": 4.1735023614218246e-05, + "loss": 4.2833, "step": 1165 }, { - "epoch": 0.06467661691542288, - "grad_norm": 2.4310152530670166, - "learning_rate": 4.80955088118249e-05, - "loss": 6.5324, + "epoch": 0.2586778686712359, + "grad_norm": 2.6642727851867676, + "learning_rate": 4.16728809346259e-05, + "loss": 4.3051, "step": 1170 }, { - "epoch": 0.06495301271420674, - "grad_norm": 2.6283979415893555, - "learning_rate": 4.8081296191017624e-05, - "loss": 6.5129, + "epoch": 0.2597833296484634, + "grad_norm": 2.5633628368377686, + "learning_rate": 4.161073825503356e-05, + "loss": 4.3245, "step": 1175 }, { - "epoch": 0.0652294085129906, - "grad_norm": 2.8399784564971924, - "learning_rate": 4.806708357021035e-05, - "loss": 6.5845, + "epoch": 0.2608887906256909, + "grad_norm": 2.3659725189208984, + "learning_rate": 4.1548595575441215e-05, + "loss": 4.5625, "step": 1180 }, { - "epoch": 0.06550580431177445, - "grad_norm": 2.680716037750244, - "learning_rate": 4.805287094940307e-05, - "loss": 6.6554, + "epoch": 0.2619942516029184, + "grad_norm": 2.5750010013580322, + "learning_rate": 4.148645289584887e-05, + "loss": 4.2276, "step": 1185 }, { - "epoch": 0.06578220011055833, - "grad_norm": 3.5662529468536377, - "learning_rate": 4.8038658328595796e-05, - "loss": 6.3653, + "epoch": 0.2630997125801459, + "grad_norm": 2.650841474533081, + "learning_rate": 4.142431021625653e-05, + "loss": 4.4841, "step": 1190 }, { - "epoch": 0.06605859590934218, - "grad_norm": 2.429107666015625, - "learning_rate": 4.802444570778852e-05, - "loss": 6.2625, + "epoch": 0.26420517355737344, + "grad_norm": 2.257554292678833, + "learning_rate": 4.136216753666418e-05, + "loss": 4.5292, "step": 1195 }, { - "epoch": 0.06633499170812604, - "grad_norm": 2.9469411373138428, - "learning_rate": 4.8010233086981244e-05, - "loss": 6.3618, + "epoch": 0.2653106345346009, + "grad_norm": 2.3063228130340576, + "learning_rate": 4.130002485707184e-05, + "loss": 4.3253, "step": 1200 }, { - "epoch": 0.0666113875069099, - "grad_norm": 3.321293592453003, - "learning_rate": 4.799602046617396e-05, - "loss": 6.262, + "epoch": 0.26641609551182843, + "grad_norm": 2.4297571182250977, + "learning_rate": 4.123788217747949e-05, + "loss": 4.3772, "step": 1205 }, { - "epoch": 0.06688778330569375, - "grad_norm": 2.542504072189331, - "learning_rate": 4.7981807845366685e-05, - "loss": 6.2755, + "epoch": 0.26752155648905596, + "grad_norm": 2.431993007659912, + "learning_rate": 4.117573949788715e-05, + "loss": 4.3032, "step": 1210 }, { - "epoch": 0.06716417910447761, - "grad_norm": 2.622391700744629, - "learning_rate": 4.796759522455941e-05, - "loss": 6.2732, + "epoch": 0.2686270174662834, + "grad_norm": 2.3991315364837646, + "learning_rate": 4.111359681829481e-05, + "loss": 4.3427, "step": 1215 }, { - "epoch": 0.06744057490326147, - "grad_norm": 2.2811062335968018, - "learning_rate": 4.7953382603752133e-05, - "loss": 6.4282, + "epoch": 0.26973247844351095, + "grad_norm": 2.3820011615753174, + "learning_rate": 4.1051454138702464e-05, + "loss": 4.3706, "step": 1220 }, { - "epoch": 0.06771697070204533, - "grad_norm": 2.973114013671875, - "learning_rate": 4.793916998294486e-05, - "loss": 6.2921, + "epoch": 0.27083793942073847, + "grad_norm": 2.670473337173462, + "learning_rate": 4.098931145911012e-05, + "loss": 4.3521, "step": 1225 }, { - "epoch": 0.06799336650082918, - "grad_norm": 2.5322935581207275, - "learning_rate": 4.792495736213758e-05, - "loss": 6.2762, + "epoch": 0.27194340039796594, + "grad_norm": 2.8199636936187744, + "learning_rate": 4.0927168779517776e-05, + "loss": 4.3276, "step": 1230 }, { - "epoch": 0.06826976229961304, - "grad_norm": 3.3714489936828613, - "learning_rate": 4.7910744741330305e-05, - "loss": 6.4914, + "epoch": 0.27304886137519346, + "grad_norm": 2.347820520401001, + "learning_rate": 4.086502609992543e-05, + "loss": 4.3414, "step": 1235 }, { - "epoch": 0.0685461580983969, - "grad_norm": 2.4342074394226074, - "learning_rate": 4.789653212052302e-05, - "loss": 6.1297, + "epoch": 0.274154322352421, + "grad_norm": 2.271981716156006, + "learning_rate": 4.080288342033309e-05, + "loss": 4.3148, "step": 1240 }, { - "epoch": 0.06882255389718077, - "grad_norm": 3.002720594406128, - "learning_rate": 4.7882319499715747e-05, - "loss": 6.4413, + "epoch": 0.27525978332964846, + "grad_norm": 2.515171527862549, + "learning_rate": 4.074074074074074e-05, + "loss": 4.3787, "step": 1245 }, { - "epoch": 0.06909894969596463, - "grad_norm": 2.581346273422241, - "learning_rate": 4.786810687890848e-05, - "loss": 6.5842, + "epoch": 0.276365244306876, + "grad_norm": 2.4658026695251465, + "learning_rate": 4.06785980611484e-05, + "loss": 4.4014, "step": 1250 }, { - "epoch": 0.06937534549474848, - "grad_norm": 2.6828501224517822, - "learning_rate": 4.7853894258101195e-05, - "loss": 6.3015, + "epoch": 0.27747070528410345, + "grad_norm": 2.4536259174346924, + "learning_rate": 4.061645538155606e-05, + "loss": 4.2641, "step": 1255 }, { - "epoch": 0.06965174129353234, - "grad_norm": 3.318371295928955, - "learning_rate": 4.783968163729392e-05, - "loss": 6.561, + "epoch": 0.27857616626133097, + "grad_norm": 2.491704225540161, + "learning_rate": 4.055431270196371e-05, + "loss": 4.3729, "step": 1260 }, { - "epoch": 0.0699281370923162, - "grad_norm": 3.4630489349365234, - "learning_rate": 4.782546901648664e-05, - "loss": 6.7319, + "epoch": 0.2796816272385585, + "grad_norm": 2.5859057903289795, + "learning_rate": 4.049217002237137e-05, + "loss": 4.3815, "step": 1265 }, { - "epoch": 0.07020453289110005, - "grad_norm": 3.342996597290039, - "learning_rate": 4.7811256395679366e-05, - "loss": 6.2168, + "epoch": 0.28078708821578596, + "grad_norm": 2.5725574493408203, + "learning_rate": 4.043002734277902e-05, + "loss": 4.3624, "step": 1270 }, { - "epoch": 0.07048092868988391, - "grad_norm": 2.5235445499420166, - "learning_rate": 4.779704377487209e-05, - "loss": 6.5335, + "epoch": 0.2818925491930135, + "grad_norm": 2.484657049179077, + "learning_rate": 4.0367884663186675e-05, + "loss": 4.3583, "step": 1275 }, { - "epoch": 0.07075732448866777, - "grad_norm": 3.2131083011627197, - "learning_rate": 4.7782831154064814e-05, - "loss": 6.4834, + "epoch": 0.282998010170241, + "grad_norm": 2.544689178466797, + "learning_rate": 4.030574198359434e-05, + "loss": 4.2289, "step": 1280 }, { - "epoch": 0.07103372028745163, - "grad_norm": 2.924309015274048, - "learning_rate": 4.776861853325754e-05, - "loss": 6.3069, + "epoch": 0.2841034711474685, + "grad_norm": 2.5880086421966553, + "learning_rate": 4.024359930400199e-05, + "loss": 4.3604, "step": 1285 }, { - "epoch": 0.07131011608623548, - "grad_norm": 2.966257333755493, - "learning_rate": 4.7754405912450256e-05, - "loss": 6.1896, + "epoch": 0.285208932124696, + "grad_norm": 2.614906072616577, + "learning_rate": 4.0181456624409644e-05, + "loss": 4.2697, "step": 1290 }, { - "epoch": 0.07158651188501934, - "grad_norm": 2.937412977218628, - "learning_rate": 4.774019329164298e-05, - "loss": 6.4074, + "epoch": 0.2863143931019235, + "grad_norm": 2.6999433040618896, + "learning_rate": 4.011931394481731e-05, + "loss": 4.4131, "step": 1295 }, { - "epoch": 0.07186290768380321, - "grad_norm": 2.4649055004119873, - "learning_rate": 4.772598067083571e-05, - "loss": 6.4064, + "epoch": 0.287419854079151, + "grad_norm": 2.3542439937591553, + "learning_rate": 4.0057171265224956e-05, + "loss": 4.3436, "step": 1300 }, { - "epoch": 0.07213930348258707, - "grad_norm": 2.4708802700042725, - "learning_rate": 4.771176805002843e-05, - "loss": 6.2426, + "epoch": 0.2885253150563785, + "grad_norm": 2.4977333545684814, + "learning_rate": 3.999502858563262e-05, + "loss": 4.2333, "step": 1305 }, { - "epoch": 0.07241569928137093, - "grad_norm": 2.374051094055176, - "learning_rate": 4.769755542922115e-05, - "loss": 6.1917, + "epoch": 0.28963077603360604, + "grad_norm": 2.3839094638824463, + "learning_rate": 3.993288590604027e-05, + "loss": 4.2906, "step": 1310 }, { - "epoch": 0.07269209508015478, - "grad_norm": 5.211256504058838, - "learning_rate": 4.7683342808413875e-05, - "loss": 6.0755, + "epoch": 0.2907362370108335, + "grad_norm": 2.583096504211426, + "learning_rate": 3.9870743226447925e-05, + "loss": 4.2372, "step": 1315 }, { - "epoch": 0.07296849087893864, - "grad_norm": 2.7231972217559814, - "learning_rate": 4.76691301876066e-05, - "loss": 6.2229, + "epoch": 0.29184169798806103, + "grad_norm": 2.8082754611968994, + "learning_rate": 3.980860054685559e-05, + "loss": 4.3763, "step": 1320 }, { - "epoch": 0.0732448866777225, - "grad_norm": 2.7478833198547363, - "learning_rate": 4.765491756679932e-05, - "loss": 6.2957, + "epoch": 0.2929471589652885, + "grad_norm": 2.699869394302368, + "learning_rate": 3.974645786726324e-05, + "loss": 4.3501, "step": 1325 }, { - "epoch": 0.07352128247650636, - "grad_norm": 2.573377847671509, - "learning_rate": 4.764070494599204e-05, - "loss": 6.1357, + "epoch": 0.294052619942516, + "grad_norm": 2.489060878753662, + "learning_rate": 3.968431518767089e-05, + "loss": 4.3261, "step": 1330 }, { - "epoch": 0.07379767827529021, - "grad_norm": 2.7243764400482178, - "learning_rate": 4.762649232518477e-05, - "loss": 6.4513, + "epoch": 0.29515808091974355, + "grad_norm": 2.6914567947387695, + "learning_rate": 3.962217250807855e-05, + "loss": 4.3582, "step": 1335 }, { - "epoch": 0.07407407407407407, - "grad_norm": 2.9036333560943604, - "learning_rate": 4.761227970437749e-05, - "loss": 6.296, + "epoch": 0.296263541896971, + "grad_norm": 2.6697006225585938, + "learning_rate": 3.9560029828486205e-05, + "loss": 4.3114, "step": 1340 }, { - "epoch": 0.07435046987285793, - "grad_norm": 3.135991096496582, - "learning_rate": 4.759806708357021e-05, - "loss": 6.4276, + "epoch": 0.29736900287419854, + "grad_norm": 2.5954415798187256, + "learning_rate": 3.949788714889386e-05, + "loss": 4.2934, "step": 1345 }, { - "epoch": 0.07462686567164178, - "grad_norm": 2.428114175796509, - "learning_rate": 4.7583854462762937e-05, - "loss": 6.2983, + "epoch": 0.29847446385142606, + "grad_norm": 2.985745906829834, + "learning_rate": 3.943574446930152e-05, + "loss": 4.3548, "step": 1350 }, { - "epoch": 0.07490326147042566, - "grad_norm": 2.220736026763916, - "learning_rate": 4.756964184195566e-05, - "loss": 6.2962, + "epoch": 0.29957992482865353, + "grad_norm": 2.397188186645508, + "learning_rate": 3.9373601789709174e-05, + "loss": 4.378, "step": 1355 }, { - "epoch": 0.07517965726920951, - "grad_norm": 2.5993216037750244, - "learning_rate": 4.7555429221148385e-05, - "loss": 6.3815, + "epoch": 0.30068538580588106, + "grad_norm": 2.328190565109253, + "learning_rate": 3.931145911011683e-05, + "loss": 4.3864, "step": 1360 }, { - "epoch": 0.07545605306799337, - "grad_norm": 2.7360010147094727, - "learning_rate": 4.75412166003411e-05, - "loss": 6.3496, + "epoch": 0.3017908467831086, + "grad_norm": 2.659130096435547, + "learning_rate": 3.9249316430524486e-05, + "loss": 4.2503, "step": 1365 }, { - "epoch": 0.07573244886677723, - "grad_norm": 2.4501261711120605, - "learning_rate": 4.752700397953383e-05, - "loss": 6.3474, + "epoch": 0.30289630776033605, + "grad_norm": 2.5458106994628906, + "learning_rate": 3.918717375093214e-05, + "loss": 4.4694, "step": 1370 }, { - "epoch": 0.07600884466556108, - "grad_norm": 2.4407808780670166, - "learning_rate": 4.751279135872655e-05, - "loss": 6.079, + "epoch": 0.30400176873756357, + "grad_norm": 2.6253693103790283, + "learning_rate": 3.91250310713398e-05, + "loss": 4.3011, "step": 1375 }, { - "epoch": 0.07628524046434494, - "grad_norm": 2.870891571044922, - "learning_rate": 4.7498578737919274e-05, - "loss": 6.009, + "epoch": 0.3051072297147911, + "grad_norm": 2.5949649810791016, + "learning_rate": 3.9062888391747455e-05, + "loss": 4.3781, "step": 1380 }, { - "epoch": 0.0765616362631288, - "grad_norm": 2.539259910583496, - "learning_rate": 4.7484366117112e-05, - "loss": 6.4406, + "epoch": 0.30621269069201856, + "grad_norm": 2.6035447120666504, + "learning_rate": 3.900074571215511e-05, + "loss": 4.215, "step": 1385 }, { - "epoch": 0.07683803206191266, - "grad_norm": 3.2458794116973877, - "learning_rate": 4.747015349630472e-05, - "loss": 6.1957, + "epoch": 0.3073181516692461, + "grad_norm": 2.7866146564483643, + "learning_rate": 3.893860303256277e-05, + "loss": 4.3382, "step": 1390 }, { - "epoch": 0.07711442786069651, - "grad_norm": 2.6678242683410645, - "learning_rate": 4.7455940875497446e-05, - "loss": 6.3314, + "epoch": 0.30842361264647356, + "grad_norm": 2.5743088722229004, + "learning_rate": 3.887646035297042e-05, + "loss": 4.3505, "step": 1395 }, { - "epoch": 0.07739082365948037, - "grad_norm": 2.6106460094451904, - "learning_rate": 4.744172825469016e-05, - "loss": 6.1362, + "epoch": 0.3095290736237011, + "grad_norm": 2.6363112926483154, + "learning_rate": 3.881431767337807e-05, + "loss": 4.37, "step": 1400 }, { - "epoch": 0.07766721945826423, - "grad_norm": 2.8481876850128174, - "learning_rate": 4.7427515633882894e-05, - "loss": 6.3231, + "epoch": 0.3106345346009286, + "grad_norm": 2.409414291381836, + "learning_rate": 3.8752174993785736e-05, + "loss": 4.3642, "step": 1405 }, { - "epoch": 0.0779436152570481, - "grad_norm": 2.367493152618408, - "learning_rate": 4.741330301307562e-05, - "loss": 6.0713, + "epoch": 0.31173999557815607, + "grad_norm": 2.6767184734344482, + "learning_rate": 3.869003231419339e-05, + "loss": 4.4374, "step": 1410 }, { - "epoch": 0.07822001105583196, - "grad_norm": 3.546706199645996, - "learning_rate": 4.7399090392268335e-05, - "loss": 6.1802, + "epoch": 0.3128454565553836, + "grad_norm": 2.6071739196777344, + "learning_rate": 3.862788963460104e-05, + "loss": 4.4875, "step": 1415 }, { - "epoch": 0.07849640685461581, - "grad_norm": 2.659611701965332, - "learning_rate": 4.738487777146106e-05, - "loss": 6.2043, + "epoch": 0.3139509175326111, + "grad_norm": 2.8153324127197266, + "learning_rate": 3.8565746955008704e-05, + "loss": 4.2156, "step": 1420 }, { - "epoch": 0.07877280265339967, - "grad_norm": 2.9869518280029297, - "learning_rate": 4.737066515065378e-05, - "loss": 6.354, + "epoch": 0.3150563785098386, + "grad_norm": 2.5854175090789795, + "learning_rate": 3.8503604275416354e-05, + "loss": 4.4762, "step": 1425 }, { - "epoch": 0.07904919845218353, - "grad_norm": 2.5255351066589355, - "learning_rate": 4.735645252984651e-05, - "loss": 6.0689, + "epoch": 0.3161618394870661, + "grad_norm": 2.6283559799194336, + "learning_rate": 3.8441461595824017e-05, + "loss": 4.3707, "step": 1430 }, { - "epoch": 0.07932559425096739, - "grad_norm": 3.4141147136688232, - "learning_rate": 4.7342239909039224e-05, - "loss": 6.2125, + "epoch": 0.31726730046429363, + "grad_norm": 2.392477512359619, + "learning_rate": 3.837931891623167e-05, + "loss": 4.4578, "step": 1435 }, { - "epoch": 0.07960199004975124, - "grad_norm": 2.526256799697876, - "learning_rate": 4.7328027288231955e-05, - "loss": 6.055, + "epoch": 0.3183727614415211, + "grad_norm": 2.5749545097351074, + "learning_rate": 3.831717623663932e-05, + "loss": 4.3093, "step": 1440 }, { - "epoch": 0.0798783858485351, - "grad_norm": 2.8614776134490967, - "learning_rate": 4.731381466742468e-05, - "loss": 6.3068, + "epoch": 0.3194782224187486, + "grad_norm": 2.57065486907959, + "learning_rate": 3.8255033557046985e-05, + "loss": 4.4154, "step": 1445 }, { - "epoch": 0.08015478164731896, - "grad_norm": 2.7898523807525635, - "learning_rate": 4.7299602046617396e-05, - "loss": 6.2226, + "epoch": 0.32058368339597615, + "grad_norm": 2.652879476547241, + "learning_rate": 3.819289087745464e-05, + "loss": 4.5573, "step": 1450 }, { - "epoch": 0.08043117744610281, - "grad_norm": 2.2415056228637695, - "learning_rate": 4.728538942581012e-05, - "loss": 6.1964, + "epoch": 0.3216891443732036, + "grad_norm": 2.846167802810669, + "learning_rate": 3.813074819786229e-05, + "loss": 4.4113, "step": 1455 }, { - "epoch": 0.08070757324488667, - "grad_norm": 3.079174280166626, - "learning_rate": 4.7271176805002844e-05, - "loss": 6.2307, + "epoch": 0.32279460535043114, + "grad_norm": 2.641319513320923, + "learning_rate": 3.8068605518269954e-05, + "loss": 4.3614, "step": 1460 }, { - "epoch": 0.08098396904367054, - "grad_norm": 2.619187355041504, - "learning_rate": 4.725696418419557e-05, - "loss": 6.1481, + "epoch": 0.3239000663276586, + "grad_norm": 2.5918776988983154, + "learning_rate": 3.80064628386776e-05, + "loss": 4.3636, "step": 1465 }, { - "epoch": 0.0812603648424544, - "grad_norm": 3.067775249481201, - "learning_rate": 4.724275156338829e-05, - "loss": 6.3701, + "epoch": 0.32500552730488613, + "grad_norm": 2.6786410808563232, + "learning_rate": 3.794432015908526e-05, + "loss": 4.3731, "step": 1470 }, { - "epoch": 0.08153676064123826, - "grad_norm": 2.4046452045440674, - "learning_rate": 4.7228538942581016e-05, - "loss": 6.1519, + "epoch": 0.32611098828211366, + "grad_norm": 2.548100233078003, + "learning_rate": 3.788217747949292e-05, + "loss": 4.2728, "step": 1475 }, { - "epoch": 0.08181315644002211, - "grad_norm": 2.766226291656494, - "learning_rate": 4.721432632177374e-05, - "loss": 6.1802, + "epoch": 0.3272164492593411, + "grad_norm": 2.409332752227783, + "learning_rate": 3.782003479990057e-05, + "loss": 4.3442, "step": 1480 }, { - "epoch": 0.08208955223880597, - "grad_norm": 2.1929092407226562, - "learning_rate": 4.720011370096646e-05, - "loss": 6.4416, + "epoch": 0.32832191023656865, + "grad_norm": 2.8180229663848877, + "learning_rate": 3.775789212030823e-05, + "loss": 4.3566, "step": 1485 }, { - "epoch": 0.08236594803758983, - "grad_norm": 2.9942455291748047, - "learning_rate": 4.718590108015918e-05, - "loss": 6.1566, + "epoch": 0.32942737121379617, + "grad_norm": 2.634147882461548, + "learning_rate": 3.7695749440715884e-05, + "loss": 4.4708, "step": 1490 }, { - "epoch": 0.08264234383637369, - "grad_norm": 3.3760600090026855, - "learning_rate": 4.717168845935191e-05, - "loss": 6.2593, + "epoch": 0.33053283219102364, + "grad_norm": 2.3490123748779297, + "learning_rate": 3.763360676112354e-05, + "loss": 4.2733, "step": 1495 }, { - "epoch": 0.08291873963515754, - "grad_norm": 3.459946632385254, - "learning_rate": 4.715747583854463e-05, - "loss": 6.0409, + "epoch": 0.33163829316825116, + "grad_norm": 2.638009548187256, + "learning_rate": 3.7571464081531196e-05, + "loss": 4.4472, "step": 1500 }, { - "epoch": 0.0831951354339414, - "grad_norm": 2.9691433906555176, - "learning_rate": 4.714326321773735e-05, - "loss": 6.2912, + "epoch": 0.3327437541454787, + "grad_norm": 2.601348638534546, + "learning_rate": 3.750932140193885e-05, + "loss": 4.5207, "step": 1505 }, { - "epoch": 0.08347153123272526, - "grad_norm": 2.3945460319519043, - "learning_rate": 4.712905059693008e-05, - "loss": 6.3243, + "epoch": 0.33384921512270616, + "grad_norm": 2.6195290088653564, + "learning_rate": 3.744717872234651e-05, + "loss": 4.3151, "step": 1510 }, { - "epoch": 0.08374792703150911, - "grad_norm": 3.2403388023376465, - "learning_rate": 4.71148379761228e-05, - "loss": 5.8076, + "epoch": 0.3349546760999337, + "grad_norm": 2.5007519721984863, + "learning_rate": 3.7385036042754165e-05, + "loss": 4.3751, "step": 1515 }, { - "epoch": 0.08402432283029299, - "grad_norm": 2.4649736881256104, - "learning_rate": 4.710062535531552e-05, - "loss": 6.1595, + "epoch": 0.3360601370771612, + "grad_norm": 2.4757566452026367, + "learning_rate": 3.732289336316182e-05, + "loss": 4.2864, "step": 1520 }, { - "epoch": 0.08430071862907684, - "grad_norm": 2.9593818187713623, - "learning_rate": 4.708641273450824e-05, - "loss": 6.1091, + "epoch": 0.33716559805438867, + "grad_norm": 2.612262487411499, + "learning_rate": 3.726075068356948e-05, + "loss": 4.4617, "step": 1525 }, { - "epoch": 0.0845771144278607, - "grad_norm": 2.794445753097534, - "learning_rate": 4.707220011370097e-05, - "loss": 6.1391, + "epoch": 0.3382710590316162, + "grad_norm": 2.3229122161865234, + "learning_rate": 3.719860800397713e-05, + "loss": 4.2659, "step": 1530 }, { - "epoch": 0.08485351022664456, - "grad_norm": 2.7484805583953857, - "learning_rate": 4.705798749289369e-05, - "loss": 6.0529, + "epoch": 0.33937652000884366, + "grad_norm": 3.0333845615386963, + "learning_rate": 3.713646532438479e-05, + "loss": 4.2091, "step": 1535 }, { - "epoch": 0.08512990602542841, - "grad_norm": 2.891587972640991, - "learning_rate": 4.7043774872086414e-05, - "loss": 6.1436, + "epoch": 0.3404819809860712, + "grad_norm": 2.364445686340332, + "learning_rate": 3.7074322644792446e-05, + "loss": 4.1667, "step": 1540 }, { - "epoch": 0.08540630182421227, - "grad_norm": 3.721700429916382, - "learning_rate": 4.702956225127914e-05, - "loss": 6.4103, + "epoch": 0.3415874419632987, + "grad_norm": 2.6092944145202637, + "learning_rate": 3.70121799652001e-05, + "loss": 4.4148, "step": 1545 }, { - "epoch": 0.08568269762299613, - "grad_norm": 2.4380359649658203, - "learning_rate": 4.701534963047186e-05, - "loss": 6.2798, + "epoch": 0.3426929029405262, + "grad_norm": 2.69758677482605, + "learning_rate": 3.695003728560776e-05, + "loss": 4.3029, "step": 1550 }, { - "epoch": 0.08595909342177999, - "grad_norm": 3.160372257232666, - "learning_rate": 4.7001137009664586e-05, - "loss": 6.4028, + "epoch": 0.3437983639177537, + "grad_norm": 2.665482997894287, + "learning_rate": 3.6887894606015414e-05, + "loss": 4.3617, "step": 1555 }, { - "epoch": 0.08623548922056384, - "grad_norm": 2.3256590366363525, - "learning_rate": 4.69869243888573e-05, - "loss": 6.2657, + "epoch": 0.3449038248949812, + "grad_norm": 2.6900408267974854, + "learning_rate": 3.682575192642307e-05, + "loss": 4.456, "step": 1560 }, { - "epoch": 0.0865118850193477, - "grad_norm": 2.191479444503784, - "learning_rate": 4.6972711768050034e-05, - "loss": 6.1302, + "epoch": 0.3460092858722087, + "grad_norm": 2.335728406906128, + "learning_rate": 3.6763609246830726e-05, + "loss": 4.3155, "step": 1565 }, { - "epoch": 0.08678828081813156, - "grad_norm": 2.7161953449249268, - "learning_rate": 4.695849914724275e-05, - "loss": 6.2056, + "epoch": 0.3471147468494362, + "grad_norm": 2.85036039352417, + "learning_rate": 3.670146656723838e-05, + "loss": 4.3152, "step": 1570 }, { - "epoch": 0.08706467661691543, - "grad_norm": 2.7655863761901855, - "learning_rate": 4.6944286526435475e-05, - "loss": 6.1198, + "epoch": 0.34822020782666374, + "grad_norm": 2.652212381362915, + "learning_rate": 3.663932388764604e-05, + "loss": 4.4341, "step": 1575 }, { - "epoch": 0.08734107241569929, - "grad_norm": 2.158665180206299, - "learning_rate": 4.6930073905628206e-05, - "loss": 6.2007, + "epoch": 0.3493256688038912, + "grad_norm": 2.3771016597747803, + "learning_rate": 3.6577181208053695e-05, + "loss": 4.3358, "step": 1580 }, { - "epoch": 0.08761746821448314, - "grad_norm": 2.5755743980407715, - "learning_rate": 4.691586128482092e-05, - "loss": 6.2911, + "epoch": 0.35043112978111873, + "grad_norm": 2.7119994163513184, + "learning_rate": 3.651503852846135e-05, + "loss": 4.2583, "step": 1585 }, { - "epoch": 0.087893864013267, - "grad_norm": 2.5432612895965576, - "learning_rate": 4.690164866401365e-05, - "loss": 6.1612, + "epoch": 0.35153659075834626, + "grad_norm": 2.4877076148986816, + "learning_rate": 3.645289584886901e-05, + "loss": 4.4398, "step": 1590 }, { - "epoch": 0.08817025981205086, - "grad_norm": 2.5009267330169678, - "learning_rate": 4.688743604320637e-05, - "loss": 6.3085, + "epoch": 0.3526420517355737, + "grad_norm": 2.5400094985961914, + "learning_rate": 3.639075316927666e-05, + "loss": 4.4864, "step": 1595 }, { - "epoch": 0.08844665561083472, - "grad_norm": 2.381110429763794, - "learning_rate": 4.6873223422399095e-05, - "loss": 6.211, + "epoch": 0.35374751271280125, + "grad_norm": 2.929621458053589, + "learning_rate": 3.632861048968432e-05, + "loss": 4.2378, "step": 1600 }, { - "epoch": 0.08872305140961857, - "grad_norm": 2.5844264030456543, - "learning_rate": 4.685901080159182e-05, - "loss": 6.0844, + "epoch": 0.3548529736900287, + "grad_norm": 2.555133581161499, + "learning_rate": 3.6266467810091976e-05, + "loss": 4.3108, "step": 1605 }, { - "epoch": 0.08899944720840243, - "grad_norm": 2.737116575241089, - "learning_rate": 4.6844798180784536e-05, - "loss": 6.2711, + "epoch": 0.35595843466725624, + "grad_norm": 2.410792350769043, + "learning_rate": 3.6204325130499625e-05, + "loss": 4.3592, "step": 1610 }, { - "epoch": 0.08927584300718629, - "grad_norm": 2.8457653522491455, - "learning_rate": 4.683058555997727e-05, - "loss": 6.3523, + "epoch": 0.35706389564448376, + "grad_norm": 2.459975004196167, + "learning_rate": 3.614218245090729e-05, + "loss": 4.5196, "step": 1615 }, { - "epoch": 0.08955223880597014, - "grad_norm": 3.002805471420288, - "learning_rate": 4.6816372939169984e-05, - "loss": 6.3546, + "epoch": 0.35816935662171123, + "grad_norm": 2.834867000579834, + "learning_rate": 3.608003977131494e-05, + "loss": 4.3758, "step": 1620 }, { - "epoch": 0.089828634604754, - "grad_norm": 3.3119866847991943, - "learning_rate": 4.680216031836271e-05, - "loss": 6.3509, + "epoch": 0.35927481759893876, + "grad_norm": 2.6577582359313965, + "learning_rate": 3.6017897091722594e-05, + "loss": 4.3663, "step": 1625 }, { - "epoch": 0.09010503040353787, - "grad_norm": 2.4615001678466797, - "learning_rate": 4.678794769755543e-05, - "loss": 6.1708, + "epoch": 0.3603802785761663, + "grad_norm": 2.725658416748047, + "learning_rate": 3.595575441213026e-05, + "loss": 4.3878, "step": 1630 }, { - "epoch": 0.09038142620232173, - "grad_norm": 2.660804271697998, - "learning_rate": 4.6773735076748156e-05, - "loss": 6.1863, + "epoch": 0.36148573955339375, + "grad_norm": 2.368903160095215, + "learning_rate": 3.5893611732537906e-05, + "loss": 4.3393, "step": 1635 }, { - "epoch": 0.09065782200110559, - "grad_norm": 2.9395289421081543, - "learning_rate": 4.675952245594088e-05, - "loss": 6.5699, + "epoch": 0.36259120053062127, + "grad_norm": 2.2058262825012207, + "learning_rate": 3.583146905294556e-05, + "loss": 4.3152, "step": 1640 }, { - "epoch": 0.09093421779988944, - "grad_norm": 3.1004257202148438, - "learning_rate": 4.67453098351336e-05, - "loss": 6.1951, + "epoch": 0.3636966615078488, + "grad_norm": 2.60345458984375, + "learning_rate": 3.576932637335322e-05, + "loss": 4.4803, "step": 1645 }, { - "epoch": 0.0912106135986733, - "grad_norm": 3.4309775829315186, - "learning_rate": 4.673109721432633e-05, - "loss": 5.9744, + "epoch": 0.36480212248507626, + "grad_norm": 2.657458543777466, + "learning_rate": 3.5707183693760875e-05, + "loss": 4.3058, "step": 1650 }, { - "epoch": 0.09148700939745716, - "grad_norm": 2.9011757373809814, - "learning_rate": 4.6716884593519045e-05, - "loss": 6.1815, + "epoch": 0.3659075834623038, + "grad_norm": 2.596036195755005, + "learning_rate": 3.564504101416854e-05, + "loss": 4.2178, "step": 1655 }, { - "epoch": 0.09176340519624102, - "grad_norm": 3.3524973392486572, - "learning_rate": 4.670267197271177e-05, - "loss": 6.1202, + "epoch": 0.3670130444395313, + "grad_norm": 2.7093770503997803, + "learning_rate": 3.558289833457619e-05, + "loss": 4.3902, "step": 1660 }, { - "epoch": 0.09203980099502487, - "grad_norm": 2.6807689666748047, - "learning_rate": 4.668845935190449e-05, - "loss": 5.9419, + "epoch": 0.3681185054167588, + "grad_norm": 2.2766308784484863, + "learning_rate": 3.552075565498384e-05, + "loss": 4.4526, "step": 1665 }, { - "epoch": 0.09231619679380873, - "grad_norm": 2.4009783267974854, - "learning_rate": 4.667424673109722e-05, - "loss": 6.2655, + "epoch": 0.3692239663939863, + "grad_norm": 2.696753740310669, + "learning_rate": 3.5458612975391506e-05, + "loss": 4.3636, "step": 1670 }, { - "epoch": 0.09259259259259259, - "grad_norm": 3.517772674560547, - "learning_rate": 4.666003411028994e-05, - "loss": 6.1189, + "epoch": 0.37032942737121377, + "grad_norm": 2.463946580886841, + "learning_rate": 3.5396470295799155e-05, + "loss": 4.2369, "step": 1675 }, { - "epoch": 0.09286898839137644, - "grad_norm": 2.677375555038452, - "learning_rate": 4.664582148948266e-05, - "loss": 6.2443, + "epoch": 0.3714348883484413, + "grad_norm": 2.948925018310547, + "learning_rate": 3.533432761620681e-05, + "loss": 4.4674, "step": 1680 }, { - "epoch": 0.09314538419016032, - "grad_norm": 3.299330949783325, - "learning_rate": 4.663160886867539e-05, - "loss": 5.8239, + "epoch": 0.3725403493256688, + "grad_norm": 2.914759874343872, + "learning_rate": 3.527218493661447e-05, + "loss": 4.2563, "step": 1685 }, { - "epoch": 0.09342177998894417, - "grad_norm": 2.366581916809082, - "learning_rate": 4.661739624786811e-05, - "loss": 6.244, + "epoch": 0.3736458103028963, + "grad_norm": 2.562021255493164, + "learning_rate": 3.5210042257022124e-05, + "loss": 4.2267, "step": 1690 }, { - "epoch": 0.09369817578772803, - "grad_norm": 2.2254860401153564, - "learning_rate": 4.660318362706083e-05, - "loss": 6.2237, + "epoch": 0.3747512712801238, + "grad_norm": 2.4976344108581543, + "learning_rate": 3.514789957742978e-05, + "loss": 4.3459, "step": 1695 }, { - "epoch": 0.09397457158651189, - "grad_norm": 2.3326756954193115, - "learning_rate": 4.6588971006253554e-05, - "loss": 6.008, + "epoch": 0.37585673225735133, + "grad_norm": 2.656845808029175, + "learning_rate": 3.5085756897837436e-05, + "loss": 4.2767, "step": 1700 }, { - "epoch": 0.09425096738529574, - "grad_norm": 2.39668345451355, - "learning_rate": 4.657475838544628e-05, - "loss": 6.4698, + "epoch": 0.3769621932345788, + "grad_norm": 2.6122493743896484, + "learning_rate": 3.502361421824509e-05, + "loss": 4.2535, "step": 1705 }, { - "epoch": 0.0945273631840796, - "grad_norm": 2.79451584815979, - "learning_rate": 4.6560545764639e-05, - "loss": 6.0543, + "epoch": 0.3780676542118063, + "grad_norm": 2.7145111560821533, + "learning_rate": 3.496147153865275e-05, + "loss": 4.373, "step": 1710 }, { - "epoch": 0.09480375898286346, - "grad_norm": 3.0504002571105957, - "learning_rate": 4.6546333143831726e-05, - "loss": 5.9177, + "epoch": 0.37917311518903385, + "grad_norm": 2.6271467208862305, + "learning_rate": 3.4899328859060405e-05, + "loss": 4.2728, "step": 1715 }, { - "epoch": 0.09508015478164732, - "grad_norm": 2.6137356758117676, - "learning_rate": 4.653212052302445e-05, - "loss": 6.0108, + "epoch": 0.3802785761662613, + "grad_norm": 2.350149631500244, + "learning_rate": 3.483718617946806e-05, + "loss": 4.1621, "step": 1720 }, { - "epoch": 0.09535655058043117, - "grad_norm": 2.320517063140869, - "learning_rate": 4.6517907902217174e-05, - "loss": 5.9592, + "epoch": 0.38138403714348884, + "grad_norm": 2.773153305053711, + "learning_rate": 3.477504349987572e-05, + "loss": 4.3022, "step": 1725 }, { - "epoch": 0.09563294637921503, - "grad_norm": 2.4836413860321045, - "learning_rate": 4.650369528140989e-05, - "loss": 6.1646, + "epoch": 0.38248949812071636, + "grad_norm": 2.8574771881103516, + "learning_rate": 3.471290082028337e-05, + "loss": 4.2579, "step": 1730 }, { - "epoch": 0.09590934217799889, - "grad_norm": 2.3726565837860107, - "learning_rate": 4.6489482660602615e-05, - "loss": 6.0548, + "epoch": 0.38359495909794383, + "grad_norm": 2.725560426712036, + "learning_rate": 3.465075814069103e-05, + "loss": 4.2797, "step": 1735 }, { - "epoch": 0.09618573797678276, - "grad_norm": 3.755805492401123, - "learning_rate": 4.647527003979534e-05, - "loss": 6.1781, + "epoch": 0.38470042007517136, + "grad_norm": 2.513237476348877, + "learning_rate": 3.4588615461098686e-05, + "loss": 4.405, "step": 1740 }, { - "epoch": 0.09646213377556662, - "grad_norm": 2.5394093990325928, - "learning_rate": 4.646105741898806e-05, - "loss": 5.9294, + "epoch": 0.3858058810523988, + "grad_norm": 2.718583822250366, + "learning_rate": 3.452647278150634e-05, + "loss": 4.2946, "step": 1745 }, { - "epoch": 0.09673852957435047, - "grad_norm": 3.1130166053771973, - "learning_rate": 4.644684479818079e-05, - "loss": 6.0655, + "epoch": 0.38691134202962635, + "grad_norm": 2.4899282455444336, + "learning_rate": 3.446433010191399e-05, + "loss": 4.269, "step": 1750 }, { - "epoch": 0.09701492537313433, - "grad_norm": 3.218590497970581, - "learning_rate": 4.643263217737351e-05, - "loss": 6.2316, + "epoch": 0.38801680300685387, + "grad_norm": 2.5338146686553955, + "learning_rate": 3.4402187422321654e-05, + "loss": 4.4835, "step": 1755 }, { - "epoch": 0.09729132117191819, - "grad_norm": 3.100757598876953, - "learning_rate": 4.6418419556566235e-05, - "loss": 6.105, + "epoch": 0.38912226398408134, + "grad_norm": 2.3587207794189453, + "learning_rate": 3.434004474272931e-05, + "loss": 4.1855, "step": 1760 }, { - "epoch": 0.09756771697070205, - "grad_norm": 2.9586379528045654, - "learning_rate": 4.640420693575895e-05, - "loss": 6.3479, + "epoch": 0.39022772496130886, + "grad_norm": 2.939471960067749, + "learning_rate": 3.427790206313696e-05, + "loss": 4.31, "step": 1765 }, { - "epoch": 0.0978441127694859, - "grad_norm": 2.569976806640625, - "learning_rate": 4.6389994314951676e-05, - "loss": 6.2001, + "epoch": 0.3913331859385364, + "grad_norm": 2.79874324798584, + "learning_rate": 3.421575938354462e-05, + "loss": 4.2398, "step": 1770 }, { - "epoch": 0.09812050856826976, - "grad_norm": 2.5895392894744873, - "learning_rate": 4.637578169414441e-05, - "loss": 6.2953, + "epoch": 0.39243864691576386, + "grad_norm": 2.5179383754730225, + "learning_rate": 3.415361670395227e-05, + "loss": 4.2628, "step": 1775 }, { - "epoch": 0.09839690436705362, - "grad_norm": 3.0334362983703613, - "learning_rate": 4.6361569073337124e-05, - "loss": 6.1325, + "epoch": 0.3935441078929914, + "grad_norm": 2.731872797012329, + "learning_rate": 3.4091474024359935e-05, + "loss": 4.3159, "step": 1780 }, { - "epoch": 0.09867330016583747, - "grad_norm": 3.2410378456115723, - "learning_rate": 4.634735645252985e-05, - "loss": 6.1757, + "epoch": 0.3946495688702189, + "grad_norm": 2.5067148208618164, + "learning_rate": 3.402933134476759e-05, + "loss": 4.4061, "step": 1785 }, { - "epoch": 0.09894969596462133, - "grad_norm": 2.2880196571350098, - "learning_rate": 4.633314383172257e-05, - "loss": 6.1355, + "epoch": 0.39575502984744637, + "grad_norm": 2.3916046619415283, + "learning_rate": 3.396718866517524e-05, + "loss": 4.2791, "step": 1790 }, { - "epoch": 0.0992260917634052, - "grad_norm": 3.8285810947418213, - "learning_rate": 4.6318931210915296e-05, - "loss": 6.078, + "epoch": 0.3968604908246739, + "grad_norm": 2.6597490310668945, + "learning_rate": 3.3905045985582904e-05, + "loss": 4.4391, "step": 1795 }, { - "epoch": 0.09950248756218906, - "grad_norm": 2.5488243103027344, - "learning_rate": 4.630471859010802e-05, - "loss": 5.956, + "epoch": 0.3979659518019014, + "grad_norm": 2.5750606060028076, + "learning_rate": 3.384290330599056e-05, + "loss": 4.1806, "step": 1800 }, { - "epoch": 0.09977888336097292, - "grad_norm": 2.4518303871154785, - "learning_rate": 4.629050596930074e-05, - "loss": 6.0628, + "epoch": 0.3990714127791289, + "grad_norm": 2.561917781829834, + "learning_rate": 3.378076062639821e-05, + "loss": 4.4584, "step": 1805 }, { - "epoch": 0.10005527915975677, - "grad_norm": 3.043971538543701, - "learning_rate": 4.627629334849347e-05, - "loss": 6.2971, + "epoch": 0.4001768737563564, + "grad_norm": 2.576657772064209, + "learning_rate": 3.371861794680587e-05, + "loss": 4.1388, "step": 1810 }, { - "epoch": 0.10033167495854063, - "grad_norm": 3.071781873703003, - "learning_rate": 4.6262080727686185e-05, - "loss": 6.139, + "epoch": 0.4012823347335839, + "grad_norm": 2.5817503929138184, + "learning_rate": 3.365647526721352e-05, + "loss": 4.3074, "step": 1815 }, { - "epoch": 0.10060807075732449, - "grad_norm": 3.2873592376708984, - "learning_rate": 4.624786810687891e-05, - "loss": 6.1132, + "epoch": 0.4023877957108114, + "grad_norm": 2.4846079349517822, + "learning_rate": 3.359433258762118e-05, + "loss": 4.3061, "step": 1820 }, { - "epoch": 0.10088446655610835, - "grad_norm": 3.439819574356079, - "learning_rate": 4.623365548607163e-05, - "loss": 6.1686, + "epoch": 0.4034932566880389, + "grad_norm": 2.833554744720459, + "learning_rate": 3.353218990802884e-05, + "loss": 4.4506, "step": 1825 }, { - "epoch": 0.1011608623548922, - "grad_norm": 3.1802821159362793, - "learning_rate": 4.621944286526436e-05, - "loss": 5.9691, + "epoch": 0.4045987176652664, + "grad_norm": 2.6276683807373047, + "learning_rate": 3.347004722843649e-05, + "loss": 4.3484, "step": 1830 }, { - "epoch": 0.10143725815367606, - "grad_norm": 2.665397882461548, - "learning_rate": 4.620523024445708e-05, - "loss": 6.2404, + "epoch": 0.4057041786424939, + "grad_norm": 2.6111786365509033, + "learning_rate": 3.3407904548844146e-05, + "loss": 4.4257, "step": 1835 }, { - "epoch": 0.10171365395245992, - "grad_norm": 2.8647611141204834, - "learning_rate": 4.61910176236498e-05, - "loss": 6.1963, + "epoch": 0.40680963961972144, + "grad_norm": 2.813497304916382, + "learning_rate": 3.33457618692518e-05, + "loss": 4.3713, "step": 1840 }, { - "epoch": 0.10199004975124377, - "grad_norm": 3.0262868404388428, - "learning_rate": 4.617680500284253e-05, - "loss": 6.1664, + "epoch": 0.4079151005969489, + "grad_norm": 2.7521538734436035, + "learning_rate": 3.328361918965946e-05, + "loss": 4.4385, "step": 1845 }, { - "epoch": 0.10226644555002765, - "grad_norm": 3.05643367767334, - "learning_rate": 4.6162592382035246e-05, - "loss": 6.2904, + "epoch": 0.40902056157417643, + "grad_norm": 2.503818988800049, + "learning_rate": 3.3221476510067115e-05, + "loss": 4.4288, "step": 1850 }, { - "epoch": 0.1025428413488115, - "grad_norm": 2.7000110149383545, - "learning_rate": 4.614837976122797e-05, - "loss": 6.0892, + "epoch": 0.41012602255140396, + "grad_norm": 2.3562381267547607, + "learning_rate": 3.315933383047477e-05, + "loss": 4.2368, "step": 1855 }, { - "epoch": 0.10281923714759536, - "grad_norm": 2.893402338027954, - "learning_rate": 4.6134167140420694e-05, - "loss": 5.8826, + "epoch": 0.4112314835286314, + "grad_norm": 2.526411294937134, + "learning_rate": 3.309719115088243e-05, + "loss": 4.3008, "step": 1860 }, { - "epoch": 0.10309563294637922, - "grad_norm": 2.5244555473327637, - "learning_rate": 4.611995451961342e-05, - "loss": 5.9429, + "epoch": 0.41233694450585895, + "grad_norm": 2.6222381591796875, + "learning_rate": 3.303504847129008e-05, + "loss": 4.1532, "step": 1865 }, { - "epoch": 0.10337202874516307, - "grad_norm": 2.5196962356567383, - "learning_rate": 4.610574189880614e-05, - "loss": 5.8624, + "epoch": 0.4134424054830865, + "grad_norm": 2.6735141277313232, + "learning_rate": 3.297290579169774e-05, + "loss": 4.2497, "step": 1870 }, { - "epoch": 0.10364842454394693, - "grad_norm": 2.5912246704101562, - "learning_rate": 4.609152927799886e-05, - "loss": 6.1336, + "epoch": 0.41454786646031394, + "grad_norm": 2.612273931503296, + "learning_rate": 3.2910763112105396e-05, + "loss": 4.365, "step": 1875 }, { - "epoch": 0.10392482034273079, - "grad_norm": 2.757858991622925, - "learning_rate": 4.607731665719159e-05, - "loss": 6.0385, + "epoch": 0.41565332743754146, + "grad_norm": 2.7102086544036865, + "learning_rate": 3.284862043251305e-05, + "loss": 4.2006, "step": 1880 }, { - "epoch": 0.10420121614151465, - "grad_norm": 2.6587064266204834, - "learning_rate": 4.6063104036384314e-05, - "loss": 6.0566, + "epoch": 0.41675878841476893, + "grad_norm": 2.8893067836761475, + "learning_rate": 3.278647775292071e-05, + "loss": 4.4635, "step": 1885 }, { - "epoch": 0.1044776119402985, - "grad_norm": 3.415855646133423, - "learning_rate": 4.604889141557703e-05, - "loss": 6.4943, + "epoch": 0.41786424939199646, + "grad_norm": 2.6870336532592773, + "learning_rate": 3.2724335073328364e-05, + "loss": 4.3284, "step": 1890 }, { - "epoch": 0.10475400773908236, - "grad_norm": 2.9792914390563965, - "learning_rate": 4.603467879476976e-05, - "loss": 5.8498, + "epoch": 0.418969710369224, + "grad_norm": 2.454735279083252, + "learning_rate": 3.266219239373602e-05, + "loss": 4.2499, "step": 1895 }, { - "epoch": 0.10503040353786622, - "grad_norm": 3.112447500228882, - "learning_rate": 4.602046617396248e-05, - "loss": 6.2611, + "epoch": 0.42007517134645145, + "grad_norm": 2.5673999786376953, + "learning_rate": 3.2600049714143676e-05, + "loss": 4.3258, "step": 1900 }, { - "epoch": 0.10530679933665009, - "grad_norm": 2.8653833866119385, - "learning_rate": 4.60062535531552e-05, - "loss": 6.0917, + "epoch": 0.42118063232367897, + "grad_norm": 2.435605049133301, + "learning_rate": 3.253790703455133e-05, + "loss": 4.2839, "step": 1905 }, { - "epoch": 0.10558319513543395, - "grad_norm": 2.7462995052337646, - "learning_rate": 4.599204093234793e-05, - "loss": 6.2313, + "epoch": 0.4222860933009065, + "grad_norm": 2.7508575916290283, + "learning_rate": 3.247576435495899e-05, + "loss": 4.4643, "step": 1910 }, { - "epoch": 0.1058595909342178, - "grad_norm": 2.4381167888641357, - "learning_rate": 4.597782831154065e-05, - "loss": 6.1274, + "epoch": 0.42339155427813396, + "grad_norm": 2.5757343769073486, + "learning_rate": 3.2413621675366645e-05, + "loss": 4.1323, "step": 1915 }, { - "epoch": 0.10613598673300166, - "grad_norm": 3.3550705909729004, - "learning_rate": 4.5963615690733375e-05, - "loss": 6.246, + "epoch": 0.4244970152553615, + "grad_norm": 2.409933567047119, + "learning_rate": 3.23514789957743e-05, + "loss": 4.2882, "step": 1920 }, { - "epoch": 0.10641238253178552, - "grad_norm": 2.736597776412964, - "learning_rate": 4.594940306992609e-05, - "loss": 6.3576, + "epoch": 0.425602476232589, + "grad_norm": 2.4064886569976807, + "learning_rate": 3.228933631618196e-05, + "loss": 4.3503, "step": 1925 }, { - "epoch": 0.10668877833056938, - "grad_norm": 2.4143283367156982, - "learning_rate": 4.593519044911882e-05, - "loss": 5.9825, + "epoch": 0.4267079372098165, + "grad_norm": 2.539107322692871, + "learning_rate": 3.222719363658961e-05, + "loss": 4.3415, "step": 1930 }, { - "epoch": 0.10696517412935323, - "grad_norm": 2.622124195098877, - "learning_rate": 4.592097782831155e-05, - "loss": 6.1076, + "epoch": 0.427813398187044, + "grad_norm": 2.70954966545105, + "learning_rate": 3.216505095699727e-05, + "loss": 4.3901, "step": 1935 }, { - "epoch": 0.10724156992813709, - "grad_norm": 3.135380744934082, - "learning_rate": 4.5906765207504264e-05, - "loss": 6.0845, + "epoch": 0.4289188591642715, + "grad_norm": 2.902268171310425, + "learning_rate": 3.2102908277404926e-05, + "loss": 4.3829, "step": 1940 }, { - "epoch": 0.10751796572692095, - "grad_norm": 2.970335006713867, - "learning_rate": 4.589255258669699e-05, - "loss": 6.1974, + "epoch": 0.430024320141499, + "grad_norm": 2.919811487197876, + "learning_rate": 3.2040765597812575e-05, + "loss": 4.3388, "step": 1945 }, { - "epoch": 0.1077943615257048, - "grad_norm": 4.032550811767578, - "learning_rate": 4.587833996588971e-05, - "loss": 6.0646, + "epoch": 0.4311297811187265, + "grad_norm": 2.765904188156128, + "learning_rate": 3.197862291822024e-05, + "loss": 4.2619, "step": 1950 }, { - "epoch": 0.10807075732448866, - "grad_norm": 2.632209300994873, - "learning_rate": 4.5864127345082436e-05, - "loss": 5.8096, + "epoch": 0.432235242095954, + "grad_norm": 2.6072490215301514, + "learning_rate": 3.1916480238627894e-05, + "loss": 4.272, "step": 1955 }, { - "epoch": 0.10834715312327253, - "grad_norm": 2.648122549057007, - "learning_rate": 4.5849914724275154e-05, - "loss": 6.139, + "epoch": 0.4333407030731815, + "grad_norm": 2.694185256958008, + "learning_rate": 3.1854337559035544e-05, + "loss": 4.3295, "step": 1960 }, { - "epoch": 0.10862354892205639, - "grad_norm": 2.7293598651885986, - "learning_rate": 4.5835702103467884e-05, - "loss": 5.9382, + "epoch": 0.43444616405040903, + "grad_norm": 2.6962716579437256, + "learning_rate": 3.179219487944321e-05, + "loss": 4.2222, "step": 1965 }, { - "epoch": 0.10889994472084025, - "grad_norm": 2.997490167617798, - "learning_rate": 4.582148948266061e-05, - "loss": 6.1471, + "epoch": 0.4355516250276365, + "grad_norm": 2.681506395339966, + "learning_rate": 3.1730052199850856e-05, + "loss": 4.3914, "step": 1970 }, { - "epoch": 0.1091763405196241, - "grad_norm": 3.3765926361083984, - "learning_rate": 4.5807276861853326e-05, - "loss": 6.2572, + "epoch": 0.436657086004864, + "grad_norm": 2.792881488800049, + "learning_rate": 3.166790952025851e-05, + "loss": 4.4958, "step": 1975 }, { - "epoch": 0.10945273631840796, - "grad_norm": 2.962548017501831, - "learning_rate": 4.579306424104605e-05, - "loss": 6.0159, + "epoch": 0.43776254698209155, + "grad_norm": 2.6680564880371094, + "learning_rate": 3.1605766840666175e-05, + "loss": 4.3593, "step": 1980 }, { - "epoch": 0.10972913211719182, - "grad_norm": 2.896846294403076, - "learning_rate": 4.5778851620238773e-05, - "loss": 5.6825, + "epoch": 0.438868007959319, + "grad_norm": 2.7864387035369873, + "learning_rate": 3.1543624161073825e-05, + "loss": 4.3489, "step": 1985 }, { - "epoch": 0.11000552791597568, - "grad_norm": 3.3139047622680664, - "learning_rate": 4.57646389994315e-05, - "loss": 6.0216, + "epoch": 0.43997346893654654, + "grad_norm": 2.5795204639434814, + "learning_rate": 3.148148148148148e-05, + "loss": 4.269, "step": 1990 }, { - "epoch": 0.11028192371475953, - "grad_norm": 2.7148447036743164, - "learning_rate": 4.575042637862422e-05, - "loss": 6.1073, + "epoch": 0.44107892991377406, + "grad_norm": 2.851243019104004, + "learning_rate": 3.141933880188914e-05, + "loss": 4.3883, "step": 1995 }, { - "epoch": 0.11055831951354339, - "grad_norm": 2.597428798675537, - "learning_rate": 4.5736213757816945e-05, - "loss": 6.2397, + "epoch": 0.44218439089100153, + "grad_norm": 2.732250452041626, + "learning_rate": 3.135719612229679e-05, + "loss": 4.2467, "step": 2000 }, { - "epoch": 0.11083471531232725, - "grad_norm": 3.059511184692383, - "learning_rate": 4.572200113700967e-05, - "loss": 6.2985, + "epoch": 0.44328985186822906, + "grad_norm": 2.4607598781585693, + "learning_rate": 3.1295053442704456e-05, + "loss": 4.3155, "step": 2005 }, { - "epoch": 0.1111111111111111, - "grad_norm": 2.423731803894043, - "learning_rate": 4.570778851620239e-05, - "loss": 6.2662, + "epoch": 0.4443953128454566, + "grad_norm": 2.546980857849121, + "learning_rate": 3.1232910763112105e-05, + "loss": 4.3949, "step": 2010 }, { - "epoch": 0.11138750690989498, - "grad_norm": 2.665515899658203, - "learning_rate": 4.569357589539511e-05, - "loss": 6.1349, + "epoch": 0.44550077382268405, + "grad_norm": 2.734762191772461, + "learning_rate": 3.117076808351976e-05, + "loss": 4.46, "step": 2015 }, { - "epoch": 0.11166390270867883, - "grad_norm": 2.9362165927886963, - "learning_rate": 4.567936327458784e-05, - "loss": 6.1175, + "epoch": 0.4466062347999116, + "grad_norm": 2.5129942893981934, + "learning_rate": 3.110862540392742e-05, + "loss": 4.3879, "step": 2020 }, { - "epoch": 0.11194029850746269, - "grad_norm": 2.7762677669525146, - "learning_rate": 4.566515065378056e-05, - "loss": 5.9836, + "epoch": 0.44771169577713904, + "grad_norm": 2.644542694091797, + "learning_rate": 3.1046482724335074e-05, + "loss": 4.2476, "step": 2025 }, { - "epoch": 0.11221669430624655, - "grad_norm": 3.224057674407959, - "learning_rate": 4.565093803297328e-05, - "loss": 6.0051, + "epoch": 0.44881715675436656, + "grad_norm": 2.771726369857788, + "learning_rate": 3.098434004474273e-05, + "loss": 4.4844, "step": 2030 }, { - "epoch": 0.1124930901050304, - "grad_norm": 3.2076215744018555, - "learning_rate": 4.5636725412166007e-05, - "loss": 6.1282, + "epoch": 0.4499226177315941, + "grad_norm": 2.642275333404541, + "learning_rate": 3.0922197365150386e-05, + "loss": 4.3922, "step": 2035 }, { - "epoch": 0.11276948590381426, - "grad_norm": 2.4749698638916016, - "learning_rate": 4.562251279135873e-05, - "loss": 6.0949, + "epoch": 0.45102807870882156, + "grad_norm": 2.6931073665618896, + "learning_rate": 3.086005468555804e-05, + "loss": 4.3635, "step": 2040 }, { - "epoch": 0.11304588170259812, - "grad_norm": 2.6076836585998535, - "learning_rate": 4.5608300170551454e-05, - "loss": 6.1536, + "epoch": 0.4521335396860491, + "grad_norm": 2.4507226943969727, + "learning_rate": 3.07979120059657e-05, + "loss": 4.3413, "step": 2045 }, { - "epoch": 0.11332227750138198, - "grad_norm": 2.7696752548217773, - "learning_rate": 4.559408754974417e-05, - "loss": 6.0666, + "epoch": 0.4532390006632766, + "grad_norm": 2.632704496383667, + "learning_rate": 3.0735769326373355e-05, + "loss": 4.324, "step": 2050 }, { - "epoch": 0.11359867330016583, - "grad_norm": 2.964919328689575, - "learning_rate": 4.55798749289369e-05, - "loss": 6.0302, + "epoch": 0.45434446164050407, + "grad_norm": 2.6872873306274414, + "learning_rate": 3.067362664678101e-05, + "loss": 4.3887, "step": 2055 }, { - "epoch": 0.11387506909894969, - "grad_norm": 2.8098812103271484, - "learning_rate": 4.556566230812962e-05, - "loss": 6.0762, + "epoch": 0.4554499226177316, + "grad_norm": 2.8722641468048096, + "learning_rate": 3.061148396718867e-05, + "loss": 4.3594, "step": 2060 }, { - "epoch": 0.11415146489773355, - "grad_norm": 3.016932725906372, - "learning_rate": 4.5551449687322344e-05, - "loss": 5.993, + "epoch": 0.4565553835949591, + "grad_norm": 2.642021417617798, + "learning_rate": 3.054934128759632e-05, + "loss": 4.266, "step": 2065 }, { - "epoch": 0.11442786069651742, - "grad_norm": 2.170443058013916, - "learning_rate": 4.553723706651507e-05, - "loss": 6.0171, + "epoch": 0.4576608445721866, + "grad_norm": 2.8870849609375, + "learning_rate": 3.048719860800398e-05, + "loss": 4.4626, "step": 2070 }, { - "epoch": 0.11470425649530128, - "grad_norm": 2.617326259613037, - "learning_rate": 4.552302444570779e-05, - "loss": 5.916, + "epoch": 0.4587663055494141, + "grad_norm": 2.623518943786621, + "learning_rate": 3.0425055928411632e-05, + "loss": 4.3157, "step": 2075 }, { - "epoch": 0.11498065229408513, - "grad_norm": 2.2992942333221436, - "learning_rate": 4.5508811824900516e-05, - "loss": 6.2716, + "epoch": 0.45987176652664163, + "grad_norm": 2.5889763832092285, + "learning_rate": 3.0362913248819292e-05, + "loss": 4.2704, "step": 2080 }, { - "epoch": 0.11525704809286899, - "grad_norm": 2.6933460235595703, - "learning_rate": 4.549459920409323e-05, - "loss": 6.0187, + "epoch": 0.4609772275038691, + "grad_norm": 2.8086538314819336, + "learning_rate": 3.0300770569226945e-05, + "loss": 4.3561, "step": 2085 }, { - "epoch": 0.11553344389165285, - "grad_norm": 2.333048105239868, - "learning_rate": 4.5480386583285964e-05, - "loss": 6.0797, + "epoch": 0.4620826884810966, + "grad_norm": 2.896907091140747, + "learning_rate": 3.02386278896346e-05, + "loss": 4.4201, "step": 2090 }, { - "epoch": 0.1158098396904367, - "grad_norm": 2.7053120136260986, - "learning_rate": 4.546617396247868e-05, - "loss": 6.3752, + "epoch": 0.4631881494583241, + "grad_norm": 2.5891048908233643, + "learning_rate": 3.017648521004226e-05, + "loss": 4.2137, "step": 2095 }, { - "epoch": 0.11608623548922056, - "grad_norm": 2.7705891132354736, - "learning_rate": 4.5451961341671405e-05, - "loss": 5.9062, + "epoch": 0.4642936104355516, + "grad_norm": 2.5606133937835693, + "learning_rate": 3.0114342530449913e-05, + "loss": 4.3985, "step": 2100 }, { - "epoch": 0.11636263128800442, - "grad_norm": 2.7516586780548096, - "learning_rate": 4.543774872086413e-05, - "loss": 5.8142, + "epoch": 0.46539907141277914, + "grad_norm": 2.7957265377044678, + "learning_rate": 3.005219985085757e-05, + "loss": 4.395, "step": 2105 }, { - "epoch": 0.11663902708678828, - "grad_norm": 3.1628334522247314, - "learning_rate": 4.542353610005685e-05, - "loss": 6.151, + "epoch": 0.4665045323900066, + "grad_norm": 2.593770742416382, + "learning_rate": 2.999005717126523e-05, + "loss": 4.4711, "step": 2110 }, { - "epoch": 0.11691542288557213, - "grad_norm": 2.7654128074645996, - "learning_rate": 4.540932347924958e-05, - "loss": 6.0071, + "epoch": 0.46760999336723413, + "grad_norm": 2.482818603515625, + "learning_rate": 2.992791449167288e-05, + "loss": 4.2323, "step": 2115 }, { - "epoch": 0.11719181868435599, - "grad_norm": 2.2299458980560303, - "learning_rate": 4.5395110858442294e-05, - "loss": 5.9453, + "epoch": 0.46871545434446166, + "grad_norm": 2.972776174545288, + "learning_rate": 2.986577181208054e-05, + "loss": 4.3602, "step": 2120 }, { - "epoch": 0.11746821448313986, - "grad_norm": 2.7742066383361816, - "learning_rate": 4.5380898237635025e-05, - "loss": 6.0043, + "epoch": 0.4698209153216891, + "grad_norm": 2.5987308025360107, + "learning_rate": 2.980362913248819e-05, + "loss": 4.5967, "step": 2125 }, { - "epoch": 0.11774461028192372, - "grad_norm": 2.242952585220337, - "learning_rate": 4.536668561682775e-05, - "loss": 5.8624, + "epoch": 0.47092637629891665, + "grad_norm": 2.6634702682495117, + "learning_rate": 2.974148645289585e-05, + "loss": 4.1932, "step": 2130 }, { - "epoch": 0.11802100608070758, - "grad_norm": 3.177030086517334, - "learning_rate": 4.5352472996020466e-05, - "loss": 5.9175, + "epoch": 0.4720318372761442, + "grad_norm": 2.720262050628662, + "learning_rate": 2.967934377330351e-05, + "loss": 4.1392, "step": 2135 }, { - "epoch": 0.11829740187949143, - "grad_norm": 3.0436911582946777, - "learning_rate": 4.533826037521319e-05, - "loss": 6.0026, + "epoch": 0.47313729825337164, + "grad_norm": 2.9388368129730225, + "learning_rate": 2.9617201093711163e-05, + "loss": 4.2334, "step": 2140 }, { - "epoch": 0.11857379767827529, - "grad_norm": 3.5062520503997803, - "learning_rate": 4.5324047754405914e-05, - "loss": 6.2474, + "epoch": 0.47424275923059916, + "grad_norm": 2.426968812942505, + "learning_rate": 2.955505841411882e-05, + "loss": 4.1942, "step": 2145 }, { - "epoch": 0.11885019347705915, - "grad_norm": 2.291909694671631, - "learning_rate": 4.530983513359864e-05, - "loss": 6.2062, + "epoch": 0.4753482202078267, + "grad_norm": 2.53849458694458, + "learning_rate": 2.949291573452647e-05, + "loss": 4.4471, "step": 2150 }, { - "epoch": 0.119126589275843, - "grad_norm": 2.3270986080169678, - "learning_rate": 4.529562251279136e-05, - "loss": 6.2895, + "epoch": 0.47645368118505416, + "grad_norm": 2.7019786834716797, + "learning_rate": 2.943077305493413e-05, + "loss": 4.2433, "step": 2155 }, { - "epoch": 0.11940298507462686, - "grad_norm": 3.0153403282165527, - "learning_rate": 4.5281409891984086e-05, - "loss": 6.1369, + "epoch": 0.4775591421622817, + "grad_norm": 2.578589677810669, + "learning_rate": 2.9368630375341787e-05, + "loss": 4.2682, "step": 2160 }, { - "epoch": 0.11967938087341072, - "grad_norm": 2.496553897857666, - "learning_rate": 4.526719727117681e-05, - "loss": 6.1861, + "epoch": 0.47866460313950915, + "grad_norm": 2.7424092292785645, + "learning_rate": 2.930648769574944e-05, + "loss": 4.4, "step": 2165 }, { - "epoch": 0.11995577667219458, - "grad_norm": 2.5240025520324707, - "learning_rate": 4.525298465036953e-05, - "loss": 6.3099, + "epoch": 0.47977006411673667, + "grad_norm": 2.6316614151000977, + "learning_rate": 2.92443450161571e-05, + "loss": 4.249, "step": 2170 }, { - "epoch": 0.12023217247097844, - "grad_norm": 2.7190871238708496, - "learning_rate": 4.523877202956225e-05, - "loss": 5.737, + "epoch": 0.4808755250939642, + "grad_norm": 2.757974624633789, + "learning_rate": 2.9182202336564756e-05, + "loss": 4.3832, "step": 2175 }, { - "epoch": 0.1205085682697623, - "grad_norm": 2.7631752490997314, - "learning_rate": 4.5224559408754975e-05, - "loss": 6.1848, + "epoch": 0.48198098607119166, + "grad_norm": 2.591416597366333, + "learning_rate": 2.912005965697241e-05, + "loss": 4.4295, "step": 2180 }, { - "epoch": 0.12078496406854616, - "grad_norm": 2.1841232776641846, - "learning_rate": 4.52103467879477e-05, - "loss": 6.0631, + "epoch": 0.4830864470484192, + "grad_norm": 2.576218605041504, + "learning_rate": 2.9057916977380068e-05, + "loss": 4.3352, "step": 2185 }, { - "epoch": 0.12106135986733002, - "grad_norm": 2.531573534011841, - "learning_rate": 4.519613416714042e-05, - "loss": 6.1404, + "epoch": 0.4841919080256467, + "grad_norm": 2.5569541454315186, + "learning_rate": 2.899577429778772e-05, + "loss": 4.1921, "step": 2190 }, { - "epoch": 0.12133775566611388, - "grad_norm": 3.5655975341796875, - "learning_rate": 4.518192154633315e-05, - "loss": 6.1925, + "epoch": 0.4852973690028742, + "grad_norm": 2.489694118499756, + "learning_rate": 2.8933631618195377e-05, + "loss": 4.3463, "step": 2195 }, { - "epoch": 0.12161415146489774, - "grad_norm": 2.4978582859039307, - "learning_rate": 4.516770892552587e-05, - "loss": 5.9768, + "epoch": 0.4864028299801017, + "grad_norm": 2.486515522003174, + "learning_rate": 2.8871488938603037e-05, + "loss": 4.217, "step": 2200 }, { - "epoch": 0.12189054726368159, - "grad_norm": 2.800384759902954, - "learning_rate": 4.515349630471859e-05, - "loss": 6.1647, + "epoch": 0.4875082909573292, + "grad_norm": 2.6798512935638428, + "learning_rate": 2.880934625901069e-05, + "loss": 4.3241, "step": 2205 }, { - "epoch": 0.12216694306246545, - "grad_norm": 3.2375316619873047, - "learning_rate": 4.513928368391132e-05, - "loss": 5.9686, + "epoch": 0.4886137519345567, + "grad_norm": 2.582374095916748, + "learning_rate": 2.8747203579418346e-05, + "loss": 4.3155, "step": 2210 }, { - "epoch": 0.1224433388612493, - "grad_norm": 3.0477945804595947, - "learning_rate": 4.512507106310404e-05, - "loss": 5.8481, + "epoch": 0.4897192129117842, + "grad_norm": 2.598309278488159, + "learning_rate": 2.8685060899826e-05, + "loss": 4.3281, "step": 2215 }, { - "epoch": 0.12271973466003316, - "grad_norm": 2.7675116062164307, - "learning_rate": 4.511085844229676e-05, - "loss": 6.1965, + "epoch": 0.49082467388901174, + "grad_norm": 2.5720064640045166, + "learning_rate": 2.8622918220233658e-05, + "loss": 4.3937, "step": 2220 }, { - "epoch": 0.12299613045881702, - "grad_norm": 3.220231056213379, - "learning_rate": 4.5096645821489484e-05, - "loss": 6.0191, + "epoch": 0.4919301348662392, + "grad_norm": 2.4057793617248535, + "learning_rate": 2.8560775540641317e-05, + "loss": 4.2625, "step": 2225 }, { - "epoch": 0.12327252625760088, - "grad_norm": 2.473461389541626, - "learning_rate": 4.508243320068221e-05, - "loss": 5.7706, + "epoch": 0.49303559584346673, + "grad_norm": 2.5601112842559814, + "learning_rate": 2.8498632861048967e-05, + "loss": 4.2416, "step": 2230 }, { - "epoch": 0.12354892205638475, - "grad_norm": 2.950104236602783, - "learning_rate": 4.506822057987493e-05, - "loss": 6.0164, + "epoch": 0.4941410568206942, + "grad_norm": 2.621948003768921, + "learning_rate": 2.8436490181456626e-05, + "loss": 4.439, "step": 2235 }, { - "epoch": 0.1238253178551686, - "grad_norm": 3.0107226371765137, - "learning_rate": 4.5054007959067656e-05, - "loss": 5.9676, + "epoch": 0.4952465177979217, + "grad_norm": 2.5221333503723145, + "learning_rate": 2.837434750186428e-05, + "loss": 4.3375, "step": 2240 }, { - "epoch": 0.12410171365395246, - "grad_norm": 3.3894078731536865, - "learning_rate": 4.503979533826038e-05, - "loss": 5.9496, + "epoch": 0.49635197877514925, + "grad_norm": 2.555539608001709, + "learning_rate": 2.831220482227194e-05, + "loss": 4.3071, "step": 2245 }, { - "epoch": 0.12437810945273632, - "grad_norm": 2.7371933460235596, - "learning_rate": 4.5025582717453104e-05, - "loss": 5.9917, + "epoch": 0.4974574397523767, + "grad_norm": 2.71470308303833, + "learning_rate": 2.8250062142679595e-05, + "loss": 4.3431, "step": 2250 }, { - "epoch": 0.12465450525152018, - "grad_norm": 2.829718589782715, - "learning_rate": 4.501137009664582e-05, - "loss": 5.881, + "epoch": 0.49856290072960424, + "grad_norm": 2.731353759765625, + "learning_rate": 2.8187919463087248e-05, + "loss": 4.4328, "step": 2255 }, { - "epoch": 0.12493090105030404, - "grad_norm": 2.5616791248321533, - "learning_rate": 4.4997157475838545e-05, - "loss": 5.8512, + "epoch": 0.49966836170683177, + "grad_norm": 2.527031183242798, + "learning_rate": 2.8125776783494907e-05, + "loss": 4.3326, "step": 2260 }, { - "epoch": 0.1252072968490879, - "grad_norm": 2.7623887062072754, - "learning_rate": 4.4982944855031276e-05, - "loss": 6.1254, + "epoch": 0.5007738226840592, + "grad_norm": 2.539781332015991, + "learning_rate": 2.8063634103902563e-05, + "loss": 4.3398, "step": 2265 }, { - "epoch": 0.12548369264787176, - "grad_norm": 2.115161895751953, - "learning_rate": 4.496873223422399e-05, - "loss": 5.9236, + "epoch": 0.5018792836612868, + "grad_norm": 2.465778350830078, + "learning_rate": 2.8001491424310216e-05, + "loss": 4.1966, "step": 2270 }, { - "epoch": 0.12576008844665562, - "grad_norm": 2.32071590423584, - "learning_rate": 4.495451961341672e-05, - "loss": 5.8425, + "epoch": 0.5029847446385143, + "grad_norm": 2.610877513885498, + "learning_rate": 2.7939348744717876e-05, + "loss": 4.4339, "step": 2275 }, { - "epoch": 0.12603648424543948, - "grad_norm": 3.4130492210388184, - "learning_rate": 4.494030699260944e-05, - "loss": 5.8947, + "epoch": 0.5040902056157418, + "grad_norm": 2.833237409591675, + "learning_rate": 2.787720606512553e-05, + "loss": 4.258, "step": 2280 }, { - "epoch": 0.12631288004422334, - "grad_norm": 2.7116572856903076, - "learning_rate": 4.4926094371802165e-05, - "loss": 6.2554, + "epoch": 0.5051956665929692, + "grad_norm": 2.681429386138916, + "learning_rate": 2.7815063385533185e-05, + "loss": 4.3174, "step": 2285 }, { - "epoch": 0.1265892758430072, - "grad_norm": 2.960740804672241, - "learning_rate": 4.491188175099488e-05, - "loss": 5.835, + "epoch": 0.5063011275701967, + "grad_norm": 2.621767044067383, + "learning_rate": 2.7752920705940844e-05, + "loss": 4.3556, "step": 2290 }, { - "epoch": 0.12686567164179105, - "grad_norm": 2.641920804977417, - "learning_rate": 4.4897669130187606e-05, - "loss": 6.0452, + "epoch": 0.5074065885474243, + "grad_norm": 2.3988664150238037, + "learning_rate": 2.7690778026348497e-05, + "loss": 4.4304, "step": 2295 }, { - "epoch": 0.1271420674405749, - "grad_norm": 2.40816068649292, - "learning_rate": 4.488345650938034e-05, - "loss": 6.1682, + "epoch": 0.5085120495246518, + "grad_norm": 2.6011765003204346, + "learning_rate": 2.7628635346756153e-05, + "loss": 4.3996, "step": 2300 }, { - "epoch": 0.12741846323935876, - "grad_norm": 2.953784942626953, - "learning_rate": 4.4869243888573054e-05, - "loss": 5.8982, + "epoch": 0.5096175105018793, + "grad_norm": 2.5418872833251953, + "learning_rate": 2.7566492667163806e-05, + "loss": 4.3227, "step": 2305 }, { - "epoch": 0.12769485903814262, - "grad_norm": 2.7458655834198, - "learning_rate": 4.485503126776578e-05, - "loss": 5.7274, + "epoch": 0.5107229714791068, + "grad_norm": 2.7040741443634033, + "learning_rate": 2.7504349987571466e-05, + "loss": 4.3522, "step": 2310 }, { - "epoch": 0.12797125483692648, - "grad_norm": 2.4384260177612305, - "learning_rate": 4.48408186469585e-05, - "loss": 5.8491, + "epoch": 0.5118284324563342, + "grad_norm": 2.4782514572143555, + "learning_rate": 2.7442207307979122e-05, + "loss": 4.2093, "step": 2315 }, { - "epoch": 0.12824765063571034, - "grad_norm": 3.5330193042755127, - "learning_rate": 4.4826606026151226e-05, - "loss": 6.1105, + "epoch": 0.5129338934335618, + "grad_norm": 2.709933042526245, + "learning_rate": 2.7380064628386775e-05, + "loss": 4.3424, "step": 2320 }, { - "epoch": 0.1285240464344942, - "grad_norm": 3.598749876022339, - "learning_rate": 4.481239340534395e-05, - "loss": 5.7398, + "epoch": 0.5140393544107893, + "grad_norm": 3.0086729526519775, + "learning_rate": 2.7317921948794434e-05, + "loss": 4.5041, "step": 2325 }, { - "epoch": 0.12880044223327805, - "grad_norm": 2.857062339782715, - "learning_rate": 4.479818078453667e-05, - "loss": 6.0652, + "epoch": 0.5151448153880168, + "grad_norm": 2.5372843742370605, + "learning_rate": 2.725577926920209e-05, + "loss": 4.3018, "step": 2330 }, { - "epoch": 0.1290768380320619, - "grad_norm": 2.578871250152588, - "learning_rate": 4.47839681637294e-05, - "loss": 5.9814, + "epoch": 0.5162502763652443, + "grad_norm": 2.94974684715271, + "learning_rate": 2.7193636589609743e-05, + "loss": 4.2941, "step": 2335 }, { - "epoch": 0.12935323383084577, - "grad_norm": 3.088256597518921, - "learning_rate": 4.4769755542922115e-05, - "loss": 5.8343, + "epoch": 0.5173557373424718, + "grad_norm": 2.7399137020111084, + "learning_rate": 2.7131493910017403e-05, + "loss": 4.2627, "step": 2340 }, { - "epoch": 0.12962962962962962, - "grad_norm": 2.48786997795105, - "learning_rate": 4.475554292211484e-05, - "loss": 5.9264, + "epoch": 0.5184611983196993, + "grad_norm": 2.6174683570861816, + "learning_rate": 2.7069351230425055e-05, + "loss": 4.2011, "step": 2345 }, { - "epoch": 0.12990602542841348, - "grad_norm": 2.87296199798584, - "learning_rate": 4.474133030130756e-05, - "loss": 5.8988, + "epoch": 0.5195666592969268, + "grad_norm": 2.434396266937256, + "learning_rate": 2.7007208550832715e-05, + "loss": 4.2168, "step": 2350 }, { - "epoch": 0.13018242122719734, - "grad_norm": 3.202390193939209, - "learning_rate": 4.472711768050029e-05, - "loss": 5.7166, + "epoch": 0.5206721202741543, + "grad_norm": 2.5760498046875, + "learning_rate": 2.694506587124037e-05, + "loss": 4.3722, "step": 2355 }, { - "epoch": 0.1304588170259812, - "grad_norm": 3.6477270126342773, - "learning_rate": 4.471290505969301e-05, - "loss": 5.7914, + "epoch": 0.5217775812513819, + "grad_norm": 2.616143226623535, + "learning_rate": 2.6882923191648024e-05, + "loss": 4.1671, "step": 2360 }, { - "epoch": 0.13073521282476505, - "grad_norm": 2.6237668991088867, - "learning_rate": 4.469869243888573e-05, - "loss": 5.9484, + "epoch": 0.5228830422286094, + "grad_norm": 2.406928539276123, + "learning_rate": 2.6820780512055683e-05, + "loss": 4.2319, "step": 2365 }, { - "epoch": 0.1310116086235489, - "grad_norm": 2.782205104827881, - "learning_rate": 4.468447981807846e-05, - "loss": 6.0338, + "epoch": 0.5239885032058368, + "grad_norm": 2.4793832302093506, + "learning_rate": 2.6758637832463336e-05, + "loss": 4.2182, "step": 2370 }, { - "epoch": 0.1312880044223328, - "grad_norm": 2.7849576473236084, - "learning_rate": 4.467026719727118e-05, - "loss": 6.1244, + "epoch": 0.5250939641830643, + "grad_norm": 2.757474660873413, + "learning_rate": 2.6696495152870992e-05, + "loss": 4.4572, "step": 2375 }, { - "epoch": 0.13156440022111665, - "grad_norm": 2.8187060356140137, - "learning_rate": 4.46560545764639e-05, - "loss": 5.6802, + "epoch": 0.5261994251602918, + "grad_norm": 2.7199547290802, + "learning_rate": 2.6634352473278652e-05, + "loss": 4.2871, "step": 2380 }, { - "epoch": 0.1318407960199005, - "grad_norm": 2.4502339363098145, - "learning_rate": 4.4641841955656624e-05, - "loss": 5.9567, + "epoch": 0.5273048861375194, + "grad_norm": 2.6695070266723633, + "learning_rate": 2.6572209793686305e-05, + "loss": 4.3649, "step": 2385 }, { - "epoch": 0.13211719181868437, - "grad_norm": 2.743391275405884, - "learning_rate": 4.462762933484935e-05, - "loss": 5.737, + "epoch": 0.5284103471147469, + "grad_norm": 2.5903425216674805, + "learning_rate": 2.651006711409396e-05, + "loss": 4.3604, "step": 2390 }, { - "epoch": 0.13239358761746822, - "grad_norm": 2.580054998397827, - "learning_rate": 4.461341671404207e-05, - "loss": 5.7443, + "epoch": 0.5295158080919744, + "grad_norm": 2.871863842010498, + "learning_rate": 2.644792443450162e-05, + "loss": 4.2315, "step": 2395 }, { - "epoch": 0.13266998341625208, - "grad_norm": 3.157968282699585, - "learning_rate": 4.459920409323479e-05, - "loss": 6.083, + "epoch": 0.5306212690692018, + "grad_norm": 2.49452543258667, + "learning_rate": 2.6385781754909273e-05, + "loss": 4.3564, "step": 2400 }, { - "epoch": 0.13294637921503594, - "grad_norm": 2.9599556922912598, - "learning_rate": 4.458499147242752e-05, - "loss": 6.0339, + "epoch": 0.5317267300464293, + "grad_norm": 2.6567633152008057, + "learning_rate": 2.632363907531693e-05, + "loss": 4.2627, "step": 2405 }, { - "epoch": 0.1332227750138198, - "grad_norm": 3.3958144187927246, - "learning_rate": 4.4570778851620244e-05, - "loss": 5.9857, + "epoch": 0.5328321910236569, + "grad_norm": 2.6986489295959473, + "learning_rate": 2.6261496395724582e-05, + "loss": 4.1613, "step": 2410 }, { - "epoch": 0.13349917081260365, - "grad_norm": 3.3407669067382812, - "learning_rate": 4.455656623081296e-05, - "loss": 5.7594, + "epoch": 0.5339376520008844, + "grad_norm": 2.942229986190796, + "learning_rate": 2.6199353716132242e-05, + "loss": 4.3428, "step": 2415 }, { - "epoch": 0.1337755666113875, - "grad_norm": 2.816286563873291, - "learning_rate": 4.4542353610005685e-05, - "loss": 6.0003, + "epoch": 0.5350431129781119, + "grad_norm": 2.7262582778930664, + "learning_rate": 2.6137211036539898e-05, + "loss": 4.273, "step": 2420 }, { - "epoch": 0.13405196241017137, - "grad_norm": 2.553955554962158, - "learning_rate": 4.452814098919841e-05, - "loss": 6.0085, + "epoch": 0.5361485739553393, + "grad_norm": 2.6394593715667725, + "learning_rate": 2.607506835694755e-05, + "loss": 4.3921, "step": 2425 }, { - "epoch": 0.13432835820895522, - "grad_norm": 3.3297877311706543, - "learning_rate": 4.451392836839113e-05, - "loss": 5.8029, + "epoch": 0.5372540349325668, + "grad_norm": 2.6989800930023193, + "learning_rate": 2.601292567735521e-05, + "loss": 4.3518, "step": 2430 }, { - "epoch": 0.13460475400773908, - "grad_norm": 2.530499219894409, - "learning_rate": 4.449971574758386e-05, - "loss": 5.9603, + "epoch": 0.5383594959097944, + "grad_norm": 2.593045711517334, + "learning_rate": 2.5950782997762863e-05, + "loss": 4.301, "step": 2435 }, { - "epoch": 0.13488114980652294, - "grad_norm": 2.859200954437256, - "learning_rate": 4.448550312677658e-05, - "loss": 5.9611, + "epoch": 0.5394649568870219, + "grad_norm": 2.5254459381103516, + "learning_rate": 2.588864031817052e-05, + "loss": 4.209, "step": 2440 }, { - "epoch": 0.1351575456053068, - "grad_norm": 3.111984968185425, - "learning_rate": 4.4471290505969305e-05, - "loss": 5.9601, + "epoch": 0.5405704178642494, + "grad_norm": 2.765732526779175, + "learning_rate": 2.582649763857818e-05, + "loss": 4.2955, "step": 2445 }, { - "epoch": 0.13543394140409065, - "grad_norm": 2.6724886894226074, - "learning_rate": 4.445707788516202e-05, - "loss": 5.9028, + "epoch": 0.5416758788414769, + "grad_norm": 2.780750274658203, + "learning_rate": 2.576435495898583e-05, + "loss": 4.3846, "step": 2450 }, { - "epoch": 0.1357103372028745, - "grad_norm": 2.909550666809082, - "learning_rate": 4.4442865264354746e-05, - "loss": 6.1483, + "epoch": 0.5427813398187044, + "grad_norm": 2.811513662338257, + "learning_rate": 2.5702212279393488e-05, + "loss": 4.4617, "step": 2455 }, { - "epoch": 0.13598673300165837, - "grad_norm": 2.8380892276763916, - "learning_rate": 4.442865264354748e-05, - "loss": 5.9103, + "epoch": 0.5438868007959319, + "grad_norm": 2.5271966457366943, + "learning_rate": 2.564006959980114e-05, + "loss": 4.1798, "step": 2460 }, { - "epoch": 0.13626312880044222, - "grad_norm": 2.6073994636535645, - "learning_rate": 4.4414440022740194e-05, - "loss": 5.9974, + "epoch": 0.5449922617731594, + "grad_norm": 2.721851348876953, + "learning_rate": 2.55779269202088e-05, + "loss": 4.2644, "step": 2465 }, { - "epoch": 0.13653952459922608, - "grad_norm": 3.0573458671569824, - "learning_rate": 4.440022740193292e-05, - "loss": 5.9833, + "epoch": 0.5460977227503869, + "grad_norm": 2.618861436843872, + "learning_rate": 2.551578424061646e-05, + "loss": 4.317, "step": 2470 }, { - "epoch": 0.13681592039800994, - "grad_norm": 2.835435152053833, - "learning_rate": 4.438601478112564e-05, - "loss": 5.9305, + "epoch": 0.5472031837276145, + "grad_norm": 2.3622546195983887, + "learning_rate": 2.545364156102411e-05, + "loss": 4.4589, "step": 2475 }, { - "epoch": 0.1370923161967938, - "grad_norm": 2.9409842491149902, - "learning_rate": 4.4371802160318366e-05, - "loss": 6.0945, + "epoch": 0.548308644704842, + "grad_norm": 2.5185422897338867, + "learning_rate": 2.539149888143177e-05, + "loss": 4.2975, "step": 2480 }, { - "epoch": 0.13736871199557768, - "grad_norm": 2.7987194061279297, - "learning_rate": 4.435758953951108e-05, - "loss": 6.0813, + "epoch": 0.5494141056820694, + "grad_norm": 2.54284930229187, + "learning_rate": 2.5329356201839428e-05, + "loss": 4.29, "step": 2485 }, { - "epoch": 0.13764510779436154, - "grad_norm": 2.926170825958252, - "learning_rate": 4.434337691870381e-05, - "loss": 6.1433, + "epoch": 0.5505195666592969, + "grad_norm": 2.4982147216796875, + "learning_rate": 2.526721352224708e-05, + "loss": 4.3835, "step": 2490 }, { - "epoch": 0.1379215035931454, - "grad_norm": 2.812005043029785, - "learning_rate": 4.432916429789654e-05, - "loss": 6.1284, + "epoch": 0.5516250276365244, + "grad_norm": 2.5386240482330322, + "learning_rate": 2.5205070842654737e-05, + "loss": 4.4286, "step": 2495 }, { - "epoch": 0.13819789939192925, - "grad_norm": 4.218148231506348, - "learning_rate": 4.4314951677089255e-05, - "loss": 6.2463, + "epoch": 0.552730488613752, + "grad_norm": 2.5726940631866455, + "learning_rate": 2.514292816306239e-05, + "loss": 4.3666, "step": 2500 }, { - "epoch": 0.1384742951907131, - "grad_norm": 3.2735049724578857, - "learning_rate": 4.430073905628198e-05, - "loss": 6.3637, + "epoch": 0.5538359495909795, + "grad_norm": 2.802129030227661, + "learning_rate": 2.508078548347005e-05, + "loss": 4.3205, "step": 2505 }, { - "epoch": 0.13875069098949697, - "grad_norm": 2.975898265838623, - "learning_rate": 4.42865264354747e-05, - "loss": 5.8744, + "epoch": 0.5549414105682069, + "grad_norm": 2.713815212249756, + "learning_rate": 2.5018642803877706e-05, + "loss": 4.2775, "step": 2510 }, { - "epoch": 0.13902708678828082, - "grad_norm": 2.5579276084899902, - "learning_rate": 4.427231381466743e-05, - "loss": 5.8671, + "epoch": 0.5560468715454344, + "grad_norm": 2.597898244857788, + "learning_rate": 2.495650012428536e-05, + "loss": 4.2644, "step": 2515 }, { - "epoch": 0.13930348258706468, - "grad_norm": 2.9788095951080322, - "learning_rate": 4.425810119386015e-05, - "loss": 5.7345, + "epoch": 0.5571523325226619, + "grad_norm": 2.6316134929656982, + "learning_rate": 2.4894357444693018e-05, + "loss": 4.3634, "step": 2520 }, { - "epoch": 0.13957987838584854, - "grad_norm": 2.781212568283081, - "learning_rate": 4.4243888573052875e-05, - "loss": 6.0089, + "epoch": 0.5582577934998895, + "grad_norm": 2.663684129714966, + "learning_rate": 2.4832214765100674e-05, + "loss": 4.2632, "step": 2525 }, { - "epoch": 0.1398562741846324, - "grad_norm": 3.1490135192871094, - "learning_rate": 4.42296759522456e-05, - "loss": 5.9384, + "epoch": 0.559363254477117, + "grad_norm": 2.669243574142456, + "learning_rate": 2.4770072085508327e-05, + "loss": 4.3728, "step": 2530 }, { - "epoch": 0.14013266998341625, - "grad_norm": 3.2188096046447754, - "learning_rate": 4.4215463331438316e-05, - "loss": 6.0045, + "epoch": 0.5604687154543445, + "grad_norm": 2.6854679584503174, + "learning_rate": 2.4707929405915983e-05, + "loss": 4.2938, "step": 2535 }, { - "epoch": 0.1404090657822001, - "grad_norm": 3.2777388095855713, - "learning_rate": 4.420125071063104e-05, - "loss": 5.956, + "epoch": 0.5615741764315719, + "grad_norm": 2.625131130218506, + "learning_rate": 2.4645786726323643e-05, + "loss": 4.3859, "step": 2540 }, { - "epoch": 0.14068546158098397, - "grad_norm": 3.3653669357299805, - "learning_rate": 4.418703808982377e-05, - "loss": 5.9132, + "epoch": 0.5626796374087994, + "grad_norm": 2.6042797565460205, + "learning_rate": 2.4583644046731296e-05, + "loss": 4.2591, "step": 2545 }, { - "epoch": 0.14096185737976782, - "grad_norm": 2.6247451305389404, - "learning_rate": 4.417282546901649e-05, - "loss": 5.8262, + "epoch": 0.563785098386027, + "grad_norm": 2.763540267944336, + "learning_rate": 2.452150136713895e-05, + "loss": 4.2657, "step": 2550 }, { - "epoch": 0.14123825317855168, - "grad_norm": 2.3984429836273193, - "learning_rate": 4.415861284820921e-05, - "loss": 5.8723, + "epoch": 0.5648905593632545, + "grad_norm": 2.8229899406433105, + "learning_rate": 2.4459358687546608e-05, + "loss": 4.3078, "step": 2555 }, { - "epoch": 0.14151464897733554, - "grad_norm": 2.4766428470611572, - "learning_rate": 4.4144400227401936e-05, - "loss": 6.0487, + "epoch": 0.565996020340482, + "grad_norm": 2.8097963333129883, + "learning_rate": 2.4397216007954264e-05, + "loss": 4.3871, "step": 2560 }, { - "epoch": 0.1417910447761194, - "grad_norm": 2.6634018421173096, - "learning_rate": 4.413018760659466e-05, - "loss": 6.0009, + "epoch": 0.5671014813177094, + "grad_norm": 2.6240086555480957, + "learning_rate": 2.433507332836192e-05, + "loss": 4.286, "step": 2565 }, { - "epoch": 0.14206744057490325, - "grad_norm": 3.0467846393585205, - "learning_rate": 4.4115974985787384e-05, - "loss": 6.0557, + "epoch": 0.568206942294937, + "grad_norm": 2.685115098953247, + "learning_rate": 2.4272930648769576e-05, + "loss": 4.2783, "step": 2570 }, { - "epoch": 0.1423438363736871, - "grad_norm": 2.9271202087402344, - "learning_rate": 4.41017623649801e-05, - "loss": 5.7928, + "epoch": 0.5693124032721645, + "grad_norm": 2.697061538696289, + "learning_rate": 2.4210787969177233e-05, + "loss": 4.4211, "step": 2575 }, { - "epoch": 0.14262023217247097, - "grad_norm": 2.9135117530822754, - "learning_rate": 4.408754974417283e-05, - "loss": 5.6288, + "epoch": 0.570417864249392, + "grad_norm": 2.8929386138916016, + "learning_rate": 2.4148645289584885e-05, + "loss": 4.3608, "step": 2580 }, { - "epoch": 0.14289662797125482, - "grad_norm": 2.667640209197998, - "learning_rate": 4.407333712336555e-05, - "loss": 5.9423, + "epoch": 0.5715233252266195, + "grad_norm": 2.6032614707946777, + "learning_rate": 2.4086502609992545e-05, + "loss": 4.2024, "step": 2585 }, { - "epoch": 0.14317302377003868, - "grad_norm": 2.59460711479187, - "learning_rate": 4.405912450255827e-05, - "loss": 6.1813, + "epoch": 0.572628786203847, + "grad_norm": 2.629255533218384, + "learning_rate": 2.40243599304002e-05, + "loss": 4.4302, "step": 2590 }, { - "epoch": 0.14344941956882257, - "grad_norm": 3.593357801437378, - "learning_rate": 4.4044911881751e-05, - "loss": 5.8255, + "epoch": 0.5737342471810745, + "grad_norm": 2.5833659172058105, + "learning_rate": 2.3962217250807857e-05, + "loss": 4.372, "step": 2595 }, { - "epoch": 0.14372581536760642, - "grad_norm": 2.5840964317321777, - "learning_rate": 4.403069926094372e-05, - "loss": 5.9867, + "epoch": 0.574839708158302, + "grad_norm": 2.425273895263672, + "learning_rate": 2.390007457121551e-05, + "loss": 4.2089, "step": 2600 }, { - "epoch": 0.14400221116639028, - "grad_norm": 3.780487060546875, - "learning_rate": 4.4016486640136445e-05, - "loss": 5.734, + "epoch": 0.5759451691355295, + "grad_norm": 2.651646375656128, + "learning_rate": 2.383793189162317e-05, + "loss": 4.2374, "step": 2605 }, { - "epoch": 0.14427860696517414, - "grad_norm": 2.3756935596466064, - "learning_rate": 4.400227401932916e-05, - "loss": 6.0119, + "epoch": 0.577050630112757, + "grad_norm": 2.894827365875244, + "learning_rate": 2.3775789212030826e-05, + "loss": 4.105, "step": 2610 }, { - "epoch": 0.144555002763958, - "grad_norm": 2.420318126678467, - "learning_rate": 4.398806139852189e-05, - "loss": 5.8429, + "epoch": 0.5781560910899846, + "grad_norm": 2.646923780441284, + "learning_rate": 2.371364653243848e-05, + "loss": 4.3908, "step": 2615 }, { - "epoch": 0.14483139856274185, - "grad_norm": 2.4723665714263916, - "learning_rate": 4.397384877771461e-05, - "loss": 5.9102, + "epoch": 0.5792615520672121, + "grad_norm": 2.8050379753112793, + "learning_rate": 2.3651503852846135e-05, + "loss": 4.3573, "step": 2620 }, { - "epoch": 0.1451077943615257, - "grad_norm": 2.8111989498138428, - "learning_rate": 4.3959636156907334e-05, - "loss": 5.796, + "epoch": 0.5803670130444395, + "grad_norm": 2.8766565322875977, + "learning_rate": 2.358936117325379e-05, + "loss": 4.2688, "step": 2625 }, { - "epoch": 0.14538419016030957, - "grad_norm": 2.4505462646484375, - "learning_rate": 4.394542353610006e-05, - "loss": 5.7925, + "epoch": 0.581472474021667, + "grad_norm": 2.452597141265869, + "learning_rate": 2.3527218493661447e-05, + "loss": 4.3922, "step": 2630 }, { - "epoch": 0.14566058595909342, - "grad_norm": 2.9195985794067383, - "learning_rate": 4.393121091529278e-05, - "loss": 5.9991, + "epoch": 0.5825779349988945, + "grad_norm": 2.8422110080718994, + "learning_rate": 2.3465075814069103e-05, + "loss": 4.3008, "step": 2635 }, { - "epoch": 0.14593698175787728, - "grad_norm": 3.3594954013824463, - "learning_rate": 4.3916998294485506e-05, - "loss": 6.0823, + "epoch": 0.5836833959761221, + "grad_norm": 2.661015033721924, + "learning_rate": 2.340293313447676e-05, + "loss": 4.2432, "step": 2640 }, { - "epoch": 0.14621337755666114, - "grad_norm": 2.6641616821289062, - "learning_rate": 4.3902785673678224e-05, - "loss": 6.0646, + "epoch": 0.5847888569533496, + "grad_norm": 2.7962839603424072, + "learning_rate": 2.3340790454884416e-05, + "loss": 4.4387, "step": 2645 }, { - "epoch": 0.146489773355445, - "grad_norm": 3.2339580059051514, - "learning_rate": 4.3888573052870954e-05, - "loss": 6.2217, + "epoch": 0.585894317930577, + "grad_norm": 2.807640552520752, + "learning_rate": 2.3278647775292072e-05, + "loss": 4.3026, "step": 2650 }, { - "epoch": 0.14676616915422885, - "grad_norm": 2.6006035804748535, - "learning_rate": 4.387436043206368e-05, - "loss": 5.8923, + "epoch": 0.5869997789078045, + "grad_norm": 2.77174711227417, + "learning_rate": 2.3216505095699728e-05, + "loss": 4.3376, "step": 2655 }, { - "epoch": 0.1470425649530127, - "grad_norm": 2.918951988220215, - "learning_rate": 4.3860147811256395e-05, - "loss": 5.8821, + "epoch": 0.588105239885032, + "grad_norm": 2.6385319232940674, + "learning_rate": 2.3154362416107384e-05, + "loss": 4.211, "step": 2660 }, { - "epoch": 0.14731896075179657, - "grad_norm": 3.5756337642669678, - "learning_rate": 4.384593519044912e-05, - "loss": 5.9689, + "epoch": 0.5892107008622596, + "grad_norm": 2.464839458465576, + "learning_rate": 2.309221973651504e-05, + "loss": 4.1263, "step": 2665 }, { - "epoch": 0.14759535655058043, - "grad_norm": 2.472301483154297, - "learning_rate": 4.3831722569641843e-05, - "loss": 6.1298, + "epoch": 0.5903161618394871, + "grad_norm": 2.5542917251586914, + "learning_rate": 2.3030077056922693e-05, + "loss": 4.281, "step": 2670 }, { - "epoch": 0.14787175234936428, - "grad_norm": 3.1976962089538574, - "learning_rate": 4.381750994883457e-05, - "loss": 5.9328, + "epoch": 0.5914216228167146, + "grad_norm": 2.796891450881958, + "learning_rate": 2.2967934377330353e-05, + "loss": 4.2626, "step": 2675 }, { - "epoch": 0.14814814814814814, - "grad_norm": 2.672852039337158, - "learning_rate": 4.380329732802729e-05, - "loss": 5.7058, + "epoch": 0.592527083793942, + "grad_norm": 2.6826398372650146, + "learning_rate": 2.290579169773801e-05, + "loss": 4.1999, "step": 2680 }, { - "epoch": 0.148424543946932, - "grad_norm": 2.8173885345458984, - "learning_rate": 4.3789084707220015e-05, - "loss": 5.9733, + "epoch": 0.5936325447711696, + "grad_norm": 2.77254581451416, + "learning_rate": 2.284364901814566e-05, + "loss": 4.3298, "step": 2685 }, { - "epoch": 0.14870093974571585, - "grad_norm": 2.9812614917755127, - "learning_rate": 4.377487208641274e-05, - "loss": 5.9916, + "epoch": 0.5947380057483971, + "grad_norm": 2.6188175678253174, + "learning_rate": 2.2781506338553318e-05, + "loss": 4.2272, "step": 2690 }, { - "epoch": 0.1489773355444997, - "grad_norm": 2.7552757263183594, - "learning_rate": 4.3760659465605457e-05, - "loss": 5.4035, + "epoch": 0.5958434667256246, + "grad_norm": 2.374133825302124, + "learning_rate": 2.2719363658960977e-05, + "loss": 4.425, "step": 2695 }, { - "epoch": 0.14925373134328357, - "grad_norm": 2.684389114379883, - "learning_rate": 4.374644684479818e-05, - "loss": 5.9404, + "epoch": 0.5969489277028521, + "grad_norm": 2.516446352005005, + "learning_rate": 2.2657220979368633e-05, + "loss": 4.3096, "step": 2700 }, { - "epoch": 0.14953012714206745, - "grad_norm": 3.3270440101623535, - "learning_rate": 4.3732234223990905e-05, - "loss": 5.9081, + "epoch": 0.5980543886800795, + "grad_norm": 2.5473289489746094, + "learning_rate": 2.2595078299776286e-05, + "loss": 4.3916, "step": 2705 }, { - "epoch": 0.1498065229408513, - "grad_norm": 3.212458848953247, - "learning_rate": 4.371802160318363e-05, - "loss": 6.2057, + "epoch": 0.5991598496573071, + "grad_norm": 2.9763638973236084, + "learning_rate": 2.2532935620183942e-05, + "loss": 4.2488, "step": 2710 }, { - "epoch": 0.15008291873963517, - "grad_norm": 2.9287021160125732, - "learning_rate": 4.370380898237635e-05, - "loss": 6.2109, + "epoch": 0.6002653106345346, + "grad_norm": 2.831369161605835, + "learning_rate": 2.2470792940591602e-05, + "loss": 4.4136, "step": 2715 }, { - "epoch": 0.15035931453841903, - "grad_norm": 3.4137637615203857, - "learning_rate": 4.3689596361569076e-05, - "loss": 5.805, + "epoch": 0.6013707716117621, + "grad_norm": 2.77677845954895, + "learning_rate": 2.2408650260999255e-05, + "loss": 4.3703, "step": 2720 }, { - "epoch": 0.15063571033720288, - "grad_norm": 2.5240910053253174, - "learning_rate": 4.36753837407618e-05, - "loss": 5.8864, + "epoch": 0.6024762325889896, + "grad_norm": 3.102226972579956, + "learning_rate": 2.234650758140691e-05, + "loss": 4.389, "step": 2725 }, { - "epoch": 0.15091210613598674, - "grad_norm": 3.12414288520813, - "learning_rate": 4.366117111995452e-05, - "loss": 6.0336, + "epoch": 0.6035816935662172, + "grad_norm": 2.694725275039673, + "learning_rate": 2.2284364901814567e-05, + "loss": 4.3748, "step": 2730 }, { - "epoch": 0.1511885019347706, - "grad_norm": 3.1242873668670654, - "learning_rate": 4.364695849914724e-05, - "loss": 6.021, + "epoch": 0.6046871545434446, + "grad_norm": 2.628998041152954, + "learning_rate": 2.2222222222222223e-05, + "loss": 4.2702, "step": 2735 }, { - "epoch": 0.15146489773355445, - "grad_norm": 3.223360538482666, - "learning_rate": 4.363274587833997e-05, - "loss": 6.0656, + "epoch": 0.6057926155206721, + "grad_norm": 2.5050158500671387, + "learning_rate": 2.216007954262988e-05, + "loss": 4.4498, "step": 2740 }, { - "epoch": 0.1517412935323383, - "grad_norm": 3.3459601402282715, - "learning_rate": 4.361853325753269e-05, - "loss": 6.0403, + "epoch": 0.6068980764978996, + "grad_norm": 3.0304501056671143, + "learning_rate": 2.2097936863037536e-05, + "loss": 4.2093, "step": 2745 }, { - "epoch": 0.15201768933112217, - "grad_norm": 2.8621153831481934, - "learning_rate": 4.3604320636725414e-05, - "loss": 5.6734, + "epoch": 0.6080035374751271, + "grad_norm": 2.7480475902557373, + "learning_rate": 2.2035794183445192e-05, + "loss": 4.452, "step": 2750 }, { - "epoch": 0.15229408512990603, - "grad_norm": 2.841547966003418, - "learning_rate": 4.359010801591814e-05, - "loss": 5.8039, + "epoch": 0.6091089984523547, + "grad_norm": 2.5752625465393066, + "learning_rate": 2.1973651503852845e-05, + "loss": 4.1986, "step": 2755 }, { - "epoch": 0.15257048092868988, - "grad_norm": 2.655369758605957, - "learning_rate": 4.357589539511086e-05, - "loss": 5.8627, + "epoch": 0.6102144594295822, + "grad_norm": 2.9249074459075928, + "learning_rate": 2.1911508824260504e-05, + "loss": 4.2884, "step": 2760 }, { - "epoch": 0.15284687672747374, - "grad_norm": 3.5070695877075195, - "learning_rate": 4.3561682774303586e-05, - "loss": 5.7962, + "epoch": 0.6113199204068096, + "grad_norm": 2.565080165863037, + "learning_rate": 2.184936614466816e-05, + "loss": 4.3698, "step": 2765 }, { - "epoch": 0.1531232725262576, - "grad_norm": 2.460430860519409, - "learning_rate": 4.35474701534963e-05, - "loss": 5.9058, + "epoch": 0.6124253813840371, + "grad_norm": 2.9593536853790283, + "learning_rate": 2.1787223465075816e-05, + "loss": 4.4363, "step": 2770 }, { - "epoch": 0.15339966832504145, - "grad_norm": 2.9735209941864014, - "learning_rate": 4.3533257532689033e-05, - "loss": 5.9459, + "epoch": 0.6135308423612647, + "grad_norm": 2.698092460632324, + "learning_rate": 2.172508078548347e-05, + "loss": 4.4131, "step": 2775 }, { - "epoch": 0.1536760641238253, - "grad_norm": 2.6049184799194336, - "learning_rate": 4.351904491188175e-05, - "loss": 5.8831, + "epoch": 0.6146363033384922, + "grad_norm": 2.6179697513580322, + "learning_rate": 2.1662938105891125e-05, + "loss": 4.2489, "step": 2780 }, { - "epoch": 0.15395245992260917, - "grad_norm": 3.0773181915283203, - "learning_rate": 4.3504832291074475e-05, - "loss": 5.8567, + "epoch": 0.6157417643157197, + "grad_norm": 2.7725419998168945, + "learning_rate": 2.1600795426298785e-05, + "loss": 4.3455, "step": 2785 }, { - "epoch": 0.15422885572139303, - "grad_norm": 2.9767990112304688, - "learning_rate": 4.34906196702672e-05, - "loss": 5.884, + "epoch": 0.6168472252929471, + "grad_norm": 2.5519633293151855, + "learning_rate": 2.1538652746706438e-05, + "loss": 4.3074, "step": 2790 }, { - "epoch": 0.15450525152017688, - "grad_norm": 2.9856009483337402, - "learning_rate": 4.347640704945992e-05, - "loss": 5.7991, + "epoch": 0.6179526862701746, + "grad_norm": 2.6183152198791504, + "learning_rate": 2.1476510067114094e-05, + "loss": 4.3562, "step": 2795 }, { - "epoch": 0.15478164731896074, - "grad_norm": 2.7694013118743896, - "learning_rate": 4.3462194428652647e-05, - "loss": 5.9117, + "epoch": 0.6190581472474022, + "grad_norm": 2.5165317058563232, + "learning_rate": 2.141436738752175e-05, + "loss": 4.2388, "step": 2800 }, { - "epoch": 0.1550580431177446, - "grad_norm": 2.524625778198242, - "learning_rate": 4.3447981807845364e-05, - "loss": 5.8089, + "epoch": 0.6201636082246297, + "grad_norm": 2.813973903656006, + "learning_rate": 2.1352224707929406e-05, + "loss": 4.2732, "step": 2805 }, { - "epoch": 0.15533443891652846, - "grad_norm": 2.265674591064453, - "learning_rate": 4.3433769187038095e-05, - "loss": 5.7669, + "epoch": 0.6212690692018572, + "grad_norm": 2.489633798599243, + "learning_rate": 2.1290082028337062e-05, + "loss": 4.183, "step": 2810 }, { - "epoch": 0.15561083471531234, - "grad_norm": 2.9891350269317627, - "learning_rate": 4.341955656623081e-05, - "loss": 5.7408, + "epoch": 0.6223745301790847, + "grad_norm": 2.606971502304077, + "learning_rate": 2.122793934874472e-05, + "loss": 4.3127, "step": 2815 }, { - "epoch": 0.1558872305140962, - "grad_norm": 2.8526082038879395, - "learning_rate": 4.3405343945423536e-05, - "loss": 5.5601, + "epoch": 0.6234799911563121, + "grad_norm": 2.74040150642395, + "learning_rate": 2.1165796669152375e-05, + "loss": 4.3576, "step": 2820 }, { - "epoch": 0.15616362631288006, - "grad_norm": 2.755467653274536, - "learning_rate": 4.3391131324616266e-05, - "loss": 5.6325, + "epoch": 0.6245854521335397, + "grad_norm": 2.814483642578125, + "learning_rate": 2.110365398956003e-05, + "loss": 4.345, "step": 2825 }, { - "epoch": 0.1564400221116639, - "grad_norm": 3.075253963470459, - "learning_rate": 4.3376918703808984e-05, - "loss": 6.0243, + "epoch": 0.6256909131107672, + "grad_norm": 2.4296274185180664, + "learning_rate": 2.1041511309967687e-05, + "loss": 4.2154, "step": 2830 }, { - "epoch": 0.15671641791044777, - "grad_norm": 2.6494526863098145, - "learning_rate": 4.336270608300171e-05, - "loss": 5.7588, + "epoch": 0.6267963740879947, + "grad_norm": 3.018310785293579, + "learning_rate": 2.0979368630375343e-05, + "loss": 4.2779, "step": 2835 }, { - "epoch": 0.15699281370923163, - "grad_norm": 3.440871238708496, - "learning_rate": 4.334849346219443e-05, - "loss": 5.9682, + "epoch": 0.6279018350652222, + "grad_norm": 2.85764741897583, + "learning_rate": 2.0917225950783e-05, + "loss": 4.2533, "step": 2840 }, { - "epoch": 0.15726920950801548, - "grad_norm": 3.129068374633789, - "learning_rate": 4.3334280841387156e-05, - "loss": 5.8397, + "epoch": 0.6290072960424496, + "grad_norm": 2.690497398376465, + "learning_rate": 2.0855083271190652e-05, + "loss": 4.3148, "step": 2845 }, { - "epoch": 0.15754560530679934, - "grad_norm": 2.8854775428771973, - "learning_rate": 4.332006822057988e-05, - "loss": 5.6775, + "epoch": 0.6301127570196772, + "grad_norm": 2.5241053104400635, + "learning_rate": 2.0792940591598312e-05, + "loss": 4.3019, "step": 2850 }, { - "epoch": 0.1578220011055832, - "grad_norm": 2.4169180393218994, - "learning_rate": 4.33058555997726e-05, - "loss": 5.7348, + "epoch": 0.6312182179969047, + "grad_norm": 2.63004732131958, + "learning_rate": 2.0730797912005968e-05, + "loss": 4.3274, "step": 2855 }, { - "epoch": 0.15809839690436706, - "grad_norm": 2.6656081676483154, - "learning_rate": 4.329164297896533e-05, - "loss": 5.5804, + "epoch": 0.6323236789741322, + "grad_norm": 2.6619880199432373, + "learning_rate": 2.066865523241362e-05, + "loss": 4.4063, "step": 2860 }, { - "epoch": 0.1583747927031509, - "grad_norm": 2.1529061794281006, - "learning_rate": 4.3277430358158045e-05, - "loss": 6.0426, + "epoch": 0.6334291399513597, + "grad_norm": 2.918989419937134, + "learning_rate": 2.0606512552821277e-05, + "loss": 4.3446, "step": 2865 }, { - "epoch": 0.15865118850193477, - "grad_norm": 2.7012405395507812, - "learning_rate": 4.326321773735077e-05, - "loss": 5.8865, + "epoch": 0.6345346009285873, + "grad_norm": 2.6898226737976074, + "learning_rate": 2.0544369873228937e-05, + "loss": 4.3895, "step": 2870 }, { - "epoch": 0.15892758430071863, - "grad_norm": 3.113957643508911, - "learning_rate": 4.324900511654349e-05, - "loss": 5.7905, + "epoch": 0.6356400619058147, + "grad_norm": 2.659388542175293, + "learning_rate": 2.0482227193636593e-05, + "loss": 4.2844, "step": 2875 }, { - "epoch": 0.15920398009950248, - "grad_norm": 2.826045274734497, - "learning_rate": 4.323479249573622e-05, - "loss": 5.8868, + "epoch": 0.6367455228830422, + "grad_norm": 2.9145493507385254, + "learning_rate": 2.0420084514044246e-05, + "loss": 4.234, "step": 2880 }, { - "epoch": 0.15948037589828634, - "grad_norm": 2.5294294357299805, - "learning_rate": 4.322057987492894e-05, - "loss": 5.6514, + "epoch": 0.6378509838602697, + "grad_norm": 2.542527198791504, + "learning_rate": 2.03579418344519e-05, + "loss": 4.2848, "step": 2885 }, { - "epoch": 0.1597567716970702, - "grad_norm": 2.923967123031616, - "learning_rate": 4.320636725412166e-05, - "loss": 6.0134, + "epoch": 0.6389564448374973, + "grad_norm": 2.690652847290039, + "learning_rate": 2.0295799154859558e-05, + "loss": 4.2601, "step": 2890 }, { - "epoch": 0.16003316749585406, - "grad_norm": 3.1587777137756348, - "learning_rate": 4.319215463331439e-05, - "loss": 6.1809, + "epoch": 0.6400619058147248, + "grad_norm": 2.74469256401062, + "learning_rate": 2.0233656475267214e-05, + "loss": 4.2875, "step": 2895 }, { - "epoch": 0.1603095632946379, - "grad_norm": 2.6380045413970947, - "learning_rate": 4.317794201250711e-05, - "loss": 5.828, + "epoch": 0.6411673667919523, + "grad_norm": 2.5279908180236816, + "learning_rate": 2.017151379567487e-05, + "loss": 4.3336, "step": 2900 }, { - "epoch": 0.16058595909342177, - "grad_norm": 3.320361852645874, - "learning_rate": 4.316372939169983e-05, - "loss": 5.8606, + "epoch": 0.6422728277691797, + "grad_norm": 2.6275908946990967, + "learning_rate": 2.0109371116082526e-05, + "loss": 4.3125, "step": 2905 }, { - "epoch": 0.16086235489220563, - "grad_norm": 2.4899818897247314, - "learning_rate": 4.3149516770892554e-05, - "loss": 5.7003, + "epoch": 0.6433782887464072, + "grad_norm": 2.629896879196167, + "learning_rate": 2.0047228436490183e-05, + "loss": 4.3233, "step": 2910 }, { - "epoch": 0.16113875069098949, - "grad_norm": 3.366610050201416, - "learning_rate": 4.313530415008528e-05, - "loss": 5.7348, + "epoch": 0.6444837497236348, + "grad_norm": 2.8916358947753906, + "learning_rate": 1.998508575689784e-05, + "loss": 4.2835, "step": 2915 }, { - "epoch": 0.16141514648977334, - "grad_norm": 3.16973876953125, - "learning_rate": 4.3121091529278e-05, - "loss": 5.6955, + "epoch": 0.6455892107008623, + "grad_norm": 2.6450507640838623, + "learning_rate": 1.9922943077305495e-05, + "loss": 4.3504, "step": 2920 }, { - "epoch": 0.16169154228855723, - "grad_norm": 3.0917766094207764, - "learning_rate": 4.310687890847072e-05, - "loss": 5.8221, + "epoch": 0.6466946716780898, + "grad_norm": 2.617589235305786, + "learning_rate": 1.986080039771315e-05, + "loss": 4.4431, "step": 2925 }, { - "epoch": 0.16196793808734108, - "grad_norm": 3.0360071659088135, - "learning_rate": 4.309266628766345e-05, - "loss": 5.7854, + "epoch": 0.6478001326553172, + "grad_norm": 2.4875051975250244, + "learning_rate": 1.9798657718120804e-05, + "loss": 4.3341, "step": 2930 }, { - "epoch": 0.16224433388612494, - "grad_norm": 2.421694040298462, - "learning_rate": 4.3078453666856174e-05, - "loss": 5.5865, + "epoch": 0.6489055936325447, + "grad_norm": 2.5593132972717285, + "learning_rate": 1.9736515038528463e-05, + "loss": 4.335, "step": 2935 }, { - "epoch": 0.1625207296849088, - "grad_norm": 2.4142487049102783, - "learning_rate": 4.306424104604889e-05, - "loss": 5.6557, + "epoch": 0.6500110546097723, + "grad_norm": 2.687657594680786, + "learning_rate": 1.967437235893612e-05, + "loss": 4.3632, "step": 2940 }, { - "epoch": 0.16279712548369266, - "grad_norm": 2.854003429412842, - "learning_rate": 4.3050028425241615e-05, - "loss": 5.8642, + "epoch": 0.6511165155869998, + "grad_norm": 2.605257987976074, + "learning_rate": 1.9612229679343776e-05, + "loss": 4.3999, "step": 2945 }, { - "epoch": 0.1630735212824765, - "grad_norm": 2.4394283294677734, - "learning_rate": 4.303581580443434e-05, - "loss": 5.5967, + "epoch": 0.6522219765642273, + "grad_norm": 2.3589608669281006, + "learning_rate": 1.955008699975143e-05, + "loss": 4.2815, "step": 2950 }, { - "epoch": 0.16334991708126037, - "grad_norm": 3.362593650817871, - "learning_rate": 4.302160318362706e-05, - "loss": 5.9702, + "epoch": 0.6533274375414548, + "grad_norm": 2.8207266330718994, + "learning_rate": 1.9487944320159085e-05, + "loss": 4.2614, "step": 2955 }, { - "epoch": 0.16362631288004423, - "grad_norm": 2.2404420375823975, - "learning_rate": 4.300739056281979e-05, - "loss": 5.6981, + "epoch": 0.6544328985186822, + "grad_norm": 2.7098288536071777, + "learning_rate": 1.9425801640566744e-05, + "loss": 4.2278, "step": 2960 }, { - "epoch": 0.16390270867882809, - "grad_norm": 2.7826569080352783, - "learning_rate": 4.299317794201251e-05, - "loss": 5.7644, + "epoch": 0.6555383594959098, + "grad_norm": 2.819708824157715, + "learning_rate": 1.9363658960974397e-05, + "loss": 4.22, "step": 2965 }, { - "epoch": 0.16417910447761194, - "grad_norm": 2.6472930908203125, - "learning_rate": 4.2978965321205235e-05, - "loss": 6.105, + "epoch": 0.6566438204731373, + "grad_norm": 2.7340097427368164, + "learning_rate": 1.9301516281382053e-05, + "loss": 4.2767, "step": 2970 }, { - "epoch": 0.1644555002763958, - "grad_norm": 3.248873472213745, - "learning_rate": 4.296475270039795e-05, - "loss": 5.7841, + "epoch": 0.6577492814503648, + "grad_norm": 2.6747171878814697, + "learning_rate": 1.923937360178971e-05, + "loss": 4.3268, "step": 2975 }, { - "epoch": 0.16473189607517966, - "grad_norm": 3.790102958679199, - "learning_rate": 4.2950540079590676e-05, - "loss": 5.7012, + "epoch": 0.6588547424275923, + "grad_norm": 2.5896904468536377, + "learning_rate": 1.917723092219737e-05, + "loss": 4.309, "step": 2980 }, { - "epoch": 0.16500829187396351, - "grad_norm": 3.1310534477233887, - "learning_rate": 4.293632745878341e-05, - "loss": 5.7268, + "epoch": 0.6599602034048198, + "grad_norm": 2.6400575637817383, + "learning_rate": 1.9115088242605022e-05, + "loss": 4.2878, "step": 2985 }, { - "epoch": 0.16528468767274737, - "grad_norm": 2.8928165435791016, - "learning_rate": 4.2922114837976124e-05, - "loss": 5.7898, + "epoch": 0.6610656643820473, + "grad_norm": 2.62795352935791, + "learning_rate": 1.9052945563012678e-05, + "loss": 4.3861, "step": 2990 }, { - "epoch": 0.16556108347153123, - "grad_norm": 2.956012010574341, - "learning_rate": 4.290790221716885e-05, - "loss": 6.1025, + "epoch": 0.6621711253592748, + "grad_norm": 2.7335047721862793, + "learning_rate": 1.8990802883420334e-05, + "loss": 4.2773, "step": 2995 }, { - "epoch": 0.16583747927031509, - "grad_norm": 3.146536111831665, - "learning_rate": 4.289368959636157e-05, - "loss": 5.8821, + "epoch": 0.6632765863365023, + "grad_norm": 2.781811237335205, + "learning_rate": 1.892866020382799e-05, + "loss": 4.3049, "step": 3000 }, { - "epoch": 0.16611387506909894, - "grad_norm": 3.373002767562866, - "learning_rate": 4.2879476975554296e-05, - "loss": 5.433, + "epoch": 0.6643820473137299, + "grad_norm": 2.65694522857666, + "learning_rate": 1.8866517524235646e-05, + "loss": 4.2534, "step": 3005 }, { - "epoch": 0.1663902708678828, - "grad_norm": 3.2501721382141113, - "learning_rate": 4.286526435474702e-05, - "loss": 6.0184, + "epoch": 0.6654875082909574, + "grad_norm": 2.611654043197632, + "learning_rate": 1.8804374844643303e-05, + "loss": 4.2397, "step": 3010 }, { - "epoch": 0.16666666666666666, - "grad_norm": 2.656000852584839, - "learning_rate": 4.285105173393974e-05, - "loss": 5.8359, + "epoch": 0.6665929692681848, + "grad_norm": 2.759890079498291, + "learning_rate": 1.874223216505096e-05, + "loss": 4.1524, "step": 3015 }, { - "epoch": 0.16694306246545051, - "grad_norm": 3.513477325439453, - "learning_rate": 4.283683911313247e-05, - "loss": 6.0025, + "epoch": 0.6676984302454123, + "grad_norm": 2.7549400329589844, + "learning_rate": 1.868008948545861e-05, + "loss": 4.2703, "step": 3020 }, { - "epoch": 0.16721945826423437, - "grad_norm": 3.080552816390991, - "learning_rate": 4.2822626492325185e-05, - "loss": 5.7096, + "epoch": 0.6688038912226398, + "grad_norm": 2.606306552886963, + "learning_rate": 1.861794680586627e-05, + "loss": 4.2695, "step": 3025 }, { - "epoch": 0.16749585406301823, - "grad_norm": 2.202702045440674, - "learning_rate": 4.280841387151791e-05, - "loss": 5.9594, + "epoch": 0.6699093521998674, + "grad_norm": 3.0413312911987305, + "learning_rate": 1.8555804126273927e-05, + "loss": 4.5286, "step": 3030 }, { - "epoch": 0.16777224986180211, - "grad_norm": 2.7904155254364014, - "learning_rate": 4.279420125071063e-05, - "loss": 5.749, + "epoch": 0.6710148131770949, + "grad_norm": 2.6322450637817383, + "learning_rate": 1.849366144668158e-05, + "loss": 4.3509, "step": 3035 }, { - "epoch": 0.16804864566058597, - "grad_norm": 3.466552495956421, - "learning_rate": 4.277998862990336e-05, - "loss": 5.8486, + "epoch": 0.6721202741543224, + "grad_norm": 2.7126147747039795, + "learning_rate": 1.8431518767089236e-05, + "loss": 4.502, "step": 3040 }, { - "epoch": 0.16832504145936983, - "grad_norm": 2.64650297164917, - "learning_rate": 4.276577600909608e-05, - "loss": 5.8657, + "epoch": 0.6732257351315498, + "grad_norm": 2.5845155715942383, + "learning_rate": 1.8369376087496896e-05, + "loss": 4.4788, "step": 3045 }, { - "epoch": 0.16860143725815369, - "grad_norm": 3.313481569290161, - "learning_rate": 4.27515633882888e-05, - "loss": 5.5925, + "epoch": 0.6743311961087773, + "grad_norm": 2.713156223297119, + "learning_rate": 1.8307233407904552e-05, + "loss": 4.4627, "step": 3050 }, { - "epoch": 0.16887783305693754, - "grad_norm": 3.433079957962036, - "learning_rate": 4.273735076748153e-05, - "loss": 5.6123, + "epoch": 0.6754366570860049, + "grad_norm": 2.5280685424804688, + "learning_rate": 1.8245090728312205e-05, + "loss": 4.3126, "step": 3055 }, { - "epoch": 0.1691542288557214, - "grad_norm": 3.088146686553955, - "learning_rate": 4.2723138146674246e-05, - "loss": 5.6197, + "epoch": 0.6765421180632324, + "grad_norm": 2.6877503395080566, + "learning_rate": 1.818294804871986e-05, + "loss": 4.4045, "step": 3060 }, { - "epoch": 0.16943062465450526, - "grad_norm": 2.703796863555908, - "learning_rate": 4.270892552586697e-05, - "loss": 5.8104, + "epoch": 0.6776475790404599, + "grad_norm": 2.5872035026550293, + "learning_rate": 1.8120805369127517e-05, + "loss": 4.4283, "step": 3065 }, { - "epoch": 0.16970702045328911, - "grad_norm": 2.511690139770508, - "learning_rate": 4.2694712905059694e-05, - "loss": 5.5404, + "epoch": 0.6787530400176873, + "grad_norm": 2.494570255279541, + "learning_rate": 1.8058662689535173e-05, + "loss": 4.3445, "step": 3070 }, { - "epoch": 0.16998341625207297, - "grad_norm": 2.678618907928467, - "learning_rate": 4.268050028425242e-05, - "loss": 5.8477, + "epoch": 0.6798585009949148, + "grad_norm": 2.8552112579345703, + "learning_rate": 1.799652000994283e-05, + "loss": 4.2656, "step": 3075 }, { - "epoch": 0.17025981205085683, - "grad_norm": 2.9002177715301514, - "learning_rate": 4.266628766344514e-05, - "loss": 5.907, + "epoch": 0.6809639619721424, + "grad_norm": 2.528190851211548, + "learning_rate": 1.7934377330350486e-05, + "loss": 4.2317, "step": 3080 }, { - "epoch": 0.1705362078496407, - "grad_norm": 3.1903774738311768, - "learning_rate": 4.265207504263786e-05, - "loss": 5.8601, + "epoch": 0.6820694229493699, + "grad_norm": 2.6249637603759766, + "learning_rate": 1.7872234650758142e-05, + "loss": 4.4084, "step": 3085 }, { - "epoch": 0.17081260364842454, - "grad_norm": 3.3715593814849854, - "learning_rate": 4.263786242183059e-05, - "loss": 5.8326, + "epoch": 0.6831748839265974, + "grad_norm": 2.8214519023895264, + "learning_rate": 1.7810091971165798e-05, + "loss": 4.4469, "step": 3090 }, { - "epoch": 0.1710889994472084, - "grad_norm": 2.6727895736694336, - "learning_rate": 4.2623649801023314e-05, - "loss": 5.6508, + "epoch": 0.6842803449038249, + "grad_norm": 3.1400296688079834, + "learning_rate": 1.7747949291573454e-05, + "loss": 4.4882, "step": 3095 }, { - "epoch": 0.17136539524599226, - "grad_norm": 2.429774761199951, - "learning_rate": 4.260943718021603e-05, - "loss": 5.7209, + "epoch": 0.6853858058810524, + "grad_norm": 2.7912092208862305, + "learning_rate": 1.768580661198111e-05, + "loss": 4.2987, "step": 3100 }, { - "epoch": 0.17164179104477612, - "grad_norm": 2.418609619140625, - "learning_rate": 4.2595224559408755e-05, - "loss": 5.6107, + "epoch": 0.6864912668582799, + "grad_norm": 2.444261312484741, + "learning_rate": 1.7623663932388766e-05, + "loss": 4.3692, "step": 3105 }, { - "epoch": 0.17191818684355997, - "grad_norm": 3.0767714977264404, - "learning_rate": 4.258101193860148e-05, - "loss": 5.6858, + "epoch": 0.6875967278355074, + "grad_norm": 2.8983335494995117, + "learning_rate": 1.756152125279642e-05, + "loss": 4.2532, "step": 3110 }, { - "epoch": 0.17219458264234383, - "grad_norm": 3.297687530517578, - "learning_rate": 4.25667993177942e-05, - "loss": 5.9651, + "epoch": 0.6887021888127349, + "grad_norm": 2.8009955883026123, + "learning_rate": 1.749937857320408e-05, + "loss": 4.343, "step": 3115 }, { - "epoch": 0.1724709784411277, - "grad_norm": 3.491100549697876, - "learning_rate": 4.255258669698693e-05, - "loss": 5.6576, + "epoch": 0.6898076497899625, + "grad_norm": 2.664306640625, + "learning_rate": 1.7437235893611735e-05, + "loss": 4.3392, "step": 3120 }, { - "epoch": 0.17274737423991154, - "grad_norm": 2.744009256362915, - "learning_rate": 4.253837407617965e-05, - "loss": 5.6868, + "epoch": 0.6909131107671899, + "grad_norm": 2.744086742401123, + "learning_rate": 1.7375093214019388e-05, + "loss": 4.5081, "step": 3125 }, { - "epoch": 0.1730237700386954, - "grad_norm": 3.076695680618286, - "learning_rate": 4.2524161455372375e-05, - "loss": 5.7896, + "epoch": 0.6920185717444174, + "grad_norm": 2.5243453979492188, + "learning_rate": 1.7312950534427044e-05, + "loss": 4.138, "step": 3130 }, { - "epoch": 0.17330016583747926, - "grad_norm": 2.9809587001800537, - "learning_rate": 4.250994883456509e-05, - "loss": 5.7975, + "epoch": 0.6931240327216449, + "grad_norm": 2.879436492919922, + "learning_rate": 1.7250807854834704e-05, + "loss": 4.3065, "step": 3135 }, { - "epoch": 0.17357656163626312, - "grad_norm": 2.666238307952881, - "learning_rate": 4.249573621375782e-05, - "loss": 6.0512, + "epoch": 0.6942294936988724, + "grad_norm": 2.766604423522949, + "learning_rate": 1.7188665175242356e-05, + "loss": 4.3584, "step": 3140 }, { - "epoch": 0.173852957435047, - "grad_norm": 2.905074119567871, - "learning_rate": 4.248152359295054e-05, - "loss": 5.5453, + "epoch": 0.6953349546761, + "grad_norm": 2.644548177719116, + "learning_rate": 1.7126522495650012e-05, + "loss": 4.2898, "step": 3145 }, { - "epoch": 0.17412935323383086, - "grad_norm": 3.2428863048553467, - "learning_rate": 4.2467310972143264e-05, - "loss": 5.5774, + "epoch": 0.6964404156533275, + "grad_norm": 2.6209113597869873, + "learning_rate": 1.706437981605767e-05, + "loss": 4.273, "step": 3150 }, { - "epoch": 0.17440574903261472, - "grad_norm": 2.5885746479034424, - "learning_rate": 4.245309835133599e-05, - "loss": 5.8335, + "epoch": 0.6975458766305549, + "grad_norm": 2.7458090782165527, + "learning_rate": 1.7002237136465328e-05, + "loss": 4.3472, "step": 3155 }, { - "epoch": 0.17468214483139857, - "grad_norm": 2.6241211891174316, - "learning_rate": 4.243888573052871e-05, - "loss": 5.8666, + "epoch": 0.6986513376077824, + "grad_norm": 2.5772080421447754, + "learning_rate": 1.694009445687298e-05, + "loss": 4.4346, "step": 3160 }, { - "epoch": 0.17495854063018243, - "grad_norm": 2.9486637115478516, - "learning_rate": 4.2424673109721436e-05, - "loss": 5.4234, + "epoch": 0.6997567985850099, + "grad_norm": 2.7952399253845215, + "learning_rate": 1.6877951777280637e-05, + "loss": 4.3793, "step": 3165 }, { - "epoch": 0.1752349364289663, - "grad_norm": 3.1195502281188965, - "learning_rate": 4.241046048891415e-05, - "loss": 5.7667, + "epoch": 0.7008622595622375, + "grad_norm": 2.724113702774048, + "learning_rate": 1.6815809097688293e-05, + "loss": 4.2947, "step": 3170 }, { - "epoch": 0.17551133222775014, - "grad_norm": 2.9421496391296387, - "learning_rate": 4.2396247868106884e-05, - "loss": 5.6917, + "epoch": 0.701967720539465, + "grad_norm": 2.809077262878418, + "learning_rate": 1.675366641809595e-05, + "loss": 4.4637, "step": 3175 }, { - "epoch": 0.175787728026534, - "grad_norm": 3.021730422973633, - "learning_rate": 4.238203524729961e-05, - "loss": 6.0067, + "epoch": 0.7030731815166925, + "grad_norm": 2.6896934509277344, + "learning_rate": 1.6691523738503606e-05, + "loss": 4.2131, "step": 3180 }, { - "epoch": 0.17606412382531786, - "grad_norm": 2.6247682571411133, - "learning_rate": 4.2367822626492325e-05, - "loss": 5.7843, + "epoch": 0.7041786424939199, + "grad_norm": 2.823146343231201, + "learning_rate": 1.6629381058911262e-05, + "loss": 4.2319, "step": 3185 }, { - "epoch": 0.17634051962410172, - "grad_norm": 2.8806023597717285, - "learning_rate": 4.235361000568505e-05, - "loss": 5.8293, + "epoch": 0.7052841034711474, + "grad_norm": 2.5893144607543945, + "learning_rate": 1.6567238379318918e-05, + "loss": 4.3153, "step": 3190 }, { - "epoch": 0.17661691542288557, - "grad_norm": 3.3191425800323486, - "learning_rate": 4.233939738487777e-05, - "loss": 5.8644, + "epoch": 0.706389564448375, + "grad_norm": 2.8390941619873047, + "learning_rate": 1.650509569972657e-05, + "loss": 4.2297, "step": 3195 }, { - "epoch": 0.17689331122166943, - "grad_norm": 2.656522274017334, - "learning_rate": 4.23251847640705e-05, - "loss": 5.8543, + "epoch": 0.7074950254256025, + "grad_norm": 2.496361255645752, + "learning_rate": 1.644295302013423e-05, + "loss": 4.4646, "step": 3200 }, { - "epoch": 0.1771697070204533, - "grad_norm": 3.2881433963775635, - "learning_rate": 4.231097214326322e-05, - "loss": 5.5148, + "epoch": 0.70860048640283, + "grad_norm": 2.776575803756714, + "learning_rate": 1.6380810340541887e-05, + "loss": 4.5525, "step": 3205 }, { - "epoch": 0.17744610281923714, - "grad_norm": 2.557159423828125, - "learning_rate": 4.2296759522455945e-05, - "loss": 5.8617, + "epoch": 0.7097059473800574, + "grad_norm": 2.6303658485412598, + "learning_rate": 1.631866766094954e-05, + "loss": 4.3819, "step": 3210 }, { - "epoch": 0.177722498618021, - "grad_norm": 3.0635502338409424, - "learning_rate": 4.228254690164867e-05, - "loss": 5.7063, + "epoch": 0.710811408357285, + "grad_norm": 2.4757165908813477, + "learning_rate": 1.6256524981357195e-05, + "loss": 4.2136, "step": 3215 }, { - "epoch": 0.17799889441680486, - "grad_norm": 2.6538784503936768, - "learning_rate": 4.2268334280841386e-05, - "loss": 5.6792, + "epoch": 0.7119168693345125, + "grad_norm": 2.7062437534332275, + "learning_rate": 1.619438230176485e-05, + "loss": 4.3914, "step": 3220 }, { - "epoch": 0.17827529021558872, - "grad_norm": 3.1617372035980225, - "learning_rate": 4.225412166003411e-05, - "loss": 5.9972, + "epoch": 0.71302233031174, + "grad_norm": 2.7044432163238525, + "learning_rate": 1.613223962217251e-05, + "loss": 4.3731, "step": 3225 }, { - "epoch": 0.17855168601437257, - "grad_norm": 2.5315380096435547, - "learning_rate": 4.223990903922684e-05, - "loss": 5.7918, + "epoch": 0.7141277912889675, + "grad_norm": 2.7421531677246094, + "learning_rate": 1.6070096942580164e-05, + "loss": 4.4874, "step": 3230 }, { - "epoch": 0.17882808181315643, - "grad_norm": 3.2303788661956787, - "learning_rate": 4.222569641841956e-05, - "loss": 5.7051, + "epoch": 0.715233252266195, + "grad_norm": 2.770270347595215, + "learning_rate": 1.600795426298782e-05, + "loss": 4.2702, "step": 3235 }, { - "epoch": 0.1791044776119403, - "grad_norm": 2.689042568206787, - "learning_rate": 4.221148379761228e-05, - "loss": 5.7993, + "epoch": 0.7163387132434225, + "grad_norm": 2.617872714996338, + "learning_rate": 1.5945811583395476e-05, + "loss": 4.3877, "step": 3240 }, { - "epoch": 0.17938087341072415, - "grad_norm": 2.5231776237487793, - "learning_rate": 4.2197271176805006e-05, - "loss": 5.9223, + "epoch": 0.71744417422065, + "grad_norm": 2.5779149532318115, + "learning_rate": 1.5883668903803133e-05, + "loss": 4.2644, "step": 3245 }, { - "epoch": 0.179657269209508, - "grad_norm": 3.4282543659210205, - "learning_rate": 4.218305855599773e-05, - "loss": 5.6935, + "epoch": 0.7185496351978775, + "grad_norm": 2.465280771255493, + "learning_rate": 1.582152622421079e-05, + "loss": 4.2764, "step": 3250 }, { - "epoch": 0.1799336650082919, - "grad_norm": 2.655792236328125, - "learning_rate": 4.216884593519045e-05, - "loss": 5.6622, + "epoch": 0.719655096175105, + "grad_norm": 2.6684722900390625, + "learning_rate": 1.5759383544618445e-05, + "loss": 4.4445, "step": 3255 }, { - "epoch": 0.18021006080707574, - "grad_norm": 3.699481248855591, - "learning_rate": 4.215463331438317e-05, - "loss": 5.6104, + "epoch": 0.7207605571523326, + "grad_norm": 2.7769546508789062, + "learning_rate": 1.56972408650261e-05, + "loss": 4.3571, "step": 3260 }, { - "epoch": 0.1804864566058596, - "grad_norm": 2.7211077213287354, - "learning_rate": 4.21404206935759e-05, - "loss": 5.7032, + "epoch": 0.72186601812956, + "grad_norm": 2.58829402923584, + "learning_rate": 1.5635098185433757e-05, + "loss": 4.2226, "step": 3265 }, { - "epoch": 0.18076285240464346, - "grad_norm": 3.4669954776763916, - "learning_rate": 4.212620807276862e-05, - "loss": 5.4362, + "epoch": 0.7229714791067875, + "grad_norm": 2.5519750118255615, + "learning_rate": 1.5572955505841413e-05, + "loss": 4.4029, "step": 3270 }, { - "epoch": 0.18103924820342732, - "grad_norm": 2.743119716644287, - "learning_rate": 4.211199545196134e-05, - "loss": 5.5355, + "epoch": 0.724076940084015, + "grad_norm": 2.6074788570404053, + "learning_rate": 1.551081282624907e-05, + "loss": 4.0522, "step": 3275 }, { - "epoch": 0.18131564400221117, - "grad_norm": 2.8650245666503906, - "learning_rate": 4.209778283115407e-05, - "loss": 5.6457, + "epoch": 0.7251824010612425, + "grad_norm": 2.721590042114258, + "learning_rate": 1.5448670146656726e-05, + "loss": 4.1492, "step": 3280 }, { - "epoch": 0.18159203980099503, - "grad_norm": 2.683289051055908, - "learning_rate": 4.208357021034679e-05, - "loss": 5.8724, + "epoch": 0.7262878620384701, + "grad_norm": 2.80806827545166, + "learning_rate": 1.538652746706438e-05, + "loss": 4.412, "step": 3285 }, { - "epoch": 0.1818684355997789, - "grad_norm": 2.849991798400879, - "learning_rate": 4.2069357589539515e-05, - "loss": 5.8989, + "epoch": 0.7273933230156976, + "grad_norm": 2.87967848777771, + "learning_rate": 1.5324384787472038e-05, + "loss": 4.3851, "step": 3290 }, { - "epoch": 0.18214483139856275, - "grad_norm": 3.0860819816589355, - "learning_rate": 4.205514496873223e-05, - "loss": 5.9586, + "epoch": 0.728498783992925, + "grad_norm": 2.5552468299865723, + "learning_rate": 1.5262242107879694e-05, + "loss": 4.2578, "step": 3295 }, { - "epoch": 0.1824212271973466, - "grad_norm": 3.3663172721862793, - "learning_rate": 4.204093234792496e-05, - "loss": 5.7115, + "epoch": 0.7296042449701525, + "grad_norm": 2.6064484119415283, + "learning_rate": 1.5200099428287349e-05, + "loss": 4.4176, "step": 3300 }, { - "epoch": 0.18269762299613046, - "grad_norm": 3.262286901473999, - "learning_rate": 4.202671972711768e-05, - "loss": 5.9303, + "epoch": 0.73070970594738, + "grad_norm": 2.6501288414001465, + "learning_rate": 1.5137956748695003e-05, + "loss": 4.2782, "step": 3305 }, { - "epoch": 0.18297401879491432, - "grad_norm": 2.6756162643432617, - "learning_rate": 4.2012507106310404e-05, - "loss": 6.0514, + "epoch": 0.7318151669246076, + "grad_norm": 2.7041335105895996, + "learning_rate": 1.5075814069102661e-05, + "loss": 4.4355, "step": 3310 }, { - "epoch": 0.18325041459369817, - "grad_norm": 2.981628179550171, - "learning_rate": 4.199829448550313e-05, - "loss": 5.5975, + "epoch": 0.7329206279018351, + "grad_norm": 2.7473063468933105, + "learning_rate": 1.5013671389510317e-05, + "loss": 4.3692, "step": 3315 }, { - "epoch": 0.18352681039248203, - "grad_norm": 3.121140956878662, - "learning_rate": 4.198408186469585e-05, - "loss": 5.8209, + "epoch": 0.7340260888790626, + "grad_norm": 2.753004312515259, + "learning_rate": 1.4951528709917972e-05, + "loss": 4.3074, "step": 3320 }, { - "epoch": 0.1838032061912659, - "grad_norm": 2.5124523639678955, - "learning_rate": 4.1969869243888576e-05, - "loss": 5.7161, + "epoch": 0.73513154985629, + "grad_norm": 2.5943238735198975, + "learning_rate": 1.4889386030325628e-05, + "loss": 4.2984, "step": 3325 }, { - "epoch": 0.18407960199004975, - "grad_norm": 2.389613389968872, - "learning_rate": 4.1955656623081294e-05, - "loss": 5.5582, + "epoch": 0.7362370108335176, + "grad_norm": 3.0592753887176514, + "learning_rate": 1.4827243350733282e-05, + "loss": 4.3758, "step": 3330 }, { - "epoch": 0.1843559977888336, - "grad_norm": 3.512432336807251, - "learning_rate": 4.1941444002274024e-05, - "loss": 5.5329, + "epoch": 0.7373424718107451, + "grad_norm": 2.9579524993896484, + "learning_rate": 1.4765100671140942e-05, + "loss": 4.3336, "step": 3335 }, { - "epoch": 0.18463239358761746, - "grad_norm": 2.90018367767334, - "learning_rate": 4.192723138146675e-05, - "loss": 5.9161, + "epoch": 0.7384479327879726, + "grad_norm": 2.8208494186401367, + "learning_rate": 1.4702957991548596e-05, + "loss": 4.3748, "step": 3340 }, { - "epoch": 0.18490878938640132, - "grad_norm": 3.013446092605591, - "learning_rate": 4.1913018760659465e-05, - "loss": 5.9448, + "epoch": 0.7395533937652001, + "grad_norm": 2.7068212032318115, + "learning_rate": 1.4640815311956253e-05, + "loss": 4.3802, "step": 3345 }, { - "epoch": 0.18518518518518517, - "grad_norm": 2.8273327350616455, - "learning_rate": 4.189880613985219e-05, - "loss": 5.5355, + "epoch": 0.7406588547424275, + "grad_norm": 2.6911303997039795, + "learning_rate": 1.4578672632363907e-05, + "loss": 4.2637, "step": 3350 }, { - "epoch": 0.18546158098396903, - "grad_norm": 3.1768956184387207, - "learning_rate": 4.1884593519044913e-05, - "loss": 6.1034, + "epoch": 0.7417643157196551, + "grad_norm": 2.925656318664551, + "learning_rate": 1.4516529952771565e-05, + "loss": 4.1862, "step": 3355 }, { - "epoch": 0.1857379767827529, - "grad_norm": 3.083205461502075, - "learning_rate": 4.187038089823764e-05, - "loss": 5.7557, + "epoch": 0.7428697766968826, + "grad_norm": 2.8226230144500732, + "learning_rate": 1.4454387273179221e-05, + "loss": 4.2084, "step": 3360 }, { - "epoch": 0.18601437258153677, - "grad_norm": 3.252182960510254, - "learning_rate": 4.1856168277430355e-05, - "loss": 5.6727, + "epoch": 0.7439752376741101, + "grad_norm": 2.73540997505188, + "learning_rate": 1.4392244593586876e-05, + "loss": 4.3171, "step": 3365 }, { - "epoch": 0.18629076838032063, - "grad_norm": 2.590454578399658, - "learning_rate": 4.1841955656623085e-05, - "loss": 6.1319, + "epoch": 0.7450806986513376, + "grad_norm": 2.88110613822937, + "learning_rate": 1.4330101913994532e-05, + "loss": 4.3005, "step": 3370 }, { - "epoch": 0.1865671641791045, - "grad_norm": 2.937279224395752, - "learning_rate": 4.182774303581581e-05, - "loss": 5.8309, + "epoch": 0.7461861596285652, + "grad_norm": 2.618785858154297, + "learning_rate": 1.4267959234402186e-05, + "loss": 4.2863, "step": 3375 }, { - "epoch": 0.18684355997788835, - "grad_norm": 2.746546745300293, - "learning_rate": 4.1813530415008527e-05, - "loss": 5.6252, + "epoch": 0.7472916206057926, + "grad_norm": 2.434032440185547, + "learning_rate": 1.4205816554809844e-05, + "loss": 4.3868, "step": 3380 }, { - "epoch": 0.1871199557766722, - "grad_norm": 3.1793739795684814, - "learning_rate": 4.179931779420125e-05, - "loss": 5.7403, + "epoch": 0.7483970815830201, + "grad_norm": 2.4145843982696533, + "learning_rate": 1.41436738752175e-05, + "loss": 4.1055, "step": 3385 }, { - "epoch": 0.18739635157545606, - "grad_norm": 3.3896684646606445, - "learning_rate": 4.1785105173393974e-05, - "loss": 6.023, + "epoch": 0.7495025425602476, + "grad_norm": 2.813927412033081, + "learning_rate": 1.4081531195625155e-05, + "loss": 4.4497, "step": 3390 }, { - "epoch": 0.18767274737423992, - "grad_norm": 2.4643993377685547, - "learning_rate": 4.17708925525867e-05, - "loss": 5.9285, + "epoch": 0.7506080035374751, + "grad_norm": 2.5696094036102295, + "learning_rate": 1.4019388516032811e-05, + "loss": 4.2388, "step": 3395 }, { - "epoch": 0.18794914317302377, - "grad_norm": 2.6043946743011475, - "learning_rate": 4.175667993177942e-05, - "loss": 5.655, + "epoch": 0.7517134645147027, + "grad_norm": 3.0586514472961426, + "learning_rate": 1.3957245836440469e-05, + "loss": 4.3375, "step": 3400 }, { - "epoch": 0.18822553897180763, - "grad_norm": 2.694716691970825, - "learning_rate": 4.1742467310972146e-05, - "loss": 5.7065, + "epoch": 0.7528189254919301, + "grad_norm": 2.7942728996276855, + "learning_rate": 1.3895103156848125e-05, + "loss": 4.2727, "step": 3405 }, { - "epoch": 0.1885019347705915, - "grad_norm": 2.6867024898529053, - "learning_rate": 4.172825469016487e-05, - "loss": 5.8445, + "epoch": 0.7539243864691576, + "grad_norm": 2.541633129119873, + "learning_rate": 1.383296047725578e-05, + "loss": 4.3377, "step": 3410 }, { - "epoch": 0.18877833056937535, - "grad_norm": 2.5469698905944824, - "learning_rate": 4.171404206935759e-05, - "loss": 5.7215, + "epoch": 0.7550298474463851, + "grad_norm": 2.821420192718506, + "learning_rate": 1.3770817797663436e-05, + "loss": 4.4895, "step": 3415 }, { - "epoch": 0.1890547263681592, - "grad_norm": 2.410991668701172, - "learning_rate": 4.169982944855031e-05, - "loss": 5.4893, + "epoch": 0.7561353084236127, + "grad_norm": 2.650139570236206, + "learning_rate": 1.3708675118071093e-05, + "loss": 4.3168, "step": 3420 }, { - "epoch": 0.18933112216694306, - "grad_norm": 3.104326009750366, - "learning_rate": 4.168561682774304e-05, - "loss": 5.4989, + "epoch": 0.7572407694008402, + "grad_norm": 2.784208059310913, + "learning_rate": 1.3646532438478748e-05, + "loss": 4.247, "step": 3425 }, { - "epoch": 0.18960751796572692, - "grad_norm": 2.4258954524993896, - "learning_rate": 4.167140420693576e-05, - "loss": 5.6876, + "epoch": 0.7583462303780677, + "grad_norm": 2.6416375637054443, + "learning_rate": 1.3584389758886404e-05, + "loss": 4.3903, "step": 3430 }, { - "epoch": 0.18988391376451078, - "grad_norm": 2.748441219329834, - "learning_rate": 4.1657191586128484e-05, - "loss": 5.5542, + "epoch": 0.7594516913552951, + "grad_norm": 2.7830934524536133, + "learning_rate": 1.3522247079294059e-05, + "loss": 4.4317, "step": 3435 }, { - "epoch": 0.19016030956329463, - "grad_norm": 2.7649223804473877, - "learning_rate": 4.164297896532121e-05, - "loss": 6.136, + "epoch": 0.7605571523325226, + "grad_norm": 2.5094573497772217, + "learning_rate": 1.3460104399701715e-05, + "loss": 4.2657, "step": 3440 }, { - "epoch": 0.1904367053620785, - "grad_norm": 2.782585620880127, - "learning_rate": 4.162876634451393e-05, - "loss": 5.882, + "epoch": 0.7616626133097502, + "grad_norm": 2.6464684009552, + "learning_rate": 1.3397961720109373e-05, + "loss": 4.344, "step": 3445 }, { - "epoch": 0.19071310116086235, - "grad_norm": 2.9212682247161865, - "learning_rate": 4.161455372370665e-05, - "loss": 5.9311, + "epoch": 0.7627680742869777, + "grad_norm": 2.725152015686035, + "learning_rate": 1.3335819040517029e-05, + "loss": 4.3255, "step": 3450 }, { - "epoch": 0.1909894969596462, - "grad_norm": 3.2894344329833984, - "learning_rate": 4.160034110289938e-05, - "loss": 5.734, + "epoch": 0.7638735352642052, + "grad_norm": 2.7001333236694336, + "learning_rate": 1.3273676360924683e-05, + "loss": 4.2375, "step": 3455 }, { - "epoch": 0.19126589275843006, - "grad_norm": 2.266057014465332, - "learning_rate": 4.1586128482092103e-05, - "loss": 5.4366, + "epoch": 0.7649789962414327, + "grad_norm": 2.7043142318725586, + "learning_rate": 1.321153368133234e-05, + "loss": 4.3848, "step": 3460 }, { - "epoch": 0.19154228855721392, - "grad_norm": 3.4635226726531982, - "learning_rate": 4.157191586128482e-05, - "loss": 5.5668, + "epoch": 0.7660844572186601, + "grad_norm": 2.5512447357177734, + "learning_rate": 1.3149391001739997e-05, + "loss": 4.3744, "step": 3465 }, { - "epoch": 0.19181868435599778, - "grad_norm": 2.7193620204925537, - "learning_rate": 4.1557703240477545e-05, - "loss": 5.8679, + "epoch": 0.7671899181958877, + "grad_norm": 2.840555191040039, + "learning_rate": 1.3087248322147652e-05, + "loss": 4.3698, "step": 3470 }, { - "epoch": 0.19209508015478166, - "grad_norm": 2.5179836750030518, - "learning_rate": 4.154349061967027e-05, - "loss": 5.5888, + "epoch": 0.7682953791731152, + "grad_norm": 2.7197751998901367, + "learning_rate": 1.3025105642555308e-05, + "loss": 4.2368, "step": 3475 }, { - "epoch": 0.19237147595356552, - "grad_norm": 2.735239267349243, - "learning_rate": 4.152927799886299e-05, - "loss": 5.6179, + "epoch": 0.7694008401503427, + "grad_norm": 2.49568247795105, + "learning_rate": 1.2962962962962962e-05, + "loss": 4.3001, "step": 3480 }, { - "epoch": 0.19264787175234938, - "grad_norm": 2.676177501678467, - "learning_rate": 4.1515065378055717e-05, - "loss": 5.7376, + "epoch": 0.7705063011275702, + "grad_norm": 2.975504159927368, + "learning_rate": 1.2900820283370619e-05, + "loss": 4.45, "step": 3485 }, { - "epoch": 0.19292426755113323, - "grad_norm": 3.584538221359253, - "learning_rate": 4.150085275724844e-05, - "loss": 5.9845, + "epoch": 0.7716117621047976, + "grad_norm": 2.614933729171753, + "learning_rate": 1.2838677603778276e-05, + "loss": 4.3452, "step": 3490 }, { - "epoch": 0.1932006633499171, - "grad_norm": 3.5439796447753906, - "learning_rate": 4.1486640136441164e-05, - "loss": 5.8218, + "epoch": 0.7727172230820252, + "grad_norm": 2.6430065631866455, + "learning_rate": 1.2776534924185931e-05, + "loss": 4.2741, "step": 3495 }, { - "epoch": 0.19347705914870095, - "grad_norm": 3.0446224212646484, - "learning_rate": 4.147242751563388e-05, - "loss": 5.5155, + "epoch": 0.7738226840592527, + "grad_norm": 2.71543550491333, + "learning_rate": 1.2714392244593587e-05, + "loss": 4.4366, "step": 3500 }, { - "epoch": 0.1937534549474848, - "grad_norm": 2.8608455657958984, - "learning_rate": 4.1458214894826606e-05, - "loss": 5.9078, + "epoch": 0.7749281450364802, + "grad_norm": 2.868475914001465, + "learning_rate": 1.2652249565001242e-05, + "loss": 4.4391, "step": 3505 }, { - "epoch": 0.19402985074626866, - "grad_norm": 3.2207865715026855, - "learning_rate": 4.1444002274019336e-05, - "loss": 5.7971, + "epoch": 0.7760336060137077, + "grad_norm": 2.8595988750457764, + "learning_rate": 1.2590106885408901e-05, + "loss": 4.2634, "step": 3510 }, { - "epoch": 0.19430624654505252, - "grad_norm": 3.01859188079834, - "learning_rate": 4.1429789653212054e-05, - "loss": 5.7289, + "epoch": 0.7771390669909353, + "grad_norm": 2.577758312225342, + "learning_rate": 1.2527964205816556e-05, + "loss": 4.4947, "step": 3515 }, { - "epoch": 0.19458264234383638, - "grad_norm": 3.373577117919922, - "learning_rate": 4.141557703240478e-05, - "loss": 5.3903, + "epoch": 0.7782445279681627, + "grad_norm": 2.552488088607788, + "learning_rate": 1.2465821526224212e-05, + "loss": 4.4396, "step": 3520 }, { - "epoch": 0.19485903814262023, - "grad_norm": 2.7322349548339844, - "learning_rate": 4.14013644115975e-05, - "loss": 5.3543, + "epoch": 0.7793499889453902, + "grad_norm": 2.7421538829803467, + "learning_rate": 1.2403678846631868e-05, + "loss": 4.344, "step": 3525 }, { - "epoch": 0.1951354339414041, - "grad_norm": 2.5114071369171143, - "learning_rate": 4.1387151790790226e-05, - "loss": 5.7759, + "epoch": 0.7804554499226177, + "grad_norm": 2.6724436283111572, + "learning_rate": 1.2341536167039522e-05, + "loss": 4.507, "step": 3530 }, { - "epoch": 0.19541182974018795, - "grad_norm": 2.954470634460449, - "learning_rate": 4.137293916998295e-05, - "loss": 5.8468, + "epoch": 0.7815609108998453, + "grad_norm": 2.5183072090148926, + "learning_rate": 1.227939348744718e-05, + "loss": 4.3875, "step": 3535 }, { - "epoch": 0.1956882255389718, - "grad_norm": 2.818599224090576, - "learning_rate": 4.135872654917567e-05, - "loss": 5.4117, + "epoch": 0.7826663718770728, + "grad_norm": 2.7601890563964844, + "learning_rate": 1.2217250807854835e-05, + "loss": 4.2108, "step": 3540 }, { - "epoch": 0.19596462133775566, - "grad_norm": 3.0054855346679688, - "learning_rate": 4.13445139283684e-05, - "loss": 5.8437, + "epoch": 0.7837718328543002, + "grad_norm": 2.8598101139068604, + "learning_rate": 1.2155108128262491e-05, + "loss": 4.4034, "step": 3545 }, { - "epoch": 0.19624101713653952, - "grad_norm": 2.919210433959961, - "learning_rate": 4.1330301307561115e-05, - "loss": 5.9989, + "epoch": 0.7848772938315277, + "grad_norm": 2.6984620094299316, + "learning_rate": 1.2092965448670147e-05, + "loss": 4.3129, "step": 3550 }, { - "epoch": 0.19651741293532338, - "grad_norm": 2.6916286945343018, - "learning_rate": 4.131608868675384e-05, - "loss": 5.8415, + "epoch": 0.7859827548087552, + "grad_norm": 2.6067955493927, + "learning_rate": 1.2030822769077803e-05, + "loss": 4.1753, "step": 3555 }, { - "epoch": 0.19679380873410723, - "grad_norm": 2.8010470867156982, - "learning_rate": 4.130187606594656e-05, - "loss": 5.5132, + "epoch": 0.7870882157859828, + "grad_norm": 2.763763904571533, + "learning_rate": 1.196868008948546e-05, + "loss": 4.3784, "step": 3560 }, { - "epoch": 0.1970702045328911, - "grad_norm": 3.2105109691619873, - "learning_rate": 4.128766344513929e-05, - "loss": 5.7389, + "epoch": 0.7881936767632103, + "grad_norm": 2.5143606662750244, + "learning_rate": 1.1906537409893114e-05, + "loss": 4.3958, "step": 3565 }, { - "epoch": 0.19734660033167495, - "grad_norm": 2.8323161602020264, - "learning_rate": 4.127345082433201e-05, - "loss": 5.5191, + "epoch": 0.7892991377404378, + "grad_norm": 2.7460179328918457, + "learning_rate": 1.1844394730300772e-05, + "loss": 4.4161, "step": 3570 }, { - "epoch": 0.1976229961304588, - "grad_norm": 2.6223807334899902, - "learning_rate": 4.125923820352473e-05, - "loss": 5.6713, + "epoch": 0.7904045987176652, + "grad_norm": 2.9888150691986084, + "learning_rate": 1.1782252050708426e-05, + "loss": 4.3169, "step": 3575 }, { - "epoch": 0.19789939192924266, - "grad_norm": 3.166440963745117, - "learning_rate": 4.124502558271746e-05, - "loss": 5.4685, + "epoch": 0.7915100596948927, + "grad_norm": 2.7542128562927246, + "learning_rate": 1.1720109371116084e-05, + "loss": 4.2701, "step": 3580 }, { - "epoch": 0.19817578772802655, - "grad_norm": 2.5099334716796875, - "learning_rate": 4.1230812961910176e-05, - "loss": 5.7664, + "epoch": 0.7926155206721203, + "grad_norm": 2.622459650039673, + "learning_rate": 1.1657966691523739e-05, + "loss": 4.2324, "step": 3585 }, { - "epoch": 0.1984521835268104, - "grad_norm": 2.3826889991760254, - "learning_rate": 4.12166003411029e-05, - "loss": 5.4451, + "epoch": 0.7937209816493478, + "grad_norm": 2.7815279960632324, + "learning_rate": 1.1595824011931397e-05, + "loss": 4.4407, "step": 3590 }, { - "epoch": 0.19872857932559426, - "grad_norm": 2.5310139656066895, - "learning_rate": 4.1202387720295624e-05, - "loss": 5.748, + "epoch": 0.7948264426265753, + "grad_norm": 2.414452075958252, + "learning_rate": 1.1533681332339051e-05, + "loss": 4.2533, "step": 3595 }, { - "epoch": 0.19900497512437812, - "grad_norm": 2.9146652221679688, - "learning_rate": 4.118817509948835e-05, - "loss": 5.4966, + "epoch": 0.7959319036038028, + "grad_norm": 2.864292860031128, + "learning_rate": 1.1471538652746707e-05, + "loss": 4.3427, "step": 3600 }, { - "epoch": 0.19928137092316198, - "grad_norm": 2.7887117862701416, - "learning_rate": 4.117396247868107e-05, - "loss": 5.6988, + "epoch": 0.7970373645810302, + "grad_norm": 2.6127429008483887, + "learning_rate": 1.1409395973154363e-05, + "loss": 4.3717, "step": 3605 }, { - "epoch": 0.19955776672194583, - "grad_norm": 2.6834566593170166, - "learning_rate": 4.115974985787379e-05, - "loss": 5.4005, + "epoch": 0.7981428255582578, + "grad_norm": 2.8165504932403564, + "learning_rate": 1.1347253293562018e-05, + "loss": 4.4479, "step": 3610 }, { - "epoch": 0.1998341625207297, - "grad_norm": 2.7212436199188232, - "learning_rate": 4.114553723706652e-05, - "loss": 5.4062, + "epoch": 0.7992482865354853, + "grad_norm": 2.7605228424072266, + "learning_rate": 1.1285110613969676e-05, + "loss": 4.3603, "step": 3615 }, { - "epoch": 0.20011055831951355, - "grad_norm": 2.5989837646484375, - "learning_rate": 4.1131324616259244e-05, - "loss": 5.7674, + "epoch": 0.8003537475127128, + "grad_norm": 2.749600648880005, + "learning_rate": 1.122296793437733e-05, + "loss": 4.5357, "step": 3620 }, { - "epoch": 0.2003869541182974, - "grad_norm": 2.8077232837677, - "learning_rate": 4.111711199545196e-05, - "loss": 5.7709, + "epoch": 0.8014592084899403, + "grad_norm": 2.5620622634887695, + "learning_rate": 1.1160825254784988e-05, + "loss": 4.2939, "step": 3625 }, { - "epoch": 0.20066334991708126, - "grad_norm": 2.3831634521484375, - "learning_rate": 4.1102899374644685e-05, - "loss": 5.6026, + "epoch": 0.8025646694671678, + "grad_norm": 2.840747356414795, + "learning_rate": 1.1098682575192643e-05, + "loss": 4.4695, "step": 3630 }, { - "epoch": 0.20093974571586512, - "grad_norm": 3.917057991027832, - "learning_rate": 4.108868675383741e-05, - "loss": 5.6208, + "epoch": 0.8036701304443953, + "grad_norm": 2.9626359939575195, + "learning_rate": 1.1036539895600299e-05, + "loss": 4.3105, "step": 3635 }, { - "epoch": 0.20121614151464898, - "grad_norm": 2.841247081756592, - "learning_rate": 4.107447413303013e-05, - "loss": 5.6398, + "epoch": 0.8047755914216228, + "grad_norm": 2.748305320739746, + "learning_rate": 1.0974397216007955e-05, + "loss": 4.3532, "step": 3640 }, { - "epoch": 0.20149253731343283, - "grad_norm": 2.5820930004119873, - "learning_rate": 4.106026151222286e-05, - "loss": 5.7411, + "epoch": 0.8058810523988503, + "grad_norm": 2.6843719482421875, + "learning_rate": 1.091225453641561e-05, + "loss": 4.2337, "step": 3645 }, { - "epoch": 0.2017689331122167, - "grad_norm": 2.7610599994659424, - "learning_rate": 4.104604889141558e-05, - "loss": 5.5423, + "epoch": 0.8069865133760779, + "grad_norm": 2.6707520484924316, + "learning_rate": 1.0850111856823267e-05, + "loss": 4.282, "step": 3650 }, { - "epoch": 0.20204532891100055, - "grad_norm": 3.1269173622131348, - "learning_rate": 4.1031836270608305e-05, - "loss": 5.5471, + "epoch": 0.8080919743533054, + "grad_norm": 2.5987465381622314, + "learning_rate": 1.0787969177230922e-05, + "loss": 4.3666, "step": 3655 }, { - "epoch": 0.2023217247097844, - "grad_norm": 2.523005247116089, - "learning_rate": 4.101762364980102e-05, - "loss": 5.8834, + "epoch": 0.8091974353305328, + "grad_norm": 2.6529898643493652, + "learning_rate": 1.072582649763858e-05, + "loss": 4.4617, "step": 3660 }, { - "epoch": 0.20259812050856826, - "grad_norm": 2.9817779064178467, - "learning_rate": 4.1003411028993746e-05, - "loss": 5.7866, + "epoch": 0.8103028963077603, + "grad_norm": 2.5571646690368652, + "learning_rate": 1.0663683818046234e-05, + "loss": 4.2555, "step": 3665 }, { - "epoch": 0.20287451630735212, - "grad_norm": 2.738621950149536, - "learning_rate": 4.098919840818647e-05, - "loss": 5.7785, + "epoch": 0.8114083572849878, + "grad_norm": 2.8901898860931396, + "learning_rate": 1.060154113845389e-05, + "loss": 4.282, "step": 3670 }, { - "epoch": 0.20315091210613598, - "grad_norm": 2.758222818374634, - "learning_rate": 4.0974985787379194e-05, - "loss": 5.4694, + "epoch": 0.8125138182622154, + "grad_norm": 2.535372018814087, + "learning_rate": 1.0539398458861546e-05, + "loss": 4.2765, "step": 3675 }, { - "epoch": 0.20342730790491984, - "grad_norm": 3.3197033405303955, - "learning_rate": 4.096077316657192e-05, - "loss": 5.6767, + "epoch": 0.8136192792394429, + "grad_norm": 2.7033450603485107, + "learning_rate": 1.0477255779269203e-05, + "loss": 4.4398, "step": 3680 }, { - "epoch": 0.2037037037037037, - "grad_norm": 3.1457207202911377, - "learning_rate": 4.094656054576464e-05, - "loss": 5.8161, + "epoch": 0.8147247402166704, + "grad_norm": 2.949090003967285, + "learning_rate": 1.0415113099676859e-05, + "loss": 4.3627, "step": 3685 }, { - "epoch": 0.20398009950248755, - "grad_norm": 2.59710955619812, - "learning_rate": 4.0932347924957366e-05, - "loss": 5.3963, + "epoch": 0.8158302011938978, + "grad_norm": 3.2762537002563477, + "learning_rate": 1.0352970420084515e-05, + "loss": 4.4777, "step": 3690 }, { - "epoch": 0.2042564953012714, - "grad_norm": 3.7148916721343994, - "learning_rate": 4.091813530415008e-05, - "loss": 5.7489, + "epoch": 0.8169356621711253, + "grad_norm": 2.536367893218994, + "learning_rate": 1.0290827740492171e-05, + "loss": 4.312, "step": 3695 }, { - "epoch": 0.2045328911000553, - "grad_norm": 2.7009363174438477, - "learning_rate": 4.090392268334281e-05, - "loss": 5.619, + "epoch": 0.8180411231483529, + "grad_norm": 2.8747854232788086, + "learning_rate": 1.0228685060899826e-05, + "loss": 4.466, "step": 3700 }, { - "epoch": 0.20480928689883915, - "grad_norm": 2.5814390182495117, - "learning_rate": 4.088971006253554e-05, - "loss": 5.4951, + "epoch": 0.8191465841255804, + "grad_norm": 2.527646780014038, + "learning_rate": 1.0166542381307482e-05, + "loss": 4.2035, "step": 3705 }, { - "epoch": 0.205085682697623, - "grad_norm": 2.773533344268799, - "learning_rate": 4.0875497441728255e-05, - "loss": 5.763, + "epoch": 0.8202520451028079, + "grad_norm": 2.8456356525421143, + "learning_rate": 1.0104399701715138e-05, + "loss": 4.4013, "step": 3710 }, { - "epoch": 0.20536207849640686, - "grad_norm": 3.0854074954986572, - "learning_rate": 4.086128482092098e-05, - "loss": 5.3959, + "epoch": 0.8213575060800353, + "grad_norm": 2.6337332725524902, + "learning_rate": 1.0042257022122794e-05, + "loss": 4.4722, "step": 3715 }, { - "epoch": 0.20563847429519072, - "grad_norm": 2.8978331089019775, - "learning_rate": 4.08470722001137e-05, - "loss": 5.7336, + "epoch": 0.8224629670572629, + "grad_norm": 2.5773563385009766, + "learning_rate": 9.98011434253045e-06, + "loss": 4.3434, "step": 3720 }, { - "epoch": 0.20591487009397458, - "grad_norm": 2.4324569702148438, - "learning_rate": 4.083285957930643e-05, - "loss": 5.832, + "epoch": 0.8235684280344904, + "grad_norm": 2.7738966941833496, + "learning_rate": 9.917971662938106e-06, + "loss": 4.3367, "step": 3725 }, { - "epoch": 0.20619126589275844, - "grad_norm": 2.3226120471954346, - "learning_rate": 4.081864695849915e-05, - "loss": 5.6816, + "epoch": 0.8246738890117179, + "grad_norm": 2.672043561935425, + "learning_rate": 9.855828983345763e-06, + "loss": 4.1075, "step": 3730 }, { - "epoch": 0.2064676616915423, - "grad_norm": 2.733042001724243, - "learning_rate": 4.080443433769187e-05, - "loss": 5.6433, + "epoch": 0.8257793499889454, + "grad_norm": 2.633709669113159, + "learning_rate": 9.793686303753419e-06, + "loss": 4.3165, "step": 3735 }, { - "epoch": 0.20674405749032615, - "grad_norm": 2.787104845046997, - "learning_rate": 4.07902217168846e-05, - "loss": 5.7111, + "epoch": 0.826884810966173, + "grad_norm": 2.5204927921295166, + "learning_rate": 9.731543624161075e-06, + "loss": 4.265, "step": 3740 }, { - "epoch": 0.20702045328911, - "grad_norm": 2.508944511413574, - "learning_rate": 4.0776009096077316e-05, - "loss": 5.7261, + "epoch": 0.8279902719434004, + "grad_norm": 2.7711668014526367, + "learning_rate": 9.669400944568731e-06, + "loss": 4.3085, "step": 3745 }, { - "epoch": 0.20729684908789386, - "grad_norm": 3.51177978515625, - "learning_rate": 4.076179647527004e-05, - "loss": 5.3499, + "epoch": 0.8290957329206279, + "grad_norm": 2.5938053131103516, + "learning_rate": 9.607258264976386e-06, + "loss": 4.4216, "step": 3750 }, { - "epoch": 0.20757324488667772, - "grad_norm": 2.7052156925201416, - "learning_rate": 4.074758385446277e-05, - "loss": 5.7292, + "epoch": 0.8302011938978554, + "grad_norm": 2.4221818447113037, + "learning_rate": 9.545115585384042e-06, + "loss": 4.2004, "step": 3755 }, { - "epoch": 0.20784964068546158, - "grad_norm": 2.662250280380249, - "learning_rate": 4.073337123365549e-05, - "loss": 5.4517, + "epoch": 0.8313066548750829, + "grad_norm": 2.75688099861145, + "learning_rate": 9.482972905791698e-06, + "loss": 4.4424, "step": 3760 }, { - "epoch": 0.20812603648424544, - "grad_norm": 2.365370750427246, - "learning_rate": 4.071915861284821e-05, - "loss": 5.7968, + "epoch": 0.8324121158523105, + "grad_norm": 2.8027572631835938, + "learning_rate": 9.420830226199354e-06, + "loss": 4.3706, "step": 3765 }, { - "epoch": 0.2084024322830293, - "grad_norm": 3.118715763092041, - "learning_rate": 4.0704945992040936e-05, - "loss": 5.5743, + "epoch": 0.8335175768295379, + "grad_norm": 2.787280797958374, + "learning_rate": 9.35868754660701e-06, + "loss": 4.298, "step": 3770 }, { - "epoch": 0.20867882808181315, - "grad_norm": 3.200096607208252, - "learning_rate": 4.069073337123366e-05, - "loss": 6.0694, + "epoch": 0.8346230378067654, + "grad_norm": 2.797969341278076, + "learning_rate": 9.296544867014666e-06, + "loss": 4.4039, "step": 3775 }, { - "epoch": 0.208955223880597, - "grad_norm": 3.215636730194092, - "learning_rate": 4.067652075042638e-05, - "loss": 5.6844, + "epoch": 0.8357284987839929, + "grad_norm": 2.5721869468688965, + "learning_rate": 9.234402187422323e-06, + "loss": 4.3801, "step": 3780 }, { - "epoch": 0.20923161967938086, - "grad_norm": 3.387874126434326, - "learning_rate": 4.06623081296191e-05, - "loss": 5.5597, + "epoch": 0.8368339597612204, + "grad_norm": 2.480556011199951, + "learning_rate": 9.172259507829977e-06, + "loss": 4.5008, "step": 3785 }, { - "epoch": 0.20950801547816472, - "grad_norm": 3.392089605331421, - "learning_rate": 4.064809550881183e-05, - "loss": 5.8506, + "epoch": 0.837939420738448, + "grad_norm": 3.0445311069488525, + "learning_rate": 9.110116828237635e-06, + "loss": 4.376, "step": 3790 }, { - "epoch": 0.20978441127694858, - "grad_norm": 2.8729498386383057, - "learning_rate": 4.063388288800455e-05, - "loss": 5.4329, + "epoch": 0.8390448817156755, + "grad_norm": 2.906247615814209, + "learning_rate": 9.04797414864529e-06, + "loss": 4.1985, "step": 3795 }, { - "epoch": 0.21006080707573244, - "grad_norm": 2.7015790939331055, - "learning_rate": 4.061967026719727e-05, - "loss": 5.6762, + "epoch": 0.8401503426929029, + "grad_norm": 2.624952793121338, + "learning_rate": 8.985831469052947e-06, + "loss": 4.4116, "step": 3800 }, { - "epoch": 0.2103372028745163, - "grad_norm": 3.2836618423461914, - "learning_rate": 4.060545764639e-05, - "loss": 5.6025, + "epoch": 0.8412558036701304, + "grad_norm": 2.826939821243286, + "learning_rate": 8.923688789460602e-06, + "loss": 4.3384, "step": 3805 }, { - "epoch": 0.21061359867330018, - "grad_norm": 2.447291612625122, - "learning_rate": 4.059124502558272e-05, - "loss": 5.6676, + "epoch": 0.8423612646473579, + "grad_norm": 2.7362842559814453, + "learning_rate": 8.861546109868258e-06, + "loss": 4.3327, "step": 3810 }, { - "epoch": 0.21088999447208404, - "grad_norm": 2.9781346321105957, - "learning_rate": 4.0577032404775445e-05, - "loss": 5.6131, + "epoch": 0.8434667256245855, + "grad_norm": 2.5066606998443604, + "learning_rate": 8.799403430275914e-06, + "loss": 4.3919, "step": 3815 }, { - "epoch": 0.2111663902708679, - "grad_norm": 3.1656429767608643, - "learning_rate": 4.056281978396816e-05, - "loss": 5.7342, + "epoch": 0.844572186601813, + "grad_norm": 2.625035524368286, + "learning_rate": 8.737260750683569e-06, + "loss": 4.3227, "step": 3820 }, { - "epoch": 0.21144278606965175, - "grad_norm": 2.7425482273101807, - "learning_rate": 4.054860716316089e-05, - "loss": 5.6741, + "epoch": 0.8456776475790405, + "grad_norm": 2.6161510944366455, + "learning_rate": 8.675118071091226e-06, + "loss": 4.273, "step": 3825 }, { - "epoch": 0.2117191818684356, - "grad_norm": 3.253185510635376, - "learning_rate": 4.053439454235361e-05, - "loss": 5.801, + "epoch": 0.8467831085562679, + "grad_norm": 2.6360316276550293, + "learning_rate": 8.612975391498881e-06, + "loss": 4.3517, "step": 3830 }, { - "epoch": 0.21199557766721946, - "grad_norm": 3.0883877277374268, - "learning_rate": 4.0520181921546334e-05, - "loss": 5.6809, + "epoch": 0.8478885695334955, + "grad_norm": 2.945129632949829, + "learning_rate": 8.550832711906539e-06, + "loss": 4.4634, "step": 3835 }, { - "epoch": 0.21227197346600332, - "grad_norm": 3.222590446472168, - "learning_rate": 4.050596930073906e-05, - "loss": 5.4639, + "epoch": 0.848994030510723, + "grad_norm": 2.797037124633789, + "learning_rate": 8.488690032314193e-06, + "loss": 4.3474, "step": 3840 }, { - "epoch": 0.21254836926478718, - "grad_norm": 2.510165214538574, - "learning_rate": 4.049175667993178e-05, - "loss": 5.6433, + "epoch": 0.8500994914879505, + "grad_norm": 2.6918272972106934, + "learning_rate": 8.42654735272185e-06, + "loss": 4.1983, "step": 3845 }, { - "epoch": 0.21282476506357104, - "grad_norm": 3.1436879634857178, - "learning_rate": 4.0477544059124506e-05, - "loss": 5.3783, + "epoch": 0.851204952465178, + "grad_norm": 2.786607027053833, + "learning_rate": 8.364404673129506e-06, + "loss": 4.2545, "step": 3850 }, { - "epoch": 0.2131011608623549, - "grad_norm": 2.9798269271850586, - "learning_rate": 4.046333143831722e-05, - "loss": 5.6385, + "epoch": 0.8523104134424054, + "grad_norm": 2.799255132675171, + "learning_rate": 8.302261993537162e-06, + "loss": 4.4633, "step": 3855 }, { - "epoch": 0.21337755666113875, - "grad_norm": 2.9167640209198, - "learning_rate": 4.0449118817509954e-05, - "loss": 5.6914, + "epoch": 0.853415874419633, + "grad_norm": 2.393765926361084, + "learning_rate": 8.240119313944818e-06, + "loss": 4.3144, "step": 3860 }, { - "epoch": 0.2136539524599226, - "grad_norm": 3.467259645462036, - "learning_rate": 4.043490619670268e-05, - "loss": 5.8466, + "epoch": 0.8545213353968605, + "grad_norm": 3.014911413192749, + "learning_rate": 8.177976634352472e-06, + "loss": 4.4218, "step": 3865 }, { - "epoch": 0.21393034825870647, - "grad_norm": 2.689114809036255, - "learning_rate": 4.0420693575895395e-05, - "loss": 5.9034, + "epoch": 0.855626796374088, + "grad_norm": 2.7910256385803223, + "learning_rate": 8.11583395476013e-06, + "loss": 4.3782, "step": 3870 }, { - "epoch": 0.21420674405749032, - "grad_norm": 2.7007036209106445, - "learning_rate": 4.040648095508812e-05, - "loss": 5.6707, + "epoch": 0.8567322573513155, + "grad_norm": 2.5579280853271484, + "learning_rate": 8.053691275167785e-06, + "loss": 4.3776, "step": 3875 }, { - "epoch": 0.21448313985627418, - "grad_norm": 2.5844995975494385, - "learning_rate": 4.039226833428084e-05, - "loss": 5.726, + "epoch": 0.857837718328543, + "grad_norm": 2.6511480808258057, + "learning_rate": 7.991548595575441e-06, + "loss": 4.3284, "step": 3880 }, { - "epoch": 0.21475953565505804, - "grad_norm": 2.7026453018188477, - "learning_rate": 4.037805571347357e-05, - "loss": 5.6363, + "epoch": 0.8589431793057705, + "grad_norm": 2.7104756832122803, + "learning_rate": 7.929405915983097e-06, + "loss": 4.3875, "step": 3885 }, { - "epoch": 0.2150359314538419, - "grad_norm": 3.0205512046813965, - "learning_rate": 4.0363843092666284e-05, - "loss": 5.6149, + "epoch": 0.860048640282998, + "grad_norm": 2.8262667655944824, + "learning_rate": 7.867263236390753e-06, + "loss": 4.401, "step": 3890 }, { - "epoch": 0.21531232725262575, - "grad_norm": 2.615713596343994, - "learning_rate": 4.0349630471859015e-05, - "loss": 5.4971, + "epoch": 0.8611541012602255, + "grad_norm": 2.8072750568389893, + "learning_rate": 7.80512055679841e-06, + "loss": 4.3245, "step": 3895 }, { - "epoch": 0.2155887230514096, - "grad_norm": 3.4436938762664795, - "learning_rate": 4.033541785105174e-05, - "loss": 5.9492, + "epoch": 0.862259562237453, + "grad_norm": 3.0384953022003174, + "learning_rate": 7.742977877206066e-06, + "loss": 4.2691, "step": 3900 }, { - "epoch": 0.21586511885019347, - "grad_norm": 2.6737794876098633, - "learning_rate": 4.0321205230244456e-05, - "loss": 5.5133, + "epoch": 0.8633650232146806, + "grad_norm": 2.7213258743286133, + "learning_rate": 7.680835197613722e-06, + "loss": 4.3848, "step": 3905 }, { - "epoch": 0.21614151464897732, - "grad_norm": 2.6840312480926514, - "learning_rate": 4.030699260943718e-05, - "loss": 5.9368, + "epoch": 0.864470484191908, + "grad_norm": 2.9310898780822754, + "learning_rate": 7.618692518021378e-06, + "loss": 4.3875, "step": 3910 }, { - "epoch": 0.21641791044776118, - "grad_norm": 2.652055501937866, - "learning_rate": 4.0292779988629904e-05, - "loss": 5.7533, + "epoch": 0.8655759451691355, + "grad_norm": 2.7270753383636475, + "learning_rate": 7.556549838429033e-06, + "loss": 4.4668, "step": 3915 }, { - "epoch": 0.21669430624654507, - "grad_norm": 3.5088555812835693, - "learning_rate": 4.027856736782263e-05, - "loss": 5.6949, + "epoch": 0.866681406146363, + "grad_norm": 2.7479376792907715, + "learning_rate": 7.494407158836689e-06, + "loss": 4.3906, "step": 3920 }, { - "epoch": 0.21697070204532892, - "grad_norm": 3.445812940597534, - "learning_rate": 4.026435474701535e-05, - "loss": 5.5864, + "epoch": 0.8677868671235905, + "grad_norm": 2.773819923400879, + "learning_rate": 7.432264479244346e-06, + "loss": 4.2478, "step": 3925 }, { - "epoch": 0.21724709784411278, - "grad_norm": 3.1001298427581787, - "learning_rate": 4.0250142126208076e-05, - "loss": 5.8767, + "epoch": 0.8688923281008181, + "grad_norm": 2.642632484436035, + "learning_rate": 7.370121799652001e-06, + "loss": 4.3643, "step": 3930 }, { - "epoch": 0.21752349364289664, - "grad_norm": 2.6372926235198975, - "learning_rate": 4.02359295054008e-05, - "loss": 5.6745, + "epoch": 0.8699977890780456, + "grad_norm": 2.830242872238159, + "learning_rate": 7.307979120059657e-06, + "loss": 4.4359, "step": 3935 }, { - "epoch": 0.2177998894416805, - "grad_norm": 2.3404037952423096, - "learning_rate": 4.022171688459352e-05, - "loss": 5.6526, + "epoch": 0.871103250055273, + "grad_norm": 2.8000121116638184, + "learning_rate": 7.2458364404673125e-06, + "loss": 4.5984, "step": 3940 }, { - "epoch": 0.21807628524046435, - "grad_norm": 3.181204080581665, - "learning_rate": 4.020750426378624e-05, - "loss": 5.5995, + "epoch": 0.8722087110325005, + "grad_norm": 2.8083910942077637, + "learning_rate": 7.1836937608749695e-06, + "loss": 4.2437, "step": 3945 }, { - "epoch": 0.2183526810392482, - "grad_norm": 3.0466437339782715, - "learning_rate": 4.019329164297897e-05, - "loss": 5.7707, + "epoch": 0.873314172009728, + "grad_norm": 2.6732099056243896, + "learning_rate": 7.121551081282625e-06, + "loss": 4.4326, "step": 3950 }, { - "epoch": 0.21862907683803207, - "grad_norm": 2.544200897216797, - "learning_rate": 4.017907902217169e-05, - "loss": 5.9233, + "epoch": 0.8744196329869556, + "grad_norm": 2.4670119285583496, + "learning_rate": 7.059408401690282e-06, + "loss": 4.2204, "step": 3955 }, { - "epoch": 0.21890547263681592, - "grad_norm": 3.109133720397949, - "learning_rate": 4.016486640136441e-05, - "loss": 5.7499, + "epoch": 0.8755250939641831, + "grad_norm": 2.698272943496704, + "learning_rate": 6.997265722097937e-06, + "loss": 4.305, "step": 3960 }, { - "epoch": 0.21918186843559978, - "grad_norm": 2.555316209793091, - "learning_rate": 4.015065378055714e-05, - "loss": 5.3721, + "epoch": 0.8766305549414106, + "grad_norm": 2.7143428325653076, + "learning_rate": 6.935123042505594e-06, + "loss": 4.348, "step": 3965 }, { - "epoch": 0.21945826423438364, - "grad_norm": 2.7728629112243652, - "learning_rate": 4.013644115974986e-05, - "loss": 5.7565, + "epoch": 0.877736015918638, + "grad_norm": 2.571596145629883, + "learning_rate": 6.8729803629132495e-06, + "loss": 4.3278, "step": 3970 }, { - "epoch": 0.2197346600331675, - "grad_norm": 3.0431509017944336, - "learning_rate": 4.0122228538942585e-05, - "loss": 5.5652, + "epoch": 0.8788414768958656, + "grad_norm": 3.0739476680755615, + "learning_rate": 6.810837683320905e-06, + "loss": 4.3202, "step": 3975 }, { - "epoch": 0.22001105583195135, - "grad_norm": 3.0363245010375977, - "learning_rate": 4.01080159181353e-05, - "loss": 5.6372, + "epoch": 0.8799469378730931, + "grad_norm": 2.72713041305542, + "learning_rate": 6.748695003728561e-06, + "loss": 4.5188, "step": 3980 }, { - "epoch": 0.2202874516307352, - "grad_norm": 2.699867010116577, - "learning_rate": 4.009380329732803e-05, - "loss": 5.5497, + "epoch": 0.8810523988503206, + "grad_norm": 2.7530996799468994, + "learning_rate": 6.686552324136216e-06, + "loss": 4.3479, "step": 3985 }, { - "epoch": 0.22056384742951907, - "grad_norm": 2.5388708114624023, - "learning_rate": 4.007959067652075e-05, - "loss": 5.4853, + "epoch": 0.8821578598275481, + "grad_norm": 2.7766714096069336, + "learning_rate": 6.624409644543873e-06, + "loss": 4.3766, "step": 3990 }, { - "epoch": 0.22084024322830292, - "grad_norm": 2.6334452629089355, - "learning_rate": 4.0065378055713474e-05, - "loss": 5.5215, + "epoch": 0.8832633208047755, + "grad_norm": 3.0622363090515137, + "learning_rate": 6.562266964951529e-06, + "loss": 4.3819, "step": 3995 }, { - "epoch": 0.22111663902708678, - "grad_norm": 2.968801498413086, - "learning_rate": 4.00511654349062e-05, - "loss": 5.5953, + "epoch": 0.8843687817820031, + "grad_norm": 2.711118221282959, + "learning_rate": 6.500124285359186e-06, + "loss": 4.2281, "step": 4000 }, { - "epoch": 0.22139303482587064, - "grad_norm": 3.2419137954711914, - "learning_rate": 4.003695281409892e-05, - "loss": 5.8652, + "epoch": 0.8854742427592306, + "grad_norm": 2.5327889919281006, + "learning_rate": 6.437981605766841e-06, + "loss": 4.3108, "step": 4005 }, { - "epoch": 0.2216694306246545, - "grad_norm": 3.5117664337158203, - "learning_rate": 4.0022740193291646e-05, - "loss": 5.1776, + "epoch": 0.8865797037364581, + "grad_norm": 2.6793577671051025, + "learning_rate": 6.375838926174497e-06, + "loss": 4.419, "step": 4010 }, { - "epoch": 0.22194582642343835, - "grad_norm": 3.309300661087036, - "learning_rate": 4.0008527572484363e-05, - "loss": 5.9688, + "epoch": 0.8876851647136856, + "grad_norm": 2.7030229568481445, + "learning_rate": 6.3136962465821526e-06, + "loss": 4.1583, "step": 4015 }, { - "epoch": 0.2222222222222222, - "grad_norm": 3.4260222911834717, - "learning_rate": 3.9994314951677094e-05, - "loss": 5.6959, + "epoch": 0.8887906256909132, + "grad_norm": 2.6065833568573, + "learning_rate": 6.2515535669898096e-06, + "loss": 4.5478, "step": 4020 }, { - "epoch": 0.22249861802100607, - "grad_norm": 2.736858606338501, - "learning_rate": 3.998010233086981e-05, - "loss": 5.69, + "epoch": 0.8898960866681406, + "grad_norm": 2.8415439128875732, + "learning_rate": 6.189410887397465e-06, + "loss": 4.4606, "step": 4025 }, { - "epoch": 0.22277501381978995, - "grad_norm": 3.037052869796753, - "learning_rate": 3.9965889710062535e-05, - "loss": 5.3564, + "epoch": 0.8910015476453681, + "grad_norm": 2.9203150272369385, + "learning_rate": 6.127268207805121e-06, + "loss": 4.207, "step": 4030 }, { - "epoch": 0.2230514096185738, - "grad_norm": 2.519865036010742, - "learning_rate": 3.995167708925526e-05, - "loss": 5.6226, + "epoch": 0.8921070086225956, + "grad_norm": 2.5476462841033936, + "learning_rate": 6.065125528212777e-06, + "loss": 4.3745, "step": 4035 }, { - "epoch": 0.22332780541735767, - "grad_norm": 2.619724988937378, - "learning_rate": 3.993746446844798e-05, - "loss": 5.8328, + "epoch": 0.8932124695998231, + "grad_norm": 3.014671564102173, + "learning_rate": 6.002982848620433e-06, + "loss": 4.2554, "step": 4040 }, { - "epoch": 0.22360420121614152, - "grad_norm": 2.6823482513427734, - "learning_rate": 3.992325184764071e-05, - "loss": 5.2537, + "epoch": 0.8943179305770507, + "grad_norm": 2.628617763519287, + "learning_rate": 5.940840169028089e-06, + "loss": 4.329, "step": 4045 }, { - "epoch": 0.22388059701492538, - "grad_norm": 2.5863189697265625, - "learning_rate": 3.9909039226833425e-05, - "loss": 5.139, + "epoch": 0.8954233915542781, + "grad_norm": 2.746119737625122, + "learning_rate": 5.878697489435745e-06, + "loss": 4.2055, "step": 4050 }, { - "epoch": 0.22415699281370924, - "grad_norm": 2.9364101886749268, - "learning_rate": 3.9894826606026155e-05, - "loss": 5.544, + "epoch": 0.8965288525315056, + "grad_norm": 2.9705591201782227, + "learning_rate": 5.8165548098434e-06, + "loss": 4.3678, "step": 4055 }, { - "epoch": 0.2244333886124931, - "grad_norm": 2.504804849624634, - "learning_rate": 3.988061398521888e-05, - "loss": 5.7221, + "epoch": 0.8976343135087331, + "grad_norm": 2.6920156478881836, + "learning_rate": 5.754412130251056e-06, + "loss": 4.2901, "step": 4060 }, { - "epoch": 0.22470978441127695, - "grad_norm": 2.4577889442443848, - "learning_rate": 3.9866401364411596e-05, - "loss": 5.6893, + "epoch": 0.8987397744859607, + "grad_norm": 2.442110538482666, + "learning_rate": 5.692269450658713e-06, + "loss": 4.3028, "step": 4065 }, { - "epoch": 0.2249861802100608, - "grad_norm": 2.8523504734039307, - "learning_rate": 3.985218874360433e-05, - "loss": 5.4065, + "epoch": 0.8998452354631882, + "grad_norm": 2.74092698097229, + "learning_rate": 5.630126771066369e-06, + "loss": 4.3002, "step": 4070 }, { - "epoch": 0.22526257600884467, - "grad_norm": 2.8694982528686523, - "learning_rate": 3.9837976122797044e-05, - "loss": 5.7412, + "epoch": 0.9009506964404157, + "grad_norm": 2.442526340484619, + "learning_rate": 5.567984091474025e-06, + "loss": 4.284, "step": 4075 }, { - "epoch": 0.22553897180762852, - "grad_norm": 2.2998769283294678, - "learning_rate": 3.982376350198977e-05, - "loss": 5.3882, + "epoch": 0.9020561574176431, + "grad_norm": 2.78788161277771, + "learning_rate": 5.50584141188168e-06, + "loss": 4.3699, "step": 4080 }, { - "epoch": 0.22581536760641238, - "grad_norm": 2.82647442817688, - "learning_rate": 3.980955088118249e-05, - "loss": 5.5734, + "epoch": 0.9031616183948706, + "grad_norm": 2.884793281555176, + "learning_rate": 5.4436987322893364e-06, + "loss": 4.3204, "step": 4085 }, { - "epoch": 0.22609176340519624, - "grad_norm": 2.7505252361297607, - "learning_rate": 3.9795338260375216e-05, - "loss": 5.7028, + "epoch": 0.9042670793720982, + "grad_norm": 2.645921230316162, + "learning_rate": 5.381556052696993e-06, + "loss": 4.4775, "step": 4090 }, { - "epoch": 0.2263681592039801, - "grad_norm": 3.391613483428955, - "learning_rate": 3.978112563956794e-05, - "loss": 5.731, + "epoch": 0.9053725403493257, + "grad_norm": 2.7526016235351562, + "learning_rate": 5.319413373104649e-06, + "loss": 4.2971, "step": 4095 }, { - "epoch": 0.22664455500276395, - "grad_norm": 3.5600905418395996, - "learning_rate": 3.976691301876066e-05, - "loss": 5.5761, + "epoch": 0.9064780013265532, + "grad_norm": 2.6196508407592773, + "learning_rate": 5.257270693512305e-06, + "loss": 4.32, "step": 4100 }, { - "epoch": 0.2269209508015478, - "grad_norm": 3.087388515472412, - "learning_rate": 3.975270039795339e-05, - "loss": 6.0301, + "epoch": 0.9075834623037807, + "grad_norm": 2.9636263847351074, + "learning_rate": 5.195128013919961e-06, + "loss": 4.3498, "step": 4105 }, { - "epoch": 0.22719734660033167, - "grad_norm": 2.921119213104248, - "learning_rate": 3.9738487777146106e-05, - "loss": 5.5959, + "epoch": 0.9086889232810081, + "grad_norm": 2.7609803676605225, + "learning_rate": 5.1329853343276164e-06, + "loss": 4.3134, "step": 4110 }, { - "epoch": 0.22747374239911552, - "grad_norm": 3.0585193634033203, - "learning_rate": 3.972427515633883e-05, - "loss": 5.61, + "epoch": 0.9097943842582357, + "grad_norm": 2.84635329246521, + "learning_rate": 5.070842654735273e-06, + "loss": 4.5655, "step": 4115 }, { - "epoch": 0.22775013819789938, - "grad_norm": 3.3914947509765625, - "learning_rate": 3.9710062535531553e-05, - "loss": 5.3884, + "epoch": 0.9108998452354632, + "grad_norm": 2.9101991653442383, + "learning_rate": 5.008699975142928e-06, + "loss": 4.3149, "step": 4120 }, { - "epoch": 0.22802653399668324, - "grad_norm": 3.621385097503662, - "learning_rate": 3.969584991472428e-05, - "loss": 5.7627, + "epoch": 0.9120053062126907, + "grad_norm": 2.50285005569458, + "learning_rate": 4.946557295550584e-06, + "loss": 4.5046, "step": 4125 }, { - "epoch": 0.2283029297954671, - "grad_norm": 2.755340099334717, - "learning_rate": 3.9681637293917e-05, - "loss": 5.6035, + "epoch": 0.9131107671899182, + "grad_norm": 2.6111807823181152, + "learning_rate": 4.88441461595824e-06, + "loss": 4.4572, "step": 4130 }, { - "epoch": 0.22857932559425095, - "grad_norm": 2.5260558128356934, - "learning_rate": 3.966742467310972e-05, - "loss": 5.795, + "epoch": 0.9142162281671457, + "grad_norm": 2.8482987880706787, + "learning_rate": 4.8222719363658965e-06, + "loss": 4.2982, "step": 4135 }, { - "epoch": 0.22885572139303484, - "grad_norm": 2.8096625804901123, - "learning_rate": 3.965321205230245e-05, - "loss": 5.4867, + "epoch": 0.9153216891443732, + "grad_norm": 2.635841131210327, + "learning_rate": 4.760129256773553e-06, + "loss": 4.3807, "step": 4140 }, { - "epoch": 0.2291321171918187, - "grad_norm": 2.795567274093628, - "learning_rate": 3.963899943149517e-05, - "loss": 5.6046, + "epoch": 0.9164271501216007, + "grad_norm": 2.969567060470581, + "learning_rate": 4.697986577181209e-06, + "loss": 4.5337, "step": 4145 }, { - "epoch": 0.22940851299060255, - "grad_norm": 2.43609881401062, - "learning_rate": 3.962478681068789e-05, - "loss": 5.4769, + "epoch": 0.9175326110988282, + "grad_norm": 2.5630719661712646, + "learning_rate": 4.635843897588864e-06, + "loss": 4.2317, "step": 4150 }, { - "epoch": 0.2296849087893864, - "grad_norm": 3.2818362712860107, - "learning_rate": 3.9610574189880615e-05, - "loss": 5.4739, + "epoch": 0.9186380720760557, + "grad_norm": 3.0482473373413086, + "learning_rate": 4.57370121799652e-06, + "loss": 4.3618, "step": 4155 }, { - "epoch": 0.22996130458817027, - "grad_norm": 3.0869808197021484, - "learning_rate": 3.959636156907334e-05, - "loss": 5.6502, + "epoch": 0.9197435330532833, + "grad_norm": 2.6049513816833496, + "learning_rate": 4.511558538404176e-06, + "loss": 4.3415, "step": 4160 }, { - "epoch": 0.23023770038695412, - "grad_norm": 2.792592763900757, - "learning_rate": 3.958214894826606e-05, - "loss": 5.707, + "epoch": 0.9208489940305107, + "grad_norm": 2.672549247741699, + "learning_rate": 4.449415858811832e-06, + "loss": 4.3546, "step": 4165 }, { - "epoch": 0.23051409618573798, - "grad_norm": 3.079249858856201, - "learning_rate": 3.9567936327458786e-05, - "loss": 5.8627, + "epoch": 0.9219544550077382, + "grad_norm": 2.3971190452575684, + "learning_rate": 4.387273179219488e-06, + "loss": 4.4124, "step": 4170 }, { - "epoch": 0.23079049198452184, - "grad_norm": 2.5989654064178467, - "learning_rate": 3.955372370665151e-05, - "loss": 5.6885, + "epoch": 0.9230599159849657, + "grad_norm": 2.9324026107788086, + "learning_rate": 4.325130499627144e-06, + "loss": 4.4178, "step": 4175 }, { - "epoch": 0.2310668877833057, - "grad_norm": 2.6450164318084717, - "learning_rate": 3.9539511085844234e-05, - "loss": 5.4694, + "epoch": 0.9241653769621933, + "grad_norm": 2.6847023963928223, + "learning_rate": 4.2629878200348e-06, + "loss": 4.3332, "step": 4180 }, { - "epoch": 0.23134328358208955, - "grad_norm": 2.6688590049743652, - "learning_rate": 3.952529846503695e-05, - "loss": 5.5172, + "epoch": 0.9252708379394208, + "grad_norm": 2.586578369140625, + "learning_rate": 4.2008451404424565e-06, + "loss": 4.3377, "step": 4185 }, { - "epoch": 0.2316196793808734, - "grad_norm": 2.876706600189209, - "learning_rate": 3.9511085844229676e-05, - "loss": 5.6633, + "epoch": 0.9263762989166482, + "grad_norm": 2.6753554344177246, + "learning_rate": 4.138702460850112e-06, + "loss": 4.4402, "step": 4190 }, { - "epoch": 0.23189607517965727, - "grad_norm": 2.682499408721924, - "learning_rate": 3.9496873223422406e-05, - "loss": 5.6945, + "epoch": 0.9274817598938757, + "grad_norm": 2.7684082984924316, + "learning_rate": 4.076559781257768e-06, + "loss": 4.3591, "step": 4195 }, { - "epoch": 0.23217247097844113, - "grad_norm": 2.5121068954467773, - "learning_rate": 3.9482660602615124e-05, - "loss": 5.6058, + "epoch": 0.9285872208711032, + "grad_norm": 2.5447866916656494, + "learning_rate": 4.014417101665424e-06, + "loss": 4.2935, "step": 4200 }, { - "epoch": 0.23244886677722498, - "grad_norm": 3.3784987926483154, - "learning_rate": 3.946844798180785e-05, - "loss": 5.4177, + "epoch": 0.9296926818483308, + "grad_norm": 2.744508981704712, + "learning_rate": 3.95227442207308e-06, + "loss": 4.4423, "step": 4205 }, { - "epoch": 0.23272526257600884, - "grad_norm": 2.7231504917144775, - "learning_rate": 3.945423536100057e-05, - "loss": 5.3509, + "epoch": 0.9307981428255583, + "grad_norm": 2.8013176918029785, + "learning_rate": 3.8901317424807365e-06, + "loss": 4.4143, "step": 4210 }, { - "epoch": 0.2330016583747927, - "grad_norm": 3.0387542247772217, - "learning_rate": 3.9440022740193296e-05, - "loss": 5.7707, + "epoch": 0.9319036038027858, + "grad_norm": 2.7098312377929688, + "learning_rate": 3.827989062888392e-06, + "loss": 4.4103, "step": 4215 }, { - "epoch": 0.23327805417357655, - "grad_norm": 2.9296815395355225, - "learning_rate": 3.942581011938601e-05, - "loss": 5.7334, + "epoch": 0.9330090647800132, + "grad_norm": 2.6168668270111084, + "learning_rate": 3.7658463832960476e-06, + "loss": 4.2801, "step": 4220 }, { - "epoch": 0.2335544499723604, - "grad_norm": 2.396350622177124, - "learning_rate": 3.941159749857874e-05, - "loss": 5.254, + "epoch": 0.9341145257572407, + "grad_norm": 2.5833184719085693, + "learning_rate": 3.7037037037037037e-06, + "loss": 4.4013, "step": 4225 }, { - "epoch": 0.23383084577114427, - "grad_norm": 3.36370587348938, - "learning_rate": 3.939738487777147e-05, - "loss": 5.8167, + "epoch": 0.9352199867344683, + "grad_norm": 2.377253293991089, + "learning_rate": 3.64156102411136e-06, + "loss": 4.185, "step": 4230 }, { - "epoch": 0.23410724156992813, - "grad_norm": 3.5464701652526855, - "learning_rate": 3.9383172256964185e-05, - "loss": 5.742, + "epoch": 0.9363254477116958, + "grad_norm": 2.6081435680389404, + "learning_rate": 3.5794183445190157e-06, + "loss": 4.253, "step": 4235 }, { - "epoch": 0.23438363736871198, - "grad_norm": 2.342013120651245, - "learning_rate": 3.936895963615691e-05, - "loss": 5.5515, + "epoch": 0.9374309086889233, + "grad_norm": 2.711153030395508, + "learning_rate": 3.517275664926672e-06, + "loss": 4.3479, "step": 4240 }, { - "epoch": 0.23466003316749584, - "grad_norm": 2.8633322715759277, - "learning_rate": 3.935474701534963e-05, - "loss": 5.4822, + "epoch": 0.9385363696661508, + "grad_norm": 2.4365053176879883, + "learning_rate": 3.455132985334328e-06, + "loss": 4.1939, "step": 4245 }, { - "epoch": 0.23493642896627973, - "grad_norm": 2.7258920669555664, - "learning_rate": 3.9340534394542357e-05, - "loss": 5.2667, + "epoch": 0.9396418306433783, + "grad_norm": 2.638932704925537, + "learning_rate": 3.3929903057419838e-06, + "loss": 4.3875, "step": 4250 }, { - "epoch": 0.23521282476506358, - "grad_norm": 2.6543519496917725, - "learning_rate": 3.932632177373508e-05, - "loss": 5.5115, + "epoch": 0.9407472916206058, + "grad_norm": 2.5555827617645264, + "learning_rate": 3.33084762614964e-06, + "loss": 4.2698, "step": 4255 }, { - "epoch": 0.23548922056384744, - "grad_norm": 3.5072028636932373, - "learning_rate": 3.93121091529278e-05, - "loss": 5.4936, + "epoch": 0.9418527525978333, + "grad_norm": 2.713468074798584, + "learning_rate": 3.268704946557296e-06, + "loss": 4.349, "step": 4260 }, { - "epoch": 0.2357656163626313, - "grad_norm": 2.486929178237915, - "learning_rate": 3.929789653212053e-05, - "loss": 5.719, + "epoch": 0.9429582135750608, + "grad_norm": 2.841186761856079, + "learning_rate": 3.206562266964952e-06, + "loss": 4.2716, "step": 4265 }, { - "epoch": 0.23604201216141515, - "grad_norm": 3.5531532764434814, - "learning_rate": 3.9283683911313246e-05, - "loss": 5.6424, + "epoch": 0.9440636745522883, + "grad_norm": 2.8116109371185303, + "learning_rate": 3.144419587372607e-06, + "loss": 4.4094, "step": 4270 }, { - "epoch": 0.236318407960199, - "grad_norm": 3.207571506500244, - "learning_rate": 3.926947129050597e-05, - "loss": 5.6972, + "epoch": 0.9451691355295158, + "grad_norm": 2.7146096229553223, + "learning_rate": 3.0822769077802638e-06, + "loss": 4.2455, "step": 4275 }, { - "epoch": 0.23659480375898287, - "grad_norm": 3.538468599319458, - "learning_rate": 3.9255258669698694e-05, - "loss": 5.8425, + "epoch": 0.9462745965067433, + "grad_norm": 2.577312469482422, + "learning_rate": 3.02013422818792e-06, + "loss": 4.3422, "step": 4280 }, { - "epoch": 0.23687119955776673, - "grad_norm": 3.163320541381836, - "learning_rate": 3.924104604889142e-05, - "loss": 5.2799, + "epoch": 0.9473800574839708, + "grad_norm": 2.4600229263305664, + "learning_rate": 2.9579915485955753e-06, + "loss": 4.511, "step": 4285 }, { - "epoch": 0.23714759535655058, - "grad_norm": 4.0374274253845215, - "learning_rate": 3.922683342808414e-05, - "loss": 5.5183, + "epoch": 0.9484855184611983, + "grad_norm": 2.7700321674346924, + "learning_rate": 2.8958488690032314e-06, + "loss": 4.2781, "step": 4290 }, { - "epoch": 0.23742399115533444, - "grad_norm": 3.0995869636535645, - "learning_rate": 3.921262080727686e-05, - "loss": 5.7765, + "epoch": 0.9495909794384259, + "grad_norm": 2.7642529010772705, + "learning_rate": 2.8337061894108876e-06, + "loss": 4.3321, "step": 4295 }, { - "epoch": 0.2377003869541183, - "grad_norm": 2.8097715377807617, - "learning_rate": 3.919840818646959e-05, - "loss": 5.7519, + "epoch": 0.9506964404156534, + "grad_norm": 2.4941701889038086, + "learning_rate": 2.7715635098185434e-06, + "loss": 4.2922, "step": 4300 }, { - "epoch": 0.23797678275290216, - "grad_norm": 2.7227861881256104, - "learning_rate": 3.9184195565662314e-05, - "loss": 5.3365, + "epoch": 0.9518019013928808, + "grad_norm": 2.6204841136932373, + "learning_rate": 2.7094208302261995e-06, + "loss": 4.4099, "step": 4305 }, { - "epoch": 0.238253178551686, - "grad_norm": 2.6679046154022217, - "learning_rate": 3.916998294485503e-05, - "loss": 5.4229, + "epoch": 0.9529073623701083, + "grad_norm": 2.7678253650665283, + "learning_rate": 2.6472781506338553e-06, + "loss": 4.5308, "step": 4310 }, { - "epoch": 0.23852957435046987, - "grad_norm": 2.4163801670074463, - "learning_rate": 3.9155770324047755e-05, - "loss": 5.1811, + "epoch": 0.9540128233473358, + "grad_norm": 2.610168218612671, + "learning_rate": 2.5851354710415115e-06, + "loss": 4.2916, "step": 4315 }, { - "epoch": 0.23880597014925373, - "grad_norm": 2.996711254119873, - "learning_rate": 3.914155770324048e-05, - "loss": 5.8349, + "epoch": 0.9551182843245634, + "grad_norm": 2.404608726501465, + "learning_rate": 2.522992791449167e-06, + "loss": 4.1172, "step": 4320 }, { - "epoch": 0.23908236594803758, - "grad_norm": 2.91500186920166, - "learning_rate": 3.91273450824332e-05, - "loss": 5.3502, + "epoch": 0.9562237453017909, + "grad_norm": 2.581918478012085, + "learning_rate": 2.4608501118568234e-06, + "loss": 4.5247, "step": 4325 }, { - "epoch": 0.23935876174682144, - "grad_norm": 2.5672600269317627, - "learning_rate": 3.911313246162592e-05, - "loss": 5.4671, + "epoch": 0.9573292062790183, + "grad_norm": 2.4554283618927, + "learning_rate": 2.3987074322644795e-06, + "loss": 4.4112, "step": 4330 }, { - "epoch": 0.2396351575456053, - "grad_norm": 2.848611831665039, - "learning_rate": 3.909891984081865e-05, - "loss": 5.6904, + "epoch": 0.9584346672562458, + "grad_norm": 3.0333340167999268, + "learning_rate": 2.3365647526721353e-06, + "loss": 4.4101, "step": 4335 }, { - "epoch": 0.23991155334438916, - "grad_norm": 3.451052665710449, - "learning_rate": 3.9084707220011375e-05, - "loss": 5.4361, + "epoch": 0.9595401282334733, + "grad_norm": 2.745823621749878, + "learning_rate": 2.274422073079791e-06, + "loss": 4.4591, "step": 4340 }, { - "epoch": 0.240187949143173, - "grad_norm": 2.8216664791107178, - "learning_rate": 3.907049459920409e-05, - "loss": 5.6986, + "epoch": 0.9606455892107009, + "grad_norm": 2.8770716190338135, + "learning_rate": 2.2122793934874472e-06, + "loss": 4.4189, "step": 4345 }, { - "epoch": 0.24046434494195687, - "grad_norm": 2.856027841567993, - "learning_rate": 3.9056281978396816e-05, - "loss": 5.9277, + "epoch": 0.9617510501879284, + "grad_norm": 2.701787233352661, + "learning_rate": 2.1501367138951034e-06, + "loss": 4.4115, "step": 4350 }, { - "epoch": 0.24074074074074073, - "grad_norm": 2.87872576713562, - "learning_rate": 3.904206935758954e-05, - "loss": 5.6129, + "epoch": 0.9628565111651559, + "grad_norm": 2.8112969398498535, + "learning_rate": 2.087994034302759e-06, + "loss": 4.3162, "step": 4355 }, { - "epoch": 0.2410171365395246, - "grad_norm": 2.844909906387329, - "learning_rate": 3.9027856736782264e-05, - "loss": 5.5358, + "epoch": 0.9639619721423833, + "grad_norm": 2.660151958465576, + "learning_rate": 2.0258513547104153e-06, + "loss": 4.4636, "step": 4360 }, { - "epoch": 0.24129353233830847, - "grad_norm": 3.371220111846924, - "learning_rate": 3.901364411597499e-05, - "loss": 5.2788, + "epoch": 0.9650674331196109, + "grad_norm": 2.6464245319366455, + "learning_rate": 1.963708675118071e-06, + "loss": 4.5144, "step": 4365 }, { - "epoch": 0.24156992813709233, - "grad_norm": 2.4404332637786865, - "learning_rate": 3.899943149516771e-05, - "loss": 5.4483, + "epoch": 0.9661728940968384, + "grad_norm": 2.581138849258423, + "learning_rate": 1.901565995525727e-06, + "loss": 4.3598, "step": 4370 }, { - "epoch": 0.24184632393587618, - "grad_norm": 2.876338481903076, - "learning_rate": 3.8985218874360436e-05, - "loss": 5.4887, + "epoch": 0.9672783550740659, + "grad_norm": 2.4853599071502686, + "learning_rate": 1.8394233159333832e-06, + "loss": 4.2964, "step": 4375 }, { - "epoch": 0.24212271973466004, - "grad_norm": 3.4181478023529053, - "learning_rate": 3.897100625355315e-05, - "loss": 5.6298, + "epoch": 0.9683838160512934, + "grad_norm": 2.554091691970825, + "learning_rate": 1.7772806363410391e-06, + "loss": 4.4226, "step": 4380 }, { - "epoch": 0.2423991155334439, - "grad_norm": 2.8636157512664795, - "learning_rate": 3.8956793632745884e-05, - "loss": 5.7623, + "epoch": 0.969489277028521, + "grad_norm": 2.9564058780670166, + "learning_rate": 1.7151379567486951e-06, + "loss": 4.3925, "step": 4385 }, { - "epoch": 0.24267551133222776, - "grad_norm": 2.881701707839966, - "learning_rate": 3.894258101193861e-05, - "loss": 5.5819, + "epoch": 0.9705947380057484, + "grad_norm": 2.502652406692505, + "learning_rate": 1.6529952771563513e-06, + "loss": 4.3428, "step": 4390 }, { - "epoch": 0.2429519071310116, - "grad_norm": 2.676478385925293, - "learning_rate": 3.8928368391131325e-05, - "loss": 5.5775, + "epoch": 0.9717001989829759, + "grad_norm": 2.493762969970703, + "learning_rate": 1.5908525975640068e-06, + "loss": 4.249, "step": 4395 }, { - "epoch": 0.24322830292979547, - "grad_norm": 3.2318267822265625, - "learning_rate": 3.891415577032405e-05, - "loss": 5.416, + "epoch": 0.9728056599602034, + "grad_norm": 2.4519858360290527, + "learning_rate": 1.528709917971663e-06, + "loss": 4.2229, "step": 4400 }, { - "epoch": 0.24350469872857933, - "grad_norm": 2.6567542552948, - "learning_rate": 3.889994314951677e-05, - "loss": 5.6167, + "epoch": 0.9739111209374309, + "grad_norm": 2.7903311252593994, + "learning_rate": 1.466567238379319e-06, + "loss": 4.4687, "step": 4405 }, { - "epoch": 0.24378109452736318, - "grad_norm": 2.6973955631256104, - "learning_rate": 3.88857305287095e-05, - "loss": 5.5814, + "epoch": 0.9750165819146585, + "grad_norm": 2.556363821029663, + "learning_rate": 1.4044245587869751e-06, + "loss": 4.3987, "step": 4410 }, { - "epoch": 0.24405749032614704, - "grad_norm": 2.4810473918914795, - "learning_rate": 3.8871517907902214e-05, - "loss": 5.61, + "epoch": 0.9761220428918859, + "grad_norm": 2.542534351348877, + "learning_rate": 1.3422818791946309e-06, + "loss": 4.4648, "step": 4415 }, { - "epoch": 0.2443338861249309, - "grad_norm": 2.4649670124053955, - "learning_rate": 3.8857305287094945e-05, - "loss": 5.3495, + "epoch": 0.9772275038691134, + "grad_norm": 2.5431811809539795, + "learning_rate": 1.280139199602287e-06, + "loss": 4.3701, "step": 4420 }, { - "epoch": 0.24461028192371476, - "grad_norm": 2.682745933532715, - "learning_rate": 3.884309266628767e-05, - "loss": 5.498, + "epoch": 0.9783329648463409, + "grad_norm": 2.6445794105529785, + "learning_rate": 1.2179965200099428e-06, + "loss": 4.263, "step": 4425 }, { - "epoch": 0.2448866777224986, - "grad_norm": 3.384434700012207, - "learning_rate": 3.8828880045480386e-05, - "loss": 5.4192, + "epoch": 0.9794384258235684, + "grad_norm": 2.8488686084747314, + "learning_rate": 1.1558538404175988e-06, + "loss": 4.2224, "step": 4430 }, { - "epoch": 0.24516307352128247, - "grad_norm": 3.0535390377044678, - "learning_rate": 3.881466742467311e-05, - "loss": 5.4866, + "epoch": 0.980543886800796, + "grad_norm": 2.919131278991699, + "learning_rate": 1.093711160825255e-06, + "loss": 4.3791, "step": 4435 }, { - "epoch": 0.24543946932006633, - "grad_norm": 2.8525378704071045, - "learning_rate": 3.8800454803865834e-05, - "loss": 5.5854, + "epoch": 0.9816493477780235, + "grad_norm": 2.830904483795166, + "learning_rate": 1.0315684812329107e-06, + "loss": 4.3433, "step": 4440 }, { - "epoch": 0.24571586511885019, - "grad_norm": 2.7716307640075684, - "learning_rate": 3.878624218305856e-05, - "loss": 5.6654, + "epoch": 0.9827548087552509, + "grad_norm": 2.7437570095062256, + "learning_rate": 9.694258016405668e-07, + "loss": 4.2391, "step": 4445 }, { - "epoch": 0.24599226091763404, - "grad_norm": 2.2745914459228516, - "learning_rate": 3.877202956225128e-05, - "loss": 5.5521, + "epoch": 0.9838602697324784, + "grad_norm": 2.664886713027954, + "learning_rate": 9.072831220482228e-07, + "loss": 4.488, "step": 4450 }, { - "epoch": 0.2462686567164179, - "grad_norm": 3.1492974758148193, - "learning_rate": 3.8757816941444006e-05, - "loss": 5.5735, + "epoch": 0.9849657307097059, + "grad_norm": 2.518346071243286, + "learning_rate": 8.451404424558787e-07, + "loss": 4.4019, "step": 4455 }, { - "epoch": 0.24654505251520176, - "grad_norm": 3.3391618728637695, - "learning_rate": 3.874360432063673e-05, - "loss": 5.3698, + "epoch": 0.9860711916869335, + "grad_norm": 2.9975318908691406, + "learning_rate": 7.829977628635347e-07, + "loss": 4.2323, "step": 4460 }, { - "epoch": 0.24682144831398561, - "grad_norm": 2.884638547897339, - "learning_rate": 3.872939169982945e-05, - "loss": 5.7967, + "epoch": 0.987176652664161, + "grad_norm": 2.6765410900115967, + "learning_rate": 7.208550832711907e-07, + "loss": 4.3188, "step": 4465 }, { - "epoch": 0.2470978441127695, - "grad_norm": 2.4962894916534424, - "learning_rate": 3.871517907902217e-05, - "loss": 5.2797, + "epoch": 0.9882821136413884, + "grad_norm": 2.8536341190338135, + "learning_rate": 6.587124036788466e-07, + "loss": 4.4314, "step": 4470 }, { - "epoch": 0.24737423991155336, - "grad_norm": 3.132397413253784, - "learning_rate": 3.87009664582149e-05, - "loss": 5.6922, + "epoch": 0.9893875746186159, + "grad_norm": 2.316105365753174, + "learning_rate": 5.965697240865026e-07, + "loss": 4.5006, "step": 4475 }, { - "epoch": 0.2476506357103372, - "grad_norm": 2.939412832260132, - "learning_rate": 3.868675383740762e-05, - "loss": 5.7016, + "epoch": 0.9904930355958435, + "grad_norm": 2.705261468887329, + "learning_rate": 5.344270444941587e-07, + "loss": 4.4413, "step": 4480 }, { - "epoch": 0.24792703150912107, - "grad_norm": 2.6651360988616943, - "learning_rate": 3.867254121660034e-05, - "loss": 5.4948, + "epoch": 0.991598496573071, + "grad_norm": 2.7570252418518066, + "learning_rate": 4.722843649018146e-07, + "loss": 4.4869, "step": 4485 }, { - "epoch": 0.24820342730790493, - "grad_norm": 3.104663372039795, - "learning_rate": 3.865832859579307e-05, - "loss": 5.6978, + "epoch": 0.9927039575502985, + "grad_norm": 2.687154531478882, + "learning_rate": 4.1014168530947054e-07, + "loss": 4.5021, "step": 4490 }, { - "epoch": 0.24847982310668879, - "grad_norm": 3.229151725769043, - "learning_rate": 3.864411597498579e-05, - "loss": 5.7317, + "epoch": 0.993809418527526, + "grad_norm": 2.885932683944702, + "learning_rate": 3.4799900571712656e-07, + "loss": 4.3324, "step": 4495 }, { - "epoch": 0.24875621890547264, - "grad_norm": 2.4405629634857178, - "learning_rate": 3.8629903354178515e-05, - "loss": 5.7138, + "epoch": 0.9949148795047534, + "grad_norm": 2.6431424617767334, + "learning_rate": 2.858563261247825e-07, + "loss": 4.4544, "step": 4500 }, { - "epoch": 0.2490326147042565, - "grad_norm": 2.657789707183838, - "learning_rate": 3.861569073337123e-05, - "loss": 5.674, + "epoch": 0.996020340481981, + "grad_norm": 2.5612587928771973, + "learning_rate": 2.2371364653243848e-07, + "loss": 4.4129, "step": 4505 }, { - "epoch": 0.24930901050304036, - "grad_norm": 2.536699056625366, - "learning_rate": 3.860147811256396e-05, - "loss": 5.6521, + "epoch": 0.9971258014592085, + "grad_norm": 2.5301103591918945, + "learning_rate": 1.6157096694009447e-07, + "loss": 4.2888, "step": 4510 }, { - "epoch": 0.24958540630182421, - "grad_norm": 2.947270631790161, - "learning_rate": 3.858726549175668e-05, - "loss": 5.6656, + "epoch": 0.998231262436436, + "grad_norm": 2.852886199951172, + "learning_rate": 9.942828734775043e-08, + "loss": 4.4167, "step": 4515 }, { - "epoch": 0.24986180210060807, - "grad_norm": 3.1712615489959717, - "learning_rate": 3.8573052870949404e-05, - "loss": 5.4181, + "epoch": 0.9993367234136635, + "grad_norm": 3.017920970916748, + "learning_rate": 3.728560775540641e-08, + "loss": 4.2826, "step": 4520 - }, - { - "epoch": 0.25013819789939196, - "grad_norm": 2.742157459259033, - "learning_rate": 3.855884025014213e-05, - "loss": 5.5457, - "step": 4525 - }, - { - "epoch": 0.2504145936981758, - "grad_norm": 2.4692294597625732, - "learning_rate": 3.854462762933485e-05, - "loss": 5.5266, - "step": 4530 - }, - { - "epoch": 0.25069098949695967, - "grad_norm": 2.7221648693084717, - "learning_rate": 3.8530415008527576e-05, - "loss": 5.6376, - "step": 4535 - }, - { - "epoch": 0.25096738529574353, - "grad_norm": 2.809128999710083, - "learning_rate": 3.851620238772029e-05, - "loss": 5.4378, - "step": 4540 - }, - { - "epoch": 0.2512437810945274, - "grad_norm": 2.9492316246032715, - "learning_rate": 3.8501989766913024e-05, - "loss": 5.7847, - "step": 4545 - }, - { - "epoch": 0.25152017689331124, - "grad_norm": 3.0182037353515625, - "learning_rate": 3.848777714610574e-05, - "loss": 5.6268, - "step": 4550 - }, - { - "epoch": 0.2517965726920951, - "grad_norm": 3.395892381668091, - "learning_rate": 3.8473564525298465e-05, - "loss": 5.5913, - "step": 4555 - }, - { - "epoch": 0.25207296849087896, - "grad_norm": 2.6461169719696045, - "learning_rate": 3.845935190449119e-05, - "loss": 5.655, - "step": 4560 - }, - { - "epoch": 0.2523493642896628, - "grad_norm": 3.7275054454803467, - "learning_rate": 3.844513928368391e-05, - "loss": 5.5196, - "step": 4565 - }, - { - "epoch": 0.25262576008844667, - "grad_norm": 3.1424355506896973, - "learning_rate": 3.843092666287664e-05, - "loss": 5.3785, - "step": 4570 - }, - { - "epoch": 0.25290215588723053, - "grad_norm": 2.984804153442383, - "learning_rate": 3.8416714042069354e-05, - "loss": 5.4338, - "step": 4575 - }, - { - "epoch": 0.2531785516860144, - "grad_norm": 3.411898136138916, - "learning_rate": 3.8402501421262085e-05, - "loss": 5.7189, - "step": 4580 - }, - { - "epoch": 0.25345494748479824, - "grad_norm": 3.0697154998779297, - "learning_rate": 3.838828880045481e-05, - "loss": 5.554, - "step": 4585 - }, - { - "epoch": 0.2537313432835821, - "grad_norm": 2.617682695388794, - "learning_rate": 3.8374076179647526e-05, - "loss": 5.4068, - "step": 4590 - }, - { - "epoch": 0.25400773908236596, - "grad_norm": 2.9888644218444824, - "learning_rate": 3.835986355884025e-05, - "loss": 5.9172, - "step": 4595 - }, - { - "epoch": 0.2542841348811498, - "grad_norm": 2.7901175022125244, - "learning_rate": 3.8345650938032974e-05, - "loss": 5.3799, - "step": 4600 - }, - { - "epoch": 0.25456053067993367, - "grad_norm": 2.9775073528289795, - "learning_rate": 3.83314383172257e-05, - "loss": 5.9651, - "step": 4605 - }, - { - "epoch": 0.25483692647871753, - "grad_norm": 3.188081979751587, - "learning_rate": 3.831722569641842e-05, - "loss": 5.5412, - "step": 4610 - }, - { - "epoch": 0.2551133222775014, - "grad_norm": 3.1348631381988525, - "learning_rate": 3.8303013075611146e-05, - "loss": 5.4277, - "step": 4615 - }, - { - "epoch": 0.25538971807628524, - "grad_norm": 2.817936658859253, - "learning_rate": 3.828880045480387e-05, - "loss": 5.6873, - "step": 4620 - }, - { - "epoch": 0.2556661138750691, - "grad_norm": 2.883748769760132, - "learning_rate": 3.827458783399659e-05, - "loss": 5.4839, - "step": 4625 - }, - { - "epoch": 0.25594250967385296, - "grad_norm": 2.654115915298462, - "learning_rate": 3.826037521318931e-05, - "loss": 5.7442, - "step": 4630 - }, - { - "epoch": 0.2562189054726368, - "grad_norm": 2.976658821105957, - "learning_rate": 3.8246162592382035e-05, - "loss": 5.374, - "step": 4635 - }, - { - "epoch": 0.2564953012714207, - "grad_norm": 2.345407009124756, - "learning_rate": 3.823194997157476e-05, - "loss": 5.551, - "step": 4640 - }, - { - "epoch": 0.25677169707020453, - "grad_norm": 3.0773231983184814, - "learning_rate": 3.821773735076748e-05, - "loss": 5.7161, - "step": 4645 - }, - { - "epoch": 0.2570480928689884, - "grad_norm": 2.6813673973083496, - "learning_rate": 3.820352472996021e-05, - "loss": 5.2693, - "step": 4650 - }, - { - "epoch": 0.25732448866777224, - "grad_norm": 2.9697906970977783, - "learning_rate": 3.818931210915293e-05, - "loss": 5.2863, - "step": 4655 - }, - { - "epoch": 0.2576008844665561, - "grad_norm": 2.9038257598876953, - "learning_rate": 3.817509948834565e-05, - "loss": 5.4955, - "step": 4660 - }, - { - "epoch": 0.25787728026533996, - "grad_norm": 3.2474894523620605, - "learning_rate": 3.816088686753837e-05, - "loss": 5.316, - "step": 4665 - }, - { - "epoch": 0.2581536760641238, - "grad_norm": 3.7227532863616943, - "learning_rate": 3.81466742467311e-05, - "loss": 5.6569, - "step": 4670 - }, - { - "epoch": 0.2584300718629077, - "grad_norm": 3.429311513900757, - "learning_rate": 3.813246162592382e-05, - "loss": 5.6594, - "step": 4675 - }, - { - "epoch": 0.25870646766169153, - "grad_norm": 2.9430253505706787, - "learning_rate": 3.8118249005116544e-05, - "loss": 5.3997, - "step": 4680 - }, - { - "epoch": 0.2589828634604754, - "grad_norm": 3.2652957439422607, - "learning_rate": 3.810403638430927e-05, - "loss": 5.7445, - "step": 4685 - }, - { - "epoch": 0.25925925925925924, - "grad_norm": 3.255427360534668, - "learning_rate": 3.808982376350199e-05, - "loss": 5.3571, - "step": 4690 - }, - { - "epoch": 0.2595356550580431, - "grad_norm": 2.7767574787139893, - "learning_rate": 3.8075611142694716e-05, - "loss": 5.5315, - "step": 4695 - }, - { - "epoch": 0.25981205085682696, - "grad_norm": 2.555504560470581, - "learning_rate": 3.806139852188744e-05, - "loss": 5.4223, - "step": 4700 - }, - { - "epoch": 0.2600884466556108, - "grad_norm": 2.6958086490631104, - "learning_rate": 3.8047185901080164e-05, - "loss": 5.5566, - "step": 4705 - }, - { - "epoch": 0.2603648424543947, - "grad_norm": 3.072174310684204, - "learning_rate": 3.803297328027288e-05, - "loss": 5.9995, - "step": 4710 - }, - { - "epoch": 0.26064123825317853, - "grad_norm": 3.545971632003784, - "learning_rate": 3.8018760659465605e-05, - "loss": 5.5504, - "step": 4715 - }, - { - "epoch": 0.2609176340519624, - "grad_norm": 2.8304150104522705, - "learning_rate": 3.8004548038658336e-05, - "loss": 5.6472, - "step": 4720 - }, - { - "epoch": 0.26119402985074625, - "grad_norm": 3.0823028087615967, - "learning_rate": 3.799033541785105e-05, - "loss": 5.3301, - "step": 4725 - }, - { - "epoch": 0.2614704256495301, - "grad_norm": 3.2477657794952393, - "learning_rate": 3.797612279704378e-05, - "loss": 5.4662, - "step": 4730 - }, - { - "epoch": 0.26174682144831396, - "grad_norm": 2.6965558528900146, - "learning_rate": 3.79619101762365e-05, - "loss": 5.2598, - "step": 4735 - }, - { - "epoch": 0.2620232172470978, - "grad_norm": 3.41056752204895, - "learning_rate": 3.7947697555429225e-05, - "loss": 5.4817, - "step": 4740 - }, - { - "epoch": 0.26229961304588173, - "grad_norm": 3.00512433052063, - "learning_rate": 3.793348493462194e-05, - "loss": 5.3896, - "step": 4745 - }, - { - "epoch": 0.2625760088446656, - "grad_norm": 3.316678047180176, - "learning_rate": 3.7919272313814666e-05, - "loss": 6.0564, - "step": 4750 - }, - { - "epoch": 0.26285240464344944, - "grad_norm": 2.8839213848114014, - "learning_rate": 3.79050596930074e-05, - "loss": 5.715, - "step": 4755 - }, - { - "epoch": 0.2631288004422333, - "grad_norm": 3.264241933822632, - "learning_rate": 3.7890847072200114e-05, - "loss": 5.5688, - "step": 4760 - }, - { - "epoch": 0.26340519624101716, - "grad_norm": 2.51202130317688, - "learning_rate": 3.787663445139284e-05, - "loss": 5.4865, - "step": 4765 - }, - { - "epoch": 0.263681592039801, - "grad_norm": 2.940673828125, - "learning_rate": 3.786242183058556e-05, - "loss": 5.6147, - "step": 4770 - }, - { - "epoch": 0.2639579878385849, - "grad_norm": 2.740130662918091, - "learning_rate": 3.7848209209778286e-05, - "loss": 5.7488, - "step": 4775 - }, - { - "epoch": 0.26423438363736873, - "grad_norm": 2.5281081199645996, - "learning_rate": 3.783399658897101e-05, - "loss": 5.7751, - "step": 4780 - }, - { - "epoch": 0.2645107794361526, - "grad_norm": 2.7616381645202637, - "learning_rate": 3.781978396816373e-05, - "loss": 5.5114, - "step": 4785 - }, - { - "epoch": 0.26478717523493644, - "grad_norm": 3.112661838531494, - "learning_rate": 3.780557134735646e-05, - "loss": 5.7249, - "step": 4790 - }, - { - "epoch": 0.2650635710337203, - "grad_norm": 2.2833783626556396, - "learning_rate": 3.7791358726549175e-05, - "loss": 5.0357, - "step": 4795 - }, - { - "epoch": 0.26533996683250416, - "grad_norm": 2.7864673137664795, - "learning_rate": 3.77771461057419e-05, - "loss": 5.68, - "step": 4800 - }, - { - "epoch": 0.265616362631288, - "grad_norm": 2.8897244930267334, - "learning_rate": 3.7762933484934623e-05, - "loss": 5.5812, - "step": 4805 - }, - { - "epoch": 0.2658927584300719, - "grad_norm": 3.157487154006958, - "learning_rate": 3.774872086412735e-05, - "loss": 5.5694, - "step": 4810 - }, - { - "epoch": 0.26616915422885573, - "grad_norm": 3.268732786178589, - "learning_rate": 3.773450824332007e-05, - "loss": 5.3811, - "step": 4815 - }, - { - "epoch": 0.2664455500276396, - "grad_norm": 2.7463014125823975, - "learning_rate": 3.772029562251279e-05, - "loss": 5.4335, - "step": 4820 - }, - { - "epoch": 0.26672194582642345, - "grad_norm": 3.34234619140625, - "learning_rate": 3.770608300170552e-05, - "loss": 5.401, - "step": 4825 - }, - { - "epoch": 0.2669983416252073, - "grad_norm": 4.338949680328369, - "learning_rate": 3.769187038089824e-05, - "loss": 5.5645, - "step": 4830 - }, - { - "epoch": 0.26727473742399116, - "grad_norm": 2.9800288677215576, - "learning_rate": 3.767765776009096e-05, - "loss": 5.1976, - "step": 4835 - }, - { - "epoch": 0.267551133222775, - "grad_norm": 3.4728004932403564, - "learning_rate": 3.7663445139283685e-05, - "loss": 5.6093, - "step": 4840 - }, - { - "epoch": 0.2678275290215589, - "grad_norm": 3.496291399002075, - "learning_rate": 3.764923251847641e-05, - "loss": 5.5747, - "step": 4845 - }, - { - "epoch": 0.26810392482034273, - "grad_norm": 2.850144386291504, - "learning_rate": 3.763501989766913e-05, - "loss": 5.1295, - "step": 4850 - }, - { - "epoch": 0.2683803206191266, - "grad_norm": 2.8431968688964844, - "learning_rate": 3.762080727686185e-05, - "loss": 5.5363, - "step": 4855 - }, - { - "epoch": 0.26865671641791045, - "grad_norm": 2.7617273330688477, - "learning_rate": 3.760659465605458e-05, - "loss": 5.7338, - "step": 4860 - }, - { - "epoch": 0.2689331122166943, - "grad_norm": 2.990050792694092, - "learning_rate": 3.7592382035247304e-05, - "loss": 5.2071, - "step": 4865 - }, - { - "epoch": 0.26920950801547816, - "grad_norm": 3.1524643898010254, - "learning_rate": 3.757816941444002e-05, - "loss": 5.6822, - "step": 4870 - }, - { - "epoch": 0.269485903814262, - "grad_norm": 2.6048810482025146, - "learning_rate": 3.7563956793632746e-05, - "loss": 5.6043, - "step": 4875 - }, - { - "epoch": 0.2697622996130459, - "grad_norm": 2.8759994506835938, - "learning_rate": 3.754974417282547e-05, - "loss": 5.4891, - "step": 4880 - }, - { - "epoch": 0.27003869541182973, - "grad_norm": 2.8228535652160645, - "learning_rate": 3.7535531552018194e-05, - "loss": 5.5018, - "step": 4885 - }, - { - "epoch": 0.2703150912106136, - "grad_norm": 3.0626003742218018, - "learning_rate": 3.752131893121092e-05, - "loss": 5.3939, - "step": 4890 - }, - { - "epoch": 0.27059148700939745, - "grad_norm": 2.5288095474243164, - "learning_rate": 3.750710631040364e-05, - "loss": 5.6604, - "step": 4895 - }, - { - "epoch": 0.2708678828081813, - "grad_norm": 2.9323790073394775, - "learning_rate": 3.7492893689596365e-05, - "loss": 5.4378, - "step": 4900 - }, - { - "epoch": 0.27114427860696516, - "grad_norm": 2.815352439880371, - "learning_rate": 3.747868106878908e-05, - "loss": 5.6052, - "step": 4905 - }, - { - "epoch": 0.271420674405749, - "grad_norm": 2.981050968170166, - "learning_rate": 3.746446844798181e-05, - "loss": 5.5689, - "step": 4910 - }, - { - "epoch": 0.2716970702045329, - "grad_norm": 3.062026262283325, - "learning_rate": 3.745025582717454e-05, - "loss": 5.4102, - "step": 4915 - }, - { - "epoch": 0.27197346600331673, - "grad_norm": 3.0991098880767822, - "learning_rate": 3.7436043206367255e-05, - "loss": 5.4325, - "step": 4920 - }, - { - "epoch": 0.2722498618021006, - "grad_norm": 3.309856414794922, - "learning_rate": 3.742183058555998e-05, - "loss": 5.2731, - "step": 4925 - }, - { - "epoch": 0.27252625760088445, - "grad_norm": 3.4550788402557373, - "learning_rate": 3.74076179647527e-05, - "loss": 5.5482, - "step": 4930 - }, - { - "epoch": 0.2728026533996683, - "grad_norm": 2.121727705001831, - "learning_rate": 3.7393405343945427e-05, - "loss": 5.5073, - "step": 4935 - }, - { - "epoch": 0.27307904919845216, - "grad_norm": 2.894728660583496, - "learning_rate": 3.737919272313815e-05, - "loss": 5.693, - "step": 4940 - }, - { - "epoch": 0.273355444997236, - "grad_norm": 3.626741647720337, - "learning_rate": 3.736498010233087e-05, - "loss": 5.7241, - "step": 4945 - }, - { - "epoch": 0.2736318407960199, - "grad_norm": 2.86633563041687, - "learning_rate": 3.73507674815236e-05, - "loss": 5.4346, - "step": 4950 - }, - { - "epoch": 0.27390823659480373, - "grad_norm": 3.4413692951202393, - "learning_rate": 3.7336554860716316e-05, - "loss": 5.4228, - "step": 4955 - }, - { - "epoch": 0.2741846323935876, - "grad_norm": 2.800236225128174, - "learning_rate": 3.732234223990904e-05, - "loss": 5.3015, - "step": 4960 - }, - { - "epoch": 0.2744610281923715, - "grad_norm": 3.2357637882232666, - "learning_rate": 3.7308129619101764e-05, - "loss": 5.7801, - "step": 4965 - }, - { - "epoch": 0.27473742399115536, - "grad_norm": 2.5633909702301025, - "learning_rate": 3.729391699829449e-05, - "loss": 5.6144, - "step": 4970 - }, - { - "epoch": 0.2750138197899392, - "grad_norm": 2.8940200805664062, - "learning_rate": 3.727970437748721e-05, - "loss": 5.442, - "step": 4975 - }, - { - "epoch": 0.2752902155887231, - "grad_norm": 3.922053337097168, - "learning_rate": 3.726549175667993e-05, - "loss": 5.4147, - "step": 4980 - }, - { - "epoch": 0.27556661138750693, - "grad_norm": 3.393669366836548, - "learning_rate": 3.725127913587266e-05, - "loss": 5.6151, - "step": 4985 - }, - { - "epoch": 0.2758430071862908, - "grad_norm": 3.1016077995300293, - "learning_rate": 3.723706651506538e-05, - "loss": 5.5034, - "step": 4990 - }, - { - "epoch": 0.27611940298507465, - "grad_norm": 4.4078145027160645, - "learning_rate": 3.72228538942581e-05, - "loss": 5.5826, - "step": 4995 - }, - { - "epoch": 0.2763957987838585, - "grad_norm": 3.1527042388916016, - "learning_rate": 3.720864127345083e-05, - "loss": 5.4405, - "step": 5000 - }, - { - "epoch": 0.27667219458264236, - "grad_norm": 3.0095982551574707, - "learning_rate": 3.719442865264355e-05, - "loss": 5.6309, - "step": 5005 - }, - { - "epoch": 0.2769485903814262, - "grad_norm": 3.327120542526245, - "learning_rate": 3.718021603183627e-05, - "loss": 5.5869, - "step": 5010 - }, - { - "epoch": 0.2772249861802101, - "grad_norm": 3.148468017578125, - "learning_rate": 3.7166003411029e-05, - "loss": 5.3626, - "step": 5015 - }, - { - "epoch": 0.27750138197899393, - "grad_norm": 2.774047374725342, - "learning_rate": 3.715179079022172e-05, - "loss": 5.0038, - "step": 5020 - }, - { - "epoch": 0.2777777777777778, - "grad_norm": 2.5972516536712646, - "learning_rate": 3.7137578169414445e-05, - "loss": 5.8164, - "step": 5025 - }, - { - "epoch": 0.27805417357656165, - "grad_norm": 3.2580530643463135, - "learning_rate": 3.712336554860716e-05, - "loss": 5.5117, - "step": 5030 - }, - { - "epoch": 0.2783305693753455, - "grad_norm": 2.4557392597198486, - "learning_rate": 3.710915292779989e-05, - "loss": 5.4633, - "step": 5035 - }, - { - "epoch": 0.27860696517412936, - "grad_norm": 2.765587091445923, - "learning_rate": 3.709494030699261e-05, - "loss": 5.3116, - "step": 5040 - }, - { - "epoch": 0.2788833609729132, - "grad_norm": 3.075885057449341, - "learning_rate": 3.7080727686185334e-05, - "loss": 5.4564, - "step": 5045 - }, - { - "epoch": 0.2791597567716971, - "grad_norm": 2.8484435081481934, - "learning_rate": 3.706651506537806e-05, - "loss": 5.5371, - "step": 5050 - }, - { - "epoch": 0.27943615257048093, - "grad_norm": 2.929783344268799, - "learning_rate": 3.705230244457078e-05, - "loss": 5.4535, - "step": 5055 - }, - { - "epoch": 0.2797125483692648, - "grad_norm": 2.6549274921417236, - "learning_rate": 3.7038089823763506e-05, - "loss": 5.4736, - "step": 5060 - }, - { - "epoch": 0.27998894416804865, - "grad_norm": 3.388282537460327, - "learning_rate": 3.702387720295622e-05, - "loss": 5.3436, - "step": 5065 - }, - { - "epoch": 0.2802653399668325, - "grad_norm": 3.1960010528564453, - "learning_rate": 3.7009664582148954e-05, - "loss": 5.8104, - "step": 5070 - }, - { - "epoch": 0.28054173576561636, - "grad_norm": 2.6687440872192383, - "learning_rate": 3.699545196134167e-05, - "loss": 5.2787, - "step": 5075 - }, - { - "epoch": 0.2808181315644002, - "grad_norm": 3.2794923782348633, - "learning_rate": 3.6981239340534395e-05, - "loss": 5.5187, - "step": 5080 - }, - { - "epoch": 0.2810945273631841, - "grad_norm": 2.8185513019561768, - "learning_rate": 3.696702671972712e-05, - "loss": 5.3756, - "step": 5085 - }, - { - "epoch": 0.28137092316196793, - "grad_norm": 3.3090620040893555, - "learning_rate": 3.695281409891984e-05, - "loss": 5.6135, - "step": 5090 - }, - { - "epoch": 0.2816473189607518, - "grad_norm": 2.823322057723999, - "learning_rate": 3.693860147811257e-05, - "loss": 5.7067, - "step": 5095 - }, - { - "epoch": 0.28192371475953565, - "grad_norm": 4.680220603942871, - "learning_rate": 3.6924388857305284e-05, - "loss": 5.3231, - "step": 5100 - }, - { - "epoch": 0.2822001105583195, - "grad_norm": 3.1721339225769043, - "learning_rate": 3.6910176236498015e-05, - "loss": 5.3679, - "step": 5105 - }, - { - "epoch": 0.28247650635710336, - "grad_norm": 3.343784809112549, - "learning_rate": 3.689596361569074e-05, - "loss": 5.5471, - "step": 5110 - }, - { - "epoch": 0.2827529021558872, - "grad_norm": 2.900912284851074, - "learning_rate": 3.6881750994883456e-05, - "loss": 5.4448, - "step": 5115 - }, - { - "epoch": 0.2830292979546711, - "grad_norm": 2.725116729736328, - "learning_rate": 3.686753837407618e-05, - "loss": 5.1778, - "step": 5120 - }, - { - "epoch": 0.28330569375345493, - "grad_norm": 2.9266819953918457, - "learning_rate": 3.6853325753268904e-05, - "loss": 5.706, - "step": 5125 - }, - { - "epoch": 0.2835820895522388, - "grad_norm": 3.6640381813049316, - "learning_rate": 3.683911313246163e-05, - "loss": 5.355, - "step": 5130 - }, - { - "epoch": 0.28385848535102265, - "grad_norm": 3.3055994510650635, - "learning_rate": 3.682490051165435e-05, - "loss": 5.2927, - "step": 5135 - }, - { - "epoch": 0.2841348811498065, - "grad_norm": 3.171983242034912, - "learning_rate": 3.6810687890847076e-05, - "loss": 5.625, - "step": 5140 - }, - { - "epoch": 0.28441127694859036, - "grad_norm": 3.0071423053741455, - "learning_rate": 3.67964752700398e-05, - "loss": 5.2521, - "step": 5145 - }, - { - "epoch": 0.2846876727473742, - "grad_norm": 2.843132972717285, - "learning_rate": 3.678226264923252e-05, - "loss": 5.6159, - "step": 5150 - }, - { - "epoch": 0.2849640685461581, - "grad_norm": 2.8676323890686035, - "learning_rate": 3.676805002842524e-05, - "loss": 5.4452, - "step": 5155 - }, - { - "epoch": 0.28524046434494194, - "grad_norm": 2.7447266578674316, - "learning_rate": 3.675383740761797e-05, - "loss": 5.4009, - "step": 5160 - }, - { - "epoch": 0.2855168601437258, - "grad_norm": 3.2596964836120605, - "learning_rate": 3.673962478681069e-05, - "loss": 5.7716, - "step": 5165 - }, - { - "epoch": 0.28579325594250965, - "grad_norm": 2.8645823001861572, - "learning_rate": 3.672541216600341e-05, - "loss": 5.5613, - "step": 5170 - }, - { - "epoch": 0.2860696517412935, - "grad_norm": 2.947704315185547, - "learning_rate": 3.671119954519614e-05, - "loss": 5.343, - "step": 5175 - }, - { - "epoch": 0.28634604754007736, - "grad_norm": 3.6739652156829834, - "learning_rate": 3.669698692438886e-05, - "loss": 5.3991, - "step": 5180 - }, - { - "epoch": 0.2866224433388613, - "grad_norm": 3.433134078979492, - "learning_rate": 3.668277430358158e-05, - "loss": 5.6598, - "step": 5185 - }, - { - "epoch": 0.28689883913764513, - "grad_norm": 2.977222204208374, - "learning_rate": 3.66685616827743e-05, - "loss": 5.4816, - "step": 5190 - }, - { - "epoch": 0.287175234936429, - "grad_norm": 2.5906851291656494, - "learning_rate": 3.665434906196703e-05, - "loss": 5.3199, - "step": 5195 - }, - { - "epoch": 0.28745163073521285, - "grad_norm": 2.8117191791534424, - "learning_rate": 3.664013644115975e-05, - "loss": 5.4231, - "step": 5200 - }, - { - "epoch": 0.2877280265339967, - "grad_norm": 2.9381093978881836, - "learning_rate": 3.6625923820352474e-05, - "loss": 5.4396, - "step": 5205 - }, - { - "epoch": 0.28800442233278056, - "grad_norm": 3.0122227668762207, - "learning_rate": 3.66117111995452e-05, - "loss": 5.2438, - "step": 5210 - }, - { - "epoch": 0.2882808181315644, - "grad_norm": 2.6538515090942383, - "learning_rate": 3.659749857873792e-05, - "loss": 5.3428, - "step": 5215 - }, - { - "epoch": 0.2885572139303483, - "grad_norm": 3.465743064880371, - "learning_rate": 3.6583285957930646e-05, - "loss": 5.4583, - "step": 5220 - }, - { - "epoch": 0.28883360972913213, - "grad_norm": 3.0137925148010254, - "learning_rate": 3.656907333712336e-05, - "loss": 5.6031, - "step": 5225 - }, - { - "epoch": 0.289110005527916, - "grad_norm": 3.7536206245422363, - "learning_rate": 3.6554860716316094e-05, - "loss": 5.3189, - "step": 5230 - }, - { - "epoch": 0.28938640132669985, - "grad_norm": 2.687974691390991, - "learning_rate": 3.654064809550881e-05, - "loss": 5.1424, - "step": 5235 - }, - { - "epoch": 0.2896627971254837, - "grad_norm": 2.5427982807159424, - "learning_rate": 3.6526435474701535e-05, - "loss": 5.5922, - "step": 5240 - }, - { - "epoch": 0.28993919292426756, - "grad_norm": 2.6604325771331787, - "learning_rate": 3.651222285389426e-05, - "loss": 5.4598, - "step": 5245 - }, - { - "epoch": 0.2902155887230514, - "grad_norm": 2.8820483684539795, - "learning_rate": 3.649801023308698e-05, - "loss": 5.3329, - "step": 5250 - }, - { - "epoch": 0.2904919845218353, - "grad_norm": 4.090969085693359, - "learning_rate": 3.648379761227971e-05, - "loss": 5.5375, - "step": 5255 - }, - { - "epoch": 0.29076838032061914, - "grad_norm": 3.3297054767608643, - "learning_rate": 3.6469584991472424e-05, - "loss": 5.217, - "step": 5260 - }, - { - "epoch": 0.291044776119403, - "grad_norm": 2.8751707077026367, - "learning_rate": 3.6455372370665155e-05, - "loss": 5.3424, - "step": 5265 - }, - { - "epoch": 0.29132117191818685, - "grad_norm": 2.618412971496582, - "learning_rate": 3.644115974985788e-05, - "loss": 5.4186, - "step": 5270 - }, - { - "epoch": 0.2915975677169707, - "grad_norm": 3.5012338161468506, - "learning_rate": 3.6426947129050596e-05, - "loss": 5.5651, - "step": 5275 - }, - { - "epoch": 0.29187396351575456, - "grad_norm": 3.1529078483581543, - "learning_rate": 3.641273450824333e-05, - "loss": 5.4826, - "step": 5280 - }, - { - "epoch": 0.2921503593145384, - "grad_norm": 3.4742085933685303, - "learning_rate": 3.6398521887436044e-05, - "loss": 5.3871, - "step": 5285 - }, - { - "epoch": 0.2924267551133223, - "grad_norm": 3.666706085205078, - "learning_rate": 3.638430926662877e-05, - "loss": 5.6392, - "step": 5290 - }, - { - "epoch": 0.29270315091210614, - "grad_norm": 3.182141065597534, - "learning_rate": 3.6370096645821485e-05, - "loss": 5.3455, - "step": 5295 - }, - { - "epoch": 0.29297954671089, - "grad_norm": 3.0457582473754883, - "learning_rate": 3.6355884025014216e-05, - "loss": 5.7264, - "step": 5300 - }, - { - "epoch": 0.29325594250967385, - "grad_norm": 2.9654111862182617, - "learning_rate": 3.634167140420694e-05, - "loss": 5.3143, - "step": 5305 - }, - { - "epoch": 0.2935323383084577, - "grad_norm": 3.9714043140411377, - "learning_rate": 3.632745878339966e-05, - "loss": 5.6, - "step": 5310 - }, - { - "epoch": 0.29380873410724156, - "grad_norm": 3.186901092529297, - "learning_rate": 3.631324616259239e-05, - "loss": 5.4677, - "step": 5315 - }, - { - "epoch": 0.2940851299060254, - "grad_norm": 3.43259334564209, - "learning_rate": 3.6299033541785105e-05, - "loss": 5.5161, - "step": 5320 - }, - { - "epoch": 0.2943615257048093, - "grad_norm": 2.980299711227417, - "learning_rate": 3.628482092097783e-05, - "loss": 5.4279, - "step": 5325 - }, - { - "epoch": 0.29463792150359314, - "grad_norm": 2.5553932189941406, - "learning_rate": 3.627060830017055e-05, - "loss": 5.2727, - "step": 5330 - }, - { - "epoch": 0.294914317302377, - "grad_norm": 2.9207799434661865, - "learning_rate": 3.625639567936328e-05, - "loss": 5.5978, - "step": 5335 - }, - { - "epoch": 0.29519071310116085, - "grad_norm": 2.6809563636779785, - "learning_rate": 3.6242183058556e-05, - "loss": 5.6545, - "step": 5340 - }, - { - "epoch": 0.2954671088999447, - "grad_norm": 3.1138181686401367, - "learning_rate": 3.622797043774872e-05, - "loss": 5.3426, - "step": 5345 - }, - { - "epoch": 0.29574350469872857, - "grad_norm": 3.2143821716308594, - "learning_rate": 3.621375781694145e-05, - "loss": 5.5284, - "step": 5350 - }, - { - "epoch": 0.2960199004975124, - "grad_norm": 3.2884023189544678, - "learning_rate": 3.619954519613417e-05, - "loss": 5.3533, - "step": 5355 - }, - { - "epoch": 0.2962962962962963, - "grad_norm": 2.707595109939575, - "learning_rate": 3.618533257532689e-05, - "loss": 5.4062, - "step": 5360 - }, - { - "epoch": 0.29657269209508014, - "grad_norm": 2.758535146713257, - "learning_rate": 3.6171119954519614e-05, - "loss": 5.3409, - "step": 5365 - }, - { - "epoch": 0.296849087893864, - "grad_norm": 3.2588820457458496, - "learning_rate": 3.615690733371234e-05, - "loss": 5.4564, - "step": 5370 - }, - { - "epoch": 0.29712548369264785, - "grad_norm": 3.819500207901001, - "learning_rate": 3.614269471290506e-05, - "loss": 5.5978, - "step": 5375 - }, - { - "epoch": 0.2974018794914317, - "grad_norm": 3.1476078033447266, - "learning_rate": 3.6128482092097786e-05, - "loss": 5.1632, - "step": 5380 - }, - { - "epoch": 0.29767827529021557, - "grad_norm": 2.8561973571777344, - "learning_rate": 3.611426947129051e-05, - "loss": 5.5271, - "step": 5385 - }, - { - "epoch": 0.2979546710889994, - "grad_norm": 2.6981911659240723, - "learning_rate": 3.6100056850483234e-05, - "loss": 5.5108, - "step": 5390 - }, - { - "epoch": 0.2982310668877833, - "grad_norm": 3.0033581256866455, - "learning_rate": 3.608584422967595e-05, - "loss": 5.2921, - "step": 5395 - }, - { - "epoch": 0.29850746268656714, - "grad_norm": 3.7791874408721924, - "learning_rate": 3.6071631608868675e-05, - "loss": 5.5129, - "step": 5400 - }, - { - "epoch": 0.29878385848535105, - "grad_norm": 2.5647666454315186, - "learning_rate": 3.60574189880614e-05, - "loss": 5.7754, - "step": 5405 - }, - { - "epoch": 0.2990602542841349, - "grad_norm": 2.5552051067352295, - "learning_rate": 3.604320636725412e-05, - "loss": 5.2537, - "step": 5410 - }, - { - "epoch": 0.29933665008291876, - "grad_norm": 3.173316478729248, - "learning_rate": 3.602899374644685e-05, - "loss": 5.7515, - "step": 5415 - }, - { - "epoch": 0.2996130458817026, - "grad_norm": 2.4736528396606445, - "learning_rate": 3.601478112563957e-05, - "loss": 5.0059, - "step": 5420 - }, - { - "epoch": 0.2998894416804865, - "grad_norm": 3.348816394805908, - "learning_rate": 3.6000568504832295e-05, - "loss": 5.477, - "step": 5425 - }, - { - "epoch": 0.30016583747927034, - "grad_norm": 2.6805813312530518, - "learning_rate": 3.598635588402501e-05, - "loss": 5.5837, - "step": 5430 - }, - { - "epoch": 0.3004422332780542, - "grad_norm": 3.264838218688965, - "learning_rate": 3.5972143263217736e-05, - "loss": 5.6984, - "step": 5435 - }, - { - "epoch": 0.30071862907683805, - "grad_norm": 2.8391873836517334, - "learning_rate": 3.595793064241047e-05, - "loss": 5.2333, - "step": 5440 - }, - { - "epoch": 0.3009950248756219, - "grad_norm": 3.601405382156372, - "learning_rate": 3.5943718021603184e-05, - "loss": 5.2084, - "step": 5445 - }, - { - "epoch": 0.30127142067440577, - "grad_norm": 3.3208744525909424, - "learning_rate": 3.592950540079591e-05, - "loss": 5.18, - "step": 5450 - }, - { - "epoch": 0.3015478164731896, - "grad_norm": 3.2721526622772217, - "learning_rate": 3.591529277998863e-05, - "loss": 5.6478, - "step": 5455 - }, - { - "epoch": 0.3018242122719735, - "grad_norm": 3.912829637527466, - "learning_rate": 3.5901080159181356e-05, - "loss": 5.4232, - "step": 5460 - }, - { - "epoch": 0.30210060807075734, - "grad_norm": 3.12005615234375, - "learning_rate": 3.588686753837408e-05, - "loss": 5.6941, - "step": 5465 - }, - { - "epoch": 0.3023770038695412, - "grad_norm": 3.007559061050415, - "learning_rate": 3.58726549175668e-05, - "loss": 5.3598, - "step": 5470 - }, - { - "epoch": 0.30265339966832505, - "grad_norm": 3.6492421627044678, - "learning_rate": 3.585844229675953e-05, - "loss": 5.6721, - "step": 5475 - }, - { - "epoch": 0.3029297954671089, - "grad_norm": 3.9073736667633057, - "learning_rate": 3.5844229675952245e-05, - "loss": 5.4005, - "step": 5480 - }, - { - "epoch": 0.30320619126589277, - "grad_norm": 3.031067371368408, - "learning_rate": 3.583001705514497e-05, - "loss": 5.3484, - "step": 5485 - }, - { - "epoch": 0.3034825870646766, - "grad_norm": 3.599224090576172, - "learning_rate": 3.581580443433769e-05, - "loss": 5.4078, - "step": 5490 - }, - { - "epoch": 0.3037589828634605, - "grad_norm": 3.183856964111328, - "learning_rate": 3.580159181353042e-05, - "loss": 5.5047, - "step": 5495 - }, - { - "epoch": 0.30403537866224434, - "grad_norm": 3.2303097248077393, - "learning_rate": 3.578737919272314e-05, - "loss": 5.5218, - "step": 5500 - }, - { - "epoch": 0.3043117744610282, - "grad_norm": 3.168322801589966, - "learning_rate": 3.577316657191586e-05, - "loss": 5.4699, - "step": 5505 - }, - { - "epoch": 0.30458817025981205, - "grad_norm": 2.8198087215423584, - "learning_rate": 3.575895395110859e-05, - "loss": 4.9456, - "step": 5510 - }, - { - "epoch": 0.3048645660585959, - "grad_norm": 3.3285229206085205, - "learning_rate": 3.5744741330301307e-05, - "loss": 5.5881, - "step": 5515 - }, - { - "epoch": 0.30514096185737977, - "grad_norm": 3.381110668182373, - "learning_rate": 3.573052870949403e-05, - "loss": 5.8306, - "step": 5520 - }, - { - "epoch": 0.3054173576561636, - "grad_norm": 2.8767781257629395, - "learning_rate": 3.5716316088686754e-05, - "loss": 5.2034, - "step": 5525 - }, - { - "epoch": 0.3056937534549475, - "grad_norm": 4.404655456542969, - "learning_rate": 3.570210346787948e-05, - "loss": 5.4832, - "step": 5530 - }, - { - "epoch": 0.30597014925373134, - "grad_norm": 2.4534425735473633, - "learning_rate": 3.56878908470722e-05, - "loss": 5.1591, - "step": 5535 - }, - { - "epoch": 0.3062465450525152, - "grad_norm": 2.891896963119507, - "learning_rate": 3.567367822626492e-05, - "loss": 5.253, - "step": 5540 - }, - { - "epoch": 0.30652294085129905, - "grad_norm": 2.3647806644439697, - "learning_rate": 3.565946560545765e-05, - "loss": 5.4199, - "step": 5545 - }, - { - "epoch": 0.3067993366500829, - "grad_norm": 3.0437514781951904, - "learning_rate": 3.5645252984650374e-05, - "loss": 5.4336, - "step": 5550 - }, - { - "epoch": 0.30707573244886677, - "grad_norm": 3.7891740798950195, - "learning_rate": 3.563104036384309e-05, - "loss": 5.5751, - "step": 5555 - }, - { - "epoch": 0.3073521282476506, - "grad_norm": 3.7321300506591797, - "learning_rate": 3.5616827743035816e-05, - "loss": 5.319, - "step": 5560 - }, - { - "epoch": 0.3076285240464345, - "grad_norm": 3.2207529544830322, - "learning_rate": 3.560261512222854e-05, - "loss": 5.74, - "step": 5565 - }, - { - "epoch": 0.30790491984521834, - "grad_norm": 3.2805068492889404, - "learning_rate": 3.5588402501421263e-05, - "loss": 5.2347, - "step": 5570 - }, - { - "epoch": 0.3081813156440022, - "grad_norm": 2.671035051345825, - "learning_rate": 3.557418988061399e-05, - "loss": 5.4821, - "step": 5575 - }, - { - "epoch": 0.30845771144278605, - "grad_norm": 2.748237133026123, - "learning_rate": 3.555997725980671e-05, - "loss": 5.2953, - "step": 5580 - }, - { - "epoch": 0.3087341072415699, - "grad_norm": 3.34366512298584, - "learning_rate": 3.5545764638999435e-05, - "loss": 5.336, - "step": 5585 - }, - { - "epoch": 0.30901050304035377, - "grad_norm": 3.0358309745788574, - "learning_rate": 3.553155201819215e-05, - "loss": 5.6294, - "step": 5590 - }, - { - "epoch": 0.3092868988391376, - "grad_norm": 2.842928171157837, - "learning_rate": 3.5517339397384883e-05, - "loss": 5.2573, - "step": 5595 - }, - { - "epoch": 0.3095632946379215, - "grad_norm": 3.203237533569336, - "learning_rate": 3.55031267765776e-05, - "loss": 5.4097, - "step": 5600 - }, - { - "epoch": 0.30983969043670534, - "grad_norm": 3.44087290763855, - "learning_rate": 3.5488914155770325e-05, - "loss": 5.252, - "step": 5605 - }, - { - "epoch": 0.3101160862354892, - "grad_norm": 2.9737708568573, - "learning_rate": 3.547470153496305e-05, - "loss": 5.3748, - "step": 5610 - }, - { - "epoch": 0.31039248203427305, - "grad_norm": 2.6989824771881104, - "learning_rate": 3.546048891415577e-05, - "loss": 5.612, - "step": 5615 - }, - { - "epoch": 0.3106688778330569, - "grad_norm": 3.0308315753936768, - "learning_rate": 3.5446276293348497e-05, - "loss": 5.5512, - "step": 5620 - }, - { - "epoch": 0.31094527363184077, - "grad_norm": 3.383033275604248, - "learning_rate": 3.5432063672541214e-05, - "loss": 5.4498, - "step": 5625 - }, - { - "epoch": 0.3112216694306247, - "grad_norm": 2.5991249084472656, - "learning_rate": 3.5417851051733944e-05, - "loss": 5.3198, - "step": 5630 - }, - { - "epoch": 0.31149806522940854, - "grad_norm": 3.038034439086914, - "learning_rate": 3.540363843092667e-05, - "loss": 5.6306, - "step": 5635 - }, - { - "epoch": 0.3117744610281924, - "grad_norm": 3.4985435009002686, - "learning_rate": 3.5389425810119386e-05, - "loss": 5.7529, - "step": 5640 - }, - { - "epoch": 0.31205085682697625, - "grad_norm": 3.8740835189819336, - "learning_rate": 3.537521318931211e-05, - "loss": 5.5961, - "step": 5645 - }, - { - "epoch": 0.3123272526257601, - "grad_norm": 2.630774974822998, - "learning_rate": 3.5361000568504834e-05, - "loss": 5.4196, - "step": 5650 - }, - { - "epoch": 0.31260364842454397, - "grad_norm": 2.864548683166504, - "learning_rate": 3.534678794769756e-05, - "loss": 5.4804, - "step": 5655 - }, - { - "epoch": 0.3128800442233278, - "grad_norm": 2.944843053817749, - "learning_rate": 3.533257532689028e-05, - "loss": 5.6182, - "step": 5660 - }, - { - "epoch": 0.3131564400221117, - "grad_norm": 2.843414545059204, - "learning_rate": 3.5318362706083006e-05, - "loss": 5.3153, - "step": 5665 - }, - { - "epoch": 0.31343283582089554, - "grad_norm": 2.6151304244995117, - "learning_rate": 3.530415008527573e-05, - "loss": 5.2085, - "step": 5670 - }, - { - "epoch": 0.3137092316196794, - "grad_norm": 3.2855122089385986, - "learning_rate": 3.528993746446845e-05, - "loss": 5.1338, - "step": 5675 - }, - { - "epoch": 0.31398562741846325, - "grad_norm": 2.8010687828063965, - "learning_rate": 3.527572484366117e-05, - "loss": 5.644, - "step": 5680 - }, - { - "epoch": 0.3142620232172471, - "grad_norm": 3.428339958190918, - "learning_rate": 3.52615122228539e-05, - "loss": 5.4467, - "step": 5685 - }, - { - "epoch": 0.31453841901603097, - "grad_norm": 2.4719932079315186, - "learning_rate": 3.524729960204662e-05, - "loss": 5.5976, - "step": 5690 - }, - { - "epoch": 0.3148148148148148, - "grad_norm": 3.2966175079345703, - "learning_rate": 3.523308698123934e-05, - "loss": 5.6092, - "step": 5695 - }, - { - "epoch": 0.3150912106135987, - "grad_norm": 3.152592658996582, - "learning_rate": 3.521887436043207e-05, - "loss": 5.3276, - "step": 5700 - }, - { - "epoch": 0.31536760641238254, - "grad_norm": 2.5936498641967773, - "learning_rate": 3.520466173962479e-05, - "loss": 5.5664, - "step": 5705 - }, - { - "epoch": 0.3156440022111664, - "grad_norm": 3.2857308387756348, - "learning_rate": 3.519044911881751e-05, - "loss": 5.0564, - "step": 5710 - }, - { - "epoch": 0.31592039800995025, - "grad_norm": 3.022956371307373, - "learning_rate": 3.517623649801023e-05, - "loss": 5.3535, - "step": 5715 - }, - { - "epoch": 0.3161967938087341, - "grad_norm": 2.88362455368042, - "learning_rate": 3.516202387720296e-05, - "loss": 5.6187, - "step": 5720 - }, - { - "epoch": 0.31647318960751797, - "grad_norm": 3.379072427749634, - "learning_rate": 3.514781125639568e-05, - "loss": 5.6233, - "step": 5725 - }, - { - "epoch": 0.3167495854063018, - "grad_norm": 2.9026386737823486, - "learning_rate": 3.5133598635588404e-05, - "loss": 5.5723, - "step": 5730 - }, - { - "epoch": 0.3170259812050857, - "grad_norm": 3.1735050678253174, - "learning_rate": 3.511938601478113e-05, - "loss": 5.3733, - "step": 5735 - }, - { - "epoch": 0.31730237700386954, - "grad_norm": 2.6387147903442383, - "learning_rate": 3.510517339397385e-05, - "loss": 5.5433, - "step": 5740 - }, - { - "epoch": 0.3175787728026534, - "grad_norm": 2.5088794231414795, - "learning_rate": 3.5090960773166576e-05, - "loss": 5.3205, - "step": 5745 - }, - { - "epoch": 0.31785516860143725, - "grad_norm": 3.163351535797119, - "learning_rate": 3.507674815235929e-05, - "loss": 5.4311, - "step": 5750 - }, - { - "epoch": 0.3181315644002211, - "grad_norm": 3.1320881843566895, - "learning_rate": 3.5062535531552024e-05, - "loss": 5.3945, - "step": 5755 - }, - { - "epoch": 0.31840796019900497, - "grad_norm": 2.6302387714385986, - "learning_rate": 3.504832291074474e-05, - "loss": 5.2206, - "step": 5760 - }, - { - "epoch": 0.3186843559977888, - "grad_norm": 2.8344478607177734, - "learning_rate": 3.5034110289937465e-05, - "loss": 5.2453, - "step": 5765 - }, - { - "epoch": 0.3189607517965727, - "grad_norm": 3.076878786087036, - "learning_rate": 3.501989766913019e-05, - "loss": 5.6607, - "step": 5770 - }, - { - "epoch": 0.31923714759535654, - "grad_norm": 2.7953310012817383, - "learning_rate": 3.500568504832291e-05, - "loss": 5.4197, - "step": 5775 - }, - { - "epoch": 0.3195135433941404, - "grad_norm": 3.1712965965270996, - "learning_rate": 3.499147242751564e-05, - "loss": 5.5598, - "step": 5780 - }, - { - "epoch": 0.31978993919292426, - "grad_norm": 4.416378021240234, - "learning_rate": 3.4977259806708354e-05, - "loss": 5.5493, - "step": 5785 - }, - { - "epoch": 0.3200663349917081, - "grad_norm": 3.5662577152252197, - "learning_rate": 3.4963047185901085e-05, - "loss": 5.9117, - "step": 5790 - }, - { - "epoch": 0.32034273079049197, - "grad_norm": 3.769775390625, - "learning_rate": 3.494883456509381e-05, - "loss": 5.3641, - "step": 5795 - }, - { - "epoch": 0.3206191265892758, - "grad_norm": 3.322216749191284, - "learning_rate": 3.4934621944286526e-05, - "loss": 5.5554, - "step": 5800 - }, - { - "epoch": 0.3208955223880597, - "grad_norm": 2.9367949962615967, - "learning_rate": 3.492040932347925e-05, - "loss": 5.403, - "step": 5805 - }, - { - "epoch": 0.32117191818684354, - "grad_norm": 2.9416840076446533, - "learning_rate": 3.4906196702671974e-05, - "loss": 5.3599, - "step": 5810 - }, - { - "epoch": 0.3214483139856274, - "grad_norm": 3.0525002479553223, - "learning_rate": 3.48919840818647e-05, - "loss": 5.3231, - "step": 5815 - }, - { - "epoch": 0.32172470978441126, - "grad_norm": 3.840785503387451, - "learning_rate": 3.4877771461057415e-05, - "loss": 5.6919, - "step": 5820 - }, - { - "epoch": 0.3220011055831951, - "grad_norm": 2.738666296005249, - "learning_rate": 3.4863558840250146e-05, - "loss": 5.6685, - "step": 5825 - }, - { - "epoch": 0.32227750138197897, - "grad_norm": 4.050598621368408, - "learning_rate": 3.484934621944287e-05, - "loss": 5.4941, - "step": 5830 - }, - { - "epoch": 0.3225538971807628, - "grad_norm": 3.0634477138519287, - "learning_rate": 3.483513359863559e-05, - "loss": 5.3485, - "step": 5835 - }, - { - "epoch": 0.3228302929795467, - "grad_norm": 3.11482572555542, - "learning_rate": 3.482092097782831e-05, - "loss": 5.1452, - "step": 5840 - }, - { - "epoch": 0.32310668877833054, - "grad_norm": 2.8689663410186768, - "learning_rate": 3.4806708357021035e-05, - "loss": 5.397, - "step": 5845 - }, - { - "epoch": 0.32338308457711445, - "grad_norm": 3.4078924655914307, - "learning_rate": 3.479249573621376e-05, - "loss": 5.6103, - "step": 5850 - }, - { - "epoch": 0.3236594803758983, - "grad_norm": 3.1890652179718018, - "learning_rate": 3.477828311540648e-05, - "loss": 5.482, - "step": 5855 - }, - { - "epoch": 0.32393587617468217, - "grad_norm": 3.211745023727417, - "learning_rate": 3.476407049459921e-05, - "loss": 5.2123, - "step": 5860 - }, - { - "epoch": 0.324212271973466, - "grad_norm": 3.373166799545288, - "learning_rate": 3.474985787379193e-05, - "loss": 5.4259, - "step": 5865 - }, - { - "epoch": 0.3244886677722499, - "grad_norm": 3.0592551231384277, - "learning_rate": 3.473564525298465e-05, - "loss": 5.4406, - "step": 5870 - }, - { - "epoch": 0.32476506357103374, - "grad_norm": 3.850008010864258, - "learning_rate": 3.472143263217737e-05, - "loss": 5.4733, - "step": 5875 - }, - { - "epoch": 0.3250414593698176, - "grad_norm": 3.4215028285980225, - "learning_rate": 3.47072200113701e-05, - "loss": 5.6001, - "step": 5880 - }, - { - "epoch": 0.32531785516860146, - "grad_norm": 2.7632102966308594, - "learning_rate": 3.469300739056282e-05, - "loss": 5.3779, - "step": 5885 - }, - { - "epoch": 0.3255942509673853, - "grad_norm": 2.7702176570892334, - "learning_rate": 3.4678794769755544e-05, - "loss": 5.3826, - "step": 5890 - }, - { - "epoch": 0.32587064676616917, - "grad_norm": 2.870290517807007, - "learning_rate": 3.466458214894827e-05, - "loss": 5.4052, - "step": 5895 - }, - { - "epoch": 0.326147042564953, - "grad_norm": 3.039081573486328, - "learning_rate": 3.465036952814099e-05, - "loss": 5.2297, - "step": 5900 - }, - { - "epoch": 0.3264234383637369, - "grad_norm": 3.192931890487671, - "learning_rate": 3.4636156907333716e-05, - "loss": 5.424, - "step": 5905 - }, - { - "epoch": 0.32669983416252074, - "grad_norm": 2.7073121070861816, - "learning_rate": 3.462194428652644e-05, - "loss": 5.4958, - "step": 5910 - }, - { - "epoch": 0.3269762299613046, - "grad_norm": 3.2365293502807617, - "learning_rate": 3.4607731665719164e-05, - "loss": 5.099, - "step": 5915 - }, - { - "epoch": 0.32725262576008846, - "grad_norm": 3.3595356941223145, - "learning_rate": 3.459351904491188e-05, - "loss": 5.5425, - "step": 5920 - }, - { - "epoch": 0.3275290215588723, - "grad_norm": 2.8691320419311523, - "learning_rate": 3.4579306424104605e-05, - "loss": 5.4369, - "step": 5925 - }, - { - "epoch": 0.32780541735765617, - "grad_norm": 3.031155586242676, - "learning_rate": 3.456509380329733e-05, - "loss": 5.4964, - "step": 5930 - }, - { - "epoch": 0.32808181315644, - "grad_norm": 2.736032247543335, - "learning_rate": 3.455088118249005e-05, - "loss": 5.3402, - "step": 5935 - }, - { - "epoch": 0.3283582089552239, - "grad_norm": 3.4750332832336426, - "learning_rate": 3.453666856168278e-05, - "loss": 5.2481, - "step": 5940 - }, - { - "epoch": 0.32863460475400774, - "grad_norm": 2.969907522201538, - "learning_rate": 3.45224559408755e-05, - "loss": 5.2518, - "step": 5945 - }, - { - "epoch": 0.3289110005527916, - "grad_norm": 3.4692533016204834, - "learning_rate": 3.4508243320068225e-05, - "loss": 5.7448, - "step": 5950 - }, - { - "epoch": 0.32918739635157546, - "grad_norm": 2.769881010055542, - "learning_rate": 3.449403069926094e-05, - "loss": 5.5949, - "step": 5955 - }, - { - "epoch": 0.3294637921503593, - "grad_norm": 2.9238779544830322, - "learning_rate": 3.4479818078453666e-05, - "loss": 5.495, - "step": 5960 - }, - { - "epoch": 0.32974018794914317, - "grad_norm": 3.0299391746520996, - "learning_rate": 3.44656054576464e-05, - "loss": 5.2859, - "step": 5965 - }, - { - "epoch": 0.33001658374792703, - "grad_norm": 3.4507875442504883, - "learning_rate": 3.4451392836839114e-05, - "loss": 5.1926, - "step": 5970 - }, - { - "epoch": 0.3302929795467109, - "grad_norm": 3.147733688354492, - "learning_rate": 3.443718021603184e-05, - "loss": 5.4821, - "step": 5975 - }, - { - "epoch": 0.33056937534549474, - "grad_norm": 2.713547945022583, - "learning_rate": 3.442296759522456e-05, - "loss": 5.3562, - "step": 5980 - }, - { - "epoch": 0.3308457711442786, - "grad_norm": 3.38211727142334, - "learning_rate": 3.4408754974417286e-05, - "loss": 5.69, - "step": 5985 - }, - { - "epoch": 0.33112216694306246, - "grad_norm": 3.258375644683838, - "learning_rate": 3.439454235361001e-05, - "loss": 5.45, - "step": 5990 - }, - { - "epoch": 0.3313985627418463, - "grad_norm": 3.048330307006836, - "learning_rate": 3.438032973280273e-05, - "loss": 5.2408, - "step": 5995 - }, - { - "epoch": 0.33167495854063017, - "grad_norm": 3.6864914894104004, - "learning_rate": 3.436611711199546e-05, - "loss": 5.5333, - "step": 6000 - }, - { - "epoch": 0.33195135433941403, - "grad_norm": 3.327286958694458, - "learning_rate": 3.4351904491188175e-05, - "loss": 5.4176, - "step": 6005 - }, - { - "epoch": 0.3322277501381979, - "grad_norm": 2.82293438911438, - "learning_rate": 3.43376918703809e-05, - "loss": 5.7049, - "step": 6010 - }, - { - "epoch": 0.33250414593698174, - "grad_norm": 2.9626595973968506, - "learning_rate": 3.432347924957362e-05, - "loss": 5.7763, - "step": 6015 - }, - { - "epoch": 0.3327805417357656, - "grad_norm": 3.8101632595062256, - "learning_rate": 3.430926662876635e-05, - "loss": 5.1365, - "step": 6020 - }, - { - "epoch": 0.33305693753454946, - "grad_norm": 3.6021718978881836, - "learning_rate": 3.429505400795907e-05, - "loss": 5.3274, - "step": 6025 - }, - { - "epoch": 0.3333333333333333, - "grad_norm": 2.758277654647827, - "learning_rate": 3.428084138715179e-05, - "loss": 5.4596, - "step": 6030 - }, - { - "epoch": 0.33360972913211717, - "grad_norm": 3.2654664516448975, - "learning_rate": 3.426662876634452e-05, - "loss": 5.4182, - "step": 6035 - }, - { - "epoch": 0.33388612493090103, - "grad_norm": 3.1895666122436523, - "learning_rate": 3.4252416145537236e-05, - "loss": 5.4077, - "step": 6040 - }, - { - "epoch": 0.3341625207296849, - "grad_norm": 2.5739054679870605, - "learning_rate": 3.423820352472996e-05, - "loss": 5.4277, - "step": 6045 - }, - { - "epoch": 0.33443891652846874, - "grad_norm": 2.6572439670562744, - "learning_rate": 3.4223990903922684e-05, - "loss": 5.7119, - "step": 6050 - }, - { - "epoch": 0.3347153123272526, - "grad_norm": 3.490938663482666, - "learning_rate": 3.420977828311541e-05, - "loss": 5.3248, - "step": 6055 - }, - { - "epoch": 0.33499170812603646, - "grad_norm": 3.159533977508545, - "learning_rate": 3.419556566230813e-05, - "loss": 5.3683, - "step": 6060 - }, - { - "epoch": 0.3352681039248203, - "grad_norm": 2.4630796909332275, - "learning_rate": 3.418135304150085e-05, - "loss": 5.6767, - "step": 6065 - }, - { - "epoch": 0.33554449972360423, - "grad_norm": 2.582796812057495, - "learning_rate": 3.416714042069358e-05, - "loss": 5.3356, - "step": 6070 - }, - { - "epoch": 0.3358208955223881, - "grad_norm": 2.7957217693328857, - "learning_rate": 3.4152927799886304e-05, - "loss": 5.3454, - "step": 6075 - }, - { - "epoch": 0.33609729132117194, - "grad_norm": 2.7028167247772217, - "learning_rate": 3.413871517907902e-05, - "loss": 5.0287, - "step": 6080 - }, - { - "epoch": 0.3363736871199558, - "grad_norm": 2.7556872367858887, - "learning_rate": 3.4124502558271745e-05, - "loss": 5.5228, - "step": 6085 - }, - { - "epoch": 0.33665008291873966, - "grad_norm": 3.9431374073028564, - "learning_rate": 3.411028993746447e-05, - "loss": 5.4459, - "step": 6090 - }, - { - "epoch": 0.3369264787175235, - "grad_norm": 3.6302506923675537, - "learning_rate": 3.409607731665719e-05, - "loss": 5.7821, - "step": 6095 - }, - { - "epoch": 0.33720287451630737, - "grad_norm": 2.845989227294922, - "learning_rate": 3.408186469584992e-05, - "loss": 5.214, - "step": 6100 - }, - { - "epoch": 0.33747927031509123, - "grad_norm": 2.877218008041382, - "learning_rate": 3.406765207504264e-05, - "loss": 5.2843, - "step": 6105 - }, - { - "epoch": 0.3377556661138751, - "grad_norm": 2.90212082862854, - "learning_rate": 3.4053439454235365e-05, - "loss": 5.1521, - "step": 6110 - }, - { - "epoch": 0.33803206191265894, - "grad_norm": 3.701557159423828, - "learning_rate": 3.403922683342808e-05, - "loss": 5.4906, - "step": 6115 - }, - { - "epoch": 0.3383084577114428, - "grad_norm": 2.650724411010742, - "learning_rate": 3.4025014212620806e-05, - "loss": 5.339, - "step": 6120 - }, - { - "epoch": 0.33858485351022666, - "grad_norm": 3.82738995552063, - "learning_rate": 3.401080159181354e-05, - "loss": 5.489, - "step": 6125 - }, - { - "epoch": 0.3388612493090105, - "grad_norm": 3.1155989170074463, - "learning_rate": 3.3996588971006254e-05, - "loss": 4.9967, - "step": 6130 - }, - { - "epoch": 0.33913764510779437, - "grad_norm": 2.9814980030059814, - "learning_rate": 3.398237635019898e-05, - "loss": 5.3042, - "step": 6135 - }, - { - "epoch": 0.33941404090657823, - "grad_norm": 4.761054515838623, - "learning_rate": 3.39681637293917e-05, - "loss": 5.4005, - "step": 6140 - }, - { - "epoch": 0.3396904367053621, - "grad_norm": 2.9018607139587402, - "learning_rate": 3.3953951108584426e-05, - "loss": 5.2394, - "step": 6145 - }, - { - "epoch": 0.33996683250414594, - "grad_norm": 2.923269748687744, - "learning_rate": 3.3939738487777143e-05, - "loss": 5.3934, - "step": 6150 - }, - { - "epoch": 0.3402432283029298, - "grad_norm": 3.082612991333008, - "learning_rate": 3.392552586696987e-05, - "loss": 5.3588, - "step": 6155 - }, - { - "epoch": 0.34051962410171366, - "grad_norm": 2.4649646282196045, - "learning_rate": 3.39113132461626e-05, - "loss": 4.7427, - "step": 6160 - }, - { - "epoch": 0.3407960199004975, - "grad_norm": 4.229916095733643, - "learning_rate": 3.3897100625355315e-05, - "loss": 5.5868, - "step": 6165 - }, - { - "epoch": 0.3410724156992814, - "grad_norm": 2.851255416870117, - "learning_rate": 3.388288800454804e-05, - "loss": 5.097, - "step": 6170 - }, - { - "epoch": 0.34134881149806523, - "grad_norm": 3.1447596549987793, - "learning_rate": 3.386867538374076e-05, - "loss": 5.5086, - "step": 6175 - }, - { - "epoch": 0.3416252072968491, - "grad_norm": 3.191239833831787, - "learning_rate": 3.385446276293349e-05, - "loss": 5.7917, - "step": 6180 - }, - { - "epoch": 0.34190160309563294, - "grad_norm": 3.4441893100738525, - "learning_rate": 3.384025014212621e-05, - "loss": 5.2703, - "step": 6185 - }, - { - "epoch": 0.3421779988944168, - "grad_norm": 2.9748077392578125, - "learning_rate": 3.382603752131893e-05, - "loss": 5.4914, - "step": 6190 - }, - { - "epoch": 0.34245439469320066, - "grad_norm": 2.859182834625244, - "learning_rate": 3.381182490051166e-05, - "loss": 5.1884, - "step": 6195 - }, - { - "epoch": 0.3427307904919845, - "grad_norm": 3.529243230819702, - "learning_rate": 3.3797612279704376e-05, - "loss": 5.2572, - "step": 6200 - }, - { - "epoch": 0.3430071862907684, - "grad_norm": 3.0292012691497803, - "learning_rate": 3.37833996588971e-05, - "loss": 5.4534, - "step": 6205 - }, - { - "epoch": 0.34328358208955223, - "grad_norm": 2.9836554527282715, - "learning_rate": 3.376918703808983e-05, - "loss": 5.3841, - "step": 6210 - }, - { - "epoch": 0.3435599778883361, - "grad_norm": 2.9530041217803955, - "learning_rate": 3.375497441728255e-05, - "loss": 5.4718, - "step": 6215 - }, - { - "epoch": 0.34383637368711994, - "grad_norm": 4.38737678527832, - "learning_rate": 3.374076179647527e-05, - "loss": 5.5249, - "step": 6220 - }, - { - "epoch": 0.3441127694859038, - "grad_norm": 2.9150431156158447, - "learning_rate": 3.3726549175667996e-05, - "loss": 5.4543, - "step": 6225 - }, - { - "epoch": 0.34438916528468766, - "grad_norm": 2.641899824142456, - "learning_rate": 3.371233655486072e-05, - "loss": 5.3053, - "step": 6230 - }, - { - "epoch": 0.3446655610834715, - "grad_norm": 2.8919003009796143, - "learning_rate": 3.3698123934053444e-05, - "loss": 5.5736, - "step": 6235 - }, - { - "epoch": 0.3449419568822554, - "grad_norm": 4.31245756149292, - "learning_rate": 3.368391131324616e-05, - "loss": 5.4375, - "step": 6240 - }, - { - "epoch": 0.34521835268103923, - "grad_norm": 2.8349204063415527, - "learning_rate": 3.366969869243889e-05, - "loss": 5.4986, - "step": 6245 - }, - { - "epoch": 0.3454947484798231, - "grad_norm": 3.073392629623413, - "learning_rate": 3.365548607163161e-05, - "loss": 5.345, - "step": 6250 - }, - { - "epoch": 0.34577114427860695, - "grad_norm": 3.0366547107696533, - "learning_rate": 3.3641273450824333e-05, - "loss": 5.484, - "step": 6255 - }, - { - "epoch": 0.3460475400773908, - "grad_norm": 2.6022896766662598, - "learning_rate": 3.362706083001706e-05, - "loss": 5.1542, - "step": 6260 - }, - { - "epoch": 0.34632393587617466, - "grad_norm": 2.9307637214660645, - "learning_rate": 3.361284820920978e-05, - "loss": 5.3493, - "step": 6265 - }, - { - "epoch": 0.3466003316749585, - "grad_norm": 3.0827174186706543, - "learning_rate": 3.3598635588402505e-05, - "loss": 5.039, - "step": 6270 - }, - { - "epoch": 0.3468767274737424, - "grad_norm": 3.8074021339416504, - "learning_rate": 3.358442296759522e-05, - "loss": 5.382, - "step": 6275 - }, - { - "epoch": 0.34715312327252623, - "grad_norm": 3.806523561477661, - "learning_rate": 3.357021034678795e-05, - "loss": 5.5725, - "step": 6280 - }, - { - "epoch": 0.3474295190713101, - "grad_norm": 2.643564224243164, - "learning_rate": 3.355599772598067e-05, - "loss": 5.5118, - "step": 6285 - }, - { - "epoch": 0.347705914870094, - "grad_norm": 3.194028377532959, - "learning_rate": 3.3541785105173395e-05, - "loss": 5.5817, - "step": 6290 - }, - { - "epoch": 0.34798231066887786, - "grad_norm": 2.6427788734436035, - "learning_rate": 3.352757248436612e-05, - "loss": 5.3618, - "step": 6295 - }, - { - "epoch": 0.3482587064676617, - "grad_norm": 2.87471604347229, - "learning_rate": 3.351335986355884e-05, - "loss": 5.3261, - "step": 6300 - }, - { - "epoch": 0.3485351022664456, - "grad_norm": 3.181290864944458, - "learning_rate": 3.3499147242751566e-05, - "loss": 5.2144, - "step": 6305 - }, - { - "epoch": 0.34881149806522943, - "grad_norm": 3.502073049545288, - "learning_rate": 3.3484934621944284e-05, - "loss": 5.4402, - "step": 6310 - }, - { - "epoch": 0.3490878938640133, - "grad_norm": 3.2654974460601807, - "learning_rate": 3.3470722001137014e-05, - "loss": 5.5757, - "step": 6315 - }, - { - "epoch": 0.34936428966279715, - "grad_norm": 2.6679444313049316, - "learning_rate": 3.345650938032974e-05, - "loss": 5.2295, - "step": 6320 - }, - { - "epoch": 0.349640685461581, - "grad_norm": 3.1147098541259766, - "learning_rate": 3.3442296759522456e-05, - "loss": 5.3529, - "step": 6325 - }, - { - "epoch": 0.34991708126036486, - "grad_norm": 2.5592527389526367, - "learning_rate": 3.342808413871518e-05, - "loss": 5.1588, - "step": 6330 - }, - { - "epoch": 0.3501934770591487, - "grad_norm": 2.633533000946045, - "learning_rate": 3.3413871517907904e-05, - "loss": 5.4131, - "step": 6335 - }, - { - "epoch": 0.3504698728579326, - "grad_norm": 3.3299381732940674, - "learning_rate": 3.339965889710063e-05, - "loss": 5.1725, - "step": 6340 - }, - { - "epoch": 0.35074626865671643, - "grad_norm": 2.768136978149414, - "learning_rate": 3.338544627629335e-05, - "loss": 5.6988, - "step": 6345 - }, - { - "epoch": 0.3510226644555003, - "grad_norm": 3.701432228088379, - "learning_rate": 3.3371233655486076e-05, - "loss": 5.5261, - "step": 6350 - }, - { - "epoch": 0.35129906025428415, - "grad_norm": 4.025837421417236, - "learning_rate": 3.33570210346788e-05, - "loss": 5.1812, - "step": 6355 - }, - { - "epoch": 0.351575456053068, - "grad_norm": 2.9328114986419678, - "learning_rate": 3.334280841387152e-05, - "loss": 5.3862, - "step": 6360 - }, - { - "epoch": 0.35185185185185186, - "grad_norm": 2.7843148708343506, - "learning_rate": 3.332859579306424e-05, - "loss": 5.6493, - "step": 6365 - }, - { - "epoch": 0.3521282476506357, - "grad_norm": 3.647606134414673, - "learning_rate": 3.3314383172256965e-05, - "loss": 5.7016, - "step": 6370 - }, - { - "epoch": 0.3524046434494196, - "grad_norm": 2.85952091217041, - "learning_rate": 3.330017055144969e-05, - "loss": 5.287, - "step": 6375 - }, - { - "epoch": 0.35268103924820343, - "grad_norm": 2.931126356124878, - "learning_rate": 3.328595793064241e-05, - "loss": 5.2523, - "step": 6380 - }, - { - "epoch": 0.3529574350469873, - "grad_norm": 3.174170970916748, - "learning_rate": 3.3271745309835137e-05, - "loss": 5.1911, - "step": 6385 - }, - { - "epoch": 0.35323383084577115, - "grad_norm": 3.573742389678955, - "learning_rate": 3.325753268902786e-05, - "loss": 4.888, - "step": 6390 - }, - { - "epoch": 0.353510226644555, - "grad_norm": 2.854358673095703, - "learning_rate": 3.324332006822058e-05, - "loss": 5.3531, - "step": 6395 - }, - { - "epoch": 0.35378662244333886, - "grad_norm": 3.4788529872894287, - "learning_rate": 3.32291074474133e-05, - "loss": 5.5047, - "step": 6400 - }, - { - "epoch": 0.3540630182421227, - "grad_norm": 3.1905970573425293, - "learning_rate": 3.321489482660603e-05, - "loss": 5.311, - "step": 6405 - }, - { - "epoch": 0.3543394140409066, - "grad_norm": 3.261735439300537, - "learning_rate": 3.320068220579875e-05, - "loss": 5.3405, - "step": 6410 - }, - { - "epoch": 0.35461580983969043, - "grad_norm": 3.806363344192505, - "learning_rate": 3.3186469584991474e-05, - "loss": 5.2041, - "step": 6415 - }, - { - "epoch": 0.3548922056384743, - "grad_norm": 3.2285566329956055, - "learning_rate": 3.31722569641842e-05, - "loss": 5.2239, - "step": 6420 - }, - { - "epoch": 0.35516860143725815, - "grad_norm": 3.194967269897461, - "learning_rate": 3.315804434337692e-05, - "loss": 5.4333, - "step": 6425 - }, - { - "epoch": 0.355444997236042, - "grad_norm": 3.9801723957061768, - "learning_rate": 3.3143831722569646e-05, - "loss": 5.4375, - "step": 6430 - }, - { - "epoch": 0.35572139303482586, - "grad_norm": 2.690793514251709, - "learning_rate": 3.312961910176236e-05, - "loss": 5.3818, - "step": 6435 - }, - { - "epoch": 0.3559977888336097, - "grad_norm": 3.140059232711792, - "learning_rate": 3.3115406480955094e-05, - "loss": 5.2608, - "step": 6440 - }, - { - "epoch": 0.3562741846323936, - "grad_norm": 3.5246164798736572, - "learning_rate": 3.310119386014781e-05, - "loss": 5.4467, - "step": 6445 - }, - { - "epoch": 0.35655058043117743, - "grad_norm": 3.004757881164551, - "learning_rate": 3.3086981239340535e-05, - "loss": 5.6051, - "step": 6450 - }, - { - "epoch": 0.3568269762299613, - "grad_norm": 2.6584270000457764, - "learning_rate": 3.307276861853326e-05, - "loss": 5.4181, - "step": 6455 - }, - { - "epoch": 0.35710337202874515, - "grad_norm": 2.8345117568969727, - "learning_rate": 3.305855599772598e-05, - "loss": 5.5933, - "step": 6460 - }, - { - "epoch": 0.357379767827529, - "grad_norm": 2.8853676319122314, - "learning_rate": 3.304434337691871e-05, - "loss": 5.1924, - "step": 6465 - }, - { - "epoch": 0.35765616362631286, - "grad_norm": 2.9934706687927246, - "learning_rate": 3.3030130756111424e-05, - "loss": 5.5229, - "step": 6470 - }, - { - "epoch": 0.3579325594250967, - "grad_norm": 3.472885847091675, - "learning_rate": 3.3015918135304155e-05, - "loss": 5.6357, - "step": 6475 - }, - { - "epoch": 0.3582089552238806, - "grad_norm": 3.2652275562286377, - "learning_rate": 3.300170551449687e-05, - "loss": 5.2829, - "step": 6480 - }, - { - "epoch": 0.35848535102266443, - "grad_norm": 3.120035171508789, - "learning_rate": 3.2987492893689596e-05, - "loss": 5.3109, - "step": 6485 - }, - { - "epoch": 0.3587617468214483, - "grad_norm": 2.817810297012329, - "learning_rate": 3.297328027288232e-05, - "loss": 5.243, - "step": 6490 - }, - { - "epoch": 0.35903814262023215, - "grad_norm": 2.679710626602173, - "learning_rate": 3.2959067652075044e-05, - "loss": 5.6024, - "step": 6495 - }, - { - "epoch": 0.359314538419016, - "grad_norm": 3.362010955810547, - "learning_rate": 3.294485503126777e-05, - "loss": 5.3039, - "step": 6500 - }, - { - "epoch": 0.35959093421779986, - "grad_norm": 3.8571290969848633, - "learning_rate": 3.2930642410460485e-05, - "loss": 5.4973, - "step": 6505 - }, - { - "epoch": 0.3598673300165838, - "grad_norm": 3.4675230979919434, - "learning_rate": 3.2916429789653216e-05, - "loss": 5.2216, - "step": 6510 - }, - { - "epoch": 0.36014372581536763, - "grad_norm": 2.4890525341033936, - "learning_rate": 3.290221716884594e-05, - "loss": 5.2977, - "step": 6515 - }, - { - "epoch": 0.3604201216141515, - "grad_norm": 4.0375518798828125, - "learning_rate": 3.288800454803866e-05, - "loss": 5.5202, - "step": 6520 - }, - { - "epoch": 0.36069651741293535, - "grad_norm": 3.2754342555999756, - "learning_rate": 3.287379192723139e-05, - "loss": 5.082, - "step": 6525 - }, - { - "epoch": 0.3609729132117192, - "grad_norm": 2.7093911170959473, - "learning_rate": 3.2859579306424105e-05, - "loss": 5.4033, - "step": 6530 - }, - { - "epoch": 0.36124930901050306, - "grad_norm": 4.006176948547363, - "learning_rate": 3.284536668561683e-05, - "loss": 5.5329, - "step": 6535 - }, - { - "epoch": 0.3615257048092869, - "grad_norm": 4.503550052642822, - "learning_rate": 3.283115406480955e-05, - "loss": 5.3618, - "step": 6540 - }, - { - "epoch": 0.3618021006080708, - "grad_norm": 3.1193487644195557, - "learning_rate": 3.281694144400228e-05, - "loss": 5.3315, - "step": 6545 - }, - { - "epoch": 0.36207849640685463, - "grad_norm": 2.99080228805542, - "learning_rate": 3.2802728823195e-05, - "loss": 5.4393, - "step": 6550 - }, - { - "epoch": 0.3623548922056385, - "grad_norm": 3.051684617996216, - "learning_rate": 3.278851620238772e-05, - "loss": 5.2381, - "step": 6555 - }, - { - "epoch": 0.36263128800442235, - "grad_norm": 3.2539477348327637, - "learning_rate": 3.277430358158045e-05, - "loss": 5.1091, - "step": 6560 - }, - { - "epoch": 0.3629076838032062, - "grad_norm": 2.8935561180114746, - "learning_rate": 3.2760090960773166e-05, - "loss": 5.3882, - "step": 6565 - }, - { - "epoch": 0.36318407960199006, - "grad_norm": 3.101651430130005, - "learning_rate": 3.274587833996589e-05, - "loss": 5.5792, - "step": 6570 - }, - { - "epoch": 0.3634604754007739, - "grad_norm": 2.6292476654052734, - "learning_rate": 3.2731665719158614e-05, - "loss": 5.1631, - "step": 6575 - }, - { - "epoch": 0.3637368711995578, - "grad_norm": 3.1118204593658447, - "learning_rate": 3.271745309835134e-05, - "loss": 5.7077, - "step": 6580 - }, - { - "epoch": 0.36401326699834163, - "grad_norm": 3.469219207763672, - "learning_rate": 3.270324047754406e-05, - "loss": 5.4348, - "step": 6585 - }, - { - "epoch": 0.3642896627971255, - "grad_norm": 3.5681636333465576, - "learning_rate": 3.268902785673678e-05, - "loss": 5.1626, - "step": 6590 - }, - { - "epoch": 0.36456605859590935, - "grad_norm": 3.1905977725982666, - "learning_rate": 3.267481523592951e-05, - "loss": 5.0887, - "step": 6595 - }, - { - "epoch": 0.3648424543946932, - "grad_norm": 3.0895135402679443, - "learning_rate": 3.2660602615122234e-05, - "loss": 5.3645, - "step": 6600 - }, - { - "epoch": 0.36511885019347706, - "grad_norm": 2.88983154296875, - "learning_rate": 3.264638999431495e-05, - "loss": 5.0051, - "step": 6605 - }, - { - "epoch": 0.3653952459922609, - "grad_norm": 2.7491235733032227, - "learning_rate": 3.2632177373507675e-05, - "loss": 5.4856, - "step": 6610 - }, - { - "epoch": 0.3656716417910448, - "grad_norm": 2.5615084171295166, - "learning_rate": 3.26179647527004e-05, - "loss": 5.5509, - "step": 6615 - }, - { - "epoch": 0.36594803758982863, - "grad_norm": 3.6004726886749268, - "learning_rate": 3.260375213189312e-05, - "loss": 4.955, - "step": 6620 - }, - { - "epoch": 0.3662244333886125, - "grad_norm": 3.0466911792755127, - "learning_rate": 3.258953951108585e-05, - "loss": 5.3448, - "step": 6625 - }, - { - "epoch": 0.36650082918739635, - "grad_norm": 3.489597797393799, - "learning_rate": 3.257532689027857e-05, - "loss": 5.239, - "step": 6630 - }, - { - "epoch": 0.3667772249861802, - "grad_norm": 3.6642837524414062, - "learning_rate": 3.2561114269471295e-05, - "loss": 5.7073, - "step": 6635 - }, - { - "epoch": 0.36705362078496406, - "grad_norm": 3.3257694244384766, - "learning_rate": 3.254690164866401e-05, - "loss": 5.5338, - "step": 6640 - }, - { - "epoch": 0.3673300165837479, - "grad_norm": 2.889758825302124, - "learning_rate": 3.2532689027856736e-05, - "loss": 5.4763, - "step": 6645 - }, - { - "epoch": 0.3676064123825318, - "grad_norm": 3.0709822177886963, - "learning_rate": 3.251847640704947e-05, - "loss": 5.4343, - "step": 6650 - }, - { - "epoch": 0.36788280818131563, - "grad_norm": 3.2317843437194824, - "learning_rate": 3.2504263786242184e-05, - "loss": 5.48, - "step": 6655 - }, - { - "epoch": 0.3681592039800995, - "grad_norm": 2.843003034591675, - "learning_rate": 3.249005116543491e-05, - "loss": 5.3191, - "step": 6660 - }, - { - "epoch": 0.36843559977888335, - "grad_norm": 2.82918119430542, - "learning_rate": 3.247583854462763e-05, - "loss": 5.2936, - "step": 6665 - }, - { - "epoch": 0.3687119955776672, - "grad_norm": 3.3583202362060547, - "learning_rate": 3.2461625923820356e-05, - "loss": 5.5645, - "step": 6670 - }, - { - "epoch": 0.36898839137645106, - "grad_norm": 3.859200954437256, - "learning_rate": 3.244741330301307e-05, - "loss": 5.3669, - "step": 6675 - }, - { - "epoch": 0.3692647871752349, - "grad_norm": 2.8152244091033936, - "learning_rate": 3.24332006822058e-05, - "loss": 5.3654, - "step": 6680 - }, - { - "epoch": 0.3695411829740188, - "grad_norm": 2.6449549198150635, - "learning_rate": 3.241898806139853e-05, - "loss": 5.3025, - "step": 6685 - }, - { - "epoch": 0.36981757877280264, - "grad_norm": 2.791011095046997, - "learning_rate": 3.2404775440591245e-05, - "loss": 5.2969, - "step": 6690 - }, - { - "epoch": 0.3700939745715865, - "grad_norm": 3.3841540813446045, - "learning_rate": 3.239056281978397e-05, - "loss": 5.2648, - "step": 6695 - }, - { - "epoch": 0.37037037037037035, - "grad_norm": 2.6592512130737305, - "learning_rate": 3.237635019897669e-05, - "loss": 5.0054, - "step": 6700 - }, - { - "epoch": 0.3706467661691542, - "grad_norm": 3.7593088150024414, - "learning_rate": 3.236213757816942e-05, - "loss": 5.3497, - "step": 6705 - }, - { - "epoch": 0.37092316196793806, - "grad_norm": 3.4678800106048584, - "learning_rate": 3.234792495736214e-05, - "loss": 5.5969, - "step": 6710 - }, - { - "epoch": 0.3711995577667219, - "grad_norm": 2.172253370285034, - "learning_rate": 3.233371233655486e-05, - "loss": 5.2902, - "step": 6715 - }, - { - "epoch": 0.3714759535655058, - "grad_norm": 3.2627487182617188, - "learning_rate": 3.231949971574759e-05, - "loss": 5.5915, - "step": 6720 - }, - { - "epoch": 0.37175234936428964, - "grad_norm": 3.7664973735809326, - "learning_rate": 3.2305287094940306e-05, - "loss": 5.2726, - "step": 6725 - }, - { - "epoch": 0.37202874516307355, - "grad_norm": 4.006364345550537, - "learning_rate": 3.229107447413303e-05, - "loss": 5.1299, - "step": 6730 - }, - { - "epoch": 0.3723051409618574, - "grad_norm": 3.697758436203003, - "learning_rate": 3.2276861853325754e-05, - "loss": 5.4359, - "step": 6735 - }, - { - "epoch": 0.37258153676064126, - "grad_norm": 3.6661410331726074, - "learning_rate": 3.226264923251848e-05, - "loss": 5.3954, - "step": 6740 - }, - { - "epoch": 0.3728579325594251, - "grad_norm": 2.565558910369873, - "learning_rate": 3.22484366117112e-05, - "loss": 5.2579, - "step": 6745 - }, - { - "epoch": 0.373134328358209, - "grad_norm": 4.1350483894348145, - "learning_rate": 3.223422399090392e-05, - "loss": 4.9195, - "step": 6750 - }, - { - "epoch": 0.37341072415699283, - "grad_norm": 2.775804042816162, - "learning_rate": 3.222001137009665e-05, - "loss": 5.2634, - "step": 6755 - }, - { - "epoch": 0.3736871199557767, - "grad_norm": 3.0823562145233154, - "learning_rate": 3.2205798749289374e-05, - "loss": 5.2957, - "step": 6760 - }, - { - "epoch": 0.37396351575456055, - "grad_norm": 2.7218778133392334, - "learning_rate": 3.219158612848209e-05, - "loss": 5.2254, - "step": 6765 - }, - { - "epoch": 0.3742399115533444, - "grad_norm": 2.774827718734741, - "learning_rate": 3.2177373507674815e-05, - "loss": 5.5636, - "step": 6770 - }, - { - "epoch": 0.37451630735212826, - "grad_norm": 3.3417789936065674, - "learning_rate": 3.216316088686754e-05, - "loss": 5.0876, - "step": 6775 - }, - { - "epoch": 0.3747927031509121, - "grad_norm": 2.745445489883423, - "learning_rate": 3.214894826606026e-05, - "loss": 5.5064, - "step": 6780 - }, - { - "epoch": 0.375069098949696, - "grad_norm": 3.0469281673431396, - "learning_rate": 3.213473564525298e-05, - "loss": 5.238, - "step": 6785 - }, - { - "epoch": 0.37534549474847984, - "grad_norm": 3.1263833045959473, - "learning_rate": 3.212052302444571e-05, - "loss": 5.1806, - "step": 6790 - }, - { - "epoch": 0.3756218905472637, - "grad_norm": 3.453270435333252, - "learning_rate": 3.2106310403638435e-05, - "loss": 5.2468, - "step": 6795 - }, - { - "epoch": 0.37589828634604755, - "grad_norm": 4.632005214691162, - "learning_rate": 3.209209778283115e-05, - "loss": 5.4471, - "step": 6800 - }, - { - "epoch": 0.3761746821448314, - "grad_norm": 3.877882242202759, - "learning_rate": 3.2077885162023876e-05, - "loss": 5.5113, - "step": 6805 - }, - { - "epoch": 0.37645107794361526, - "grad_norm": 3.9937760829925537, - "learning_rate": 3.20636725412166e-05, - "loss": 5.4494, - "step": 6810 - }, - { - "epoch": 0.3767274737423991, - "grad_norm": 2.7250454425811768, - "learning_rate": 3.2049459920409324e-05, - "loss": 5.1192, - "step": 6815 - }, - { - "epoch": 0.377003869541183, - "grad_norm": 2.984262466430664, - "learning_rate": 3.203524729960205e-05, - "loss": 5.1072, - "step": 6820 - }, - { - "epoch": 0.37728026533996684, - "grad_norm": 3.4100584983825684, - "learning_rate": 3.202103467879477e-05, - "loss": 5.2542, - "step": 6825 - }, - { - "epoch": 0.3775566611387507, - "grad_norm": 3.297354221343994, - "learning_rate": 3.2006822057987496e-05, - "loss": 5.2157, - "step": 6830 - }, - { - "epoch": 0.37783305693753455, - "grad_norm": 3.0777430534362793, - "learning_rate": 3.1992609437180213e-05, - "loss": 5.516, - "step": 6835 - }, - { - "epoch": 0.3781094527363184, - "grad_norm": 3.4484028816223145, - "learning_rate": 3.1978396816372944e-05, - "loss": 5.5581, - "step": 6840 - }, - { - "epoch": 0.37838584853510226, - "grad_norm": 3.6601834297180176, - "learning_rate": 3.196418419556567e-05, - "loss": 5.2055, - "step": 6845 - }, - { - "epoch": 0.3786622443338861, - "grad_norm": 3.5895004272460938, - "learning_rate": 3.1949971574758385e-05, - "loss": 5.1609, - "step": 6850 - }, - { - "epoch": 0.37893864013267, - "grad_norm": 2.961439371109009, - "learning_rate": 3.193575895395111e-05, - "loss": 5.4183, - "step": 6855 - }, - { - "epoch": 0.37921503593145384, - "grad_norm": 4.192780494689941, - "learning_rate": 3.192154633314383e-05, - "loss": 5.4031, - "step": 6860 - }, - { - "epoch": 0.3794914317302377, - "grad_norm": 3.4532811641693115, - "learning_rate": 3.190733371233656e-05, - "loss": 5.3026, - "step": 6865 - }, - { - "epoch": 0.37976782752902155, - "grad_norm": 3.2710492610931396, - "learning_rate": 3.189312109152928e-05, - "loss": 5.061, - "step": 6870 - }, - { - "epoch": 0.3800442233278054, - "grad_norm": 3.0584661960601807, - "learning_rate": 3.1878908470722005e-05, - "loss": 5.2326, - "step": 6875 - }, - { - "epoch": 0.38032061912658927, - "grad_norm": 2.271395206451416, - "learning_rate": 3.186469584991473e-05, - "loss": 4.9213, - "step": 6880 - }, - { - "epoch": 0.3805970149253731, - "grad_norm": 3.3883402347564697, - "learning_rate": 3.1850483229107446e-05, - "loss": 5.1073, - "step": 6885 - }, - { - "epoch": 0.380873410724157, - "grad_norm": 3.4375452995300293, - "learning_rate": 3.183627060830017e-05, - "loss": 5.5934, - "step": 6890 - }, - { - "epoch": 0.38114980652294084, - "grad_norm": 2.793851137161255, - "learning_rate": 3.1822057987492894e-05, - "loss": 5.0503, - "step": 6895 - }, - { - "epoch": 0.3814262023217247, - "grad_norm": 3.7272047996520996, - "learning_rate": 3.180784536668562e-05, - "loss": 5.3093, - "step": 6900 - }, - { - "epoch": 0.38170259812050855, - "grad_norm": 3.7055723667144775, - "learning_rate": 3.179363274587834e-05, - "loss": 5.3679, - "step": 6905 - }, - { - "epoch": 0.3819789939192924, - "grad_norm": 5.639072418212891, - "learning_rate": 3.1779420125071066e-05, - "loss": 5.4524, - "step": 6910 - }, - { - "epoch": 0.38225538971807627, - "grad_norm": 2.6650073528289795, - "learning_rate": 3.176520750426379e-05, - "loss": 5.022, - "step": 6915 - }, - { - "epoch": 0.3825317855168601, - "grad_norm": 3.388301372528076, - "learning_rate": 3.175099488345651e-05, - "loss": 5.1516, - "step": 6920 - }, - { - "epoch": 0.382808181315644, - "grad_norm": 4.096010208129883, - "learning_rate": 3.173678226264923e-05, - "loss": 5.127, - "step": 6925 - }, - { - "epoch": 0.38308457711442784, - "grad_norm": 3.0948524475097656, - "learning_rate": 3.172256964184196e-05, - "loss": 5.192, - "step": 6930 - }, - { - "epoch": 0.3833609729132117, - "grad_norm": 3.4886627197265625, - "learning_rate": 3.170835702103468e-05, - "loss": 5.19, - "step": 6935 - }, - { - "epoch": 0.38363736871199555, - "grad_norm": 3.2240042686462402, - "learning_rate": 3.1694144400227403e-05, - "loss": 5.6154, - "step": 6940 - }, - { - "epoch": 0.3839137645107794, - "grad_norm": 2.617563247680664, - "learning_rate": 3.167993177942013e-05, - "loss": 5.3793, - "step": 6945 - }, - { - "epoch": 0.3841901603095633, - "grad_norm": 3.2528016567230225, - "learning_rate": 3.166571915861285e-05, - "loss": 5.3784, - "step": 6950 - }, - { - "epoch": 0.3844665561083472, - "grad_norm": 3.294482707977295, - "learning_rate": 3.1651506537805575e-05, - "loss": 5.2618, - "step": 6955 - }, - { - "epoch": 0.38474295190713104, - "grad_norm": 2.874786615371704, - "learning_rate": 3.163729391699829e-05, - "loss": 5.4889, - "step": 6960 - }, - { - "epoch": 0.3850193477059149, - "grad_norm": 2.737448215484619, - "learning_rate": 3.162308129619102e-05, - "loss": 5.2753, - "step": 6965 - }, - { - "epoch": 0.38529574350469875, - "grad_norm": 3.391822338104248, - "learning_rate": 3.160886867538374e-05, - "loss": 5.3451, - "step": 6970 - }, - { - "epoch": 0.3855721393034826, - "grad_norm": 3.012768268585205, - "learning_rate": 3.1594656054576464e-05, - "loss": 5.1552, - "step": 6975 - }, - { - "epoch": 0.38584853510226647, - "grad_norm": 2.8616573810577393, - "learning_rate": 3.158044343376919e-05, - "loss": 5.3288, - "step": 6980 - }, - { - "epoch": 0.3861249309010503, - "grad_norm": 2.7854886054992676, - "learning_rate": 3.156623081296191e-05, - "loss": 5.2121, - "step": 6985 - }, - { - "epoch": 0.3864013266998342, - "grad_norm": 2.713104009628296, - "learning_rate": 3.1552018192154636e-05, - "loss": 5.1097, - "step": 6990 - }, - { - "epoch": 0.38667772249861804, - "grad_norm": 3.1725711822509766, - "learning_rate": 3.1537805571347354e-05, - "loss": 5.2764, - "step": 6995 - }, - { - "epoch": 0.3869541182974019, - "grad_norm": 2.950223684310913, - "learning_rate": 3.1523592950540084e-05, - "loss": 5.3748, - "step": 7000 - }, - { - "epoch": 0.38723051409618575, - "grad_norm": 3.557173252105713, - "learning_rate": 3.15093803297328e-05, - "loss": 5.7142, - "step": 7005 - }, - { - "epoch": 0.3875069098949696, - "grad_norm": 3.303514242172241, - "learning_rate": 3.1495167708925526e-05, - "loss": 5.4981, - "step": 7010 - }, - { - "epoch": 0.38778330569375347, - "grad_norm": 3.2154033184051514, - "learning_rate": 3.148095508811825e-05, - "loss": 5.3371, - "step": 7015 - }, - { - "epoch": 0.3880597014925373, - "grad_norm": 3.158167600631714, - "learning_rate": 3.1466742467310974e-05, - "loss": 5.2136, - "step": 7020 - }, - { - "epoch": 0.3883360972913212, - "grad_norm": 2.4815750122070312, - "learning_rate": 3.14525298465037e-05, - "loss": 5.3322, - "step": 7025 - }, - { - "epoch": 0.38861249309010504, - "grad_norm": 2.9701693058013916, - "learning_rate": 3.1438317225696415e-05, - "loss": 5.0189, - "step": 7030 - }, - { - "epoch": 0.3888888888888889, - "grad_norm": 2.927236318588257, - "learning_rate": 3.1424104604889145e-05, - "loss": 5.224, - "step": 7035 - }, - { - "epoch": 0.38916528468767275, - "grad_norm": 3.0851492881774902, - "learning_rate": 3.140989198408187e-05, - "loss": 5.1077, - "step": 7040 - }, - { - "epoch": 0.3894416804864566, - "grad_norm": 3.0460946559906006, - "learning_rate": 3.139567936327459e-05, - "loss": 4.9981, - "step": 7045 - }, - { - "epoch": 0.38971807628524047, - "grad_norm": 3.0376229286193848, - "learning_rate": 3.138146674246731e-05, - "loss": 5.4228, - "step": 7050 - }, - { - "epoch": 0.3899944720840243, - "grad_norm": 3.2579638957977295, - "learning_rate": 3.1367254121660035e-05, - "loss": 5.3469, - "step": 7055 - }, - { - "epoch": 0.3902708678828082, - "grad_norm": 3.601778984069824, - "learning_rate": 3.135304150085276e-05, - "loss": 5.3353, - "step": 7060 - }, - { - "epoch": 0.39054726368159204, - "grad_norm": 3.496253252029419, - "learning_rate": 3.133882888004548e-05, - "loss": 5.2927, - "step": 7065 - }, - { - "epoch": 0.3908236594803759, - "grad_norm": 2.954893112182617, - "learning_rate": 3.1324616259238207e-05, - "loss": 5.1752, - "step": 7070 - }, - { - "epoch": 0.39110005527915975, - "grad_norm": 3.126713752746582, - "learning_rate": 3.131040363843093e-05, - "loss": 5.1443, - "step": 7075 - }, - { - "epoch": 0.3913764510779436, - "grad_norm": 3.489055633544922, - "learning_rate": 3.129619101762365e-05, - "loss": 5.1166, - "step": 7080 - }, - { - "epoch": 0.39165284687672747, - "grad_norm": 2.989750623703003, - "learning_rate": 3.128197839681637e-05, - "loss": 5.2128, - "step": 7085 - }, - { - "epoch": 0.3919292426755113, - "grad_norm": 3.5772876739501953, - "learning_rate": 3.12677657760091e-05, - "loss": 5.371, - "step": 7090 - }, - { - "epoch": 0.3922056384742952, - "grad_norm": 3.1399166584014893, - "learning_rate": 3.125355315520182e-05, - "loss": 5.0302, - "step": 7095 - }, - { - "epoch": 0.39248203427307904, - "grad_norm": 3.717059373855591, - "learning_rate": 3.1239340534394544e-05, - "loss": 5.1814, - "step": 7100 - }, - { - "epoch": 0.3927584300718629, - "grad_norm": 3.603480815887451, - "learning_rate": 3.122512791358727e-05, - "loss": 5.2066, - "step": 7105 - }, - { - "epoch": 0.39303482587064675, - "grad_norm": 2.86629319190979, - "learning_rate": 3.121091529277999e-05, - "loss": 5.0585, - "step": 7110 - }, - { - "epoch": 0.3933112216694306, - "grad_norm": 3.81514573097229, - "learning_rate": 3.119670267197271e-05, - "loss": 5.4419, - "step": 7115 - }, - { - "epoch": 0.39358761746821447, - "grad_norm": 3.7256438732147217, - "learning_rate": 3.118249005116543e-05, - "loss": 5.4916, - "step": 7120 - }, - { - "epoch": 0.3938640132669983, - "grad_norm": 2.9998602867126465, - "learning_rate": 3.1168277430358164e-05, - "loss": 5.3949, - "step": 7125 - }, - { - "epoch": 0.3941404090657822, - "grad_norm": 3.518113613128662, - "learning_rate": 3.115406480955088e-05, - "loss": 5.4088, - "step": 7130 - }, - { - "epoch": 0.39441680486456604, - "grad_norm": 2.6121208667755127, - "learning_rate": 3.1139852188743605e-05, - "loss": 5.4199, - "step": 7135 - }, - { - "epoch": 0.3946932006633499, - "grad_norm": 3.279484510421753, - "learning_rate": 3.112563956793633e-05, - "loss": 5.1075, - "step": 7140 - }, - { - "epoch": 0.39496959646213375, - "grad_norm": 3.5185775756835938, - "learning_rate": 3.111142694712905e-05, - "loss": 5.2746, - "step": 7145 - }, - { - "epoch": 0.3952459922609176, - "grad_norm": 3.7516870498657227, - "learning_rate": 3.109721432632178e-05, - "loss": 5.1902, - "step": 7150 - }, - { - "epoch": 0.39552238805970147, - "grad_norm": 3.0849173069000244, - "learning_rate": 3.10830017055145e-05, - "loss": 5.1215, - "step": 7155 - }, - { - "epoch": 0.3957987838584853, - "grad_norm": 2.995088577270508, - "learning_rate": 3.1068789084707225e-05, - "loss": 5.2936, - "step": 7160 - }, - { - "epoch": 0.3960751796572692, - "grad_norm": 3.2057669162750244, - "learning_rate": 3.105457646389994e-05, - "loss": 5.5044, - "step": 7165 - }, - { - "epoch": 0.3963515754560531, - "grad_norm": 4.289910793304443, - "learning_rate": 3.1040363843092666e-05, - "loss": 5.522, - "step": 7170 - }, - { - "epoch": 0.39662797125483695, - "grad_norm": 2.816082000732422, - "learning_rate": 3.1026151222285397e-05, - "loss": 5.1474, - "step": 7175 - }, - { - "epoch": 0.3969043670536208, - "grad_norm": 3.6746461391448975, - "learning_rate": 3.1011938601478114e-05, - "loss": 5.5855, - "step": 7180 - }, - { - "epoch": 0.39718076285240467, - "grad_norm": 3.0824356079101562, - "learning_rate": 3.099772598067084e-05, - "loss": 5.1413, - "step": 7185 - }, - { - "epoch": 0.3974571586511885, - "grad_norm": 3.6951475143432617, - "learning_rate": 3.098351335986356e-05, - "loss": 5.3582, - "step": 7190 - }, - { - "epoch": 0.3977335544499724, - "grad_norm": 3.4342403411865234, - "learning_rate": 3.0969300739056286e-05, - "loss": 5.4512, - "step": 7195 - }, - { - "epoch": 0.39800995024875624, - "grad_norm": 2.806812047958374, - "learning_rate": 3.095508811824901e-05, - "loss": 4.7637, - "step": 7200 - }, - { - "epoch": 0.3982863460475401, - "grad_norm": 3.490265130996704, - "learning_rate": 3.094087549744173e-05, - "loss": 5.6499, - "step": 7205 - }, - { - "epoch": 0.39856274184632395, - "grad_norm": 2.978156566619873, - "learning_rate": 3.092666287663446e-05, - "loss": 5.4028, - "step": 7210 - }, - { - "epoch": 0.3988391376451078, - "grad_norm": 2.9881980419158936, - "learning_rate": 3.0912450255827175e-05, - "loss": 5.4023, - "step": 7215 - }, - { - "epoch": 0.39911553344389167, - "grad_norm": 3.111189365386963, - "learning_rate": 3.08982376350199e-05, - "loss": 5.2614, - "step": 7220 - }, - { - "epoch": 0.3993919292426755, - "grad_norm": 2.9220035076141357, - "learning_rate": 3.088402501421262e-05, - "loss": 5.1271, - "step": 7225 - }, - { - "epoch": 0.3996683250414594, - "grad_norm": 3.135206460952759, - "learning_rate": 3.086981239340535e-05, - "loss": 5.0486, - "step": 7230 - }, - { - "epoch": 0.39994472084024324, - "grad_norm": 3.383234739303589, - "learning_rate": 3.085559977259807e-05, - "loss": 4.99, - "step": 7235 - }, - { - "epoch": 0.4002211166390271, - "grad_norm": 3.352858066558838, - "learning_rate": 3.084138715179079e-05, - "loss": 5.206, - "step": 7240 - }, - { - "epoch": 0.40049751243781095, - "grad_norm": 2.6485846042633057, - "learning_rate": 3.082717453098352e-05, - "loss": 5.3789, - "step": 7245 - }, - { - "epoch": 0.4007739082365948, - "grad_norm": 2.586437702178955, - "learning_rate": 3.0812961910176236e-05, - "loss": 5.2531, - "step": 7250 - }, - { - "epoch": 0.40105030403537867, - "grad_norm": 3.3769021034240723, - "learning_rate": 3.079874928936896e-05, - "loss": 5.3068, - "step": 7255 - }, - { - "epoch": 0.4013266998341625, - "grad_norm": 2.9882419109344482, - "learning_rate": 3.0784536668561684e-05, - "loss": 5.1777, - "step": 7260 - }, - { - "epoch": 0.4016030956329464, - "grad_norm": 3.543286085128784, - "learning_rate": 3.077032404775441e-05, - "loss": 5.4205, - "step": 7265 - }, - { - "epoch": 0.40187949143173024, - "grad_norm": 3.5167641639709473, - "learning_rate": 3.075611142694713e-05, - "loss": 5.2147, - "step": 7270 - }, - { - "epoch": 0.4021558872305141, - "grad_norm": 4.086061477661133, - "learning_rate": 3.074189880613985e-05, - "loss": 5.3399, - "step": 7275 - }, - { - "epoch": 0.40243228302929795, - "grad_norm": 2.9162538051605225, - "learning_rate": 3.072768618533258e-05, - "loss": 5.2173, - "step": 7280 - }, - { - "epoch": 0.4027086788280818, - "grad_norm": 3.2344117164611816, - "learning_rate": 3.0713473564525304e-05, - "loss": 5.446, - "step": 7285 - }, - { - "epoch": 0.40298507462686567, - "grad_norm": 2.547792673110962, - "learning_rate": 3.069926094371802e-05, - "loss": 5.2879, - "step": 7290 - }, - { - "epoch": 0.4032614704256495, - "grad_norm": 2.705897092819214, - "learning_rate": 3.0685048322910745e-05, - "loss": 5.243, - "step": 7295 - }, - { - "epoch": 0.4035378662244334, - "grad_norm": 3.052686929702759, - "learning_rate": 3.067083570210347e-05, - "loss": 5.5547, - "step": 7300 - }, - { - "epoch": 0.40381426202321724, - "grad_norm": 2.7619709968566895, - "learning_rate": 3.065662308129619e-05, - "loss": 5.1307, - "step": 7305 - }, - { - "epoch": 0.4040906578220011, - "grad_norm": 2.8688414096832275, - "learning_rate": 3.064241046048892e-05, - "loss": 5.1096, - "step": 7310 - }, - { - "epoch": 0.40436705362078496, - "grad_norm": 3.4232749938964844, - "learning_rate": 3.062819783968164e-05, - "loss": 5.318, - "step": 7315 - }, - { - "epoch": 0.4046434494195688, - "grad_norm": 4.028477191925049, - "learning_rate": 3.0613985218874365e-05, - "loss": 5.4557, - "step": 7320 - }, - { - "epoch": 0.40491984521835267, - "grad_norm": 3.430436134338379, - "learning_rate": 3.059977259806708e-05, - "loss": 5.3102, - "step": 7325 - }, - { - "epoch": 0.4051962410171365, - "grad_norm": 2.725626230239868, - "learning_rate": 3.0585559977259806e-05, - "loss": 5.3974, - "step": 7330 - }, - { - "epoch": 0.4054726368159204, - "grad_norm": 3.0082714557647705, - "learning_rate": 3.057134735645253e-05, - "loss": 5.2993, - "step": 7335 - }, - { - "epoch": 0.40574903261470424, - "grad_norm": 3.552365303039551, - "learning_rate": 3.0557134735645254e-05, - "loss": 5.2335, - "step": 7340 - }, - { - "epoch": 0.4060254284134881, - "grad_norm": 3.0008647441864014, - "learning_rate": 3.054292211483798e-05, - "loss": 5.179, - "step": 7345 - }, - { - "epoch": 0.40630182421227196, - "grad_norm": 3.4368932247161865, - "learning_rate": 3.05287094940307e-05, - "loss": 5.1877, - "step": 7350 - }, - { - "epoch": 0.4065782200110558, - "grad_norm": 2.775163173675537, - "learning_rate": 3.0514496873223426e-05, - "loss": 5.3432, - "step": 7355 - }, - { - "epoch": 0.40685461580983967, - "grad_norm": 3.331177234649658, - "learning_rate": 3.0500284252416143e-05, - "loss": 5.6045, - "step": 7360 - }, - { - "epoch": 0.4071310116086235, - "grad_norm": 3.2628567218780518, - "learning_rate": 3.048607163160887e-05, - "loss": 5.3768, - "step": 7365 - }, - { - "epoch": 0.4074074074074074, - "grad_norm": 3.5824832916259766, - "learning_rate": 3.0471859010801594e-05, - "loss": 5.5101, - "step": 7370 - }, - { - "epoch": 0.40768380320619124, - "grad_norm": 2.661316394805908, - "learning_rate": 3.0457646389994315e-05, - "loss": 5.4241, - "step": 7375 - }, - { - "epoch": 0.4079601990049751, - "grad_norm": 3.2753899097442627, - "learning_rate": 3.0443433769187042e-05, - "loss": 5.1645, - "step": 7380 - }, - { - "epoch": 0.40823659480375896, - "grad_norm": 3.1966748237609863, - "learning_rate": 3.042922114837976e-05, - "loss": 5.3537, - "step": 7385 - }, - { - "epoch": 0.4085129906025428, - "grad_norm": 3.2661285400390625, - "learning_rate": 3.0415008527572487e-05, - "loss": 5.3202, - "step": 7390 - }, - { - "epoch": 0.4087893864013267, - "grad_norm": 2.994544506072998, - "learning_rate": 3.040079590676521e-05, - "loss": 5.3407, - "step": 7395 - }, - { - "epoch": 0.4090657822001106, - "grad_norm": 3.363358974456787, - "learning_rate": 3.038658328595793e-05, - "loss": 5.3274, - "step": 7400 - }, - { - "epoch": 0.40934217799889444, - "grad_norm": 3.417901039123535, - "learning_rate": 3.0372370665150656e-05, - "loss": 5.2245, - "step": 7405 - }, - { - "epoch": 0.4096185737976783, - "grad_norm": 3.499995470046997, - "learning_rate": 3.0358158044343376e-05, - "loss": 5.043, - "step": 7410 - }, - { - "epoch": 0.40989496959646216, - "grad_norm": 3.142343044281006, - "learning_rate": 3.0343945423536104e-05, - "loss": 5.2181, - "step": 7415 - }, - { - "epoch": 0.410171365395246, - "grad_norm": 2.7809529304504395, - "learning_rate": 3.032973280272882e-05, - "loss": 5.3378, - "step": 7420 - }, - { - "epoch": 0.41044776119402987, - "grad_norm": 3.567267656326294, - "learning_rate": 3.0315520181921548e-05, - "loss": 5.3781, - "step": 7425 - }, - { - "epoch": 0.4107241569928137, - "grad_norm": 4.007451057434082, - "learning_rate": 3.0301307561114272e-05, - "loss": 5.1938, - "step": 7430 - }, - { - "epoch": 0.4110005527915976, - "grad_norm": 3.4437944889068604, - "learning_rate": 3.0287094940306993e-05, - "loss": 5.0837, - "step": 7435 - }, - { - "epoch": 0.41127694859038144, - "grad_norm": 3.051156997680664, - "learning_rate": 3.0272882319499717e-05, - "loss": 5.3, - "step": 7440 - }, - { - "epoch": 0.4115533443891653, - "grad_norm": 3.0758109092712402, - "learning_rate": 3.0258669698692437e-05, - "loss": 5.0707, - "step": 7445 - }, - { - "epoch": 0.41182974018794916, - "grad_norm": 3.035916328430176, - "learning_rate": 3.0244457077885165e-05, - "loss": 5.1578, - "step": 7450 - }, - { - "epoch": 0.412106135986733, - "grad_norm": 3.541250228881836, - "learning_rate": 3.023024445707789e-05, - "loss": 4.8507, - "step": 7455 - }, - { - "epoch": 0.41238253178551687, - "grad_norm": 4.51204252243042, - "learning_rate": 3.021603183627061e-05, - "loss": 5.1498, - "step": 7460 - }, - { - "epoch": 0.4126589275843007, - "grad_norm": 2.9354135990142822, - "learning_rate": 3.0201819215463333e-05, - "loss": 5.0576, - "step": 7465 - }, - { - "epoch": 0.4129353233830846, - "grad_norm": 2.9570817947387695, - "learning_rate": 3.0187606594656054e-05, - "loss": 5.3592, - "step": 7470 - }, - { - "epoch": 0.41321171918186844, - "grad_norm": 2.823472499847412, - "learning_rate": 3.0173393973848778e-05, - "loss": 4.9364, - "step": 7475 - }, - { - "epoch": 0.4134881149806523, - "grad_norm": 2.96844744682312, - "learning_rate": 3.0159181353041505e-05, - "loss": 5.3399, - "step": 7480 - }, - { - "epoch": 0.41376451077943616, - "grad_norm": 3.269542694091797, - "learning_rate": 3.0144968732234226e-05, - "loss": 5.2783, - "step": 7485 - }, - { - "epoch": 0.41404090657822, - "grad_norm": 2.990311622619629, - "learning_rate": 3.013075611142695e-05, - "loss": 5.4059, - "step": 7490 - }, - { - "epoch": 0.41431730237700387, - "grad_norm": 2.818763494491577, - "learning_rate": 3.011654349061967e-05, - "loss": 5.3341, - "step": 7495 - }, - { - "epoch": 0.41459369817578773, - "grad_norm": 4.055943489074707, - "learning_rate": 3.0102330869812394e-05, - "loss": 5.2108, - "step": 7500 - }, - { - "epoch": 0.4148700939745716, - "grad_norm": 3.448615789413452, - "learning_rate": 3.008811824900512e-05, - "loss": 5.0803, - "step": 7505 - }, - { - "epoch": 0.41514648977335544, - "grad_norm": 2.7099623680114746, - "learning_rate": 3.007390562819784e-05, - "loss": 5.1597, - "step": 7510 - }, - { - "epoch": 0.4154228855721393, - "grad_norm": 2.860887050628662, - "learning_rate": 3.0059693007390566e-05, - "loss": 5.0554, - "step": 7515 - }, - { - "epoch": 0.41569928137092316, - "grad_norm": 3.508277177810669, - "learning_rate": 3.0045480386583287e-05, - "loss": 5.2466, - "step": 7520 - }, - { - "epoch": 0.415975677169707, - "grad_norm": 3.1834499835968018, - "learning_rate": 3.003126776577601e-05, - "loss": 5.4212, - "step": 7525 - }, - { - "epoch": 0.41625207296849087, - "grad_norm": 3.1005613803863525, - "learning_rate": 3.001705514496873e-05, - "loss": 5.4608, - "step": 7530 - }, - { - "epoch": 0.41652846876727473, - "grad_norm": 3.1550991535186768, - "learning_rate": 3.0002842524161455e-05, - "loss": 5.2565, - "step": 7535 - }, - { - "epoch": 0.4168048645660586, - "grad_norm": 3.293140172958374, - "learning_rate": 2.9988629903354183e-05, - "loss": 5.5235, - "step": 7540 - }, - { - "epoch": 0.41708126036484244, - "grad_norm": 3.3371574878692627, - "learning_rate": 2.9974417282546903e-05, - "loss": 5.3766, - "step": 7545 - }, - { - "epoch": 0.4173576561636263, - "grad_norm": 3.0962579250335693, - "learning_rate": 2.9960204661739627e-05, - "loss": 5.1429, - "step": 7550 - }, - { - "epoch": 0.41763405196241016, - "grad_norm": 2.632319927215576, - "learning_rate": 2.9945992040932348e-05, - "loss": 5.2072, - "step": 7555 - }, - { - "epoch": 0.417910447761194, - "grad_norm": 3.5730085372924805, - "learning_rate": 2.9931779420125072e-05, - "loss": 5.3899, - "step": 7560 - }, - { - "epoch": 0.41818684355997787, - "grad_norm": 3.1113953590393066, - "learning_rate": 2.99175667993178e-05, - "loss": 5.2587, - "step": 7565 - }, - { - "epoch": 0.41846323935876173, - "grad_norm": 2.9837839603424072, - "learning_rate": 2.9903354178510516e-05, - "loss": 5.1953, - "step": 7570 - }, - { - "epoch": 0.4187396351575456, - "grad_norm": 3.156015634536743, - "learning_rate": 2.9889141557703244e-05, - "loss": 5.0675, - "step": 7575 - }, - { - "epoch": 0.41901603095632944, - "grad_norm": 2.8620293140411377, - "learning_rate": 2.9874928936895964e-05, - "loss": 5.2232, - "step": 7580 - }, - { - "epoch": 0.4192924267551133, - "grad_norm": 2.7657828330993652, - "learning_rate": 2.9860716316088688e-05, - "loss": 5.0537, - "step": 7585 - }, - { - "epoch": 0.41956882255389716, - "grad_norm": 2.991236448287964, - "learning_rate": 2.9846503695281412e-05, - "loss": 5.3795, - "step": 7590 - }, - { - "epoch": 0.419845218352681, - "grad_norm": 2.935781717300415, - "learning_rate": 2.9832291074474133e-05, - "loss": 5.0797, - "step": 7595 - }, - { - "epoch": 0.4201216141514649, - "grad_norm": 3.5586657524108887, - "learning_rate": 2.981807845366686e-05, - "loss": 5.3075, - "step": 7600 - }, - { - "epoch": 0.42039800995024873, - "grad_norm": 3.6937458515167236, - "learning_rate": 2.9803865832859577e-05, - "loss": 5.2942, - "step": 7605 - }, - { - "epoch": 0.4206744057490326, - "grad_norm": 3.0351436138153076, - "learning_rate": 2.9789653212052305e-05, - "loss": 5.1602, - "step": 7610 - }, - { - "epoch": 0.4209508015478165, - "grad_norm": 2.95098614692688, - "learning_rate": 2.977544059124503e-05, - "loss": 5.2933, - "step": 7615 - }, - { - "epoch": 0.42122719734660036, - "grad_norm": 2.8165371417999268, - "learning_rate": 2.976122797043775e-05, - "loss": 5.1234, - "step": 7620 - }, - { - "epoch": 0.4215035931453842, - "grad_norm": 3.436764717102051, - "learning_rate": 2.9747015349630473e-05, - "loss": 5.2743, - "step": 7625 - }, - { - "epoch": 0.42177998894416807, - "grad_norm": 3.4942984580993652, - "learning_rate": 2.9732802728823194e-05, - "loss": 5.5733, - "step": 7630 - }, - { - "epoch": 0.42205638474295193, - "grad_norm": 3.76875901222229, - "learning_rate": 2.971859010801592e-05, - "loss": 5.3345, - "step": 7635 - }, - { - "epoch": 0.4223327805417358, - "grad_norm": 3.2860209941864014, - "learning_rate": 2.970437748720864e-05, - "loss": 5.3476, - "step": 7640 - }, - { - "epoch": 0.42260917634051964, - "grad_norm": 3.403754949569702, - "learning_rate": 2.9690164866401366e-05, - "loss": 5.1001, - "step": 7645 - }, - { - "epoch": 0.4228855721393035, - "grad_norm": 3.613391876220703, - "learning_rate": 2.967595224559409e-05, - "loss": 5.0587, - "step": 7650 - }, - { - "epoch": 0.42316196793808736, - "grad_norm": 3.401045083999634, - "learning_rate": 2.966173962478681e-05, - "loss": 4.985, - "step": 7655 - }, - { - "epoch": 0.4234383637368712, - "grad_norm": 3.0673391819000244, - "learning_rate": 2.9647527003979534e-05, - "loss": 5.115, - "step": 7660 - }, - { - "epoch": 0.42371475953565507, - "grad_norm": 3.054534673690796, - "learning_rate": 2.9633314383172255e-05, - "loss": 5.1765, - "step": 7665 - }, - { - "epoch": 0.42399115533443893, - "grad_norm": 4.545713424682617, - "learning_rate": 2.9619101762364982e-05, - "loss": 5.0729, - "step": 7670 - }, - { - "epoch": 0.4242675511332228, - "grad_norm": 3.014993190765381, - "learning_rate": 2.9604889141557706e-05, - "loss": 5.4135, - "step": 7675 - }, - { - "epoch": 0.42454394693200664, - "grad_norm": 2.8630824089050293, - "learning_rate": 2.9590676520750427e-05, - "loss": 5.1434, - "step": 7680 - }, - { - "epoch": 0.4248203427307905, - "grad_norm": 3.143050193786621, - "learning_rate": 2.957646389994315e-05, - "loss": 5.2194, - "step": 7685 - }, - { - "epoch": 0.42509673852957436, - "grad_norm": 3.1831743717193604, - "learning_rate": 2.956225127913587e-05, - "loss": 5.4145, - "step": 7690 - }, - { - "epoch": 0.4253731343283582, - "grad_norm": 3.281905174255371, - "learning_rate": 2.95480386583286e-05, - "loss": 5.2458, - "step": 7695 - }, - { - "epoch": 0.4256495301271421, - "grad_norm": 3.2459661960601807, - "learning_rate": 2.9533826037521323e-05, - "loss": 5.1643, - "step": 7700 - }, - { - "epoch": 0.42592592592592593, - "grad_norm": 3.014573097229004, - "learning_rate": 2.9519613416714043e-05, - "loss": 5.3188, - "step": 7705 - }, - { - "epoch": 0.4262023217247098, - "grad_norm": 3.489968776702881, - "learning_rate": 2.9505400795906767e-05, - "loss": 5.359, - "step": 7710 - }, - { - "epoch": 0.42647871752349364, - "grad_norm": 2.6818461418151855, - "learning_rate": 2.9491188175099488e-05, - "loss": 5.2443, - "step": 7715 - }, - { - "epoch": 0.4267551133222775, - "grad_norm": 3.4301328659057617, - "learning_rate": 2.9476975554292212e-05, - "loss": 5.3404, - "step": 7720 - }, - { - "epoch": 0.42703150912106136, - "grad_norm": 2.975738525390625, - "learning_rate": 2.946276293348494e-05, - "loss": 5.5118, - "step": 7725 - }, - { - "epoch": 0.4273079049198452, - "grad_norm": 3.66871976852417, - "learning_rate": 2.944855031267766e-05, - "loss": 5.2737, - "step": 7730 - }, - { - "epoch": 0.4275843007186291, - "grad_norm": 2.812068462371826, - "learning_rate": 2.9434337691870384e-05, - "loss": 5.0211, - "step": 7735 - }, - { - "epoch": 0.42786069651741293, - "grad_norm": 3.8144164085388184, - "learning_rate": 2.9420125071063105e-05, - "loss": 5.3493, - "step": 7740 - }, - { - "epoch": 0.4281370923161968, - "grad_norm": 3.173715353012085, - "learning_rate": 2.940591245025583e-05, - "loss": 5.144, - "step": 7745 - }, - { - "epoch": 0.42841348811498065, - "grad_norm": 3.548175573348999, - "learning_rate": 2.939169982944855e-05, - "loss": 5.4388, - "step": 7750 - }, - { - "epoch": 0.4286898839137645, - "grad_norm": 2.4336702823638916, - "learning_rate": 2.9377487208641273e-05, - "loss": 5.1597, - "step": 7755 - }, - { - "epoch": 0.42896627971254836, - "grad_norm": 3.0814414024353027, - "learning_rate": 2.9363274587834e-05, - "loss": 5.2932, - "step": 7760 - }, - { - "epoch": 0.4292426755113322, - "grad_norm": 2.701829671859741, - "learning_rate": 2.934906196702672e-05, - "loss": 5.2772, - "step": 7765 - }, - { - "epoch": 0.4295190713101161, - "grad_norm": 2.623934268951416, - "learning_rate": 2.9334849346219445e-05, - "loss": 4.9666, - "step": 7770 - }, - { - "epoch": 0.42979546710889993, - "grad_norm": 3.162205696105957, - "learning_rate": 2.9320636725412166e-05, - "loss": 5.5104, - "step": 7775 - }, - { - "epoch": 0.4300718629076838, - "grad_norm": 3.1001057624816895, - "learning_rate": 2.930642410460489e-05, - "loss": 5.2675, - "step": 7780 - }, - { - "epoch": 0.43034825870646765, - "grad_norm": 3.4800355434417725, - "learning_rate": 2.9292211483797617e-05, - "loss": 5.246, - "step": 7785 - }, - { - "epoch": 0.4306246545052515, - "grad_norm": 3.1043055057525635, - "learning_rate": 2.9277998862990334e-05, - "loss": 4.9439, - "step": 7790 - }, - { - "epoch": 0.43090105030403536, - "grad_norm": 2.9988179206848145, - "learning_rate": 2.926378624218306e-05, - "loss": 5.1472, - "step": 7795 - }, - { - "epoch": 0.4311774461028192, - "grad_norm": 3.2718756198883057, - "learning_rate": 2.9249573621375782e-05, - "loss": 5.3382, - "step": 7800 - }, - { - "epoch": 0.4314538419016031, - "grad_norm": 2.6709251403808594, - "learning_rate": 2.9235361000568506e-05, - "loss": 5.1973, - "step": 7805 - }, - { - "epoch": 0.43173023770038693, - "grad_norm": 2.908034086227417, - "learning_rate": 2.922114837976123e-05, - "loss": 5.0482, - "step": 7810 - }, - { - "epoch": 0.4320066334991708, - "grad_norm": 2.8936550617218018, - "learning_rate": 2.920693575895395e-05, - "loss": 5.223, - "step": 7815 - }, - { - "epoch": 0.43228302929795465, - "grad_norm": 3.31170654296875, - "learning_rate": 2.9192723138146678e-05, - "loss": 5.218, - "step": 7820 - }, - { - "epoch": 0.4325594250967385, - "grad_norm": 3.3530709743499756, - "learning_rate": 2.9178510517339395e-05, - "loss": 5.1359, - "step": 7825 - }, - { - "epoch": 0.43283582089552236, - "grad_norm": 2.950207233428955, - "learning_rate": 2.9164297896532123e-05, - "loss": 5.538, - "step": 7830 - }, - { - "epoch": 0.4331122166943063, - "grad_norm": 3.7131307125091553, - "learning_rate": 2.9150085275724847e-05, - "loss": 5.2753, - "step": 7835 - }, - { - "epoch": 0.43338861249309013, - "grad_norm": 3.750091075897217, - "learning_rate": 2.9135872654917567e-05, - "loss": 5.0678, - "step": 7840 - }, - { - "epoch": 0.433665008291874, - "grad_norm": 2.7476789951324463, - "learning_rate": 2.9121660034110295e-05, - "loss": 5.3974, - "step": 7845 - }, - { - "epoch": 0.43394140409065785, - "grad_norm": 3.338824510574341, - "learning_rate": 2.9107447413303012e-05, - "loss": 4.9142, - "step": 7850 - }, - { - "epoch": 0.4342177998894417, - "grad_norm": 3.1479055881500244, - "learning_rate": 2.909323479249574e-05, - "loss": 5.0689, - "step": 7855 - }, - { - "epoch": 0.43449419568822556, - "grad_norm": 2.765441656112671, - "learning_rate": 2.907902217168846e-05, - "loss": 5.2719, - "step": 7860 - }, - { - "epoch": 0.4347705914870094, - "grad_norm": 3.1556508541107178, - "learning_rate": 2.9064809550881184e-05, - "loss": 5.391, - "step": 7865 - }, - { - "epoch": 0.4350469872857933, - "grad_norm": 3.2448055744171143, - "learning_rate": 2.9050596930073908e-05, - "loss": 5.3468, - "step": 7870 - }, - { - "epoch": 0.43532338308457713, - "grad_norm": 3.5034596920013428, - "learning_rate": 2.9036384309266628e-05, - "loss": 5.1951, - "step": 7875 - }, - { - "epoch": 0.435599778883361, - "grad_norm": 3.018533945083618, - "learning_rate": 2.9022171688459356e-05, - "loss": 5.1914, - "step": 7880 - }, - { - "epoch": 0.43587617468214485, - "grad_norm": 2.8897950649261475, - "learning_rate": 2.9007959067652073e-05, - "loss": 5.6935, - "step": 7885 - }, - { - "epoch": 0.4361525704809287, - "grad_norm": 2.643296957015991, - "learning_rate": 2.89937464468448e-05, - "loss": 5.1013, - "step": 7890 - }, - { - "epoch": 0.43642896627971256, - "grad_norm": 3.041177988052368, - "learning_rate": 2.8979533826037524e-05, - "loss": 5.3812, - "step": 7895 - }, - { - "epoch": 0.4367053620784964, - "grad_norm": 3.4055657386779785, - "learning_rate": 2.8965321205230245e-05, - "loss": 5.5113, - "step": 7900 - }, - { - "epoch": 0.4369817578772803, - "grad_norm": 4.0776495933532715, - "learning_rate": 2.895110858442297e-05, - "loss": 5.4321, - "step": 7905 - }, - { - "epoch": 0.43725815367606413, - "grad_norm": 3.4030396938323975, - "learning_rate": 2.893689596361569e-05, - "loss": 5.091, - "step": 7910 - }, - { - "epoch": 0.437534549474848, - "grad_norm": 3.5989456176757812, - "learning_rate": 2.8922683342808417e-05, - "loss": 5.48, - "step": 7915 - }, - { - "epoch": 0.43781094527363185, - "grad_norm": 2.3618738651275635, - "learning_rate": 2.890847072200114e-05, - "loss": 5.0508, - "step": 7920 - }, - { - "epoch": 0.4380873410724157, - "grad_norm": 3.1228749752044678, - "learning_rate": 2.889425810119386e-05, - "loss": 5.3904, - "step": 7925 - }, - { - "epoch": 0.43836373687119956, - "grad_norm": 3.0725607872009277, - "learning_rate": 2.8880045480386585e-05, - "loss": 5.4847, - "step": 7930 - }, - { - "epoch": 0.4386401326699834, - "grad_norm": 2.9387130737304688, - "learning_rate": 2.8865832859579306e-05, - "loss": 4.9448, - "step": 7935 - }, - { - "epoch": 0.4389165284687673, - "grad_norm": 3.3646342754364014, - "learning_rate": 2.885162023877203e-05, - "loss": 5.3713, - "step": 7940 - }, - { - "epoch": 0.43919292426755113, - "grad_norm": 3.699636697769165, - "learning_rate": 2.8837407617964757e-05, - "loss": 5.2954, - "step": 7945 - }, - { - "epoch": 0.439469320066335, - "grad_norm": 3.304562568664551, - "learning_rate": 2.8823194997157478e-05, - "loss": 5.1658, - "step": 7950 - }, - { - "epoch": 0.43974571586511885, - "grad_norm": 2.908881902694702, - "learning_rate": 2.8808982376350202e-05, - "loss": 5.0667, - "step": 7955 - }, - { - "epoch": 0.4400221116639027, - "grad_norm": 3.2574126720428467, - "learning_rate": 2.8794769755542922e-05, - "loss": 5.1662, - "step": 7960 - }, - { - "epoch": 0.44029850746268656, - "grad_norm": 3.021467924118042, - "learning_rate": 2.8780557134735646e-05, - "loss": 5.0305, - "step": 7965 - }, - { - "epoch": 0.4405749032614704, - "grad_norm": 2.6084797382354736, - "learning_rate": 2.8766344513928367e-05, - "loss": 5.3558, - "step": 7970 - }, - { - "epoch": 0.4408512990602543, - "grad_norm": 3.0199179649353027, - "learning_rate": 2.875213189312109e-05, - "loss": 5.2878, - "step": 7975 - }, - { - "epoch": 0.44112769485903813, - "grad_norm": 3.2974507808685303, - "learning_rate": 2.8737919272313818e-05, - "loss": 5.3241, - "step": 7980 - }, - { - "epoch": 0.441404090657822, - "grad_norm": 3.1707582473754883, - "learning_rate": 2.872370665150654e-05, - "loss": 4.94, - "step": 7985 - }, - { - "epoch": 0.44168048645660585, - "grad_norm": 2.6704931259155273, - "learning_rate": 2.8709494030699263e-05, - "loss": 5.1259, - "step": 7990 - }, - { - "epoch": 0.4419568822553897, - "grad_norm": 4.149257659912109, - "learning_rate": 2.8695281409891983e-05, - "loss": 5.4461, - "step": 7995 - }, - { - "epoch": 0.44223327805417356, - "grad_norm": 3.305680274963379, - "learning_rate": 2.8681068789084707e-05, - "loss": 5.1553, - "step": 8000 - }, - { - "epoch": 0.4425096738529574, - "grad_norm": 3.288146734237671, - "learning_rate": 2.8666856168277435e-05, - "loss": 5.2207, - "step": 8005 - }, - { - "epoch": 0.4427860696517413, - "grad_norm": 4.230424880981445, - "learning_rate": 2.8652643547470155e-05, - "loss": 5.3263, - "step": 8010 - }, - { - "epoch": 0.44306246545052513, - "grad_norm": 3.5503792762756348, - "learning_rate": 2.863843092666288e-05, - "loss": 5.2507, - "step": 8015 - }, - { - "epoch": 0.443338861249309, - "grad_norm": 2.99174427986145, - "learning_rate": 2.86242183058556e-05, - "loss": 5.2627, - "step": 8020 - }, - { - "epoch": 0.44361525704809285, - "grad_norm": 3.6940457820892334, - "learning_rate": 2.8610005685048324e-05, - "loss": 4.9232, - "step": 8025 - }, - { - "epoch": 0.4438916528468767, - "grad_norm": 3.4294097423553467, - "learning_rate": 2.859579306424105e-05, - "loss": 5.2177, - "step": 8030 - }, - { - "epoch": 0.44416804864566056, - "grad_norm": 3.6956839561462402, - "learning_rate": 2.858158044343377e-05, - "loss": 5.3431, - "step": 8035 - }, - { - "epoch": 0.4444444444444444, - "grad_norm": 3.1307976245880127, - "learning_rate": 2.8567367822626496e-05, - "loss": 5.3812, - "step": 8040 - }, - { - "epoch": 0.4447208402432283, - "grad_norm": 2.7979776859283447, - "learning_rate": 2.8553155201819216e-05, - "loss": 5.6822, - "step": 8045 - }, - { - "epoch": 0.44499723604201213, - "grad_norm": 2.8679885864257812, - "learning_rate": 2.853894258101194e-05, - "loss": 5.278, - "step": 8050 - }, - { - "epoch": 0.44527363184079605, - "grad_norm": 2.877314805984497, - "learning_rate": 2.8524729960204664e-05, - "loss": 5.2747, - "step": 8055 - }, - { - "epoch": 0.4455500276395799, - "grad_norm": 3.238851308822632, - "learning_rate": 2.8510517339397385e-05, - "loss": 5.3333, - "step": 8060 - }, - { - "epoch": 0.44582642343836376, - "grad_norm": 2.7757999897003174, - "learning_rate": 2.8496304718590112e-05, - "loss": 5.0878, - "step": 8065 - }, - { - "epoch": 0.4461028192371476, - "grad_norm": 3.3367576599121094, - "learning_rate": 2.848209209778283e-05, - "loss": 5.2714, - "step": 8070 - }, - { - "epoch": 0.4463792150359315, - "grad_norm": 2.983865976333618, - "learning_rate": 2.8467879476975557e-05, - "loss": 5.4693, - "step": 8075 - }, - { - "epoch": 0.44665561083471533, - "grad_norm": 3.211333751678467, - "learning_rate": 2.8453666856168278e-05, - "loss": 5.1891, - "step": 8080 - }, - { - "epoch": 0.4469320066334992, - "grad_norm": 3.608457565307617, - "learning_rate": 2.8439454235361e-05, - "loss": 4.9361, - "step": 8085 - }, - { - "epoch": 0.44720840243228305, - "grad_norm": 3.1287999153137207, - "learning_rate": 2.8425241614553725e-05, - "loss": 5.5907, - "step": 8090 - }, - { - "epoch": 0.4474847982310669, - "grad_norm": 3.5823168754577637, - "learning_rate": 2.8411028993746446e-05, - "loss": 5.1325, - "step": 8095 - }, - { - "epoch": 0.44776119402985076, - "grad_norm": 4.0038299560546875, - "learning_rate": 2.8396816372939173e-05, - "loss": 5.0001, - "step": 8100 - }, - { - "epoch": 0.4480375898286346, - "grad_norm": 3.3478615283966064, - "learning_rate": 2.838260375213189e-05, - "loss": 5.1217, - "step": 8105 - }, - { - "epoch": 0.4483139856274185, - "grad_norm": 3.304795503616333, - "learning_rate": 2.8368391131324618e-05, - "loss": 5.5211, - "step": 8110 - }, - { - "epoch": 0.44859038142620233, - "grad_norm": 3.8344953060150146, - "learning_rate": 2.8354178510517342e-05, - "loss": 5.2864, - "step": 8115 - }, - { - "epoch": 0.4488667772249862, - "grad_norm": 3.359548807144165, - "learning_rate": 2.8339965889710063e-05, - "loss": 5.5551, - "step": 8120 - }, - { - "epoch": 0.44914317302377005, - "grad_norm": 3.1302855014801025, - "learning_rate": 2.8325753268902787e-05, - "loss": 5.1885, - "step": 8125 - }, - { - "epoch": 0.4494195688225539, - "grad_norm": 3.1038334369659424, - "learning_rate": 2.8311540648095507e-05, - "loss": 5.1696, - "step": 8130 - }, - { - "epoch": 0.44969596462133776, - "grad_norm": 4.253241062164307, - "learning_rate": 2.8297328027288235e-05, - "loss": 5.5296, - "step": 8135 - }, - { - "epoch": 0.4499723604201216, - "grad_norm": 3.2857799530029297, - "learning_rate": 2.828311540648096e-05, - "loss": 5.2782, - "step": 8140 - }, - { - "epoch": 0.4502487562189055, - "grad_norm": 3.255506753921509, - "learning_rate": 2.826890278567368e-05, - "loss": 5.2445, - "step": 8145 - }, - { - "epoch": 0.45052515201768933, - "grad_norm": 4.212364673614502, - "learning_rate": 2.8254690164866403e-05, - "loss": 5.1744, - "step": 8150 - }, - { - "epoch": 0.4508015478164732, - "grad_norm": 3.6351518630981445, - "learning_rate": 2.8240477544059124e-05, - "loss": 5.3138, - "step": 8155 - }, - { - "epoch": 0.45107794361525705, - "grad_norm": 2.9749603271484375, - "learning_rate": 2.822626492325185e-05, - "loss": 5.2675, - "step": 8160 - }, - { - "epoch": 0.4513543394140409, - "grad_norm": 3.6853575706481934, - "learning_rate": 2.8212052302444575e-05, - "loss": 5.2495, - "step": 8165 - }, - { - "epoch": 0.45163073521282476, - "grad_norm": 3.170837640762329, - "learning_rate": 2.8197839681637296e-05, - "loss": 5.2689, - "step": 8170 - }, - { - "epoch": 0.4519071310116086, - "grad_norm": 3.9865872859954834, - "learning_rate": 2.818362706083002e-05, - "loss": 5.0987, - "step": 8175 - }, - { - "epoch": 0.4521835268103925, - "grad_norm": 3.1320040225982666, - "learning_rate": 2.816941444002274e-05, - "loss": 5.2081, - "step": 8180 - }, - { - "epoch": 0.45245992260917633, - "grad_norm": 3.1708438396453857, - "learning_rate": 2.8155201819215464e-05, - "loss": 5.1956, - "step": 8185 - }, - { - "epoch": 0.4527363184079602, - "grad_norm": 2.9454898834228516, - "learning_rate": 2.8140989198408185e-05, - "loss": 5.2115, - "step": 8190 - }, - { - "epoch": 0.45301271420674405, - "grad_norm": 3.4514904022216797, - "learning_rate": 2.8126776577600912e-05, - "loss": 5.1788, - "step": 8195 - }, - { - "epoch": 0.4532891100055279, - "grad_norm": 3.5502679347991943, - "learning_rate": 2.8112563956793636e-05, - "loss": 5.1326, - "step": 8200 - }, - { - "epoch": 0.45356550580431176, - "grad_norm": 3.267252206802368, - "learning_rate": 2.8098351335986357e-05, - "loss": 5.2808, - "step": 8205 - }, - { - "epoch": 0.4538419016030956, - "grad_norm": 3.928968667984009, - "learning_rate": 2.808413871517908e-05, - "loss": 5.262, - "step": 8210 - }, - { - "epoch": 0.4541182974018795, - "grad_norm": 3.202829360961914, - "learning_rate": 2.80699260943718e-05, - "loss": 5.2817, - "step": 8215 - }, - { - "epoch": 0.45439469320066334, - "grad_norm": 3.040146589279175, - "learning_rate": 2.8055713473564525e-05, - "loss": 5.3738, - "step": 8220 - }, - { - "epoch": 0.4546710889994472, - "grad_norm": 2.829195499420166, - "learning_rate": 2.8041500852757253e-05, - "loss": 4.9243, - "step": 8225 - }, - { - "epoch": 0.45494748479823105, - "grad_norm": 2.964588165283203, - "learning_rate": 2.8027288231949973e-05, - "loss": 5.2799, - "step": 8230 - }, - { - "epoch": 0.4552238805970149, - "grad_norm": 3.4491324424743652, - "learning_rate": 2.8013075611142697e-05, - "loss": 5.3802, - "step": 8235 - }, - { - "epoch": 0.45550027639579876, - "grad_norm": 3.446174383163452, - "learning_rate": 2.7998862990335418e-05, - "loss": 5.2512, - "step": 8240 - }, - { - "epoch": 0.4557766721945826, - "grad_norm": 3.0602688789367676, - "learning_rate": 2.7984650369528142e-05, - "loss": 5.281, - "step": 8245 - }, - { - "epoch": 0.4560530679933665, - "grad_norm": 2.8950977325439453, - "learning_rate": 2.797043774872087e-05, - "loss": 4.9311, - "step": 8250 - }, - { - "epoch": 0.45632946379215034, - "grad_norm": 3.147683620452881, - "learning_rate": 2.7956225127913586e-05, - "loss": 5.1642, - "step": 8255 - }, - { - "epoch": 0.4566058595909342, - "grad_norm": 3.45888090133667, - "learning_rate": 2.7942012507106314e-05, - "loss": 5.1064, - "step": 8260 - }, - { - "epoch": 0.45688225538971805, - "grad_norm": 3.435093641281128, - "learning_rate": 2.7927799886299034e-05, - "loss": 5.4801, - "step": 8265 - }, - { - "epoch": 0.4571586511885019, - "grad_norm": 3.746147394180298, - "learning_rate": 2.7913587265491758e-05, - "loss": 5.3686, - "step": 8270 - }, - { - "epoch": 0.4574350469872858, - "grad_norm": 3.0401806831359863, - "learning_rate": 2.7899374644684482e-05, - "loss": 5.0805, - "step": 8275 - }, - { - "epoch": 0.4577114427860697, - "grad_norm": 4.183225154876709, - "learning_rate": 2.7885162023877203e-05, - "loss": 5.5761, - "step": 8280 - }, - { - "epoch": 0.45798783858485353, - "grad_norm": 3.05330491065979, - "learning_rate": 2.787094940306993e-05, - "loss": 5.195, - "step": 8285 - }, - { - "epoch": 0.4582642343836374, - "grad_norm": 3.5704240798950195, - "learning_rate": 2.7856736782262647e-05, - "loss": 5.01, - "step": 8290 - }, - { - "epoch": 0.45854063018242125, - "grad_norm": 3.817938804626465, - "learning_rate": 2.7842524161455375e-05, - "loss": 5.2507, - "step": 8295 - }, - { - "epoch": 0.4588170259812051, - "grad_norm": 2.9435267448425293, - "learning_rate": 2.7828311540648095e-05, - "loss": 5.3353, - "step": 8300 - }, - { - "epoch": 0.45909342177998896, - "grad_norm": 3.9453604221343994, - "learning_rate": 2.781409891984082e-05, - "loss": 5.2214, - "step": 8305 - }, - { - "epoch": 0.4593698175787728, - "grad_norm": 3.7097527980804443, - "learning_rate": 2.7799886299033547e-05, - "loss": 4.9059, - "step": 8310 - }, - { - "epoch": 0.4596462133775567, - "grad_norm": 2.8728573322296143, - "learning_rate": 2.7785673678226264e-05, - "loss": 5.2303, - "step": 8315 - }, - { - "epoch": 0.45992260917634054, - "grad_norm": 2.946532726287842, - "learning_rate": 2.777146105741899e-05, - "loss": 5.1568, - "step": 8320 - }, - { - "epoch": 0.4601990049751244, - "grad_norm": 2.8694427013397217, - "learning_rate": 2.7757248436611712e-05, - "loss": 5.2556, - "step": 8325 - }, - { - "epoch": 0.46047540077390825, - "grad_norm": 2.947298049926758, - "learning_rate": 2.7743035815804436e-05, - "loss": 5.269, - "step": 8330 - }, - { - "epoch": 0.4607517965726921, - "grad_norm": 2.9417827129364014, - "learning_rate": 2.772882319499716e-05, - "loss": 5.0295, - "step": 8335 - }, - { - "epoch": 0.46102819237147596, - "grad_norm": 3.7710001468658447, - "learning_rate": 2.771461057418988e-05, - "loss": 5.403, - "step": 8340 - }, - { - "epoch": 0.4613045881702598, - "grad_norm": 3.4150478839874268, - "learning_rate": 2.7700397953382608e-05, - "loss": 5.2718, - "step": 8345 - }, - { - "epoch": 0.4615809839690437, - "grad_norm": 3.6512367725372314, - "learning_rate": 2.7686185332575325e-05, - "loss": 5.1832, - "step": 8350 - }, - { - "epoch": 0.46185737976782754, - "grad_norm": 3.080050468444824, - "learning_rate": 2.7671972711768052e-05, - "loss": 5.4398, - "step": 8355 - }, - { - "epoch": 0.4621337755666114, - "grad_norm": 3.351170778274536, - "learning_rate": 2.7657760090960776e-05, - "loss": 5.3517, - "step": 8360 - }, - { - "epoch": 0.46241017136539525, - "grad_norm": 3.467395782470703, - "learning_rate": 2.7643547470153497e-05, - "loss": 5.3345, - "step": 8365 - }, - { - "epoch": 0.4626865671641791, - "grad_norm": 3.781153440475464, - "learning_rate": 2.762933484934622e-05, - "loss": 5.2006, - "step": 8370 - }, - { - "epoch": 0.46296296296296297, - "grad_norm": 3.0200061798095703, - "learning_rate": 2.761512222853894e-05, - "loss": 4.8137, - "step": 8375 - }, - { - "epoch": 0.4632393587617468, - "grad_norm": 3.3280367851257324, - "learning_rate": 2.760090960773167e-05, - "loss": 5.0456, - "step": 8380 - }, - { - "epoch": 0.4635157545605307, - "grad_norm": 3.1769330501556396, - "learning_rate": 2.7586696986924386e-05, - "loss": 5.3049, - "step": 8385 - }, - { - "epoch": 0.46379215035931454, - "grad_norm": 4.280707836151123, - "learning_rate": 2.7572484366117113e-05, - "loss": 5.2953, - "step": 8390 - }, - { - "epoch": 0.4640685461580984, - "grad_norm": 2.720076322555542, - "learning_rate": 2.7558271745309837e-05, - "loss": 5.269, - "step": 8395 - }, - { - "epoch": 0.46434494195688225, - "grad_norm": 3.4472744464874268, - "learning_rate": 2.7544059124502558e-05, - "loss": 5.1512, - "step": 8400 - }, - { - "epoch": 0.4646213377556661, - "grad_norm": 3.2112371921539307, - "learning_rate": 2.7529846503695282e-05, - "loss": 5.3101, - "step": 8405 - }, - { - "epoch": 0.46489773355444997, - "grad_norm": 3.637249708175659, - "learning_rate": 2.7515633882888003e-05, - "loss": 5.1246, - "step": 8410 - }, - { - "epoch": 0.4651741293532338, - "grad_norm": 2.8876161575317383, - "learning_rate": 2.750142126208073e-05, - "loss": 5.4245, - "step": 8415 - }, - { - "epoch": 0.4654505251520177, - "grad_norm": 3.1725854873657227, - "learning_rate": 2.7487208641273454e-05, - "loss": 5.0848, - "step": 8420 - }, - { - "epoch": 0.46572692095080154, - "grad_norm": 2.7039942741394043, - "learning_rate": 2.7472996020466175e-05, - "loss": 5.2807, - "step": 8425 - }, - { - "epoch": 0.4660033167495854, - "grad_norm": 2.8240089416503906, - "learning_rate": 2.74587833996589e-05, - "loss": 5.4056, - "step": 8430 - }, - { - "epoch": 0.46627971254836925, - "grad_norm": 3.1696321964263916, - "learning_rate": 2.744457077885162e-05, - "loss": 5.4693, - "step": 8435 - }, - { - "epoch": 0.4665561083471531, - "grad_norm": 2.855938196182251, - "learning_rate": 2.7430358158044343e-05, - "loss": 5.1644, - "step": 8440 - }, - { - "epoch": 0.46683250414593697, - "grad_norm": 3.3975577354431152, - "learning_rate": 2.741614553723707e-05, - "loss": 5.1207, - "step": 8445 - }, - { - "epoch": 0.4671088999447208, - "grad_norm": 3.4103002548217773, - "learning_rate": 2.740193291642979e-05, - "loss": 5.2251, - "step": 8450 - }, - { - "epoch": 0.4673852957435047, - "grad_norm": 3.4105570316314697, - "learning_rate": 2.7387720295622515e-05, - "loss": 5.1505, - "step": 8455 - }, - { - "epoch": 0.46766169154228854, - "grad_norm": 3.4730947017669678, - "learning_rate": 2.7373507674815236e-05, - "loss": 5.056, - "step": 8460 - }, - { - "epoch": 0.4679380873410724, - "grad_norm": 3.272139549255371, - "learning_rate": 2.735929505400796e-05, - "loss": 5.3615, - "step": 8465 - }, - { - "epoch": 0.46821448313985625, - "grad_norm": 2.538783073425293, - "learning_rate": 2.7345082433200687e-05, - "loss": 5.3061, - "step": 8470 - }, - { - "epoch": 0.4684908789386401, - "grad_norm": 2.9536359310150146, - "learning_rate": 2.7330869812393408e-05, - "loss": 5.2621, - "step": 8475 - }, - { - "epoch": 0.46876727473742397, - "grad_norm": 4.758022308349609, - "learning_rate": 2.731665719158613e-05, - "loss": 5.0966, - "step": 8480 - }, - { - "epoch": 0.4690436705362078, - "grad_norm": 3.3707218170166016, - "learning_rate": 2.7302444570778852e-05, - "loss": 5.4979, - "step": 8485 - }, - { - "epoch": 0.4693200663349917, - "grad_norm": 2.765289783477783, - "learning_rate": 2.7288231949971576e-05, - "loss": 5.4462, - "step": 8490 - }, - { - "epoch": 0.4695964621337756, - "grad_norm": 2.88114070892334, - "learning_rate": 2.7274019329164297e-05, - "loss": 5.7333, - "step": 8495 - }, - { - "epoch": 0.46987285793255945, - "grad_norm": 3.3201422691345215, - "learning_rate": 2.725980670835702e-05, - "loss": 5.2429, - "step": 8500 - }, - { - "epoch": 0.4701492537313433, - "grad_norm": 3.3583285808563232, - "learning_rate": 2.7245594087549748e-05, - "loss": 5.0037, - "step": 8505 - }, - { - "epoch": 0.47042564953012717, - "grad_norm": 3.485041618347168, - "learning_rate": 2.723138146674247e-05, - "loss": 5.1081, - "step": 8510 - }, - { - "epoch": 0.470702045328911, - "grad_norm": 3.287743330001831, - "learning_rate": 2.7217168845935193e-05, - "loss": 5.1691, - "step": 8515 - }, - { - "epoch": 0.4709784411276949, - "grad_norm": 3.2845332622528076, - "learning_rate": 2.7202956225127913e-05, - "loss": 5.0228, - "step": 8520 - }, - { - "epoch": 0.47125483692647874, - "grad_norm": 3.746253728866577, - "learning_rate": 2.7188743604320637e-05, - "loss": 4.901, - "step": 8525 - }, - { - "epoch": 0.4715312327252626, - "grad_norm": 3.2223970890045166, - "learning_rate": 2.7174530983513365e-05, - "loss": 4.915, - "step": 8530 - }, - { - "epoch": 0.47180762852404645, - "grad_norm": 3.4350712299346924, - "learning_rate": 2.7160318362706082e-05, - "loss": 5.2032, - "step": 8535 - }, - { - "epoch": 0.4720840243228303, - "grad_norm": 3.5283148288726807, - "learning_rate": 2.714610574189881e-05, - "loss": 5.0011, - "step": 8540 - }, - { - "epoch": 0.47236042012161417, - "grad_norm": 3.2964229583740234, - "learning_rate": 2.713189312109153e-05, - "loss": 5.1857, - "step": 8545 - }, - { - "epoch": 0.472636815920398, - "grad_norm": 2.962416172027588, - "learning_rate": 2.7117680500284254e-05, - "loss": 4.9636, - "step": 8550 - }, - { - "epoch": 0.4729132117191819, - "grad_norm": 3.4900479316711426, - "learning_rate": 2.7103467879476978e-05, - "loss": 4.8722, - "step": 8555 - }, - { - "epoch": 0.47318960751796574, - "grad_norm": 3.9601972103118896, - "learning_rate": 2.7089255258669698e-05, - "loss": 5.3924, - "step": 8560 - }, - { - "epoch": 0.4734660033167496, - "grad_norm": 3.298952102661133, - "learning_rate": 2.7075042637862426e-05, - "loss": 4.971, - "step": 8565 - }, - { - "epoch": 0.47374239911553345, - "grad_norm": 3.38555645942688, - "learning_rate": 2.7060830017055143e-05, - "loss": 5.0353, - "step": 8570 - }, - { - "epoch": 0.4740187949143173, - "grad_norm": 2.793022632598877, - "learning_rate": 2.704661739624787e-05, - "loss": 5.0542, - "step": 8575 - }, - { - "epoch": 0.47429519071310117, - "grad_norm": 2.7033803462982178, - "learning_rate": 2.7032404775440594e-05, - "loss": 5.1854, - "step": 8580 - }, - { - "epoch": 0.474571586511885, - "grad_norm": 3.3331148624420166, - "learning_rate": 2.7018192154633315e-05, - "loss": 5.3229, - "step": 8585 - }, - { - "epoch": 0.4748479823106689, - "grad_norm": 3.3170993328094482, - "learning_rate": 2.700397953382604e-05, - "loss": 5.2236, - "step": 8590 - }, - { - "epoch": 0.47512437810945274, - "grad_norm": 3.055724859237671, - "learning_rate": 2.698976691301876e-05, - "loss": 5.447, - "step": 8595 - }, - { - "epoch": 0.4754007739082366, - "grad_norm": 3.705082654953003, - "learning_rate": 2.6975554292211487e-05, - "loss": 5.2754, - "step": 8600 - }, - { - "epoch": 0.47567716970702045, - "grad_norm": 3.6504838466644287, - "learning_rate": 2.6961341671404204e-05, - "loss": 5.1527, - "step": 8605 - }, - { - "epoch": 0.4759535655058043, - "grad_norm": 3.557499647140503, - "learning_rate": 2.694712905059693e-05, - "loss": 5.1805, - "step": 8610 - }, - { - "epoch": 0.47622996130458817, - "grad_norm": 3.1619584560394287, - "learning_rate": 2.6932916429789655e-05, - "loss": 5.0161, - "step": 8615 - }, - { - "epoch": 0.476506357103372, - "grad_norm": 3.1936593055725098, - "learning_rate": 2.6918703808982376e-05, - "loss": 5.0251, - "step": 8620 - }, - { - "epoch": 0.4767827529021559, - "grad_norm": 3.5965256690979004, - "learning_rate": 2.6904491188175103e-05, - "loss": 4.8984, - "step": 8625 - }, - { - "epoch": 0.47705914870093974, - "grad_norm": 3.2955245971679688, - "learning_rate": 2.689027856736782e-05, - "loss": 5.1065, - "step": 8630 - }, - { - "epoch": 0.4773355444997236, - "grad_norm": 2.86582350730896, - "learning_rate": 2.6876065946560548e-05, - "loss": 5.3203, - "step": 8635 - }, - { - "epoch": 0.47761194029850745, - "grad_norm": 2.8077163696289062, - "learning_rate": 2.6861853325753272e-05, - "loss": 5.1535, - "step": 8640 - }, - { - "epoch": 0.4778883360972913, - "grad_norm": 3.2445852756500244, - "learning_rate": 2.6847640704945992e-05, - "loss": 5.3598, - "step": 8645 - }, - { - "epoch": 0.47816473189607517, - "grad_norm": 2.902517557144165, - "learning_rate": 2.6833428084138716e-05, - "loss": 5.1782, - "step": 8650 - }, - { - "epoch": 0.478441127694859, - "grad_norm": 3.0056138038635254, - "learning_rate": 2.6819215463331437e-05, - "loss": 5.2646, - "step": 8655 - }, - { - "epoch": 0.4787175234936429, - "grad_norm": 3.432762861251831, - "learning_rate": 2.6805002842524164e-05, - "loss": 5.3692, - "step": 8660 - }, - { - "epoch": 0.47899391929242674, - "grad_norm": 2.9771177768707275, - "learning_rate": 2.6790790221716888e-05, - "loss": 5.2922, - "step": 8665 - }, - { - "epoch": 0.4792703150912106, - "grad_norm": 2.8784945011138916, - "learning_rate": 2.677657760090961e-05, - "loss": 4.8865, - "step": 8670 - }, - { - "epoch": 0.47954671088999445, - "grad_norm": 3.0475375652313232, - "learning_rate": 2.6762364980102333e-05, - "loss": 5.0122, - "step": 8675 - }, - { - "epoch": 0.4798231066887783, - "grad_norm": 3.4154276847839355, - "learning_rate": 2.6748152359295053e-05, - "loss": 5.1384, - "step": 8680 - }, - { - "epoch": 0.48009950248756217, - "grad_norm": 2.841428279876709, - "learning_rate": 2.6733939738487777e-05, - "loss": 5.3496, - "step": 8685 - }, - { - "epoch": 0.480375898286346, - "grad_norm": 3.4203035831451416, - "learning_rate": 2.6719727117680505e-05, - "loss": 5.2612, - "step": 8690 - }, - { - "epoch": 0.4806522940851299, - "grad_norm": 2.9312570095062256, - "learning_rate": 2.6705514496873225e-05, - "loss": 5.065, - "step": 8695 - }, - { - "epoch": 0.48092868988391374, - "grad_norm": 4.5229668617248535, - "learning_rate": 2.669130187606595e-05, - "loss": 5.2577, - "step": 8700 - }, - { - "epoch": 0.4812050856826976, - "grad_norm": 4.566796779632568, - "learning_rate": 2.667708925525867e-05, - "loss": 5.4462, - "step": 8705 - }, - { - "epoch": 0.48148148148148145, - "grad_norm": 3.907799005508423, - "learning_rate": 2.6662876634451394e-05, - "loss": 5.3402, - "step": 8710 - }, - { - "epoch": 0.48175787728026537, - "grad_norm": 4.1701836585998535, - "learning_rate": 2.6648664013644114e-05, - "loss": 5.3027, - "step": 8715 - }, - { - "epoch": 0.4820342730790492, - "grad_norm": 2.956747055053711, - "learning_rate": 2.663445139283684e-05, - "loss": 5.2394, - "step": 8720 - }, - { - "epoch": 0.4823106688778331, - "grad_norm": 3.6254959106445312, - "learning_rate": 2.6620238772029566e-05, - "loss": 5.1001, - "step": 8725 - }, - { - "epoch": 0.48258706467661694, - "grad_norm": 3.541015386581421, - "learning_rate": 2.6606026151222286e-05, - "loss": 5.2681, - "step": 8730 - }, - { - "epoch": 0.4828634604754008, - "grad_norm": 2.8555665016174316, - "learning_rate": 2.659181353041501e-05, - "loss": 5.1359, - "step": 8735 - }, - { - "epoch": 0.48313985627418465, - "grad_norm": 3.2647249698638916, - "learning_rate": 2.657760090960773e-05, - "loss": 5.1848, - "step": 8740 - }, - { - "epoch": 0.4834162520729685, - "grad_norm": 3.473285675048828, - "learning_rate": 2.6563388288800455e-05, - "loss": 5.1448, - "step": 8745 - }, - { - "epoch": 0.48369264787175237, - "grad_norm": 4.414984226226807, - "learning_rate": 2.6549175667993182e-05, - "loss": 4.8777, - "step": 8750 - }, - { - "epoch": 0.4839690436705362, - "grad_norm": 3.5118448734283447, - "learning_rate": 2.65349630471859e-05, - "loss": 4.973, - "step": 8755 - }, - { - "epoch": 0.4842454394693201, - "grad_norm": 3.8873729705810547, - "learning_rate": 2.6520750426378627e-05, - "loss": 5.1884, - "step": 8760 - }, - { - "epoch": 0.48452183526810394, - "grad_norm": 2.8551294803619385, - "learning_rate": 2.6506537805571347e-05, - "loss": 5.2593, - "step": 8765 - }, - { - "epoch": 0.4847982310668878, - "grad_norm": 3.043487310409546, - "learning_rate": 2.649232518476407e-05, - "loss": 5.3543, - "step": 8770 - }, - { - "epoch": 0.48507462686567165, - "grad_norm": 2.915309429168701, - "learning_rate": 2.64781125639568e-05, - "loss": 5.1256, - "step": 8775 - }, - { - "epoch": 0.4853510226644555, - "grad_norm": 3.0859193801879883, - "learning_rate": 2.6463899943149516e-05, - "loss": 5.1491, - "step": 8780 - }, - { - "epoch": 0.48562741846323937, - "grad_norm": 3.597959518432617, - "learning_rate": 2.6449687322342243e-05, - "loss": 5.2776, - "step": 8785 - }, - { - "epoch": 0.4859038142620232, - "grad_norm": 3.311375617980957, - "learning_rate": 2.6435474701534964e-05, - "loss": 5.0004, - "step": 8790 - }, - { - "epoch": 0.4861802100608071, - "grad_norm": 2.9994888305664062, - "learning_rate": 2.6421262080727688e-05, - "loss": 5.2205, - "step": 8795 - }, - { - "epoch": 0.48645660585959094, - "grad_norm": 3.1175289154052734, - "learning_rate": 2.6407049459920412e-05, - "loss": 4.9917, - "step": 8800 - }, - { - "epoch": 0.4867330016583748, - "grad_norm": 3.356494426727295, - "learning_rate": 2.6392836839113133e-05, - "loss": 5.119, - "step": 8805 - }, - { - "epoch": 0.48700939745715865, - "grad_norm": 2.93296480178833, - "learning_rate": 2.637862421830586e-05, - "loss": 5.1587, - "step": 8810 - }, - { - "epoch": 0.4872857932559425, - "grad_norm": 3.3215854167938232, - "learning_rate": 2.6364411597498577e-05, - "loss": 5.3971, - "step": 8815 - }, - { - "epoch": 0.48756218905472637, - "grad_norm": 2.9889779090881348, - "learning_rate": 2.6350198976691304e-05, - "loss": 5.023, - "step": 8820 - }, - { - "epoch": 0.4878385848535102, - "grad_norm": 4.003651142120361, - "learning_rate": 2.6335986355884025e-05, - "loss": 5.3191, - "step": 8825 - }, - { - "epoch": 0.4881149806522941, - "grad_norm": 4.0643744468688965, - "learning_rate": 2.632177373507675e-05, - "loss": 5.4204, - "step": 8830 - }, - { - "epoch": 0.48839137645107794, - "grad_norm": 3.0099825859069824, - "learning_rate": 2.6307561114269473e-05, - "loss": 5.0147, - "step": 8835 - }, - { - "epoch": 0.4886677722498618, - "grad_norm": 2.961827516555786, - "learning_rate": 2.6293348493462194e-05, - "loss": 4.8997, - "step": 8840 - }, - { - "epoch": 0.48894416804864566, - "grad_norm": 2.8388969898223877, - "learning_rate": 2.627913587265492e-05, - "loss": 5.0859, - "step": 8845 - }, - { - "epoch": 0.4892205638474295, - "grad_norm": 3.230729341506958, - "learning_rate": 2.6264923251847638e-05, - "loss": 5.2606, - "step": 8850 - }, - { - "epoch": 0.48949695964621337, - "grad_norm": 2.873793363571167, - "learning_rate": 2.6250710631040366e-05, - "loss": 5.5914, - "step": 8855 - }, - { - "epoch": 0.4897733554449972, - "grad_norm": 3.9639649391174316, - "learning_rate": 2.623649801023309e-05, - "loss": 4.8028, - "step": 8860 - }, - { - "epoch": 0.4900497512437811, - "grad_norm": 3.220949411392212, - "learning_rate": 2.622228538942581e-05, - "loss": 5.2824, - "step": 8865 - }, - { - "epoch": 0.49032614704256494, - "grad_norm": 4.145904541015625, - "learning_rate": 2.6208072768618534e-05, - "loss": 5.3472, - "step": 8870 - }, - { - "epoch": 0.4906025428413488, - "grad_norm": 3.1538095474243164, - "learning_rate": 2.6193860147811255e-05, - "loss": 5.2069, - "step": 8875 - }, - { - "epoch": 0.49087893864013266, - "grad_norm": 3.5244972705841064, - "learning_rate": 2.6179647527003982e-05, - "loss": 5.048, - "step": 8880 - }, - { - "epoch": 0.4911553344389165, - "grad_norm": 3.0251150131225586, - "learning_rate": 2.6165434906196706e-05, - "loss": 5.1505, - "step": 8885 - }, - { - "epoch": 0.49143173023770037, - "grad_norm": 2.746638298034668, - "learning_rate": 2.6151222285389427e-05, - "loss": 5.2146, - "step": 8890 - }, - { - "epoch": 0.4917081260364842, - "grad_norm": 2.9804468154907227, - "learning_rate": 2.613700966458215e-05, - "loss": 5.1231, - "step": 8895 - }, - { - "epoch": 0.4919845218352681, - "grad_norm": 3.2078375816345215, - "learning_rate": 2.612279704377487e-05, - "loss": 4.9723, - "step": 8900 - }, - { - "epoch": 0.49226091763405194, - "grad_norm": 3.1018552780151367, - "learning_rate": 2.6108584422967595e-05, - "loss": 5.1348, - "step": 8905 - }, - { - "epoch": 0.4925373134328358, - "grad_norm": 2.991896390914917, - "learning_rate": 2.6094371802160323e-05, - "loss": 5.1945, - "step": 8910 - }, - { - "epoch": 0.49281370923161966, - "grad_norm": 3.790259599685669, - "learning_rate": 2.6080159181353043e-05, - "loss": 5.2229, - "step": 8915 - }, - { - "epoch": 0.4930901050304035, - "grad_norm": 2.9007620811462402, - "learning_rate": 2.6065946560545767e-05, - "loss": 4.7525, - "step": 8920 - }, - { - "epoch": 0.49336650082918737, - "grad_norm": 3.5712127685546875, - "learning_rate": 2.6051733939738488e-05, - "loss": 5.2762, - "step": 8925 - }, - { - "epoch": 0.49364289662797123, - "grad_norm": 3.451338291168213, - "learning_rate": 2.6037521318931212e-05, - "loss": 4.9248, - "step": 8930 - }, - { - "epoch": 0.49391929242675514, - "grad_norm": 3.268249988555908, - "learning_rate": 2.6023308698123932e-05, - "loss": 5.2842, - "step": 8935 - }, - { - "epoch": 0.494195688225539, - "grad_norm": 2.8075647354125977, - "learning_rate": 2.600909607731666e-05, - "loss": 5.4364, - "step": 8940 - }, - { - "epoch": 0.49447208402432286, - "grad_norm": 3.8568859100341797, - "learning_rate": 2.5994883456509384e-05, - "loss": 5.1617, - "step": 8945 - }, - { - "epoch": 0.4947484798231067, - "grad_norm": 4.422865867614746, - "learning_rate": 2.5980670835702104e-05, - "loss": 5.2414, - "step": 8950 - }, - { - "epoch": 0.49502487562189057, - "grad_norm": 3.7072761058807373, - "learning_rate": 2.5966458214894828e-05, - "loss": 4.9748, - "step": 8955 - }, - { - "epoch": 0.4953012714206744, - "grad_norm": 3.6415674686431885, - "learning_rate": 2.595224559408755e-05, - "loss": 5.3649, - "step": 8960 - }, - { - "epoch": 0.4955776672194583, - "grad_norm": 3.360414505004883, - "learning_rate": 2.5938032973280273e-05, - "loss": 4.9524, - "step": 8965 - }, - { - "epoch": 0.49585406301824214, - "grad_norm": 3.060049057006836, - "learning_rate": 2.5923820352473e-05, - "loss": 5.1118, - "step": 8970 - }, - { - "epoch": 0.496130458817026, - "grad_norm": 4.066228866577148, - "learning_rate": 2.590960773166572e-05, - "loss": 5.4221, - "step": 8975 - }, - { - "epoch": 0.49640685461580986, - "grad_norm": 3.416016101837158, - "learning_rate": 2.5895395110858445e-05, - "loss": 5.1152, - "step": 8980 - }, - { - "epoch": 0.4966832504145937, - "grad_norm": 3.874455451965332, - "learning_rate": 2.5881182490051165e-05, - "loss": 5.1813, - "step": 8985 - }, - { - "epoch": 0.49695964621337757, - "grad_norm": 3.0348269939422607, - "learning_rate": 2.586696986924389e-05, - "loss": 5.1617, - "step": 8990 - }, - { - "epoch": 0.4972360420121614, - "grad_norm": 3.266130208969116, - "learning_rate": 2.5852757248436617e-05, - "loss": 5.2492, - "step": 8995 - }, - { - "epoch": 0.4975124378109453, - "grad_norm": 3.8973846435546875, - "learning_rate": 2.5838544627629334e-05, - "loss": 5.1134, - "step": 9000 - }, - { - "epoch": 0.49778883360972914, - "grad_norm": 3.322026491165161, - "learning_rate": 2.582433200682206e-05, - "loss": 5.2699, - "step": 9005 - }, - { - "epoch": 0.498065229408513, - "grad_norm": 3.2580060958862305, - "learning_rate": 2.5810119386014782e-05, - "loss": 5.0649, - "step": 9010 - }, - { - "epoch": 0.49834162520729686, - "grad_norm": 3.0472872257232666, - "learning_rate": 2.5795906765207506e-05, - "loss": 5.1669, - "step": 9015 - }, - { - "epoch": 0.4986180210060807, - "grad_norm": 3.3452460765838623, - "learning_rate": 2.578169414440023e-05, - "loss": 5.4016, - "step": 9020 - }, - { - "epoch": 0.49889441680486457, - "grad_norm": 3.5364160537719727, - "learning_rate": 2.576748152359295e-05, - "loss": 5.2059, - "step": 9025 - }, - { - "epoch": 0.49917081260364843, - "grad_norm": 3.439129114151001, - "learning_rate": 2.5753268902785678e-05, - "loss": 5.4018, - "step": 9030 - }, - { - "epoch": 0.4994472084024323, - "grad_norm": 4.166476249694824, - "learning_rate": 2.5739056281978395e-05, - "loss": 4.9097, - "step": 9035 - }, - { - "epoch": 0.49972360420121614, - "grad_norm": 2.5692834854125977, - "learning_rate": 2.5724843661171122e-05, - "loss": 5.2335, - "step": 9040 - }, - { - "epoch": 0.5, - "grad_norm": 3.3796117305755615, - "learning_rate": 2.5710631040363843e-05, - "loss": 5.1816, - "step": 9045 - }, - { - "epoch": 0.5002763957987839, - "grad_norm": 3.1776998043060303, - "learning_rate": 2.5696418419556567e-05, - "loss": 5.1573, - "step": 9050 - }, - { - "epoch": 0.5005527915975677, - "grad_norm": 3.153939723968506, - "learning_rate": 2.5682205798749294e-05, - "loss": 5.2645, - "step": 9055 - }, - { - "epoch": 0.5008291873963516, - "grad_norm": 2.713528633117676, - "learning_rate": 2.566799317794201e-05, - "loss": 4.9227, - "step": 9060 - }, - { - "epoch": 0.5011055831951354, - "grad_norm": 3.3882882595062256, - "learning_rate": 2.565378055713474e-05, - "loss": 5.1723, - "step": 9065 - }, - { - "epoch": 0.5013819789939193, - "grad_norm": 3.6385247707366943, - "learning_rate": 2.5639567936327456e-05, - "loss": 5.0634, - "step": 9070 - }, - { - "epoch": 0.5016583747927031, - "grad_norm": 3.6225831508636475, - "learning_rate": 2.5625355315520183e-05, - "loss": 4.8257, - "step": 9075 - }, - { - "epoch": 0.5019347705914871, - "grad_norm": 3.2693662643432617, - "learning_rate": 2.5611142694712907e-05, - "loss": 5.0228, - "step": 9080 - }, - { - "epoch": 0.5022111663902709, - "grad_norm": 3.6240193843841553, - "learning_rate": 2.5596930073905628e-05, - "loss": 5.0373, - "step": 9085 - }, - { - "epoch": 0.5024875621890548, - "grad_norm": 3.114393472671509, - "learning_rate": 2.5582717453098355e-05, - "loss": 5.4028, - "step": 9090 - }, - { - "epoch": 0.5027639579878386, - "grad_norm": 3.7577624320983887, - "learning_rate": 2.5568504832291073e-05, - "loss": 5.0717, - "step": 9095 - }, - { - "epoch": 0.5030403537866225, - "grad_norm": 3.0983474254608154, - "learning_rate": 2.55542922114838e-05, - "loss": 5.5069, - "step": 9100 - }, - { - "epoch": 0.5033167495854063, - "grad_norm": 3.490330696105957, - "learning_rate": 2.5540079590676524e-05, - "loss": 5.1826, - "step": 9105 - }, - { - "epoch": 0.5035931453841902, - "grad_norm": 4.066411018371582, - "learning_rate": 2.5525866969869244e-05, - "loss": 5.2115, - "step": 9110 - }, - { - "epoch": 0.503869541182974, - "grad_norm": 3.8119757175445557, - "learning_rate": 2.551165434906197e-05, - "loss": 5.2514, - "step": 9115 - }, - { - "epoch": 0.5041459369817579, - "grad_norm": 3.7238693237304688, - "learning_rate": 2.549744172825469e-05, - "loss": 4.7289, - "step": 9120 - }, - { - "epoch": 0.5044223327805417, - "grad_norm": 3.154292583465576, - "learning_rate": 2.5483229107447416e-05, - "loss": 5.1676, - "step": 9125 - }, - { - "epoch": 0.5046987285793256, - "grad_norm": 3.4549925327301025, - "learning_rate": 2.546901648664014e-05, - "loss": 5.1312, - "step": 9130 - }, - { - "epoch": 0.5049751243781094, - "grad_norm": 3.0969717502593994, - "learning_rate": 2.545480386583286e-05, - "loss": 5.2531, - "step": 9135 - }, - { - "epoch": 0.5052515201768933, - "grad_norm": 6.122801780700684, - "learning_rate": 2.5440591245025585e-05, - "loss": 5.0781, - "step": 9140 - }, - { - "epoch": 0.5055279159756771, - "grad_norm": 3.613739013671875, - "learning_rate": 2.5426378624218306e-05, - "loss": 5.2013, - "step": 9145 - }, - { - "epoch": 0.5058043117744611, - "grad_norm": 3.0882883071899414, - "learning_rate": 2.541216600341103e-05, - "loss": 5.1607, - "step": 9150 - }, - { - "epoch": 0.5060807075732449, - "grad_norm": 3.7881696224212646, - "learning_rate": 2.539795338260375e-05, - "loss": 5.0951, - "step": 9155 - }, - { - "epoch": 0.5063571033720288, - "grad_norm": 3.161926507949829, - "learning_rate": 2.5383740761796477e-05, - "loss": 5.1998, - "step": 9160 - }, - { - "epoch": 0.5066334991708126, - "grad_norm": 3.223527193069458, - "learning_rate": 2.53695281409892e-05, - "loss": 5.2554, - "step": 9165 - }, - { - "epoch": 0.5069098949695965, - "grad_norm": 3.8171050548553467, - "learning_rate": 2.5355315520181922e-05, - "loss": 5.2001, - "step": 9170 - }, - { - "epoch": 0.5071862907683803, - "grad_norm": 2.8175442218780518, - "learning_rate": 2.5341102899374646e-05, - "loss": 5.1805, - "step": 9175 - }, - { - "epoch": 0.5074626865671642, - "grad_norm": 3.0957937240600586, - "learning_rate": 2.5326890278567367e-05, - "loss": 5.4076, - "step": 9180 - }, - { - "epoch": 0.507739082365948, - "grad_norm": 2.975939989089966, - "learning_rate": 2.531267765776009e-05, - "loss": 5.2884, - "step": 9185 - }, - { - "epoch": 0.5080154781647319, - "grad_norm": 4.01318359375, - "learning_rate": 2.5298465036952818e-05, - "loss": 5.4881, - "step": 9190 - }, - { - "epoch": 0.5082918739635157, - "grad_norm": 2.6178340911865234, - "learning_rate": 2.528425241614554e-05, - "loss": 5.0587, - "step": 9195 - }, - { - "epoch": 0.5085682697622996, - "grad_norm": 2.9565234184265137, - "learning_rate": 2.5270039795338263e-05, - "loss": 5.16, - "step": 9200 - }, - { - "epoch": 0.5088446655610834, - "grad_norm": 3.489684820175171, - "learning_rate": 2.5255827174530983e-05, - "loss": 5.0378, - "step": 9205 - }, - { - "epoch": 0.5091210613598673, - "grad_norm": 2.891176700592041, - "learning_rate": 2.5241614553723707e-05, - "loss": 5.0495, - "step": 9210 - }, - { - "epoch": 0.5093974571586511, - "grad_norm": 3.31199312210083, - "learning_rate": 2.5227401932916434e-05, - "loss": 5.2217, - "step": 9215 - }, - { - "epoch": 0.5096738529574351, - "grad_norm": 2.859729290008545, - "learning_rate": 2.521318931210915e-05, - "loss": 5.2588, - "step": 9220 - }, - { - "epoch": 0.5099502487562189, - "grad_norm": 3.0680506229400635, - "learning_rate": 2.519897669130188e-05, - "loss": 4.9947, - "step": 9225 - }, - { - "epoch": 0.5102266445550028, - "grad_norm": 3.4097602367401123, - "learning_rate": 2.51847640704946e-05, - "loss": 5.156, - "step": 9230 - }, - { - "epoch": 0.5105030403537866, - "grad_norm": 3.063750743865967, - "learning_rate": 2.5170551449687324e-05, - "loss": 5.1035, - "step": 9235 - }, - { - "epoch": 0.5107794361525705, - "grad_norm": 3.8064587116241455, - "learning_rate": 2.515633882888005e-05, - "loss": 5.3786, - "step": 9240 - }, - { - "epoch": 0.5110558319513543, - "grad_norm": 2.8018996715545654, - "learning_rate": 2.5142126208072768e-05, - "loss": 5.0211, - "step": 9245 - }, - { - "epoch": 0.5113322277501382, - "grad_norm": 2.8001601696014404, - "learning_rate": 2.5127913587265496e-05, - "loss": 5.3029, - "step": 9250 - }, - { - "epoch": 0.511608623548922, - "grad_norm": 3.171142339706421, - "learning_rate": 2.5113700966458216e-05, - "loss": 4.9662, - "step": 9255 - }, - { - "epoch": 0.5118850193477059, - "grad_norm": 2.9701507091522217, - "learning_rate": 2.509948834565094e-05, - "loss": 4.8072, - "step": 9260 - }, - { - "epoch": 0.5121614151464897, - "grad_norm": 3.0788140296936035, - "learning_rate": 2.508527572484366e-05, - "loss": 4.9462, - "step": 9265 - }, - { - "epoch": 0.5124378109452736, - "grad_norm": 3.2449519634246826, - "learning_rate": 2.5071063104036385e-05, - "loss": 5.1457, - "step": 9270 - }, - { - "epoch": 0.5127142067440575, - "grad_norm": 3.2045812606811523, - "learning_rate": 2.5056850483229112e-05, - "loss": 5.3124, - "step": 9275 - }, - { - "epoch": 0.5129906025428413, - "grad_norm": 3.2076756954193115, - "learning_rate": 2.504263786242183e-05, - "loss": 5.187, - "step": 9280 - }, - { - "epoch": 0.5132669983416253, - "grad_norm": 3.471508264541626, - "learning_rate": 2.5028425241614557e-05, - "loss": 5.3885, - "step": 9285 - }, - { - "epoch": 0.5135433941404091, - "grad_norm": 3.8236048221588135, - "learning_rate": 2.5014212620807277e-05, - "loss": 5.1868, - "step": 9290 - }, - { - "epoch": 0.513819789939193, - "grad_norm": 3.278804302215576, - "learning_rate": 2.5e-05, - "loss": 5.2836, - "step": 9295 - }, - { - "epoch": 0.5140961857379768, - "grad_norm": 3.5111825466156006, - "learning_rate": 2.4985787379192725e-05, - "loss": 5.3669, - "step": 9300 - }, - { - "epoch": 0.5143725815367607, - "grad_norm": 3.7894766330718994, - "learning_rate": 2.4971574758385446e-05, - "loss": 4.9516, - "step": 9305 - }, - { - "epoch": 0.5146489773355445, - "grad_norm": 2.5481996536254883, - "learning_rate": 2.4957362137578173e-05, - "loss": 5.1002, - "step": 9310 - }, - { - "epoch": 0.5149253731343284, - "grad_norm": 3.0527219772338867, - "learning_rate": 2.4943149516770894e-05, - "loss": 5.3666, - "step": 9315 - }, - { - "epoch": 0.5152017689331122, - "grad_norm": 3.6825625896453857, - "learning_rate": 2.4928936895963618e-05, - "loss": 5.0798, - "step": 9320 - }, - { - "epoch": 0.5154781647318961, - "grad_norm": 3.366027593612671, - "learning_rate": 2.4914724275156338e-05, - "loss": 5.2866, - "step": 9325 - }, - { - "epoch": 0.5157545605306799, - "grad_norm": 3.307328462600708, - "learning_rate": 2.4900511654349062e-05, - "loss": 4.9818, - "step": 9330 - }, - { - "epoch": 0.5160309563294638, - "grad_norm": 3.386768102645874, - "learning_rate": 2.4886299033541786e-05, - "loss": 5.1491, - "step": 9335 - }, - { - "epoch": 0.5163073521282476, - "grad_norm": 3.7182559967041016, - "learning_rate": 2.487208641273451e-05, - "loss": 5.2406, - "step": 9340 - }, - { - "epoch": 0.5165837479270315, - "grad_norm": 4.657354354858398, - "learning_rate": 2.4857873791927234e-05, - "loss": 4.9486, - "step": 9345 - }, - { - "epoch": 0.5168601437258153, - "grad_norm": 3.5175914764404297, - "learning_rate": 2.4843661171119955e-05, - "loss": 5.1466, - "step": 9350 - }, - { - "epoch": 0.5171365395245993, - "grad_norm": 3.5786592960357666, - "learning_rate": 2.482944855031268e-05, - "loss": 5.1679, - "step": 9355 - }, - { - "epoch": 0.5174129353233831, - "grad_norm": 2.8684964179992676, - "learning_rate": 2.48152359295054e-05, - "loss": 4.9781, - "step": 9360 - }, - { - "epoch": 0.517689331122167, - "grad_norm": 3.130319356918335, - "learning_rate": 2.4801023308698127e-05, - "loss": 5.1694, - "step": 9365 - }, - { - "epoch": 0.5179657269209508, - "grad_norm": 3.6741483211517334, - "learning_rate": 2.478681068789085e-05, - "loss": 4.9329, - "step": 9370 - }, - { - "epoch": 0.5182421227197347, - "grad_norm": 3.3555543422698975, - "learning_rate": 2.477259806708357e-05, - "loss": 5.0482, - "step": 9375 - }, - { - "epoch": 0.5185185185185185, - "grad_norm": 3.1766357421875, - "learning_rate": 2.4758385446276295e-05, - "loss": 5.0693, - "step": 9380 - }, - { - "epoch": 0.5187949143173024, - "grad_norm": 3.1514878273010254, - "learning_rate": 2.4744172825469016e-05, - "loss": 4.7144, - "step": 9385 - }, - { - "epoch": 0.5190713101160862, - "grad_norm": 3.2456977367401123, - "learning_rate": 2.472996020466174e-05, - "loss": 4.9498, - "step": 9390 - }, - { - "epoch": 0.5193477059148701, - "grad_norm": 3.266662836074829, - "learning_rate": 2.4715747583854464e-05, - "loss": 5.3611, - "step": 9395 - }, - { - "epoch": 0.5196241017136539, - "grad_norm": 2.9359843730926514, - "learning_rate": 2.4701534963047188e-05, - "loss": 5.0735, - "step": 9400 - }, - { - "epoch": 0.5199004975124378, - "grad_norm": 2.956838369369507, - "learning_rate": 2.4687322342239912e-05, - "loss": 5.2266, - "step": 9405 - }, - { - "epoch": 0.5201768933112216, - "grad_norm": 3.2221481800079346, - "learning_rate": 2.4673109721432632e-05, - "loss": 4.8549, - "step": 9410 - }, - { - "epoch": 0.5204532891100055, - "grad_norm": 4.0131635665893555, - "learning_rate": 2.4658897100625356e-05, - "loss": 5.0946, - "step": 9415 - }, - { - "epoch": 0.5207296849087893, - "grad_norm": 3.1099162101745605, - "learning_rate": 2.464468447981808e-05, - "loss": 5.4147, - "step": 9420 - }, - { - "epoch": 0.5210060807075733, - "grad_norm": 2.945998430252075, - "learning_rate": 2.4630471859010804e-05, - "loss": 5.0275, - "step": 9425 - }, - { - "epoch": 0.5212824765063571, - "grad_norm": 3.6736671924591064, - "learning_rate": 2.4616259238203525e-05, - "loss": 5.0205, - "step": 9430 - }, - { - "epoch": 0.521558872305141, - "grad_norm": 3.3827338218688965, - "learning_rate": 2.460204661739625e-05, - "loss": 5.2464, - "step": 9435 - }, - { - "epoch": 0.5218352681039248, - "grad_norm": 3.2636919021606445, - "learning_rate": 2.4587833996588973e-05, - "loss": 4.7875, - "step": 9440 - }, - { - "epoch": 0.5221116639027087, - "grad_norm": 2.948478937149048, - "learning_rate": 2.4573621375781693e-05, - "loss": 5.0072, - "step": 9445 - }, - { - "epoch": 0.5223880597014925, - "grad_norm": 3.093000888824463, - "learning_rate": 2.455940875497442e-05, - "loss": 4.9569, - "step": 9450 - }, - { - "epoch": 0.5226644555002764, - "grad_norm": 3.29803466796875, - "learning_rate": 2.454519613416714e-05, - "loss": 5.0124, - "step": 9455 - }, - { - "epoch": 0.5229408512990602, - "grad_norm": 2.948936700820923, - "learning_rate": 2.4530983513359865e-05, - "loss": 5.0278, - "step": 9460 - }, - { - "epoch": 0.5232172470978441, - "grad_norm": 3.211668014526367, - "learning_rate": 2.4516770892552586e-05, - "loss": 5.1416, - "step": 9465 - }, - { - "epoch": 0.5234936428966279, - "grad_norm": 4.215364456176758, - "learning_rate": 2.450255827174531e-05, - "loss": 4.8566, - "step": 9470 - }, - { - "epoch": 0.5237700386954118, - "grad_norm": 3.8499324321746826, - "learning_rate": 2.4488345650938034e-05, - "loss": 4.9764, - "step": 9475 - }, - { - "epoch": 0.5240464344941956, - "grad_norm": 3.159186840057373, - "learning_rate": 2.4474133030130758e-05, - "loss": 5.1786, - "step": 9480 - }, - { - "epoch": 0.5243228302929795, - "grad_norm": 3.4853217601776123, - "learning_rate": 2.4459920409323482e-05, - "loss": 5.2136, - "step": 9485 - }, - { - "epoch": 0.5245992260917635, - "grad_norm": 3.228022813796997, - "learning_rate": 2.4445707788516203e-05, - "loss": 5.498, - "step": 9490 - }, - { - "epoch": 0.5248756218905473, - "grad_norm": 2.8631765842437744, - "learning_rate": 2.4431495167708926e-05, - "loss": 5.1078, - "step": 9495 - }, - { - "epoch": 0.5251520176893312, - "grad_norm": 3.351837396621704, - "learning_rate": 2.4417282546901647e-05, - "loss": 5.2389, - "step": 9500 - }, - { - "epoch": 0.525428413488115, - "grad_norm": 4.084488391876221, - "learning_rate": 2.4403069926094374e-05, - "loss": 5.1804, - "step": 9505 - }, - { - "epoch": 0.5257048092868989, - "grad_norm": 4.037084579467773, - "learning_rate": 2.4388857305287095e-05, - "loss": 5.162, - "step": 9510 - }, - { - "epoch": 0.5259812050856827, - "grad_norm": 3.0944149494171143, - "learning_rate": 2.437464468447982e-05, - "loss": 5.0158, - "step": 9515 - }, - { - "epoch": 0.5262576008844666, - "grad_norm": 3.6755268573760986, - "learning_rate": 2.4360432063672543e-05, - "loss": 5.1988, - "step": 9520 - }, - { - "epoch": 0.5265339966832504, - "grad_norm": 3.344731330871582, - "learning_rate": 2.4346219442865264e-05, - "loss": 5.2125, - "step": 9525 - }, - { - "epoch": 0.5268103924820343, - "grad_norm": 4.068390369415283, - "learning_rate": 2.433200682205799e-05, - "loss": 5.4192, - "step": 9530 - }, - { - "epoch": 0.5270867882808181, - "grad_norm": 2.7342958450317383, - "learning_rate": 2.431779420125071e-05, - "loss": 5.1935, - "step": 9535 - }, - { - "epoch": 0.527363184079602, - "grad_norm": 3.347498893737793, - "learning_rate": 2.4303581580443436e-05, - "loss": 4.9082, - "step": 9540 - }, - { - "epoch": 0.5276395798783858, - "grad_norm": 3.082366466522217, - "learning_rate": 2.428936895963616e-05, - "loss": 5.1855, - "step": 9545 - }, - { - "epoch": 0.5279159756771697, - "grad_norm": 3.0448951721191406, - "learning_rate": 2.427515633882888e-05, - "loss": 5.3123, - "step": 9550 - }, - { - "epoch": 0.5281923714759535, - "grad_norm": 3.1284632682800293, - "learning_rate": 2.4260943718021604e-05, - "loss": 5.0266, - "step": 9555 - }, - { - "epoch": 0.5284687672747375, - "grad_norm": 3.3635149002075195, - "learning_rate": 2.4246731097214328e-05, - "loss": 5.1604, - "step": 9560 - }, - { - "epoch": 0.5287451630735213, - "grad_norm": 4.114374160766602, - "learning_rate": 2.4232518476407052e-05, - "loss": 5.027, - "step": 9565 - }, - { - "epoch": 0.5290215588723052, - "grad_norm": 2.970109701156616, - "learning_rate": 2.4218305855599773e-05, - "loss": 5.0342, - "step": 9570 - }, - { - "epoch": 0.529297954671089, - "grad_norm": 3.081437110900879, - "learning_rate": 2.4204093234792497e-05, - "loss": 4.9838, - "step": 9575 - }, - { - "epoch": 0.5295743504698729, - "grad_norm": 3.3371689319610596, - "learning_rate": 2.418988061398522e-05, - "loss": 5.0638, - "step": 9580 - }, - { - "epoch": 0.5298507462686567, - "grad_norm": 3.4338042736053467, - "learning_rate": 2.4175667993177945e-05, - "loss": 5.2537, - "step": 9585 - }, - { - "epoch": 0.5301271420674406, - "grad_norm": 2.9125192165374756, - "learning_rate": 2.416145537237067e-05, - "loss": 5.1985, - "step": 9590 - }, - { - "epoch": 0.5304035378662244, - "grad_norm": 2.703503131866455, - "learning_rate": 2.414724275156339e-05, - "loss": 5.0267, - "step": 9595 - }, - { - "epoch": 0.5306799336650083, - "grad_norm": 2.609586238861084, - "learning_rate": 2.4133030130756113e-05, - "loss": 5.181, - "step": 9600 - }, - { - "epoch": 0.5309563294637921, - "grad_norm": 3.443605661392212, - "learning_rate": 2.4118817509948834e-05, - "loss": 5.0051, - "step": 9605 - }, - { - "epoch": 0.531232725262576, - "grad_norm": 3.419793128967285, - "learning_rate": 2.4104604889141558e-05, - "loss": 5.0689, - "step": 9610 - }, - { - "epoch": 0.5315091210613598, - "grad_norm": 3.239157199859619, - "learning_rate": 2.409039226833428e-05, - "loss": 5.023, - "step": 9615 - }, - { - "epoch": 0.5317855168601437, - "grad_norm": 3.928612470626831, - "learning_rate": 2.4076179647527006e-05, - "loss": 5.0672, - "step": 9620 - }, - { - "epoch": 0.5320619126589275, - "grad_norm": 3.0533933639526367, - "learning_rate": 2.406196702671973e-05, - "loss": 4.8787, - "step": 9625 - }, - { - "epoch": 0.5323383084577115, - "grad_norm": 2.9441888332366943, - "learning_rate": 2.404775440591245e-05, - "loss": 4.7492, - "step": 9630 - }, - { - "epoch": 0.5326147042564953, - "grad_norm": 3.513918399810791, - "learning_rate": 2.4033541785105174e-05, - "loss": 5.1696, - "step": 9635 - }, - { - "epoch": 0.5328911000552792, - "grad_norm": 3.191592216491699, - "learning_rate": 2.4019329164297898e-05, - "loss": 5.0818, - "step": 9640 - }, - { - "epoch": 0.533167495854063, - "grad_norm": 3.9577038288116455, - "learning_rate": 2.4005116543490622e-05, - "loss": 5.0411, - "step": 9645 - }, - { - "epoch": 0.5334438916528469, - "grad_norm": 3.297288656234741, - "learning_rate": 2.3990903922683343e-05, - "loss": 5.009, - "step": 9650 - }, - { - "epoch": 0.5337202874516307, - "grad_norm": 2.9676246643066406, - "learning_rate": 2.3976691301876067e-05, - "loss": 5.2229, - "step": 9655 - }, - { - "epoch": 0.5339966832504146, - "grad_norm": 3.918351650238037, - "learning_rate": 2.396247868106879e-05, - "loss": 5.3025, - "step": 9660 - }, - { - "epoch": 0.5342730790491984, - "grad_norm": 3.9034459590911865, - "learning_rate": 2.394826606026151e-05, - "loss": 4.985, - "step": 9665 - }, - { - "epoch": 0.5345494748479823, - "grad_norm": 3.6231179237365723, - "learning_rate": 2.393405343945424e-05, - "loss": 4.8507, - "step": 9670 - }, - { - "epoch": 0.5348258706467661, - "grad_norm": 2.965517044067383, - "learning_rate": 2.391984081864696e-05, - "loss": 5.2807, - "step": 9675 - }, - { - "epoch": 0.53510226644555, - "grad_norm": 3.329669713973999, - "learning_rate": 2.3905628197839683e-05, - "loss": 5.1108, - "step": 9680 - }, - { - "epoch": 0.5353786622443338, - "grad_norm": 3.134333372116089, - "learning_rate": 2.3891415577032407e-05, - "loss": 5.1821, - "step": 9685 - }, - { - "epoch": 0.5356550580431177, - "grad_norm": 2.873486042022705, - "learning_rate": 2.3877202956225128e-05, - "loss": 5.0137, - "step": 9690 - }, - { - "epoch": 0.5359314538419016, - "grad_norm": 3.7581984996795654, - "learning_rate": 2.3862990335417855e-05, - "loss": 5.1051, - "step": 9695 - }, - { - "epoch": 0.5362078496406855, - "grad_norm": 4.077391147613525, - "learning_rate": 2.3848777714610576e-05, - "loss": 5.2823, - "step": 9700 - }, - { - "epoch": 0.5364842454394693, - "grad_norm": 3.292609930038452, - "learning_rate": 2.38345650938033e-05, - "loss": 5.0104, - "step": 9705 - }, - { - "epoch": 0.5367606412382532, - "grad_norm": 3.4207916259765625, - "learning_rate": 2.382035247299602e-05, - "loss": 5.2349, - "step": 9710 - }, - { - "epoch": 0.5370370370370371, - "grad_norm": 3.6559712886810303, - "learning_rate": 2.3806139852188744e-05, - "loss": 5.3262, - "step": 9715 - }, - { - "epoch": 0.5373134328358209, - "grad_norm": 2.987576484680176, - "learning_rate": 2.3791927231381468e-05, - "loss": 5.1957, - "step": 9720 - }, - { - "epoch": 0.5375898286346048, - "grad_norm": 3.834031105041504, - "learning_rate": 2.3777714610574192e-05, - "loss": 5.2105, - "step": 9725 - }, - { - "epoch": 0.5378662244333886, - "grad_norm": 2.903601884841919, - "learning_rate": 2.3763501989766916e-05, - "loss": 5.0302, - "step": 9730 - }, - { - "epoch": 0.5381426202321725, - "grad_norm": 4.139296531677246, - "learning_rate": 2.3749289368959637e-05, - "loss": 5.1185, - "step": 9735 - }, - { - "epoch": 0.5384190160309563, - "grad_norm": 3.0780575275421143, - "learning_rate": 2.373507674815236e-05, - "loss": 5.0262, - "step": 9740 - }, - { - "epoch": 0.5386954118297402, - "grad_norm": 2.722825288772583, - "learning_rate": 2.372086412734508e-05, - "loss": 4.9768, - "step": 9745 - }, - { - "epoch": 0.538971807628524, - "grad_norm": 2.6656720638275146, - "learning_rate": 2.370665150653781e-05, - "loss": 5.0315, - "step": 9750 - }, - { - "epoch": 0.539248203427308, - "grad_norm": 3.2058663368225098, - "learning_rate": 2.369243888573053e-05, - "loss": 4.9424, - "step": 9755 - }, - { - "epoch": 0.5395245992260918, - "grad_norm": 3.5652551651000977, - "learning_rate": 2.3678226264923253e-05, - "loss": 4.9202, - "step": 9760 - }, - { - "epoch": 0.5398009950248757, - "grad_norm": 4.415959358215332, - "learning_rate": 2.3664013644115977e-05, - "loss": 5.3065, - "step": 9765 - }, - { - "epoch": 0.5400773908236595, - "grad_norm": 3.7161080837249756, - "learning_rate": 2.3649801023308698e-05, - "loss": 5.3898, - "step": 9770 - }, - { - "epoch": 0.5403537866224434, - "grad_norm": 4.07873010635376, - "learning_rate": 2.3635588402501422e-05, - "loss": 5.0935, - "step": 9775 - }, - { - "epoch": 0.5406301824212272, - "grad_norm": 4.138256549835205, - "learning_rate": 2.3621375781694146e-05, - "loss": 4.8865, - "step": 9780 - }, - { - "epoch": 0.5409065782200111, - "grad_norm": 3.804762601852417, - "learning_rate": 2.360716316088687e-05, - "loss": 5.1764, - "step": 9785 - }, - { - "epoch": 0.5411829740187949, - "grad_norm": 3.2528018951416016, - "learning_rate": 2.359295054007959e-05, - "loss": 5.3817, - "step": 9790 - }, - { - "epoch": 0.5414593698175788, - "grad_norm": 3.966082811355591, - "learning_rate": 2.3578737919272314e-05, - "loss": 5.1415, - "step": 9795 - }, - { - "epoch": 0.5417357656163626, - "grad_norm": 3.4930150508880615, - "learning_rate": 2.356452529846504e-05, - "loss": 5.2255, - "step": 9800 - }, - { - "epoch": 0.5420121614151465, - "grad_norm": 3.7764456272125244, - "learning_rate": 2.355031267765776e-05, - "loss": 5.3015, - "step": 9805 - }, - { - "epoch": 0.5422885572139303, - "grad_norm": 3.298708200454712, - "learning_rate": 2.3536100056850486e-05, - "loss": 4.8978, - "step": 9810 - }, - { - "epoch": 0.5425649530127142, - "grad_norm": 3.262017011642456, - "learning_rate": 2.3521887436043207e-05, - "loss": 5.2939, - "step": 9815 - }, - { - "epoch": 0.542841348811498, - "grad_norm": 3.5957589149475098, - "learning_rate": 2.350767481523593e-05, - "loss": 5.3993, - "step": 9820 - }, - { - "epoch": 0.543117744610282, - "grad_norm": 3.324526071548462, - "learning_rate": 2.349346219442865e-05, - "loss": 4.8801, - "step": 9825 - }, - { - "epoch": 0.5433941404090658, - "grad_norm": 2.6901962757110596, - "learning_rate": 2.3479249573621375e-05, - "loss": 4.8502, - "step": 9830 - }, - { - "epoch": 0.5436705362078497, - "grad_norm": 3.8433539867401123, - "learning_rate": 2.3465036952814103e-05, - "loss": 5.1071, - "step": 9835 - }, - { - "epoch": 0.5439469320066335, - "grad_norm": 3.26355242729187, - "learning_rate": 2.3450824332006823e-05, - "loss": 5.0674, - "step": 9840 - }, - { - "epoch": 0.5442233278054174, - "grad_norm": 3.1263980865478516, - "learning_rate": 2.3436611711199547e-05, - "loss": 5.2538, - "step": 9845 - }, - { - "epoch": 0.5444997236042012, - "grad_norm": 3.164491891860962, - "learning_rate": 2.3422399090392268e-05, - "loss": 5.1808, - "step": 9850 - }, - { - "epoch": 0.5447761194029851, - "grad_norm": 2.887119770050049, - "learning_rate": 2.3408186469584992e-05, - "loss": 4.8469, - "step": 9855 - }, - { - "epoch": 0.5450525152017689, - "grad_norm": 3.320695161819458, - "learning_rate": 2.3393973848777716e-05, - "loss": 4.9163, - "step": 9860 - }, - { - "epoch": 0.5453289110005528, - "grad_norm": 3.467114210128784, - "learning_rate": 2.337976122797044e-05, - "loss": 5.0402, - "step": 9865 - }, - { - "epoch": 0.5456053067993366, - "grad_norm": 3.7807321548461914, - "learning_rate": 2.3365548607163164e-05, - "loss": 5.4195, - "step": 9870 - }, - { - "epoch": 0.5458817025981205, - "grad_norm": 3.7920944690704346, - "learning_rate": 2.3351335986355885e-05, - "loss": 4.8416, - "step": 9875 - }, - { - "epoch": 0.5461580983969043, - "grad_norm": 3.7439687252044678, - "learning_rate": 2.333712336554861e-05, - "loss": 5.0364, - "step": 9880 - }, - { - "epoch": 0.5464344941956882, - "grad_norm": 3.955336093902588, - "learning_rate": 2.332291074474133e-05, - "loss": 5.5306, - "step": 9885 - }, - { - "epoch": 0.546710889994472, - "grad_norm": 3.5154666900634766, - "learning_rate": 2.3308698123934056e-05, - "loss": 5.1119, - "step": 9890 - }, - { - "epoch": 0.546987285793256, - "grad_norm": 3.323625326156616, - "learning_rate": 2.3294485503126777e-05, - "loss": 5.1608, - "step": 9895 - }, - { - "epoch": 0.5472636815920398, - "grad_norm": 2.7888081073760986, - "learning_rate": 2.32802728823195e-05, - "loss": 5.173, - "step": 9900 - }, - { - "epoch": 0.5475400773908237, - "grad_norm": 3.7210378646850586, - "learning_rate": 2.3266060261512225e-05, - "loss": 5.3936, - "step": 9905 - }, - { - "epoch": 0.5478164731896075, - "grad_norm": 3.4708786010742188, - "learning_rate": 2.3251847640704946e-05, - "loss": 5.1886, - "step": 9910 - }, - { - "epoch": 0.5480928689883914, - "grad_norm": 3.851804494857788, - "learning_rate": 2.323763501989767e-05, - "loss": 5.0378, - "step": 9915 - }, - { - "epoch": 0.5483692647871752, - "grad_norm": 3.7334494590759277, - "learning_rate": 2.3223422399090394e-05, - "loss": 4.8619, - "step": 9920 - }, - { - "epoch": 0.5486456605859591, - "grad_norm": 3.100353479385376, - "learning_rate": 2.3209209778283118e-05, - "loss": 4.8678, - "step": 9925 - }, - { - "epoch": 0.548922056384743, - "grad_norm": 3.700317621231079, - "learning_rate": 2.3194997157475838e-05, - "loss": 5.2288, - "step": 9930 - }, - { - "epoch": 0.5491984521835268, - "grad_norm": 2.94370174407959, - "learning_rate": 2.3180784536668562e-05, - "loss": 5.097, - "step": 9935 - }, - { - "epoch": 0.5494748479823107, - "grad_norm": 2.9802255630493164, - "learning_rate": 2.3166571915861286e-05, - "loss": 5.1708, - "step": 9940 - }, - { - "epoch": 0.5497512437810945, - "grad_norm": 3.3415024280548096, - "learning_rate": 2.315235929505401e-05, - "loss": 5.2635, - "step": 9945 - }, - { - "epoch": 0.5500276395798784, - "grad_norm": 2.9417765140533447, - "learning_rate": 2.3138146674246734e-05, - "loss": 5.2387, - "step": 9950 - }, - { - "epoch": 0.5503040353786622, - "grad_norm": 3.112800359725952, - "learning_rate": 2.3123934053439455e-05, - "loss": 5.0443, - "step": 9955 - }, - { - "epoch": 0.5505804311774462, - "grad_norm": 2.728961944580078, - "learning_rate": 2.310972143263218e-05, - "loss": 5.0209, - "step": 9960 - }, - { - "epoch": 0.55085682697623, - "grad_norm": 3.2710981369018555, - "learning_rate": 2.30955088118249e-05, - "loss": 5.2521, - "step": 9965 - }, - { - "epoch": 0.5511332227750139, - "grad_norm": 3.748812675476074, - "learning_rate": 2.3081296191017623e-05, - "loss": 5.3927, - "step": 9970 - }, - { - "epoch": 0.5514096185737977, - "grad_norm": 3.3356282711029053, - "learning_rate": 2.3067083570210347e-05, - "loss": 5.1706, - "step": 9975 - }, - { - "epoch": 0.5516860143725816, - "grad_norm": 2.5249369144439697, - "learning_rate": 2.305287094940307e-05, - "loss": 5.0159, - "step": 9980 - }, - { - "epoch": 0.5519624101713654, - "grad_norm": 3.26520037651062, - "learning_rate": 2.3038658328595795e-05, - "loss": 5.1818, - "step": 9985 - }, - { - "epoch": 0.5522388059701493, - "grad_norm": 3.965653896331787, - "learning_rate": 2.3024445707788516e-05, - "loss": 4.8768, - "step": 9990 - }, - { - "epoch": 0.5525152017689331, - "grad_norm": 4.425838947296143, - "learning_rate": 2.301023308698124e-05, - "loss": 5.2812, - "step": 9995 - }, - { - "epoch": 0.552791597567717, - "grad_norm": 3.3867270946502686, - "learning_rate": 2.2996020466173964e-05, - "loss": 5.1826, - "step": 10000 - }, - { - "epoch": 0.5530679933665008, - "grad_norm": 3.590095043182373, - "learning_rate": 2.2981807845366688e-05, - "loss": 5.2498, - "step": 10005 - }, - { - "epoch": 0.5533443891652847, - "grad_norm": 3.230135440826416, - "learning_rate": 2.296759522455941e-05, - "loss": 5.0662, - "step": 10010 - }, - { - "epoch": 0.5536207849640685, - "grad_norm": 3.142805337905884, - "learning_rate": 2.2953382603752132e-05, - "loss": 5.1993, - "step": 10015 - }, - { - "epoch": 0.5538971807628524, - "grad_norm": 4.192418098449707, - "learning_rate": 2.2939169982944856e-05, - "loss": 5.1139, - "step": 10020 - }, - { - "epoch": 0.5541735765616362, - "grad_norm": 3.831026315689087, - "learning_rate": 2.2924957362137577e-05, - "loss": 5.1316, - "step": 10025 - }, - { - "epoch": 0.5544499723604202, - "grad_norm": 3.54022216796875, - "learning_rate": 2.2910744741330304e-05, - "loss": 5.0901, - "step": 10030 - }, - { - "epoch": 0.554726368159204, - "grad_norm": 4.166489601135254, - "learning_rate": 2.2896532120523025e-05, - "loss": 5.1592, - "step": 10035 - }, - { - "epoch": 0.5550027639579879, - "grad_norm": 3.5303165912628174, - "learning_rate": 2.288231949971575e-05, - "loss": 4.9975, - "step": 10040 - }, - { - "epoch": 0.5552791597567717, - "grad_norm": 3.065279245376587, - "learning_rate": 2.2868106878908473e-05, - "loss": 5.1, - "step": 10045 - }, - { - "epoch": 0.5555555555555556, - "grad_norm": 4.913681507110596, - "learning_rate": 2.2853894258101193e-05, - "loss": 5.207, - "step": 10050 - }, - { - "epoch": 0.5558319513543394, - "grad_norm": 3.3215198516845703, - "learning_rate": 2.283968163729392e-05, - "loss": 5.0969, - "step": 10055 - }, - { - "epoch": 0.5561083471531233, - "grad_norm": 3.600184440612793, - "learning_rate": 2.282546901648664e-05, - "loss": 4.8922, - "step": 10060 - }, - { - "epoch": 0.5563847429519071, - "grad_norm": 3.041132926940918, - "learning_rate": 2.2811256395679365e-05, - "loss": 5.2081, - "step": 10065 - }, - { - "epoch": 0.556661138750691, - "grad_norm": 3.7934274673461914, - "learning_rate": 2.2797043774872086e-05, - "loss": 5.1229, - "step": 10070 - }, - { - "epoch": 0.5569375345494748, - "grad_norm": 2.82381010055542, - "learning_rate": 2.278283115406481e-05, - "loss": 5.2129, - "step": 10075 - }, - { - "epoch": 0.5572139303482587, - "grad_norm": 3.184391736984253, - "learning_rate": 2.2768618533257534e-05, - "loss": 5.199, - "step": 10080 - }, - { - "epoch": 0.5574903261470425, - "grad_norm": 2.923619031906128, - "learning_rate": 2.2754405912450258e-05, - "loss": 5.2113, - "step": 10085 - }, - { - "epoch": 0.5577667219458264, - "grad_norm": 3.289689540863037, - "learning_rate": 2.2740193291642982e-05, - "loss": 5.3625, - "step": 10090 - }, - { - "epoch": 0.5580431177446102, - "grad_norm": 3.128100633621216, - "learning_rate": 2.2725980670835702e-05, - "loss": 4.9404, - "step": 10095 - }, - { - "epoch": 0.5583195135433942, - "grad_norm": 3.116595983505249, - "learning_rate": 2.2711768050028426e-05, - "loss": 4.6508, - "step": 10100 - }, - { - "epoch": 0.558595909342178, - "grad_norm": 3.509528636932373, - "learning_rate": 2.2697555429221147e-05, - "loss": 5.3815, - "step": 10105 - }, - { - "epoch": 0.5588723051409619, - "grad_norm": 2.830974578857422, - "learning_rate": 2.2683342808413874e-05, - "loss": 5.0021, - "step": 10110 - }, - { - "epoch": 0.5591487009397457, - "grad_norm": 3.5547916889190674, - "learning_rate": 2.2669130187606595e-05, - "loss": 5.1721, - "step": 10115 - }, - { - "epoch": 0.5594250967385296, - "grad_norm": 3.2966182231903076, - "learning_rate": 2.265491756679932e-05, - "loss": 5.1488, - "step": 10120 - }, - { - "epoch": 0.5597014925373134, - "grad_norm": 3.2352070808410645, - "learning_rate": 2.2640704945992043e-05, - "loss": 4.8284, - "step": 10125 - }, - { - "epoch": 0.5599778883360973, - "grad_norm": 3.2427544593811035, - "learning_rate": 2.2626492325184763e-05, - "loss": 5.1183, - "step": 10130 - }, - { - "epoch": 0.5602542841348811, - "grad_norm": 2.5661115646362305, - "learning_rate": 2.2612279704377487e-05, - "loss": 5.1111, - "step": 10135 - }, - { - "epoch": 0.560530679933665, - "grad_norm": 3.292429208755493, - "learning_rate": 2.259806708357021e-05, - "loss": 5.2935, - "step": 10140 - }, - { - "epoch": 0.5608070757324488, - "grad_norm": 3.2697348594665527, - "learning_rate": 2.2583854462762935e-05, - "loss": 5.3207, - "step": 10145 - }, - { - "epoch": 0.5610834715312327, - "grad_norm": 2.9878225326538086, - "learning_rate": 2.256964184195566e-05, - "loss": 4.8464, - "step": 10150 - }, - { - "epoch": 0.5613598673300166, - "grad_norm": 3.4964377880096436, - "learning_rate": 2.255542922114838e-05, - "loss": 5.2375, - "step": 10155 - }, - { - "epoch": 0.5616362631288004, - "grad_norm": 3.2263262271881104, - "learning_rate": 2.2541216600341104e-05, - "loss": 5.1451, - "step": 10160 - }, - { - "epoch": 0.5619126589275844, - "grad_norm": 2.8740015029907227, - "learning_rate": 2.2527003979533828e-05, - "loss": 4.9799, - "step": 10165 - }, - { - "epoch": 0.5621890547263682, - "grad_norm": 3.3548688888549805, - "learning_rate": 2.2512791358726552e-05, - "loss": 5.0609, - "step": 10170 - }, - { - "epoch": 0.5624654505251521, - "grad_norm": 3.348917245864868, - "learning_rate": 2.2498578737919272e-05, - "loss": 5.0375, - "step": 10175 - }, - { - "epoch": 0.5627418463239359, - "grad_norm": 2.9906487464904785, - "learning_rate": 2.2484366117111996e-05, - "loss": 5.109, - "step": 10180 - }, - { - "epoch": 0.5630182421227198, - "grad_norm": 3.7559046745300293, - "learning_rate": 2.247015349630472e-05, - "loss": 4.9875, - "step": 10185 - }, - { - "epoch": 0.5632946379215036, - "grad_norm": 4.169631004333496, - "learning_rate": 2.245594087549744e-05, - "loss": 5.0137, - "step": 10190 - }, - { - "epoch": 0.5635710337202875, - "grad_norm": 3.692657709121704, - "learning_rate": 2.244172825469017e-05, - "loss": 5.1208, - "step": 10195 - }, - { - "epoch": 0.5638474295190713, - "grad_norm": 4.431186199188232, - "learning_rate": 2.242751563388289e-05, - "loss": 5.0208, - "step": 10200 - }, - { - "epoch": 0.5641238253178552, - "grad_norm": 3.045299530029297, - "learning_rate": 2.2413303013075613e-05, - "loss": 4.8395, - "step": 10205 - }, - { - "epoch": 0.564400221116639, - "grad_norm": 2.9593913555145264, - "learning_rate": 2.2399090392268334e-05, - "loss": 5.0236, - "step": 10210 - }, - { - "epoch": 0.5646766169154229, - "grad_norm": 3.3709757328033447, - "learning_rate": 2.2384877771461058e-05, - "loss": 5.3689, - "step": 10215 - }, - { - "epoch": 0.5649530127142067, - "grad_norm": 3.7486398220062256, - "learning_rate": 2.237066515065378e-05, - "loss": 5.1189, - "step": 10220 - }, - { - "epoch": 0.5652294085129906, - "grad_norm": 2.766789674758911, - "learning_rate": 2.2356452529846505e-05, - "loss": 5.1984, - "step": 10225 - }, - { - "epoch": 0.5655058043117744, - "grad_norm": 3.7790632247924805, - "learning_rate": 2.234223990903923e-05, - "loss": 5.0615, - "step": 10230 - }, - { - "epoch": 0.5657822001105584, - "grad_norm": 3.1158392429351807, - "learning_rate": 2.232802728823195e-05, - "loss": 5.3859, - "step": 10235 - }, - { - "epoch": 0.5660585959093422, - "grad_norm": 3.7767012119293213, - "learning_rate": 2.2313814667424674e-05, - "loss": 5.1952, - "step": 10240 - }, - { - "epoch": 0.5663349917081261, - "grad_norm": 3.6504740715026855, - "learning_rate": 2.2299602046617395e-05, - "loss": 4.9556, - "step": 10245 - }, - { - "epoch": 0.5666113875069099, - "grad_norm": 3.328760862350464, - "learning_rate": 2.2285389425810122e-05, - "loss": 5.1943, - "step": 10250 - }, - { - "epoch": 0.5668877833056938, - "grad_norm": 2.9243085384368896, - "learning_rate": 2.2271176805002843e-05, - "loss": 4.821, - "step": 10255 - }, - { - "epoch": 0.5671641791044776, - "grad_norm": 3.004983425140381, - "learning_rate": 2.2256964184195567e-05, - "loss": 5.1676, - "step": 10260 - }, - { - "epoch": 0.5674405749032615, - "grad_norm": 3.309654474258423, - "learning_rate": 2.224275156338829e-05, - "loss": 4.9507, - "step": 10265 - }, - { - "epoch": 0.5677169707020453, - "grad_norm": 2.664963960647583, - "learning_rate": 2.222853894258101e-05, - "loss": 4.8282, - "step": 10270 - }, - { - "epoch": 0.5679933665008292, - "grad_norm": 3.0774271488189697, - "learning_rate": 2.221432632177374e-05, - "loss": 4.9935, - "step": 10275 - }, - { - "epoch": 0.568269762299613, - "grad_norm": 3.3895280361175537, - "learning_rate": 2.220011370096646e-05, - "loss": 5.2864, - "step": 10280 - }, - { - "epoch": 0.5685461580983969, - "grad_norm": 4.060614109039307, - "learning_rate": 2.2185901080159183e-05, - "loss": 5.4247, - "step": 10285 - }, - { - "epoch": 0.5688225538971807, - "grad_norm": 4.643860816955566, - "learning_rate": 2.2171688459351904e-05, - "loss": 5.0794, - "step": 10290 - }, - { - "epoch": 0.5690989496959646, - "grad_norm": 3.1240062713623047, - "learning_rate": 2.2157475838544628e-05, - "loss": 5.1919, - "step": 10295 - }, - { - "epoch": 0.5693753454947484, - "grad_norm": 2.9779624938964844, - "learning_rate": 2.214326321773735e-05, - "loss": 5.066, - "step": 10300 - }, - { - "epoch": 0.5696517412935324, - "grad_norm": 3.8186936378479004, - "learning_rate": 2.2129050596930076e-05, - "loss": 5.2885, - "step": 10305 - }, - { - "epoch": 0.5699281370923162, - "grad_norm": 3.409640073776245, - "learning_rate": 2.21148379761228e-05, - "loss": 4.9745, - "step": 10310 - }, - { - "epoch": 0.5702045328911001, - "grad_norm": 4.826499938964844, - "learning_rate": 2.210062535531552e-05, - "loss": 5.1617, - "step": 10315 - }, - { - "epoch": 0.5704809286898839, - "grad_norm": 3.2550175189971924, - "learning_rate": 2.2086412734508244e-05, - "loss": 5.0947, - "step": 10320 - }, - { - "epoch": 0.5707573244886678, - "grad_norm": 3.287470579147339, - "learning_rate": 2.2072200113700968e-05, - "loss": 4.8948, - "step": 10325 - }, - { - "epoch": 0.5710337202874516, - "grad_norm": 2.8862740993499756, - "learning_rate": 2.2057987492893692e-05, - "loss": 4.8842, - "step": 10330 - }, - { - "epoch": 0.5713101160862355, - "grad_norm": 3.2922773361206055, - "learning_rate": 2.2043774872086416e-05, - "loss": 5.13, - "step": 10335 - }, - { - "epoch": 0.5715865118850193, - "grad_norm": 3.6037707328796387, - "learning_rate": 2.2029562251279137e-05, - "loss": 4.9092, - "step": 10340 - }, - { - "epoch": 0.5718629076838032, - "grad_norm": 3.365394115447998, - "learning_rate": 2.201534963047186e-05, - "loss": 5.3031, - "step": 10345 - }, - { - "epoch": 0.572139303482587, - "grad_norm": 4.060079574584961, - "learning_rate": 2.200113700966458e-05, - "loss": 5.4657, - "step": 10350 - }, - { - "epoch": 0.5724156992813709, - "grad_norm": 3.4800689220428467, - "learning_rate": 2.1986924388857305e-05, - "loss": 5.0092, - "step": 10355 - }, - { - "epoch": 0.5726920950801547, - "grad_norm": 3.4629600048065186, - "learning_rate": 2.197271176805003e-05, - "loss": 5.0127, - "step": 10360 - }, - { - "epoch": 0.5729684908789386, - "grad_norm": 3.4687135219573975, - "learning_rate": 2.1958499147242753e-05, - "loss": 5.2457, - "step": 10365 - }, - { - "epoch": 0.5732448866777226, - "grad_norm": 3.8498566150665283, - "learning_rate": 2.1944286526435477e-05, - "loss": 5.4415, - "step": 10370 - }, - { - "epoch": 0.5735212824765064, - "grad_norm": 3.0555355548858643, - "learning_rate": 2.1930073905628198e-05, - "loss": 4.9743, - "step": 10375 - }, - { - "epoch": 0.5737976782752903, - "grad_norm": 3.5073928833007812, - "learning_rate": 2.1915861284820922e-05, - "loss": 5.0117, - "step": 10380 - }, - { - "epoch": 0.5740740740740741, - "grad_norm": 3.9393508434295654, - "learning_rate": 2.1901648664013646e-05, - "loss": 5.0825, - "step": 10385 - }, - { - "epoch": 0.574350469872858, - "grad_norm": 3.139087438583374, - "learning_rate": 2.188743604320637e-05, - "loss": 4.9374, - "step": 10390 - }, - { - "epoch": 0.5746268656716418, - "grad_norm": 3.968113660812378, - "learning_rate": 2.187322342239909e-05, - "loss": 5.1305, - "step": 10395 - }, - { - "epoch": 0.5749032614704257, - "grad_norm": 3.1973788738250732, - "learning_rate": 2.1859010801591814e-05, - "loss": 4.7387, - "step": 10400 - }, - { - "epoch": 0.5751796572692095, - "grad_norm": 3.473482847213745, - "learning_rate": 2.1844798180784538e-05, - "loss": 4.8958, - "step": 10405 - }, - { - "epoch": 0.5754560530679934, - "grad_norm": 3.418353796005249, - "learning_rate": 2.183058555997726e-05, - "loss": 4.8574, - "step": 10410 - }, - { - "epoch": 0.5757324488667772, - "grad_norm": 2.979325532913208, - "learning_rate": 2.1816372939169986e-05, - "loss": 5.1611, - "step": 10415 - }, - { - "epoch": 0.5760088446655611, - "grad_norm": 3.108954668045044, - "learning_rate": 2.1802160318362707e-05, - "loss": 5.1672, - "step": 10420 - }, - { - "epoch": 0.5762852404643449, - "grad_norm": 2.963421106338501, - "learning_rate": 2.178794769755543e-05, - "loss": 4.4638, - "step": 10425 - }, - { - "epoch": 0.5765616362631288, - "grad_norm": 4.1467461585998535, - "learning_rate": 2.177373507674815e-05, - "loss": 4.8556, - "step": 10430 - }, - { - "epoch": 0.5768380320619126, - "grad_norm": 4.003355503082275, - "learning_rate": 2.1759522455940875e-05, - "loss": 5.0152, - "step": 10435 - }, - { - "epoch": 0.5771144278606966, - "grad_norm": 3.502562999725342, - "learning_rate": 2.17453098351336e-05, - "loss": 5.0242, - "step": 10440 - }, - { - "epoch": 0.5773908236594804, - "grad_norm": 3.727094888687134, - "learning_rate": 2.1731097214326323e-05, - "loss": 5.2192, - "step": 10445 - }, - { - "epoch": 0.5776672194582643, - "grad_norm": 3.4509377479553223, - "learning_rate": 2.1716884593519047e-05, - "loss": 5.3475, - "step": 10450 - }, - { - "epoch": 0.5779436152570481, - "grad_norm": 3.1824676990509033, - "learning_rate": 2.1702671972711768e-05, - "loss": 4.8505, - "step": 10455 - }, - { - "epoch": 0.578220011055832, - "grad_norm": 4.357178211212158, - "learning_rate": 2.1688459351904492e-05, - "loss": 5.2004, - "step": 10460 - }, - { - "epoch": 0.5784964068546158, - "grad_norm": 4.844539642333984, - "learning_rate": 2.1674246731097216e-05, - "loss": 5.1357, - "step": 10465 - }, - { - "epoch": 0.5787728026533997, - "grad_norm": 3.694406509399414, - "learning_rate": 2.166003411028994e-05, - "loss": 4.9931, - "step": 10470 - }, - { - "epoch": 0.5790491984521835, - "grad_norm": 3.6863090991973877, - "learning_rate": 2.1645821489482664e-05, - "loss": 5.4604, - "step": 10475 - }, - { - "epoch": 0.5793255942509674, - "grad_norm": 3.620305299758911, - "learning_rate": 2.1631608868675384e-05, - "loss": 4.958, - "step": 10480 - }, - { - "epoch": 0.5796019900497512, - "grad_norm": 3.349443197250366, - "learning_rate": 2.161739624786811e-05, - "loss": 5.0294, - "step": 10485 - }, - { - "epoch": 0.5798783858485351, - "grad_norm": 3.268993854522705, - "learning_rate": 2.160318362706083e-05, - "loss": 5.0303, - "step": 10490 - }, - { - "epoch": 0.5801547816473189, - "grad_norm": 3.0426461696624756, - "learning_rate": 2.1588971006253556e-05, - "loss": 5.0315, - "step": 10495 - }, - { - "epoch": 0.5804311774461028, - "grad_norm": 3.8381333351135254, - "learning_rate": 2.1574758385446277e-05, - "loss": 5.1518, - "step": 10500 - }, - { - "epoch": 0.5807075732448866, - "grad_norm": 4.0485687255859375, - "learning_rate": 2.1560545764639e-05, - "loss": 5.1779, - "step": 10505 - }, - { - "epoch": 0.5809839690436706, - "grad_norm": 3.4191527366638184, - "learning_rate": 2.1546333143831725e-05, - "loss": 5.2983, - "step": 10510 - }, - { - "epoch": 0.5812603648424544, - "grad_norm": 3.35133695602417, - "learning_rate": 2.1532120523024445e-05, - "loss": 5.1839, - "step": 10515 - }, - { - "epoch": 0.5815367606412383, - "grad_norm": 2.7311925888061523, - "learning_rate": 2.151790790221717e-05, - "loss": 4.8678, - "step": 10520 - }, - { - "epoch": 0.5818131564400221, - "grad_norm": 4.017518043518066, - "learning_rate": 2.1503695281409893e-05, - "loss": 4.8128, - "step": 10525 - }, - { - "epoch": 0.582089552238806, - "grad_norm": 3.112291097640991, - "learning_rate": 2.1489482660602617e-05, - "loss": 5.3122, - "step": 10530 - }, - { - "epoch": 0.5823659480375898, - "grad_norm": 3.437387704849243, - "learning_rate": 2.1475270039795338e-05, - "loss": 4.9359, - "step": 10535 - }, - { - "epoch": 0.5826423438363737, - "grad_norm": 3.4737443923950195, - "learning_rate": 2.1461057418988062e-05, - "loss": 5.0828, - "step": 10540 - }, - { - "epoch": 0.5829187396351575, - "grad_norm": 4.825445175170898, - "learning_rate": 2.1446844798180786e-05, - "loss": 5.0617, - "step": 10545 - }, - { - "epoch": 0.5831951354339414, - "grad_norm": 2.9588332176208496, - "learning_rate": 2.143263217737351e-05, - "loss": 5.124, - "step": 10550 - }, - { - "epoch": 0.5834715312327252, - "grad_norm": 3.551666736602783, - "learning_rate": 2.1418419556566234e-05, - "loss": 5.1687, - "step": 10555 - }, - { - "epoch": 0.5837479270315091, - "grad_norm": 3.1426239013671875, - "learning_rate": 2.1404206935758954e-05, - "loss": 4.7921, - "step": 10560 - }, - { - "epoch": 0.5840243228302929, - "grad_norm": 3.088836193084717, - "learning_rate": 2.138999431495168e-05, - "loss": 5.3397, - "step": 10565 - }, - { - "epoch": 0.5843007186290768, - "grad_norm": 3.6306614875793457, - "learning_rate": 2.13757816941444e-05, - "loss": 5.3874, - "step": 10570 - }, - { - "epoch": 0.5845771144278606, - "grad_norm": 4.261897563934326, - "learning_rate": 2.1361569073337123e-05, - "loss": 5.0643, - "step": 10575 - }, - { - "epoch": 0.5848535102266446, - "grad_norm": 3.7982075214385986, - "learning_rate": 2.1347356452529847e-05, - "loss": 4.9988, - "step": 10580 - }, - { - "epoch": 0.5851299060254284, - "grad_norm": 3.575862407684326, - "learning_rate": 2.133314383172257e-05, - "loss": 5.3502, - "step": 10585 - }, - { - "epoch": 0.5854063018242123, - "grad_norm": 3.2091081142425537, - "learning_rate": 2.1318931210915295e-05, - "loss": 4.929, - "step": 10590 - }, - { - "epoch": 0.5856826976229962, - "grad_norm": 3.6131644248962402, - "learning_rate": 2.1304718590108016e-05, - "loss": 4.9555, - "step": 10595 - }, - { - "epoch": 0.58595909342178, - "grad_norm": 3.6742172241210938, - "learning_rate": 2.129050596930074e-05, - "loss": 5.0453, - "step": 10600 - }, - { - "epoch": 0.5862354892205639, - "grad_norm": 3.404371738433838, - "learning_rate": 2.1276293348493464e-05, - "loss": 4.9043, - "step": 10605 - }, - { - "epoch": 0.5865118850193477, - "grad_norm": 3.1416611671447754, - "learning_rate": 2.1262080727686187e-05, - "loss": 5.0373, - "step": 10610 - }, - { - "epoch": 0.5867882808181316, - "grad_norm": 3.7025134563446045, - "learning_rate": 2.124786810687891e-05, - "loss": 4.9599, - "step": 10615 - }, - { - "epoch": 0.5870646766169154, - "grad_norm": 3.171035051345825, - "learning_rate": 2.1233655486071632e-05, - "loss": 5.4027, - "step": 10620 - }, - { - "epoch": 0.5873410724156993, - "grad_norm": 3.402039051055908, - "learning_rate": 2.1219442865264356e-05, - "loss": 5.0835, - "step": 10625 - }, - { - "epoch": 0.5876174682144831, - "grad_norm": 3.2764732837677, - "learning_rate": 2.1205230244457077e-05, - "loss": 4.7431, - "step": 10630 - }, - { - "epoch": 0.587893864013267, - "grad_norm": 3.389270305633545, - "learning_rate": 2.1191017623649804e-05, - "loss": 5.0233, - "step": 10635 - }, - { - "epoch": 0.5881702598120508, - "grad_norm": 3.1218948364257812, - "learning_rate": 2.1176805002842525e-05, - "loss": 5.0945, - "step": 10640 - }, - { - "epoch": 0.5884466556108348, - "grad_norm": 3.973499298095703, - "learning_rate": 2.116259238203525e-05, - "loss": 4.7494, - "step": 10645 - }, - { - "epoch": 0.5887230514096186, - "grad_norm": 3.017624616622925, - "learning_rate": 2.1148379761227973e-05, - "loss": 4.8494, - "step": 10650 - }, - { - "epoch": 0.5889994472084025, - "grad_norm": 4.036647796630859, - "learning_rate": 2.1134167140420693e-05, - "loss": 5.053, - "step": 10655 - }, - { - "epoch": 0.5892758430071863, - "grad_norm": 3.0895462036132812, - "learning_rate": 2.111995451961342e-05, - "loss": 4.7882, - "step": 10660 - }, - { - "epoch": 0.5895522388059702, - "grad_norm": 3.1549055576324463, - "learning_rate": 2.110574189880614e-05, - "loss": 4.8596, - "step": 10665 - }, - { - "epoch": 0.589828634604754, - "grad_norm": 3.222073793411255, - "learning_rate": 2.1091529277998865e-05, - "loss": 5.1663, - "step": 10670 - }, - { - "epoch": 0.5901050304035379, - "grad_norm": 3.191425323486328, - "learning_rate": 2.1077316657191586e-05, - "loss": 5.1247, - "step": 10675 - }, - { - "epoch": 0.5903814262023217, - "grad_norm": 3.3451499938964844, - "learning_rate": 2.106310403638431e-05, - "loss": 5.0468, - "step": 10680 - }, - { - "epoch": 0.5906578220011056, - "grad_norm": 3.7087557315826416, - "learning_rate": 2.1048891415577034e-05, - "loss": 5.2902, - "step": 10685 - }, - { - "epoch": 0.5909342177998894, - "grad_norm": 3.4653618335723877, - "learning_rate": 2.1034678794769758e-05, - "loss": 4.8757, - "step": 10690 - }, - { - "epoch": 0.5912106135986733, - "grad_norm": 3.368715763092041, - "learning_rate": 2.102046617396248e-05, - "loss": 5.0481, - "step": 10695 - }, - { - "epoch": 0.5914870093974571, - "grad_norm": 3.7154407501220703, - "learning_rate": 2.1006253553155202e-05, - "loss": 4.8707, - "step": 10700 - }, - { - "epoch": 0.591763405196241, - "grad_norm": 2.9506330490112305, - "learning_rate": 2.0992040932347926e-05, - "loss": 4.9962, - "step": 10705 - }, - { - "epoch": 0.5920398009950248, - "grad_norm": 2.9487926959991455, - "learning_rate": 2.0977828311540647e-05, - "loss": 4.864, - "step": 10710 - }, - { - "epoch": 0.5923161967938088, - "grad_norm": 2.97090482711792, - "learning_rate": 2.0963615690733374e-05, - "loss": 4.7697, - "step": 10715 - }, - { - "epoch": 0.5925925925925926, - "grad_norm": 3.8541948795318604, - "learning_rate": 2.0949403069926095e-05, - "loss": 5.1119, - "step": 10720 - }, - { - "epoch": 0.5928689883913765, - "grad_norm": 3.0364882946014404, - "learning_rate": 2.093519044911882e-05, - "loss": 5.2197, - "step": 10725 - }, - { - "epoch": 0.5931453841901603, - "grad_norm": 3.167233943939209, - "learning_rate": 2.0920977828311543e-05, - "loss": 5.1334, - "step": 10730 - }, - { - "epoch": 0.5934217799889442, - "grad_norm": 2.8649866580963135, - "learning_rate": 2.0906765207504263e-05, - "loss": 4.886, - "step": 10735 - }, - { - "epoch": 0.593698175787728, - "grad_norm": 4.133547782897949, - "learning_rate": 2.0892552586696987e-05, - "loss": 5.1874, - "step": 10740 - }, - { - "epoch": 0.5939745715865119, - "grad_norm": 3.462965488433838, - "learning_rate": 2.087833996588971e-05, - "loss": 5.0722, - "step": 10745 - }, - { - "epoch": 0.5942509673852957, - "grad_norm": 3.6210412979125977, - "learning_rate": 2.0864127345082435e-05, - "loss": 4.7343, - "step": 10750 - }, - { - "epoch": 0.5945273631840796, - "grad_norm": 3.1893310546875, - "learning_rate": 2.0849914724275156e-05, - "loss": 4.9851, - "step": 10755 - }, - { - "epoch": 0.5948037589828634, - "grad_norm": 2.8404297828674316, - "learning_rate": 2.083570210346788e-05, - "loss": 5.1774, - "step": 10760 - }, - { - "epoch": 0.5950801547816473, - "grad_norm": 3.4504430294036865, - "learning_rate": 2.0821489482660604e-05, - "loss": 5.1506, - "step": 10765 - }, - { - "epoch": 0.5953565505804311, - "grad_norm": 3.756342649459839, - "learning_rate": 2.0807276861853324e-05, - "loss": 5.1785, - "step": 10770 - }, - { - "epoch": 0.595632946379215, - "grad_norm": 2.698963165283203, - "learning_rate": 2.0793064241046052e-05, - "loss": 5.1673, - "step": 10775 - }, - { - "epoch": 0.5959093421779988, - "grad_norm": 3.2271690368652344, - "learning_rate": 2.0778851620238772e-05, - "loss": 5.037, - "step": 10780 - }, - { - "epoch": 0.5961857379767828, - "grad_norm": 3.4815831184387207, - "learning_rate": 2.0764638999431496e-05, - "loss": 4.9544, - "step": 10785 - }, - { - "epoch": 0.5964621337755666, - "grad_norm": 3.6066627502441406, - "learning_rate": 2.075042637862422e-05, - "loss": 5.0425, - "step": 10790 - }, - { - "epoch": 0.5967385295743505, - "grad_norm": 3.649170160293579, - "learning_rate": 2.073621375781694e-05, - "loss": 5.303, - "step": 10795 - }, - { - "epoch": 0.5970149253731343, - "grad_norm": 3.320713996887207, - "learning_rate": 2.0722001137009668e-05, - "loss": 4.8067, - "step": 10800 - }, - { - "epoch": 0.5972913211719182, - "grad_norm": 2.9411003589630127, - "learning_rate": 2.070778851620239e-05, - "loss": 5.1317, - "step": 10805 - }, - { - "epoch": 0.5975677169707021, - "grad_norm": 3.370591878890991, - "learning_rate": 2.0693575895395113e-05, - "loss": 5.191, - "step": 10810 - }, - { - "epoch": 0.5978441127694859, - "grad_norm": 3.282804250717163, - "learning_rate": 2.0679363274587833e-05, - "loss": 5.2755, - "step": 10815 - }, - { - "epoch": 0.5981205085682698, - "grad_norm": 3.2764978408813477, - "learning_rate": 2.0665150653780557e-05, - "loss": 5.0115, - "step": 10820 - }, - { - "epoch": 0.5983969043670536, - "grad_norm": 3.380523920059204, - "learning_rate": 2.065093803297328e-05, - "loss": 4.9544, - "step": 10825 - }, - { - "epoch": 0.5986733001658375, - "grad_norm": 3.9343714714050293, - "learning_rate": 2.0636725412166005e-05, - "loss": 4.836, - "step": 10830 - }, - { - "epoch": 0.5989496959646213, - "grad_norm": 2.9544639587402344, - "learning_rate": 2.062251279135873e-05, - "loss": 4.939, - "step": 10835 - }, - { - "epoch": 0.5992260917634052, - "grad_norm": 3.6150293350219727, - "learning_rate": 2.060830017055145e-05, - "loss": 5.26, - "step": 10840 - }, - { - "epoch": 0.599502487562189, - "grad_norm": 3.480836868286133, - "learning_rate": 2.0594087549744174e-05, - "loss": 5.2313, - "step": 10845 - }, - { - "epoch": 0.599778883360973, - "grad_norm": 3.0056374073028564, - "learning_rate": 2.0579874928936894e-05, - "loss": 4.9642, - "step": 10850 - }, - { - "epoch": 0.6000552791597568, - "grad_norm": 3.1670284271240234, - "learning_rate": 2.0565662308129622e-05, - "loss": 5.4859, - "step": 10855 - }, - { - "epoch": 0.6003316749585407, - "grad_norm": 3.991672992706299, - "learning_rate": 2.0551449687322342e-05, - "loss": 5.167, - "step": 10860 - }, - { - "epoch": 0.6006080707573245, - "grad_norm": 3.690068244934082, - "learning_rate": 2.0537237066515066e-05, - "loss": 5.0009, - "step": 10865 - }, - { - "epoch": 0.6008844665561084, - "grad_norm": 3.5538177490234375, - "learning_rate": 2.052302444570779e-05, - "loss": 4.9969, - "step": 10870 - }, - { - "epoch": 0.6011608623548922, - "grad_norm": 3.6092166900634766, - "learning_rate": 2.050881182490051e-05, - "loss": 5.2605, - "step": 10875 - }, - { - "epoch": 0.6014372581536761, - "grad_norm": 3.576063871383667, - "learning_rate": 2.0494599204093235e-05, - "loss": 5.2605, - "step": 10880 - }, - { - "epoch": 0.6017136539524599, - "grad_norm": 3.104238271713257, - "learning_rate": 2.048038658328596e-05, - "loss": 4.956, - "step": 10885 - }, - { - "epoch": 0.6019900497512438, - "grad_norm": 2.788604974746704, - "learning_rate": 2.0466173962478683e-05, - "loss": 5.0503, - "step": 10890 - }, - { - "epoch": 0.6022664455500276, - "grad_norm": 3.3958215713500977, - "learning_rate": 2.0451961341671403e-05, - "loss": 5.4058, - "step": 10895 - }, - { - "epoch": 0.6025428413488115, - "grad_norm": 3.638075113296509, - "learning_rate": 2.0437748720864127e-05, - "loss": 5.0203, - "step": 10900 - }, - { - "epoch": 0.6028192371475953, - "grad_norm": 3.6704213619232178, - "learning_rate": 2.042353610005685e-05, - "loss": 5.0472, - "step": 10905 - }, - { - "epoch": 0.6030956329463792, - "grad_norm": 4.042377471923828, - "learning_rate": 2.0409323479249575e-05, - "loss": 4.8711, - "step": 10910 - }, - { - "epoch": 0.603372028745163, - "grad_norm": 3.991793632507324, - "learning_rate": 2.03951108584423e-05, - "loss": 5.3127, - "step": 10915 - }, - { - "epoch": 0.603648424543947, - "grad_norm": 3.0052876472473145, - "learning_rate": 2.038089823763502e-05, - "loss": 5.3358, - "step": 10920 - }, - { - "epoch": 0.6039248203427308, - "grad_norm": 3.848688840866089, - "learning_rate": 2.0366685616827744e-05, - "loss": 5.1195, - "step": 10925 - }, - { - "epoch": 0.6042012161415147, - "grad_norm": 3.1843132972717285, - "learning_rate": 2.0352472996020468e-05, - "loss": 5.0706, - "step": 10930 - }, - { - "epoch": 0.6044776119402985, - "grad_norm": 3.1511127948760986, - "learning_rate": 2.033826037521319e-05, - "loss": 5.0114, - "step": 10935 - }, - { - "epoch": 0.6047540077390824, - "grad_norm": 3.199019432067871, - "learning_rate": 2.0324047754405916e-05, - "loss": 4.9726, - "step": 10940 - }, - { - "epoch": 0.6050304035378662, - "grad_norm": 3.4878625869750977, - "learning_rate": 2.0309835133598637e-05, - "loss": 5.249, - "step": 10945 - }, - { - "epoch": 0.6053067993366501, - "grad_norm": 2.9809420108795166, - "learning_rate": 2.029562251279136e-05, - "loss": 5.2818, - "step": 10950 - }, - { - "epoch": 0.6055831951354339, - "grad_norm": 3.647505521774292, - "learning_rate": 2.028140989198408e-05, - "loss": 5.1207, - "step": 10955 - }, - { - "epoch": 0.6058595909342178, - "grad_norm": 3.185030698776245, - "learning_rate": 2.0267197271176805e-05, - "loss": 5.0107, - "step": 10960 - }, - { - "epoch": 0.6061359867330016, - "grad_norm": 4.0943803787231445, - "learning_rate": 2.025298465036953e-05, - "loss": 5.0815, - "step": 10965 - }, - { - "epoch": 0.6064123825317855, - "grad_norm": 2.887913703918457, - "learning_rate": 2.0238772029562253e-05, - "loss": 4.8405, - "step": 10970 - }, - { - "epoch": 0.6066887783305693, - "grad_norm": 3.262972831726074, - "learning_rate": 2.0224559408754977e-05, - "loss": 4.8914, - "step": 10975 - }, - { - "epoch": 0.6069651741293532, - "grad_norm": 4.215073585510254, - "learning_rate": 2.0210346787947698e-05, - "loss": 4.9772, - "step": 10980 - }, - { - "epoch": 0.607241569928137, - "grad_norm": 3.0118231773376465, - "learning_rate": 2.019613416714042e-05, - "loss": 4.9951, - "step": 10985 - }, - { - "epoch": 0.607517965726921, - "grad_norm": 4.327718257904053, - "learning_rate": 2.0181921546333142e-05, - "loss": 5.2855, - "step": 10990 - }, - { - "epoch": 0.6077943615257048, - "grad_norm": 3.5684874057769775, - "learning_rate": 2.016770892552587e-05, - "loss": 5.302, - "step": 10995 - }, - { - "epoch": 0.6080707573244887, - "grad_norm": 3.2276368141174316, - "learning_rate": 2.015349630471859e-05, - "loss": 5.1272, - "step": 11000 - }, - { - "epoch": 0.6083471531232725, - "grad_norm": 3.3742012977600098, - "learning_rate": 2.0139283683911314e-05, - "loss": 5.1438, - "step": 11005 - }, - { - "epoch": 0.6086235489220564, - "grad_norm": 3.5114636421203613, - "learning_rate": 2.0125071063104038e-05, - "loss": 4.9084, - "step": 11010 - }, - { - "epoch": 0.6088999447208402, - "grad_norm": 3.3797311782836914, - "learning_rate": 2.011085844229676e-05, - "loss": 4.9402, - "step": 11015 - }, - { - "epoch": 0.6091763405196241, - "grad_norm": 3.398404121398926, - "learning_rate": 2.0096645821489486e-05, - "loss": 4.8323, - "step": 11020 - }, - { - "epoch": 0.6094527363184079, - "grad_norm": 3.7108492851257324, - "learning_rate": 2.0082433200682207e-05, - "loss": 4.7783, - "step": 11025 - }, - { - "epoch": 0.6097291321171918, - "grad_norm": 3.121354341506958, - "learning_rate": 2.006822057987493e-05, - "loss": 5.045, - "step": 11030 - }, - { - "epoch": 0.6100055279159757, - "grad_norm": 4.430671691894531, - "learning_rate": 2.005400795906765e-05, - "loss": 5.0018, - "step": 11035 - }, - { - "epoch": 0.6102819237147595, - "grad_norm": 3.9316344261169434, - "learning_rate": 2.0039795338260375e-05, - "loss": 5.2804, - "step": 11040 - }, - { - "epoch": 0.6105583195135434, - "grad_norm": 3.610534906387329, - "learning_rate": 2.00255827174531e-05, - "loss": 5.0137, - "step": 11045 - }, - { - "epoch": 0.6108347153123272, - "grad_norm": 3.248753786087036, - "learning_rate": 2.0011370096645823e-05, - "loss": 4.9726, - "step": 11050 - }, - { - "epoch": 0.6111111111111112, - "grad_norm": 4.037750244140625, - "learning_rate": 1.9997157475838547e-05, - "loss": 5.3961, - "step": 11055 - }, - { - "epoch": 0.611387506909895, - "grad_norm": 3.1049892902374268, - "learning_rate": 1.9982944855031268e-05, - "loss": 5.026, - "step": 11060 - }, - { - "epoch": 0.6116639027086789, - "grad_norm": 3.5560545921325684, - "learning_rate": 1.996873223422399e-05, - "loss": 5.3866, - "step": 11065 - }, - { - "epoch": 0.6119402985074627, - "grad_norm": 3.1912312507629395, - "learning_rate": 1.9954519613416712e-05, - "loss": 5.1017, - "step": 11070 - }, - { - "epoch": 0.6122166943062466, - "grad_norm": 3.402047634124756, - "learning_rate": 1.994030699260944e-05, - "loss": 5.0437, - "step": 11075 - }, - { - "epoch": 0.6124930901050304, - "grad_norm": 4.292905807495117, - "learning_rate": 1.9926094371802164e-05, - "loss": 5.2194, - "step": 11080 - }, - { - "epoch": 0.6127694859038143, - "grad_norm": 3.1720829010009766, - "learning_rate": 1.9911881750994884e-05, - "loss": 4.989, - "step": 11085 - }, - { - "epoch": 0.6130458817025981, - "grad_norm": 3.152600049972534, - "learning_rate": 1.9897669130187608e-05, - "loss": 5.4581, - "step": 11090 - }, - { - "epoch": 0.613322277501382, - "grad_norm": 3.7568509578704834, - "learning_rate": 1.988345650938033e-05, - "loss": 5.183, - "step": 11095 - }, - { - "epoch": 0.6135986733001658, - "grad_norm": 3.13639497756958, - "learning_rate": 1.9869243888573053e-05, - "loss": 5.2017, - "step": 11100 - }, - { - "epoch": 0.6138750690989497, - "grad_norm": 3.1426844596862793, - "learning_rate": 1.9855031267765777e-05, - "loss": 4.9344, - "step": 11105 - }, - { - "epoch": 0.6141514648977335, - "grad_norm": 3.382380723953247, - "learning_rate": 1.98408186469585e-05, - "loss": 5.0299, - "step": 11110 - }, - { - "epoch": 0.6144278606965174, - "grad_norm": 3.4279749393463135, - "learning_rate": 1.9826606026151225e-05, - "loss": 5.0376, - "step": 11115 - }, - { - "epoch": 0.6147042564953012, - "grad_norm": 2.9934186935424805, - "learning_rate": 1.9812393405343945e-05, - "loss": 4.8845, - "step": 11120 - }, - { - "epoch": 0.6149806522940852, - "grad_norm": 3.1613216400146484, - "learning_rate": 1.979818078453667e-05, - "loss": 5.2844, - "step": 11125 - }, - { - "epoch": 0.615257048092869, - "grad_norm": 2.9240121841430664, - "learning_rate": 1.9783968163729393e-05, - "loss": 5.0547, - "step": 11130 - }, - { - "epoch": 0.6155334438916529, - "grad_norm": 3.2536087036132812, - "learning_rate": 1.9769755542922117e-05, - "loss": 4.9044, - "step": 11135 - }, - { - "epoch": 0.6158098396904367, - "grad_norm": 3.3475849628448486, - "learning_rate": 1.9755542922114838e-05, - "loss": 5.041, - "step": 11140 - }, - { - "epoch": 0.6160862354892206, - "grad_norm": 3.322903871536255, - "learning_rate": 1.9741330301307562e-05, - "loss": 4.9652, - "step": 11145 - }, - { - "epoch": 0.6163626312880044, - "grad_norm": 3.3216490745544434, - "learning_rate": 1.9727117680500286e-05, - "loss": 4.9637, - "step": 11150 - }, - { - "epoch": 0.6166390270867883, - "grad_norm": 3.5304057598114014, - "learning_rate": 1.9712905059693006e-05, - "loss": 5.2, - "step": 11155 - }, - { - "epoch": 0.6169154228855721, - "grad_norm": 3.0232765674591064, - "learning_rate": 1.9698692438885734e-05, - "loss": 4.9661, - "step": 11160 - }, - { - "epoch": 0.617191818684356, - "grad_norm": 3.3707964420318604, - "learning_rate": 1.9684479818078454e-05, - "loss": 5.2281, - "step": 11165 - }, - { - "epoch": 0.6174682144831398, - "grad_norm": 3.333315134048462, - "learning_rate": 1.9670267197271178e-05, - "loss": 5.0546, - "step": 11170 - }, - { - "epoch": 0.6177446102819237, - "grad_norm": 3.7123754024505615, - "learning_rate": 1.96560545764639e-05, - "loss": 4.9796, - "step": 11175 - }, - { - "epoch": 0.6180210060807075, - "grad_norm": 3.348001480102539, - "learning_rate": 1.9641841955656623e-05, - "loss": 5.06, - "step": 11180 - }, - { - "epoch": 0.6182974018794914, - "grad_norm": 3.643882989883423, - "learning_rate": 1.9627629334849347e-05, - "loss": 4.9581, - "step": 11185 - }, - { - "epoch": 0.6185737976782753, - "grad_norm": 4.164022922515869, - "learning_rate": 1.961341671404207e-05, - "loss": 5.1829, - "step": 11190 - }, - { - "epoch": 0.6188501934770592, - "grad_norm": 2.8525750637054443, - "learning_rate": 1.9599204093234795e-05, - "loss": 5.1341, - "step": 11195 - }, - { - "epoch": 0.619126589275843, - "grad_norm": 3.13280987739563, - "learning_rate": 1.9584991472427515e-05, - "loss": 4.6817, - "step": 11200 - }, - { - "epoch": 0.6194029850746269, - "grad_norm": 3.3129093647003174, - "learning_rate": 1.957077885162024e-05, - "loss": 4.7727, - "step": 11205 - }, - { - "epoch": 0.6196793808734107, - "grad_norm": 3.110025405883789, - "learning_rate": 1.955656623081296e-05, - "loss": 5.0595, - "step": 11210 - }, - { - "epoch": 0.6199557766721946, - "grad_norm": 3.4862754344940186, - "learning_rate": 1.9542353610005687e-05, - "loss": 4.9615, - "step": 11215 - }, - { - "epoch": 0.6202321724709784, - "grad_norm": 2.9289820194244385, - "learning_rate": 1.9528140989198408e-05, - "loss": 5.0799, - "step": 11220 - }, - { - "epoch": 0.6205085682697623, - "grad_norm": 3.868337631225586, - "learning_rate": 1.9513928368391132e-05, - "loss": 5.0724, - "step": 11225 - }, - { - "epoch": 0.6207849640685461, - "grad_norm": 3.828287124633789, - "learning_rate": 1.9499715747583856e-05, - "loss": 4.9354, - "step": 11230 - }, - { - "epoch": 0.62106135986733, - "grad_norm": 3.591191291809082, - "learning_rate": 1.9485503126776576e-05, - "loss": 5.0352, - "step": 11235 - }, - { - "epoch": 0.6213377556661138, - "grad_norm": 3.2487423419952393, - "learning_rate": 1.9471290505969304e-05, - "loss": 4.6987, - "step": 11240 - }, - { - "epoch": 0.6216141514648977, - "grad_norm": 3.1650702953338623, - "learning_rate": 1.9457077885162024e-05, - "loss": 5.0245, - "step": 11245 - }, - { - "epoch": 0.6218905472636815, - "grad_norm": 3.4682087898254395, - "learning_rate": 1.944286526435475e-05, - "loss": 4.9166, - "step": 11250 - }, - { - "epoch": 0.6221669430624654, - "grad_norm": 3.3193416595458984, - "learning_rate": 1.9428652643547472e-05, - "loss": 5.1592, - "step": 11255 - }, - { - "epoch": 0.6224433388612494, - "grad_norm": 3.075951337814331, - "learning_rate": 1.9414440022740193e-05, - "loss": 5.0866, - "step": 11260 - }, - { - "epoch": 0.6227197346600332, - "grad_norm": 3.391075372695923, - "learning_rate": 1.9400227401932917e-05, - "loss": 5.1489, - "step": 11265 - }, - { - "epoch": 0.6229961304588171, - "grad_norm": 3.094604015350342, - "learning_rate": 1.938601478112564e-05, - "loss": 5.0992, - "step": 11270 - }, - { - "epoch": 0.6232725262576009, - "grad_norm": 3.3130276203155518, - "learning_rate": 1.9371802160318365e-05, - "loss": 4.941, - "step": 11275 - }, - { - "epoch": 0.6235489220563848, - "grad_norm": 4.256862640380859, - "learning_rate": 1.9357589539511086e-05, - "loss": 5.0641, - "step": 11280 - }, - { - "epoch": 0.6238253178551686, - "grad_norm": 3.174084186553955, - "learning_rate": 1.934337691870381e-05, - "loss": 5.2618, - "step": 11285 - }, - { - "epoch": 0.6241017136539525, - "grad_norm": 4.607797145843506, - "learning_rate": 1.9329164297896533e-05, - "loss": 4.6887, - "step": 11290 - }, - { - "epoch": 0.6243781094527363, - "grad_norm": 3.27817964553833, - "learning_rate": 1.9314951677089257e-05, - "loss": 5.25, - "step": 11295 - }, - { - "epoch": 0.6246545052515202, - "grad_norm": 3.901153564453125, - "learning_rate": 1.930073905628198e-05, - "loss": 5.1067, - "step": 11300 - }, - { - "epoch": 0.624930901050304, - "grad_norm": 4.034906387329102, - "learning_rate": 1.9286526435474702e-05, - "loss": 5.0124, - "step": 11305 - }, - { - "epoch": 0.6252072968490879, - "grad_norm": 3.6741175651550293, - "learning_rate": 1.9272313814667426e-05, - "loss": 5.1274, - "step": 11310 - }, - { - "epoch": 0.6254836926478717, - "grad_norm": 3.136146068572998, - "learning_rate": 1.9258101193860147e-05, - "loss": 4.9906, - "step": 11315 - }, - { - "epoch": 0.6257600884466556, - "grad_norm": 2.70194673538208, - "learning_rate": 1.924388857305287e-05, - "loss": 4.8283, - "step": 11320 - }, - { - "epoch": 0.6260364842454395, - "grad_norm": 3.145467758178711, - "learning_rate": 1.9229675952245595e-05, - "loss": 5.2228, - "step": 11325 - }, - { - "epoch": 0.6263128800442234, - "grad_norm": 3.2286715507507324, - "learning_rate": 1.921546333143832e-05, - "loss": 4.9371, - "step": 11330 - }, - { - "epoch": 0.6265892758430072, - "grad_norm": 3.4829323291778564, - "learning_rate": 1.9201250710631043e-05, - "loss": 4.8272, - "step": 11335 - }, - { - "epoch": 0.6268656716417911, - "grad_norm": 3.874124050140381, - "learning_rate": 1.9187038089823763e-05, - "loss": 5.1033, - "step": 11340 - }, - { - "epoch": 0.6271420674405749, - "grad_norm": 3.66743540763855, - "learning_rate": 1.9172825469016487e-05, - "loss": 5.012, - "step": 11345 - }, - { - "epoch": 0.6274184632393588, - "grad_norm": 3.3614590167999268, - "learning_rate": 1.915861284820921e-05, - "loss": 5.2151, - "step": 11350 - }, - { - "epoch": 0.6276948590381426, - "grad_norm": 3.108370780944824, - "learning_rate": 1.9144400227401935e-05, - "loss": 4.9544, - "step": 11355 - }, - { - "epoch": 0.6279712548369265, - "grad_norm": 4.2630391120910645, - "learning_rate": 1.9130187606594656e-05, - "loss": 4.6829, - "step": 11360 - }, - { - "epoch": 0.6282476506357103, - "grad_norm": 4.369514465332031, - "learning_rate": 1.911597498578738e-05, - "loss": 5.0551, - "step": 11365 - }, - { - "epoch": 0.6285240464344942, - "grad_norm": 3.091458559036255, - "learning_rate": 1.9101762364980104e-05, - "loss": 5.2453, - "step": 11370 - }, - { - "epoch": 0.628800442233278, - "grad_norm": 3.507791757583618, - "learning_rate": 1.9087549744172824e-05, - "loss": 4.803, - "step": 11375 - }, - { - "epoch": 0.6290768380320619, - "grad_norm": 3.9560585021972656, - "learning_rate": 1.907333712336555e-05, - "loss": 4.9198, - "step": 11380 - }, - { - "epoch": 0.6293532338308457, - "grad_norm": 3.2745492458343506, - "learning_rate": 1.9059124502558272e-05, - "loss": 5.143, - "step": 11385 - }, - { - "epoch": 0.6296296296296297, - "grad_norm": 3.749288558959961, - "learning_rate": 1.9044911881750996e-05, - "loss": 5.0508, - "step": 11390 - }, - { - "epoch": 0.6299060254284135, - "grad_norm": 3.2741405963897705, - "learning_rate": 1.903069926094372e-05, - "loss": 4.849, - "step": 11395 - }, - { - "epoch": 0.6301824212271974, - "grad_norm": 3.3491995334625244, - "learning_rate": 1.901648664013644e-05, - "loss": 5.0678, - "step": 11400 - }, - { - "epoch": 0.6304588170259812, - "grad_norm": 3.6836395263671875, - "learning_rate": 1.9002274019329168e-05, - "loss": 5.2173, - "step": 11405 - }, - { - "epoch": 0.6307352128247651, - "grad_norm": 3.4828968048095703, - "learning_rate": 1.898806139852189e-05, - "loss": 4.8904, - "step": 11410 - }, - { - "epoch": 0.6310116086235489, - "grad_norm": 3.472628355026245, - "learning_rate": 1.8973848777714613e-05, - "loss": 5.1827, - "step": 11415 - }, - { - "epoch": 0.6312880044223328, - "grad_norm": 3.0321156978607178, - "learning_rate": 1.8959636156907333e-05, - "loss": 4.8185, - "step": 11420 - }, - { - "epoch": 0.6315644002211166, - "grad_norm": 3.027168035507202, - "learning_rate": 1.8945423536100057e-05, - "loss": 5.1646, - "step": 11425 - }, - { - "epoch": 0.6318407960199005, - "grad_norm": 3.633697748184204, - "learning_rate": 1.893121091529278e-05, - "loss": 4.8653, - "step": 11430 - }, - { - "epoch": 0.6321171918186843, - "grad_norm": 3.8917782306671143, - "learning_rate": 1.8916998294485505e-05, - "loss": 5.1636, - "step": 11435 - }, - { - "epoch": 0.6323935876174682, - "grad_norm": 3.511700391769409, - "learning_rate": 1.890278567367823e-05, - "loss": 5.0657, - "step": 11440 - }, - { - "epoch": 0.632669983416252, - "grad_norm": 3.8616366386413574, - "learning_rate": 1.888857305287095e-05, - "loss": 5.085, - "step": 11445 - }, - { - "epoch": 0.6329463792150359, - "grad_norm": 3.9748528003692627, - "learning_rate": 1.8874360432063674e-05, - "loss": 5.2986, - "step": 11450 - }, - { - "epoch": 0.6332227750138197, - "grad_norm": 3.7701029777526855, - "learning_rate": 1.8860147811256394e-05, - "loss": 5.0137, - "step": 11455 - }, - { - "epoch": 0.6334991708126037, - "grad_norm": 3.903155565261841, - "learning_rate": 1.884593519044912e-05, - "loss": 5.0449, - "step": 11460 - }, - { - "epoch": 0.6337755666113875, - "grad_norm": 4.124213218688965, - "learning_rate": 1.8831722569641842e-05, - "loss": 4.9746, - "step": 11465 - }, - { - "epoch": 0.6340519624101714, - "grad_norm": 3.3113105297088623, - "learning_rate": 1.8817509948834566e-05, - "loss": 5.3676, - "step": 11470 - }, - { - "epoch": 0.6343283582089553, - "grad_norm": 3.800441265106201, - "learning_rate": 1.880329732802729e-05, - "loss": 4.9518, - "step": 11475 - }, - { - "epoch": 0.6346047540077391, - "grad_norm": 3.8035666942596436, - "learning_rate": 1.878908470722001e-05, - "loss": 5.1185, - "step": 11480 - }, - { - "epoch": 0.634881149806523, - "grad_norm": 3.5756587982177734, - "learning_rate": 1.8774872086412735e-05, - "loss": 5.027, - "step": 11485 - }, - { - "epoch": 0.6351575456053068, - "grad_norm": 4.0546875, - "learning_rate": 1.876065946560546e-05, - "loss": 5.163, - "step": 11490 - }, - { - "epoch": 0.6354339414040907, - "grad_norm": 3.088939666748047, - "learning_rate": 1.8746446844798183e-05, - "loss": 5.0475, - "step": 11495 - }, - { - "epoch": 0.6357103372028745, - "grad_norm": 3.1749298572540283, - "learning_rate": 1.8732234223990903e-05, - "loss": 4.7649, - "step": 11500 - }, - { - "epoch": 0.6359867330016584, - "grad_norm": 3.7194294929504395, - "learning_rate": 1.8718021603183627e-05, - "loss": 4.9005, - "step": 11505 - }, - { - "epoch": 0.6362631288004422, - "grad_norm": 3.6377458572387695, - "learning_rate": 1.870380898237635e-05, - "loss": 4.8569, - "step": 11510 - }, - { - "epoch": 0.6365395245992261, - "grad_norm": 3.6304149627685547, - "learning_rate": 1.8689596361569075e-05, - "loss": 5.1913, - "step": 11515 - }, - { - "epoch": 0.6368159203980099, - "grad_norm": 3.7646942138671875, - "learning_rate": 1.86753837407618e-05, - "loss": 4.9044, - "step": 11520 - }, - { - "epoch": 0.6370923161967939, - "grad_norm": 2.9954702854156494, - "learning_rate": 1.866117111995452e-05, - "loss": 4.9666, - "step": 11525 - }, - { - "epoch": 0.6373687119955777, - "grad_norm": 3.282142162322998, - "learning_rate": 1.8646958499147244e-05, - "loss": 4.8544, - "step": 11530 - }, - { - "epoch": 0.6376451077943616, - "grad_norm": 3.545342206954956, - "learning_rate": 1.8632745878339964e-05, - "loss": 4.9769, - "step": 11535 - }, - { - "epoch": 0.6379215035931454, - "grad_norm": 3.873244285583496, - "learning_rate": 1.861853325753269e-05, - "loss": 5.1524, - "step": 11540 - }, - { - "epoch": 0.6381978993919293, - "grad_norm": 3.273556709289551, - "learning_rate": 1.8604320636725416e-05, - "loss": 4.9264, - "step": 11545 - }, - { - "epoch": 0.6384742951907131, - "grad_norm": 4.054984092712402, - "learning_rate": 1.8590108015918136e-05, - "loss": 4.9509, - "step": 11550 - }, - { - "epoch": 0.638750690989497, - "grad_norm": 4.351573467254639, - "learning_rate": 1.857589539511086e-05, - "loss": 5.0652, - "step": 11555 - }, - { - "epoch": 0.6390270867882808, - "grad_norm": 3.1132781505584717, - "learning_rate": 1.856168277430358e-05, - "loss": 5.0784, - "step": 11560 - }, - { - "epoch": 0.6393034825870647, - "grad_norm": 3.2859065532684326, - "learning_rate": 1.8547470153496305e-05, - "loss": 5.099, - "step": 11565 - }, - { - "epoch": 0.6395798783858485, - "grad_norm": 3.499131441116333, - "learning_rate": 1.853325753268903e-05, - "loss": 5.0266, - "step": 11570 - }, - { - "epoch": 0.6398562741846324, - "grad_norm": 3.202064037322998, - "learning_rate": 1.8519044911881753e-05, - "loss": 4.8385, - "step": 11575 - }, - { - "epoch": 0.6401326699834162, - "grad_norm": 3.816030502319336, - "learning_rate": 1.8504832291074477e-05, - "loss": 4.8693, - "step": 11580 - }, - { - "epoch": 0.6404090657822001, - "grad_norm": 3.5522069931030273, - "learning_rate": 1.8490619670267197e-05, - "loss": 5.2284, - "step": 11585 - }, - { - "epoch": 0.6406854615809839, - "grad_norm": 2.9385523796081543, - "learning_rate": 1.847640704945992e-05, - "loss": 4.9548, - "step": 11590 - }, - { - "epoch": 0.6409618573797679, - "grad_norm": 3.6049933433532715, - "learning_rate": 1.8462194428652642e-05, - "loss": 4.9881, - "step": 11595 - }, - { - "epoch": 0.6412382531785517, - "grad_norm": 2.8920090198516846, - "learning_rate": 1.844798180784537e-05, - "loss": 4.9215, - "step": 11600 - }, - { - "epoch": 0.6415146489773356, - "grad_norm": 3.008737564086914, - "learning_rate": 1.843376918703809e-05, - "loss": 4.9579, - "step": 11605 - }, - { - "epoch": 0.6417910447761194, - "grad_norm": 3.340202808380127, - "learning_rate": 1.8419556566230814e-05, - "loss": 4.9546, - "step": 11610 - }, - { - "epoch": 0.6420674405749033, - "grad_norm": 4.0580973625183105, - "learning_rate": 1.8405343945423538e-05, - "loss": 5.3337, - "step": 11615 - }, - { - "epoch": 0.6423438363736871, - "grad_norm": 3.040929079055786, - "learning_rate": 1.839113132461626e-05, - "loss": 4.8372, - "step": 11620 - }, - { - "epoch": 0.642620232172471, - "grad_norm": 3.111201524734497, - "learning_rate": 1.8376918703808986e-05, - "loss": 5.1435, - "step": 11625 - }, - { - "epoch": 0.6428966279712548, - "grad_norm": 3.6376893520355225, - "learning_rate": 1.8362706083001706e-05, - "loss": 4.9107, - "step": 11630 - }, - { - "epoch": 0.6431730237700387, - "grad_norm": 3.81954288482666, - "learning_rate": 1.834849346219443e-05, - "loss": 5.0791, - "step": 11635 - }, - { - "epoch": 0.6434494195688225, - "grad_norm": 2.9065468311309814, - "learning_rate": 1.833428084138715e-05, - "loss": 5.0171, - "step": 11640 - }, - { - "epoch": 0.6437258153676064, - "grad_norm": 3.3919105529785156, - "learning_rate": 1.8320068220579875e-05, - "loss": 4.9347, - "step": 11645 - }, - { - "epoch": 0.6440022111663902, - "grad_norm": 2.9932496547698975, - "learning_rate": 1.83058555997726e-05, - "loss": 5.207, - "step": 11650 - }, - { - "epoch": 0.6442786069651741, - "grad_norm": 3.7456681728363037, - "learning_rate": 1.8291642978965323e-05, - "loss": 4.9296, - "step": 11655 - }, - { - "epoch": 0.6445550027639579, - "grad_norm": 3.1489369869232178, - "learning_rate": 1.8277430358158047e-05, - "loss": 4.8545, - "step": 11660 - }, - { - "epoch": 0.6448313985627419, - "grad_norm": 3.770603895187378, - "learning_rate": 1.8263217737350768e-05, - "loss": 4.9182, - "step": 11665 - }, - { - "epoch": 0.6451077943615257, - "grad_norm": 3.3885293006896973, - "learning_rate": 1.824900511654349e-05, - "loss": 5.0724, - "step": 11670 - }, - { - "epoch": 0.6453841901603096, - "grad_norm": 3.6072707176208496, - "learning_rate": 1.8234792495736212e-05, - "loss": 4.9864, - "step": 11675 - }, - { - "epoch": 0.6456605859590934, - "grad_norm": 3.027808904647827, - "learning_rate": 1.822057987492894e-05, - "loss": 5.3012, - "step": 11680 - }, - { - "epoch": 0.6459369817578773, - "grad_norm": 3.8706979751586914, - "learning_rate": 1.8206367254121663e-05, - "loss": 5.0479, - "step": 11685 - }, - { - "epoch": 0.6462133775566611, - "grad_norm": 2.9442152976989746, - "learning_rate": 1.8192154633314384e-05, - "loss": 5.153, - "step": 11690 - }, - { - "epoch": 0.646489773355445, - "grad_norm": 3.645040512084961, - "learning_rate": 1.8177942012507108e-05, - "loss": 5.0203, - "step": 11695 - }, - { - "epoch": 0.6467661691542289, - "grad_norm": 3.7537355422973633, - "learning_rate": 1.816372939169983e-05, - "loss": 5.155, - "step": 11700 - }, - { - "epoch": 0.6470425649530127, - "grad_norm": 3.36226749420166, - "learning_rate": 1.8149516770892553e-05, - "loss": 5.1796, - "step": 11705 - }, - { - "epoch": 0.6473189607517966, - "grad_norm": 3.477741241455078, - "learning_rate": 1.8135304150085277e-05, - "loss": 4.8361, - "step": 11710 - }, - { - "epoch": 0.6475953565505804, - "grad_norm": 3.8094239234924316, - "learning_rate": 1.8121091529278e-05, - "loss": 4.9998, - "step": 11715 - }, - { - "epoch": 0.6478717523493643, - "grad_norm": 3.418806791305542, - "learning_rate": 1.8106878908470725e-05, - "loss": 5.1284, - "step": 11720 - }, - { - "epoch": 0.6481481481481481, - "grad_norm": 3.108595132827759, - "learning_rate": 1.8092666287663445e-05, - "loss": 5.0816, - "step": 11725 - }, - { - "epoch": 0.648424543946932, - "grad_norm": 3.583040475845337, - "learning_rate": 1.807845366685617e-05, - "loss": 5.0151, - "step": 11730 - }, - { - "epoch": 0.6487009397457159, - "grad_norm": 2.922956705093384, - "learning_rate": 1.8064241046048893e-05, - "loss": 5.2425, - "step": 11735 - }, - { - "epoch": 0.6489773355444998, - "grad_norm": 3.604804039001465, - "learning_rate": 1.8050028425241617e-05, - "loss": 4.9848, - "step": 11740 - }, - { - "epoch": 0.6492537313432836, - "grad_norm": 3.4737608432769775, - "learning_rate": 1.8035815804434338e-05, - "loss": 5.0245, - "step": 11745 - }, - { - "epoch": 0.6495301271420675, - "grad_norm": 3.2414677143096924, - "learning_rate": 1.802160318362706e-05, - "loss": 4.8838, - "step": 11750 - }, - { - "epoch": 0.6498065229408513, - "grad_norm": 3.7715156078338623, - "learning_rate": 1.8007390562819786e-05, - "loss": 4.8937, - "step": 11755 - }, - { - "epoch": 0.6500829187396352, - "grad_norm": 3.0882837772369385, - "learning_rate": 1.7993177942012506e-05, - "loss": 5.4637, - "step": 11760 - }, - { - "epoch": 0.650359314538419, - "grad_norm": 3.7343342304229736, - "learning_rate": 1.7978965321205234e-05, - "loss": 4.9524, - "step": 11765 - }, - { - "epoch": 0.6506357103372029, - "grad_norm": 3.9432291984558105, - "learning_rate": 1.7964752700397954e-05, - "loss": 5.1691, - "step": 11770 - }, - { - "epoch": 0.6509121061359867, - "grad_norm": 3.1902172565460205, - "learning_rate": 1.7950540079590678e-05, - "loss": 5.2231, - "step": 11775 - }, - { - "epoch": 0.6511885019347706, - "grad_norm": 3.733781099319458, - "learning_rate": 1.79363274587834e-05, - "loss": 5.1506, - "step": 11780 - }, - { - "epoch": 0.6514648977335544, - "grad_norm": 3.6218392848968506, - "learning_rate": 1.7922114837976123e-05, - "loss": 5.1997, - "step": 11785 - }, - { - "epoch": 0.6517412935323383, - "grad_norm": 4.095831394195557, - "learning_rate": 1.7907902217168847e-05, - "loss": 4.934, - "step": 11790 - }, - { - "epoch": 0.6520176893311221, - "grad_norm": 2.9039041996002197, - "learning_rate": 1.789368959636157e-05, - "loss": 4.8885, - "step": 11795 - }, - { - "epoch": 0.652294085129906, - "grad_norm": 2.837620496749878, - "learning_rate": 1.7879476975554295e-05, - "loss": 4.7736, - "step": 11800 - }, - { - "epoch": 0.6525704809286899, - "grad_norm": 4.38239049911499, - "learning_rate": 1.7865264354747015e-05, - "loss": 5.1993, - "step": 11805 - }, - { - "epoch": 0.6528468767274738, - "grad_norm": 3.1232762336730957, - "learning_rate": 1.785105173393974e-05, - "loss": 4.8217, - "step": 11810 - }, - { - "epoch": 0.6531232725262576, - "grad_norm": 3.8106727600097656, - "learning_rate": 1.783683911313246e-05, - "loss": 5.0121, - "step": 11815 - }, - { - "epoch": 0.6533996683250415, - "grad_norm": 3.4995906352996826, - "learning_rate": 1.7822626492325187e-05, - "loss": 5.0138, - "step": 11820 - }, - { - "epoch": 0.6536760641238253, - "grad_norm": 3.8078784942626953, - "learning_rate": 1.7808413871517908e-05, - "loss": 5.102, - "step": 11825 - }, - { - "epoch": 0.6539524599226092, - "grad_norm": 3.3027868270874023, - "learning_rate": 1.7794201250710632e-05, - "loss": 4.8747, - "step": 11830 - }, - { - "epoch": 0.654228855721393, - "grad_norm": 3.113663673400879, - "learning_rate": 1.7779988629903356e-05, - "loss": 4.9301, - "step": 11835 - }, - { - "epoch": 0.6545052515201769, - "grad_norm": 3.6533520221710205, - "learning_rate": 1.7765776009096076e-05, - "loss": 4.8014, - "step": 11840 - }, - { - "epoch": 0.6547816473189607, - "grad_norm": 3.131390333175659, - "learning_rate": 1.77515633882888e-05, - "loss": 4.8878, - "step": 11845 - }, - { - "epoch": 0.6550580431177446, - "grad_norm": 3.265279769897461, - "learning_rate": 1.7737350767481524e-05, - "loss": 5.1533, - "step": 11850 - }, - { - "epoch": 0.6553344389165284, - "grad_norm": 3.4372310638427734, - "learning_rate": 1.7723138146674248e-05, - "loss": 4.766, - "step": 11855 - }, - { - "epoch": 0.6556108347153123, - "grad_norm": 3.4847805500030518, - "learning_rate": 1.7708925525866972e-05, - "loss": 4.963, - "step": 11860 - }, - { - "epoch": 0.6558872305140961, - "grad_norm": 4.266146659851074, - "learning_rate": 1.7694712905059693e-05, - "loss": 4.8469, - "step": 11865 - }, - { - "epoch": 0.65616362631288, - "grad_norm": 3.8087639808654785, - "learning_rate": 1.7680500284252417e-05, - "loss": 5.1229, - "step": 11870 - }, - { - "epoch": 0.6564400221116639, - "grad_norm": 3.209784507751465, - "learning_rate": 1.766628766344514e-05, - "loss": 5.0318, - "step": 11875 - }, - { - "epoch": 0.6567164179104478, - "grad_norm": 3.074418306350708, - "learning_rate": 1.7652075042637865e-05, - "loss": 4.9064, - "step": 11880 - }, - { - "epoch": 0.6569928137092316, - "grad_norm": 3.3850672245025635, - "learning_rate": 1.7637862421830585e-05, - "loss": 4.882, - "step": 11885 - }, - { - "epoch": 0.6572692095080155, - "grad_norm": 2.94960618019104, - "learning_rate": 1.762364980102331e-05, - "loss": 5.1099, - "step": 11890 - }, - { - "epoch": 0.6575456053067993, - "grad_norm": 3.0878140926361084, - "learning_rate": 1.7609437180216033e-05, - "loss": 5.1642, - "step": 11895 - }, - { - "epoch": 0.6578220011055832, - "grad_norm": 3.899074077606201, - "learning_rate": 1.7595224559408754e-05, - "loss": 5.0711, - "step": 11900 - }, - { - "epoch": 0.658098396904367, - "grad_norm": 3.6305460929870605, - "learning_rate": 1.758101193860148e-05, - "loss": 4.9042, - "step": 11905 - }, - { - "epoch": 0.6583747927031509, - "grad_norm": 3.7980971336364746, - "learning_rate": 1.7566799317794202e-05, - "loss": 5.0719, - "step": 11910 - }, - { - "epoch": 0.6586511885019348, - "grad_norm": 3.5552990436553955, - "learning_rate": 1.7552586696986926e-05, - "loss": 5.0205, - "step": 11915 - }, - { - "epoch": 0.6589275843007186, - "grad_norm": 3.524707794189453, - "learning_rate": 1.7538374076179646e-05, - "loss": 4.978, - "step": 11920 - }, - { - "epoch": 0.6592039800995025, - "grad_norm": 3.4494073390960693, - "learning_rate": 1.752416145537237e-05, - "loss": 4.9419, - "step": 11925 - }, - { - "epoch": 0.6594803758982863, - "grad_norm": 2.977220058441162, - "learning_rate": 1.7509948834565094e-05, - "loss": 5.1477, - "step": 11930 - }, - { - "epoch": 0.6597567716970703, - "grad_norm": 4.73137903213501, - "learning_rate": 1.749573621375782e-05, - "loss": 4.9816, - "step": 11935 - }, - { - "epoch": 0.6600331674958541, - "grad_norm": 3.988065719604492, - "learning_rate": 1.7481523592950542e-05, - "loss": 4.8524, - "step": 11940 - }, - { - "epoch": 0.660309563294638, - "grad_norm": 3.581176996231079, - "learning_rate": 1.7467310972143263e-05, - "loss": 5.3324, - "step": 11945 - }, - { - "epoch": 0.6605859590934218, - "grad_norm": 3.1877663135528564, - "learning_rate": 1.7453098351335987e-05, - "loss": 4.9964, - "step": 11950 - }, - { - "epoch": 0.6608623548922057, - "grad_norm": 3.8775217533111572, - "learning_rate": 1.7438885730528708e-05, - "loss": 4.9952, - "step": 11955 - }, - { - "epoch": 0.6611387506909895, - "grad_norm": 3.670485496520996, - "learning_rate": 1.7424673109721435e-05, - "loss": 4.9151, - "step": 11960 - }, - { - "epoch": 0.6614151464897734, - "grad_norm": 5.177221298217773, - "learning_rate": 1.7410460488914155e-05, - "loss": 5.3336, - "step": 11965 - }, - { - "epoch": 0.6616915422885572, - "grad_norm": 3.854691982269287, - "learning_rate": 1.739624786810688e-05, - "loss": 5.2463, - "step": 11970 - }, - { - "epoch": 0.6619679380873411, - "grad_norm": 3.330227851867676, - "learning_rate": 1.7382035247299603e-05, - "loss": 4.4221, - "step": 11975 - }, - { - "epoch": 0.6622443338861249, - "grad_norm": 3.3024230003356934, - "learning_rate": 1.7367822626492324e-05, - "loss": 4.9856, - "step": 11980 - }, - { - "epoch": 0.6625207296849088, - "grad_norm": 3.446526527404785, - "learning_rate": 1.735361000568505e-05, - "loss": 4.9725, - "step": 11985 - }, - { - "epoch": 0.6627971254836926, - "grad_norm": 3.300661087036133, - "learning_rate": 1.7339397384877772e-05, - "loss": 4.965, - "step": 11990 - }, - { - "epoch": 0.6630735212824765, - "grad_norm": 3.3495724201202393, - "learning_rate": 1.7325184764070496e-05, - "loss": 5.1418, - "step": 11995 - }, - { - "epoch": 0.6633499170812603, - "grad_norm": 3.493957281112671, - "learning_rate": 1.731097214326322e-05, - "loss": 4.8989, - "step": 12000 - }, - { - "epoch": 0.6636263128800443, - "grad_norm": 3.782233953475952, - "learning_rate": 1.729675952245594e-05, - "loss": 4.989, - "step": 12005 - }, - { - "epoch": 0.6639027086788281, - "grad_norm": 3.869248867034912, - "learning_rate": 1.7282546901648665e-05, - "loss": 4.8152, - "step": 12010 - }, - { - "epoch": 0.664179104477612, - "grad_norm": 3.3503940105438232, - "learning_rate": 1.726833428084139e-05, - "loss": 5.0352, - "step": 12015 - }, - { - "epoch": 0.6644555002763958, - "grad_norm": 3.2065019607543945, - "learning_rate": 1.7254121660034112e-05, - "loss": 4.8756, - "step": 12020 - }, - { - "epoch": 0.6647318960751797, - "grad_norm": 3.2806355953216553, - "learning_rate": 1.7239909039226833e-05, - "loss": 4.9745, - "step": 12025 - }, - { - "epoch": 0.6650082918739635, - "grad_norm": 3.366260290145874, - "learning_rate": 1.7225696418419557e-05, - "loss": 4.9347, - "step": 12030 - }, - { - "epoch": 0.6652846876727474, - "grad_norm": 3.2907090187072754, - "learning_rate": 1.721148379761228e-05, - "loss": 4.9765, - "step": 12035 - }, - { - "epoch": 0.6655610834715312, - "grad_norm": 4.094642639160156, - "learning_rate": 1.7197271176805005e-05, - "loss": 4.8334, - "step": 12040 - }, - { - "epoch": 0.6658374792703151, - "grad_norm": 3.0640034675598145, - "learning_rate": 1.718305855599773e-05, - "loss": 4.6388, - "step": 12045 - }, - { - "epoch": 0.6661138750690989, - "grad_norm": 3.662433385848999, - "learning_rate": 1.716884593519045e-05, - "loss": 4.8606, - "step": 12050 - }, - { - "epoch": 0.6663902708678828, - "grad_norm": 4.076939105987549, - "learning_rate": 1.7154633314383174e-05, - "loss": 4.8626, - "step": 12055 - }, - { - "epoch": 0.6666666666666666, - "grad_norm": 3.6793127059936523, - "learning_rate": 1.7140420693575894e-05, - "loss": 4.8432, - "step": 12060 - }, - { - "epoch": 0.6669430624654505, - "grad_norm": 3.6468772888183594, - "learning_rate": 1.7126208072768618e-05, - "loss": 4.7681, - "step": 12065 - }, - { - "epoch": 0.6672194582642343, - "grad_norm": 3.1914479732513428, - "learning_rate": 1.7111995451961342e-05, - "loss": 5.0191, - "step": 12070 - }, - { - "epoch": 0.6674958540630183, - "grad_norm": 3.367316484451294, - "learning_rate": 1.7097782831154066e-05, - "loss": 5.0413, - "step": 12075 - }, - { - "epoch": 0.6677722498618021, - "grad_norm": 3.765392780303955, - "learning_rate": 1.708357021034679e-05, - "loss": 4.8648, - "step": 12080 - }, - { - "epoch": 0.668048645660586, - "grad_norm": 3.203326463699341, - "learning_rate": 1.706935758953951e-05, - "loss": 4.9687, - "step": 12085 - }, - { - "epoch": 0.6683250414593698, - "grad_norm": 3.558098554611206, - "learning_rate": 1.7055144968732235e-05, - "loss": 5.1091, - "step": 12090 - }, - { - "epoch": 0.6686014372581537, - "grad_norm": 2.9107744693756104, - "learning_rate": 1.704093234792496e-05, - "loss": 4.8894, - "step": 12095 - }, - { - "epoch": 0.6688778330569375, - "grad_norm": 3.141874074935913, - "learning_rate": 1.7026719727117683e-05, - "loss": 4.7187, - "step": 12100 - }, - { - "epoch": 0.6691542288557214, - "grad_norm": 3.327786684036255, - "learning_rate": 1.7012507106310403e-05, - "loss": 5.34, - "step": 12105 - }, - { - "epoch": 0.6694306246545052, - "grad_norm": 3.6189236640930176, - "learning_rate": 1.6998294485503127e-05, - "loss": 5.1619, - "step": 12110 - }, - { - "epoch": 0.6697070204532891, - "grad_norm": 3.134925365447998, - "learning_rate": 1.698408186469585e-05, - "loss": 5.4205, - "step": 12115 - }, - { - "epoch": 0.6699834162520729, - "grad_norm": 3.9740278720855713, - "learning_rate": 1.6969869243888572e-05, - "loss": 4.929, - "step": 12120 - }, - { - "epoch": 0.6702598120508568, - "grad_norm": 3.459599494934082, - "learning_rate": 1.69556566230813e-05, - "loss": 5.0055, - "step": 12125 - }, - { - "epoch": 0.6705362078496406, - "grad_norm": 3.837351083755493, - "learning_rate": 1.694144400227402e-05, - "loss": 5.0351, - "step": 12130 - }, - { - "epoch": 0.6708126036484245, - "grad_norm": 4.666230201721191, - "learning_rate": 1.6927231381466744e-05, - "loss": 5.0094, - "step": 12135 - }, - { - "epoch": 0.6710889994472085, - "grad_norm": 4.403833866119385, - "learning_rate": 1.6913018760659464e-05, - "loss": 5.0496, - "step": 12140 - }, - { - "epoch": 0.6713653952459923, - "grad_norm": 3.412066698074341, - "learning_rate": 1.6898806139852188e-05, - "loss": 5.5492, - "step": 12145 - }, - { - "epoch": 0.6716417910447762, - "grad_norm": 4.21888542175293, - "learning_rate": 1.6884593519044916e-05, - "loss": 4.8999, - "step": 12150 - }, - { - "epoch": 0.67191818684356, - "grad_norm": 3.7332446575164795, - "learning_rate": 1.6870380898237636e-05, - "loss": 5.1613, - "step": 12155 - }, - { - "epoch": 0.6721945826423439, - "grad_norm": 3.9630868434906006, - "learning_rate": 1.685616827743036e-05, - "loss": 5.1332, - "step": 12160 - }, - { - "epoch": 0.6724709784411277, - "grad_norm": 3.0835654735565186, - "learning_rate": 1.684195565662308e-05, - "loss": 5.3016, - "step": 12165 - }, - { - "epoch": 0.6727473742399116, - "grad_norm": 3.015239715576172, - "learning_rate": 1.6827743035815805e-05, - "loss": 5.2528, - "step": 12170 - }, - { - "epoch": 0.6730237700386954, - "grad_norm": 3.449125051498413, - "learning_rate": 1.681353041500853e-05, - "loss": 4.8346, - "step": 12175 - }, - { - "epoch": 0.6733001658374793, - "grad_norm": 3.250673294067383, - "learning_rate": 1.6799317794201253e-05, - "loss": 5.0164, - "step": 12180 - }, - { - "epoch": 0.6735765616362631, - "grad_norm": 3.8429434299468994, - "learning_rate": 1.6785105173393977e-05, - "loss": 5.326, - "step": 12185 - }, - { - "epoch": 0.673852957435047, - "grad_norm": 3.823730707168579, - "learning_rate": 1.6770892552586697e-05, - "loss": 5.399, - "step": 12190 - }, - { - "epoch": 0.6741293532338308, - "grad_norm": 3.227236747741699, - "learning_rate": 1.675667993177942e-05, - "loss": 4.8976, - "step": 12195 - }, - { - "epoch": 0.6744057490326147, - "grad_norm": 3.5069992542266846, - "learning_rate": 1.6742467310972142e-05, - "loss": 4.9528, - "step": 12200 - }, - { - "epoch": 0.6746821448313985, - "grad_norm": 2.9861481189727783, - "learning_rate": 1.672825469016487e-05, - "loss": 5.1612, - "step": 12205 - }, - { - "epoch": 0.6749585406301825, - "grad_norm": 2.750514507293701, - "learning_rate": 1.671404206935759e-05, - "loss": 4.7478, - "step": 12210 - }, - { - "epoch": 0.6752349364289663, - "grad_norm": 3.113337993621826, - "learning_rate": 1.6699829448550314e-05, - "loss": 5.0428, - "step": 12215 - }, - { - "epoch": 0.6755113322277502, - "grad_norm": 3.744915723800659, - "learning_rate": 1.6685616827743038e-05, - "loss": 4.9286, - "step": 12220 - }, - { - "epoch": 0.675787728026534, - "grad_norm": 2.7885375022888184, - "learning_rate": 1.667140420693576e-05, - "loss": 5.1185, - "step": 12225 - }, - { - "epoch": 0.6760641238253179, - "grad_norm": 3.3463151454925537, - "learning_rate": 1.6657191586128482e-05, - "loss": 4.815, - "step": 12230 - }, - { - "epoch": 0.6763405196241017, - "grad_norm": 3.387303113937378, - "learning_rate": 1.6642978965321206e-05, - "loss": 5.0025, - "step": 12235 - }, - { - "epoch": 0.6766169154228856, - "grad_norm": 3.3395464420318604, - "learning_rate": 1.662876634451393e-05, - "loss": 5.3222, - "step": 12240 - }, - { - "epoch": 0.6768933112216694, - "grad_norm": 3.5981926918029785, - "learning_rate": 1.661455372370665e-05, - "loss": 5.262, - "step": 12245 - }, - { - "epoch": 0.6771697070204533, - "grad_norm": 2.9530062675476074, - "learning_rate": 1.6600341102899375e-05, - "loss": 5.0093, - "step": 12250 - }, - { - "epoch": 0.6774461028192371, - "grad_norm": 2.790177583694458, - "learning_rate": 1.65861284820921e-05, - "loss": 5.178, - "step": 12255 - }, - { - "epoch": 0.677722498618021, - "grad_norm": 3.2655928134918213, - "learning_rate": 1.6571915861284823e-05, - "loss": 4.9004, - "step": 12260 - }, - { - "epoch": 0.6779988944168048, - "grad_norm": 3.1761622428894043, - "learning_rate": 1.6557703240477547e-05, - "loss": 5.2096, - "step": 12265 - }, - { - "epoch": 0.6782752902155887, - "grad_norm": 3.3835296630859375, - "learning_rate": 1.6543490619670267e-05, - "loss": 4.7797, - "step": 12270 - }, - { - "epoch": 0.6785516860143725, - "grad_norm": 3.7050514221191406, - "learning_rate": 1.652927799886299e-05, - "loss": 5.075, - "step": 12275 - }, - { - "epoch": 0.6788280818131565, - "grad_norm": 3.350598096847534, - "learning_rate": 1.6515065378055712e-05, - "loss": 4.9758, - "step": 12280 - }, - { - "epoch": 0.6791044776119403, - "grad_norm": 3.3467860221862793, - "learning_rate": 1.6500852757248436e-05, - "loss": 4.984, - "step": 12285 - }, - { - "epoch": 0.6793808734107242, - "grad_norm": 3.433746814727783, - "learning_rate": 1.648664013644116e-05, - "loss": 4.8733, - "step": 12290 - }, - { - "epoch": 0.679657269209508, - "grad_norm": 3.335195302963257, - "learning_rate": 1.6472427515633884e-05, - "loss": 4.9734, - "step": 12295 - }, - { - "epoch": 0.6799336650082919, - "grad_norm": 3.4817519187927246, - "learning_rate": 1.6458214894826608e-05, - "loss": 4.9225, - "step": 12300 - }, - { - "epoch": 0.6802100608070757, - "grad_norm": 3.4715635776519775, - "learning_rate": 1.644400227401933e-05, - "loss": 5.0767, - "step": 12305 - }, - { - "epoch": 0.6804864566058596, - "grad_norm": 4.440505027770996, - "learning_rate": 1.6429789653212052e-05, - "loss": 4.6531, - "step": 12310 - }, - { - "epoch": 0.6807628524046434, - "grad_norm": 3.239976644515991, - "learning_rate": 1.6415577032404776e-05, - "loss": 5.0474, - "step": 12315 - }, - { - "epoch": 0.6810392482034273, - "grad_norm": 3.019190549850464, - "learning_rate": 1.64013644115975e-05, - "loss": 4.9771, - "step": 12320 - }, - { - "epoch": 0.6813156440022111, - "grad_norm": 3.4960367679595947, - "learning_rate": 1.6387151790790224e-05, - "loss": 5.1239, - "step": 12325 - }, - { - "epoch": 0.681592039800995, - "grad_norm": 4.1001081466674805, - "learning_rate": 1.6372939169982945e-05, - "loss": 5.0819, - "step": 12330 - }, - { - "epoch": 0.6818684355997788, - "grad_norm": 4.748738765716553, - "learning_rate": 1.635872654917567e-05, - "loss": 4.8874, - "step": 12335 - }, - { - "epoch": 0.6821448313985627, - "grad_norm": 3.8815574645996094, - "learning_rate": 1.634451392836839e-05, - "loss": 5.0887, - "step": 12340 - }, - { - "epoch": 0.6824212271973465, - "grad_norm": 3.651639938354492, - "learning_rate": 1.6330301307561117e-05, - "loss": 4.7795, - "step": 12345 - }, - { - "epoch": 0.6826976229961305, - "grad_norm": 3.9696593284606934, - "learning_rate": 1.6316088686753837e-05, - "loss": 5.243, - "step": 12350 - }, - { - "epoch": 0.6829740187949144, - "grad_norm": 3.3984673023223877, - "learning_rate": 1.630187606594656e-05, - "loss": 5.1969, - "step": 12355 - }, - { - "epoch": 0.6832504145936982, - "grad_norm": 3.081726312637329, - "learning_rate": 1.6287663445139285e-05, - "loss": 5.1571, - "step": 12360 - }, - { - "epoch": 0.6835268103924821, - "grad_norm": 3.6568610668182373, - "learning_rate": 1.6273450824332006e-05, - "loss": 5.2434, - "step": 12365 - }, - { - "epoch": 0.6838032061912659, - "grad_norm": 4.056967735290527, - "learning_rate": 1.6259238203524733e-05, - "loss": 4.993, - "step": 12370 - }, - { - "epoch": 0.6840796019900498, - "grad_norm": 3.637042284011841, - "learning_rate": 1.6245025582717454e-05, - "loss": 5.5494, - "step": 12375 - }, - { - "epoch": 0.6843559977888336, - "grad_norm": 4.664302825927734, - "learning_rate": 1.6230812961910178e-05, - "loss": 4.9485, - "step": 12380 - }, - { - "epoch": 0.6846323935876175, - "grad_norm": 3.098987340927124, - "learning_rate": 1.62166003411029e-05, - "loss": 4.8687, - "step": 12385 - }, - { - "epoch": 0.6849087893864013, - "grad_norm": 3.432152509689331, - "learning_rate": 1.6202387720295623e-05, - "loss": 4.7856, - "step": 12390 - }, - { - "epoch": 0.6851851851851852, - "grad_norm": 3.564708709716797, - "learning_rate": 1.6188175099488347e-05, - "loss": 5.0981, - "step": 12395 - }, - { - "epoch": 0.685461580983969, - "grad_norm": 3.6227316856384277, - "learning_rate": 1.617396247868107e-05, - "loss": 5.0038, - "step": 12400 - }, - { - "epoch": 0.685737976782753, - "grad_norm": 3.641265392303467, - "learning_rate": 1.6159749857873794e-05, - "loss": 4.9751, - "step": 12405 - }, - { - "epoch": 0.6860143725815367, - "grad_norm": 3.2415549755096436, - "learning_rate": 1.6145537237066515e-05, - "loss": 5.1277, - "step": 12410 - }, - { - "epoch": 0.6862907683803207, - "grad_norm": 3.59389591217041, - "learning_rate": 1.613132461625924e-05, - "loss": 5.1952, - "step": 12415 - }, - { - "epoch": 0.6865671641791045, - "grad_norm": 3.559694528579712, - "learning_rate": 1.611711199545196e-05, - "loss": 4.9343, - "step": 12420 - }, - { - "epoch": 0.6868435599778884, - "grad_norm": 3.0507447719573975, - "learning_rate": 1.6102899374644687e-05, - "loss": 5.0918, - "step": 12425 - }, - { - "epoch": 0.6871199557766722, - "grad_norm": 3.375110149383545, - "learning_rate": 1.6088686753837408e-05, - "loss": 4.78, - "step": 12430 - }, - { - "epoch": 0.6873963515754561, - "grad_norm": 4.308310031890869, - "learning_rate": 1.607447413303013e-05, - "loss": 4.9678, - "step": 12435 - }, - { - "epoch": 0.6876727473742399, - "grad_norm": 4.061061382293701, - "learning_rate": 1.6060261512222856e-05, - "loss": 4.9967, - "step": 12440 - }, - { - "epoch": 0.6879491431730238, - "grad_norm": 3.5668656826019287, - "learning_rate": 1.6046048891415576e-05, - "loss": 4.695, - "step": 12445 - }, - { - "epoch": 0.6882255389718076, - "grad_norm": 3.55816650390625, - "learning_rate": 1.60318362706083e-05, - "loss": 5.0108, - "step": 12450 - }, - { - "epoch": 0.6885019347705915, - "grad_norm": 3.516140937805176, - "learning_rate": 1.6017623649801024e-05, - "loss": 5.0607, - "step": 12455 - }, - { - "epoch": 0.6887783305693753, - "grad_norm": 3.5040078163146973, - "learning_rate": 1.6003411028993748e-05, - "loss": 5.2791, - "step": 12460 - }, - { - "epoch": 0.6890547263681592, - "grad_norm": 3.3454535007476807, - "learning_rate": 1.5989198408186472e-05, - "loss": 4.9042, - "step": 12465 - }, - { - "epoch": 0.689331122166943, - "grad_norm": 4.622253894805908, - "learning_rate": 1.5974985787379193e-05, - "loss": 4.8014, - "step": 12470 - }, - { - "epoch": 0.689607517965727, - "grad_norm": 4.814120292663574, - "learning_rate": 1.5960773166571917e-05, - "loss": 5.0986, - "step": 12475 - }, - { - "epoch": 0.6898839137645107, - "grad_norm": 3.6000027656555176, - "learning_rate": 1.594656054576464e-05, - "loss": 5.2295, - "step": 12480 - }, - { - "epoch": 0.6901603095632947, - "grad_norm": 3.4990899562835693, - "learning_rate": 1.5932347924957365e-05, - "loss": 5.2095, - "step": 12485 - }, - { - "epoch": 0.6904367053620785, - "grad_norm": 4.27670431137085, - "learning_rate": 1.5918135304150085e-05, - "loss": 5.3237, - "step": 12490 - }, - { - "epoch": 0.6907131011608624, - "grad_norm": 4.3061394691467285, - "learning_rate": 1.590392268334281e-05, - "loss": 5.0581, - "step": 12495 - }, - { - "epoch": 0.6909894969596462, - "grad_norm": 3.4813592433929443, - "learning_rate": 1.5889710062535533e-05, - "loss": 5.244, - "step": 12500 - }, - { - "epoch": 0.6912658927584301, - "grad_norm": 4.107872486114502, - "learning_rate": 1.5875497441728254e-05, - "loss": 4.9351, - "step": 12505 - }, - { - "epoch": 0.6915422885572139, - "grad_norm": 3.8972761631011963, - "learning_rate": 1.586128482092098e-05, - "loss": 4.954, - "step": 12510 - }, - { - "epoch": 0.6918186843559978, - "grad_norm": 3.9529995918273926, - "learning_rate": 1.5847072200113702e-05, - "loss": 4.553, - "step": 12515 - }, - { - "epoch": 0.6920950801547816, - "grad_norm": 3.2394180297851562, - "learning_rate": 1.5832859579306426e-05, - "loss": 4.8631, - "step": 12520 - }, - { - "epoch": 0.6923714759535655, - "grad_norm": 3.906923294067383, - "learning_rate": 1.5818646958499146e-05, - "loss": 4.655, - "step": 12525 - }, - { - "epoch": 0.6926478717523493, - "grad_norm": 3.0384624004364014, - "learning_rate": 1.580443433769187e-05, - "loss": 4.8143, - "step": 12530 - }, - { - "epoch": 0.6929242675511332, - "grad_norm": 3.3072428703308105, - "learning_rate": 1.5790221716884594e-05, - "loss": 5.1887, - "step": 12535 - }, - { - "epoch": 0.693200663349917, - "grad_norm": 3.6409823894500732, - "learning_rate": 1.5776009096077318e-05, - "loss": 5.3076, - "step": 12540 - }, - { - "epoch": 0.693477059148701, - "grad_norm": 4.234898567199707, - "learning_rate": 1.5761796475270042e-05, - "loss": 5.037, - "step": 12545 - }, - { - "epoch": 0.6937534549474847, - "grad_norm": 3.5586225986480713, - "learning_rate": 1.5747583854462763e-05, - "loss": 5.0659, - "step": 12550 - }, - { - "epoch": 0.6940298507462687, - "grad_norm": 3.656480073928833, - "learning_rate": 1.5733371233655487e-05, - "loss": 5.1345, - "step": 12555 - }, - { - "epoch": 0.6943062465450525, - "grad_norm": 3.5738766193389893, - "learning_rate": 1.5719158612848207e-05, - "loss": 4.8976, - "step": 12560 - }, - { - "epoch": 0.6945826423438364, - "grad_norm": 3.6187734603881836, - "learning_rate": 1.5704945992040935e-05, - "loss": 4.7864, - "step": 12565 - }, - { - "epoch": 0.6948590381426202, - "grad_norm": 3.634728193283081, - "learning_rate": 1.5690733371233655e-05, - "loss": 5.1336, - "step": 12570 - }, - { - "epoch": 0.6951354339414041, - "grad_norm": 3.8284621238708496, - "learning_rate": 1.567652075042638e-05, - "loss": 5.0473, - "step": 12575 - }, - { - "epoch": 0.695411829740188, - "grad_norm": 3.628915786743164, - "learning_rate": 1.5662308129619103e-05, - "loss": 4.8755, - "step": 12580 - }, - { - "epoch": 0.6956882255389718, - "grad_norm": 4.438342571258545, - "learning_rate": 1.5648095508811824e-05, - "loss": 4.7324, - "step": 12585 - }, - { - "epoch": 0.6959646213377557, - "grad_norm": 3.574575662612915, - "learning_rate": 1.563388288800455e-05, - "loss": 5.0009, - "step": 12590 - }, - { - "epoch": 0.6962410171365395, - "grad_norm": 3.5201807022094727, - "learning_rate": 1.5619670267197272e-05, - "loss": 5.0128, - "step": 12595 - }, - { - "epoch": 0.6965174129353234, - "grad_norm": 3.0795626640319824, - "learning_rate": 1.5605457646389996e-05, - "loss": 4.9838, - "step": 12600 - }, - { - "epoch": 0.6967938087341072, - "grad_norm": 3.3544068336486816, - "learning_rate": 1.5591245025582716e-05, - "loss": 5.1475, - "step": 12605 - }, - { - "epoch": 0.6970702045328911, - "grad_norm": 3.4172658920288086, - "learning_rate": 1.557703240477544e-05, - "loss": 4.7901, - "step": 12610 - }, - { - "epoch": 0.697346600331675, - "grad_norm": 3.966212511062622, - "learning_rate": 1.5562819783968164e-05, - "loss": 5.0707, - "step": 12615 - }, - { - "epoch": 0.6976229961304589, - "grad_norm": 3.324779748916626, - "learning_rate": 1.554860716316089e-05, - "loss": 5.0197, - "step": 12620 - }, - { - "epoch": 0.6978993919292427, - "grad_norm": 3.247331142425537, - "learning_rate": 1.5534394542353612e-05, - "loss": 5.3125, - "step": 12625 - }, - { - "epoch": 0.6981757877280266, - "grad_norm": 2.768170118331909, - "learning_rate": 1.5520181921546333e-05, - "loss": 4.8045, - "step": 12630 - }, - { - "epoch": 0.6984521835268104, - "grad_norm": 2.8270652294158936, - "learning_rate": 1.5505969300739057e-05, - "loss": 5.0571, - "step": 12635 - }, - { - "epoch": 0.6987285793255943, - "grad_norm": 3.471879005432129, - "learning_rate": 1.549175667993178e-05, - "loss": 4.9667, - "step": 12640 - }, - { - "epoch": 0.6990049751243781, - "grad_norm": 3.826054811477661, - "learning_rate": 1.5477544059124505e-05, - "loss": 4.9908, - "step": 12645 - }, - { - "epoch": 0.699281370923162, - "grad_norm": 3.6149983406066895, - "learning_rate": 1.546333143831723e-05, - "loss": 4.8681, - "step": 12650 - }, - { - "epoch": 0.6995577667219458, - "grad_norm": 4.132063388824463, - "learning_rate": 1.544911881750995e-05, - "loss": 5.0986, - "step": 12655 - }, - { - "epoch": 0.6998341625207297, - "grad_norm": 3.410290002822876, - "learning_rate": 1.5434906196702673e-05, - "loss": 5.0152, - "step": 12660 - }, - { - "epoch": 0.7001105583195135, - "grad_norm": 3.215104103088379, - "learning_rate": 1.5420693575895394e-05, - "loss": 4.7577, - "step": 12665 - }, - { - "epoch": 0.7003869541182974, - "grad_norm": 3.521066427230835, - "learning_rate": 1.5406480955088118e-05, - "loss": 4.9928, - "step": 12670 - }, - { - "epoch": 0.7006633499170812, - "grad_norm": 3.641317367553711, - "learning_rate": 1.5392268334280842e-05, - "loss": 5.1642, - "step": 12675 - }, - { - "epoch": 0.7009397457158651, - "grad_norm": 3.365527629852295, - "learning_rate": 1.5378055713473566e-05, - "loss": 4.9449, - "step": 12680 - }, - { - "epoch": 0.701216141514649, - "grad_norm": 3.430457592010498, - "learning_rate": 1.536384309266629e-05, - "loss": 5.123, - "step": 12685 - }, - { - "epoch": 0.7014925373134329, - "grad_norm": 3.236186981201172, - "learning_rate": 1.534963047185901e-05, - "loss": 5.1541, - "step": 12690 - }, - { - "epoch": 0.7017689331122167, - "grad_norm": 3.792572021484375, - "learning_rate": 1.5335417851051734e-05, - "loss": 5.1397, - "step": 12695 - }, - { - "epoch": 0.7020453289110006, - "grad_norm": 3.7908694744110107, - "learning_rate": 1.532120523024446e-05, - "loss": 5.0269, - "step": 12700 - }, - { - "epoch": 0.7023217247097844, - "grad_norm": 3.2599024772644043, - "learning_rate": 1.5306992609437182e-05, - "loss": 4.7786, - "step": 12705 - }, - { - "epoch": 0.7025981205085683, - "grad_norm": 3.5617377758026123, - "learning_rate": 1.5292779988629903e-05, - "loss": 5.0488, - "step": 12710 - }, - { - "epoch": 0.7028745163073521, - "grad_norm": 3.4966659545898438, - "learning_rate": 1.5278567367822627e-05, - "loss": 5.0739, - "step": 12715 - }, - { - "epoch": 0.703150912106136, - "grad_norm": 3.6931207180023193, - "learning_rate": 1.526435474701535e-05, - "loss": 4.7299, - "step": 12720 - }, - { - "epoch": 0.7034273079049198, - "grad_norm": 3.201852560043335, - "learning_rate": 1.5250142126208072e-05, - "loss": 5.1792, - "step": 12725 - }, - { - "epoch": 0.7037037037037037, - "grad_norm": 3.669928550720215, - "learning_rate": 1.5235929505400797e-05, - "loss": 4.8751, - "step": 12730 - }, - { - "epoch": 0.7039800995024875, - "grad_norm": 3.639188766479492, - "learning_rate": 1.5221716884593521e-05, - "loss": 4.7316, - "step": 12735 - }, - { - "epoch": 0.7042564953012714, - "grad_norm": 3.298006057739258, - "learning_rate": 1.5207504263786243e-05, - "loss": 4.7265, - "step": 12740 - }, - { - "epoch": 0.7045328911000552, - "grad_norm": 4.186183452606201, - "learning_rate": 1.5193291642978966e-05, - "loss": 4.6542, - "step": 12745 - }, - { - "epoch": 0.7048092868988391, - "grad_norm": 3.1352524757385254, - "learning_rate": 1.5179079022171688e-05, - "loss": 5.1087, - "step": 12750 - }, - { - "epoch": 0.705085682697623, - "grad_norm": 3.946307420730591, - "learning_rate": 1.516486640136441e-05, - "loss": 4.9225, - "step": 12755 - }, - { - "epoch": 0.7053620784964069, - "grad_norm": 3.4466960430145264, - "learning_rate": 1.5150653780557136e-05, - "loss": 5.0924, - "step": 12760 - }, - { - "epoch": 0.7056384742951907, - "grad_norm": 3.4541025161743164, - "learning_rate": 1.5136441159749858e-05, - "loss": 4.7316, - "step": 12765 - }, - { - "epoch": 0.7059148700939746, - "grad_norm": 3.1319077014923096, - "learning_rate": 1.5122228538942582e-05, - "loss": 5.0303, - "step": 12770 - }, - { - "epoch": 0.7061912658927584, - "grad_norm": 3.6074001789093018, - "learning_rate": 1.5108015918135305e-05, - "loss": 4.9892, - "step": 12775 - }, - { - "epoch": 0.7064676616915423, - "grad_norm": 3.2462453842163086, - "learning_rate": 1.5093803297328027e-05, - "loss": 4.9228, - "step": 12780 - }, - { - "epoch": 0.7067440574903261, - "grad_norm": 4.183916091918945, - "learning_rate": 1.5079590676520753e-05, - "loss": 5.0513, - "step": 12785 - }, - { - "epoch": 0.70702045328911, - "grad_norm": 4.227228164672852, - "learning_rate": 1.5065378055713475e-05, - "loss": 5.2663, - "step": 12790 - }, - { - "epoch": 0.7072968490878938, - "grad_norm": 4.704550743103027, - "learning_rate": 1.5051165434906197e-05, - "loss": 5.0818, - "step": 12795 - }, - { - "epoch": 0.7075732448866777, - "grad_norm": 3.2340505123138428, - "learning_rate": 1.503695281409892e-05, - "loss": 5.1363, - "step": 12800 - }, - { - "epoch": 0.7078496406854616, - "grad_norm": 4.0538153648376465, - "learning_rate": 1.5022740193291643e-05, - "loss": 5.6689, - "step": 12805 - }, - { - "epoch": 0.7081260364842454, - "grad_norm": 4.062292098999023, - "learning_rate": 1.5008527572484366e-05, - "loss": 5.157, - "step": 12810 - }, - { - "epoch": 0.7084024322830293, - "grad_norm": 3.2612810134887695, - "learning_rate": 1.4994314951677091e-05, - "loss": 4.9579, - "step": 12815 - }, - { - "epoch": 0.7086788280818132, - "grad_norm": 3.235447406768799, - "learning_rate": 1.4980102330869814e-05, - "loss": 5.1034, - "step": 12820 - }, - { - "epoch": 0.7089552238805971, - "grad_norm": 3.9662389755249023, - "learning_rate": 1.4965889710062536e-05, - "loss": 4.9475, - "step": 12825 - }, - { - "epoch": 0.7092316196793809, - "grad_norm": 3.0024068355560303, - "learning_rate": 1.4951677089255258e-05, - "loss": 4.9537, - "step": 12830 - }, - { - "epoch": 0.7095080154781648, - "grad_norm": 3.502521514892578, - "learning_rate": 1.4937464468447982e-05, - "loss": 4.9855, - "step": 12835 - }, - { - "epoch": 0.7097844112769486, - "grad_norm": 3.6317951679229736, - "learning_rate": 1.4923251847640706e-05, - "loss": 5.1954, - "step": 12840 - }, - { - "epoch": 0.7100608070757325, - "grad_norm": 3.3001856803894043, - "learning_rate": 1.490903922683343e-05, - "loss": 4.9513, - "step": 12845 - }, - { - "epoch": 0.7103372028745163, - "grad_norm": 3.484205961227417, - "learning_rate": 1.4894826606026152e-05, - "loss": 4.6211, - "step": 12850 - }, - { - "epoch": 0.7106135986733002, - "grad_norm": 3.8079354763031006, - "learning_rate": 1.4880613985218875e-05, - "loss": 5.0842, - "step": 12855 - }, - { - "epoch": 0.710889994472084, - "grad_norm": 3.5550713539123535, - "learning_rate": 1.4866401364411597e-05, - "loss": 4.981, - "step": 12860 - }, - { - "epoch": 0.7111663902708679, - "grad_norm": 4.202615261077881, - "learning_rate": 1.485218874360432e-05, - "loss": 5.1481, - "step": 12865 - }, - { - "epoch": 0.7114427860696517, - "grad_norm": 3.486539125442505, - "learning_rate": 1.4837976122797045e-05, - "loss": 5.0235, - "step": 12870 - }, - { - "epoch": 0.7117191818684356, - "grad_norm": 3.639721632003784, - "learning_rate": 1.4823763501989767e-05, - "loss": 4.9426, - "step": 12875 - }, - { - "epoch": 0.7119955776672194, - "grad_norm": 3.9784390926361084, - "learning_rate": 1.4809550881182491e-05, - "loss": 5.0809, - "step": 12880 - }, - { - "epoch": 0.7122719734660033, - "grad_norm": 2.89168643951416, - "learning_rate": 1.4795338260375213e-05, - "loss": 5.0391, - "step": 12885 - }, - { - "epoch": 0.7125483692647872, - "grad_norm": 4.273104667663574, - "learning_rate": 1.4781125639567936e-05, - "loss": 4.8018, - "step": 12890 - }, - { - "epoch": 0.7128247650635711, - "grad_norm": 3.9629507064819336, - "learning_rate": 1.4766913018760661e-05, - "loss": 5.0206, - "step": 12895 - }, - { - "epoch": 0.7131011608623549, - "grad_norm": 3.7056665420532227, - "learning_rate": 1.4752700397953384e-05, - "loss": 5.1737, - "step": 12900 - }, - { - "epoch": 0.7133775566611388, - "grad_norm": 3.9631214141845703, - "learning_rate": 1.4738487777146106e-05, - "loss": 5.4375, - "step": 12905 - }, - { - "epoch": 0.7136539524599226, - "grad_norm": 4.301235198974609, - "learning_rate": 1.472427515633883e-05, - "loss": 5.0811, - "step": 12910 - }, - { - "epoch": 0.7139303482587065, - "grad_norm": 3.4924840927124023, - "learning_rate": 1.4710062535531552e-05, - "loss": 5.0251, - "step": 12915 - }, - { - "epoch": 0.7142067440574903, - "grad_norm": 3.845777750015259, - "learning_rate": 1.4695849914724275e-05, - "loss": 4.9661, - "step": 12920 - }, - { - "epoch": 0.7144831398562742, - "grad_norm": 3.8963284492492676, - "learning_rate": 1.4681637293917e-05, - "loss": 5.0677, - "step": 12925 - }, - { - "epoch": 0.714759535655058, - "grad_norm": 3.3300371170043945, - "learning_rate": 1.4667424673109723e-05, - "loss": 4.565, - "step": 12930 - }, - { - "epoch": 0.7150359314538419, - "grad_norm": 3.4866998195648193, - "learning_rate": 1.4653212052302445e-05, - "loss": 4.9402, - "step": 12935 - }, - { - "epoch": 0.7153123272526257, - "grad_norm": 3.7221293449401855, - "learning_rate": 1.4638999431495167e-05, - "loss": 5.0538, - "step": 12940 - }, - { - "epoch": 0.7155887230514096, - "grad_norm": 3.6549923419952393, - "learning_rate": 1.4624786810687891e-05, - "loss": 5.0269, - "step": 12945 - }, - { - "epoch": 0.7158651188501934, - "grad_norm": 3.261334180831909, - "learning_rate": 1.4610574189880615e-05, - "loss": 5.0511, - "step": 12950 - }, - { - "epoch": 0.7161415146489774, - "grad_norm": 3.2248923778533936, - "learning_rate": 1.4596361569073339e-05, - "loss": 5.0044, - "step": 12955 - }, - { - "epoch": 0.7164179104477612, - "grad_norm": 3.5878825187683105, - "learning_rate": 1.4582148948266061e-05, - "loss": 5.0746, - "step": 12960 - }, - { - "epoch": 0.7166943062465451, - "grad_norm": 3.1510488986968994, - "learning_rate": 1.4567936327458784e-05, - "loss": 4.775, - "step": 12965 - }, - { - "epoch": 0.7169707020453289, - "grad_norm": 3.1844141483306885, - "learning_rate": 1.4553723706651506e-05, - "loss": 4.803, - "step": 12970 - }, - { - "epoch": 0.7172470978441128, - "grad_norm": 3.417433977127075, - "learning_rate": 1.453951108584423e-05, - "loss": 4.932, - "step": 12975 - }, - { - "epoch": 0.7175234936428966, - "grad_norm": 3.5905191898345947, - "learning_rate": 1.4525298465036954e-05, - "loss": 5.1135, - "step": 12980 - }, - { - "epoch": 0.7177998894416805, - "grad_norm": 3.180964231491089, - "learning_rate": 1.4511085844229678e-05, - "loss": 4.8405, - "step": 12985 - }, - { - "epoch": 0.7180762852404643, - "grad_norm": 3.3651866912841797, - "learning_rate": 1.44968732234224e-05, - "loss": 4.8142, - "step": 12990 - }, - { - "epoch": 0.7183526810392482, - "grad_norm": 3.8147387504577637, - "learning_rate": 1.4482660602615122e-05, - "loss": 5.152, - "step": 12995 - }, - { - "epoch": 0.718629076838032, - "grad_norm": 3.135228157043457, - "learning_rate": 1.4468447981807845e-05, - "loss": 4.7615, - "step": 13000 - }, - { - "epoch": 0.7189054726368159, - "grad_norm": 3.4666500091552734, - "learning_rate": 1.445423536100057e-05, - "loss": 5.238, - "step": 13005 - }, - { - "epoch": 0.7191818684355997, - "grad_norm": 3.4540812969207764, - "learning_rate": 1.4440022740193293e-05, - "loss": 5.1853, - "step": 13010 - }, - { - "epoch": 0.7194582642343836, - "grad_norm": 3.278425455093384, - "learning_rate": 1.4425810119386015e-05, - "loss": 5.0469, - "step": 13015 - }, - { - "epoch": 0.7197346600331676, - "grad_norm": 4.068847179412842, - "learning_rate": 1.4411597498578739e-05, - "loss": 4.8051, - "step": 13020 - }, - { - "epoch": 0.7200110558319514, - "grad_norm": 4.303192615509033, - "learning_rate": 1.4397384877771461e-05, - "loss": 4.8124, - "step": 13025 - }, - { - "epoch": 0.7202874516307353, - "grad_norm": 3.8566951751708984, - "learning_rate": 1.4383172256964183e-05, - "loss": 4.9964, - "step": 13030 - }, - { - "epoch": 0.7205638474295191, - "grad_norm": 3.5754945278167725, - "learning_rate": 1.4368959636156909e-05, - "loss": 5.055, - "step": 13035 - }, - { - "epoch": 0.720840243228303, - "grad_norm": 4.132562637329102, - "learning_rate": 1.4354747015349631e-05, - "loss": 5.0778, - "step": 13040 - }, - { - "epoch": 0.7211166390270868, - "grad_norm": 3.1052451133728027, - "learning_rate": 1.4340534394542354e-05, - "loss": 4.8714, - "step": 13045 - }, - { - "epoch": 0.7213930348258707, - "grad_norm": 3.893859386444092, - "learning_rate": 1.4326321773735078e-05, - "loss": 5.101, - "step": 13050 - }, - { - "epoch": 0.7216694306246545, - "grad_norm": 3.588839054107666, - "learning_rate": 1.43121091529278e-05, - "loss": 4.6311, - "step": 13055 - }, - { - "epoch": 0.7219458264234384, - "grad_norm": 3.061183214187622, - "learning_rate": 1.4297896532120526e-05, - "loss": 4.9162, - "step": 13060 - }, - { - "epoch": 0.7222222222222222, - "grad_norm": 3.5575475692749023, - "learning_rate": 1.4283683911313248e-05, - "loss": 5.3776, - "step": 13065 - }, - { - "epoch": 0.7224986180210061, - "grad_norm": 3.373143196105957, - "learning_rate": 1.426947129050597e-05, - "loss": 4.9563, - "step": 13070 - }, - { - "epoch": 0.7227750138197899, - "grad_norm": 4.447892189025879, - "learning_rate": 1.4255258669698693e-05, - "loss": 4.8872, - "step": 13075 - }, - { - "epoch": 0.7230514096185738, - "grad_norm": 4.527839660644531, - "learning_rate": 1.4241046048891415e-05, - "loss": 4.99, - "step": 13080 - }, - { - "epoch": 0.7233278054173576, - "grad_norm": 3.5849502086639404, - "learning_rate": 1.4226833428084139e-05, - "loss": 4.6041, - "step": 13085 - }, - { - "epoch": 0.7236042012161416, - "grad_norm": 3.7297580242156982, - "learning_rate": 1.4212620807276863e-05, - "loss": 4.948, - "step": 13090 - }, - { - "epoch": 0.7238805970149254, - "grad_norm": 3.810117721557617, - "learning_rate": 1.4198408186469587e-05, - "loss": 4.5659, - "step": 13095 - }, - { - "epoch": 0.7241569928137093, - "grad_norm": 3.6611037254333496, - "learning_rate": 1.4184195565662309e-05, - "loss": 4.7651, - "step": 13100 - }, - { - "epoch": 0.7244333886124931, - "grad_norm": 3.81868314743042, - "learning_rate": 1.4169982944855031e-05, - "loss": 4.8415, - "step": 13105 - }, - { - "epoch": 0.724709784411277, - "grad_norm": 3.0699076652526855, - "learning_rate": 1.4155770324047754e-05, - "loss": 4.8364, - "step": 13110 - }, - { - "epoch": 0.7249861802100608, - "grad_norm": 3.566793918609619, - "learning_rate": 1.414155770324048e-05, - "loss": 4.7472, - "step": 13115 - }, - { - "epoch": 0.7252625760088447, - "grad_norm": 3.110363721847534, - "learning_rate": 1.4127345082433202e-05, - "loss": 4.6858, - "step": 13120 - }, - { - "epoch": 0.7255389718076285, - "grad_norm": 3.3906173706054688, - "learning_rate": 1.4113132461625926e-05, - "loss": 4.9415, - "step": 13125 - }, - { - "epoch": 0.7258153676064124, - "grad_norm": 3.598484754562378, - "learning_rate": 1.4098919840818648e-05, - "loss": 4.9086, - "step": 13130 - }, - { - "epoch": 0.7260917634051962, - "grad_norm": 3.2210891246795654, - "learning_rate": 1.408470722001137e-05, - "loss": 5.1958, - "step": 13135 - }, - { - "epoch": 0.7263681592039801, - "grad_norm": 3.400056838989258, - "learning_rate": 1.4070494599204092e-05, - "loss": 5.2228, - "step": 13140 - }, - { - "epoch": 0.7266445550027639, - "grad_norm": 3.5063843727111816, - "learning_rate": 1.4056281978396818e-05, - "loss": 4.9669, - "step": 13145 - }, - { - "epoch": 0.7269209508015478, - "grad_norm": 3.8449692726135254, - "learning_rate": 1.404206935758954e-05, - "loss": 4.8901, - "step": 13150 - }, - { - "epoch": 0.7271973466003316, - "grad_norm": 3.1058237552642822, - "learning_rate": 1.4027856736782263e-05, - "loss": 5.0105, - "step": 13155 - }, - { - "epoch": 0.7274737423991156, - "grad_norm": 3.9088826179504395, - "learning_rate": 1.4013644115974987e-05, - "loss": 5.0905, - "step": 13160 - }, - { - "epoch": 0.7277501381978994, - "grad_norm": 3.3987643718719482, - "learning_rate": 1.3999431495167709e-05, - "loss": 4.5506, - "step": 13165 - }, - { - "epoch": 0.7280265339966833, - "grad_norm": 3.4407613277435303, - "learning_rate": 1.3985218874360435e-05, - "loss": 4.8973, - "step": 13170 - }, - { - "epoch": 0.7283029297954671, - "grad_norm": 3.590932607650757, - "learning_rate": 1.3971006253553157e-05, - "loss": 5.0722, - "step": 13175 - }, - { - "epoch": 0.728579325594251, - "grad_norm": 3.1984472274780273, - "learning_rate": 1.3956793632745879e-05, - "loss": 4.9033, - "step": 13180 - }, - { - "epoch": 0.7288557213930348, - "grad_norm": 3.5542407035827637, - "learning_rate": 1.3942581011938601e-05, - "loss": 4.8929, - "step": 13185 - }, - { - "epoch": 0.7291321171918187, - "grad_norm": 3.841609001159668, - "learning_rate": 1.3928368391131324e-05, - "loss": 5.0927, - "step": 13190 - }, - { - "epoch": 0.7294085129906025, - "grad_norm": 3.5699667930603027, - "learning_rate": 1.3914155770324048e-05, - "loss": 5.0084, - "step": 13195 - }, - { - "epoch": 0.7296849087893864, - "grad_norm": 3.561516046524048, - "learning_rate": 1.3899943149516773e-05, - "loss": 5.2851, - "step": 13200 - }, - { - "epoch": 0.7299613045881702, - "grad_norm": 3.833688497543335, - "learning_rate": 1.3885730528709496e-05, - "loss": 5.1715, - "step": 13205 - }, - { - "epoch": 0.7302377003869541, - "grad_norm": 3.255157232284546, - "learning_rate": 1.3871517907902218e-05, - "loss": 4.768, - "step": 13210 - }, - { - "epoch": 0.7305140961857379, - "grad_norm": 3.3706374168395996, - "learning_rate": 1.385730528709494e-05, - "loss": 4.8445, - "step": 13215 - }, - { - "epoch": 0.7307904919845218, - "grad_norm": 3.0012950897216797, - "learning_rate": 1.3843092666287662e-05, - "loss": 4.8293, - "step": 13220 - }, - { - "epoch": 0.7310668877833056, - "grad_norm": 3.7579739093780518, - "learning_rate": 1.3828880045480388e-05, - "loss": 5.1837, - "step": 13225 - }, - { - "epoch": 0.7313432835820896, - "grad_norm": 3.6330161094665527, - "learning_rate": 1.381466742467311e-05, - "loss": 4.9532, - "step": 13230 - }, - { - "epoch": 0.7316196793808734, - "grad_norm": 4.080511569976807, - "learning_rate": 1.3800454803865834e-05, - "loss": 5.0781, - "step": 13235 - }, - { - "epoch": 0.7318960751796573, - "grad_norm": 3.782904624938965, - "learning_rate": 1.3786242183058557e-05, - "loss": 4.9824, - "step": 13240 - }, - { - "epoch": 0.7321724709784412, - "grad_norm": 3.812208414077759, - "learning_rate": 1.3772029562251279e-05, - "loss": 5.1982, - "step": 13245 - }, - { - "epoch": 0.732448866777225, - "grad_norm": 3.541715145111084, - "learning_rate": 1.3757816941444001e-05, - "loss": 4.945, - "step": 13250 - }, - { - "epoch": 0.7327252625760089, - "grad_norm": 3.311810255050659, - "learning_rate": 1.3743604320636727e-05, - "loss": 5.0986, - "step": 13255 - }, - { - "epoch": 0.7330016583747927, - "grad_norm": 4.002370357513428, - "learning_rate": 1.372939169982945e-05, - "loss": 4.7093, - "step": 13260 - }, - { - "epoch": 0.7332780541735766, - "grad_norm": 3.475691556930542, - "learning_rate": 1.3715179079022172e-05, - "loss": 5.043, - "step": 13265 - }, - { - "epoch": 0.7335544499723604, - "grad_norm": 3.538708448410034, - "learning_rate": 1.3700966458214896e-05, - "loss": 4.9838, - "step": 13270 - }, - { - "epoch": 0.7338308457711443, - "grad_norm": 3.6737594604492188, - "learning_rate": 1.3686753837407618e-05, - "loss": 4.9001, - "step": 13275 - }, - { - "epoch": 0.7341072415699281, - "grad_norm": 3.5421972274780273, - "learning_rate": 1.3672541216600343e-05, - "loss": 4.9089, - "step": 13280 - }, - { - "epoch": 0.734383637368712, - "grad_norm": 3.507235050201416, - "learning_rate": 1.3658328595793066e-05, - "loss": 5.0143, - "step": 13285 - }, - { - "epoch": 0.7346600331674958, - "grad_norm": 2.759843587875366, - "learning_rate": 1.3644115974985788e-05, - "loss": 4.9846, - "step": 13290 - }, - { - "epoch": 0.7349364289662798, - "grad_norm": 3.151954412460327, - "learning_rate": 1.362990335417851e-05, - "loss": 4.933, - "step": 13295 - }, - { - "epoch": 0.7352128247650636, - "grad_norm": 2.9206533432006836, - "learning_rate": 1.3615690733371234e-05, - "loss": 4.7963, - "step": 13300 - }, - { - "epoch": 0.7354892205638475, - "grad_norm": 3.8691372871398926, - "learning_rate": 1.3601478112563957e-05, - "loss": 5.2145, - "step": 13305 - }, - { - "epoch": 0.7357656163626313, - "grad_norm": 3.0782530307769775, - "learning_rate": 1.3587265491756682e-05, - "loss": 4.6068, - "step": 13310 - }, - { - "epoch": 0.7360420121614152, - "grad_norm": 3.9192464351654053, - "learning_rate": 1.3573052870949405e-05, - "loss": 5.0152, - "step": 13315 - }, - { - "epoch": 0.736318407960199, - "grad_norm": 5.0663371086120605, - "learning_rate": 1.3558840250142127e-05, - "loss": 5.1813, - "step": 13320 - }, - { - "epoch": 0.7365948037589829, - "grad_norm": 3.4639241695404053, - "learning_rate": 1.3544627629334849e-05, - "loss": 5.0714, - "step": 13325 - }, - { - "epoch": 0.7368711995577667, - "grad_norm": 3.764646053314209, - "learning_rate": 1.3530415008527571e-05, - "loss": 4.9129, - "step": 13330 - }, - { - "epoch": 0.7371475953565506, - "grad_norm": 3.288600206375122, - "learning_rate": 1.3516202387720297e-05, - "loss": 4.8982, - "step": 13335 - }, - { - "epoch": 0.7374239911553344, - "grad_norm": 3.8759982585906982, - "learning_rate": 1.350198976691302e-05, - "loss": 5.1059, - "step": 13340 - }, - { - "epoch": 0.7377003869541183, - "grad_norm": 3.3489646911621094, - "learning_rate": 1.3487777146105743e-05, - "loss": 5.1768, - "step": 13345 - }, - { - "epoch": 0.7379767827529021, - "grad_norm": 3.005626916885376, - "learning_rate": 1.3473564525298466e-05, - "loss": 5.0332, - "step": 13350 - }, - { - "epoch": 0.738253178551686, - "grad_norm": 3.416574239730835, - "learning_rate": 1.3459351904491188e-05, - "loss": 5.0185, - "step": 13355 - }, - { - "epoch": 0.7385295743504698, - "grad_norm": 3.5634119510650635, - "learning_rate": 1.344513928368391e-05, - "loss": 4.7656, - "step": 13360 - }, - { - "epoch": 0.7388059701492538, - "grad_norm": 3.0939366817474365, - "learning_rate": 1.3430926662876636e-05, - "loss": 4.8285, - "step": 13365 - }, - { - "epoch": 0.7390823659480376, - "grad_norm": 3.3977231979370117, - "learning_rate": 1.3416714042069358e-05, - "loss": 4.9093, - "step": 13370 - }, - { - "epoch": 0.7393587617468215, - "grad_norm": 3.6465513706207275, - "learning_rate": 1.3402501421262082e-05, - "loss": 5.0178, - "step": 13375 - }, - { - "epoch": 0.7396351575456053, - "grad_norm": 3.207108974456787, - "learning_rate": 1.3388288800454804e-05, - "loss": 5.2148, - "step": 13380 - }, - { - "epoch": 0.7399115533443892, - "grad_norm": 3.185753107070923, - "learning_rate": 1.3374076179647527e-05, - "loss": 5.0929, - "step": 13385 - }, - { - "epoch": 0.740187949143173, - "grad_norm": 4.584200859069824, - "learning_rate": 1.3359863558840252e-05, - "loss": 4.651, - "step": 13390 - }, - { - "epoch": 0.7404643449419569, - "grad_norm": 3.067399024963379, - "learning_rate": 1.3345650938032975e-05, - "loss": 5.0875, - "step": 13395 - }, - { - "epoch": 0.7407407407407407, - "grad_norm": 3.611589193344116, - "learning_rate": 1.3331438317225697e-05, - "loss": 4.725, - "step": 13400 - }, - { - "epoch": 0.7410171365395246, - "grad_norm": 3.321446418762207, - "learning_rate": 1.331722569641842e-05, - "loss": 4.8858, - "step": 13405 - }, - { - "epoch": 0.7412935323383084, - "grad_norm": 3.519653797149658, - "learning_rate": 1.3303013075611143e-05, - "loss": 4.645, - "step": 13410 - }, - { - "epoch": 0.7415699281370923, - "grad_norm": 3.325186014175415, - "learning_rate": 1.3288800454803865e-05, - "loss": 4.8125, - "step": 13415 - }, - { - "epoch": 0.7418463239358761, - "grad_norm": 3.7790181636810303, - "learning_rate": 1.3274587833996591e-05, - "loss": 4.9224, - "step": 13420 - }, - { - "epoch": 0.74212271973466, - "grad_norm": 3.3539559841156006, - "learning_rate": 1.3260375213189313e-05, - "loss": 4.5994, - "step": 13425 - }, - { - "epoch": 0.7423991155334438, - "grad_norm": 3.002809762954712, - "learning_rate": 1.3246162592382036e-05, - "loss": 5.2474, - "step": 13430 - }, - { - "epoch": 0.7426755113322278, - "grad_norm": 3.0804951190948486, - "learning_rate": 1.3231949971574758e-05, - "loss": 4.6779, - "step": 13435 - }, - { - "epoch": 0.7429519071310116, - "grad_norm": 4.401519775390625, - "learning_rate": 1.3217737350767482e-05, - "loss": 4.8666, - "step": 13440 - }, - { - "epoch": 0.7432283029297955, - "grad_norm": 3.0811357498168945, - "learning_rate": 1.3203524729960206e-05, - "loss": 4.909, - "step": 13445 - }, - { - "epoch": 0.7435046987285793, - "grad_norm": 4.676823139190674, - "learning_rate": 1.318931210915293e-05, - "loss": 5.3051, - "step": 13450 - }, - { - "epoch": 0.7437810945273632, - "grad_norm": 3.572274684906006, - "learning_rate": 1.3175099488345652e-05, - "loss": 4.8484, - "step": 13455 - }, - { - "epoch": 0.7440574903261471, - "grad_norm": 3.933598041534424, - "learning_rate": 1.3160886867538375e-05, - "loss": 4.9078, - "step": 13460 - }, - { - "epoch": 0.7443338861249309, - "grad_norm": 4.784248352050781, - "learning_rate": 1.3146674246731097e-05, - "loss": 4.9795, - "step": 13465 - }, - { - "epoch": 0.7446102819237148, - "grad_norm": 3.54833984375, - "learning_rate": 1.3132461625923819e-05, - "loss": 5.0677, - "step": 13470 - }, - { - "epoch": 0.7448866777224986, - "grad_norm": 3.983644962310791, - "learning_rate": 1.3118249005116545e-05, - "loss": 4.8627, - "step": 13475 - }, - { - "epoch": 0.7451630735212825, - "grad_norm": 3.837332010269165, - "learning_rate": 1.3104036384309267e-05, - "loss": 4.7729, - "step": 13480 - }, - { - "epoch": 0.7454394693200663, - "grad_norm": 4.445468425750732, - "learning_rate": 1.3089823763501991e-05, - "loss": 5.0954, - "step": 13485 - }, - { - "epoch": 0.7457158651188502, - "grad_norm": 3.518786668777466, - "learning_rate": 1.3075611142694713e-05, - "loss": 4.855, - "step": 13490 - }, - { - "epoch": 0.745992260917634, - "grad_norm": 3.9900107383728027, - "learning_rate": 1.3061398521887436e-05, - "loss": 4.7721, - "step": 13495 - }, - { - "epoch": 0.746268656716418, - "grad_norm": 3.3050553798675537, - "learning_rate": 1.3047185901080161e-05, - "loss": 4.9431, - "step": 13500 - }, - { - "epoch": 0.7465450525152018, - "grad_norm": 3.213809013366699, - "learning_rate": 1.3032973280272884e-05, - "loss": 5.172, - "step": 13505 - }, - { - "epoch": 0.7468214483139857, - "grad_norm": 3.7360849380493164, - "learning_rate": 1.3018760659465606e-05, - "loss": 5.0361, - "step": 13510 - }, - { - "epoch": 0.7470978441127695, - "grad_norm": 4.024673938751221, - "learning_rate": 1.300454803865833e-05, - "loss": 4.8585, - "step": 13515 - }, - { - "epoch": 0.7473742399115534, - "grad_norm": 3.3230862617492676, - "learning_rate": 1.2990335417851052e-05, - "loss": 5.1286, - "step": 13520 - }, - { - "epoch": 0.7476506357103372, - "grad_norm": 4.014267921447754, - "learning_rate": 1.2976122797043774e-05, - "loss": 5.0204, - "step": 13525 - }, - { - "epoch": 0.7479270315091211, - "grad_norm": 3.804960250854492, - "learning_rate": 1.29619101762365e-05, - "loss": 4.6567, - "step": 13530 - }, - { - "epoch": 0.7482034273079049, - "grad_norm": 3.149284601211548, - "learning_rate": 1.2947697555429222e-05, - "loss": 4.857, - "step": 13535 - }, - { - "epoch": 0.7484798231066888, - "grad_norm": 3.5097615718841553, - "learning_rate": 1.2933484934621945e-05, - "loss": 4.5143, - "step": 13540 - }, - { - "epoch": 0.7487562189054726, - "grad_norm": 3.3128249645233154, - "learning_rate": 1.2919272313814667e-05, - "loss": 5.1963, - "step": 13545 - }, - { - "epoch": 0.7490326147042565, - "grad_norm": 3.3906478881835938, - "learning_rate": 1.2905059693007391e-05, - "loss": 4.741, - "step": 13550 - }, - { - "epoch": 0.7493090105030403, - "grad_norm": 3.369476556777954, - "learning_rate": 1.2890847072200115e-05, - "loss": 5.0067, - "step": 13555 - }, - { - "epoch": 0.7495854063018242, - "grad_norm": 4.018293857574463, - "learning_rate": 1.2876634451392839e-05, - "loss": 5.2337, - "step": 13560 - }, - { - "epoch": 0.749861802100608, - "grad_norm": 4.163745880126953, - "learning_rate": 1.2862421830585561e-05, - "loss": 4.9956, - "step": 13565 - }, - { - "epoch": 0.750138197899392, - "grad_norm": 3.6494219303131104, - "learning_rate": 1.2848209209778283e-05, - "loss": 4.6203, - "step": 13570 - }, - { - "epoch": 0.7504145936981758, - "grad_norm": 3.210977077484131, - "learning_rate": 1.2833996588971006e-05, - "loss": 4.9079, - "step": 13575 - }, - { - "epoch": 0.7506909894969597, - "grad_norm": 3.0875606536865234, - "learning_rate": 1.2819783968163728e-05, - "loss": 5.0435, - "step": 13580 - }, - { - "epoch": 0.7509673852957435, - "grad_norm": 3.371025323867798, - "learning_rate": 1.2805571347356454e-05, - "loss": 5.0249, - "step": 13585 - }, - { - "epoch": 0.7512437810945274, - "grad_norm": 3.431417226791382, - "learning_rate": 1.2791358726549178e-05, - "loss": 4.8918, - "step": 13590 - }, - { - "epoch": 0.7515201768933112, - "grad_norm": 4.608222007751465, - "learning_rate": 1.27771461057419e-05, - "loss": 4.8191, - "step": 13595 - }, - { - "epoch": 0.7517965726920951, - "grad_norm": 4.438573360443115, - "learning_rate": 1.2762933484934622e-05, - "loss": 5.1095, - "step": 13600 - }, - { - "epoch": 0.7520729684908789, - "grad_norm": 3.5704104900360107, - "learning_rate": 1.2748720864127345e-05, - "loss": 4.8193, - "step": 13605 - }, - { - "epoch": 0.7523493642896628, - "grad_norm": 3.665001630783081, - "learning_rate": 1.273450824332007e-05, - "loss": 4.9377, - "step": 13610 - }, - { - "epoch": 0.7526257600884466, - "grad_norm": 4.091150760650635, - "learning_rate": 1.2720295622512792e-05, - "loss": 4.758, - "step": 13615 - }, - { - "epoch": 0.7529021558872305, - "grad_norm": 3.687591791152954, - "learning_rate": 1.2706083001705515e-05, - "loss": 4.9703, - "step": 13620 - }, - { - "epoch": 0.7531785516860143, - "grad_norm": 3.9441850185394287, - "learning_rate": 1.2691870380898239e-05, - "loss": 5.0439, - "step": 13625 - }, - { - "epoch": 0.7534549474847982, - "grad_norm": 3.240487575531006, - "learning_rate": 1.2677657760090961e-05, - "loss": 4.6799, - "step": 13630 - }, - { - "epoch": 0.753731343283582, - "grad_norm": 3.4183409214019775, - "learning_rate": 1.2663445139283683e-05, - "loss": 5.1698, - "step": 13635 - }, - { - "epoch": 0.754007739082366, - "grad_norm": 3.549414873123169, - "learning_rate": 1.2649232518476409e-05, - "loss": 4.9135, - "step": 13640 - }, - { - "epoch": 0.7542841348811498, - "grad_norm": 3.5168375968933105, - "learning_rate": 1.2635019897669131e-05, - "loss": 5.078, - "step": 13645 - }, - { - "epoch": 0.7545605306799337, - "grad_norm": 2.9269330501556396, - "learning_rate": 1.2620807276861854e-05, - "loss": 4.9437, - "step": 13650 - }, - { - "epoch": 0.7548369264787175, - "grad_norm": 3.613555908203125, - "learning_rate": 1.2606594656054576e-05, - "loss": 5.1616, - "step": 13655 - }, - { - "epoch": 0.7551133222775014, - "grad_norm": 3.630427122116089, - "learning_rate": 1.25923820352473e-05, - "loss": 5.0425, - "step": 13660 - }, - { - "epoch": 0.7553897180762852, - "grad_norm": 3.5098865032196045, - "learning_rate": 1.2578169414440025e-05, - "loss": 4.9677, - "step": 13665 - }, - { - "epoch": 0.7556661138750691, - "grad_norm": 3.6350200176239014, - "learning_rate": 1.2563956793632748e-05, - "loss": 5.1123, - "step": 13670 - }, - { - "epoch": 0.7559425096738529, - "grad_norm": 3.2419891357421875, - "learning_rate": 1.254974417282547e-05, - "loss": 5.0052, - "step": 13675 - }, - { - "epoch": 0.7562189054726368, - "grad_norm": 3.6156861782073975, - "learning_rate": 1.2535531552018192e-05, - "loss": 4.6917, - "step": 13680 - }, - { - "epoch": 0.7564953012714207, - "grad_norm": 3.5388574600219727, - "learning_rate": 1.2521318931210915e-05, - "loss": 4.9765, - "step": 13685 - }, - { - "epoch": 0.7567716970702045, - "grad_norm": 3.379070997238159, - "learning_rate": 1.2507106310403639e-05, - "loss": 4.8777, - "step": 13690 - }, - { - "epoch": 0.7570480928689884, - "grad_norm": 3.054107904434204, - "learning_rate": 1.2492893689596363e-05, - "loss": 4.6504, - "step": 13695 - }, - { - "epoch": 0.7573244886677722, - "grad_norm": 4.416358947753906, - "learning_rate": 1.2478681068789087e-05, - "loss": 4.9261, - "step": 13700 - }, - { - "epoch": 0.7576008844665562, - "grad_norm": 3.9947686195373535, - "learning_rate": 1.2464468447981809e-05, - "loss": 5.0292, - "step": 13705 - }, - { - "epoch": 0.75787728026534, - "grad_norm": 3.1346933841705322, - "learning_rate": 1.2450255827174531e-05, - "loss": 4.9894, - "step": 13710 - }, - { - "epoch": 0.7581536760641239, - "grad_norm": 3.530553102493286, - "learning_rate": 1.2436043206367255e-05, - "loss": 5.091, - "step": 13715 - }, - { - "epoch": 0.7584300718629077, - "grad_norm": 3.7621891498565674, - "learning_rate": 1.2421830585559977e-05, - "loss": 4.74, - "step": 13720 - }, - { - "epoch": 0.7587064676616916, - "grad_norm": 3.6707799434661865, - "learning_rate": 1.24076179647527e-05, - "loss": 5.1945, - "step": 13725 - }, - { - "epoch": 0.7589828634604754, - "grad_norm": 3.413144111633301, - "learning_rate": 1.2393405343945425e-05, - "loss": 4.9481, - "step": 13730 - }, - { - "epoch": 0.7592592592592593, - "grad_norm": 3.7922909259796143, - "learning_rate": 1.2379192723138148e-05, - "loss": 5.0266, - "step": 13735 - }, - { - "epoch": 0.7595356550580431, - "grad_norm": 3.63356876373291, - "learning_rate": 1.236498010233087e-05, - "loss": 5.0376, - "step": 13740 - }, - { - "epoch": 0.759812050856827, - "grad_norm": 3.9122843742370605, - "learning_rate": 1.2350767481523594e-05, - "loss": 4.7132, - "step": 13745 - }, - { - "epoch": 0.7600884466556108, - "grad_norm": 4.200931072235107, - "learning_rate": 1.2336554860716316e-05, - "loss": 4.8281, - "step": 13750 - }, - { - "epoch": 0.7603648424543947, - "grad_norm": 3.4712514877319336, - "learning_rate": 1.232234223990904e-05, - "loss": 4.9505, - "step": 13755 - }, - { - "epoch": 0.7606412382531785, - "grad_norm": 3.5167579650878906, - "learning_rate": 1.2308129619101762e-05, - "loss": 4.8998, - "step": 13760 - }, - { - "epoch": 0.7609176340519624, - "grad_norm": 2.8712246417999268, - "learning_rate": 1.2293916998294486e-05, - "loss": 4.9693, - "step": 13765 - }, - { - "epoch": 0.7611940298507462, - "grad_norm": 3.5918309688568115, - "learning_rate": 1.227970437748721e-05, - "loss": 5.3341, - "step": 13770 - }, - { - "epoch": 0.7614704256495302, - "grad_norm": 3.4594647884368896, - "learning_rate": 1.2265491756679933e-05, - "loss": 4.7239, - "step": 13775 - }, - { - "epoch": 0.761746821448314, - "grad_norm": 3.583055257797241, - "learning_rate": 1.2251279135872655e-05, - "loss": 4.8022, - "step": 13780 - }, - { - "epoch": 0.7620232172470979, - "grad_norm": 3.9287872314453125, - "learning_rate": 1.2237066515065379e-05, - "loss": 4.6748, - "step": 13785 - }, - { - "epoch": 0.7622996130458817, - "grad_norm": 4.147637367248535, - "learning_rate": 1.2222853894258101e-05, - "loss": 4.9476, - "step": 13790 - }, - { - "epoch": 0.7625760088446656, - "grad_norm": 3.5439789295196533, - "learning_rate": 1.2208641273450824e-05, - "loss": 4.9426, - "step": 13795 - }, - { - "epoch": 0.7628524046434494, - "grad_norm": 3.4278995990753174, - "learning_rate": 1.2194428652643548e-05, - "loss": 5.0638, - "step": 13800 - }, - { - "epoch": 0.7631288004422333, - "grad_norm": 3.199315071105957, - "learning_rate": 1.2180216031836271e-05, - "loss": 4.9484, - "step": 13805 - }, - { - "epoch": 0.7634051962410171, - "grad_norm": 3.8618929386138916, - "learning_rate": 1.2166003411028995e-05, - "loss": 4.7998, - "step": 13810 - }, - { - "epoch": 0.763681592039801, - "grad_norm": 3.2143681049346924, - "learning_rate": 1.2151790790221718e-05, - "loss": 4.9469, - "step": 13815 - }, - { - "epoch": 0.7639579878385848, - "grad_norm": 3.6355247497558594, - "learning_rate": 1.213757816941444e-05, - "loss": 4.7807, - "step": 13820 - }, - { - "epoch": 0.7642343836373687, - "grad_norm": 4.1098246574401855, - "learning_rate": 1.2123365548607164e-05, - "loss": 4.9628, - "step": 13825 - }, - { - "epoch": 0.7645107794361525, - "grad_norm": 3.3956034183502197, - "learning_rate": 1.2109152927799886e-05, - "loss": 5.1022, - "step": 13830 - }, - { - "epoch": 0.7647871752349364, - "grad_norm": 3.51596736907959, - "learning_rate": 1.209494030699261e-05, - "loss": 4.7867, - "step": 13835 - }, - { - "epoch": 0.7650635710337202, - "grad_norm": 3.4072954654693604, - "learning_rate": 1.2080727686185334e-05, - "loss": 5.0985, - "step": 13840 - }, - { - "epoch": 0.7653399668325042, - "grad_norm": 2.994511842727661, - "learning_rate": 1.2066515065378057e-05, - "loss": 4.9816, - "step": 13845 - }, - { - "epoch": 0.765616362631288, - "grad_norm": 3.5091991424560547, - "learning_rate": 1.2052302444570779e-05, - "loss": 5.0666, - "step": 13850 - }, - { - "epoch": 0.7658927584300719, - "grad_norm": 3.4935755729675293, - "learning_rate": 1.2038089823763503e-05, - "loss": 4.8671, - "step": 13855 - }, - { - "epoch": 0.7661691542288557, - "grad_norm": 3.766550302505493, - "learning_rate": 1.2023877202956225e-05, - "loss": 4.9597, - "step": 13860 - }, - { - "epoch": 0.7664455500276396, - "grad_norm": 3.4647650718688965, - "learning_rate": 1.2009664582148949e-05, - "loss": 5.0242, - "step": 13865 - }, - { - "epoch": 0.7667219458264234, - "grad_norm": 3.2583017349243164, - "learning_rate": 1.1995451961341671e-05, - "loss": 5.1654, - "step": 13870 - }, - { - "epoch": 0.7669983416252073, - "grad_norm": 4.087442874908447, - "learning_rate": 1.1981239340534395e-05, - "loss": 5.029, - "step": 13875 - }, - { - "epoch": 0.7672747374239911, - "grad_norm": 3.532439947128296, - "learning_rate": 1.196702671972712e-05, - "loss": 4.7802, - "step": 13880 - }, - { - "epoch": 0.767551133222775, - "grad_norm": 2.9457197189331055, - "learning_rate": 1.1952814098919842e-05, - "loss": 4.8096, - "step": 13885 - }, - { - "epoch": 0.7678275290215588, - "grad_norm": 3.6090781688690186, - "learning_rate": 1.1938601478112564e-05, - "loss": 4.9288, - "step": 13890 - }, - { - "epoch": 0.7681039248203427, - "grad_norm": 4.044902801513672, - "learning_rate": 1.1924388857305288e-05, - "loss": 4.8477, - "step": 13895 - }, - { - "epoch": 0.7683803206191266, - "grad_norm": 4.630081653594971, - "learning_rate": 1.191017623649801e-05, - "loss": 4.9998, - "step": 13900 - }, - { - "epoch": 0.7686567164179104, - "grad_norm": 4.869181156158447, - "learning_rate": 1.1895963615690734e-05, - "loss": 4.8786, - "step": 13905 - }, - { - "epoch": 0.7689331122166944, - "grad_norm": 3.5769691467285156, - "learning_rate": 1.1881750994883458e-05, - "loss": 4.9105, - "step": 13910 - }, - { - "epoch": 0.7692095080154782, - "grad_norm": 2.8430140018463135, - "learning_rate": 1.186753837407618e-05, - "loss": 4.9828, - "step": 13915 - }, - { - "epoch": 0.7694859038142621, - "grad_norm": 3.4973304271698, - "learning_rate": 1.1853325753268904e-05, - "loss": 4.7579, - "step": 13920 - }, - { - "epoch": 0.7697622996130459, - "grad_norm": 3.7342896461486816, - "learning_rate": 1.1839113132461627e-05, - "loss": 5.0691, - "step": 13925 - }, - { - "epoch": 0.7700386954118298, - "grad_norm": 3.4098775386810303, - "learning_rate": 1.1824900511654349e-05, - "loss": 4.8514, - "step": 13930 - }, - { - "epoch": 0.7703150912106136, - "grad_norm": 3.3334388732910156, - "learning_rate": 1.1810687890847073e-05, - "loss": 5.1646, - "step": 13935 - }, - { - "epoch": 0.7705914870093975, - "grad_norm": 4.081144332885742, - "learning_rate": 1.1796475270039795e-05, - "loss": 5.1261, - "step": 13940 - }, - { - "epoch": 0.7708678828081813, - "grad_norm": 3.5878841876983643, - "learning_rate": 1.178226264923252e-05, - "loss": 4.9911, - "step": 13945 - }, - { - "epoch": 0.7711442786069652, - "grad_norm": 3.167708158493042, - "learning_rate": 1.1768050028425243e-05, - "loss": 4.8064, - "step": 13950 - }, - { - "epoch": 0.771420674405749, - "grad_norm": 4.6932806968688965, - "learning_rate": 1.1753837407617965e-05, - "loss": 5.0513, - "step": 13955 - }, - { - "epoch": 0.7716970702045329, - "grad_norm": 3.659235954284668, - "learning_rate": 1.1739624786810688e-05, - "loss": 4.88, - "step": 13960 - }, - { - "epoch": 0.7719734660033167, - "grad_norm": 3.2741990089416504, - "learning_rate": 1.1725412166003412e-05, - "loss": 4.7233, - "step": 13965 - }, - { - "epoch": 0.7722498618021006, - "grad_norm": 3.8330390453338623, - "learning_rate": 1.1711199545196134e-05, - "loss": 4.9704, - "step": 13970 - }, - { - "epoch": 0.7725262576008844, - "grad_norm": 3.3870649337768555, - "learning_rate": 1.1696986924388858e-05, - "loss": 4.7279, - "step": 13975 - }, - { - "epoch": 0.7728026533996684, - "grad_norm": 3.542768716812134, - "learning_rate": 1.1682774303581582e-05, - "loss": 5.0573, - "step": 13980 - }, - { - "epoch": 0.7730790491984522, - "grad_norm": 4.090266227722168, - "learning_rate": 1.1668561682774304e-05, - "loss": 5.2034, - "step": 13985 - }, - { - "epoch": 0.7733554449972361, - "grad_norm": 3.7428195476531982, - "learning_rate": 1.1654349061967028e-05, - "loss": 5.0463, - "step": 13990 - }, - { - "epoch": 0.7736318407960199, - "grad_norm": 4.728503227233887, - "learning_rate": 1.164013644115975e-05, - "loss": 4.7835, - "step": 13995 - }, - { - "epoch": 0.7739082365948038, - "grad_norm": 3.689528226852417, - "learning_rate": 1.1625923820352473e-05, - "loss": 5.2645, - "step": 14000 - }, - { - "epoch": 0.7741846323935876, - "grad_norm": 4.224091529846191, - "learning_rate": 1.1611711199545197e-05, - "loss": 4.7057, - "step": 14005 - }, - { - "epoch": 0.7744610281923715, - "grad_norm": 3.54939866065979, - "learning_rate": 1.1597498578737919e-05, - "loss": 5.2695, - "step": 14010 - }, - { - "epoch": 0.7747374239911553, - "grad_norm": 3.281078338623047, - "learning_rate": 1.1583285957930643e-05, - "loss": 5.0044, - "step": 14015 - }, - { - "epoch": 0.7750138197899392, - "grad_norm": 5.049131870269775, - "learning_rate": 1.1569073337123367e-05, - "loss": 5.2212, - "step": 14020 - }, - { - "epoch": 0.775290215588723, - "grad_norm": 3.362013339996338, - "learning_rate": 1.155486071631609e-05, - "loss": 4.8976, - "step": 14025 - }, - { - "epoch": 0.7755666113875069, - "grad_norm": 5.27224063873291, - "learning_rate": 1.1540648095508812e-05, - "loss": 4.898, - "step": 14030 - }, - { - "epoch": 0.7758430071862907, - "grad_norm": 3.802133560180664, - "learning_rate": 1.1526435474701536e-05, - "loss": 4.8342, - "step": 14035 - }, - { - "epoch": 0.7761194029850746, - "grad_norm": 3.688088893890381, - "learning_rate": 1.1512222853894258e-05, - "loss": 4.8441, - "step": 14040 - }, - { - "epoch": 0.7763957987838584, - "grad_norm": 3.9619669914245605, - "learning_rate": 1.1498010233086982e-05, - "loss": 5.2455, - "step": 14045 - }, - { - "epoch": 0.7766721945826424, - "grad_norm": 3.759188413619995, - "learning_rate": 1.1483797612279706e-05, - "loss": 5.1307, - "step": 14050 - }, - { - "epoch": 0.7769485903814262, - "grad_norm": 3.8243587017059326, - "learning_rate": 1.1469584991472428e-05, - "loss": 4.9645, - "step": 14055 - }, - { - "epoch": 0.7772249861802101, - "grad_norm": 3.4312751293182373, - "learning_rate": 1.1455372370665152e-05, - "loss": 4.9075, - "step": 14060 - }, - { - "epoch": 0.7775013819789939, - "grad_norm": 3.4000275135040283, - "learning_rate": 1.1441159749857874e-05, - "loss": 4.8989, - "step": 14065 - }, - { - "epoch": 0.7777777777777778, - "grad_norm": 3.78006911277771, - "learning_rate": 1.1426947129050597e-05, - "loss": 5.2333, - "step": 14070 - }, - { - "epoch": 0.7780541735765616, - "grad_norm": 3.4480984210968018, - "learning_rate": 1.141273450824332e-05, - "loss": 4.9613, - "step": 14075 - }, - { - "epoch": 0.7783305693753455, - "grad_norm": 3.377744674682617, - "learning_rate": 1.1398521887436043e-05, - "loss": 5.0646, - "step": 14080 - }, - { - "epoch": 0.7786069651741293, - "grad_norm": 3.1980888843536377, - "learning_rate": 1.1384309266628767e-05, - "loss": 4.9803, - "step": 14085 - }, - { - "epoch": 0.7788833609729132, - "grad_norm": 3.6203091144561768, - "learning_rate": 1.1370096645821491e-05, - "loss": 4.8729, - "step": 14090 - }, - { - "epoch": 0.779159756771697, - "grad_norm": 3.161928176879883, - "learning_rate": 1.1355884025014213e-05, - "loss": 5.1262, - "step": 14095 - }, - { - "epoch": 0.7794361525704809, - "grad_norm": 3.8327505588531494, - "learning_rate": 1.1341671404206937e-05, - "loss": 5.0207, - "step": 14100 - }, - { - "epoch": 0.7797125483692647, - "grad_norm": 3.690469980239868, - "learning_rate": 1.132745878339966e-05, - "loss": 5.0829, - "step": 14105 - }, - { - "epoch": 0.7799889441680486, - "grad_norm": 3.06017804145813, - "learning_rate": 1.1313246162592382e-05, - "loss": 5.2518, - "step": 14110 - }, - { - "epoch": 0.7802653399668324, - "grad_norm": 4.095527172088623, - "learning_rate": 1.1299033541785106e-05, - "loss": 5.1885, - "step": 14115 - }, - { - "epoch": 0.7805417357656164, - "grad_norm": 3.2654521465301514, - "learning_rate": 1.128482092097783e-05, - "loss": 4.9736, - "step": 14120 - }, - { - "epoch": 0.7808181315644003, - "grad_norm": 3.9866836071014404, - "learning_rate": 1.1270608300170552e-05, - "loss": 5.0144, - "step": 14125 - }, - { - "epoch": 0.7810945273631841, - "grad_norm": 4.128415584564209, - "learning_rate": 1.1256395679363276e-05, - "loss": 5.0449, - "step": 14130 - }, - { - "epoch": 0.781370923161968, - "grad_norm": 3.3916735649108887, - "learning_rate": 1.1242183058555998e-05, - "loss": 5.0108, - "step": 14135 - }, - { - "epoch": 0.7816473189607518, - "grad_norm": 3.543978452682495, - "learning_rate": 1.122797043774872e-05, - "loss": 4.8539, - "step": 14140 - }, - { - "epoch": 0.7819237147595357, - "grad_norm": 3.8419699668884277, - "learning_rate": 1.1213757816941444e-05, - "loss": 4.722, - "step": 14145 - }, - { - "epoch": 0.7822001105583195, - "grad_norm": 3.7718849182128906, - "learning_rate": 1.1199545196134167e-05, - "loss": 4.9846, - "step": 14150 - }, - { - "epoch": 0.7824765063571034, - "grad_norm": 3.22829532623291, - "learning_rate": 1.118533257532689e-05, - "loss": 4.6035, - "step": 14155 - }, - { - "epoch": 0.7827529021558872, - "grad_norm": 3.4191575050354004, - "learning_rate": 1.1171119954519615e-05, - "loss": 5.0507, - "step": 14160 - }, - { - "epoch": 0.7830292979546711, - "grad_norm": 4.302538871765137, - "learning_rate": 1.1156907333712337e-05, - "loss": 4.8843, - "step": 14165 - }, - { - "epoch": 0.7833056937534549, - "grad_norm": 4.03009557723999, - "learning_rate": 1.1142694712905061e-05, - "loss": 4.8532, - "step": 14170 - }, - { - "epoch": 0.7835820895522388, - "grad_norm": 3.964759349822998, - "learning_rate": 1.1128482092097783e-05, - "loss": 5.3011, - "step": 14175 - }, - { - "epoch": 0.7838584853510226, - "grad_norm": 3.5552785396575928, - "learning_rate": 1.1114269471290506e-05, - "loss": 5.0725, - "step": 14180 - }, - { - "epoch": 0.7841348811498066, - "grad_norm": 3.765488862991333, - "learning_rate": 1.110005685048323e-05, - "loss": 4.8646, - "step": 14185 - }, - { - "epoch": 0.7844112769485904, - "grad_norm": 3.883822441101074, - "learning_rate": 1.1085844229675952e-05, - "loss": 4.9223, - "step": 14190 - }, - { - "epoch": 0.7846876727473743, - "grad_norm": 3.70638370513916, - "learning_rate": 1.1071631608868676e-05, - "loss": 5.1099, - "step": 14195 - }, - { - "epoch": 0.7849640685461581, - "grad_norm": 4.200986385345459, - "learning_rate": 1.10574189880614e-05, - "loss": 4.722, - "step": 14200 - }, - { - "epoch": 0.785240464344942, - "grad_norm": 3.2610623836517334, - "learning_rate": 1.1043206367254122e-05, - "loss": 4.8481, - "step": 14205 - }, - { - "epoch": 0.7855168601437258, - "grad_norm": 3.607994556427002, - "learning_rate": 1.1028993746446846e-05, - "loss": 4.6585, - "step": 14210 - }, - { - "epoch": 0.7857932559425097, - "grad_norm": 3.5993244647979736, - "learning_rate": 1.1014781125639568e-05, - "loss": 4.8042, - "step": 14215 - }, - { - "epoch": 0.7860696517412935, - "grad_norm": 3.5972378253936768, - "learning_rate": 1.100056850483229e-05, - "loss": 4.8298, - "step": 14220 - }, - { - "epoch": 0.7863460475400774, - "grad_norm": 3.5555312633514404, - "learning_rate": 1.0986355884025015e-05, - "loss": 5.0829, - "step": 14225 - }, - { - "epoch": 0.7866224433388612, - "grad_norm": 3.3311171531677246, - "learning_rate": 1.0972143263217739e-05, - "loss": 5.0186, - "step": 14230 - }, - { - "epoch": 0.7868988391376451, - "grad_norm": 3.552480936050415, - "learning_rate": 1.0957930642410461e-05, - "loss": 4.6854, - "step": 14235 - }, - { - "epoch": 0.7871752349364289, - "grad_norm": 3.2884206771850586, - "learning_rate": 1.0943718021603185e-05, - "loss": 5.0206, - "step": 14240 - }, - { - "epoch": 0.7874516307352128, - "grad_norm": 3.535804033279419, - "learning_rate": 1.0929505400795907e-05, - "loss": 4.8758, - "step": 14245 - }, - { - "epoch": 0.7877280265339967, - "grad_norm": 4.292071342468262, - "learning_rate": 1.091529277998863e-05, - "loss": 5.2122, - "step": 14250 - }, - { - "epoch": 0.7880044223327806, - "grad_norm": 3.89021372795105, - "learning_rate": 1.0901080159181353e-05, - "loss": 4.775, - "step": 14255 - }, - { - "epoch": 0.7882808181315644, - "grad_norm": 3.114368438720703, - "learning_rate": 1.0886867538374076e-05, - "loss": 5.1345, - "step": 14260 - }, - { - "epoch": 0.7885572139303483, - "grad_norm": 3.6420657634735107, - "learning_rate": 1.08726549175668e-05, - "loss": 4.7937, - "step": 14265 - }, - { - "epoch": 0.7888336097291321, - "grad_norm": 3.4523563385009766, - "learning_rate": 1.0858442296759524e-05, - "loss": 4.8285, - "step": 14270 - }, - { - "epoch": 0.789110005527916, - "grad_norm": 3.896808624267578, - "learning_rate": 1.0844229675952246e-05, - "loss": 5.1532, - "step": 14275 - }, - { - "epoch": 0.7893864013266998, - "grad_norm": 3.9850831031799316, - "learning_rate": 1.083001705514497e-05, - "loss": 5.117, - "step": 14280 - }, - { - "epoch": 0.7896627971254837, - "grad_norm": 3.7816109657287598, - "learning_rate": 1.0815804434337692e-05, - "loss": 4.8639, - "step": 14285 - }, - { - "epoch": 0.7899391929242675, - "grad_norm": 3.2434422969818115, - "learning_rate": 1.0801591813530414e-05, - "loss": 4.716, - "step": 14290 - }, - { - "epoch": 0.7902155887230514, - "grad_norm": 4.700549125671387, - "learning_rate": 1.0787379192723138e-05, - "loss": 5.1435, - "step": 14295 - }, - { - "epoch": 0.7904919845218352, - "grad_norm": 4.932585716247559, - "learning_rate": 1.0773166571915862e-05, - "loss": 5.0201, - "step": 14300 - }, - { - "epoch": 0.7907683803206191, - "grad_norm": 4.089287281036377, - "learning_rate": 1.0758953951108585e-05, - "loss": 4.8033, - "step": 14305 - }, - { - "epoch": 0.7910447761194029, - "grad_norm": 4.705197811126709, - "learning_rate": 1.0744741330301309e-05, - "loss": 4.6571, - "step": 14310 - }, - { - "epoch": 0.7913211719181868, - "grad_norm": 4.048484802246094, - "learning_rate": 1.0730528709494031e-05, - "loss": 4.8813, - "step": 14315 - }, - { - "epoch": 0.7915975677169707, - "grad_norm": 4.197638988494873, - "learning_rate": 1.0716316088686755e-05, - "loss": 4.991, - "step": 14320 - }, - { - "epoch": 0.7918739635157546, - "grad_norm": 3.081589698791504, - "learning_rate": 1.0702103467879477e-05, - "loss": 4.7785, - "step": 14325 - }, - { - "epoch": 0.7921503593145384, - "grad_norm": 3.1407415866851807, - "learning_rate": 1.06878908470722e-05, - "loss": 4.7891, - "step": 14330 - }, - { - "epoch": 0.7924267551133223, - "grad_norm": 3.6896438598632812, - "learning_rate": 1.0673678226264924e-05, - "loss": 4.7142, - "step": 14335 - }, - { - "epoch": 0.7927031509121062, - "grad_norm": 3.5066559314727783, - "learning_rate": 1.0659465605457647e-05, - "loss": 5.1038, - "step": 14340 - }, - { - "epoch": 0.79297954671089, - "grad_norm": 4.803554058074951, - "learning_rate": 1.064525298465037e-05, - "loss": 5.1086, - "step": 14345 - }, - { - "epoch": 0.7932559425096739, - "grad_norm": 3.1235287189483643, - "learning_rate": 1.0631040363843094e-05, - "loss": 4.952, - "step": 14350 - }, - { - "epoch": 0.7935323383084577, - "grad_norm": 4.96946382522583, - "learning_rate": 1.0616827743035816e-05, - "loss": 5.2588, - "step": 14355 - }, - { - "epoch": 0.7938087341072416, - "grad_norm": 3.8352441787719727, - "learning_rate": 1.0602615122228538e-05, - "loss": 4.871, - "step": 14360 - }, - { - "epoch": 0.7940851299060254, - "grad_norm": 3.559532642364502, - "learning_rate": 1.0588402501421262e-05, - "loss": 4.5453, - "step": 14365 - }, - { - "epoch": 0.7943615257048093, - "grad_norm": 4.492837905883789, - "learning_rate": 1.0574189880613986e-05, - "loss": 5.0764, - "step": 14370 - }, - { - "epoch": 0.7946379215035931, - "grad_norm": 3.7767648696899414, - "learning_rate": 1.055997725980671e-05, - "loss": 5.1033, - "step": 14375 - }, - { - "epoch": 0.794914317302377, - "grad_norm": 3.4074301719665527, - "learning_rate": 1.0545764638999433e-05, - "loss": 4.6381, - "step": 14380 - }, - { - "epoch": 0.7951907131011609, - "grad_norm": 4.1058783531188965, - "learning_rate": 1.0531552018192155e-05, - "loss": 4.7906, - "step": 14385 - }, - { - "epoch": 0.7954671088999448, - "grad_norm": 3.570598840713501, - "learning_rate": 1.0517339397384879e-05, - "loss": 4.7018, - "step": 14390 - }, - { - "epoch": 0.7957435046987286, - "grad_norm": 4.346248626708984, - "learning_rate": 1.0503126776577601e-05, - "loss": 5.2179, - "step": 14395 - }, - { - "epoch": 0.7960199004975125, - "grad_norm": 4.531290054321289, - "learning_rate": 1.0488914155770323e-05, - "loss": 5.124, - "step": 14400 - }, - { - "epoch": 0.7962962962962963, - "grad_norm": 4.11277961730957, - "learning_rate": 1.0474701534963047e-05, - "loss": 4.9454, - "step": 14405 - }, - { - "epoch": 0.7965726920950802, - "grad_norm": 4.518270015716553, - "learning_rate": 1.0460488914155771e-05, - "loss": 5.0708, - "step": 14410 - }, - { - "epoch": 0.796849087893864, - "grad_norm": 3.6835479736328125, - "learning_rate": 1.0446276293348494e-05, - "loss": 4.9844, - "step": 14415 - }, - { - "epoch": 0.7971254836926479, - "grad_norm": 4.1077880859375, - "learning_rate": 1.0432063672541218e-05, - "loss": 4.8259, - "step": 14420 - }, - { - "epoch": 0.7974018794914317, - "grad_norm": 3.0021631717681885, - "learning_rate": 1.041785105173394e-05, - "loss": 5.0534, - "step": 14425 - }, - { - "epoch": 0.7976782752902156, - "grad_norm": 3.630094289779663, - "learning_rate": 1.0403638430926662e-05, - "loss": 4.9099, - "step": 14430 - }, - { - "epoch": 0.7979546710889994, - "grad_norm": 3.7591092586517334, - "learning_rate": 1.0389425810119386e-05, - "loss": 5.1555, - "step": 14435 - }, - { - "epoch": 0.7982310668877833, - "grad_norm": 4.0143656730651855, - "learning_rate": 1.037521318931211e-05, - "loss": 4.811, - "step": 14440 - }, - { - "epoch": 0.7985074626865671, - "grad_norm": 4.8033976554870605, - "learning_rate": 1.0361000568504834e-05, - "loss": 5.0588, - "step": 14445 - }, - { - "epoch": 0.798783858485351, - "grad_norm": 4.348042964935303, - "learning_rate": 1.0346787947697556e-05, - "loss": 4.6481, - "step": 14450 - }, - { - "epoch": 0.7990602542841349, - "grad_norm": 3.549741506576538, - "learning_rate": 1.0332575326890279e-05, - "loss": 4.9424, - "step": 14455 - }, - { - "epoch": 0.7993366500829188, - "grad_norm": 3.3581111431121826, - "learning_rate": 1.0318362706083003e-05, - "loss": 5.1743, - "step": 14460 - }, - { - "epoch": 0.7996130458817026, - "grad_norm": 4.2459540367126465, - "learning_rate": 1.0304150085275725e-05, - "loss": 5.1007, - "step": 14465 - }, - { - "epoch": 0.7998894416804865, - "grad_norm": 5.176113128662109, - "learning_rate": 1.0289937464468447e-05, - "loss": 5.1488, - "step": 14470 - }, - { - "epoch": 0.8001658374792703, - "grad_norm": 3.7594547271728516, - "learning_rate": 1.0275724843661171e-05, - "loss": 5.0909, - "step": 14475 - }, - { - "epoch": 0.8004422332780542, - "grad_norm": 4.071329593658447, - "learning_rate": 1.0261512222853895e-05, - "loss": 5.1695, - "step": 14480 - }, - { - "epoch": 0.800718629076838, - "grad_norm": 2.798461437225342, - "learning_rate": 1.0247299602046617e-05, - "loss": 4.7519, - "step": 14485 - }, - { - "epoch": 0.8009950248756219, - "grad_norm": 3.2360336780548096, - "learning_rate": 1.0233086981239341e-05, - "loss": 5.04, - "step": 14490 - }, - { - "epoch": 0.8012714206744057, - "grad_norm": 3.919640064239502, - "learning_rate": 1.0218874360432064e-05, - "loss": 4.8624, - "step": 14495 - }, - { - "epoch": 0.8015478164731896, - "grad_norm": 3.5546929836273193, - "learning_rate": 1.0204661739624788e-05, - "loss": 4.8529, - "step": 14500 - }, - { - "epoch": 0.8018242122719734, - "grad_norm": 4.039145469665527, - "learning_rate": 1.019044911881751e-05, - "loss": 5.2288, - "step": 14505 - }, - { - "epoch": 0.8021006080707573, - "grad_norm": 2.8271827697753906, - "learning_rate": 1.0176236498010234e-05, - "loss": 4.7984, - "step": 14510 - }, - { - "epoch": 0.8023770038695411, - "grad_norm": 3.8265321254730225, - "learning_rate": 1.0162023877202958e-05, - "loss": 4.8457, - "step": 14515 - }, - { - "epoch": 0.802653399668325, - "grad_norm": 3.9370837211608887, - "learning_rate": 1.014781125639568e-05, - "loss": 5.279, - "step": 14520 - }, - { - "epoch": 0.8029297954671089, - "grad_norm": 3.1449708938598633, - "learning_rate": 1.0133598635588403e-05, - "loss": 4.7764, - "step": 14525 - }, - { - "epoch": 0.8032061912658928, - "grad_norm": 3.421999454498291, - "learning_rate": 1.0119386014781127e-05, - "loss": 4.887, - "step": 14530 - }, - { - "epoch": 0.8034825870646766, - "grad_norm": 3.242093324661255, - "learning_rate": 1.0105173393973849e-05, - "loss": 4.9132, - "step": 14535 - }, - { - "epoch": 0.8037589828634605, - "grad_norm": 3.8317198753356934, - "learning_rate": 1.0090960773166571e-05, - "loss": 4.9708, - "step": 14540 - }, - { - "epoch": 0.8040353786622443, - "grad_norm": 2.940936326980591, - "learning_rate": 1.0076748152359295e-05, - "loss": 5.1001, - "step": 14545 - }, - { - "epoch": 0.8043117744610282, - "grad_norm": 3.3959786891937256, - "learning_rate": 1.0062535531552019e-05, - "loss": 4.722, - "step": 14550 - }, - { - "epoch": 0.804588170259812, - "grad_norm": 3.077038049697876, - "learning_rate": 1.0048322910744743e-05, - "loss": 4.8792, - "step": 14555 - }, - { - "epoch": 0.8048645660585959, - "grad_norm": 3.9798264503479004, - "learning_rate": 1.0034110289937465e-05, - "loss": 4.864, - "step": 14560 - }, - { - "epoch": 0.8051409618573798, - "grad_norm": 3.65219783782959, - "learning_rate": 1.0019897669130188e-05, - "loss": 5.1996, - "step": 14565 - }, - { - "epoch": 0.8054173576561636, - "grad_norm": 3.0886595249176025, - "learning_rate": 1.0005685048322912e-05, - "loss": 4.9891, - "step": 14570 - }, - { - "epoch": 0.8056937534549475, - "grad_norm": 3.2638444900512695, - "learning_rate": 9.991472427515634e-06, - "loss": 4.4643, - "step": 14575 - }, - { - "epoch": 0.8059701492537313, - "grad_norm": 3.3993797302246094, - "learning_rate": 9.977259806708356e-06, - "loss": 4.5518, - "step": 14580 - }, - { - "epoch": 0.8062465450525153, - "grad_norm": 3.588103771209717, - "learning_rate": 9.963047185901082e-06, - "loss": 4.8706, - "step": 14585 - }, - { - "epoch": 0.806522940851299, - "grad_norm": 3.355370044708252, - "learning_rate": 9.948834565093804e-06, - "loss": 5.0936, - "step": 14590 - }, - { - "epoch": 0.806799336650083, - "grad_norm": 3.478992462158203, - "learning_rate": 9.934621944286526e-06, - "loss": 4.8008, - "step": 14595 - }, - { - "epoch": 0.8070757324488668, - "grad_norm": 3.620059013366699, - "learning_rate": 9.92040932347925e-06, - "loss": 4.8624, - "step": 14600 - }, - { - "epoch": 0.8073521282476507, - "grad_norm": 3.095531463623047, - "learning_rate": 9.906196702671973e-06, - "loss": 4.7169, - "step": 14605 - }, - { - "epoch": 0.8076285240464345, - "grad_norm": 3.2527894973754883, - "learning_rate": 9.891984081864697e-06, - "loss": 4.9315, - "step": 14610 - }, - { - "epoch": 0.8079049198452184, - "grad_norm": 3.8432343006134033, - "learning_rate": 9.877771461057419e-06, - "loss": 5.0862, - "step": 14615 - }, - { - "epoch": 0.8081813156440022, - "grad_norm": 4.290609836578369, - "learning_rate": 9.863558840250143e-06, - "loss": 5.1276, - "step": 14620 - }, - { - "epoch": 0.8084577114427861, - "grad_norm": 3.555579662322998, - "learning_rate": 9.849346219442867e-06, - "loss": 5.0188, - "step": 14625 - }, - { - "epoch": 0.8087341072415699, - "grad_norm": 3.1154568195343018, - "learning_rate": 9.835133598635589e-06, - "loss": 5.083, - "step": 14630 - }, - { - "epoch": 0.8090105030403538, - "grad_norm": 3.5717074871063232, - "learning_rate": 9.820920977828311e-06, - "loss": 5.0745, - "step": 14635 - }, - { - "epoch": 0.8092868988391376, - "grad_norm": 3.1403820514678955, - "learning_rate": 9.806708357021035e-06, - "loss": 4.8718, - "step": 14640 - }, - { - "epoch": 0.8095632946379215, - "grad_norm": 3.6060197353363037, - "learning_rate": 9.792495736213758e-06, - "loss": 5.1306, - "step": 14645 - }, - { - "epoch": 0.8098396904367053, - "grad_norm": 3.7077431678771973, - "learning_rate": 9.77828311540648e-06, - "loss": 4.6169, - "step": 14650 - }, - { - "epoch": 0.8101160862354893, - "grad_norm": 4.237201690673828, - "learning_rate": 9.764070494599204e-06, - "loss": 4.6571, - "step": 14655 - }, - { - "epoch": 0.810392482034273, - "grad_norm": 3.263676166534424, - "learning_rate": 9.749857873791928e-06, - "loss": 5.0076, - "step": 14660 - }, - { - "epoch": 0.810668877833057, - "grad_norm": 3.406616687774658, - "learning_rate": 9.735645252984652e-06, - "loss": 4.6793, - "step": 14665 - }, - { - "epoch": 0.8109452736318408, - "grad_norm": 3.664701461791992, - "learning_rate": 9.721432632177374e-06, - "loss": 4.7769, - "step": 14670 - }, - { - "epoch": 0.8112216694306247, - "grad_norm": 3.6615958213806152, - "learning_rate": 9.707220011370096e-06, - "loss": 5.1376, - "step": 14675 - }, - { - "epoch": 0.8114980652294085, - "grad_norm": 3.585599660873413, - "learning_rate": 9.69300739056282e-06, - "loss": 4.8062, - "step": 14680 - }, - { - "epoch": 0.8117744610281924, - "grad_norm": 3.3951401710510254, - "learning_rate": 9.678794769755543e-06, - "loss": 5.0233, - "step": 14685 - }, - { - "epoch": 0.8120508568269762, - "grad_norm": 3.261725664138794, - "learning_rate": 9.664582148948267e-06, - "loss": 4.957, - "step": 14690 - }, - { - "epoch": 0.8123272526257601, - "grad_norm": 3.23929762840271, - "learning_rate": 9.65036952814099e-06, - "loss": 4.885, - "step": 14695 - }, - { - "epoch": 0.8126036484245439, - "grad_norm": 3.207082748413086, - "learning_rate": 9.636156907333713e-06, - "loss": 4.5523, - "step": 14700 - }, - { - "epoch": 0.8128800442233278, - "grad_norm": 3.7475905418395996, - "learning_rate": 9.621944286526435e-06, - "loss": 5.0544, - "step": 14705 - }, - { - "epoch": 0.8131564400221116, - "grad_norm": 3.0875489711761475, - "learning_rate": 9.60773166571916e-06, - "loss": 5.127, - "step": 14710 - }, - { - "epoch": 0.8134328358208955, - "grad_norm": 3.5430004596710205, - "learning_rate": 9.593519044911882e-06, - "loss": 4.9775, - "step": 14715 - }, - { - "epoch": 0.8137092316196793, - "grad_norm": 3.482557535171509, - "learning_rate": 9.579306424104606e-06, - "loss": 4.8944, - "step": 14720 - }, - { - "epoch": 0.8139856274184633, - "grad_norm": 3.779650926589966, - "learning_rate": 9.565093803297328e-06, - "loss": 5.2317, - "step": 14725 - }, - { - "epoch": 0.814262023217247, - "grad_norm": 4.0570244789123535, - "learning_rate": 9.550881182490052e-06, - "loss": 4.9351, - "step": 14730 - }, - { - "epoch": 0.814538419016031, - "grad_norm": 3.294402599334717, - "learning_rate": 9.536668561682776e-06, - "loss": 4.63, - "step": 14735 - }, - { - "epoch": 0.8148148148148148, - "grad_norm": 3.926581859588623, - "learning_rate": 9.522455940875498e-06, - "loss": 4.9833, - "step": 14740 - }, - { - "epoch": 0.8150912106135987, - "grad_norm": 3.623554229736328, - "learning_rate": 9.50824332006822e-06, - "loss": 4.9796, - "step": 14745 - }, - { - "epoch": 0.8153676064123825, - "grad_norm": 4.15550422668457, - "learning_rate": 9.494030699260944e-06, - "loss": 4.9854, - "step": 14750 - }, - { - "epoch": 0.8156440022111664, - "grad_norm": 4.647388935089111, - "learning_rate": 9.479818078453667e-06, - "loss": 5.336, - "step": 14755 - }, - { - "epoch": 0.8159203980099502, - "grad_norm": 4.139171600341797, - "learning_rate": 9.46560545764639e-06, - "loss": 4.9827, - "step": 14760 - }, - { - "epoch": 0.8161967938087341, - "grad_norm": 3.5119783878326416, - "learning_rate": 9.451392836839115e-06, - "loss": 4.7924, - "step": 14765 - }, - { - "epoch": 0.8164731896075179, - "grad_norm": 3.4265787601470947, - "learning_rate": 9.437180216031837e-06, - "loss": 5.0749, - "step": 14770 - }, - { - "epoch": 0.8167495854063018, - "grad_norm": 3.3151018619537354, - "learning_rate": 9.42296759522456e-06, - "loss": 4.7777, - "step": 14775 - }, - { - "epoch": 0.8170259812050856, - "grad_norm": 3.0593369007110596, - "learning_rate": 9.408754974417283e-06, - "loss": 4.9151, - "step": 14780 - }, - { - "epoch": 0.8173023770038695, - "grad_norm": 3.627476930618286, - "learning_rate": 9.394542353610005e-06, - "loss": 5.2975, - "step": 14785 - }, - { - "epoch": 0.8175787728026535, - "grad_norm": 3.161733388900757, - "learning_rate": 9.38032973280273e-06, - "loss": 5.0477, - "step": 14790 - }, - { - "epoch": 0.8178551686014373, - "grad_norm": 3.5496115684509277, - "learning_rate": 9.366117111995452e-06, - "loss": 5.0122, - "step": 14795 - }, - { - "epoch": 0.8181315644002212, - "grad_norm": 3.702653169631958, - "learning_rate": 9.351904491188176e-06, - "loss": 4.7585, - "step": 14800 - }, - { - "epoch": 0.818407960199005, - "grad_norm": 3.9237051010131836, - "learning_rate": 9.3376918703809e-06, - "loss": 4.6184, - "step": 14805 - }, - { - "epoch": 0.8186843559977889, - "grad_norm": 3.077275276184082, - "learning_rate": 9.323479249573622e-06, - "loss": 4.6302, - "step": 14810 - }, - { - "epoch": 0.8189607517965727, - "grad_norm": 3.5210278034210205, - "learning_rate": 9.309266628766344e-06, - "loss": 5.0052, - "step": 14815 - }, - { - "epoch": 0.8192371475953566, - "grad_norm": 3.1332850456237793, - "learning_rate": 9.295054007959068e-06, - "loss": 4.9236, - "step": 14820 - }, - { - "epoch": 0.8195135433941404, - "grad_norm": 3.8537850379943848, - "learning_rate": 9.28084138715179e-06, - "loss": 5.0165, - "step": 14825 - }, - { - "epoch": 0.8197899391929243, - "grad_norm": 3.768019199371338, - "learning_rate": 9.266628766344514e-06, - "loss": 4.7222, - "step": 14830 - }, - { - "epoch": 0.8200663349917081, - "grad_norm": 4.06325101852417, - "learning_rate": 9.252416145537238e-06, - "loss": 4.942, - "step": 14835 - }, - { - "epoch": 0.820342730790492, - "grad_norm": 3.555703639984131, - "learning_rate": 9.23820352472996e-06, - "loss": 5.0981, - "step": 14840 - }, - { - "epoch": 0.8206191265892758, - "grad_norm": 3.216472625732422, - "learning_rate": 9.223990903922685e-06, - "loss": 5.1655, - "step": 14845 - }, - { - "epoch": 0.8208955223880597, - "grad_norm": 2.9533238410949707, - "learning_rate": 9.209778283115407e-06, - "loss": 4.868, - "step": 14850 - }, - { - "epoch": 0.8211719181868435, - "grad_norm": 3.6127288341522217, - "learning_rate": 9.19556566230813e-06, - "loss": 5.1768, - "step": 14855 - }, - { - "epoch": 0.8214483139856275, - "grad_norm": 3.685889720916748, - "learning_rate": 9.181353041500853e-06, - "loss": 4.8842, - "step": 14860 - }, - { - "epoch": 0.8217247097844113, - "grad_norm": 3.554274559020996, - "learning_rate": 9.167140420693576e-06, - "loss": 4.7565, - "step": 14865 - }, - { - "epoch": 0.8220011055831952, - "grad_norm": 4.190557956695557, - "learning_rate": 9.1529277998863e-06, - "loss": 4.8703, - "step": 14870 - }, - { - "epoch": 0.822277501381979, - "grad_norm": 3.2858259677886963, - "learning_rate": 9.138715179079023e-06, - "loss": 4.8811, - "step": 14875 - }, - { - "epoch": 0.8225538971807629, - "grad_norm": 4.575525760650635, - "learning_rate": 9.124502558271746e-06, - "loss": 5.1692, - "step": 14880 - }, - { - "epoch": 0.8228302929795467, - "grad_norm": 3.1636838912963867, - "learning_rate": 9.11028993746447e-06, - "loss": 4.8458, - "step": 14885 - }, - { - "epoch": 0.8231066887783306, - "grad_norm": 3.5616745948791504, - "learning_rate": 9.096077316657192e-06, - "loss": 4.9314, - "step": 14890 - }, - { - "epoch": 0.8233830845771144, - "grad_norm": 4.337357044219971, - "learning_rate": 9.081864695849914e-06, - "loss": 5.0281, - "step": 14895 - }, - { - "epoch": 0.8236594803758983, - "grad_norm": 3.906620502471924, - "learning_rate": 9.067652075042638e-06, - "loss": 4.895, - "step": 14900 - }, - { - "epoch": 0.8239358761746821, - "grad_norm": 3.766569137573242, - "learning_rate": 9.053439454235362e-06, - "loss": 4.7679, - "step": 14905 - }, - { - "epoch": 0.824212271973466, - "grad_norm": 3.7413177490234375, - "learning_rate": 9.039226833428085e-06, - "loss": 4.8692, - "step": 14910 - }, - { - "epoch": 0.8244886677722498, - "grad_norm": 3.4053595066070557, - "learning_rate": 9.025014212620809e-06, - "loss": 4.625, - "step": 14915 - }, - { - "epoch": 0.8247650635710337, - "grad_norm": 4.579228401184082, - "learning_rate": 9.01080159181353e-06, - "loss": 4.6284, - "step": 14920 - }, - { - "epoch": 0.8250414593698175, - "grad_norm": 4.081772327423096, - "learning_rate": 8.996588971006253e-06, - "loss": 4.9273, - "step": 14925 - }, - { - "epoch": 0.8253178551686015, - "grad_norm": 3.704084634780884, - "learning_rate": 8.982376350198977e-06, - "loss": 4.8167, - "step": 14930 - }, - { - "epoch": 0.8255942509673853, - "grad_norm": 4.213784217834473, - "learning_rate": 8.9681637293917e-06, - "loss": 4.8445, - "step": 14935 - }, - { - "epoch": 0.8258706467661692, - "grad_norm": 3.3844199180603027, - "learning_rate": 8.953951108584423e-06, - "loss": 4.9753, - "step": 14940 - }, - { - "epoch": 0.826147042564953, - "grad_norm": 4.304365158081055, - "learning_rate": 8.939738487777147e-06, - "loss": 4.9407, - "step": 14945 - }, - { - "epoch": 0.8264234383637369, - "grad_norm": 5.198899269104004, - "learning_rate": 8.92552586696987e-06, - "loss": 5.0407, - "step": 14950 - }, - { - "epoch": 0.8266998341625207, - "grad_norm": 3.2233057022094727, - "learning_rate": 8.911313246162594e-06, - "loss": 4.6535, - "step": 14955 - }, - { - "epoch": 0.8269762299613046, - "grad_norm": 3.325000286102295, - "learning_rate": 8.897100625355316e-06, - "loss": 4.9527, - "step": 14960 - }, - { - "epoch": 0.8272526257600884, - "grad_norm": 4.237645626068115, - "learning_rate": 8.882888004548038e-06, - "loss": 4.831, - "step": 14965 - }, - { - "epoch": 0.8275290215588723, - "grad_norm": 3.883349895477295, - "learning_rate": 8.868675383740762e-06, - "loss": 5.2111, - "step": 14970 - }, - { - "epoch": 0.8278054173576561, - "grad_norm": 4.307447910308838, - "learning_rate": 8.854462762933486e-06, - "loss": 4.8234, - "step": 14975 - }, - { - "epoch": 0.82808181315644, - "grad_norm": 3.646482229232788, - "learning_rate": 8.840250142126208e-06, - "loss": 4.6802, - "step": 14980 - }, - { - "epoch": 0.8283582089552238, - "grad_norm": 4.150384902954102, - "learning_rate": 8.826037521318932e-06, - "loss": 4.939, - "step": 14985 - }, - { - "epoch": 0.8286346047540077, - "grad_norm": 3.2273035049438477, - "learning_rate": 8.811824900511655e-06, - "loss": 4.758, - "step": 14990 - }, - { - "epoch": 0.8289110005527915, - "grad_norm": 3.5394163131713867, - "learning_rate": 8.797612279704377e-06, - "loss": 5.039, - "step": 14995 - }, - { - "epoch": 0.8291873963515755, - "grad_norm": 3.0084025859832764, - "learning_rate": 8.783399658897101e-06, - "loss": 5.2781, - "step": 15000 - }, - { - "epoch": 0.8294637921503594, - "grad_norm": 3.8212339878082275, - "learning_rate": 8.769187038089823e-06, - "loss": 4.8291, - "step": 15005 - }, - { - "epoch": 0.8297401879491432, - "grad_norm": 3.359372854232788, - "learning_rate": 8.754974417282547e-06, - "loss": 4.9139, - "step": 15010 - }, - { - "epoch": 0.8300165837479271, - "grad_norm": 3.9898805618286133, - "learning_rate": 8.740761796475271e-06, - "loss": 4.738, - "step": 15015 - }, - { - "epoch": 0.8302929795467109, - "grad_norm": 3.4556021690368652, - "learning_rate": 8.726549175667993e-06, - "loss": 4.6281, - "step": 15020 - }, - { - "epoch": 0.8305693753454948, - "grad_norm": 4.344873905181885, - "learning_rate": 8.712336554860717e-06, - "loss": 5.041, - "step": 15025 - }, - { - "epoch": 0.8308457711442786, - "grad_norm": 4.770878314971924, - "learning_rate": 8.69812393405344e-06, - "loss": 4.8658, - "step": 15030 - }, - { - "epoch": 0.8311221669430625, - "grad_norm": 3.748448610305786, - "learning_rate": 8.683911313246162e-06, - "loss": 4.969, - "step": 15035 - }, - { - "epoch": 0.8313985627418463, - "grad_norm": 4.457357883453369, - "learning_rate": 8.669698692438886e-06, - "loss": 5.2523, - "step": 15040 - }, - { - "epoch": 0.8316749585406302, - "grad_norm": 4.152907371520996, - "learning_rate": 8.65548607163161e-06, - "loss": 4.8646, - "step": 15045 - }, - { - "epoch": 0.831951354339414, - "grad_norm": 3.441721200942993, - "learning_rate": 8.641273450824332e-06, - "loss": 4.8011, - "step": 15050 - }, - { - "epoch": 0.8322277501381979, - "grad_norm": 4.53822660446167, - "learning_rate": 8.627060830017056e-06, - "loss": 5.184, - "step": 15055 - }, - { - "epoch": 0.8325041459369817, - "grad_norm": 3.868192195892334, - "learning_rate": 8.612848209209779e-06, - "loss": 4.8637, - "step": 15060 - }, - { - "epoch": 0.8327805417357657, - "grad_norm": 3.9942774772644043, - "learning_rate": 8.598635588402502e-06, - "loss": 4.8782, - "step": 15065 - }, - { - "epoch": 0.8330569375345495, - "grad_norm": 2.853957176208496, - "learning_rate": 8.584422967595225e-06, - "loss": 4.9473, - "step": 15070 - }, - { - "epoch": 0.8333333333333334, - "grad_norm": 3.749110460281372, - "learning_rate": 8.570210346787947e-06, - "loss": 5.012, - "step": 15075 - }, - { - "epoch": 0.8336097291321172, - "grad_norm": 4.649420261383057, - "learning_rate": 8.555997725980671e-06, - "loss": 4.7911, - "step": 15080 - }, - { - "epoch": 0.8338861249309011, - "grad_norm": 3.9291510581970215, - "learning_rate": 8.541785105173395e-06, - "loss": 5.1986, - "step": 15085 - }, - { - "epoch": 0.8341625207296849, - "grad_norm": 3.576134443283081, - "learning_rate": 8.527572484366117e-06, - "loss": 5.0059, - "step": 15090 - }, - { - "epoch": 0.8344389165284688, - "grad_norm": 3.902698516845703, - "learning_rate": 8.513359863558841e-06, - "loss": 4.8758, - "step": 15095 - }, - { - "epoch": 0.8347153123272526, - "grad_norm": 4.259060859680176, - "learning_rate": 8.499147242751564e-06, - "loss": 4.8227, - "step": 15100 - }, - { - "epoch": 0.8349917081260365, - "grad_norm": 3.539156436920166, - "learning_rate": 8.484934621944286e-06, - "loss": 5.0189, - "step": 15105 - }, - { - "epoch": 0.8352681039248203, - "grad_norm": 4.712406635284424, - "learning_rate": 8.47072200113701e-06, - "loss": 5.0471, - "step": 15110 - }, - { - "epoch": 0.8355444997236042, - "grad_norm": 3.5508053302764893, - "learning_rate": 8.456509380329732e-06, - "loss": 4.8707, - "step": 15115 - }, - { - "epoch": 0.835820895522388, - "grad_norm": 3.3851168155670166, - "learning_rate": 8.442296759522458e-06, - "loss": 4.8683, - "step": 15120 - }, - { - "epoch": 0.8360972913211719, - "grad_norm": 3.6790895462036133, - "learning_rate": 8.42808413871518e-06, - "loss": 4.8464, - "step": 15125 - }, - { - "epoch": 0.8363736871199557, - "grad_norm": 3.053698778152466, - "learning_rate": 8.413871517907902e-06, - "loss": 4.9889, - "step": 15130 - }, - { - "epoch": 0.8366500829187397, - "grad_norm": 4.331393718719482, - "learning_rate": 8.399658897100626e-06, - "loss": 5.4066, - "step": 15135 - }, - { - "epoch": 0.8369264787175235, - "grad_norm": 3.729410171508789, - "learning_rate": 8.385446276293349e-06, - "loss": 5.0611, - "step": 15140 - }, - { - "epoch": 0.8372028745163074, - "grad_norm": 3.60453724861145, - "learning_rate": 8.371233655486071e-06, - "loss": 4.8469, - "step": 15145 - }, - { - "epoch": 0.8374792703150912, - "grad_norm": 4.959109306335449, - "learning_rate": 8.357021034678795e-06, - "loss": 4.938, - "step": 15150 - }, - { - "epoch": 0.8377556661138751, - "grad_norm": 4.496500015258789, - "learning_rate": 8.342808413871519e-06, - "loss": 4.7706, - "step": 15155 - }, - { - "epoch": 0.8380320619126589, - "grad_norm": 4.093534469604492, - "learning_rate": 8.328595793064241e-06, - "loss": 5.1374, - "step": 15160 - }, - { - "epoch": 0.8383084577114428, - "grad_norm": 3.628263473510742, - "learning_rate": 8.314383172256965e-06, - "loss": 4.7076, - "step": 15165 - }, - { - "epoch": 0.8385848535102266, - "grad_norm": 3.8275880813598633, - "learning_rate": 8.300170551449687e-06, - "loss": 4.5418, - "step": 15170 - }, - { - "epoch": 0.8388612493090105, - "grad_norm": 4.351752758026123, - "learning_rate": 8.285957930642411e-06, - "loss": 4.903, - "step": 15175 - }, - { - "epoch": 0.8391376451077943, - "grad_norm": 3.621277093887329, - "learning_rate": 8.271745309835134e-06, - "loss": 4.9818, - "step": 15180 - }, - { - "epoch": 0.8394140409065782, - "grad_norm": 3.3908252716064453, - "learning_rate": 8.257532689027856e-06, - "loss": 5.084, - "step": 15185 - }, - { - "epoch": 0.839690436705362, - "grad_norm": 3.7233057022094727, - "learning_rate": 8.24332006822058e-06, - "loss": 5.2591, - "step": 15190 - }, - { - "epoch": 0.8399668325041459, - "grad_norm": 3.7437429428100586, - "learning_rate": 8.229107447413304e-06, - "loss": 4.817, - "step": 15195 - }, - { - "epoch": 0.8402432283029297, - "grad_norm": 3.7888224124908447, - "learning_rate": 8.214894826606026e-06, - "loss": 4.598, - "step": 15200 - }, - { - "epoch": 0.8405196241017137, - "grad_norm": 3.3755719661712646, - "learning_rate": 8.20068220579875e-06, - "loss": 5.1114, - "step": 15205 - }, - { - "epoch": 0.8407960199004975, - "grad_norm": 3.7054946422576904, - "learning_rate": 8.186469584991472e-06, - "loss": 4.6614, - "step": 15210 - }, - { - "epoch": 0.8410724156992814, - "grad_norm": 3.747762441635132, - "learning_rate": 8.172256964184195e-06, - "loss": 4.7052, - "step": 15215 - }, - { - "epoch": 0.8413488114980652, - "grad_norm": 3.4834938049316406, - "learning_rate": 8.158044343376919e-06, - "loss": 5.1221, - "step": 15220 - }, - { - "epoch": 0.8416252072968491, - "grad_norm": 3.943834066390991, - "learning_rate": 8.143831722569643e-06, - "loss": 4.9175, - "step": 15225 - }, - { - "epoch": 0.841901603095633, - "grad_norm": 3.3898112773895264, - "learning_rate": 8.129619101762367e-06, - "loss": 5.0414, - "step": 15230 - }, - { - "epoch": 0.8421779988944168, - "grad_norm": 3.7850241661071777, - "learning_rate": 8.115406480955089e-06, - "loss": 4.9141, - "step": 15235 - }, - { - "epoch": 0.8424543946932007, - "grad_norm": 3.2577099800109863, - "learning_rate": 8.101193860147811e-06, - "loss": 4.7304, - "step": 15240 - }, - { - "epoch": 0.8427307904919845, - "grad_norm": 3.3998703956604004, - "learning_rate": 8.086981239340535e-06, - "loss": 4.9697, - "step": 15245 - }, - { - "epoch": 0.8430071862907684, - "grad_norm": 4.133670330047607, - "learning_rate": 8.072768618533258e-06, - "loss": 4.9938, - "step": 15250 - }, - { - "epoch": 0.8432835820895522, - "grad_norm": 4.74376916885376, - "learning_rate": 8.05855599772598e-06, - "loss": 4.8981, - "step": 15255 - }, - { - "epoch": 0.8435599778883361, - "grad_norm": 3.422651529312134, - "learning_rate": 8.044343376918704e-06, - "loss": 4.9323, - "step": 15260 - }, - { - "epoch": 0.84383637368712, - "grad_norm": 4.132688999176025, - "learning_rate": 8.030130756111428e-06, - "loss": 4.8445, - "step": 15265 - }, - { - "epoch": 0.8441127694859039, - "grad_norm": 4.657894134521484, - "learning_rate": 8.01591813530415e-06, - "loss": 4.7014, - "step": 15270 - }, - { - "epoch": 0.8443891652846877, - "grad_norm": 4.646186828613281, - "learning_rate": 8.001705514496874e-06, - "loss": 5.2851, - "step": 15275 - }, - { - "epoch": 0.8446655610834716, - "grad_norm": 4.24678897857666, - "learning_rate": 7.987492893689596e-06, - "loss": 4.6824, - "step": 15280 - }, - { - "epoch": 0.8449419568822554, - "grad_norm": 3.599647045135498, - "learning_rate": 7.97328027288232e-06, - "loss": 4.8003, - "step": 15285 - }, - { - "epoch": 0.8452183526810393, - "grad_norm": 3.2087178230285645, - "learning_rate": 7.959067652075043e-06, - "loss": 4.6724, - "step": 15290 - }, - { - "epoch": 0.8454947484798231, - "grad_norm": 3.3129324913024902, - "learning_rate": 7.944855031267767e-06, - "loss": 4.9888, - "step": 15295 - }, - { - "epoch": 0.845771144278607, - "grad_norm": 4.16626501083374, - "learning_rate": 7.93064241046049e-06, - "loss": 4.9972, - "step": 15300 - }, - { - "epoch": 0.8460475400773908, - "grad_norm": 3.8757193088531494, - "learning_rate": 7.916429789653213e-06, - "loss": 4.9566, - "step": 15305 - }, - { - "epoch": 0.8463239358761747, - "grad_norm": 3.9059760570526123, - "learning_rate": 7.902217168845935e-06, - "loss": 4.7056, - "step": 15310 - }, - { - "epoch": 0.8466003316749585, - "grad_norm": 3.2571303844451904, - "learning_rate": 7.888004548038659e-06, - "loss": 5.0104, - "step": 15315 - }, - { - "epoch": 0.8468767274737424, - "grad_norm": 3.6211488246917725, - "learning_rate": 7.873791927231381e-06, - "loss": 4.9973, - "step": 15320 - }, - { - "epoch": 0.8471531232725262, - "grad_norm": 4.511015892028809, - "learning_rate": 7.859579306424104e-06, - "loss": 4.8835, - "step": 15325 - }, - { - "epoch": 0.8474295190713101, - "grad_norm": 3.3514885902404785, - "learning_rate": 7.845366685616828e-06, - "loss": 5.1082, - "step": 15330 - }, - { - "epoch": 0.847705914870094, - "grad_norm": 4.153277397155762, - "learning_rate": 7.831154064809552e-06, - "loss": 5.1117, - "step": 15335 - }, - { - "epoch": 0.8479823106688779, - "grad_norm": 3.661910057067871, - "learning_rate": 7.816941444002276e-06, - "loss": 4.785, - "step": 15340 - }, - { - "epoch": 0.8482587064676617, - "grad_norm": 3.4714293479919434, - "learning_rate": 7.802728823194998e-06, - "loss": 4.9615, - "step": 15345 - }, - { - "epoch": 0.8485351022664456, - "grad_norm": 3.412801504135132, - "learning_rate": 7.78851620238772e-06, - "loss": 4.7827, - "step": 15350 - }, - { - "epoch": 0.8488114980652294, - "grad_norm": 4.46990966796875, - "learning_rate": 7.774303581580444e-06, - "loss": 4.8436, - "step": 15355 - }, - { - "epoch": 0.8490878938640133, - "grad_norm": 4.014577388763428, - "learning_rate": 7.760090960773166e-06, - "loss": 4.9757, - "step": 15360 - }, - { - "epoch": 0.8493642896627971, - "grad_norm": 3.89868426322937, - "learning_rate": 7.74587833996589e-06, - "loss": 4.8749, - "step": 15365 - }, - { - "epoch": 0.849640685461581, - "grad_norm": 2.8966128826141357, - "learning_rate": 7.731665719158614e-06, - "loss": 4.7, - "step": 15370 - }, - { - "epoch": 0.8499170812603648, - "grad_norm": 4.55120849609375, - "learning_rate": 7.717453098351337e-06, - "loss": 4.7576, - "step": 15375 - }, - { - "epoch": 0.8501934770591487, - "grad_norm": 4.043015003204346, - "learning_rate": 7.703240477544059e-06, - "loss": 4.7656, - "step": 15380 - }, - { - "epoch": 0.8504698728579325, - "grad_norm": 4.398715019226074, - "learning_rate": 7.689027856736783e-06, - "loss": 4.7285, - "step": 15385 - }, - { - "epoch": 0.8507462686567164, - "grad_norm": 4.311310291290283, - "learning_rate": 7.674815235929505e-06, - "loss": 4.8688, - "step": 15390 - }, - { - "epoch": 0.8510226644555002, - "grad_norm": 3.4514060020446777, - "learning_rate": 7.66060261512223e-06, - "loss": 4.7912, - "step": 15395 - }, - { - "epoch": 0.8512990602542841, - "grad_norm": 3.418095588684082, - "learning_rate": 7.646389994314952e-06, - "loss": 4.8534, - "step": 15400 - }, - { - "epoch": 0.851575456053068, - "grad_norm": 4.0209503173828125, - "learning_rate": 7.632177373507675e-06, - "loss": 5.2002, - "step": 15405 - }, - { - "epoch": 0.8518518518518519, - "grad_norm": 2.8665504455566406, - "learning_rate": 7.617964752700399e-06, - "loss": 5.142, - "step": 15410 - }, - { - "epoch": 0.8521282476506357, - "grad_norm": 3.491189956665039, - "learning_rate": 7.603752131893122e-06, - "loss": 4.6841, - "step": 15415 - }, - { - "epoch": 0.8524046434494196, - "grad_norm": 3.8220739364624023, - "learning_rate": 7.589539511085844e-06, - "loss": 4.929, - "step": 15420 - }, - { - "epoch": 0.8526810392482034, - "grad_norm": 3.5828657150268555, - "learning_rate": 7.575326890278568e-06, - "loss": 5.0016, - "step": 15425 - }, - { - "epoch": 0.8529574350469873, - "grad_norm": 4.3814921379089355, - "learning_rate": 7.561114269471291e-06, - "loss": 4.8032, - "step": 15430 - }, - { - "epoch": 0.8532338308457711, - "grad_norm": 3.4918086528778076, - "learning_rate": 7.5469016486640134e-06, - "loss": 4.846, - "step": 15435 - }, - { - "epoch": 0.853510226644555, - "grad_norm": 3.771287679672241, - "learning_rate": 7.532689027856737e-06, - "loss": 4.791, - "step": 15440 - }, - { - "epoch": 0.8537866224433389, - "grad_norm": 3.5989866256713867, - "learning_rate": 7.51847640704946e-06, - "loss": 4.9933, - "step": 15445 - }, - { - "epoch": 0.8540630182421227, - "grad_norm": 3.3803904056549072, - "learning_rate": 7.504263786242183e-06, - "loss": 4.8067, - "step": 15450 - }, - { - "epoch": 0.8543394140409066, - "grad_norm": 4.378183841705322, - "learning_rate": 7.490051165434907e-06, - "loss": 5.3456, - "step": 15455 - }, - { - "epoch": 0.8546158098396904, - "grad_norm": 4.145111560821533, - "learning_rate": 7.475838544627629e-06, - "loss": 4.833, - "step": 15460 - }, - { - "epoch": 0.8548922056384743, - "grad_norm": 3.1699306964874268, - "learning_rate": 7.461625923820353e-06, - "loss": 4.7371, - "step": 15465 - }, - { - "epoch": 0.8551686014372581, - "grad_norm": 3.570518732070923, - "learning_rate": 7.447413303013076e-06, - "loss": 5.0778, - "step": 15470 - }, - { - "epoch": 0.8554449972360421, - "grad_norm": 3.922854423522949, - "learning_rate": 7.4332006822057985e-06, - "loss": 4.9517, - "step": 15475 - }, - { - "epoch": 0.8557213930348259, - "grad_norm": 4.147318363189697, - "learning_rate": 7.4189880613985225e-06, - "loss": 4.9921, - "step": 15480 - }, - { - "epoch": 0.8559977888336098, - "grad_norm": 3.7411043643951416, - "learning_rate": 7.404775440591246e-06, - "loss": 5.2079, - "step": 15485 - }, - { - "epoch": 0.8562741846323936, - "grad_norm": 3.129054069519043, - "learning_rate": 7.390562819783968e-06, - "loss": 4.8514, - "step": 15490 - }, - { - "epoch": 0.8565505804311775, - "grad_norm": 3.2814624309539795, - "learning_rate": 7.376350198976692e-06, - "loss": 4.9005, - "step": 15495 - }, - { - "epoch": 0.8568269762299613, - "grad_norm": 4.446510314941406, - "learning_rate": 7.362137578169415e-06, - "loss": 4.6255, - "step": 15500 - }, - { - "epoch": 0.8571033720287452, - "grad_norm": 4.147850036621094, - "learning_rate": 7.347924957362137e-06, - "loss": 4.7585, - "step": 15505 - }, - { - "epoch": 0.857379767827529, - "grad_norm": 3.926903247833252, - "learning_rate": 7.333712336554861e-06, - "loss": 5.0353, - "step": 15510 - }, - { - "epoch": 0.8576561636263129, - "grad_norm": 4.027170658111572, - "learning_rate": 7.3194997157475835e-06, - "loss": 4.9129, - "step": 15515 - }, - { - "epoch": 0.8579325594250967, - "grad_norm": 3.412487030029297, - "learning_rate": 7.3052870949403075e-06, - "loss": 4.8461, - "step": 15520 - }, - { - "epoch": 0.8582089552238806, - "grad_norm": 4.255180358886719, - "learning_rate": 7.291074474133031e-06, - "loss": 5.133, - "step": 15525 - }, - { - "epoch": 0.8584853510226644, - "grad_norm": 3.505549192428589, - "learning_rate": 7.276861853325753e-06, - "loss": 5.1632, - "step": 15530 - }, - { - "epoch": 0.8587617468214483, - "grad_norm": 3.490243434906006, - "learning_rate": 7.262649232518477e-06, - "loss": 4.8109, - "step": 15535 - }, - { - "epoch": 0.8590381426202321, - "grad_norm": 3.9529364109039307, - "learning_rate": 7.2484366117112e-06, - "loss": 4.7609, - "step": 15540 - }, - { - "epoch": 0.8593145384190161, - "grad_norm": 4.20280122756958, - "learning_rate": 7.234223990903922e-06, - "loss": 4.8306, - "step": 15545 - }, - { - "epoch": 0.8595909342177999, - "grad_norm": 3.574753761291504, - "learning_rate": 7.220011370096646e-06, - "loss": 5.051, - "step": 15550 - }, - { - "epoch": 0.8598673300165838, - "grad_norm": 3.6286284923553467, - "learning_rate": 7.2057987492893694e-06, - "loss": 4.7709, - "step": 15555 - }, - { - "epoch": 0.8601437258153676, - "grad_norm": 5.038646697998047, - "learning_rate": 7.191586128482092e-06, - "loss": 4.9982, - "step": 15560 - }, - { - "epoch": 0.8604201216141515, - "grad_norm": 3.0044443607330322, - "learning_rate": 7.177373507674816e-06, - "loss": 4.8929, - "step": 15565 - }, - { - "epoch": 0.8606965174129353, - "grad_norm": 3.639540672302246, - "learning_rate": 7.163160886867539e-06, - "loss": 4.9908, - "step": 15570 - }, - { - "epoch": 0.8609729132117192, - "grad_norm": 3.9962198734283447, - "learning_rate": 7.148948266060263e-06, - "loss": 4.6839, - "step": 15575 - }, - { - "epoch": 0.861249309010503, - "grad_norm": 3.4947597980499268, - "learning_rate": 7.134735645252985e-06, - "loss": 4.9089, - "step": 15580 - }, - { - "epoch": 0.8615257048092869, - "grad_norm": 3.2274651527404785, - "learning_rate": 7.120523024445707e-06, - "loss": 4.6201, - "step": 15585 - }, - { - "epoch": 0.8618021006080707, - "grad_norm": 4.237607955932617, - "learning_rate": 7.106310403638431e-06, - "loss": 5.0777, - "step": 15590 - }, - { - "epoch": 0.8620784964068546, - "grad_norm": 2.7979462146759033, - "learning_rate": 7.0920977828311545e-06, - "loss": 4.7732, - "step": 15595 - }, - { - "epoch": 0.8623548922056384, - "grad_norm": 3.392643928527832, - "learning_rate": 7.077885162023877e-06, - "loss": 4.8292, - "step": 15600 - }, - { - "epoch": 0.8626312880044223, - "grad_norm": 4.662559509277344, - "learning_rate": 7.063672541216601e-06, - "loss": 4.7918, - "step": 15605 - }, - { - "epoch": 0.8629076838032061, - "grad_norm": 3.801340341567993, - "learning_rate": 7.049459920409324e-06, - "loss": 5.173, - "step": 15610 - }, - { - "epoch": 0.8631840796019901, - "grad_norm": 3.8719255924224854, - "learning_rate": 7.035247299602046e-06, - "loss": 4.9831, - "step": 15615 - }, - { - "epoch": 0.8634604754007739, - "grad_norm": 3.9387316703796387, - "learning_rate": 7.02103467879477e-06, - "loss": 4.8149, - "step": 15620 - }, - { - "epoch": 0.8637368711995578, - "grad_norm": 3.568291664123535, - "learning_rate": 7.006822057987493e-06, - "loss": 4.8419, - "step": 15625 - }, - { - "epoch": 0.8640132669983416, - "grad_norm": 4.031901836395264, - "learning_rate": 6.992609437180217e-06, - "loss": 4.6986, - "step": 15630 - }, - { - "epoch": 0.8642896627971255, - "grad_norm": 4.286612033843994, - "learning_rate": 6.9783968163729396e-06, - "loss": 4.9399, - "step": 15635 - }, - { - "epoch": 0.8645660585959093, - "grad_norm": 3.5853421688079834, - "learning_rate": 6.964184195565662e-06, - "loss": 5.2017, - "step": 15640 - }, - { - "epoch": 0.8648424543946932, - "grad_norm": 4.815415382385254, - "learning_rate": 6.949971574758387e-06, - "loss": 5.2114, - "step": 15645 - }, - { - "epoch": 0.865118850193477, - "grad_norm": 3.401326894760132, - "learning_rate": 6.935758953951109e-06, - "loss": 4.821, - "step": 15650 - }, - { - "epoch": 0.8653952459922609, - "grad_norm": 3.949343681335449, - "learning_rate": 6.921546333143831e-06, - "loss": 5.2086, - "step": 15655 - }, - { - "epoch": 0.8656716417910447, - "grad_norm": 4.092352867126465, - "learning_rate": 6.907333712336555e-06, - "loss": 4.8635, - "step": 15660 - }, - { - "epoch": 0.8659480375898286, - "grad_norm": 4.154953956604004, - "learning_rate": 6.893121091529278e-06, - "loss": 4.7274, - "step": 15665 - }, - { - "epoch": 0.8662244333886125, - "grad_norm": 3.2108614444732666, - "learning_rate": 6.878908470722001e-06, - "loss": 5.0751, - "step": 15670 - }, - { - "epoch": 0.8665008291873963, - "grad_norm": 3.6406795978546143, - "learning_rate": 6.864695849914725e-06, - "loss": 4.935, - "step": 15675 - }, - { - "epoch": 0.8667772249861803, - "grad_norm": 3.644953727722168, - "learning_rate": 6.850483229107448e-06, - "loss": 5.0036, - "step": 15680 - }, - { - "epoch": 0.8670536207849641, - "grad_norm": 3.906993865966797, - "learning_rate": 6.836270608300172e-06, - "loss": 4.7136, - "step": 15685 - }, - { - "epoch": 0.867330016583748, - "grad_norm": 3.677238702774048, - "learning_rate": 6.822057987492894e-06, - "loss": 4.8841, - "step": 15690 - }, - { - "epoch": 0.8676064123825318, - "grad_norm": 3.396554946899414, - "learning_rate": 6.807845366685617e-06, - "loss": 4.9053, - "step": 15695 - }, - { - "epoch": 0.8678828081813157, - "grad_norm": 4.678235054016113, - "learning_rate": 6.793632745878341e-06, - "loss": 4.7476, - "step": 15700 - }, - { - "epoch": 0.8681592039800995, - "grad_norm": 3.4147768020629883, - "learning_rate": 6.779420125071063e-06, - "loss": 5.0731, - "step": 15705 - }, - { - "epoch": 0.8684355997788834, - "grad_norm": 4.460427761077881, - "learning_rate": 6.765207504263786e-06, - "loss": 4.9435, - "step": 15710 - }, - { - "epoch": 0.8687119955776672, - "grad_norm": 4.205367565155029, - "learning_rate": 6.75099488345651e-06, - "loss": 4.7848, - "step": 15715 - }, - { - "epoch": 0.8689883913764511, - "grad_norm": 3.436741828918457, - "learning_rate": 6.736782262649233e-06, - "loss": 4.7811, - "step": 15720 - }, - { - "epoch": 0.8692647871752349, - "grad_norm": 3.21610164642334, - "learning_rate": 6.722569641841955e-06, - "loss": 4.8657, - "step": 15725 - }, - { - "epoch": 0.8695411829740188, - "grad_norm": 3.93279767036438, - "learning_rate": 6.708357021034679e-06, - "loss": 4.8837, - "step": 15730 - }, - { - "epoch": 0.8698175787728026, - "grad_norm": 3.3323113918304443, - "learning_rate": 6.694144400227402e-06, - "loss": 5.143, - "step": 15735 - }, - { - "epoch": 0.8700939745715865, - "grad_norm": 3.971737861633301, - "learning_rate": 6.679931779420126e-06, - "loss": 4.9652, - "step": 15740 - }, - { - "epoch": 0.8703703703703703, - "grad_norm": 3.596555471420288, - "learning_rate": 6.6657191586128485e-06, - "loss": 4.9259, - "step": 15745 - }, - { - "epoch": 0.8706467661691543, - "grad_norm": 3.582368850708008, - "learning_rate": 6.651506537805572e-06, - "loss": 5.0243, - "step": 15750 - }, - { - "epoch": 0.8709231619679381, - "grad_norm": 3.9322404861450195, - "learning_rate": 6.637293916998296e-06, - "loss": 5.2276, - "step": 15755 - }, - { - "epoch": 0.871199557766722, - "grad_norm": 4.058863162994385, - "learning_rate": 6.623081296191018e-06, - "loss": 5.2218, - "step": 15760 - }, - { - "epoch": 0.8714759535655058, - "grad_norm": 3.4161527156829834, - "learning_rate": 6.608868675383741e-06, - "loss": 4.6876, - "step": 15765 - }, - { - "epoch": 0.8717523493642897, - "grad_norm": 2.900460958480835, - "learning_rate": 6.594656054576465e-06, - "loss": 4.7734, - "step": 15770 - }, - { - "epoch": 0.8720287451630735, - "grad_norm": 3.5918712615966797, - "learning_rate": 6.580443433769187e-06, - "loss": 4.7184, - "step": 15775 - }, - { - "epoch": 0.8723051409618574, - "grad_norm": 3.992398500442505, - "learning_rate": 6.5662308129619095e-06, - "loss": 4.966, - "step": 15780 - }, - { - "epoch": 0.8725815367606412, - "grad_norm": 3.817514419555664, - "learning_rate": 6.5520181921546335e-06, - "loss": 5.0287, - "step": 15785 - }, - { - "epoch": 0.8728579325594251, - "grad_norm": 3.5824077129364014, - "learning_rate": 6.537805571347357e-06, - "loss": 4.9329, - "step": 15790 - }, - { - "epoch": 0.8731343283582089, - "grad_norm": 3.7401037216186523, - "learning_rate": 6.523592950540081e-06, - "loss": 4.9287, - "step": 15795 - }, - { - "epoch": 0.8734107241569928, - "grad_norm": 3.4796454906463623, - "learning_rate": 6.509380329732803e-06, - "loss": 4.9727, - "step": 15800 - }, - { - "epoch": 0.8736871199557766, - "grad_norm": 5.003295421600342, - "learning_rate": 6.495167708925526e-06, - "loss": 4.7314, - "step": 15805 - }, - { - "epoch": 0.8739635157545605, - "grad_norm": 4.505870819091797, - "learning_rate": 6.48095508811825e-06, - "loss": 5.0809, - "step": 15810 - }, - { - "epoch": 0.8742399115533444, - "grad_norm": 3.863006114959717, - "learning_rate": 6.466742467310972e-06, - "loss": 4.6385, - "step": 15815 - }, - { - "epoch": 0.8745163073521283, - "grad_norm": 3.4922070503234863, - "learning_rate": 6.4525298465036955e-06, - "loss": 4.7127, - "step": 15820 - }, - { - "epoch": 0.8747927031509121, - "grad_norm": 3.6463775634765625, - "learning_rate": 6.4383172256964194e-06, - "loss": 4.938, - "step": 15825 - }, - { - "epoch": 0.875069098949696, - "grad_norm": 3.5146822929382324, - "learning_rate": 6.424104604889142e-06, - "loss": 4.8737, - "step": 15830 - }, - { - "epoch": 0.8753454947484798, - "grad_norm": 2.9423630237579346, - "learning_rate": 6.409891984081864e-06, - "loss": 4.7986, - "step": 15835 - }, - { - "epoch": 0.8756218905472637, - "grad_norm": 4.098637580871582, - "learning_rate": 6.395679363274589e-06, - "loss": 4.7571, - "step": 15840 - }, - { - "epoch": 0.8758982863460475, - "grad_norm": 2.9004056453704834, - "learning_rate": 6.381466742467311e-06, - "loss": 4.716, - "step": 15845 - }, - { - "epoch": 0.8761746821448314, - "grad_norm": 3.797128915786743, - "learning_rate": 6.367254121660035e-06, - "loss": 5.0502, - "step": 15850 - }, - { - "epoch": 0.8764510779436152, - "grad_norm": 3.994654417037964, - "learning_rate": 6.353041500852757e-06, - "loss": 4.898, - "step": 15855 - }, - { - "epoch": 0.8767274737423991, - "grad_norm": 3.2940444946289062, - "learning_rate": 6.3388288800454805e-06, - "loss": 4.8664, - "step": 15860 - }, - { - "epoch": 0.8770038695411829, - "grad_norm": 4.2146124839782715, - "learning_rate": 6.3246162592382045e-06, - "loss": 4.8941, - "step": 15865 - }, - { - "epoch": 0.8772802653399668, - "grad_norm": 3.3862946033477783, - "learning_rate": 6.310403638430927e-06, - "loss": 4.8247, - "step": 15870 - }, - { - "epoch": 0.8775566611387506, - "grad_norm": 3.480405807495117, - "learning_rate": 6.29619101762365e-06, - "loss": 5.1447, - "step": 15875 - }, - { - "epoch": 0.8778330569375346, - "grad_norm": 3.4733119010925293, - "learning_rate": 6.281978396816374e-06, - "loss": 4.6635, - "step": 15880 - }, - { - "epoch": 0.8781094527363185, - "grad_norm": 3.853881359100342, - "learning_rate": 6.267765776009096e-06, - "loss": 5.0099, - "step": 15885 - }, - { - "epoch": 0.8783858485351023, - "grad_norm": 3.801287889480591, - "learning_rate": 6.253553155201819e-06, - "loss": 4.6277, - "step": 15890 - }, - { - "epoch": 0.8786622443338862, - "grad_norm": 3.785771131515503, - "learning_rate": 6.239340534394543e-06, - "loss": 4.7045, - "step": 15895 - }, - { - "epoch": 0.87893864013267, - "grad_norm": 4.128140449523926, - "learning_rate": 6.2251279135872656e-06, - "loss": 5.0229, - "step": 15900 - }, - { - "epoch": 0.8792150359314539, - "grad_norm": 3.9363701343536377, - "learning_rate": 6.210915292779989e-06, - "loss": 5.1252, - "step": 15905 - }, - { - "epoch": 0.8794914317302377, - "grad_norm": 3.420142889022827, - "learning_rate": 6.196702671972713e-06, - "loss": 5.1304, - "step": 15910 - }, - { - "epoch": 0.8797678275290216, - "grad_norm": 4.293994426727295, - "learning_rate": 6.182490051165435e-06, - "loss": 5.1745, - "step": 15915 - }, - { - "epoch": 0.8800442233278054, - "grad_norm": 4.5453314781188965, - "learning_rate": 6.168277430358158e-06, - "loss": 4.7134, - "step": 15920 - }, - { - "epoch": 0.8803206191265893, - "grad_norm": 3.731394052505493, - "learning_rate": 6.154064809550881e-06, - "loss": 4.7987, - "step": 15925 - }, - { - "epoch": 0.8805970149253731, - "grad_norm": 3.9155333042144775, - "learning_rate": 6.139852188743605e-06, - "loss": 4.8764, - "step": 15930 - }, - { - "epoch": 0.880873410724157, - "grad_norm": 3.251206636428833, - "learning_rate": 6.1256395679363275e-06, - "loss": 4.9742, - "step": 15935 - }, - { - "epoch": 0.8811498065229408, - "grad_norm": 3.428173542022705, - "learning_rate": 6.111426947129051e-06, - "loss": 5.0398, - "step": 15940 - }, - { - "epoch": 0.8814262023217247, - "grad_norm": 4.512082576751709, - "learning_rate": 6.097214326321774e-06, - "loss": 4.9136, - "step": 15945 - }, - { - "epoch": 0.8817025981205086, - "grad_norm": 3.4978713989257812, - "learning_rate": 6.083001705514498e-06, - "loss": 4.732, - "step": 15950 - }, - { - "epoch": 0.8819789939192925, - "grad_norm": 4.127050876617432, - "learning_rate": 6.06878908470722e-06, - "loss": 4.9104, - "step": 15955 - }, - { - "epoch": 0.8822553897180763, - "grad_norm": 3.6599040031433105, - "learning_rate": 6.054576463899943e-06, - "loss": 5.2423, - "step": 15960 - }, - { - "epoch": 0.8825317855168602, - "grad_norm": 3.895284652709961, - "learning_rate": 6.040363843092667e-06, - "loss": 4.9198, - "step": 15965 - }, - { - "epoch": 0.882808181315644, - "grad_norm": 3.3798611164093018, - "learning_rate": 6.026151222285389e-06, - "loss": 4.7896, - "step": 15970 - }, - { - "epoch": 0.8830845771144279, - "grad_norm": 3.8356308937072754, - "learning_rate": 6.0119386014781126e-06, - "loss": 5.1028, - "step": 15975 - }, - { - "epoch": 0.8833609729132117, - "grad_norm": 3.9979400634765625, - "learning_rate": 5.997725980670836e-06, - "loss": 4.9204, - "step": 15980 - }, - { - "epoch": 0.8836373687119956, - "grad_norm": 3.993461847305298, - "learning_rate": 5.98351335986356e-06, - "loss": 4.7276, - "step": 15985 - }, - { - "epoch": 0.8839137645107794, - "grad_norm": 3.9294278621673584, - "learning_rate": 5.969300739056282e-06, - "loss": 4.666, - "step": 15990 - }, - { - "epoch": 0.8841901603095633, - "grad_norm": 5.005484104156494, - "learning_rate": 5.955088118249005e-06, - "loss": 4.9111, - "step": 15995 - }, - { - "epoch": 0.8844665561083471, - "grad_norm": 3.216432809829712, - "learning_rate": 5.940875497441729e-06, - "loss": 4.6319, - "step": 16000 - }, - { - "epoch": 0.884742951907131, - "grad_norm": 3.6470518112182617, - "learning_rate": 5.926662876634452e-06, - "loss": 4.7934, - "step": 16005 - }, - { - "epoch": 0.8850193477059148, - "grad_norm": 3.7931535243988037, - "learning_rate": 5.9124502558271745e-06, - "loss": 4.9271, - "step": 16010 - }, - { - "epoch": 0.8852957435046988, - "grad_norm": 3.077409267425537, - "learning_rate": 5.898237635019898e-06, - "loss": 4.7351, - "step": 16015 - }, - { - "epoch": 0.8855721393034826, - "grad_norm": 3.6445906162261963, - "learning_rate": 5.884025014212622e-06, - "loss": 4.8911, - "step": 16020 - }, - { - "epoch": 0.8858485351022665, - "grad_norm": 4.014408111572266, - "learning_rate": 5.869812393405344e-06, - "loss": 4.9776, - "step": 16025 - }, - { - "epoch": 0.8861249309010503, - "grad_norm": 3.4272587299346924, - "learning_rate": 5.855599772598067e-06, - "loss": 4.9316, - "step": 16030 - }, - { - "epoch": 0.8864013266998342, - "grad_norm": 3.4246294498443604, - "learning_rate": 5.841387151790791e-06, - "loss": 4.7851, - "step": 16035 - }, - { - "epoch": 0.886677722498618, - "grad_norm": 3.426506996154785, - "learning_rate": 5.827174530983514e-06, - "loss": 5.0848, - "step": 16040 - }, - { - "epoch": 0.8869541182974019, - "grad_norm": 4.0810956954956055, - "learning_rate": 5.812961910176236e-06, - "loss": 4.5215, - "step": 16045 - }, - { - "epoch": 0.8872305140961857, - "grad_norm": 2.9384403228759766, - "learning_rate": 5.7987492893689595e-06, - "loss": 4.7552, - "step": 16050 - }, - { - "epoch": 0.8875069098949696, - "grad_norm": 3.227759838104248, - "learning_rate": 5.7845366685616835e-06, - "loss": 4.7817, - "step": 16055 - }, - { - "epoch": 0.8877833056937534, - "grad_norm": 4.257302761077881, - "learning_rate": 5.770324047754406e-06, - "loss": 4.7766, - "step": 16060 - }, - { - "epoch": 0.8880597014925373, - "grad_norm": 4.3714919090271, - "learning_rate": 5.756111426947129e-06, - "loss": 5.0406, - "step": 16065 - }, - { - "epoch": 0.8883360972913211, - "grad_norm": 3.33453369140625, - "learning_rate": 5.741898806139853e-06, - "loss": 5.4695, - "step": 16070 - }, - { - "epoch": 0.888612493090105, - "grad_norm": 3.6694791316986084, - "learning_rate": 5.727686185332576e-06, - "loss": 4.7901, - "step": 16075 - }, - { - "epoch": 0.8888888888888888, - "grad_norm": 3.3092100620269775, - "learning_rate": 5.713473564525298e-06, - "loss": 5.0055, - "step": 16080 - }, - { - "epoch": 0.8891652846876728, - "grad_norm": 3.254375457763672, - "learning_rate": 5.6992609437180215e-06, - "loss": 4.8603, - "step": 16085 - }, - { - "epoch": 0.8894416804864566, - "grad_norm": 3.5926568508148193, - "learning_rate": 5.6850483229107454e-06, - "loss": 4.9635, - "step": 16090 - }, - { - "epoch": 0.8897180762852405, - "grad_norm": 3.4119133949279785, - "learning_rate": 5.6708357021034686e-06, - "loss": 5.0395, - "step": 16095 - }, - { - "epoch": 0.8899944720840243, - "grad_norm": 3.3834893703460693, - "learning_rate": 5.656623081296191e-06, - "loss": 5.145, - "step": 16100 - }, - { - "epoch": 0.8902708678828082, - "grad_norm": 3.796090602874756, - "learning_rate": 5.642410460488915e-06, - "loss": 4.8246, - "step": 16105 - }, - { - "epoch": 0.8905472636815921, - "grad_norm": 3.5259482860565186, - "learning_rate": 5.628197839681638e-06, - "loss": 4.8869, - "step": 16110 - }, - { - "epoch": 0.8908236594803759, - "grad_norm": 4.100551605224609, - "learning_rate": 5.61398521887436e-06, - "loss": 4.674, - "step": 16115 - }, - { - "epoch": 0.8911000552791598, - "grad_norm": 4.206357479095459, - "learning_rate": 5.599772598067083e-06, - "loss": 4.7143, - "step": 16120 - }, - { - "epoch": 0.8913764510779436, - "grad_norm": 3.823300838470459, - "learning_rate": 5.585559977259807e-06, - "loss": 4.8694, - "step": 16125 - }, - { - "epoch": 0.8916528468767275, - "grad_norm": 4.283262729644775, - "learning_rate": 5.5713473564525305e-06, - "loss": 5.0432, - "step": 16130 - }, - { - "epoch": 0.8919292426755113, - "grad_norm": 4.096394062042236, - "learning_rate": 5.557134735645253e-06, - "loss": 4.9027, - "step": 16135 - }, - { - "epoch": 0.8922056384742952, - "grad_norm": 3.4035484790802, - "learning_rate": 5.542922114837976e-06, - "loss": 4.8498, - "step": 16140 - }, - { - "epoch": 0.892482034273079, - "grad_norm": 3.974168539047241, - "learning_rate": 5.5287094940307e-06, - "loss": 4.9876, - "step": 16145 - }, - { - "epoch": 0.892758430071863, - "grad_norm": 4.489919662475586, - "learning_rate": 5.514496873223423e-06, - "loss": 4.757, - "step": 16150 - }, - { - "epoch": 0.8930348258706468, - "grad_norm": 3.5792441368103027, - "learning_rate": 5.500284252416145e-06, - "loss": 4.821, - "step": 16155 - }, - { - "epoch": 0.8933112216694307, - "grad_norm": 4.130000591278076, - "learning_rate": 5.486071631608869e-06, - "loss": 4.5594, - "step": 16160 - }, - { - "epoch": 0.8935876174682145, - "grad_norm": 4.009551048278809, - "learning_rate": 5.471859010801592e-06, - "loss": 5.024, - "step": 16165 - }, - { - "epoch": 0.8938640132669984, - "grad_norm": 4.058741569519043, - "learning_rate": 5.457646389994315e-06, - "loss": 5.031, - "step": 16170 - }, - { - "epoch": 0.8941404090657822, - "grad_norm": 3.8095996379852295, - "learning_rate": 5.443433769187038e-06, - "loss": 4.7272, - "step": 16175 - }, - { - "epoch": 0.8944168048645661, - "grad_norm": 3.7943928241729736, - "learning_rate": 5.429221148379762e-06, - "loss": 4.8543, - "step": 16180 - }, - { - "epoch": 0.8946932006633499, - "grad_norm": 3.153428316116333, - "learning_rate": 5.415008527572485e-06, - "loss": 4.6109, - "step": 16185 - }, - { - "epoch": 0.8949695964621338, - "grad_norm": 3.685887575149536, - "learning_rate": 5.400795906765207e-06, - "loss": 4.676, - "step": 16190 - }, - { - "epoch": 0.8952459922609176, - "grad_norm": 3.823957920074463, - "learning_rate": 5.386583285957931e-06, - "loss": 5.0588, - "step": 16195 - }, - { - "epoch": 0.8955223880597015, - "grad_norm": 3.5146028995513916, - "learning_rate": 5.372370665150654e-06, - "loss": 4.5432, - "step": 16200 - }, - { - "epoch": 0.8957987838584853, - "grad_norm": 3.8643455505371094, - "learning_rate": 5.3581580443433775e-06, - "loss": 5.029, - "step": 16205 - }, - { - "epoch": 0.8960751796572692, - "grad_norm": 4.048851490020752, - "learning_rate": 5.3439454235361e-06, - "loss": 4.7677, - "step": 16210 - }, - { - "epoch": 0.896351575456053, - "grad_norm": 3.798962354660034, - "learning_rate": 5.329732802728824e-06, - "loss": 5.0452, - "step": 16215 - }, - { - "epoch": 0.896627971254837, - "grad_norm": 3.9125924110412598, - "learning_rate": 5.315520181921547e-06, - "loss": 4.8263, - "step": 16220 - }, - { - "epoch": 0.8969043670536208, - "grad_norm": 5.244776248931885, - "learning_rate": 5.301307561114269e-06, - "loss": 5.1466, - "step": 16225 - }, - { - "epoch": 0.8971807628524047, - "grad_norm": 4.3974528312683105, - "learning_rate": 5.287094940306993e-06, - "loss": 4.6972, - "step": 16230 - }, - { - "epoch": 0.8974571586511885, - "grad_norm": 3.663856267929077, - "learning_rate": 5.272882319499716e-06, - "loss": 4.8124, - "step": 16235 - }, - { - "epoch": 0.8977335544499724, - "grad_norm": 3.177274465560913, - "learning_rate": 5.258669698692439e-06, - "loss": 4.7413, - "step": 16240 - }, - { - "epoch": 0.8980099502487562, - "grad_norm": 4.527139663696289, - "learning_rate": 5.244457077885162e-06, - "loss": 4.6418, - "step": 16245 - }, - { - "epoch": 0.8982863460475401, - "grad_norm": 3.90124773979187, - "learning_rate": 5.230244457077886e-06, - "loss": 4.985, - "step": 16250 - }, - { - "epoch": 0.8985627418463239, - "grad_norm": 4.100987911224365, - "learning_rate": 5.216031836270609e-06, - "loss": 4.9227, - "step": 16255 - }, - { - "epoch": 0.8988391376451078, - "grad_norm": 3.7627832889556885, - "learning_rate": 5.201819215463331e-06, - "loss": 4.658, - "step": 16260 - }, - { - "epoch": 0.8991155334438916, - "grad_norm": 3.8064067363739014, - "learning_rate": 5.187606594656055e-06, - "loss": 4.7242, - "step": 16265 - }, - { - "epoch": 0.8993919292426755, - "grad_norm": 4.380611419677734, - "learning_rate": 5.173393973848778e-06, - "loss": 4.957, - "step": 16270 - }, - { - "epoch": 0.8996683250414593, - "grad_norm": 4.4961676597595215, - "learning_rate": 5.159181353041501e-06, - "loss": 4.7186, - "step": 16275 - }, - { - "epoch": 0.8999447208402432, - "grad_norm": 4.06073522567749, - "learning_rate": 5.144968732234224e-06, - "loss": 5.024, - "step": 16280 - }, - { - "epoch": 0.900221116639027, - "grad_norm": 3.396531581878662, - "learning_rate": 5.130756111426948e-06, - "loss": 4.8516, - "step": 16285 - }, - { - "epoch": 0.900497512437811, - "grad_norm": 3.7098519802093506, - "learning_rate": 5.116543490619671e-06, - "loss": 4.719, - "step": 16290 - }, - { - "epoch": 0.9007739082365948, - "grad_norm": 3.9446892738342285, - "learning_rate": 5.102330869812394e-06, - "loss": 5.1406, - "step": 16295 - }, - { - "epoch": 0.9010503040353787, - "grad_norm": 3.210261344909668, - "learning_rate": 5.088118249005117e-06, - "loss": 4.6729, - "step": 16300 - }, - { - "epoch": 0.9013266998341625, - "grad_norm": 4.007756233215332, - "learning_rate": 5.07390562819784e-06, - "loss": 4.7955, - "step": 16305 - }, - { - "epoch": 0.9016030956329464, - "grad_norm": 3.227644205093384, - "learning_rate": 5.059693007390563e-06, - "loss": 4.7183, - "step": 16310 - }, - { - "epoch": 0.9018794914317302, - "grad_norm": 4.634718894958496, - "learning_rate": 5.0454803865832855e-06, - "loss": 5.101, - "step": 16315 - }, - { - "epoch": 0.9021558872305141, - "grad_norm": 4.174224853515625, - "learning_rate": 5.0312677657760095e-06, - "loss": 4.9855, - "step": 16320 - }, - { - "epoch": 0.9024322830292979, - "grad_norm": 3.7757952213287354, - "learning_rate": 5.017055144968733e-06, - "loss": 5.0235, - "step": 16325 - }, - { - "epoch": 0.9027086788280818, - "grad_norm": 3.770765781402588, - "learning_rate": 5.002842524161456e-06, - "loss": 5.1194, - "step": 16330 - }, - { - "epoch": 0.9029850746268657, - "grad_norm": 3.6249911785125732, - "learning_rate": 4.988629903354178e-06, - "loss": 4.7802, - "step": 16335 - }, - { - "epoch": 0.9032614704256495, - "grad_norm": 4.837128162384033, - "learning_rate": 4.974417282546902e-06, - "loss": 4.599, - "step": 16340 - }, - { - "epoch": 0.9035378662244334, - "grad_norm": 4.374870777130127, - "learning_rate": 4.960204661739625e-06, - "loss": 4.8379, - "step": 16345 - }, - { - "epoch": 0.9038142620232172, - "grad_norm": 4.0973734855651855, - "learning_rate": 4.945992040932348e-06, - "loss": 5.0881, - "step": 16350 - }, - { - "epoch": 0.9040906578220012, - "grad_norm": 4.1243977546691895, - "learning_rate": 4.9317794201250714e-06, - "loss": 5.1151, - "step": 16355 - }, - { - "epoch": 0.904367053620785, - "grad_norm": 3.1973705291748047, - "learning_rate": 4.9175667993177946e-06, - "loss": 5.0274, - "step": 16360 - }, - { - "epoch": 0.9046434494195689, - "grad_norm": 3.458981990814209, - "learning_rate": 4.903354178510518e-06, - "loss": 4.8251, - "step": 16365 - }, - { - "epoch": 0.9049198452183527, - "grad_norm": 3.640015125274658, - "learning_rate": 4.88914155770324e-06, - "loss": 4.9673, - "step": 16370 - }, - { - "epoch": 0.9051962410171366, - "grad_norm": 3.2531189918518066, - "learning_rate": 4.874928936895964e-06, - "loss": 4.5497, - "step": 16375 - }, - { - "epoch": 0.9054726368159204, - "grad_norm": 3.353008508682251, - "learning_rate": 4.860716316088687e-06, - "loss": 4.8228, - "step": 16380 - }, - { - "epoch": 0.9057490326147043, - "grad_norm": 4.130444049835205, - "learning_rate": 4.84650369528141e-06, - "loss": 4.7594, - "step": 16385 - }, - { - "epoch": 0.9060254284134881, - "grad_norm": 3.1232221126556396, - "learning_rate": 4.832291074474133e-06, - "loss": 4.9349, - "step": 16390 - }, - { - "epoch": 0.906301824212272, - "grad_norm": 3.7138428688049316, - "learning_rate": 4.8180784536668565e-06, - "loss": 4.7187, - "step": 16395 - }, - { - "epoch": 0.9065782200110558, - "grad_norm": 3.310006618499756, - "learning_rate": 4.80386583285958e-06, - "loss": 5.0194, - "step": 16400 - }, - { - "epoch": 0.9068546158098397, - "grad_norm": 5.019920349121094, - "learning_rate": 4.789653212052303e-06, - "loss": 4.9197, - "step": 16405 - }, - { - "epoch": 0.9071310116086235, - "grad_norm": 3.792968273162842, - "learning_rate": 4.775440591245026e-06, - "loss": 4.9581, - "step": 16410 - }, - { - "epoch": 0.9074074074074074, - "grad_norm": 4.522578716278076, - "learning_rate": 4.761227970437749e-06, - "loss": 4.7076, - "step": 16415 - }, - { - "epoch": 0.9076838032061912, - "grad_norm": 3.9118542671203613, - "learning_rate": 4.747015349630472e-06, - "loss": 4.9707, - "step": 16420 - }, - { - "epoch": 0.9079601990049752, - "grad_norm": 4.168303489685059, - "learning_rate": 4.732802728823195e-06, - "loss": 4.603, - "step": 16425 - }, - { - "epoch": 0.908236594803759, - "grad_norm": 3.641812324523926, - "learning_rate": 4.7185901080159184e-06, - "loss": 4.8405, - "step": 16430 - }, - { - "epoch": 0.9085129906025429, - "grad_norm": 4.432767868041992, - "learning_rate": 4.7043774872086416e-06, - "loss": 5.1405, - "step": 16435 - }, - { - "epoch": 0.9087893864013267, - "grad_norm": 3.7158989906311035, - "learning_rate": 4.690164866401365e-06, - "loss": 4.8914, - "step": 16440 - }, - { - "epoch": 0.9090657822001106, - "grad_norm": 3.8096296787261963, - "learning_rate": 4.675952245594088e-06, - "loss": 4.8564, - "step": 16445 - }, - { - "epoch": 0.9093421779988944, - "grad_norm": 3.3491950035095215, - "learning_rate": 4.661739624786811e-06, - "loss": 5.1232, - "step": 16450 - }, - { - "epoch": 0.9096185737976783, - "grad_norm": 4.407762050628662, - "learning_rate": 4.647527003979534e-06, - "loss": 5.2513, - "step": 16455 - }, - { - "epoch": 0.9098949695964621, - "grad_norm": 4.356717109680176, - "learning_rate": 4.633314383172257e-06, - "loss": 5.0331, - "step": 16460 - }, - { - "epoch": 0.910171365395246, - "grad_norm": 3.2973270416259766, - "learning_rate": 4.61910176236498e-06, - "loss": 4.8999, - "step": 16465 - }, - { - "epoch": 0.9104477611940298, - "grad_norm": 3.944094181060791, - "learning_rate": 4.6048891415577035e-06, - "loss": 4.7392, - "step": 16470 - }, - { - "epoch": 0.9107241569928137, - "grad_norm": 3.541595220565796, - "learning_rate": 4.590676520750427e-06, - "loss": 5.0021, - "step": 16475 - }, - { - "epoch": 0.9110005527915975, - "grad_norm": 4.237240314483643, - "learning_rate": 4.57646389994315e-06, - "loss": 4.586, - "step": 16480 - }, - { - "epoch": 0.9112769485903814, - "grad_norm": 3.664316177368164, - "learning_rate": 4.562251279135873e-06, - "loss": 4.9774, - "step": 16485 - }, - { - "epoch": 0.9115533443891652, - "grad_norm": 3.5280942916870117, - "learning_rate": 4.548038658328596e-06, - "loss": 4.9678, - "step": 16490 - }, - { - "epoch": 0.9118297401879492, - "grad_norm": 3.860072135925293, - "learning_rate": 4.533826037521319e-06, - "loss": 5.1321, - "step": 16495 - }, - { - "epoch": 0.912106135986733, - "grad_norm": 3.7712130546569824, - "learning_rate": 4.519613416714042e-06, - "loss": 4.9678, - "step": 16500 - }, - { - "epoch": 0.9123825317855169, - "grad_norm": 3.747433662414551, - "learning_rate": 4.505400795906765e-06, - "loss": 4.9893, - "step": 16505 - }, - { - "epoch": 0.9126589275843007, - "grad_norm": 6.0445170402526855, - "learning_rate": 4.4911881750994885e-06, - "loss": 4.8376, - "step": 16510 - }, - { - "epoch": 0.9129353233830846, - "grad_norm": 3.5901873111724854, - "learning_rate": 4.476975554292212e-06, - "loss": 4.8362, - "step": 16515 - }, - { - "epoch": 0.9132117191818684, - "grad_norm": 3.6722826957702637, - "learning_rate": 4.462762933484935e-06, - "loss": 5.1034, - "step": 16520 - }, - { - "epoch": 0.9134881149806523, - "grad_norm": 3.5389833450317383, - "learning_rate": 4.448550312677658e-06, - "loss": 4.6817, - "step": 16525 - }, - { - "epoch": 0.9137645107794361, - "grad_norm": 3.544811248779297, - "learning_rate": 4.434337691870381e-06, - "loss": 5.0695, - "step": 16530 - }, - { - "epoch": 0.91404090657822, - "grad_norm": 4.132248401641846, - "learning_rate": 4.420125071063104e-06, - "loss": 4.9197, - "step": 16535 - }, - { - "epoch": 0.9143173023770038, - "grad_norm": 3.680356979370117, - "learning_rate": 4.405912450255827e-06, - "loss": 4.5352, - "step": 16540 - }, - { - "epoch": 0.9145936981757877, - "grad_norm": 4.035909175872803, - "learning_rate": 4.3916998294485505e-06, - "loss": 4.8975, - "step": 16545 - }, - { - "epoch": 0.9148700939745716, - "grad_norm": 3.8951597213745117, - "learning_rate": 4.377487208641274e-06, - "loss": 4.817, - "step": 16550 - }, - { - "epoch": 0.9151464897733554, - "grad_norm": 3.9107933044433594, - "learning_rate": 4.363274587833997e-06, - "loss": 4.7651, - "step": 16555 - }, - { - "epoch": 0.9154228855721394, - "grad_norm": 3.4493396282196045, - "learning_rate": 4.34906196702672e-06, - "loss": 5.076, - "step": 16560 - }, - { - "epoch": 0.9156992813709232, - "grad_norm": 4.296689987182617, - "learning_rate": 4.334849346219443e-06, - "loss": 4.9039, - "step": 16565 - }, - { - "epoch": 0.9159756771697071, - "grad_norm": 3.8431396484375, - "learning_rate": 4.320636725412166e-06, - "loss": 4.6727, - "step": 16570 - }, - { - "epoch": 0.9162520729684909, - "grad_norm": 4.500297546386719, - "learning_rate": 4.306424104604889e-06, - "loss": 5.3912, - "step": 16575 - }, - { - "epoch": 0.9165284687672748, - "grad_norm": 4.726571083068848, - "learning_rate": 4.292211483797612e-06, - "loss": 5.2984, - "step": 16580 - }, - { - "epoch": 0.9168048645660586, - "grad_norm": 3.416921615600586, - "learning_rate": 4.2779988629903355e-06, - "loss": 4.6175, - "step": 16585 - }, - { - "epoch": 0.9170812603648425, - "grad_norm": 3.1644046306610107, - "learning_rate": 4.263786242183059e-06, - "loss": 4.6776, - "step": 16590 - }, - { - "epoch": 0.9173576561636263, - "grad_norm": 3.82157826423645, - "learning_rate": 4.249573621375782e-06, - "loss": 4.881, - "step": 16595 - }, - { - "epoch": 0.9176340519624102, - "grad_norm": 3.8752880096435547, - "learning_rate": 4.235361000568505e-06, - "loss": 4.749, - "step": 16600 - }, - { - "epoch": 0.917910447761194, - "grad_norm": 3.842712879180908, - "learning_rate": 4.221148379761229e-06, - "loss": 4.7253, - "step": 16605 - }, - { - "epoch": 0.9181868435599779, - "grad_norm": 4.872412204742432, - "learning_rate": 4.206935758953951e-06, - "loss": 4.7417, - "step": 16610 - }, - { - "epoch": 0.9184632393587617, - "grad_norm": 3.414172649383545, - "learning_rate": 4.192723138146674e-06, - "loss": 5.1289, - "step": 16615 - }, - { - "epoch": 0.9187396351575456, - "grad_norm": 4.3799543380737305, - "learning_rate": 4.1785105173393974e-06, - "loss": 4.9575, - "step": 16620 - }, - { - "epoch": 0.9190160309563294, - "grad_norm": 3.8672125339508057, - "learning_rate": 4.164297896532121e-06, - "loss": 4.7991, - "step": 16625 - }, - { - "epoch": 0.9192924267551134, - "grad_norm": 3.75191593170166, - "learning_rate": 4.150085275724844e-06, - "loss": 4.8514, - "step": 16630 - }, - { - "epoch": 0.9195688225538972, - "grad_norm": 3.8761277198791504, - "learning_rate": 4.135872654917567e-06, - "loss": 4.9027, - "step": 16635 - }, - { - "epoch": 0.9198452183526811, - "grad_norm": 3.8116533756256104, - "learning_rate": 4.12166003411029e-06, - "loss": 4.7569, - "step": 16640 - }, - { - "epoch": 0.9201216141514649, - "grad_norm": 3.882634162902832, - "learning_rate": 4.107447413303013e-06, - "loss": 4.6937, - "step": 16645 - }, - { - "epoch": 0.9203980099502488, - "grad_norm": 3.3166961669921875, - "learning_rate": 4.093234792495736e-06, - "loss": 4.8617, - "step": 16650 - }, - { - "epoch": 0.9206744057490326, - "grad_norm": 4.038979530334473, - "learning_rate": 4.079022171688459e-06, - "loss": 4.9206, - "step": 16655 - }, - { - "epoch": 0.9209508015478165, - "grad_norm": 3.362423896789551, - "learning_rate": 4.064809550881183e-06, - "loss": 4.9875, - "step": 16660 - }, - { - "epoch": 0.9212271973466003, - "grad_norm": 3.3738491535186768, - "learning_rate": 4.050596930073906e-06, - "loss": 4.7933, - "step": 16665 - }, - { - "epoch": 0.9215035931453842, - "grad_norm": 3.503601551055908, - "learning_rate": 4.036384309266629e-06, - "loss": 5.0732, - "step": 16670 - }, - { - "epoch": 0.921779988944168, - "grad_norm": 3.1450142860412598, - "learning_rate": 4.022171688459352e-06, - "loss": 4.7811, - "step": 16675 - }, - { - "epoch": 0.9220563847429519, - "grad_norm": 3.4202752113342285, - "learning_rate": 4.007959067652075e-06, - "loss": 5.0072, - "step": 16680 - }, - { - "epoch": 0.9223327805417357, - "grad_norm": 3.7823784351348877, - "learning_rate": 3.993746446844798e-06, - "loss": 5.1254, - "step": 16685 - }, - { - "epoch": 0.9226091763405196, - "grad_norm": 4.299469947814941, - "learning_rate": 3.979533826037521e-06, - "loss": 4.9729, - "step": 16690 - }, - { - "epoch": 0.9228855721393034, - "grad_norm": 3.722261905670166, - "learning_rate": 3.965321205230245e-06, - "loss": 4.9265, - "step": 16695 - }, - { - "epoch": 0.9231619679380874, - "grad_norm": 4.488846778869629, - "learning_rate": 3.9511085844229676e-06, - "loss": 4.8332, - "step": 16700 - }, - { - "epoch": 0.9234383637368712, - "grad_norm": 3.4476404190063477, - "learning_rate": 3.936895963615691e-06, - "loss": 4.8358, - "step": 16705 - }, - { - "epoch": 0.9237147595356551, - "grad_norm": 3.9329166412353516, - "learning_rate": 3.922683342808414e-06, - "loss": 4.721, - "step": 16710 - }, - { - "epoch": 0.9239911553344389, - "grad_norm": 4.05313777923584, - "learning_rate": 3.908470722001138e-06, - "loss": 4.9849, - "step": 16715 - }, - { - "epoch": 0.9242675511332228, - "grad_norm": 3.66392183303833, - "learning_rate": 3.89425810119386e-06, - "loss": 4.8486, - "step": 16720 - }, - { - "epoch": 0.9245439469320066, - "grad_norm": 4.335777759552002, - "learning_rate": 3.880045480386583e-06, - "loss": 4.9046, - "step": 16725 - }, - { - "epoch": 0.9248203427307905, - "grad_norm": 4.8382954597473145, - "learning_rate": 3.865832859579307e-06, - "loss": 4.8476, - "step": 16730 - }, - { - "epoch": 0.9250967385295743, - "grad_norm": 3.354020357131958, - "learning_rate": 3.8516202387720295e-06, - "loss": 5.12, - "step": 16735 - }, - { - "epoch": 0.9253731343283582, - "grad_norm": 3.5212149620056152, - "learning_rate": 3.837407617964753e-06, - "loss": 4.6406, - "step": 16740 - }, - { - "epoch": 0.925649530127142, - "grad_norm": 4.447071552276611, - "learning_rate": 3.823194997157476e-06, - "loss": 5.1303, - "step": 16745 - }, - { - "epoch": 0.9259259259259259, - "grad_norm": 4.714621067047119, - "learning_rate": 3.8089823763501993e-06, - "loss": 5.0047, - "step": 16750 - }, - { - "epoch": 0.9262023217247097, - "grad_norm": 4.3438568115234375, - "learning_rate": 3.794769755542922e-06, - "loss": 4.8138, - "step": 16755 - }, - { - "epoch": 0.9264787175234936, - "grad_norm": 3.487215280532837, - "learning_rate": 3.7805571347356456e-06, - "loss": 5.0629, - "step": 16760 - }, - { - "epoch": 0.9267551133222774, - "grad_norm": 3.3207781314849854, - "learning_rate": 3.7663445139283687e-06, - "loss": 4.6624, - "step": 16765 - }, - { - "epoch": 0.9270315091210614, - "grad_norm": 3.6874585151672363, - "learning_rate": 3.7521318931210914e-06, - "loss": 4.9265, - "step": 16770 - }, - { - "epoch": 0.9273079049198453, - "grad_norm": 3.417487859725952, - "learning_rate": 3.7379192723138145e-06, - "loss": 4.8298, - "step": 16775 - }, - { - "epoch": 0.9275843007186291, - "grad_norm": 3.985056161880493, - "learning_rate": 3.723706651506538e-06, - "loss": 5.0497, - "step": 16780 - }, - { - "epoch": 0.927860696517413, - "grad_norm": 3.5535130500793457, - "learning_rate": 3.7094940306992612e-06, - "loss": 4.9327, - "step": 16785 - }, - { - "epoch": 0.9281370923161968, - "grad_norm": 3.7775490283966064, - "learning_rate": 3.695281409891984e-06, - "loss": 4.7599, - "step": 16790 - }, - { - "epoch": 0.9284134881149807, - "grad_norm": 3.6912879943847656, - "learning_rate": 3.6810687890847075e-06, - "loss": 4.73, - "step": 16795 - }, - { - "epoch": 0.9286898839137645, - "grad_norm": 3.8959012031555176, - "learning_rate": 3.6668561682774306e-06, - "loss": 4.6683, - "step": 16800 - }, - { - "epoch": 0.9289662797125484, - "grad_norm": 3.763211250305176, - "learning_rate": 3.6526435474701538e-06, - "loss": 4.9404, - "step": 16805 - }, - { - "epoch": 0.9292426755113322, - "grad_norm": 2.960988998413086, - "learning_rate": 3.6384309266628765e-06, - "loss": 4.7549, - "step": 16810 - }, - { - "epoch": 0.9295190713101161, - "grad_norm": 3.7784411907196045, - "learning_rate": 3.6242183058556e-06, - "loss": 5.0343, - "step": 16815 - }, - { - "epoch": 0.9297954671088999, - "grad_norm": 3.5923070907592773, - "learning_rate": 3.610005685048323e-06, - "loss": 4.9945, - "step": 16820 - }, - { - "epoch": 0.9300718629076838, - "grad_norm": 4.040002822875977, - "learning_rate": 3.595793064241046e-06, - "loss": 4.9812, - "step": 16825 - }, - { - "epoch": 0.9303482587064676, - "grad_norm": 4.1851806640625, - "learning_rate": 3.5815804434337694e-06, - "loss": 5.0198, - "step": 16830 - }, - { - "epoch": 0.9306246545052516, - "grad_norm": 3.2891855239868164, - "learning_rate": 3.5673678226264926e-06, - "loss": 4.9279, - "step": 16835 - }, - { - "epoch": 0.9309010503040354, - "grad_norm": 4.148435115814209, - "learning_rate": 3.5531552018192157e-06, - "loss": 4.7171, - "step": 16840 - }, - { - "epoch": 0.9311774461028193, - "grad_norm": 3.529670238494873, - "learning_rate": 3.5389425810119384e-06, - "loss": 5.2854, - "step": 16845 - }, - { - "epoch": 0.9314538419016031, - "grad_norm": 4.003045082092285, - "learning_rate": 3.524729960204662e-06, - "loss": 4.829, - "step": 16850 - }, - { - "epoch": 0.931730237700387, - "grad_norm": 4.24057674407959, - "learning_rate": 3.510517339397385e-06, - "loss": 4.6103, - "step": 16855 - }, - { - "epoch": 0.9320066334991708, - "grad_norm": 4.488296985626221, - "learning_rate": 3.4963047185901086e-06, - "loss": 4.9311, - "step": 16860 - }, - { - "epoch": 0.9322830292979547, - "grad_norm": 3.4120020866394043, - "learning_rate": 3.482092097782831e-06, - "loss": 4.8596, - "step": 16865 - }, - { - "epoch": 0.9325594250967385, - "grad_norm": 3.683335781097412, - "learning_rate": 3.4678794769755545e-06, - "loss": 4.5523, - "step": 16870 - }, - { - "epoch": 0.9328358208955224, - "grad_norm": 3.4997408390045166, - "learning_rate": 3.4536668561682776e-06, - "loss": 4.8484, - "step": 16875 - }, - { - "epoch": 0.9331122166943062, - "grad_norm": 4.172585964202881, - "learning_rate": 3.4394542353610003e-06, - "loss": 4.8826, - "step": 16880 - }, - { - "epoch": 0.9333886124930901, - "grad_norm": 3.2831268310546875, - "learning_rate": 3.425241614553724e-06, - "loss": 4.8996, - "step": 16885 - }, - { - "epoch": 0.9336650082918739, - "grad_norm": 4.524475574493408, - "learning_rate": 3.411028993746447e-06, - "loss": 5.0182, - "step": 16890 - }, - { - "epoch": 0.9339414040906578, - "grad_norm": 3.8012876510620117, - "learning_rate": 3.3968163729391706e-06, - "loss": 4.9137, - "step": 16895 - }, - { - "epoch": 0.9342177998894416, - "grad_norm": 4.124208450317383, - "learning_rate": 3.382603752131893e-06, - "loss": 4.7937, - "step": 16900 - }, - { - "epoch": 0.9344941956882256, - "grad_norm": 3.331831932067871, - "learning_rate": 3.3683911313246164e-06, - "loss": 4.7215, - "step": 16905 - }, - { - "epoch": 0.9347705914870094, - "grad_norm": 4.77909517288208, - "learning_rate": 3.3541785105173395e-06, - "loss": 4.811, - "step": 16910 - }, - { - "epoch": 0.9350469872857933, - "grad_norm": 4.471525192260742, - "learning_rate": 3.339965889710063e-06, - "loss": 4.704, - "step": 16915 - }, - { - "epoch": 0.9353233830845771, - "grad_norm": 3.200888156890869, - "learning_rate": 3.325753268902786e-06, - "loss": 4.5741, - "step": 16920 - }, - { - "epoch": 0.935599778883361, - "grad_norm": 3.5812439918518066, - "learning_rate": 3.311540648095509e-06, - "loss": 4.9324, - "step": 16925 - }, - { - "epoch": 0.9358761746821448, - "grad_norm": 4.601278305053711, - "learning_rate": 3.2973280272882325e-06, - "loss": 4.908, - "step": 16930 - }, - { - "epoch": 0.9361525704809287, - "grad_norm": 3.4622044563293457, - "learning_rate": 3.2831154064809548e-06, - "loss": 4.5155, - "step": 16935 - }, - { - "epoch": 0.9364289662797125, - "grad_norm": 4.049286842346191, - "learning_rate": 3.2689027856736783e-06, - "loss": 4.7219, - "step": 16940 - }, - { - "epoch": 0.9367053620784964, - "grad_norm": 4.182099342346191, - "learning_rate": 3.2546901648664015e-06, - "loss": 4.8228, - "step": 16945 - }, - { - "epoch": 0.9369817578772802, - "grad_norm": 3.8937876224517822, - "learning_rate": 3.240477544059125e-06, - "loss": 4.8272, - "step": 16950 - }, - { - "epoch": 0.9372581536760641, - "grad_norm": 4.102065086364746, - "learning_rate": 3.2262649232518477e-06, - "loss": 5.0221, - "step": 16955 - }, - { - "epoch": 0.9375345494748479, - "grad_norm": 3.8191025257110596, - "learning_rate": 3.212052302444571e-06, - "loss": 4.9027, - "step": 16960 - }, - { - "epoch": 0.9378109452736318, - "grad_norm": 3.0223190784454346, - "learning_rate": 3.1978396816372944e-06, - "loss": 4.7848, - "step": 16965 - }, - { - "epoch": 0.9380873410724156, - "grad_norm": 3.0020852088928223, - "learning_rate": 3.1836270608300175e-06, - "loss": 4.8542, - "step": 16970 - }, - { - "epoch": 0.9383637368711996, - "grad_norm": 3.872197389602661, - "learning_rate": 3.1694144400227403e-06, - "loss": 4.4044, - "step": 16975 - }, - { - "epoch": 0.9386401326699834, - "grad_norm": 4.418979167938232, - "learning_rate": 3.1552018192154634e-06, - "loss": 4.732, - "step": 16980 - }, - { - "epoch": 0.9389165284687673, - "grad_norm": 3.6312499046325684, - "learning_rate": 3.140989198408187e-06, - "loss": 5.0736, - "step": 16985 - }, - { - "epoch": 0.9391929242675512, - "grad_norm": 4.047171592712402, - "learning_rate": 3.1267765776009097e-06, - "loss": 4.8834, - "step": 16990 - }, - { - "epoch": 0.939469320066335, - "grad_norm": 3.8443045616149902, - "learning_rate": 3.1125639567936328e-06, - "loss": 5.0899, - "step": 16995 - }, - { - "epoch": 0.9397457158651189, - "grad_norm": 3.2416129112243652, - "learning_rate": 3.0983513359863563e-06, - "loss": 4.6055, - "step": 17000 - }, - { - "epoch": 0.9400221116639027, - "grad_norm": 4.050750255584717, - "learning_rate": 3.084138715179079e-06, - "loss": 4.6478, - "step": 17005 - }, - { - "epoch": 0.9402985074626866, - "grad_norm": 3.5441677570343018, - "learning_rate": 3.0699260943718026e-06, - "loss": 4.7012, - "step": 17010 - }, - { - "epoch": 0.9405749032614704, - "grad_norm": 3.7427573204040527, - "learning_rate": 3.0557134735645253e-06, - "loss": 4.8786, - "step": 17015 - }, - { - "epoch": 0.9408512990602543, - "grad_norm": 3.412027359008789, - "learning_rate": 3.041500852757249e-06, - "loss": 5.0255, - "step": 17020 - }, - { - "epoch": 0.9411276948590381, - "grad_norm": 4.9815497398376465, - "learning_rate": 3.0272882319499716e-06, - "loss": 4.6702, - "step": 17025 - }, - { - "epoch": 0.941404090657822, - "grad_norm": 4.5840044021606445, - "learning_rate": 3.0130756111426947e-06, - "loss": 4.7252, - "step": 17030 - }, - { - "epoch": 0.9416804864566058, - "grad_norm": 3.795989751815796, - "learning_rate": 2.998862990335418e-06, - "loss": 5.215, - "step": 17035 - }, - { - "epoch": 0.9419568822553898, - "grad_norm": 3.6358675956726074, - "learning_rate": 2.984650369528141e-06, - "loss": 4.6967, - "step": 17040 - }, - { - "epoch": 0.9422332780541736, - "grad_norm": 3.8705015182495117, - "learning_rate": 2.9704377487208645e-06, - "loss": 4.806, - "step": 17045 - }, - { - "epoch": 0.9425096738529575, - "grad_norm": 3.705080270767212, - "learning_rate": 2.9562251279135872e-06, - "loss": 4.7858, - "step": 17050 - }, - { - "epoch": 0.9427860696517413, - "grad_norm": 3.397587299346924, - "learning_rate": 2.942012507106311e-06, - "loss": 4.4487, - "step": 17055 - }, - { - "epoch": 0.9430624654505252, - "grad_norm": 4.504406929016113, - "learning_rate": 2.9277998862990335e-06, - "loss": 5.0349, - "step": 17060 - }, - { - "epoch": 0.943338861249309, - "grad_norm": 4.826185703277588, - "learning_rate": 2.913587265491757e-06, - "loss": 5.0815, - "step": 17065 - }, - { - "epoch": 0.9436152570480929, - "grad_norm": 3.7354743480682373, - "learning_rate": 2.8993746446844798e-06, - "loss": 4.5416, - "step": 17070 - }, - { - "epoch": 0.9438916528468767, - "grad_norm": 3.69808030128479, - "learning_rate": 2.885162023877203e-06, - "loss": 5.134, - "step": 17075 - }, - { - "epoch": 0.9441680486456606, - "grad_norm": 3.6593971252441406, - "learning_rate": 2.8709494030699265e-06, - "loss": 4.9477, - "step": 17080 - }, - { - "epoch": 0.9444444444444444, - "grad_norm": 3.8395798206329346, - "learning_rate": 2.856736782262649e-06, - "loss": 4.4886, - "step": 17085 - }, - { - "epoch": 0.9447208402432283, - "grad_norm": 4.302114009857178, - "learning_rate": 2.8425241614553727e-06, - "loss": 4.9254, - "step": 17090 - }, - { - "epoch": 0.9449972360420121, - "grad_norm": 3.074225664138794, - "learning_rate": 2.8283115406480954e-06, - "loss": 4.586, - "step": 17095 - }, - { - "epoch": 0.945273631840796, - "grad_norm": 4.352227687835693, - "learning_rate": 2.814098919840819e-06, - "loss": 4.9557, - "step": 17100 - }, - { - "epoch": 0.9455500276395798, - "grad_norm": 4.241294860839844, - "learning_rate": 2.7998862990335417e-06, - "loss": 5.0972, - "step": 17105 - }, - { - "epoch": 0.9458264234383638, - "grad_norm": 3.86039662361145, - "learning_rate": 2.7856736782262652e-06, - "loss": 4.9244, - "step": 17110 - }, - { - "epoch": 0.9461028192371476, - "grad_norm": 3.4481401443481445, - "learning_rate": 2.771461057418988e-06, - "loss": 4.6124, - "step": 17115 - }, - { - "epoch": 0.9463792150359315, - "grad_norm": 3.4227750301361084, - "learning_rate": 2.7572484366117115e-06, - "loss": 4.7664, - "step": 17120 - }, - { - "epoch": 0.9466556108347153, - "grad_norm": 4.316078186035156, - "learning_rate": 2.7430358158044346e-06, - "loss": 4.8977, - "step": 17125 - }, - { - "epoch": 0.9469320066334992, - "grad_norm": 4.136010646820068, - "learning_rate": 2.7288231949971574e-06, - "loss": 5.169, - "step": 17130 - }, - { - "epoch": 0.947208402432283, - "grad_norm": 3.5034444332122803, - "learning_rate": 2.714610574189881e-06, - "loss": 4.9702, - "step": 17135 - }, - { - "epoch": 0.9474847982310669, - "grad_norm": 3.8330748081207275, - "learning_rate": 2.7003979533826036e-06, - "loss": 5.0539, - "step": 17140 - }, - { - "epoch": 0.9477611940298507, - "grad_norm": 3.778444290161133, - "learning_rate": 2.686185332575327e-06, - "loss": 4.9856, - "step": 17145 - }, - { - "epoch": 0.9480375898286346, - "grad_norm": 4.075733184814453, - "learning_rate": 2.67197271176805e-06, - "loss": 4.4412, - "step": 17150 - }, - { - "epoch": 0.9483139856274184, - "grad_norm": 4.561306953430176, - "learning_rate": 2.6577600909607734e-06, - "loss": 4.5683, - "step": 17155 - }, - { - "epoch": 0.9485903814262023, - "grad_norm": 4.139663219451904, - "learning_rate": 2.6435474701534966e-06, - "loss": 5.1347, - "step": 17160 - }, - { - "epoch": 0.9488667772249861, - "grad_norm": 4.152304172515869, - "learning_rate": 2.6293348493462197e-06, - "loss": 4.851, - "step": 17165 - }, - { - "epoch": 0.94914317302377, - "grad_norm": 3.967447280883789, - "learning_rate": 2.615122228538943e-06, - "loss": 4.9876, - "step": 17170 - }, - { - "epoch": 0.9494195688225538, - "grad_norm": 3.313063144683838, - "learning_rate": 2.6009096077316655e-06, - "loss": 4.8804, - "step": 17175 - }, - { - "epoch": 0.9496959646213378, - "grad_norm": 3.359733819961548, - "learning_rate": 2.586696986924389e-06, - "loss": 4.673, - "step": 17180 - }, - { - "epoch": 0.9499723604201216, - "grad_norm": 3.648066520690918, - "learning_rate": 2.572484366117112e-06, - "loss": 4.9545, - "step": 17185 - }, - { - "epoch": 0.9502487562189055, - "grad_norm": 3.5169296264648438, - "learning_rate": 2.5582717453098354e-06, - "loss": 4.7484, - "step": 17190 - }, - { - "epoch": 0.9505251520176893, - "grad_norm": 3.443011522293091, - "learning_rate": 2.5440591245025585e-06, - "loss": 4.6217, - "step": 17195 - }, - { - "epoch": 0.9508015478164732, - "grad_norm": 3.1068191528320312, - "learning_rate": 2.5298465036952816e-06, - "loss": 4.6914, - "step": 17200 - }, - { - "epoch": 0.951077943615257, - "grad_norm": 3.6582255363464355, - "learning_rate": 2.5156338828880048e-06, - "loss": 4.7874, - "step": 17205 - }, - { - "epoch": 0.9513543394140409, - "grad_norm": 4.528609752655029, - "learning_rate": 2.501421262080728e-06, - "loss": 4.8206, - "step": 17210 - }, - { - "epoch": 0.9516307352128248, - "grad_norm": 4.048614025115967, - "learning_rate": 2.487208641273451e-06, - "loss": 5.1836, - "step": 17215 - }, - { - "epoch": 0.9519071310116086, - "grad_norm": 3.5472965240478516, - "learning_rate": 2.472996020466174e-06, - "loss": 4.8558, - "step": 17220 - }, - { - "epoch": 0.9521835268103925, - "grad_norm": 3.3577089309692383, - "learning_rate": 2.4587833996588973e-06, - "loss": 5.1221, - "step": 17225 - }, - { - "epoch": 0.9524599226091763, - "grad_norm": 3.5389177799224854, - "learning_rate": 2.44457077885162e-06, - "loss": 5.0266, - "step": 17230 - }, - { - "epoch": 0.9527363184079602, - "grad_norm": 4.607699871063232, - "learning_rate": 2.4303581580443436e-06, - "loss": 5.0358, - "step": 17235 - }, - { - "epoch": 0.953012714206744, - "grad_norm": 3.5849125385284424, - "learning_rate": 2.4161455372370667e-06, - "loss": 4.8641, - "step": 17240 - }, - { - "epoch": 0.953289110005528, - "grad_norm": 4.045363426208496, - "learning_rate": 2.40193291642979e-06, - "loss": 4.5956, - "step": 17245 - }, - { - "epoch": 0.9535655058043118, - "grad_norm": 3.181807041168213, - "learning_rate": 2.387720295622513e-06, - "loss": 4.868, - "step": 17250 - }, - { - "epoch": 0.9538419016030957, - "grad_norm": 3.7292158603668213, - "learning_rate": 2.373507674815236e-06, - "loss": 4.9381, - "step": 17255 - }, - { - "epoch": 0.9541182974018795, - "grad_norm": 3.4840378761291504, - "learning_rate": 2.3592950540079592e-06, - "loss": 4.6714, - "step": 17260 - }, - { - "epoch": 0.9543946932006634, - "grad_norm": 4.143215179443359, - "learning_rate": 2.3450824332006823e-06, - "loss": 4.5491, - "step": 17265 - }, - { - "epoch": 0.9546710889994472, - "grad_norm": 4.7000555992126465, - "learning_rate": 2.3308698123934055e-06, - "loss": 4.7206, - "step": 17270 - }, - { - "epoch": 0.9549474847982311, - "grad_norm": 3.5755834579467773, - "learning_rate": 2.3166571915861286e-06, - "loss": 4.6387, - "step": 17275 - }, - { - "epoch": 0.9552238805970149, - "grad_norm": 3.0749599933624268, - "learning_rate": 2.3024445707788517e-06, - "loss": 4.6379, - "step": 17280 - }, - { - "epoch": 0.9555002763957988, - "grad_norm": 4.241413593292236, - "learning_rate": 2.288231949971575e-06, - "loss": 4.7813, - "step": 17285 - }, - { - "epoch": 0.9557766721945826, - "grad_norm": 4.150501251220703, - "learning_rate": 2.274019329164298e-06, - "loss": 5.048, - "step": 17290 - }, - { - "epoch": 0.9560530679933665, - "grad_norm": 3.893385887145996, - "learning_rate": 2.259806708357021e-06, - "loss": 5.1972, - "step": 17295 - }, - { - "epoch": 0.9563294637921503, - "grad_norm": 3.0259926319122314, - "learning_rate": 2.2455940875497443e-06, - "loss": 4.9307, - "step": 17300 - }, - { - "epoch": 0.9566058595909342, - "grad_norm": 4.032433986663818, - "learning_rate": 2.2313814667424674e-06, - "loss": 4.8233, - "step": 17305 - }, - { - "epoch": 0.956882255389718, - "grad_norm": 3.581953525543213, - "learning_rate": 2.2171688459351905e-06, - "loss": 4.994, - "step": 17310 - }, - { - "epoch": 0.957158651188502, - "grad_norm": 3.022449493408203, - "learning_rate": 2.2029562251279137e-06, - "loss": 4.8858, - "step": 17315 - }, - { - "epoch": 0.9574350469872858, - "grad_norm": 3.664137125015259, - "learning_rate": 2.188743604320637e-06, - "loss": 4.8783, - "step": 17320 - }, - { - "epoch": 0.9577114427860697, - "grad_norm": 3.227354049682617, - "learning_rate": 2.17453098351336e-06, - "loss": 4.923, - "step": 17325 - }, - { - "epoch": 0.9579878385848535, - "grad_norm": 3.1704261302948, - "learning_rate": 2.160318362706083e-06, - "loss": 4.8075, - "step": 17330 - }, - { - "epoch": 0.9582642343836374, - "grad_norm": 3.537228584289551, - "learning_rate": 2.146105741898806e-06, - "loss": 4.7424, - "step": 17335 - }, - { - "epoch": 0.9585406301824212, - "grad_norm": 4.334897518157959, - "learning_rate": 2.1318931210915293e-06, - "loss": 4.9451, - "step": 17340 - }, - { - "epoch": 0.9588170259812051, - "grad_norm": 3.4837393760681152, - "learning_rate": 2.1176805002842525e-06, - "loss": 4.8, - "step": 17345 - }, - { - "epoch": 0.9590934217799889, - "grad_norm": 3.615074396133423, - "learning_rate": 2.1034678794769756e-06, - "loss": 4.9119, - "step": 17350 - }, - { - "epoch": 0.9593698175787728, - "grad_norm": 3.716219425201416, - "learning_rate": 2.0892552586696987e-06, - "loss": 5.1262, - "step": 17355 - }, - { - "epoch": 0.9596462133775566, - "grad_norm": 3.8585875034332275, - "learning_rate": 2.075042637862422e-06, - "loss": 4.9943, - "step": 17360 - }, - { - "epoch": 0.9599226091763405, - "grad_norm": 3.1968586444854736, - "learning_rate": 2.060830017055145e-06, - "loss": 4.8486, - "step": 17365 - }, - { - "epoch": 0.9601990049751243, - "grad_norm": 5.157547950744629, - "learning_rate": 2.046617396247868e-06, - "loss": 5.1108, - "step": 17370 - }, - { - "epoch": 0.9604754007739082, - "grad_norm": 4.623779296875, - "learning_rate": 2.0324047754405917e-06, - "loss": 4.89, - "step": 17375 - }, - { - "epoch": 0.960751796572692, - "grad_norm": 3.5606627464294434, - "learning_rate": 2.0181921546333144e-06, - "loss": 4.8832, - "step": 17380 - }, - { - "epoch": 0.961028192371476, - "grad_norm": 3.792771577835083, - "learning_rate": 2.0039795338260375e-06, - "loss": 4.8013, - "step": 17385 - }, - { - "epoch": 0.9613045881702598, - "grad_norm": 3.4608917236328125, - "learning_rate": 1.9897669130187606e-06, - "loss": 4.888, - "step": 17390 - }, - { - "epoch": 0.9615809839690437, - "grad_norm": 4.744699001312256, - "learning_rate": 1.9755542922114838e-06, - "loss": 5.3272, - "step": 17395 - }, - { - "epoch": 0.9618573797678275, - "grad_norm": 3.0279312133789062, - "learning_rate": 1.961341671404207e-06, - "loss": 4.6455, - "step": 17400 - }, - { - "epoch": 0.9621337755666114, - "grad_norm": 3.420703649520874, - "learning_rate": 1.94712905059693e-06, - "loss": 4.8824, - "step": 17405 - }, - { - "epoch": 0.9624101713653952, - "grad_norm": 3.330737590789795, - "learning_rate": 1.9329164297896536e-06, - "loss": 4.7754, - "step": 17410 - }, - { - "epoch": 0.9626865671641791, - "grad_norm": 3.5434346199035645, - "learning_rate": 1.9187038089823763e-06, - "loss": 4.6785, - "step": 17415 - }, - { - "epoch": 0.9629629629629629, - "grad_norm": 4.19118595123291, - "learning_rate": 1.9044911881750997e-06, - "loss": 4.8247, - "step": 17420 - }, - { - "epoch": 0.9632393587617468, - "grad_norm": 4.7213454246521, - "learning_rate": 1.8902785673678228e-06, - "loss": 4.8312, - "step": 17425 - }, - { - "epoch": 0.9635157545605307, - "grad_norm": 3.6587038040161133, - "learning_rate": 1.8760659465605457e-06, - "loss": 5.1918, - "step": 17430 - }, - { - "epoch": 0.9637921503593145, - "grad_norm": 3.242537021636963, - "learning_rate": 1.861853325753269e-06, - "loss": 4.9542, - "step": 17435 - }, - { - "epoch": 0.9640685461580984, - "grad_norm": 3.7462520599365234, - "learning_rate": 1.847640704945992e-06, - "loss": 4.9511, - "step": 17440 - }, - { - "epoch": 0.9643449419568823, - "grad_norm": 3.6050407886505127, - "learning_rate": 1.8334280841387153e-06, - "loss": 4.8826, - "step": 17445 - }, - { - "epoch": 0.9646213377556662, - "grad_norm": 3.979299783706665, - "learning_rate": 1.8192154633314382e-06, - "loss": 4.879, - "step": 17450 - }, - { - "epoch": 0.96489773355445, - "grad_norm": 4.422558784484863, - "learning_rate": 1.8050028425241616e-06, - "loss": 5.2462, - "step": 17455 - }, - { - "epoch": 0.9651741293532339, - "grad_norm": 3.984792947769165, - "learning_rate": 1.7907902217168847e-06, - "loss": 4.9413, - "step": 17460 - }, - { - "epoch": 0.9654505251520177, - "grad_norm": 3.3358352184295654, - "learning_rate": 1.7765776009096078e-06, - "loss": 4.8726, - "step": 17465 - }, - { - "epoch": 0.9657269209508016, - "grad_norm": 3.0157341957092285, - "learning_rate": 1.762364980102331e-06, - "loss": 4.9832, - "step": 17470 - }, - { - "epoch": 0.9660033167495854, - "grad_norm": 3.5615592002868652, - "learning_rate": 1.7481523592950543e-06, - "loss": 4.6077, - "step": 17475 - }, - { - "epoch": 0.9662797125483693, - "grad_norm": 3.1383845806121826, - "learning_rate": 1.7339397384877772e-06, - "loss": 4.7842, - "step": 17480 - }, - { - "epoch": 0.9665561083471531, - "grad_norm": 4.033356189727783, - "learning_rate": 1.7197271176805002e-06, - "loss": 4.9063, - "step": 17485 - }, - { - "epoch": 0.966832504145937, - "grad_norm": 3.651676654815674, - "learning_rate": 1.7055144968732235e-06, - "loss": 4.7036, - "step": 17490 - }, - { - "epoch": 0.9671088999447208, - "grad_norm": 3.519984245300293, - "learning_rate": 1.6913018760659464e-06, - "loss": 4.9829, - "step": 17495 - }, - { - "epoch": 0.9673852957435047, - "grad_norm": 2.9878344535827637, - "learning_rate": 1.6770892552586698e-06, - "loss": 4.6443, - "step": 17500 - }, - { - "epoch": 0.9676616915422885, - "grad_norm": 3.7152388095855713, - "learning_rate": 1.662876634451393e-06, - "loss": 4.8157, - "step": 17505 - }, - { - "epoch": 0.9679380873410725, - "grad_norm": 3.1234991550445557, - "learning_rate": 1.6486640136441162e-06, - "loss": 5.0816, - "step": 17510 - }, - { - "epoch": 0.9682144831398563, - "grad_norm": 3.695985794067383, - "learning_rate": 1.6344513928368392e-06, - "loss": 5.1458, - "step": 17515 - }, - { - "epoch": 0.9684908789386402, - "grad_norm": 3.487590789794922, - "learning_rate": 1.6202387720295625e-06, - "loss": 4.5166, - "step": 17520 - }, - { - "epoch": 0.968767274737424, - "grad_norm": 3.8516764640808105, - "learning_rate": 1.6060261512222854e-06, - "loss": 5.0152, - "step": 17525 - }, - { - "epoch": 0.9690436705362079, - "grad_norm": 3.8771095275878906, - "learning_rate": 1.5918135304150088e-06, - "loss": 4.7823, - "step": 17530 - }, - { - "epoch": 0.9693200663349917, - "grad_norm": 4.3737287521362305, - "learning_rate": 1.5776009096077317e-06, - "loss": 4.905, - "step": 17535 - }, - { - "epoch": 0.9695964621337756, - "grad_norm": 4.190290451049805, - "learning_rate": 1.5633882888004548e-06, - "loss": 4.7303, - "step": 17540 - }, - { - "epoch": 0.9698728579325594, - "grad_norm": 4.203909397125244, - "learning_rate": 1.5491756679931782e-06, - "loss": 4.6031, - "step": 17545 - }, - { - "epoch": 0.9701492537313433, - "grad_norm": 3.6529483795166016, - "learning_rate": 1.5349630471859013e-06, - "loss": 4.9744, - "step": 17550 - }, - { - "epoch": 0.9704256495301271, - "grad_norm": 3.739197015762329, - "learning_rate": 1.5207504263786244e-06, - "loss": 4.8951, - "step": 17555 - }, - { - "epoch": 0.970702045328911, - "grad_norm": 4.100447177886963, - "learning_rate": 1.5065378055713474e-06, - "loss": 5.0774, - "step": 17560 - }, - { - "epoch": 0.9709784411276948, - "grad_norm": 4.293310165405273, - "learning_rate": 1.4923251847640705e-06, - "loss": 4.7931, - "step": 17565 - }, - { - "epoch": 0.9712548369264787, - "grad_norm": 3.8114206790924072, - "learning_rate": 1.4781125639567936e-06, - "loss": 4.6042, - "step": 17570 - }, - { - "epoch": 0.9715312327252625, - "grad_norm": 3.7172446250915527, - "learning_rate": 1.4638999431495168e-06, - "loss": 4.7856, - "step": 17575 - }, - { - "epoch": 0.9718076285240465, - "grad_norm": 3.316227912902832, - "learning_rate": 1.4496873223422399e-06, - "loss": 4.8669, - "step": 17580 - }, - { - "epoch": 0.9720840243228303, - "grad_norm": 4.058900833129883, - "learning_rate": 1.4354747015349632e-06, - "loss": 4.5917, - "step": 17585 - }, - { - "epoch": 0.9723604201216142, - "grad_norm": 3.633906841278076, - "learning_rate": 1.4212620807276864e-06, - "loss": 4.7329, - "step": 17590 - }, - { - "epoch": 0.972636815920398, - "grad_norm": 4.460212230682373, - "learning_rate": 1.4070494599204095e-06, - "loss": 4.7991, - "step": 17595 - }, - { - "epoch": 0.9729132117191819, - "grad_norm": 3.0964694023132324, - "learning_rate": 1.3928368391131326e-06, - "loss": 4.6162, - "step": 17600 - }, - { - "epoch": 0.9731896075179657, - "grad_norm": 3.333630323410034, - "learning_rate": 1.3786242183058558e-06, - "loss": 4.5838, - "step": 17605 - }, - { - "epoch": 0.9734660033167496, - "grad_norm": 3.469555139541626, - "learning_rate": 1.3644115974985787e-06, - "loss": 5.2188, - "step": 17610 - }, - { - "epoch": 0.9737423991155334, - "grad_norm": 3.8169009685516357, - "learning_rate": 1.3501989766913018e-06, - "loss": 5.018, - "step": 17615 - }, - { - "epoch": 0.9740187949143173, - "grad_norm": 4.118598461151123, - "learning_rate": 1.335986355884025e-06, - "loss": 5.0628, - "step": 17620 - }, - { - "epoch": 0.9742951907131011, - "grad_norm": 3.506039619445801, - "learning_rate": 1.3217737350767483e-06, - "loss": 4.6609, - "step": 17625 - }, - { - "epoch": 0.974571586511885, - "grad_norm": 3.59202241897583, - "learning_rate": 1.3075611142694714e-06, - "loss": 4.9948, - "step": 17630 - }, - { - "epoch": 0.9748479823106688, - "grad_norm": 3.3904547691345215, - "learning_rate": 1.2933484934621945e-06, - "loss": 5.0171, - "step": 17635 - }, - { - "epoch": 0.9751243781094527, - "grad_norm": 3.5725204944610596, - "learning_rate": 1.2791358726549177e-06, - "loss": 4.719, - "step": 17640 - }, - { - "epoch": 0.9754007739082365, - "grad_norm": 4.067680835723877, - "learning_rate": 1.2649232518476408e-06, - "loss": 5.1472, - "step": 17645 - }, - { - "epoch": 0.9756771697070205, - "grad_norm": 3.5419161319732666, - "learning_rate": 1.250710631040364e-06, - "loss": 4.9008, - "step": 17650 - }, - { - "epoch": 0.9759535655058044, - "grad_norm": 4.060971260070801, - "learning_rate": 1.236498010233087e-06, - "loss": 4.8635, - "step": 17655 - }, - { - "epoch": 0.9762299613045882, - "grad_norm": 3.4809699058532715, - "learning_rate": 1.22228538942581e-06, - "loss": 4.7176, - "step": 17660 - }, - { - "epoch": 0.9765063571033721, - "grad_norm": 3.121370792388916, - "learning_rate": 1.2080727686185333e-06, - "loss": 4.7872, - "step": 17665 - }, - { - "epoch": 0.9767827529021559, - "grad_norm": 3.8386788368225098, - "learning_rate": 1.1938601478112565e-06, - "loss": 4.6917, - "step": 17670 - }, - { - "epoch": 0.9770591487009398, - "grad_norm": 3.4671096801757812, - "learning_rate": 1.1796475270039796e-06, - "loss": 5.0222, - "step": 17675 - }, - { - "epoch": 0.9773355444997236, - "grad_norm": 4.064565658569336, - "learning_rate": 1.1654349061967027e-06, - "loss": 4.9298, - "step": 17680 - }, - { - "epoch": 0.9776119402985075, - "grad_norm": 3.155139923095703, - "learning_rate": 1.1512222853894259e-06, - "loss": 4.7341, - "step": 17685 - }, - { - "epoch": 0.9778883360972913, - "grad_norm": 3.358921527862549, - "learning_rate": 1.137009664582149e-06, - "loss": 4.8701, - "step": 17690 - }, - { - "epoch": 0.9781647318960752, - "grad_norm": 3.9727041721343994, - "learning_rate": 1.1227970437748721e-06, - "loss": 4.9399, - "step": 17695 - }, - { - "epoch": 0.978441127694859, - "grad_norm": 3.551064968109131, - "learning_rate": 1.1085844229675953e-06, - "loss": 4.4855, - "step": 17700 - }, - { - "epoch": 0.9787175234936429, - "grad_norm": 3.285154104232788, - "learning_rate": 1.0943718021603184e-06, - "loss": 4.6904, - "step": 17705 - }, - { - "epoch": 0.9789939192924267, - "grad_norm": 3.77357816696167, - "learning_rate": 1.0801591813530415e-06, - "loss": 4.7048, - "step": 17710 - }, - { - "epoch": 0.9792703150912107, - "grad_norm": 3.7652862071990967, - "learning_rate": 1.0659465605457647e-06, - "loss": 4.8469, - "step": 17715 - }, - { - "epoch": 0.9795467108899945, - "grad_norm": 4.785614013671875, - "learning_rate": 1.0517339397384878e-06, - "loss": 4.6331, - "step": 17720 - }, - { - "epoch": 0.9798231066887784, - "grad_norm": 3.4421780109405518, - "learning_rate": 1.037521318931211e-06, - "loss": 4.8027, - "step": 17725 - }, - { - "epoch": 0.9800995024875622, - "grad_norm": 3.414224863052368, - "learning_rate": 1.023308698123934e-06, - "loss": 4.6746, - "step": 17730 - }, - { - "epoch": 0.9803758982863461, - "grad_norm": 3.6963014602661133, - "learning_rate": 1.0090960773166572e-06, - "loss": 4.9759, - "step": 17735 - }, - { - "epoch": 0.9806522940851299, - "grad_norm": 4.396233558654785, - "learning_rate": 9.948834565093803e-07, - "loss": 4.9611, - "step": 17740 - }, - { - "epoch": 0.9809286898839138, - "grad_norm": 3.8200385570526123, - "learning_rate": 9.806708357021035e-07, - "loss": 4.7393, - "step": 17745 - }, - { - "epoch": 0.9812050856826976, - "grad_norm": 3.469111680984497, - "learning_rate": 9.664582148948268e-07, - "loss": 4.9948, - "step": 17750 - }, - { - "epoch": 0.9814814814814815, - "grad_norm": 3.5124690532684326, - "learning_rate": 9.522455940875498e-07, - "loss": 4.9789, - "step": 17755 - }, - { - "epoch": 0.9817578772802653, - "grad_norm": 3.9192700386047363, - "learning_rate": 9.380329732802729e-07, - "loss": 4.7527, - "step": 17760 - }, - { - "epoch": 0.9820342730790492, - "grad_norm": 3.603703260421753, - "learning_rate": 9.23820352472996e-07, - "loss": 4.8427, - "step": 17765 - }, - { - "epoch": 0.982310668877833, - "grad_norm": 3.82257342338562, - "learning_rate": 9.096077316657191e-07, - "loss": 4.6548, - "step": 17770 - }, - { - "epoch": 0.9825870646766169, - "grad_norm": 3.6828839778900146, - "learning_rate": 8.953951108584424e-07, - "loss": 4.7383, - "step": 17775 - }, - { - "epoch": 0.9828634604754007, - "grad_norm": 4.09072732925415, - "learning_rate": 8.811824900511655e-07, - "loss": 4.6382, - "step": 17780 - }, - { - "epoch": 0.9831398562741847, - "grad_norm": 3.557931900024414, - "learning_rate": 8.669698692438886e-07, - "loss": 4.7966, - "step": 17785 - }, - { - "epoch": 0.9834162520729685, - "grad_norm": 3.4907984733581543, - "learning_rate": 8.527572484366118e-07, - "loss": 5.0257, - "step": 17790 - }, - { - "epoch": 0.9836926478717524, - "grad_norm": 4.357024192810059, - "learning_rate": 8.385446276293349e-07, - "loss": 5.2176, - "step": 17795 - }, - { - "epoch": 0.9839690436705362, - "grad_norm": 4.168322563171387, - "learning_rate": 8.243320068220581e-07, - "loss": 4.9219, - "step": 17800 - }, - { - "epoch": 0.9842454394693201, - "grad_norm": 4.186100006103516, - "learning_rate": 8.101193860147813e-07, - "loss": 5.1336, - "step": 17805 - }, - { - "epoch": 0.9845218352681039, - "grad_norm": 4.123225212097168, - "learning_rate": 7.959067652075044e-07, - "loss": 4.7761, - "step": 17810 - }, - { - "epoch": 0.9847982310668878, - "grad_norm": 4.013276100158691, - "learning_rate": 7.816941444002274e-07, - "loss": 4.8168, - "step": 17815 - }, - { - "epoch": 0.9850746268656716, - "grad_norm": 3.063643217086792, - "learning_rate": 7.674815235929507e-07, - "loss": 4.8048, - "step": 17820 - }, - { - "epoch": 0.9853510226644555, - "grad_norm": 4.011346817016602, - "learning_rate": 7.532689027856737e-07, - "loss": 4.3031, - "step": 17825 - }, - { - "epoch": 0.9856274184632393, - "grad_norm": 4.323795318603516, - "learning_rate": 7.390562819783968e-07, - "loss": 4.7946, - "step": 17830 - }, - { - "epoch": 0.9859038142620232, - "grad_norm": 4.777589321136475, - "learning_rate": 7.248436611711199e-07, - "loss": 4.6589, - "step": 17835 - }, - { - "epoch": 0.986180210060807, - "grad_norm": 4.321505546569824, - "learning_rate": 7.106310403638432e-07, - "loss": 4.9267, - "step": 17840 - }, - { - "epoch": 0.9864566058595909, - "grad_norm": 3.9251480102539062, - "learning_rate": 6.964184195565663e-07, - "loss": 4.6038, - "step": 17845 - }, - { - "epoch": 0.9867330016583747, - "grad_norm": 3.0840792655944824, - "learning_rate": 6.822057987492893e-07, - "loss": 4.5904, - "step": 17850 - }, - { - "epoch": 0.9870093974571587, - "grad_norm": 4.160499572753906, - "learning_rate": 6.679931779420125e-07, - "loss": 4.8037, - "step": 17855 - }, - { - "epoch": 0.9872857932559425, - "grad_norm": 4.162237167358398, - "learning_rate": 6.537805571347357e-07, - "loss": 5.0783, - "step": 17860 - }, - { - "epoch": 0.9875621890547264, - "grad_norm": 3.218111515045166, - "learning_rate": 6.395679363274588e-07, - "loss": 4.8852, - "step": 17865 - }, - { - "epoch": 0.9878385848535103, - "grad_norm": 3.7877068519592285, - "learning_rate": 6.25355315520182e-07, - "loss": 5.0094, - "step": 17870 - }, - { - "epoch": 0.9881149806522941, - "grad_norm": 3.7480013370513916, - "learning_rate": 6.11142694712905e-07, - "loss": 4.9167, - "step": 17875 - }, - { - "epoch": 0.988391376451078, - "grad_norm": 3.9518656730651855, - "learning_rate": 5.969300739056282e-07, - "loss": 4.9065, - "step": 17880 - }, - { - "epoch": 0.9886677722498618, - "grad_norm": 4.303936958312988, - "learning_rate": 5.827174530983514e-07, - "loss": 5.0365, - "step": 17885 - }, - { - "epoch": 0.9889441680486457, - "grad_norm": 3.944624423980713, - "learning_rate": 5.685048322910745e-07, - "loss": 4.9906, - "step": 17890 - }, - { - "epoch": 0.9892205638474295, - "grad_norm": 3.4610772132873535, - "learning_rate": 5.542922114837976e-07, - "loss": 4.6898, - "step": 17895 - }, - { - "epoch": 0.9894969596462134, - "grad_norm": 3.5662105083465576, - "learning_rate": 5.400795906765208e-07, - "loss": 5.1878, - "step": 17900 - }, - { - "epoch": 0.9897733554449972, - "grad_norm": 3.8425076007843018, - "learning_rate": 5.258669698692439e-07, - "loss": 4.482, - "step": 17905 - }, - { - "epoch": 0.9900497512437811, - "grad_norm": 3.6111109256744385, - "learning_rate": 5.11654349061967e-07, - "loss": 4.8749, - "step": 17910 - }, - { - "epoch": 0.9903261470425649, - "grad_norm": 3.664844274520874, - "learning_rate": 4.974417282546902e-07, - "loss": 5.1134, - "step": 17915 - }, - { - "epoch": 0.9906025428413489, - "grad_norm": 3.341689109802246, - "learning_rate": 4.832291074474134e-07, - "loss": 5.1036, - "step": 17920 - }, - { - "epoch": 0.9908789386401327, - "grad_norm": 3.0677828788757324, - "learning_rate": 4.6901648664013643e-07, - "loss": 5.0625, - "step": 17925 - }, - { - "epoch": 0.9911553344389166, - "grad_norm": 4.164408206939697, - "learning_rate": 4.5480386583285956e-07, - "loss": 4.8023, - "step": 17930 - }, - { - "epoch": 0.9914317302377004, - "grad_norm": 3.8659586906433105, - "learning_rate": 4.4059124502558274e-07, - "loss": 4.9043, - "step": 17935 - }, - { - "epoch": 0.9917081260364843, - "grad_norm": 4.336862087249756, - "learning_rate": 4.263786242183059e-07, - "loss": 5.1321, - "step": 17940 - }, - { - "epoch": 0.9919845218352681, - "grad_norm": 3.780893325805664, - "learning_rate": 4.1216600341102906e-07, - "loss": 5.1098, - "step": 17945 - }, - { - "epoch": 0.992260917634052, - "grad_norm": 3.7630019187927246, - "learning_rate": 3.979533826037522e-07, - "loss": 4.7298, - "step": 17950 - }, - { - "epoch": 0.9925373134328358, - "grad_norm": 4.459870338439941, - "learning_rate": 3.837407617964753e-07, - "loss": 5.0754, - "step": 17955 - }, - { - "epoch": 0.9928137092316197, - "grad_norm": 3.592924118041992, - "learning_rate": 3.695281409891984e-07, - "loss": 4.9391, - "step": 17960 - }, - { - "epoch": 0.9930901050304035, - "grad_norm": 3.273549795150757, - "learning_rate": 3.553155201819216e-07, - "loss": 4.6995, - "step": 17965 - }, - { - "epoch": 0.9933665008291874, - "grad_norm": 3.976041555404663, - "learning_rate": 3.4110289937464467e-07, - "loss": 5.0032, - "step": 17970 - }, - { - "epoch": 0.9936428966279712, - "grad_norm": 3.7148942947387695, - "learning_rate": 3.2689027856736785e-07, - "loss": 4.9053, - "step": 17975 - }, - { - "epoch": 0.9939192924267551, - "grad_norm": 4.372527122497559, - "learning_rate": 3.12677657760091e-07, - "loss": 4.6127, - "step": 17980 - }, - { - "epoch": 0.9941956882255389, - "grad_norm": 4.019134521484375, - "learning_rate": 2.984650369528141e-07, - "loss": 4.9816, - "step": 17985 - }, - { - "epoch": 0.9944720840243229, - "grad_norm": 4.684123992919922, - "learning_rate": 2.8425241614553725e-07, - "loss": 4.9317, - "step": 17990 - }, - { - "epoch": 0.9947484798231067, - "grad_norm": 4.50031852722168, - "learning_rate": 2.700397953382604e-07, - "loss": 4.8163, - "step": 17995 - }, - { - "epoch": 0.9950248756218906, - "grad_norm": 3.7687060832977295, - "learning_rate": 2.558271745309835e-07, - "loss": 4.8997, - "step": 18000 - }, - { - "epoch": 0.9953012714206744, - "grad_norm": 5.449435234069824, - "learning_rate": 2.416145537237067e-07, - "loss": 4.8225, - "step": 18005 - }, - { - "epoch": 0.9955776672194583, - "grad_norm": 3.540816068649292, - "learning_rate": 2.2740193291642978e-07, - "loss": 4.8916, - "step": 18010 - }, - { - "epoch": 0.9958540630182421, - "grad_norm": 3.5293331146240234, - "learning_rate": 2.1318931210915294e-07, - "loss": 4.8202, - "step": 18015 - }, - { - "epoch": 0.996130458817026, - "grad_norm": 4.026026248931885, - "learning_rate": 1.989766913018761e-07, - "loss": 5.0024, - "step": 18020 - }, - { - "epoch": 0.9964068546158098, - "grad_norm": 4.405704021453857, - "learning_rate": 1.847640704945992e-07, - "loss": 4.9342, - "step": 18025 - }, - { - "epoch": 0.9966832504145937, - "grad_norm": 3.3129539489746094, - "learning_rate": 1.7055144968732233e-07, - "loss": 4.8739, - "step": 18030 - }, - { - "epoch": 0.9969596462133775, - "grad_norm": 4.042524814605713, - "learning_rate": 1.563388288800455e-07, - "loss": 4.9343, - "step": 18035 - }, - { - "epoch": 0.9972360420121614, - "grad_norm": 3.105717658996582, - "learning_rate": 1.4212620807276863e-07, - "loss": 4.463, - "step": 18040 - }, - { - "epoch": 0.9975124378109452, - "grad_norm": 3.765864133834839, - "learning_rate": 1.2791358726549176e-07, - "loss": 4.9118, - "step": 18045 - }, - { - "epoch": 0.9977888336097291, - "grad_norm": 4.456231594085693, - "learning_rate": 1.1370096645821489e-07, - "loss": 4.8341, - "step": 18050 - }, - { - "epoch": 0.9980652294085129, - "grad_norm": 3.701779365539551, - "learning_rate": 9.948834565093805e-08, - "loss": 4.9091, - "step": 18055 - }, - { - "epoch": 0.9983416252072969, - "grad_norm": 3.981372833251953, - "learning_rate": 8.527572484366117e-08, - "loss": 4.9035, - "step": 18060 - }, - { - "epoch": 0.9986180210060807, - "grad_norm": 4.189070224761963, - "learning_rate": 7.106310403638431e-08, - "loss": 4.8013, - "step": 18065 - }, - { - "epoch": 0.9988944168048646, - "grad_norm": 3.8687007427215576, - "learning_rate": 5.6850483229107445e-08, - "loss": 4.7042, - "step": 18070 - }, - { - "epoch": 0.9991708126036484, - "grad_norm": 4.483328342437744, - "learning_rate": 4.2637862421830584e-08, - "loss": 4.6875, - "step": 18075 - }, - { - "epoch": 0.9994472084024323, - "grad_norm": 4.180428981781006, - "learning_rate": 2.8425241614553722e-08, - "loss": 4.816, - "step": 18080 - }, - { - "epoch": 0.9997236042012161, - "grad_norm": 3.824141025543213, - "learning_rate": 1.4212620807276861e-08, - "loss": 4.8663, - "step": 18085 - }, - { - "epoch": 1.0, - "grad_norm": 3.4549672603607178, - "learning_rate": 0.0, - "loss": 4.3516, - "step": 18090 } ], "logging_steps": 5, - "max_steps": 18090, + "max_steps": 4523, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, @@ -25352,8 +6354,8 @@ "attributes": {} } }, - "total_flos": 1.6815556451321856e+16, - "train_batch_size": 16, + "total_flos": 1.7364382421434368e+16, + "train_batch_size": 64, "trial_name": null, "trial_params": null }