{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 9505, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005260389268805891, "grad_norm": 4.7434234619140625, "learning_rate": 5.0000000000000004e-08, "loss": 1.7896, "step": 1 }, { "epoch": 0.0010520778537611783, "grad_norm": 4.893940448760986, "learning_rate": 1.0000000000000001e-07, "loss": 1.8423, "step": 2 }, { "epoch": 0.0015781167806417674, "grad_norm": 5.008203029632568, "learning_rate": 1.5000000000000002e-07, "loss": 1.7775, "step": 3 }, { "epoch": 0.0021041557075223566, "grad_norm": 4.682094097137451, "learning_rate": 2.0000000000000002e-07, "loss": 1.7132, "step": 4 }, { "epoch": 0.0026301946344029457, "grad_norm": 5.076476097106934, "learning_rate": 2.5000000000000004e-07, "loss": 1.7946, "step": 5 }, { "epoch": 0.003156233561283535, "grad_norm": 5.164911270141602, "learning_rate": 3.0000000000000004e-07, "loss": 1.7562, "step": 6 }, { "epoch": 0.003682272488164124, "grad_norm": 5.532482624053955, "learning_rate": 3.5000000000000004e-07, "loss": 1.9173, "step": 7 }, { "epoch": 0.004208311415044713, "grad_norm": 4.994466304779053, "learning_rate": 4.0000000000000003e-07, "loss": 1.8048, "step": 8 }, { "epoch": 0.004734350341925302, "grad_norm": 4.728099822998047, "learning_rate": 4.5000000000000003e-07, "loss": 1.8313, "step": 9 }, { "epoch": 0.0052603892688058915, "grad_norm": 4.757445335388184, "learning_rate": 5.000000000000001e-07, "loss": 1.7745, "step": 10 }, { "epoch": 0.005786428195686481, "grad_norm": 4.926065444946289, "learning_rate": 5.5e-07, "loss": 1.8448, "step": 11 }, { "epoch": 0.00631246712256707, "grad_norm": 4.987133979797363, "learning_rate": 6.000000000000001e-07, "loss": 1.7755, "step": 12 }, { "epoch": 0.006838506049447659, "grad_norm": 4.783141613006592, "learning_rate": 6.5e-07, "loss": 1.7815, "step": 13 }, { "epoch": 0.007364544976328248, "grad_norm": 4.668217182159424, "learning_rate": 7.000000000000001e-07, "loss": 1.754, "step": 14 }, { "epoch": 0.007890583903208837, "grad_norm": 4.673665523529053, "learning_rate": 7.5e-07, "loss": 1.7608, "step": 15 }, { "epoch": 0.008416622830089426, "grad_norm": 4.452486991882324, "learning_rate": 8.000000000000001e-07, "loss": 1.7222, "step": 16 }, { "epoch": 0.008942661756970016, "grad_norm": 4.257665157318115, "learning_rate": 8.500000000000001e-07, "loss": 1.7556, "step": 17 }, { "epoch": 0.009468700683850605, "grad_norm": 4.1270432472229, "learning_rate": 9.000000000000001e-07, "loss": 1.7121, "step": 18 }, { "epoch": 0.009994739610731194, "grad_norm": 4.321215629577637, "learning_rate": 9.500000000000001e-07, "loss": 1.7584, "step": 19 }, { "epoch": 0.010520778537611783, "grad_norm": 3.8703970909118652, "learning_rate": 1.0000000000000002e-06, "loss": 1.6611, "step": 20 }, { "epoch": 0.011046817464492372, "grad_norm": 4.07947301864624, "learning_rate": 1.0500000000000001e-06, "loss": 1.7914, "step": 21 }, { "epoch": 0.011572856391372961, "grad_norm": 3.9068686962127686, "learning_rate": 1.1e-06, "loss": 1.7848, "step": 22 }, { "epoch": 0.01209889531825355, "grad_norm": 3.7697386741638184, "learning_rate": 1.1500000000000002e-06, "loss": 1.6694, "step": 23 }, { "epoch": 0.01262493424513414, "grad_norm": 3.795276641845703, "learning_rate": 1.2000000000000002e-06, "loss": 1.759, "step": 24 }, { "epoch": 0.013150973172014729, "grad_norm": 3.331472396850586, "learning_rate": 1.25e-06, "loss": 1.7053, "step": 25 }, { "epoch": 0.013677012098895318, "grad_norm": 3.381592035293579, "learning_rate": 1.3e-06, "loss": 1.683, "step": 26 }, { "epoch": 0.014203051025775907, "grad_norm": 3.2494184970855713, "learning_rate": 1.3500000000000002e-06, "loss": 1.5756, "step": 27 }, { "epoch": 0.014729089952656496, "grad_norm": 3.124213695526123, "learning_rate": 1.4000000000000001e-06, "loss": 1.7102, "step": 28 }, { "epoch": 0.015255128879537085, "grad_norm": 2.9148762226104736, "learning_rate": 1.45e-06, "loss": 1.6007, "step": 29 }, { "epoch": 0.015781167806417674, "grad_norm": 2.886734962463379, "learning_rate": 1.5e-06, "loss": 1.7086, "step": 30 }, { "epoch": 0.016307206733298264, "grad_norm": 2.6898605823516846, "learning_rate": 1.5500000000000002e-06, "loss": 1.5655, "step": 31 }, { "epoch": 0.016833245660178853, "grad_norm": 2.6458981037139893, "learning_rate": 1.6000000000000001e-06, "loss": 1.4881, "step": 32 }, { "epoch": 0.017359284587059442, "grad_norm": 2.481387138366699, "learning_rate": 1.6500000000000003e-06, "loss": 1.5608, "step": 33 }, { "epoch": 0.01788532351394003, "grad_norm": 2.743023633956909, "learning_rate": 1.7000000000000002e-06, "loss": 1.5705, "step": 34 }, { "epoch": 0.01841136244082062, "grad_norm": 2.7273406982421875, "learning_rate": 1.75e-06, "loss": 1.5819, "step": 35 }, { "epoch": 0.01893740136770121, "grad_norm": 2.7253308296203613, "learning_rate": 1.8000000000000001e-06, "loss": 1.5201, "step": 36 }, { "epoch": 0.0194634402945818, "grad_norm": 2.8794732093811035, "learning_rate": 1.85e-06, "loss": 1.4743, "step": 37 }, { "epoch": 0.019989479221462388, "grad_norm": 2.767172336578369, "learning_rate": 1.9000000000000002e-06, "loss": 1.5366, "step": 38 }, { "epoch": 0.020515518148342977, "grad_norm": 2.84169864654541, "learning_rate": 1.9500000000000004e-06, "loss": 1.5635, "step": 39 }, { "epoch": 0.021041557075223566, "grad_norm": 2.6982147693634033, "learning_rate": 2.0000000000000003e-06, "loss": 1.49, "step": 40 }, { "epoch": 0.021567596002104155, "grad_norm": 2.597731590270996, "learning_rate": 2.05e-06, "loss": 1.5189, "step": 41 }, { "epoch": 0.022093634928984744, "grad_norm": 2.4286556243896484, "learning_rate": 2.1000000000000002e-06, "loss": 1.4439, "step": 42 }, { "epoch": 0.022619673855865333, "grad_norm": 2.6267499923706055, "learning_rate": 2.15e-06, "loss": 1.3522, "step": 43 }, { "epoch": 0.023145712782745922, "grad_norm": 2.2576816082000732, "learning_rate": 2.2e-06, "loss": 1.4713, "step": 44 }, { "epoch": 0.02367175170962651, "grad_norm": 2.406381368637085, "learning_rate": 2.25e-06, "loss": 1.47, "step": 45 }, { "epoch": 0.0241977906365071, "grad_norm": 2.2341415882110596, "learning_rate": 2.3000000000000004e-06, "loss": 1.4041, "step": 46 }, { "epoch": 0.02472382956338769, "grad_norm": 2.5055644512176514, "learning_rate": 2.35e-06, "loss": 1.4321, "step": 47 }, { "epoch": 0.02524986849026828, "grad_norm": 2.2131927013397217, "learning_rate": 2.4000000000000003e-06, "loss": 1.3631, "step": 48 }, { "epoch": 0.025775907417148868, "grad_norm": 2.3399457931518555, "learning_rate": 2.4500000000000003e-06, "loss": 1.4055, "step": 49 }, { "epoch": 0.026301946344029457, "grad_norm": 2.2194554805755615, "learning_rate": 2.5e-06, "loss": 1.3722, "step": 50 }, { "epoch": 0.026827985270910047, "grad_norm": 2.196530342102051, "learning_rate": 2.55e-06, "loss": 1.4126, "step": 51 }, { "epoch": 0.027354024197790636, "grad_norm": 2.401376485824585, "learning_rate": 2.6e-06, "loss": 1.4174, "step": 52 }, { "epoch": 0.027880063124671225, "grad_norm": 2.2509777545928955, "learning_rate": 2.6500000000000005e-06, "loss": 1.3725, "step": 53 }, { "epoch": 0.028406102051551814, "grad_norm": 2.2538340091705322, "learning_rate": 2.7000000000000004e-06, "loss": 1.4274, "step": 54 }, { "epoch": 0.028932140978432403, "grad_norm": 2.218494176864624, "learning_rate": 2.7500000000000004e-06, "loss": 1.4518, "step": 55 }, { "epoch": 0.029458179905312992, "grad_norm": 2.06544828414917, "learning_rate": 2.8000000000000003e-06, "loss": 1.3547, "step": 56 }, { "epoch": 0.02998421883219358, "grad_norm": 2.014075994491577, "learning_rate": 2.85e-06, "loss": 1.2274, "step": 57 }, { "epoch": 0.03051025775907417, "grad_norm": 2.187418222427368, "learning_rate": 2.9e-06, "loss": 1.3663, "step": 58 }, { "epoch": 0.03103629668595476, "grad_norm": 1.993913173675537, "learning_rate": 2.95e-06, "loss": 1.3357, "step": 59 }, { "epoch": 0.03156233561283535, "grad_norm": 2.1067426204681396, "learning_rate": 3e-06, "loss": 1.3627, "step": 60 }, { "epoch": 0.03208837453971594, "grad_norm": 2.0144565105438232, "learning_rate": 3.05e-06, "loss": 1.394, "step": 61 }, { "epoch": 0.03261441346659653, "grad_norm": 2.2240288257598877, "learning_rate": 3.1000000000000004e-06, "loss": 1.3657, "step": 62 }, { "epoch": 0.03314045239347712, "grad_norm": 2.0080718994140625, "learning_rate": 3.1500000000000003e-06, "loss": 1.2954, "step": 63 }, { "epoch": 0.033666491320357705, "grad_norm": 2.1592211723327637, "learning_rate": 3.2000000000000003e-06, "loss": 1.363, "step": 64 }, { "epoch": 0.0341925302472383, "grad_norm": 2.1390435695648193, "learning_rate": 3.2500000000000002e-06, "loss": 1.3329, "step": 65 }, { "epoch": 0.034718569174118884, "grad_norm": 2.309795379638672, "learning_rate": 3.3000000000000006e-06, "loss": 1.3378, "step": 66 }, { "epoch": 0.035244608100999476, "grad_norm": 2.0283970832824707, "learning_rate": 3.3500000000000005e-06, "loss": 1.2707, "step": 67 }, { "epoch": 0.03577064702788006, "grad_norm": 2.3350703716278076, "learning_rate": 3.4000000000000005e-06, "loss": 1.3149, "step": 68 }, { "epoch": 0.036296685954760655, "grad_norm": 2.1374268531799316, "learning_rate": 3.45e-06, "loss": 1.3181, "step": 69 }, { "epoch": 0.03682272488164124, "grad_norm": 2.1340744495391846, "learning_rate": 3.5e-06, "loss": 1.2968, "step": 70 }, { "epoch": 0.03734876380852183, "grad_norm": 2.212939500808716, "learning_rate": 3.5500000000000003e-06, "loss": 1.3285, "step": 71 }, { "epoch": 0.03787480273540242, "grad_norm": 2.0891077518463135, "learning_rate": 3.6000000000000003e-06, "loss": 1.3142, "step": 72 }, { "epoch": 0.03840084166228301, "grad_norm": 2.0146496295928955, "learning_rate": 3.65e-06, "loss": 1.2932, "step": 73 }, { "epoch": 0.0389268805891636, "grad_norm": 2.2315266132354736, "learning_rate": 3.7e-06, "loss": 1.3515, "step": 74 }, { "epoch": 0.03945291951604419, "grad_norm": 2.0311717987060547, "learning_rate": 3.7500000000000005e-06, "loss": 1.2601, "step": 75 }, { "epoch": 0.039978958442924775, "grad_norm": 1.9522899389266968, "learning_rate": 3.8000000000000005e-06, "loss": 1.3521, "step": 76 }, { "epoch": 0.04050499736980537, "grad_norm": 2.0501742362976074, "learning_rate": 3.85e-06, "loss": 1.3243, "step": 77 }, { "epoch": 0.041031036296685953, "grad_norm": 2.136033535003662, "learning_rate": 3.900000000000001e-06, "loss": 1.3373, "step": 78 }, { "epoch": 0.041557075223566546, "grad_norm": 2.328866958618164, "learning_rate": 3.95e-06, "loss": 1.2864, "step": 79 }, { "epoch": 0.04208311415044713, "grad_norm": 2.0889344215393066, "learning_rate": 4.000000000000001e-06, "loss": 1.2692, "step": 80 }, { "epoch": 0.042609153077327724, "grad_norm": 2.088667631149292, "learning_rate": 4.05e-06, "loss": 1.2232, "step": 81 }, { "epoch": 0.04313519200420831, "grad_norm": 2.0293898582458496, "learning_rate": 4.1e-06, "loss": 1.2505, "step": 82 }, { "epoch": 0.0436612309310889, "grad_norm": 2.240025281906128, "learning_rate": 4.15e-06, "loss": 1.3107, "step": 83 }, { "epoch": 0.04418726985796949, "grad_norm": 2.123445987701416, "learning_rate": 4.2000000000000004e-06, "loss": 1.1674, "step": 84 }, { "epoch": 0.04471330878485008, "grad_norm": 2.1865620613098145, "learning_rate": 4.25e-06, "loss": 1.3257, "step": 85 }, { "epoch": 0.04523934771173067, "grad_norm": 2.1336405277252197, "learning_rate": 4.3e-06, "loss": 1.2968, "step": 86 }, { "epoch": 0.04576538663861126, "grad_norm": 2.117763042449951, "learning_rate": 4.350000000000001e-06, "loss": 1.2294, "step": 87 }, { "epoch": 0.046291425565491845, "grad_norm": 1.9969348907470703, "learning_rate": 4.4e-06, "loss": 1.2621, "step": 88 }, { "epoch": 0.04681746449237244, "grad_norm": 2.24861741065979, "learning_rate": 4.450000000000001e-06, "loss": 1.2909, "step": 89 }, { "epoch": 0.04734350341925302, "grad_norm": 2.08335542678833, "learning_rate": 4.5e-06, "loss": 1.2691, "step": 90 }, { "epoch": 0.047869542346133616, "grad_norm": 2.1306045055389404, "learning_rate": 4.5500000000000005e-06, "loss": 1.3248, "step": 91 }, { "epoch": 0.0483955812730142, "grad_norm": 2.2251298427581787, "learning_rate": 4.600000000000001e-06, "loss": 1.2391, "step": 92 }, { "epoch": 0.048921620199894794, "grad_norm": 2.1604959964752197, "learning_rate": 4.65e-06, "loss": 1.2169, "step": 93 }, { "epoch": 0.04944765912677538, "grad_norm": 2.0155038833618164, "learning_rate": 4.7e-06, "loss": 1.2533, "step": 94 }, { "epoch": 0.04997369805365597, "grad_norm": 1.9579726457595825, "learning_rate": 4.75e-06, "loss": 1.2228, "step": 95 }, { "epoch": 0.05049973698053656, "grad_norm": 2.129992961883545, "learning_rate": 4.800000000000001e-06, "loss": 1.2573, "step": 96 }, { "epoch": 0.05102577590741715, "grad_norm": 2.0832459926605225, "learning_rate": 4.85e-06, "loss": 1.241, "step": 97 }, { "epoch": 0.051551814834297736, "grad_norm": 2.278550148010254, "learning_rate": 4.9000000000000005e-06, "loss": 1.2565, "step": 98 }, { "epoch": 0.05207785376117833, "grad_norm": 2.0997259616851807, "learning_rate": 4.95e-06, "loss": 1.2445, "step": 99 }, { "epoch": 0.052603892688058915, "grad_norm": 2.127976417541504, "learning_rate": 5e-06, "loss": 1.2605, "step": 100 }, { "epoch": 0.05312993161493951, "grad_norm": 2.1200127601623535, "learning_rate": 4.9999999034856715e-06, "loss": 1.3057, "step": 101 }, { "epoch": 0.05365597054182009, "grad_norm": 2.456881046295166, "learning_rate": 4.999999613942694e-06, "loss": 1.2741, "step": 102 }, { "epoch": 0.054182009468700686, "grad_norm": 2.189507484436035, "learning_rate": 4.9999991313710884e-06, "loss": 1.2399, "step": 103 }, { "epoch": 0.05470804839558127, "grad_norm": 2.258619785308838, "learning_rate": 4.9999984557708936e-06, "loss": 1.2161, "step": 104 }, { "epoch": 0.055234087322461864, "grad_norm": 1.983225703239441, "learning_rate": 4.999997587142161e-06, "loss": 1.2027, "step": 105 }, { "epoch": 0.05576012624934245, "grad_norm": 2.1400973796844482, "learning_rate": 4.999996525484957e-06, "loss": 1.2685, "step": 106 }, { "epoch": 0.05628616517622304, "grad_norm": 1.9494950771331787, "learning_rate": 4.999995270799365e-06, "loss": 1.2604, "step": 107 }, { "epoch": 0.05681220410310363, "grad_norm": 2.1203386783599854, "learning_rate": 4.9999938230854814e-06, "loss": 1.2345, "step": 108 }, { "epoch": 0.05733824302998422, "grad_norm": 2.131884813308716, "learning_rate": 4.999992182343417e-06, "loss": 1.2097, "step": 109 }, { "epoch": 0.057864281956864806, "grad_norm": 2.136289119720459, "learning_rate": 4.9999903485732996e-06, "loss": 1.2617, "step": 110 }, { "epoch": 0.0583903208837454, "grad_norm": 2.025071144104004, "learning_rate": 4.9999883217752705e-06, "loss": 1.2004, "step": 111 }, { "epoch": 0.058916359810625984, "grad_norm": 2.513960838317871, "learning_rate": 4.999986101949486e-06, "loss": 1.2399, "step": 112 }, { "epoch": 0.05944239873750658, "grad_norm": 2.2483277320861816, "learning_rate": 4.999983689096117e-06, "loss": 1.2265, "step": 113 }, { "epoch": 0.05996843766438716, "grad_norm": 2.0863187313079834, "learning_rate": 4.999981083215352e-06, "loss": 1.1969, "step": 114 }, { "epoch": 0.060494476591267755, "grad_norm": 2.1240596771240234, "learning_rate": 4.99997828430739e-06, "loss": 1.275, "step": 115 }, { "epoch": 0.06102051551814834, "grad_norm": 2.3810060024261475, "learning_rate": 4.9999752923724465e-06, "loss": 1.3054, "step": 116 }, { "epoch": 0.061546554445028934, "grad_norm": 2.1266205310821533, "learning_rate": 4.999972107410754e-06, "loss": 1.1933, "step": 117 }, { "epoch": 0.06207259337190952, "grad_norm": 2.039619207382202, "learning_rate": 4.999968729422559e-06, "loss": 1.1886, "step": 118 }, { "epoch": 0.0625986322987901, "grad_norm": 2.024503707885742, "learning_rate": 4.999965158408122e-06, "loss": 1.2008, "step": 119 }, { "epoch": 0.0631246712256707, "grad_norm": 2.058926582336426, "learning_rate": 4.999961394367717e-06, "loss": 1.1772, "step": 120 }, { "epoch": 0.06365071015255129, "grad_norm": 1.989399790763855, "learning_rate": 4.999957437301637e-06, "loss": 1.1869, "step": 121 }, { "epoch": 0.06417674907943188, "grad_norm": 2.0462567806243896, "learning_rate": 4.999953287210185e-06, "loss": 1.1944, "step": 122 }, { "epoch": 0.06470278800631246, "grad_norm": 2.258549213409424, "learning_rate": 4.999948944093683e-06, "loss": 1.2304, "step": 123 }, { "epoch": 0.06522882693319305, "grad_norm": 2.115344285964966, "learning_rate": 4.999944407952467e-06, "loss": 1.1901, "step": 124 }, { "epoch": 0.06575486586007365, "grad_norm": 2.082406997680664, "learning_rate": 4.999939678786886e-06, "loss": 1.2481, "step": 125 }, { "epoch": 0.06628090478695424, "grad_norm": 2.5095906257629395, "learning_rate": 4.999934756597305e-06, "loss": 1.2526, "step": 126 }, { "epoch": 0.06680694371383482, "grad_norm": 1.989524483680725, "learning_rate": 4.999929641384105e-06, "loss": 1.2298, "step": 127 }, { "epoch": 0.06733298264071541, "grad_norm": 2.3429722785949707, "learning_rate": 4.999924333147681e-06, "loss": 1.2511, "step": 128 }, { "epoch": 0.067859021567596, "grad_norm": 2.064497232437134, "learning_rate": 4.999918831888441e-06, "loss": 1.2041, "step": 129 }, { "epoch": 0.0683850604944766, "grad_norm": 2.099992513656616, "learning_rate": 4.999913137606813e-06, "loss": 1.2256, "step": 130 }, { "epoch": 0.06891109942135717, "grad_norm": 2.188778877258301, "learning_rate": 4.999907250303234e-06, "loss": 1.2009, "step": 131 }, { "epoch": 0.06943713834823777, "grad_norm": 2.154895067214966, "learning_rate": 4.999901169978158e-06, "loss": 1.273, "step": 132 }, { "epoch": 0.06996317727511836, "grad_norm": 2.457084894180298, "learning_rate": 4.999894896632058e-06, "loss": 1.2003, "step": 133 }, { "epoch": 0.07048921620199895, "grad_norm": 2.0455472469329834, "learning_rate": 4.999888430265415e-06, "loss": 1.1909, "step": 134 }, { "epoch": 0.07101525512887953, "grad_norm": 2.3690097332000732, "learning_rate": 4.99988177087873e-06, "loss": 1.2414, "step": 135 }, { "epoch": 0.07154129405576012, "grad_norm": 2.0194432735443115, "learning_rate": 4.999874918472516e-06, "loss": 1.2072, "step": 136 }, { "epoch": 0.07206733298264072, "grad_norm": 2.0639989376068115, "learning_rate": 4.999867873047303e-06, "loss": 1.1853, "step": 137 }, { "epoch": 0.07259337190952131, "grad_norm": 2.1263129711151123, "learning_rate": 4.999860634603635e-06, "loss": 1.1915, "step": 138 }, { "epoch": 0.07311941083640189, "grad_norm": 1.9768770933151245, "learning_rate": 4.99985320314207e-06, "loss": 1.1623, "step": 139 }, { "epoch": 0.07364544976328248, "grad_norm": 2.4466986656188965, "learning_rate": 4.9998455786631835e-06, "loss": 1.2549, "step": 140 }, { "epoch": 0.07417148869016307, "grad_norm": 2.482954263687134, "learning_rate": 4.999837761167563e-06, "loss": 1.1503, "step": 141 }, { "epoch": 0.07469752761704367, "grad_norm": 2.1949164867401123, "learning_rate": 4.9998297506558116e-06, "loss": 1.2515, "step": 142 }, { "epoch": 0.07522356654392424, "grad_norm": 2.3435401916503906, "learning_rate": 4.9998215471285486e-06, "loss": 1.2231, "step": 143 }, { "epoch": 0.07574960547080484, "grad_norm": 2.2442994117736816, "learning_rate": 4.9998131505864064e-06, "loss": 1.2472, "step": 144 }, { "epoch": 0.07627564439768543, "grad_norm": 2.4117157459259033, "learning_rate": 4.999804561030036e-06, "loss": 1.2303, "step": 145 }, { "epoch": 0.07680168332456602, "grad_norm": 2.263303279876709, "learning_rate": 4.999795778460097e-06, "loss": 1.2435, "step": 146 }, { "epoch": 0.0773277222514466, "grad_norm": 2.174962282180786, "learning_rate": 4.99978680287727e-06, "loss": 1.2074, "step": 147 }, { "epoch": 0.0778537611783272, "grad_norm": 2.1498875617980957, "learning_rate": 4.999777634282248e-06, "loss": 1.1665, "step": 148 }, { "epoch": 0.07837980010520779, "grad_norm": 2.0245747566223145, "learning_rate": 4.999768272675737e-06, "loss": 1.169, "step": 149 }, { "epoch": 0.07890583903208838, "grad_norm": 2.03243350982666, "learning_rate": 4.999758718058462e-06, "loss": 1.2113, "step": 150 }, { "epoch": 0.07943187795896896, "grad_norm": 2.104052782058716, "learning_rate": 4.9997489704311586e-06, "loss": 1.1792, "step": 151 }, { "epoch": 0.07995791688584955, "grad_norm": 2.16056752204895, "learning_rate": 4.999739029794581e-06, "loss": 1.2183, "step": 152 }, { "epoch": 0.08048395581273014, "grad_norm": 2.1418581008911133, "learning_rate": 4.9997288961494975e-06, "loss": 1.2024, "step": 153 }, { "epoch": 0.08100999473961074, "grad_norm": 2.235917329788208, "learning_rate": 4.999718569496688e-06, "loss": 1.2234, "step": 154 }, { "epoch": 0.08153603366649131, "grad_norm": 2.0039474964141846, "learning_rate": 4.999708049836952e-06, "loss": 1.1164, "step": 155 }, { "epoch": 0.08206207259337191, "grad_norm": 2.0888242721557617, "learning_rate": 4.9996973371710995e-06, "loss": 1.1935, "step": 156 }, { "epoch": 0.0825881115202525, "grad_norm": 2.245558500289917, "learning_rate": 4.999686431499961e-06, "loss": 1.1438, "step": 157 }, { "epoch": 0.08311415044713309, "grad_norm": 2.351905345916748, "learning_rate": 4.999675332824376e-06, "loss": 1.2208, "step": 158 }, { "epoch": 0.08364018937401367, "grad_norm": 2.0418808460235596, "learning_rate": 4.999664041145201e-06, "loss": 1.1537, "step": 159 }, { "epoch": 0.08416622830089426, "grad_norm": 2.194399118423462, "learning_rate": 4.99965255646331e-06, "loss": 1.1602, "step": 160 }, { "epoch": 0.08469226722777486, "grad_norm": 2.4853098392486572, "learning_rate": 4.999640878779588e-06, "loss": 1.1981, "step": 161 }, { "epoch": 0.08521830615465545, "grad_norm": 2.1702558994293213, "learning_rate": 4.9996290080949386e-06, "loss": 1.1682, "step": 162 }, { "epoch": 0.08574434508153603, "grad_norm": 2.150707960128784, "learning_rate": 4.999616944410276e-06, "loss": 1.2123, "step": 163 }, { "epoch": 0.08627038400841662, "grad_norm": 2.166897773742676, "learning_rate": 4.9996046877265325e-06, "loss": 1.1855, "step": 164 }, { "epoch": 0.08679642293529721, "grad_norm": 2.1538188457489014, "learning_rate": 4.999592238044655e-06, "loss": 1.1797, "step": 165 }, { "epoch": 0.0873224618621778, "grad_norm": 2.222170114517212, "learning_rate": 4.999579595365604e-06, "loss": 1.1606, "step": 166 }, { "epoch": 0.08784850078905838, "grad_norm": 2.264437437057495, "learning_rate": 4.999566759690356e-06, "loss": 1.1662, "step": 167 }, { "epoch": 0.08837453971593898, "grad_norm": 2.2306337356567383, "learning_rate": 4.999553731019903e-06, "loss": 1.1933, "step": 168 }, { "epoch": 0.08890057864281957, "grad_norm": 2.2025609016418457, "learning_rate": 4.9995405093552495e-06, "loss": 1.2241, "step": 169 }, { "epoch": 0.08942661756970016, "grad_norm": 2.3908772468566895, "learning_rate": 4.999527094697418e-06, "loss": 1.1954, "step": 170 }, { "epoch": 0.08995265649658074, "grad_norm": 2.1161653995513916, "learning_rate": 4.999513487047442e-06, "loss": 1.2315, "step": 171 }, { "epoch": 0.09047869542346133, "grad_norm": 2.0984017848968506, "learning_rate": 4.9994996864063735e-06, "loss": 1.2413, "step": 172 }, { "epoch": 0.09100473435034193, "grad_norm": 2.205087900161743, "learning_rate": 4.999485692775279e-06, "loss": 1.2267, "step": 173 }, { "epoch": 0.09153077327722252, "grad_norm": 2.224553108215332, "learning_rate": 4.9994715061552365e-06, "loss": 1.1613, "step": 174 }, { "epoch": 0.0920568122041031, "grad_norm": 2.191676139831543, "learning_rate": 4.999457126547344e-06, "loss": 1.168, "step": 175 }, { "epoch": 0.09258285113098369, "grad_norm": 2.2432751655578613, "learning_rate": 4.99944255395271e-06, "loss": 1.218, "step": 176 }, { "epoch": 0.09310889005786428, "grad_norm": 2.1327083110809326, "learning_rate": 4.999427788372461e-06, "loss": 1.1994, "step": 177 }, { "epoch": 0.09363492898474488, "grad_norm": 2.146256923675537, "learning_rate": 4.999412829807735e-06, "loss": 1.1387, "step": 178 }, { "epoch": 0.09416096791162545, "grad_norm": 2.377356767654419, "learning_rate": 4.999397678259689e-06, "loss": 1.1901, "step": 179 }, { "epoch": 0.09468700683850605, "grad_norm": 2.192535638809204, "learning_rate": 4.999382333729492e-06, "loss": 1.2079, "step": 180 }, { "epoch": 0.09521304576538664, "grad_norm": 2.0958621501922607, "learning_rate": 4.999366796218329e-06, "loss": 1.1663, "step": 181 }, { "epoch": 0.09573908469226723, "grad_norm": 2.1492772102355957, "learning_rate": 4.9993510657274e-06, "loss": 1.1877, "step": 182 }, { "epoch": 0.09626512361914781, "grad_norm": 2.366111993789673, "learning_rate": 4.999335142257919e-06, "loss": 1.1849, "step": 183 }, { "epoch": 0.0967911625460284, "grad_norm": 2.144526243209839, "learning_rate": 4.999319025811116e-06, "loss": 1.1739, "step": 184 }, { "epoch": 0.097317201472909, "grad_norm": 2.3407647609710693, "learning_rate": 4.999302716388234e-06, "loss": 1.1987, "step": 185 }, { "epoch": 0.09784324039978959, "grad_norm": 2.3771328926086426, "learning_rate": 4.999286213990534e-06, "loss": 1.2024, "step": 186 }, { "epoch": 0.09836927932667017, "grad_norm": 2.2484753131866455, "learning_rate": 4.99926951861929e-06, "loss": 1.2087, "step": 187 }, { "epoch": 0.09889531825355076, "grad_norm": 2.276099681854248, "learning_rate": 4.99925263027579e-06, "loss": 1.1696, "step": 188 }, { "epoch": 0.09942135718043135, "grad_norm": 2.1576876640319824, "learning_rate": 4.999235548961338e-06, "loss": 1.1404, "step": 189 }, { "epoch": 0.09994739610731194, "grad_norm": 2.1412558555603027, "learning_rate": 4.999218274677254e-06, "loss": 1.1279, "step": 190 }, { "epoch": 0.10047343503419252, "grad_norm": 2.1507153511047363, "learning_rate": 4.999200807424871e-06, "loss": 1.1841, "step": 191 }, { "epoch": 0.10099947396107312, "grad_norm": 2.236116886138916, "learning_rate": 4.999183147205538e-06, "loss": 1.208, "step": 192 }, { "epoch": 0.10152551288795371, "grad_norm": 2.1643691062927246, "learning_rate": 4.9991652940206185e-06, "loss": 1.1325, "step": 193 }, { "epoch": 0.1020515518148343, "grad_norm": 2.11639142036438, "learning_rate": 4.999147247871491e-06, "loss": 1.2073, "step": 194 }, { "epoch": 0.10257759074171488, "grad_norm": 1.9682193994522095, "learning_rate": 4.9991290087595475e-06, "loss": 1.1447, "step": 195 }, { "epoch": 0.10310362966859547, "grad_norm": 1.9927830696105957, "learning_rate": 4.9991105766861996e-06, "loss": 1.1694, "step": 196 }, { "epoch": 0.10362966859547607, "grad_norm": 2.0124592781066895, "learning_rate": 4.999091951652867e-06, "loss": 1.152, "step": 197 }, { "epoch": 0.10415570752235666, "grad_norm": 2.1793248653411865, "learning_rate": 4.99907313366099e-06, "loss": 1.228, "step": 198 }, { "epoch": 0.10468174644923724, "grad_norm": 2.1615028381347656, "learning_rate": 4.99905412271202e-06, "loss": 1.2106, "step": 199 }, { "epoch": 0.10520778537611783, "grad_norm": 1.9827650785446167, "learning_rate": 4.999034918807425e-06, "loss": 1.1829, "step": 200 }, { "epoch": 0.10573382430299842, "grad_norm": 2.1772680282592773, "learning_rate": 4.999015521948689e-06, "loss": 1.13, "step": 201 }, { "epoch": 0.10625986322987901, "grad_norm": 2.257385492324829, "learning_rate": 4.99899593213731e-06, "loss": 1.2144, "step": 202 }, { "epoch": 0.1067859021567596, "grad_norm": 2.104809045791626, "learning_rate": 4.998976149374799e-06, "loss": 1.1715, "step": 203 }, { "epoch": 0.10731194108364019, "grad_norm": 2.116504430770874, "learning_rate": 4.998956173662683e-06, "loss": 1.1442, "step": 204 }, { "epoch": 0.10783798001052078, "grad_norm": 2.2018845081329346, "learning_rate": 4.998936005002507e-06, "loss": 1.1327, "step": 205 }, { "epoch": 0.10836401893740137, "grad_norm": 2.2733311653137207, "learning_rate": 4.998915643395826e-06, "loss": 1.1821, "step": 206 }, { "epoch": 0.10889005786428195, "grad_norm": 2.0005805492401123, "learning_rate": 4.998895088844212e-06, "loss": 1.0955, "step": 207 }, { "epoch": 0.10941609679116254, "grad_norm": 2.0851638317108154, "learning_rate": 4.998874341349253e-06, "loss": 1.1851, "step": 208 }, { "epoch": 0.10994213571804314, "grad_norm": 2.032989501953125, "learning_rate": 4.998853400912552e-06, "loss": 1.1069, "step": 209 }, { "epoch": 0.11046817464492373, "grad_norm": 2.295994520187378, "learning_rate": 4.9988322675357235e-06, "loss": 1.1511, "step": 210 }, { "epoch": 0.1109942135718043, "grad_norm": 1.9963881969451904, "learning_rate": 4.9988109412204015e-06, "loss": 1.1497, "step": 211 }, { "epoch": 0.1115202524986849, "grad_norm": 2.6223835945129395, "learning_rate": 4.998789421968231e-06, "loss": 1.1692, "step": 212 }, { "epoch": 0.11204629142556549, "grad_norm": 2.1924188137054443, "learning_rate": 4.998767709780873e-06, "loss": 1.1659, "step": 213 }, { "epoch": 0.11257233035244608, "grad_norm": 2.4124836921691895, "learning_rate": 4.998745804660005e-06, "loss": 1.1965, "step": 214 }, { "epoch": 0.11309836927932668, "grad_norm": 2.15348482131958, "learning_rate": 4.99872370660732e-06, "loss": 1.1337, "step": 215 }, { "epoch": 0.11362440820620726, "grad_norm": 2.3462562561035156, "learning_rate": 4.9987014156245215e-06, "loss": 1.1793, "step": 216 }, { "epoch": 0.11415044713308785, "grad_norm": 2.1864969730377197, "learning_rate": 4.998678931713331e-06, "loss": 1.1139, "step": 217 }, { "epoch": 0.11467648605996844, "grad_norm": 2.1411378383636475, "learning_rate": 4.998656254875486e-06, "loss": 1.1582, "step": 218 }, { "epoch": 0.11520252498684903, "grad_norm": 2.2826247215270996, "learning_rate": 4.998633385112737e-06, "loss": 1.1779, "step": 219 }, { "epoch": 0.11572856391372961, "grad_norm": 2.0697169303894043, "learning_rate": 4.998610322426848e-06, "loss": 1.1775, "step": 220 }, { "epoch": 0.1162546028406102, "grad_norm": 2.153381824493408, "learning_rate": 4.998587066819602e-06, "loss": 1.2244, "step": 221 }, { "epoch": 0.1167806417674908, "grad_norm": 2.151595115661621, "learning_rate": 4.998563618292793e-06, "loss": 1.1562, "step": 222 }, { "epoch": 0.11730668069437139, "grad_norm": 2.1102607250213623, "learning_rate": 4.998539976848233e-06, "loss": 1.1326, "step": 223 }, { "epoch": 0.11783271962125197, "grad_norm": 2.3099205493927, "learning_rate": 4.998516142487746e-06, "loss": 1.1934, "step": 224 }, { "epoch": 0.11835875854813256, "grad_norm": 2.0830485820770264, "learning_rate": 4.998492115213173e-06, "loss": 1.105, "step": 225 }, { "epoch": 0.11888479747501315, "grad_norm": 1.965256929397583, "learning_rate": 4.998467895026369e-06, "loss": 1.1496, "step": 226 }, { "epoch": 0.11941083640189375, "grad_norm": 2.060734272003174, "learning_rate": 4.9984434819292036e-06, "loss": 1.1256, "step": 227 }, { "epoch": 0.11993687532877433, "grad_norm": 2.278106927871704, "learning_rate": 4.998418875923563e-06, "loss": 1.1557, "step": 228 }, { "epoch": 0.12046291425565492, "grad_norm": 2.562490463256836, "learning_rate": 4.998394077011346e-06, "loss": 1.1579, "step": 229 }, { "epoch": 0.12098895318253551, "grad_norm": 2.20798921585083, "learning_rate": 4.998369085194468e-06, "loss": 1.181, "step": 230 }, { "epoch": 0.1215149921094161, "grad_norm": 2.3529961109161377, "learning_rate": 4.998343900474858e-06, "loss": 1.1514, "step": 231 }, { "epoch": 0.12204103103629668, "grad_norm": 2.2413651943206787, "learning_rate": 4.998318522854461e-06, "loss": 1.1317, "step": 232 }, { "epoch": 0.12256706996317727, "grad_norm": 2.2179031372070312, "learning_rate": 4.998292952335236e-06, "loss": 1.1784, "step": 233 }, { "epoch": 0.12309310889005787, "grad_norm": 2.2591211795806885, "learning_rate": 4.998267188919158e-06, "loss": 1.1587, "step": 234 }, { "epoch": 0.12361914781693846, "grad_norm": 2.4820573329925537, "learning_rate": 4.998241232608216e-06, "loss": 1.1448, "step": 235 }, { "epoch": 0.12414518674381904, "grad_norm": 2.202066659927368, "learning_rate": 4.998215083404414e-06, "loss": 1.1859, "step": 236 }, { "epoch": 0.12467122567069963, "grad_norm": 2.246918201446533, "learning_rate": 4.9981887413097705e-06, "loss": 1.1778, "step": 237 }, { "epoch": 0.1251972645975802, "grad_norm": 2.166926145553589, "learning_rate": 4.9981622063263205e-06, "loss": 1.16, "step": 238 }, { "epoch": 0.12572330352446082, "grad_norm": 2.2850661277770996, "learning_rate": 4.998135478456112e-06, "loss": 1.1522, "step": 239 }, { "epoch": 0.1262493424513414, "grad_norm": 2.1694653034210205, "learning_rate": 4.9981085577012095e-06, "loss": 1.1394, "step": 240 }, { "epoch": 0.126775381378222, "grad_norm": 2.061791181564331, "learning_rate": 4.998081444063691e-06, "loss": 1.1551, "step": 241 }, { "epoch": 0.12730142030510258, "grad_norm": 2.1517114639282227, "learning_rate": 4.998054137545649e-06, "loss": 1.1487, "step": 242 }, { "epoch": 0.12782745923198316, "grad_norm": 2.118903398513794, "learning_rate": 4.9980266381491935e-06, "loss": 1.1871, "step": 243 }, { "epoch": 0.12835349815886377, "grad_norm": 2.271512508392334, "learning_rate": 4.997998945876448e-06, "loss": 1.21, "step": 244 }, { "epoch": 0.12887953708574434, "grad_norm": 2.199542760848999, "learning_rate": 4.997971060729549e-06, "loss": 1.17, "step": 245 }, { "epoch": 0.12940557601262492, "grad_norm": 2.213566303253174, "learning_rate": 4.997942982710651e-06, "loss": 1.1521, "step": 246 }, { "epoch": 0.12993161493950553, "grad_norm": 2.291456699371338, "learning_rate": 4.997914711821921e-06, "loss": 1.1671, "step": 247 }, { "epoch": 0.1304576538663861, "grad_norm": 2.017871856689453, "learning_rate": 4.997886248065542e-06, "loss": 1.1522, "step": 248 }, { "epoch": 0.13098369279326671, "grad_norm": 2.1125521659851074, "learning_rate": 4.9978575914437115e-06, "loss": 1.1335, "step": 249 }, { "epoch": 0.1315097317201473, "grad_norm": 2.262874126434326, "learning_rate": 4.997828741958643e-06, "loss": 1.1697, "step": 250 }, { "epoch": 0.13203577064702787, "grad_norm": 2.450192451477051, "learning_rate": 4.997799699612563e-06, "loss": 1.1329, "step": 251 }, { "epoch": 0.13256180957390848, "grad_norm": 2.0831351280212402, "learning_rate": 4.997770464407715e-06, "loss": 1.1711, "step": 252 }, { "epoch": 0.13308784850078906, "grad_norm": 2.2078895568847656, "learning_rate": 4.997741036346357e-06, "loss": 1.1998, "step": 253 }, { "epoch": 0.13361388742766964, "grad_norm": 2.175858497619629, "learning_rate": 4.997711415430759e-06, "loss": 1.1083, "step": 254 }, { "epoch": 0.13413992635455024, "grad_norm": 2.203817129135132, "learning_rate": 4.997681601663207e-06, "loss": 1.088, "step": 255 }, { "epoch": 0.13466596528143082, "grad_norm": 2.0065557956695557, "learning_rate": 4.997651595046007e-06, "loss": 1.1584, "step": 256 }, { "epoch": 0.13519200420831143, "grad_norm": 2.299633264541626, "learning_rate": 4.997621395581474e-06, "loss": 1.2102, "step": 257 }, { "epoch": 0.135718043135192, "grad_norm": 2.2972707748413086, "learning_rate": 4.997591003271938e-06, "loss": 1.1821, "step": 258 }, { "epoch": 0.13624408206207259, "grad_norm": 2.399705171585083, "learning_rate": 4.997560418119749e-06, "loss": 1.1325, "step": 259 }, { "epoch": 0.1367701209889532, "grad_norm": 2.2461678981781006, "learning_rate": 4.997529640127266e-06, "loss": 1.2361, "step": 260 }, { "epoch": 0.13729615991583377, "grad_norm": 2.236917495727539, "learning_rate": 4.997498669296865e-06, "loss": 1.1159, "step": 261 }, { "epoch": 0.13782219884271435, "grad_norm": 2.2851338386535645, "learning_rate": 4.99746750563094e-06, "loss": 1.1688, "step": 262 }, { "epoch": 0.13834823776959496, "grad_norm": 2.1499626636505127, "learning_rate": 4.997436149131894e-06, "loss": 1.1478, "step": 263 }, { "epoch": 0.13887427669647553, "grad_norm": 2.0969858169555664, "learning_rate": 4.997404599802151e-06, "loss": 1.1102, "step": 264 }, { "epoch": 0.13940031562335614, "grad_norm": 2.5635933876037598, "learning_rate": 4.997372857644146e-06, "loss": 1.1173, "step": 265 }, { "epoch": 0.13992635455023672, "grad_norm": 2.1076197624206543, "learning_rate": 4.997340922660329e-06, "loss": 1.1321, "step": 266 }, { "epoch": 0.1404523934771173, "grad_norm": 2.179189443588257, "learning_rate": 4.997308794853165e-06, "loss": 1.1325, "step": 267 }, { "epoch": 0.1409784324039979, "grad_norm": 2.0838067531585693, "learning_rate": 4.9972764742251375e-06, "loss": 1.1243, "step": 268 }, { "epoch": 0.14150447133087848, "grad_norm": 2.1462979316711426, "learning_rate": 4.9972439607787405e-06, "loss": 1.1251, "step": 269 }, { "epoch": 0.14203051025775906, "grad_norm": 2.144658088684082, "learning_rate": 4.997211254516484e-06, "loss": 1.1879, "step": 270 }, { "epoch": 0.14255654918463967, "grad_norm": 2.118098020553589, "learning_rate": 4.997178355440892e-06, "loss": 1.1635, "step": 271 }, { "epoch": 0.14308258811152025, "grad_norm": 2.284640312194824, "learning_rate": 4.99714526355451e-06, "loss": 1.1181, "step": 272 }, { "epoch": 0.14360862703840085, "grad_norm": 2.2020652294158936, "learning_rate": 4.997111978859886e-06, "loss": 1.1234, "step": 273 }, { "epoch": 0.14413466596528143, "grad_norm": 2.164998769760132, "learning_rate": 4.997078501359595e-06, "loss": 1.1723, "step": 274 }, { "epoch": 0.144660704892162, "grad_norm": 2.1917877197265625, "learning_rate": 4.9970448310562196e-06, "loss": 1.1222, "step": 275 }, { "epoch": 0.14518674381904262, "grad_norm": 2.314770221710205, "learning_rate": 4.99701096795236e-06, "loss": 1.183, "step": 276 }, { "epoch": 0.1457127827459232, "grad_norm": 2.217176675796509, "learning_rate": 4.996976912050632e-06, "loss": 1.1509, "step": 277 }, { "epoch": 0.14623882167280378, "grad_norm": 2.253232002258301, "learning_rate": 4.996942663353663e-06, "loss": 1.1733, "step": 278 }, { "epoch": 0.14676486059968438, "grad_norm": 2.091414213180542, "learning_rate": 4.996908221864099e-06, "loss": 1.1479, "step": 279 }, { "epoch": 0.14729089952656496, "grad_norm": 2.391035556793213, "learning_rate": 4.996873587584599e-06, "loss": 1.1646, "step": 280 }, { "epoch": 0.14781693845344557, "grad_norm": 1.941179871559143, "learning_rate": 4.996838760517836e-06, "loss": 1.1362, "step": 281 }, { "epoch": 0.14834297738032615, "grad_norm": 2.3869614601135254, "learning_rate": 4.9968037406665e-06, "loss": 1.1455, "step": 282 }, { "epoch": 0.14886901630720673, "grad_norm": 2.2253477573394775, "learning_rate": 4.9967685280332955e-06, "loss": 1.1934, "step": 283 }, { "epoch": 0.14939505523408733, "grad_norm": 2.235481023788452, "learning_rate": 4.99673312262094e-06, "loss": 1.1457, "step": 284 }, { "epoch": 0.1499210941609679, "grad_norm": 2.1756770610809326, "learning_rate": 4.996697524432169e-06, "loss": 1.1874, "step": 285 }, { "epoch": 0.1504471330878485, "grad_norm": 1.9890838861465454, "learning_rate": 4.99666173346973e-06, "loss": 1.1381, "step": 286 }, { "epoch": 0.1509731720147291, "grad_norm": 2.032940149307251, "learning_rate": 4.996625749736386e-06, "loss": 1.1408, "step": 287 }, { "epoch": 0.15149921094160967, "grad_norm": 2.38653564453125, "learning_rate": 4.996589573234915e-06, "loss": 1.1137, "step": 288 }, { "epoch": 0.15202524986849028, "grad_norm": 2.5009000301361084, "learning_rate": 4.9965532039681116e-06, "loss": 1.1404, "step": 289 }, { "epoch": 0.15255128879537086, "grad_norm": 2.113600969314575, "learning_rate": 4.996516641938784e-06, "loss": 1.0764, "step": 290 }, { "epoch": 0.15307732772225144, "grad_norm": 2.2645368576049805, "learning_rate": 4.996479887149754e-06, "loss": 1.1499, "step": 291 }, { "epoch": 0.15360336664913204, "grad_norm": 2.015124559402466, "learning_rate": 4.99644293960386e-06, "loss": 1.0487, "step": 292 }, { "epoch": 0.15412940557601262, "grad_norm": 2.121588706970215, "learning_rate": 4.996405799303955e-06, "loss": 1.1119, "step": 293 }, { "epoch": 0.1546554445028932, "grad_norm": 2.3707003593444824, "learning_rate": 4.996368466252907e-06, "loss": 1.1797, "step": 294 }, { "epoch": 0.1551814834297738, "grad_norm": 2.3027000427246094, "learning_rate": 4.996330940453598e-06, "loss": 1.1228, "step": 295 }, { "epoch": 0.1557075223566544, "grad_norm": 2.0909178256988525, "learning_rate": 4.996293221908925e-06, "loss": 1.0932, "step": 296 }, { "epoch": 0.156233561283535, "grad_norm": 2.362823486328125, "learning_rate": 4.996255310621801e-06, "loss": 1.1507, "step": 297 }, { "epoch": 0.15675960021041557, "grad_norm": 2.080667495727539, "learning_rate": 4.996217206595153e-06, "loss": 1.1158, "step": 298 }, { "epoch": 0.15728563913729615, "grad_norm": 2.0508742332458496, "learning_rate": 4.996178909831922e-06, "loss": 1.1326, "step": 299 }, { "epoch": 0.15781167806417676, "grad_norm": 2.1632707118988037, "learning_rate": 4.996140420335068e-06, "loss": 1.0946, "step": 300 }, { "epoch": 0.15833771699105734, "grad_norm": 1.9084789752960205, "learning_rate": 4.996101738107559e-06, "loss": 1.0939, "step": 301 }, { "epoch": 0.15886375591793792, "grad_norm": 1.9817906618118286, "learning_rate": 4.996062863152385e-06, "loss": 1.1013, "step": 302 }, { "epoch": 0.15938979484481852, "grad_norm": 1.9947365522384644, "learning_rate": 4.9960237954725446e-06, "loss": 1.0635, "step": 303 }, { "epoch": 0.1599158337716991, "grad_norm": 2.0908870697021484, "learning_rate": 4.995984535071056e-06, "loss": 1.0914, "step": 304 }, { "epoch": 0.1604418726985797, "grad_norm": 2.1920530796051025, "learning_rate": 4.995945081950952e-06, "loss": 1.1816, "step": 305 }, { "epoch": 0.16096791162546029, "grad_norm": 2.250007152557373, "learning_rate": 4.995905436115276e-06, "loss": 1.1543, "step": 306 }, { "epoch": 0.16149395055234086, "grad_norm": 2.3157906532287598, "learning_rate": 4.995865597567091e-06, "loss": 1.1349, "step": 307 }, { "epoch": 0.16201998947922147, "grad_norm": 2.816443681716919, "learning_rate": 4.995825566309471e-06, "loss": 1.1154, "step": 308 }, { "epoch": 0.16254602840610205, "grad_norm": 2.3194282054901123, "learning_rate": 4.995785342345509e-06, "loss": 1.1547, "step": 309 }, { "epoch": 0.16307206733298263, "grad_norm": 2.1249098777770996, "learning_rate": 4.99574492567831e-06, "loss": 1.0995, "step": 310 }, { "epoch": 0.16359810625986324, "grad_norm": 2.100315809249878, "learning_rate": 4.995704316310994e-06, "loss": 1.1662, "step": 311 }, { "epoch": 0.16412414518674381, "grad_norm": 2.1664323806762695, "learning_rate": 4.995663514246697e-06, "loss": 1.1466, "step": 312 }, { "epoch": 0.16465018411362442, "grad_norm": 2.217438220977783, "learning_rate": 4.9956225194885704e-06, "loss": 1.1908, "step": 313 }, { "epoch": 0.165176223040505, "grad_norm": 2.3328514099121094, "learning_rate": 4.995581332039778e-06, "loss": 1.0809, "step": 314 }, { "epoch": 0.16570226196738558, "grad_norm": 2.088467836380005, "learning_rate": 4.9955399519035e-06, "loss": 1.0908, "step": 315 }, { "epoch": 0.16622830089426618, "grad_norm": 2.2554612159729004, "learning_rate": 4.995498379082932e-06, "loss": 1.1702, "step": 316 }, { "epoch": 0.16675433982114676, "grad_norm": 2.2798142433166504, "learning_rate": 4.995456613581284e-06, "loss": 1.107, "step": 317 }, { "epoch": 0.16728037874802734, "grad_norm": 2.4394755363464355, "learning_rate": 4.9954146554017816e-06, "loss": 1.0881, "step": 318 }, { "epoch": 0.16780641767490795, "grad_norm": 2.1176295280456543, "learning_rate": 4.995372504547662e-06, "loss": 1.1177, "step": 319 }, { "epoch": 0.16833245660178853, "grad_norm": 2.141923189163208, "learning_rate": 4.995330161022181e-06, "loss": 1.1321, "step": 320 }, { "epoch": 0.16885849552866913, "grad_norm": 2.273068428039551, "learning_rate": 4.9952876248286086e-06, "loss": 1.1832, "step": 321 }, { "epoch": 0.1693845344555497, "grad_norm": 2.267636299133301, "learning_rate": 4.995244895970228e-06, "loss": 1.1058, "step": 322 }, { "epoch": 0.1699105733824303, "grad_norm": 2.133772850036621, "learning_rate": 4.99520197445034e-06, "loss": 1.1478, "step": 323 }, { "epoch": 0.1704366123093109, "grad_norm": 2.2782862186431885, "learning_rate": 4.995158860272257e-06, "loss": 1.1074, "step": 324 }, { "epoch": 0.17096265123619148, "grad_norm": 2.544316053390503, "learning_rate": 4.995115553439308e-06, "loss": 1.0583, "step": 325 }, { "epoch": 0.17148869016307206, "grad_norm": 2.2900187969207764, "learning_rate": 4.995072053954838e-06, "loss": 1.1933, "step": 326 }, { "epoch": 0.17201472908995266, "grad_norm": 2.190380811691284, "learning_rate": 4.995028361822206e-06, "loss": 1.135, "step": 327 }, { "epoch": 0.17254076801683324, "grad_norm": 2.4495794773101807, "learning_rate": 4.9949844770447834e-06, "loss": 1.1214, "step": 328 }, { "epoch": 0.17306680694371385, "grad_norm": 2.332644462585449, "learning_rate": 4.994940399625959e-06, "loss": 1.1017, "step": 329 }, { "epoch": 0.17359284587059443, "grad_norm": 2.0709457397460938, "learning_rate": 4.994896129569138e-06, "loss": 1.1073, "step": 330 }, { "epoch": 0.174118884797475, "grad_norm": 2.8817923069000244, "learning_rate": 4.994851666877736e-06, "loss": 1.0758, "step": 331 }, { "epoch": 0.1746449237243556, "grad_norm": 2.2557790279388428, "learning_rate": 4.994807011555189e-06, "loss": 1.173, "step": 332 }, { "epoch": 0.1751709626512362, "grad_norm": 2.2412662506103516, "learning_rate": 4.994762163604942e-06, "loss": 1.1357, "step": 333 }, { "epoch": 0.17569700157811677, "grad_norm": 2.1749277114868164, "learning_rate": 4.9947171230304595e-06, "loss": 1.0988, "step": 334 }, { "epoch": 0.17622304050499737, "grad_norm": 2.4530062675476074, "learning_rate": 4.994671889835218e-06, "loss": 1.1377, "step": 335 }, { "epoch": 0.17674907943187795, "grad_norm": 2.2602410316467285, "learning_rate": 4.994626464022711e-06, "loss": 1.0799, "step": 336 }, { "epoch": 0.17727511835875856, "grad_norm": 2.0797061920166016, "learning_rate": 4.994580845596446e-06, "loss": 1.1214, "step": 337 }, { "epoch": 0.17780115728563914, "grad_norm": 2.1437630653381348, "learning_rate": 4.994535034559945e-06, "loss": 1.1794, "step": 338 }, { "epoch": 0.17832719621251972, "grad_norm": 2.0809285640716553, "learning_rate": 4.994489030916745e-06, "loss": 1.1331, "step": 339 }, { "epoch": 0.17885323513940032, "grad_norm": 2.31193208694458, "learning_rate": 4.994442834670397e-06, "loss": 1.1425, "step": 340 }, { "epoch": 0.1793792740662809, "grad_norm": 2.0348451137542725, "learning_rate": 4.99439644582447e-06, "loss": 1.1149, "step": 341 }, { "epoch": 0.17990531299316148, "grad_norm": 2.2816810607910156, "learning_rate": 4.994349864382544e-06, "loss": 1.1509, "step": 342 }, { "epoch": 0.1804313519200421, "grad_norm": 2.08492374420166, "learning_rate": 4.994303090348217e-06, "loss": 1.0854, "step": 343 }, { "epoch": 0.18095739084692267, "grad_norm": 2.0389866828918457, "learning_rate": 4.994256123725098e-06, "loss": 1.1195, "step": 344 }, { "epoch": 0.18148342977380327, "grad_norm": 2.2040510177612305, "learning_rate": 4.9942089645168175e-06, "loss": 1.1112, "step": 345 }, { "epoch": 0.18200946870068385, "grad_norm": 2.058849811553955, "learning_rate": 4.994161612727013e-06, "loss": 1.1462, "step": 346 }, { "epoch": 0.18253550762756443, "grad_norm": 2.2940948009490967, "learning_rate": 4.994114068359343e-06, "loss": 1.2183, "step": 347 }, { "epoch": 0.18306154655444504, "grad_norm": 2.0303874015808105, "learning_rate": 4.9940663314174756e-06, "loss": 1.1136, "step": 348 }, { "epoch": 0.18358758548132562, "grad_norm": 2.208289861679077, "learning_rate": 4.9940184019051e-06, "loss": 1.1507, "step": 349 }, { "epoch": 0.1841136244082062, "grad_norm": 2.438228130340576, "learning_rate": 4.993970279825915e-06, "loss": 1.1619, "step": 350 }, { "epoch": 0.1846396633350868, "grad_norm": 2.1701645851135254, "learning_rate": 4.993921965183636e-06, "loss": 1.1057, "step": 351 }, { "epoch": 0.18516570226196738, "grad_norm": 2.345054864883423, "learning_rate": 4.9938734579819944e-06, "loss": 1.1758, "step": 352 }, { "epoch": 0.185691741188848, "grad_norm": 2.3761768341064453, "learning_rate": 4.9938247582247345e-06, "loss": 1.1093, "step": 353 }, { "epoch": 0.18621778011572857, "grad_norm": 2.2209126949310303, "learning_rate": 4.993775865915618e-06, "loss": 1.0882, "step": 354 }, { "epoch": 0.18674381904260914, "grad_norm": 2.093406915664673, "learning_rate": 4.993726781058419e-06, "loss": 1.1621, "step": 355 }, { "epoch": 0.18726985796948975, "grad_norm": 2.509725332260132, "learning_rate": 4.993677503656927e-06, "loss": 1.1411, "step": 356 }, { "epoch": 0.18779589689637033, "grad_norm": 2.2245242595672607, "learning_rate": 4.993628033714947e-06, "loss": 1.1042, "step": 357 }, { "epoch": 0.1883219358232509, "grad_norm": 1.838408350944519, "learning_rate": 4.9935783712363e-06, "loss": 1.0204, "step": 358 }, { "epoch": 0.18884797475013151, "grad_norm": 2.0559537410736084, "learning_rate": 4.993528516224818e-06, "loss": 1.0681, "step": 359 }, { "epoch": 0.1893740136770121, "grad_norm": 2.084890604019165, "learning_rate": 4.993478468684352e-06, "loss": 1.1149, "step": 360 }, { "epoch": 0.1899000526038927, "grad_norm": 2.179478168487549, "learning_rate": 4.993428228618767e-06, "loss": 1.1342, "step": 361 }, { "epoch": 0.19042609153077328, "grad_norm": 2.082578182220459, "learning_rate": 4.99337779603194e-06, "loss": 1.1293, "step": 362 }, { "epoch": 0.19095213045765386, "grad_norm": 2.031831979751587, "learning_rate": 4.993327170927766e-06, "loss": 1.0728, "step": 363 }, { "epoch": 0.19147816938453446, "grad_norm": 2.1939597129821777, "learning_rate": 4.993276353310155e-06, "loss": 1.1252, "step": 364 }, { "epoch": 0.19200420831141504, "grad_norm": 2.031350612640381, "learning_rate": 4.9932253431830295e-06, "loss": 1.1039, "step": 365 }, { "epoch": 0.19253024723829562, "grad_norm": 2.3367671966552734, "learning_rate": 4.993174140550327e-06, "loss": 1.1211, "step": 366 }, { "epoch": 0.19305628616517623, "grad_norm": 2.2768945693969727, "learning_rate": 4.993122745416003e-06, "loss": 1.1119, "step": 367 }, { "epoch": 0.1935823250920568, "grad_norm": 2.220766544342041, "learning_rate": 4.993071157784025e-06, "loss": 1.1451, "step": 368 }, { "epoch": 0.1941083640189374, "grad_norm": 2.3694369792938232, "learning_rate": 4.993019377658376e-06, "loss": 1.1156, "step": 369 }, { "epoch": 0.194634402945818, "grad_norm": 2.245237350463867, "learning_rate": 4.9929674050430535e-06, "loss": 1.1316, "step": 370 }, { "epoch": 0.19516044187269857, "grad_norm": 2.720625400543213, "learning_rate": 4.992915239942071e-06, "loss": 1.1092, "step": 371 }, { "epoch": 0.19568648079957918, "grad_norm": 2.115727424621582, "learning_rate": 4.992862882359457e-06, "loss": 1.1769, "step": 372 }, { "epoch": 0.19621251972645976, "grad_norm": 2.235677480697632, "learning_rate": 4.992810332299253e-06, "loss": 1.1786, "step": 373 }, { "epoch": 0.19673855865334033, "grad_norm": 2.539433002471924, "learning_rate": 4.992757589765516e-06, "loss": 1.1251, "step": 374 }, { "epoch": 0.19726459758022094, "grad_norm": 5.042508602142334, "learning_rate": 4.99270465476232e-06, "loss": 1.0706, "step": 375 }, { "epoch": 0.19779063650710152, "grad_norm": 2.1171703338623047, "learning_rate": 4.9926515272937516e-06, "loss": 1.1287, "step": 376 }, { "epoch": 0.19831667543398213, "grad_norm": 2.4587223529815674, "learning_rate": 4.992598207363912e-06, "loss": 1.053, "step": 377 }, { "epoch": 0.1988427143608627, "grad_norm": 2.1502695083618164, "learning_rate": 4.9925446949769184e-06, "loss": 1.0837, "step": 378 }, { "epoch": 0.19936875328774328, "grad_norm": 2.139822483062744, "learning_rate": 4.992490990136903e-06, "loss": 1.1358, "step": 379 }, { "epoch": 0.1998947922146239, "grad_norm": 2.4914610385894775, "learning_rate": 4.992437092848012e-06, "loss": 1.1053, "step": 380 }, { "epoch": 0.20042083114150447, "grad_norm": 2.24576735496521, "learning_rate": 4.992383003114408e-06, "loss": 1.1034, "step": 381 }, { "epoch": 0.20094687006838505, "grad_norm": 2.1979477405548096, "learning_rate": 4.992328720940266e-06, "loss": 1.0839, "step": 382 }, { "epoch": 0.20147290899526565, "grad_norm": 2.1680850982666016, "learning_rate": 4.992274246329778e-06, "loss": 1.1011, "step": 383 }, { "epoch": 0.20199894792214623, "grad_norm": 2.3214027881622314, "learning_rate": 4.9922195792871495e-06, "loss": 1.03, "step": 384 }, { "epoch": 0.20252498684902684, "grad_norm": 2.162393808364868, "learning_rate": 4.9921647198166014e-06, "loss": 1.0466, "step": 385 }, { "epoch": 0.20305102577590742, "grad_norm": 2.184163808822632, "learning_rate": 4.99210966792237e-06, "loss": 1.1379, "step": 386 }, { "epoch": 0.203577064702788, "grad_norm": 2.3308913707733154, "learning_rate": 4.992054423608706e-06, "loss": 1.1751, "step": 387 }, { "epoch": 0.2041031036296686, "grad_norm": 2.123298168182373, "learning_rate": 4.991998986879874e-06, "loss": 1.1079, "step": 388 }, { "epoch": 0.20462914255654918, "grad_norm": 2.229844331741333, "learning_rate": 4.991943357740155e-06, "loss": 1.1242, "step": 389 }, { "epoch": 0.20515518148342976, "grad_norm": 2.1815683841705322, "learning_rate": 4.991887536193845e-06, "loss": 1.0949, "step": 390 }, { "epoch": 0.20568122041031037, "grad_norm": 2.4636261463165283, "learning_rate": 4.991831522245253e-06, "loss": 1.1118, "step": 391 }, { "epoch": 0.20620725933719095, "grad_norm": 2.0095014572143555, "learning_rate": 4.991775315898703e-06, "loss": 1.0197, "step": 392 }, { "epoch": 0.20673329826407155, "grad_norm": 2.1244406700134277, "learning_rate": 4.991718917158538e-06, "loss": 1.1081, "step": 393 }, { "epoch": 0.20725933719095213, "grad_norm": 1.9773920774459839, "learning_rate": 4.991662326029109e-06, "loss": 1.0657, "step": 394 }, { "epoch": 0.2077853761178327, "grad_norm": 2.204554796218872, "learning_rate": 4.9916055425147874e-06, "loss": 1.1434, "step": 395 }, { "epoch": 0.20831141504471332, "grad_norm": 2.068147659301758, "learning_rate": 4.991548566619957e-06, "loss": 1.1281, "step": 396 }, { "epoch": 0.2088374539715939, "grad_norm": 2.1518101692199707, "learning_rate": 4.991491398349017e-06, "loss": 1.0977, "step": 397 }, { "epoch": 0.20936349289847447, "grad_norm": 2.091654062271118, "learning_rate": 4.991434037706382e-06, "loss": 1.1033, "step": 398 }, { "epoch": 0.20988953182535508, "grad_norm": 2.8754067420959473, "learning_rate": 4.9913764846964805e-06, "loss": 1.1237, "step": 399 }, { "epoch": 0.21041557075223566, "grad_norm": 2.2165675163269043, "learning_rate": 4.991318739323757e-06, "loss": 1.1298, "step": 400 }, { "epoch": 0.21094160967911627, "grad_norm": 2.1219065189361572, "learning_rate": 4.991260801592668e-06, "loss": 1.0795, "step": 401 }, { "epoch": 0.21146764860599684, "grad_norm": 2.132737159729004, "learning_rate": 4.9912026715076885e-06, "loss": 1.0546, "step": 402 }, { "epoch": 0.21199368753287742, "grad_norm": 2.228076457977295, "learning_rate": 4.9911443490733075e-06, "loss": 1.1759, "step": 403 }, { "epoch": 0.21251972645975803, "grad_norm": 2.1305177211761475, "learning_rate": 4.991085834294027e-06, "loss": 1.0865, "step": 404 }, { "epoch": 0.2130457653866386, "grad_norm": 2.1550936698913574, "learning_rate": 4.991027127174365e-06, "loss": 1.1027, "step": 405 }, { "epoch": 0.2135718043135192, "grad_norm": 2.3489346504211426, "learning_rate": 4.990968227718854e-06, "loss": 1.184, "step": 406 }, { "epoch": 0.2140978432403998, "grad_norm": 2.2208189964294434, "learning_rate": 4.9909091359320434e-06, "loss": 1.1476, "step": 407 }, { "epoch": 0.21462388216728037, "grad_norm": 2.230978012084961, "learning_rate": 4.990849851818494e-06, "loss": 1.1125, "step": 408 }, { "epoch": 0.21514992109416098, "grad_norm": 2.294647216796875, "learning_rate": 4.990790375382784e-06, "loss": 1.1526, "step": 409 }, { "epoch": 0.21567596002104156, "grad_norm": 2.160446882247925, "learning_rate": 4.990730706629507e-06, "loss": 1.1569, "step": 410 }, { "epoch": 0.21620199894792214, "grad_norm": 2.1352434158325195, "learning_rate": 4.990670845563268e-06, "loss": 1.049, "step": 411 }, { "epoch": 0.21672803787480274, "grad_norm": 2.0740866661071777, "learning_rate": 4.99061079218869e-06, "loss": 1.104, "step": 412 }, { "epoch": 0.21725407680168332, "grad_norm": 2.302877426147461, "learning_rate": 4.990550546510408e-06, "loss": 1.0942, "step": 413 }, { "epoch": 0.2177801157285639, "grad_norm": 2.270836353302002, "learning_rate": 4.990490108533076e-06, "loss": 1.107, "step": 414 }, { "epoch": 0.2183061546554445, "grad_norm": 2.05703067779541, "learning_rate": 4.99042947826136e-06, "loss": 1.1284, "step": 415 }, { "epoch": 0.21883219358232509, "grad_norm": 2.3524155616760254, "learning_rate": 4.990368655699941e-06, "loss": 1.068, "step": 416 }, { "epoch": 0.2193582325092057, "grad_norm": 2.5300350189208984, "learning_rate": 4.9903076408535145e-06, "loss": 1.0993, "step": 417 }, { "epoch": 0.21988427143608627, "grad_norm": 2.1858162879943848, "learning_rate": 4.990246433726793e-06, "loss": 1.1398, "step": 418 }, { "epoch": 0.22041031036296685, "grad_norm": 1.9856489896774292, "learning_rate": 4.990185034324501e-06, "loss": 1.0671, "step": 419 }, { "epoch": 0.22093634928984746, "grad_norm": 2.177152156829834, "learning_rate": 4.99012344265138e-06, "loss": 1.1673, "step": 420 }, { "epoch": 0.22146238821672803, "grad_norm": 2.128787040710449, "learning_rate": 4.990061658712186e-06, "loss": 1.1629, "step": 421 }, { "epoch": 0.2219884271436086, "grad_norm": 2.1840457916259766, "learning_rate": 4.989999682511688e-06, "loss": 1.0739, "step": 422 }, { "epoch": 0.22251446607048922, "grad_norm": 2.37825608253479, "learning_rate": 4.989937514054673e-06, "loss": 1.1179, "step": 423 }, { "epoch": 0.2230405049973698, "grad_norm": 2.2746498584747314, "learning_rate": 4.98987515334594e-06, "loss": 1.1117, "step": 424 }, { "epoch": 0.2235665439242504, "grad_norm": 2.441087007522583, "learning_rate": 4.989812600390304e-06, "loss": 1.134, "step": 425 }, { "epoch": 0.22409258285113098, "grad_norm": 1.9548932313919067, "learning_rate": 4.989749855192596e-06, "loss": 1.0962, "step": 426 }, { "epoch": 0.22461862177801156, "grad_norm": 2.382025957107544, "learning_rate": 4.989686917757659e-06, "loss": 1.1233, "step": 427 }, { "epoch": 0.22514466070489217, "grad_norm": 2.1739771366119385, "learning_rate": 4.989623788090353e-06, "loss": 1.0665, "step": 428 }, { "epoch": 0.22567069963177275, "grad_norm": 2.3246262073516846, "learning_rate": 4.989560466195553e-06, "loss": 1.0834, "step": 429 }, { "epoch": 0.22619673855865335, "grad_norm": 2.1649882793426514, "learning_rate": 4.9894969520781475e-06, "loss": 1.1144, "step": 430 }, { "epoch": 0.22672277748553393, "grad_norm": 2.307199001312256, "learning_rate": 4.98943324574304e-06, "loss": 1.2195, "step": 431 }, { "epoch": 0.2272488164124145, "grad_norm": 2.2414958477020264, "learning_rate": 4.989369347195151e-06, "loss": 1.0549, "step": 432 }, { "epoch": 0.22777485533929512, "grad_norm": 2.12762713432312, "learning_rate": 4.989305256439413e-06, "loss": 1.1185, "step": 433 }, { "epoch": 0.2283008942661757, "grad_norm": 2.1503520011901855, "learning_rate": 4.989240973480774e-06, "loss": 1.1294, "step": 434 }, { "epoch": 0.22882693319305628, "grad_norm": 2.1283833980560303, "learning_rate": 4.9891764983242e-06, "loss": 1.1154, "step": 435 }, { "epoch": 0.22935297211993688, "grad_norm": 2.239828109741211, "learning_rate": 4.9891118309746666e-06, "loss": 1.073, "step": 436 }, { "epoch": 0.22987901104681746, "grad_norm": 2.396672248840332, "learning_rate": 4.989046971437167e-06, "loss": 1.0916, "step": 437 }, { "epoch": 0.23040504997369807, "grad_norm": 2.1172304153442383, "learning_rate": 4.98898191971671e-06, "loss": 1.1001, "step": 438 }, { "epoch": 0.23093108890057865, "grad_norm": 2.1714346408843994, "learning_rate": 4.98891667581832e-06, "loss": 1.1672, "step": 439 }, { "epoch": 0.23145712782745922, "grad_norm": 2.058523178100586, "learning_rate": 4.98885123974703e-06, "loss": 1.0842, "step": 440 }, { "epoch": 0.23198316675433983, "grad_norm": 2.4147160053253174, "learning_rate": 4.988785611507896e-06, "loss": 1.0755, "step": 441 }, { "epoch": 0.2325092056812204, "grad_norm": 2.274296283721924, "learning_rate": 4.988719791105985e-06, "loss": 1.1141, "step": 442 }, { "epoch": 0.233035244608101, "grad_norm": 2.178182363510132, "learning_rate": 4.988653778546379e-06, "loss": 1.212, "step": 443 }, { "epoch": 0.2335612835349816, "grad_norm": 2.200793743133545, "learning_rate": 4.988587573834173e-06, "loss": 1.0992, "step": 444 }, { "epoch": 0.23408732246186217, "grad_norm": 1.9726881980895996, "learning_rate": 4.98852117697448e-06, "loss": 1.1165, "step": 445 }, { "epoch": 0.23461336138874278, "grad_norm": 2.1173300743103027, "learning_rate": 4.988454587972428e-06, "loss": 1.1162, "step": 446 }, { "epoch": 0.23513940031562336, "grad_norm": 2.1428768634796143, "learning_rate": 4.9883878068331556e-06, "loss": 1.1343, "step": 447 }, { "epoch": 0.23566543924250394, "grad_norm": 2.00190806388855, "learning_rate": 4.988320833561822e-06, "loss": 1.0873, "step": 448 }, { "epoch": 0.23619147816938454, "grad_norm": 2.2472777366638184, "learning_rate": 4.988253668163596e-06, "loss": 1.1209, "step": 449 }, { "epoch": 0.23671751709626512, "grad_norm": 2.0522475242614746, "learning_rate": 4.988186310643666e-06, "loss": 1.0912, "step": 450 }, { "epoch": 0.2372435560231457, "grad_norm": 2.1521215438842773, "learning_rate": 4.98811876100723e-06, "loss": 1.0971, "step": 451 }, { "epoch": 0.2377695949500263, "grad_norm": 2.1117734909057617, "learning_rate": 4.988051019259505e-06, "loss": 1.1247, "step": 452 }, { "epoch": 0.2382956338769069, "grad_norm": 2.1884706020355225, "learning_rate": 4.987983085405722e-06, "loss": 1.1255, "step": 453 }, { "epoch": 0.2388216728037875, "grad_norm": 2.138962984085083, "learning_rate": 4.9879149594511245e-06, "loss": 1.0787, "step": 454 }, { "epoch": 0.23934771173066807, "grad_norm": 2.553452730178833, "learning_rate": 4.987846641400974e-06, "loss": 1.1178, "step": 455 }, { "epoch": 0.23987375065754865, "grad_norm": 2.5340464115142822, "learning_rate": 4.987778131260546e-06, "loss": 1.1577, "step": 456 }, { "epoch": 0.24039978958442926, "grad_norm": 2.2375919818878174, "learning_rate": 4.987709429035128e-06, "loss": 1.0711, "step": 457 }, { "epoch": 0.24092582851130984, "grad_norm": 2.35756254196167, "learning_rate": 4.987640534730027e-06, "loss": 1.1031, "step": 458 }, { "epoch": 0.24145186743819042, "grad_norm": 2.03385591506958, "learning_rate": 4.987571448350561e-06, "loss": 1.0869, "step": 459 }, { "epoch": 0.24197790636507102, "grad_norm": 2.662584066390991, "learning_rate": 4.987502169902065e-06, "loss": 1.0909, "step": 460 }, { "epoch": 0.2425039452919516, "grad_norm": 2.2569165229797363, "learning_rate": 4.987432699389888e-06, "loss": 1.1576, "step": 461 }, { "epoch": 0.2430299842188322, "grad_norm": 1.9718097448349, "learning_rate": 4.987363036819393e-06, "loss": 1.0577, "step": 462 }, { "epoch": 0.24355602314571279, "grad_norm": 2.2083537578582764, "learning_rate": 4.987293182195959e-06, "loss": 1.1328, "step": 463 }, { "epoch": 0.24408206207259336, "grad_norm": 2.2045726776123047, "learning_rate": 4.987223135524981e-06, "loss": 1.0908, "step": 464 }, { "epoch": 0.24460810099947397, "grad_norm": 2.213714122772217, "learning_rate": 4.987152896811866e-06, "loss": 1.124, "step": 465 }, { "epoch": 0.24513413992635455, "grad_norm": 4.030746936798096, "learning_rate": 4.987082466062038e-06, "loss": 1.0855, "step": 466 }, { "epoch": 0.24566017885323513, "grad_norm": 2.1142022609710693, "learning_rate": 4.987011843280934e-06, "loss": 1.1305, "step": 467 }, { "epoch": 0.24618621778011573, "grad_norm": 2.1746232509613037, "learning_rate": 4.986941028474009e-06, "loss": 1.0846, "step": 468 }, { "epoch": 0.2467122567069963, "grad_norm": 2.038947820663452, "learning_rate": 4.986870021646728e-06, "loss": 1.0907, "step": 469 }, { "epoch": 0.24723829563387692, "grad_norm": 12.261099815368652, "learning_rate": 4.986798822804576e-06, "loss": 1.1012, "step": 470 }, { "epoch": 0.2477643345607575, "grad_norm": 2.020077705383301, "learning_rate": 4.986727431953048e-06, "loss": 1.097, "step": 471 }, { "epoch": 0.24829037348763808, "grad_norm": 2.070114850997925, "learning_rate": 4.986655849097658e-06, "loss": 1.175, "step": 472 }, { "epoch": 0.24881641241451868, "grad_norm": 2.0364394187927246, "learning_rate": 4.986584074243932e-06, "loss": 1.0892, "step": 473 }, { "epoch": 0.24934245134139926, "grad_norm": 2.1961004734039307, "learning_rate": 4.986512107397413e-06, "loss": 1.0867, "step": 474 }, { "epoch": 0.24986849026827984, "grad_norm": 3.1488072872161865, "learning_rate": 4.986439948563656e-06, "loss": 1.0276, "step": 475 }, { "epoch": 0.2503945291951604, "grad_norm": 2.3070068359375, "learning_rate": 4.986367597748235e-06, "loss": 1.0897, "step": 476 }, { "epoch": 0.25092056812204105, "grad_norm": 2.0328757762908936, "learning_rate": 4.986295054956733e-06, "loss": 1.0573, "step": 477 }, { "epoch": 0.25144660704892163, "grad_norm": 2.4608747959136963, "learning_rate": 4.986222320194754e-06, "loss": 1.1343, "step": 478 }, { "epoch": 0.2519726459758022, "grad_norm": 2.249994993209839, "learning_rate": 4.986149393467913e-06, "loss": 1.0771, "step": 479 }, { "epoch": 0.2524986849026828, "grad_norm": 2.1573803424835205, "learning_rate": 4.98607627478184e-06, "loss": 1.0795, "step": 480 }, { "epoch": 0.25302472382956337, "grad_norm": 2.6239383220672607, "learning_rate": 4.986002964142182e-06, "loss": 1.0874, "step": 481 }, { "epoch": 0.253550762756444, "grad_norm": 2.0815794467926025, "learning_rate": 4.985929461554597e-06, "loss": 1.0729, "step": 482 }, { "epoch": 0.2540768016833246, "grad_norm": 2.156259059906006, "learning_rate": 4.985855767024763e-06, "loss": 1.0912, "step": 483 }, { "epoch": 0.25460284061020516, "grad_norm": 2.4136252403259277, "learning_rate": 4.985781880558369e-06, "loss": 1.1365, "step": 484 }, { "epoch": 0.25512887953708574, "grad_norm": 2.265622854232788, "learning_rate": 4.98570780216112e-06, "loss": 1.1218, "step": 485 }, { "epoch": 0.2556549184639663, "grad_norm": 2.1097841262817383, "learning_rate": 4.985633531838735e-06, "loss": 1.1238, "step": 486 }, { "epoch": 0.2561809573908469, "grad_norm": 2.205012083053589, "learning_rate": 4.985559069596949e-06, "loss": 1.0664, "step": 487 }, { "epoch": 0.25670699631772753, "grad_norm": 2.1896169185638428, "learning_rate": 4.9854844154415115e-06, "loss": 1.0374, "step": 488 }, { "epoch": 0.2572330352446081, "grad_norm": 2.0652949810028076, "learning_rate": 4.985409569378187e-06, "loss": 1.1016, "step": 489 }, { "epoch": 0.2577590741714887, "grad_norm": 2.1278676986694336, "learning_rate": 4.985334531412754e-06, "loss": 1.147, "step": 490 }, { "epoch": 0.25828511309836927, "grad_norm": 2.2769057750701904, "learning_rate": 4.985259301551005e-06, "loss": 1.1389, "step": 491 }, { "epoch": 0.25881115202524985, "grad_norm": 2.0440104007720947, "learning_rate": 4.985183879798751e-06, "loss": 1.0826, "step": 492 }, { "epoch": 0.2593371909521305, "grad_norm": 2.4153213500976562, "learning_rate": 4.985108266161815e-06, "loss": 1.105, "step": 493 }, { "epoch": 0.25986322987901106, "grad_norm": 2.3863043785095215, "learning_rate": 4.985032460646033e-06, "loss": 1.1023, "step": 494 }, { "epoch": 0.26038926880589164, "grad_norm": 2.2597336769104004, "learning_rate": 4.98495646325726e-06, "loss": 1.1046, "step": 495 }, { "epoch": 0.2609153077327722, "grad_norm": 2.541444778442383, "learning_rate": 4.984880274001364e-06, "loss": 1.1149, "step": 496 }, { "epoch": 0.2614413466596528, "grad_norm": 2.3011064529418945, "learning_rate": 4.984803892884227e-06, "loss": 1.0757, "step": 497 }, { "epoch": 0.26196738558653343, "grad_norm": 2.116774797439575, "learning_rate": 4.9847273199117475e-06, "loss": 1.1151, "step": 498 }, { "epoch": 0.262493424513414, "grad_norm": 2.2372357845306396, "learning_rate": 4.984650555089836e-06, "loss": 1.1107, "step": 499 }, { "epoch": 0.2630194634402946, "grad_norm": 2.0782155990600586, "learning_rate": 4.984573598424421e-06, "loss": 1.1174, "step": 500 }, { "epoch": 0.26354550236717517, "grad_norm": 2.0625476837158203, "learning_rate": 4.984496449921444e-06, "loss": 1.0965, "step": 501 }, { "epoch": 0.26407154129405574, "grad_norm": 2.142184019088745, "learning_rate": 4.9844191095868615e-06, "loss": 1.0678, "step": 502 }, { "epoch": 0.2645975802209363, "grad_norm": 2.1218082904815674, "learning_rate": 4.984341577426646e-06, "loss": 1.0661, "step": 503 }, { "epoch": 0.26512361914781696, "grad_norm": 2.2910757064819336, "learning_rate": 4.984263853446783e-06, "loss": 1.1111, "step": 504 }, { "epoch": 0.26564965807469754, "grad_norm": 2.0604546070098877, "learning_rate": 4.984185937653274e-06, "loss": 1.0614, "step": 505 }, { "epoch": 0.2661756970015781, "grad_norm": 2.1210556030273438, "learning_rate": 4.984107830052134e-06, "loss": 1.0925, "step": 506 }, { "epoch": 0.2667017359284587, "grad_norm": 2.535501003265381, "learning_rate": 4.984029530649396e-06, "loss": 1.1238, "step": 507 }, { "epoch": 0.2672277748553393, "grad_norm": 2.2978546619415283, "learning_rate": 4.9839510394511035e-06, "loss": 1.1615, "step": 508 }, { "epoch": 0.2677538137822199, "grad_norm": 2.0443382263183594, "learning_rate": 4.983872356463318e-06, "loss": 1.1087, "step": 509 }, { "epoch": 0.2682798527091005, "grad_norm": 2.216139316558838, "learning_rate": 4.983793481692114e-06, "loss": 1.1431, "step": 510 }, { "epoch": 0.26880589163598106, "grad_norm": 1.9255571365356445, "learning_rate": 4.983714415143583e-06, "loss": 1.0204, "step": 511 }, { "epoch": 0.26933193056286164, "grad_norm": 2.103969097137451, "learning_rate": 4.9836351568238286e-06, "loss": 1.0855, "step": 512 }, { "epoch": 0.2698579694897422, "grad_norm": 2.5458972454071045, "learning_rate": 4.98355570673897e-06, "loss": 1.0747, "step": 513 }, { "epoch": 0.27038400841662286, "grad_norm": 2.023601531982422, "learning_rate": 4.983476064895143e-06, "loss": 1.0471, "step": 514 }, { "epoch": 0.27091004734350344, "grad_norm": 2.0976908206939697, "learning_rate": 4.983396231298496e-06, "loss": 1.0658, "step": 515 }, { "epoch": 0.271436086270384, "grad_norm": 2.4051074981689453, "learning_rate": 4.9833162059551936e-06, "loss": 1.0624, "step": 516 }, { "epoch": 0.2719621251972646, "grad_norm": 2.0524230003356934, "learning_rate": 4.983235988871414e-06, "loss": 1.1261, "step": 517 }, { "epoch": 0.27248816412414517, "grad_norm": 2.1440162658691406, "learning_rate": 4.983155580053351e-06, "loss": 0.9893, "step": 518 }, { "epoch": 0.27301420305102575, "grad_norm": 2.1923670768737793, "learning_rate": 4.983074979507213e-06, "loss": 1.1066, "step": 519 }, { "epoch": 0.2735402419779064, "grad_norm": 2.2967565059661865, "learning_rate": 4.982994187239225e-06, "loss": 1.1256, "step": 520 }, { "epoch": 0.27406628090478696, "grad_norm": 2.0392587184906006, "learning_rate": 4.982913203255623e-06, "loss": 1.1026, "step": 521 }, { "epoch": 0.27459231983166754, "grad_norm": 2.371121644973755, "learning_rate": 4.9828320275626605e-06, "loss": 1.0607, "step": 522 }, { "epoch": 0.2751183587585481, "grad_norm": 2.082239866256714, "learning_rate": 4.982750660166606e-06, "loss": 1.0749, "step": 523 }, { "epoch": 0.2756443976854287, "grad_norm": 2.2039687633514404, "learning_rate": 4.98266910107374e-06, "loss": 1.0769, "step": 524 }, { "epoch": 0.27617043661230933, "grad_norm": 2.087859869003296, "learning_rate": 4.9825873502903625e-06, "loss": 1.1575, "step": 525 }, { "epoch": 0.2766964755391899, "grad_norm": 2.1991021633148193, "learning_rate": 4.982505407822783e-06, "loss": 1.1149, "step": 526 }, { "epoch": 0.2772225144660705, "grad_norm": 2.2656140327453613, "learning_rate": 4.98242327367733e-06, "loss": 1.0948, "step": 527 }, { "epoch": 0.27774855339295107, "grad_norm": 2.1107430458068848, "learning_rate": 4.982340947860344e-06, "loss": 1.0289, "step": 528 }, { "epoch": 0.27827459231983165, "grad_norm": 2.2510344982147217, "learning_rate": 4.982258430378184e-06, "loss": 1.0694, "step": 529 }, { "epoch": 0.2788006312467123, "grad_norm": 2.252258062362671, "learning_rate": 4.982175721237218e-06, "loss": 1.0435, "step": 530 }, { "epoch": 0.27932667017359286, "grad_norm": 2.12455677986145, "learning_rate": 4.982092820443834e-06, "loss": 1.0202, "step": 531 }, { "epoch": 0.27985270910047344, "grad_norm": 2.3654651641845703, "learning_rate": 4.982009728004433e-06, "loss": 1.1282, "step": 532 }, { "epoch": 0.280378748027354, "grad_norm": 2.3759138584136963, "learning_rate": 4.981926443925431e-06, "loss": 1.1557, "step": 533 }, { "epoch": 0.2809047869542346, "grad_norm": 1.9874821901321411, "learning_rate": 4.981842968213256e-06, "loss": 1.0723, "step": 534 }, { "epoch": 0.2814308258811152, "grad_norm": 2.154383897781372, "learning_rate": 4.981759300874356e-06, "loss": 1.0786, "step": 535 }, { "epoch": 0.2819568648079958, "grad_norm": 2.1774797439575195, "learning_rate": 4.9816754419151906e-06, "loss": 1.0457, "step": 536 }, { "epoch": 0.2824829037348764, "grad_norm": 2.206082820892334, "learning_rate": 4.981591391342233e-06, "loss": 1.0216, "step": 537 }, { "epoch": 0.28300894266175697, "grad_norm": 2.008676528930664, "learning_rate": 4.981507149161975e-06, "loss": 1.0297, "step": 538 }, { "epoch": 0.28353498158863755, "grad_norm": 2.0553462505340576, "learning_rate": 4.981422715380919e-06, "loss": 1.0967, "step": 539 }, { "epoch": 0.2840610205155181, "grad_norm": 2.047567844390869, "learning_rate": 4.981338090005586e-06, "loss": 1.0524, "step": 540 }, { "epoch": 0.28458705944239876, "grad_norm": 2.2144312858581543, "learning_rate": 4.981253273042509e-06, "loss": 1.1178, "step": 541 }, { "epoch": 0.28511309836927934, "grad_norm": 2.388124465942383, "learning_rate": 4.981168264498238e-06, "loss": 1.0728, "step": 542 }, { "epoch": 0.2856391372961599, "grad_norm": 2.152280807495117, "learning_rate": 4.981083064379335e-06, "loss": 1.1146, "step": 543 }, { "epoch": 0.2861651762230405, "grad_norm": 2.1481564044952393, "learning_rate": 4.98099767269238e-06, "loss": 1.1376, "step": 544 }, { "epoch": 0.2866912151499211, "grad_norm": 2.060664415359497, "learning_rate": 4.980912089443966e-06, "loss": 1.0961, "step": 545 }, { "epoch": 0.2872172540768017, "grad_norm": 2.032557964324951, "learning_rate": 4.9808263146406985e-06, "loss": 1.1055, "step": 546 }, { "epoch": 0.2877432930036823, "grad_norm": 2.0957093238830566, "learning_rate": 4.980740348289204e-06, "loss": 1.0444, "step": 547 }, { "epoch": 0.28826933193056287, "grad_norm": 2.0774853229522705, "learning_rate": 4.980654190396118e-06, "loss": 1.0963, "step": 548 }, { "epoch": 0.28879537085744345, "grad_norm": 2.0808207988739014, "learning_rate": 4.980567840968094e-06, "loss": 1.0634, "step": 549 }, { "epoch": 0.289321409784324, "grad_norm": 2.2924559116363525, "learning_rate": 4.980481300011797e-06, "loss": 1.0805, "step": 550 }, { "epoch": 0.2898474487112046, "grad_norm": 2.041088104248047, "learning_rate": 4.980394567533911e-06, "loss": 1.0983, "step": 551 }, { "epoch": 0.29037348763808524, "grad_norm": 2.030073881149292, "learning_rate": 4.980307643541132e-06, "loss": 1.1334, "step": 552 }, { "epoch": 0.2908995265649658, "grad_norm": 2.15849232673645, "learning_rate": 4.980220528040172e-06, "loss": 1.0906, "step": 553 }, { "epoch": 0.2914255654918464, "grad_norm": 2.094135284423828, "learning_rate": 4.9801332210377574e-06, "loss": 1.0644, "step": 554 }, { "epoch": 0.291951604418727, "grad_norm": 2.193941354751587, "learning_rate": 4.980045722540628e-06, "loss": 1.0819, "step": 555 }, { "epoch": 0.29247764334560755, "grad_norm": 2.2015504837036133, "learning_rate": 4.979958032555542e-06, "loss": 1.0759, "step": 556 }, { "epoch": 0.2930036822724882, "grad_norm": 2.1240222454071045, "learning_rate": 4.979870151089267e-06, "loss": 1.1268, "step": 557 }, { "epoch": 0.29352972119936876, "grad_norm": 2.0243959426879883, "learning_rate": 4.9797820781485905e-06, "loss": 1.0449, "step": 558 }, { "epoch": 0.29405576012624934, "grad_norm": 2.2300705909729004, "learning_rate": 4.979693813740313e-06, "loss": 1.0493, "step": 559 }, { "epoch": 0.2945817990531299, "grad_norm": 2.1185836791992188, "learning_rate": 4.979605357871249e-06, "loss": 1.0921, "step": 560 }, { "epoch": 0.2951078379800105, "grad_norm": 2.091691732406616, "learning_rate": 4.979516710548227e-06, "loss": 1.1025, "step": 561 }, { "epoch": 0.29563387690689114, "grad_norm": 2.1666178703308105, "learning_rate": 4.979427871778094e-06, "loss": 1.1245, "step": 562 }, { "epoch": 0.2961599158337717, "grad_norm": 2.6985056400299072, "learning_rate": 4.9793388415677066e-06, "loss": 1.1398, "step": 563 }, { "epoch": 0.2966859547606523, "grad_norm": 2.118074655532837, "learning_rate": 4.979249619923942e-06, "loss": 1.0897, "step": 564 }, { "epoch": 0.29721199368753287, "grad_norm": 2.246856927871704, "learning_rate": 4.979160206853687e-06, "loss": 1.0714, "step": 565 }, { "epoch": 0.29773803261441345, "grad_norm": 2.201953887939453, "learning_rate": 4.979070602363846e-06, "loss": 1.1466, "step": 566 }, { "epoch": 0.29826407154129403, "grad_norm": 2.048617362976074, "learning_rate": 4.9789808064613375e-06, "loss": 1.1368, "step": 567 }, { "epoch": 0.29879011046817466, "grad_norm": 2.1507785320281982, "learning_rate": 4.978890819153095e-06, "loss": 1.1499, "step": 568 }, { "epoch": 0.29931614939505524, "grad_norm": 1.9633440971374512, "learning_rate": 4.978800640446066e-06, "loss": 1.0667, "step": 569 }, { "epoch": 0.2998421883219358, "grad_norm": 2.1089606285095215, "learning_rate": 4.978710270347214e-06, "loss": 1.0611, "step": 570 }, { "epoch": 0.3003682272488164, "grad_norm": 2.170901298522949, "learning_rate": 4.9786197088635145e-06, "loss": 1.1524, "step": 571 }, { "epoch": 0.300894266175697, "grad_norm": 2.165510892868042, "learning_rate": 4.978528956001964e-06, "loss": 1.0987, "step": 572 }, { "epoch": 0.3014203051025776, "grad_norm": 2.0415878295898438, "learning_rate": 4.978438011769565e-06, "loss": 1.1582, "step": 573 }, { "epoch": 0.3019463440294582, "grad_norm": 2.110260248184204, "learning_rate": 4.978346876173342e-06, "loss": 1.0587, "step": 574 }, { "epoch": 0.30247238295633877, "grad_norm": 2.253488063812256, "learning_rate": 4.9782555492203334e-06, "loss": 1.1038, "step": 575 }, { "epoch": 0.30299842188321935, "grad_norm": 2.0166091918945312, "learning_rate": 4.978164030917587e-06, "loss": 1.0367, "step": 576 }, { "epoch": 0.3035244608100999, "grad_norm": 2.2842600345611572, "learning_rate": 4.978072321272171e-06, "loss": 1.0996, "step": 577 }, { "epoch": 0.30405049973698056, "grad_norm": 2.0563907623291016, "learning_rate": 4.977980420291166e-06, "loss": 1.1219, "step": 578 }, { "epoch": 0.30457653866386114, "grad_norm": 2.059800863265991, "learning_rate": 4.977888327981668e-06, "loss": 1.1193, "step": 579 }, { "epoch": 0.3051025775907417, "grad_norm": 2.242919921875, "learning_rate": 4.977796044350788e-06, "loss": 1.0701, "step": 580 }, { "epoch": 0.3056286165176223, "grad_norm": 1.9749282598495483, "learning_rate": 4.977703569405651e-06, "loss": 1.0771, "step": 581 }, { "epoch": 0.3061546554445029, "grad_norm": 2.2251386642456055, "learning_rate": 4.977610903153397e-06, "loss": 1.084, "step": 582 }, { "epoch": 0.30668069437138346, "grad_norm": 2.0289855003356934, "learning_rate": 4.97751804560118e-06, "loss": 1.0732, "step": 583 }, { "epoch": 0.3072067332982641, "grad_norm": 2.152841806411743, "learning_rate": 4.977424996756171e-06, "loss": 1.0712, "step": 584 }, { "epoch": 0.30773277222514467, "grad_norm": 2.3243937492370605, "learning_rate": 4.977331756625555e-06, "loss": 1.0197, "step": 585 }, { "epoch": 0.30825881115202525, "grad_norm": 2.293274402618408, "learning_rate": 4.97723832521653e-06, "loss": 1.1121, "step": 586 }, { "epoch": 0.3087848500789058, "grad_norm": 2.139958143234253, "learning_rate": 4.97714470253631e-06, "loss": 1.0799, "step": 587 }, { "epoch": 0.3093108890057864, "grad_norm": 2.269357442855835, "learning_rate": 4.977050888592123e-06, "loss": 1.0872, "step": 588 }, { "epoch": 0.30983692793266704, "grad_norm": 2.268691301345825, "learning_rate": 4.976956883391215e-06, "loss": 1.1079, "step": 589 }, { "epoch": 0.3103629668595476, "grad_norm": 2.127131223678589, "learning_rate": 4.976862686940842e-06, "loss": 1.1217, "step": 590 }, { "epoch": 0.3108890057864282, "grad_norm": 2.0126006603240967, "learning_rate": 4.976768299248278e-06, "loss": 1.0719, "step": 591 }, { "epoch": 0.3114150447133088, "grad_norm": 1.965903639793396, "learning_rate": 4.97667372032081e-06, "loss": 1.0843, "step": 592 }, { "epoch": 0.31194108364018935, "grad_norm": 2.1280322074890137, "learning_rate": 4.976578950165742e-06, "loss": 1.0676, "step": 593 }, { "epoch": 0.31246712256707, "grad_norm": 2.2355756759643555, "learning_rate": 4.976483988790391e-06, "loss": 1.0855, "step": 594 }, { "epoch": 0.31299316149395057, "grad_norm": 2.153095245361328, "learning_rate": 4.976388836202088e-06, "loss": 1.0357, "step": 595 }, { "epoch": 0.31351920042083115, "grad_norm": 2.023137092590332, "learning_rate": 4.97629349240818e-06, "loss": 1.0381, "step": 596 }, { "epoch": 0.3140452393477117, "grad_norm": 2.2524759769439697, "learning_rate": 4.97619795741603e-06, "loss": 1.0911, "step": 597 }, { "epoch": 0.3145712782745923, "grad_norm": 2.1904008388519287, "learning_rate": 4.9761022312330135e-06, "loss": 1.047, "step": 598 }, { "epoch": 0.3150973172014729, "grad_norm": 2.3166565895080566, "learning_rate": 4.976006313866521e-06, "loss": 1.0663, "step": 599 }, { "epoch": 0.3156233561283535, "grad_norm": 2.11413836479187, "learning_rate": 4.975910205323959e-06, "loss": 1.0843, "step": 600 }, { "epoch": 0.3161493950552341, "grad_norm": 2.1609344482421875, "learning_rate": 4.975813905612749e-06, "loss": 1.1344, "step": 601 }, { "epoch": 0.3166754339821147, "grad_norm": 2.055330276489258, "learning_rate": 4.975717414740326e-06, "loss": 1.0663, "step": 602 }, { "epoch": 0.31720147290899525, "grad_norm": 2.2735755443573, "learning_rate": 4.975620732714139e-06, "loss": 1.1061, "step": 603 }, { "epoch": 0.31772751183587583, "grad_norm": 2.1966300010681152, "learning_rate": 4.975523859541654e-06, "loss": 1.1498, "step": 604 }, { "epoch": 0.31825355076275647, "grad_norm": 2.20951247215271, "learning_rate": 4.975426795230351e-06, "loss": 1.1057, "step": 605 }, { "epoch": 0.31877958968963704, "grad_norm": 2.0706050395965576, "learning_rate": 4.975329539787725e-06, "loss": 1.0906, "step": 606 }, { "epoch": 0.3193056286165176, "grad_norm": 2.0394089221954346, "learning_rate": 4.975232093221284e-06, "loss": 1.0514, "step": 607 }, { "epoch": 0.3198316675433982, "grad_norm": 2.1639111042022705, "learning_rate": 4.975134455538551e-06, "loss": 1.0787, "step": 608 }, { "epoch": 0.3203577064702788, "grad_norm": 2.025575876235962, "learning_rate": 4.975036626747067e-06, "loss": 1.0451, "step": 609 }, { "epoch": 0.3208837453971594, "grad_norm": 2.060215950012207, "learning_rate": 4.974938606854384e-06, "loss": 1.0821, "step": 610 }, { "epoch": 0.32140978432404, "grad_norm": 2.265155792236328, "learning_rate": 4.974840395868073e-06, "loss": 1.1341, "step": 611 }, { "epoch": 0.32193582325092057, "grad_norm": 2.22503924369812, "learning_rate": 4.974741993795712e-06, "loss": 1.1643, "step": 612 }, { "epoch": 0.32246186217780115, "grad_norm": 2.11155104637146, "learning_rate": 4.9746434006449034e-06, "loss": 1.0548, "step": 613 }, { "epoch": 0.32298790110468173, "grad_norm": 2.0055696964263916, "learning_rate": 4.974544616423258e-06, "loss": 1.0769, "step": 614 }, { "epoch": 0.3235139400315623, "grad_norm": 2.0843770503997803, "learning_rate": 4.974445641138403e-06, "loss": 1.0701, "step": 615 }, { "epoch": 0.32403997895844294, "grad_norm": 2.0580337047576904, "learning_rate": 4.9743464747979785e-06, "loss": 1.0465, "step": 616 }, { "epoch": 0.3245660178853235, "grad_norm": 2.3719844818115234, "learning_rate": 4.974247117409645e-06, "loss": 1.1498, "step": 617 }, { "epoch": 0.3250920568122041, "grad_norm": 1.9926241636276245, "learning_rate": 4.974147568981072e-06, "loss": 1.081, "step": 618 }, { "epoch": 0.3256180957390847, "grad_norm": 2.029318332672119, "learning_rate": 4.974047829519946e-06, "loss": 1.139, "step": 619 }, { "epoch": 0.32614413466596526, "grad_norm": 2.0171804428100586, "learning_rate": 4.973947899033969e-06, "loss": 1.0887, "step": 620 }, { "epoch": 0.3266701735928459, "grad_norm": 2.3209071159362793, "learning_rate": 4.973847777530854e-06, "loss": 1.1156, "step": 621 }, { "epoch": 0.32719621251972647, "grad_norm": 2.360849142074585, "learning_rate": 4.973747465018334e-06, "loss": 1.1305, "step": 622 }, { "epoch": 0.32772225144660705, "grad_norm": 2.1828086376190186, "learning_rate": 4.973646961504154e-06, "loss": 1.091, "step": 623 }, { "epoch": 0.32824829037348763, "grad_norm": 1.9628446102142334, "learning_rate": 4.973546266996074e-06, "loss": 1.0932, "step": 624 }, { "epoch": 0.3287743293003682, "grad_norm": 2.0040283203125, "learning_rate": 4.973445381501868e-06, "loss": 1.0723, "step": 625 }, { "epoch": 0.32930036822724884, "grad_norm": 2.289292097091675, "learning_rate": 4.973344305029326e-06, "loss": 1.1526, "step": 626 }, { "epoch": 0.3298264071541294, "grad_norm": 2.1106910705566406, "learning_rate": 4.973243037586252e-06, "loss": 1.1327, "step": 627 }, { "epoch": 0.33035244608101, "grad_norm": 2.326677083969116, "learning_rate": 4.9731415791804655e-06, "loss": 1.0898, "step": 628 }, { "epoch": 0.3308784850078906, "grad_norm": 2.086299180984497, "learning_rate": 4.9730399298198e-06, "loss": 1.0842, "step": 629 }, { "epoch": 0.33140452393477116, "grad_norm": 2.045738935470581, "learning_rate": 4.972938089512104e-06, "loss": 1.0156, "step": 630 }, { "epoch": 0.33193056286165173, "grad_norm": 2.038058280944824, "learning_rate": 4.97283605826524e-06, "loss": 1.0545, "step": 631 }, { "epoch": 0.33245660178853237, "grad_norm": 2.0892717838287354, "learning_rate": 4.972733836087088e-06, "loss": 1.099, "step": 632 }, { "epoch": 0.33298264071541295, "grad_norm": 2.2152934074401855, "learning_rate": 4.972631422985538e-06, "loss": 1.0775, "step": 633 }, { "epoch": 0.3335086796422935, "grad_norm": 2.3605494499206543, "learning_rate": 4.9725288189685e-06, "loss": 1.0682, "step": 634 }, { "epoch": 0.3340347185691741, "grad_norm": 2.076491117477417, "learning_rate": 4.9724260240438945e-06, "loss": 1.063, "step": 635 }, { "epoch": 0.3345607574960547, "grad_norm": 3.2677767276763916, "learning_rate": 4.97232303821966e-06, "loss": 1.1173, "step": 636 }, { "epoch": 0.3350867964229353, "grad_norm": 2.110320568084717, "learning_rate": 4.972219861503746e-06, "loss": 1.0264, "step": 637 }, { "epoch": 0.3356128353498159, "grad_norm": 2.101353406906128, "learning_rate": 4.972116493904121e-06, "loss": 1.0806, "step": 638 }, { "epoch": 0.3361388742766965, "grad_norm": 2.247091293334961, "learning_rate": 4.972012935428765e-06, "loss": 1.1178, "step": 639 }, { "epoch": 0.33666491320357705, "grad_norm": 2.183757781982422, "learning_rate": 4.971909186085675e-06, "loss": 1.0615, "step": 640 }, { "epoch": 0.33719095213045763, "grad_norm": 2.0801236629486084, "learning_rate": 4.97180524588286e-06, "loss": 1.0441, "step": 641 }, { "epoch": 0.33771699105733827, "grad_norm": 1.9939873218536377, "learning_rate": 4.9717011148283455e-06, "loss": 1.0853, "step": 642 }, { "epoch": 0.33824302998421885, "grad_norm": 2.13399338722229, "learning_rate": 4.971596792930174e-06, "loss": 0.9943, "step": 643 }, { "epoch": 0.3387690689110994, "grad_norm": 2.1221766471862793, "learning_rate": 4.971492280196397e-06, "loss": 1.0088, "step": 644 }, { "epoch": 0.33929510783798, "grad_norm": 2.023320436477661, "learning_rate": 4.971387576635087e-06, "loss": 1.0449, "step": 645 }, { "epoch": 0.3398211467648606, "grad_norm": 2.1422126293182373, "learning_rate": 4.971282682254327e-06, "loss": 1.0987, "step": 646 }, { "epoch": 0.3403471856917412, "grad_norm": 2.136868715286255, "learning_rate": 4.971177597062215e-06, "loss": 1.0983, "step": 647 }, { "epoch": 0.3408732246186218, "grad_norm": 2.1036930084228516, "learning_rate": 4.971072321066868e-06, "loss": 1.1284, "step": 648 }, { "epoch": 0.3413992635455024, "grad_norm": 2.147191286087036, "learning_rate": 4.970966854276411e-06, "loss": 1.1165, "step": 649 }, { "epoch": 0.34192530247238295, "grad_norm": 2.1734893321990967, "learning_rate": 4.970861196698988e-06, "loss": 1.0834, "step": 650 }, { "epoch": 0.34245134139926353, "grad_norm": 2.038435459136963, "learning_rate": 4.97075534834276e-06, "loss": 1.0193, "step": 651 }, { "epoch": 0.3429773803261441, "grad_norm": 2.077822208404541, "learning_rate": 4.970649309215895e-06, "loss": 1.0697, "step": 652 }, { "epoch": 0.34350341925302474, "grad_norm": 2.056907892227173, "learning_rate": 4.970543079326584e-06, "loss": 1.0593, "step": 653 }, { "epoch": 0.3440294581799053, "grad_norm": 2.7795369625091553, "learning_rate": 4.9704366586830275e-06, "loss": 1.122, "step": 654 }, { "epoch": 0.3445554971067859, "grad_norm": 2.0807559490203857, "learning_rate": 4.970330047293443e-06, "loss": 1.0225, "step": 655 }, { "epoch": 0.3450815360336665, "grad_norm": 2.219024658203125, "learning_rate": 4.970223245166062e-06, "loss": 1.1506, "step": 656 }, { "epoch": 0.34560757496054706, "grad_norm": 2.1809475421905518, "learning_rate": 4.970116252309131e-06, "loss": 1.1094, "step": 657 }, { "epoch": 0.3461336138874277, "grad_norm": 2.243777275085449, "learning_rate": 4.970009068730911e-06, "loss": 1.0942, "step": 658 }, { "epoch": 0.3466596528143083, "grad_norm": 2.106391191482544, "learning_rate": 4.969901694439677e-06, "loss": 1.0899, "step": 659 }, { "epoch": 0.34718569174118885, "grad_norm": 2.1109979152679443, "learning_rate": 4.96979412944372e-06, "loss": 1.0622, "step": 660 }, { "epoch": 0.34771173066806943, "grad_norm": 2.292466163635254, "learning_rate": 4.969686373751347e-06, "loss": 1.1081, "step": 661 }, { "epoch": 0.34823776959495, "grad_norm": 1.9919096231460571, "learning_rate": 4.9695784273708755e-06, "loss": 1.0774, "step": 662 }, { "epoch": 0.34876380852183064, "grad_norm": 2.2421789169311523, "learning_rate": 4.969470290310641e-06, "loss": 1.0958, "step": 663 }, { "epoch": 0.3492898474487112, "grad_norm": 2.069939613342285, "learning_rate": 4.969361962578994e-06, "loss": 1.0758, "step": 664 }, { "epoch": 0.3498158863755918, "grad_norm": 2.0892951488494873, "learning_rate": 4.969253444184297e-06, "loss": 1.105, "step": 665 }, { "epoch": 0.3503419253024724, "grad_norm": 2.1536753177642822, "learning_rate": 4.969144735134929e-06, "loss": 1.0655, "step": 666 }, { "epoch": 0.35086796422935296, "grad_norm": 2.031996250152588, "learning_rate": 4.969035835439284e-06, "loss": 1.1107, "step": 667 }, { "epoch": 0.35139400315623354, "grad_norm": 2.068693161010742, "learning_rate": 4.9689267451057714e-06, "loss": 1.0293, "step": 668 }, { "epoch": 0.35192004208311417, "grad_norm": 2.1489906311035156, "learning_rate": 4.9688174641428136e-06, "loss": 1.0656, "step": 669 }, { "epoch": 0.35244608100999475, "grad_norm": 2.5132720470428467, "learning_rate": 4.9687079925588475e-06, "loss": 1.0558, "step": 670 }, { "epoch": 0.35297211993687533, "grad_norm": 1.9639642238616943, "learning_rate": 4.968598330362326e-06, "loss": 1.0498, "step": 671 }, { "epoch": 0.3534981588637559, "grad_norm": 2.2413175106048584, "learning_rate": 4.968488477561716e-06, "loss": 0.986, "step": 672 }, { "epoch": 0.3540241977906365, "grad_norm": 2.0109381675720215, "learning_rate": 4.968378434165501e-06, "loss": 1.1112, "step": 673 }, { "epoch": 0.3545502367175171, "grad_norm": 2.1863934993743896, "learning_rate": 4.968268200182175e-06, "loss": 1.0843, "step": 674 }, { "epoch": 0.3550762756443977, "grad_norm": 2.262173652648926, "learning_rate": 4.968157775620252e-06, "loss": 1.0938, "step": 675 }, { "epoch": 0.3556023145712783, "grad_norm": 2.261918067932129, "learning_rate": 4.968047160488256e-06, "loss": 1.1004, "step": 676 }, { "epoch": 0.35612835349815886, "grad_norm": 2.13324236869812, "learning_rate": 4.967936354794728e-06, "loss": 1.0881, "step": 677 }, { "epoch": 0.35665439242503943, "grad_norm": 2.271207809448242, "learning_rate": 4.967825358548225e-06, "loss": 1.0967, "step": 678 }, { "epoch": 0.35718043135192007, "grad_norm": 2.177339553833008, "learning_rate": 4.967714171757315e-06, "loss": 1.1131, "step": 679 }, { "epoch": 0.35770647027880065, "grad_norm": 2.1329848766326904, "learning_rate": 4.967602794430585e-06, "loss": 1.112, "step": 680 }, { "epoch": 0.3582325092056812, "grad_norm": 2.0018250942230225, "learning_rate": 4.967491226576634e-06, "loss": 1.0853, "step": 681 }, { "epoch": 0.3587585481325618, "grad_norm": 2.06925106048584, "learning_rate": 4.967379468204075e-06, "loss": 1.1405, "step": 682 }, { "epoch": 0.3592845870594424, "grad_norm": 2.0437614917755127, "learning_rate": 4.967267519321538e-06, "loss": 1.1165, "step": 683 }, { "epoch": 0.35981062598632296, "grad_norm": 2.043297290802002, "learning_rate": 4.9671553799376685e-06, "loss": 1.0438, "step": 684 }, { "epoch": 0.3603366649132036, "grad_norm": 2.060760259628296, "learning_rate": 4.967043050061121e-06, "loss": 1.0401, "step": 685 }, { "epoch": 0.3608627038400842, "grad_norm": 2.3929009437561035, "learning_rate": 4.966930529700572e-06, "loss": 1.0812, "step": 686 }, { "epoch": 0.36138874276696475, "grad_norm": 2.2057461738586426, "learning_rate": 4.966817818864708e-06, "loss": 1.0499, "step": 687 }, { "epoch": 0.36191478169384533, "grad_norm": 2.0358550548553467, "learning_rate": 4.966704917562231e-06, "loss": 1.1603, "step": 688 }, { "epoch": 0.3624408206207259, "grad_norm": 2.0840682983398438, "learning_rate": 4.966591825801859e-06, "loss": 1.0967, "step": 689 }, { "epoch": 0.36296685954760655, "grad_norm": 2.0170061588287354, "learning_rate": 4.9664785435923255e-06, "loss": 1.0573, "step": 690 }, { "epoch": 0.3634928984744871, "grad_norm": 2.1349408626556396, "learning_rate": 4.966365070942375e-06, "loss": 1.0665, "step": 691 }, { "epoch": 0.3640189374013677, "grad_norm": 2.1616368293762207, "learning_rate": 4.966251407860769e-06, "loss": 1.0306, "step": 692 }, { "epoch": 0.3645449763282483, "grad_norm": 2.2529335021972656, "learning_rate": 4.966137554356285e-06, "loss": 1.0445, "step": 693 }, { "epoch": 0.36507101525512886, "grad_norm": 2.041102170944214, "learning_rate": 4.966023510437713e-06, "loss": 1.0395, "step": 694 }, { "epoch": 0.3655970541820095, "grad_norm": 2.0450620651245117, "learning_rate": 4.9659092761138585e-06, "loss": 1.064, "step": 695 }, { "epoch": 0.3661230931088901, "grad_norm": 2.163081407546997, "learning_rate": 4.965794851393541e-06, "loss": 1.0729, "step": 696 }, { "epoch": 0.36664913203577065, "grad_norm": 2.1602089405059814, "learning_rate": 4.965680236285596e-06, "loss": 1.0707, "step": 697 }, { "epoch": 0.36717517096265123, "grad_norm": 2.3263938426971436, "learning_rate": 4.965565430798875e-06, "loss": 1.0146, "step": 698 }, { "epoch": 0.3677012098895318, "grad_norm": 2.0192365646362305, "learning_rate": 4.965450434942238e-06, "loss": 1.0751, "step": 699 }, { "epoch": 0.3682272488164124, "grad_norm": 2.0557174682617188, "learning_rate": 4.965335248724568e-06, "loss": 1.0749, "step": 700 }, { "epoch": 0.368753287743293, "grad_norm": 2.29679799079895, "learning_rate": 4.965219872154757e-06, "loss": 1.0516, "step": 701 }, { "epoch": 0.3692793266701736, "grad_norm": 2.2303829193115234, "learning_rate": 4.965104305241713e-06, "loss": 1.1586, "step": 702 }, { "epoch": 0.3698053655970542, "grad_norm": 2.112283706665039, "learning_rate": 4.964988547994361e-06, "loss": 1.0833, "step": 703 }, { "epoch": 0.37033140452393476, "grad_norm": 2.1807613372802734, "learning_rate": 4.9648726004216354e-06, "loss": 1.0786, "step": 704 }, { "epoch": 0.37085744345081534, "grad_norm": 2.0990889072418213, "learning_rate": 4.964756462532492e-06, "loss": 1.0555, "step": 705 }, { "epoch": 0.371383482377696, "grad_norm": 2.2034318447113037, "learning_rate": 4.964640134335896e-06, "loss": 1.0696, "step": 706 }, { "epoch": 0.37190952130457655, "grad_norm": 2.207235813140869, "learning_rate": 4.964523615840831e-06, "loss": 1.0897, "step": 707 }, { "epoch": 0.37243556023145713, "grad_norm": 1.8820483684539795, "learning_rate": 4.964406907056291e-06, "loss": 1.0822, "step": 708 }, { "epoch": 0.3729615991583377, "grad_norm": 2.2243785858154297, "learning_rate": 4.964290007991291e-06, "loss": 1.0958, "step": 709 }, { "epoch": 0.3734876380852183, "grad_norm": 2.208770990371704, "learning_rate": 4.964172918654854e-06, "loss": 1.0803, "step": 710 }, { "epoch": 0.3740136770120989, "grad_norm": 2.1083521842956543, "learning_rate": 4.96405563905602e-06, "loss": 1.0513, "step": 711 }, { "epoch": 0.3745397159389795, "grad_norm": 2.0161774158477783, "learning_rate": 4.963938169203847e-06, "loss": 1.0775, "step": 712 }, { "epoch": 0.3750657548658601, "grad_norm": 2.1578962802886963, "learning_rate": 4.963820509107403e-06, "loss": 1.0695, "step": 713 }, { "epoch": 0.37559179379274066, "grad_norm": 2.1972339153289795, "learning_rate": 4.963702658775774e-06, "loss": 1.0703, "step": 714 }, { "epoch": 0.37611783271962124, "grad_norm": 2.338205575942993, "learning_rate": 4.9635846182180594e-06, "loss": 1.0756, "step": 715 }, { "epoch": 0.3766438716465018, "grad_norm": 2.281242847442627, "learning_rate": 4.963466387443372e-06, "loss": 1.1177, "step": 716 }, { "epoch": 0.37716991057338245, "grad_norm": 2.092036724090576, "learning_rate": 4.963347966460841e-06, "loss": 1.1004, "step": 717 }, { "epoch": 0.37769594950026303, "grad_norm": 2.148244857788086, "learning_rate": 4.963229355279611e-06, "loss": 1.1157, "step": 718 }, { "epoch": 0.3782219884271436, "grad_norm": 1.9961777925491333, "learning_rate": 4.963110553908838e-06, "loss": 1.0703, "step": 719 }, { "epoch": 0.3787480273540242, "grad_norm": 2.299091339111328, "learning_rate": 4.962991562357697e-06, "loss": 1.1265, "step": 720 }, { "epoch": 0.37927406628090476, "grad_norm": 2.1055006980895996, "learning_rate": 4.962872380635374e-06, "loss": 1.0361, "step": 721 }, { "epoch": 0.3798001052077854, "grad_norm": 2.1554667949676514, "learning_rate": 4.9627530087510725e-06, "loss": 1.0603, "step": 722 }, { "epoch": 0.380326144134666, "grad_norm": 2.1003949642181396, "learning_rate": 4.962633446714009e-06, "loss": 1.0714, "step": 723 }, { "epoch": 0.38085218306154656, "grad_norm": 2.1850736141204834, "learning_rate": 4.962513694533414e-06, "loss": 1.0795, "step": 724 }, { "epoch": 0.38137822198842714, "grad_norm": 2.0440175533294678, "learning_rate": 4.962393752218535e-06, "loss": 1.0882, "step": 725 }, { "epoch": 0.3819042609153077, "grad_norm": 2.2579755783081055, "learning_rate": 4.962273619778632e-06, "loss": 1.1066, "step": 726 }, { "epoch": 0.38243029984218835, "grad_norm": 2.0210318565368652, "learning_rate": 4.962153297222981e-06, "loss": 1.0843, "step": 727 }, { "epoch": 0.3829563387690689, "grad_norm": 2.1218135356903076, "learning_rate": 4.962032784560873e-06, "loss": 1.1039, "step": 728 }, { "epoch": 0.3834823776959495, "grad_norm": 2.2498831748962402, "learning_rate": 4.961912081801612e-06, "loss": 1.0389, "step": 729 }, { "epoch": 0.3840084166228301, "grad_norm": 2.6789276599884033, "learning_rate": 4.9617911889545175e-06, "loss": 1.0772, "step": 730 }, { "epoch": 0.38453445554971066, "grad_norm": 1.9847339391708374, "learning_rate": 4.961670106028924e-06, "loss": 1.0804, "step": 731 }, { "epoch": 0.38506049447659124, "grad_norm": 2.048737049102783, "learning_rate": 4.9615488330341814e-06, "loss": 1.1089, "step": 732 }, { "epoch": 0.3855865334034719, "grad_norm": 2.2241313457489014, "learning_rate": 4.961427369979652e-06, "loss": 1.0618, "step": 733 }, { "epoch": 0.38611257233035245, "grad_norm": 1.9084025621414185, "learning_rate": 4.961305716874716e-06, "loss": 1.0316, "step": 734 }, { "epoch": 0.38663861125723303, "grad_norm": 2.0064773559570312, "learning_rate": 4.9611838737287646e-06, "loss": 1.0289, "step": 735 }, { "epoch": 0.3871646501841136, "grad_norm": 2.386962652206421, "learning_rate": 4.961061840551205e-06, "loss": 1.1488, "step": 736 }, { "epoch": 0.3876906891109942, "grad_norm": 2.0626862049102783, "learning_rate": 4.960939617351462e-06, "loss": 1.0793, "step": 737 }, { "epoch": 0.3882167280378748, "grad_norm": 2.1622767448425293, "learning_rate": 4.960817204138971e-06, "loss": 1.0923, "step": 738 }, { "epoch": 0.3887427669647554, "grad_norm": 2.049163818359375, "learning_rate": 4.9606946009231834e-06, "loss": 1.0423, "step": 739 }, { "epoch": 0.389268805891636, "grad_norm": 2.0196399688720703, "learning_rate": 4.960571807713568e-06, "loss": 0.9832, "step": 740 }, { "epoch": 0.38979484481851656, "grad_norm": 1.982647180557251, "learning_rate": 4.960448824519602e-06, "loss": 1.0424, "step": 741 }, { "epoch": 0.39032088374539714, "grad_norm": 2.0468926429748535, "learning_rate": 4.960325651350784e-06, "loss": 1.074, "step": 742 }, { "epoch": 0.3908469226722778, "grad_norm": 2.402381181716919, "learning_rate": 4.960202288216624e-06, "loss": 1.058, "step": 743 }, { "epoch": 0.39137296159915835, "grad_norm": 2.065232753753662, "learning_rate": 4.960078735126646e-06, "loss": 1.0985, "step": 744 }, { "epoch": 0.39189900052603893, "grad_norm": 2.1949756145477295, "learning_rate": 4.95995499209039e-06, "loss": 1.0791, "step": 745 }, { "epoch": 0.3924250394529195, "grad_norm": 2.121232271194458, "learning_rate": 4.959831059117411e-06, "loss": 1.0606, "step": 746 }, { "epoch": 0.3929510783798001, "grad_norm": 2.247145652770996, "learning_rate": 4.959706936217278e-06, "loss": 1.0991, "step": 747 }, { "epoch": 0.39347711730668067, "grad_norm": 2.0540339946746826, "learning_rate": 4.9595826233995735e-06, "loss": 1.0835, "step": 748 }, { "epoch": 0.3940031562335613, "grad_norm": 2.173257350921631, "learning_rate": 4.959458120673898e-06, "loss": 1.0588, "step": 749 }, { "epoch": 0.3945291951604419, "grad_norm": 2.1530778408050537, "learning_rate": 4.959333428049862e-06, "loss": 1.0395, "step": 750 }, { "epoch": 0.39505523408732246, "grad_norm": 2.0705490112304688, "learning_rate": 4.959208545537095e-06, "loss": 1.071, "step": 751 }, { "epoch": 0.39558127301420304, "grad_norm": 1.9439338445663452, "learning_rate": 4.95908347314524e-06, "loss": 1.0224, "step": 752 }, { "epoch": 0.3961073119410836, "grad_norm": 2.1683454513549805, "learning_rate": 4.958958210883952e-06, "loss": 1.0745, "step": 753 }, { "epoch": 0.39663335086796425, "grad_norm": 2.2809042930603027, "learning_rate": 4.958832758762903e-06, "loss": 1.0887, "step": 754 }, { "epoch": 0.39715938979484483, "grad_norm": 2.161447048187256, "learning_rate": 4.9587071167917814e-06, "loss": 1.1447, "step": 755 }, { "epoch": 0.3976854287217254, "grad_norm": 2.1375932693481445, "learning_rate": 4.958581284980285e-06, "loss": 1.0295, "step": 756 }, { "epoch": 0.398211467648606, "grad_norm": 2.0431041717529297, "learning_rate": 4.958455263338133e-06, "loss": 1.0567, "step": 757 }, { "epoch": 0.39873750657548657, "grad_norm": 2.0288238525390625, "learning_rate": 4.958329051875053e-06, "loss": 1.0736, "step": 758 }, { "epoch": 0.3992635455023672, "grad_norm": 2.146132230758667, "learning_rate": 4.958202650600791e-06, "loss": 1.0744, "step": 759 }, { "epoch": 0.3997895844292478, "grad_norm": 2.1740963459014893, "learning_rate": 4.958076059525107e-06, "loss": 1.0263, "step": 760 }, { "epoch": 0.40031562335612836, "grad_norm": 2.1219875812530518, "learning_rate": 4.957949278657773e-06, "loss": 1.0508, "step": 761 }, { "epoch": 0.40084166228300894, "grad_norm": 2.0742340087890625, "learning_rate": 4.9578223080085815e-06, "loss": 1.0455, "step": 762 }, { "epoch": 0.4013677012098895, "grad_norm": 2.1779415607452393, "learning_rate": 4.957695147587334e-06, "loss": 1.1079, "step": 763 }, { "epoch": 0.4018937401367701, "grad_norm": 2.151047706604004, "learning_rate": 4.957567797403848e-06, "loss": 1.0893, "step": 764 }, { "epoch": 0.40241977906365073, "grad_norm": 2.1728570461273193, "learning_rate": 4.9574402574679594e-06, "loss": 1.0726, "step": 765 }, { "epoch": 0.4029458179905313, "grad_norm": 1.982230305671692, "learning_rate": 4.957312527789512e-06, "loss": 1.0629, "step": 766 }, { "epoch": 0.4034718569174119, "grad_norm": 1.953464150428772, "learning_rate": 4.95718460837837e-06, "loss": 1.1093, "step": 767 }, { "epoch": 0.40399789584429247, "grad_norm": 1.9718215465545654, "learning_rate": 4.9570564992444116e-06, "loss": 1.1018, "step": 768 }, { "epoch": 0.40452393477117304, "grad_norm": 2.067629337310791, "learning_rate": 4.956928200397526e-06, "loss": 1.0364, "step": 769 }, { "epoch": 0.4050499736980537, "grad_norm": 2.1172022819519043, "learning_rate": 4.956799711847619e-06, "loss": 1.0693, "step": 770 }, { "epoch": 0.40557601262493426, "grad_norm": 2.0539615154266357, "learning_rate": 4.956671033604613e-06, "loss": 1.0034, "step": 771 }, { "epoch": 0.40610205155181484, "grad_norm": 1.9780375957489014, "learning_rate": 4.956542165678443e-06, "loss": 1.0515, "step": 772 }, { "epoch": 0.4066280904786954, "grad_norm": 2.0974819660186768, "learning_rate": 4.95641310807906e-06, "loss": 1.0754, "step": 773 }, { "epoch": 0.407154129405576, "grad_norm": 2.1018221378326416, "learning_rate": 4.956283860816427e-06, "loss": 1.1102, "step": 774 }, { "epoch": 0.4076801683324566, "grad_norm": 2.3969085216522217, "learning_rate": 4.9561544239005235e-06, "loss": 1.0455, "step": 775 }, { "epoch": 0.4082062072593372, "grad_norm": 2.2645649909973145, "learning_rate": 4.956024797341345e-06, "loss": 0.9724, "step": 776 }, { "epoch": 0.4087322461862178, "grad_norm": 2.3406150341033936, "learning_rate": 4.955894981148898e-06, "loss": 1.1341, "step": 777 }, { "epoch": 0.40925828511309836, "grad_norm": 2.0782880783081055, "learning_rate": 4.955764975333208e-06, "loss": 1.0116, "step": 778 }, { "epoch": 0.40978432403997894, "grad_norm": 2.1269314289093018, "learning_rate": 4.955634779904312e-06, "loss": 1.0967, "step": 779 }, { "epoch": 0.4103103629668595, "grad_norm": 2.198559522628784, "learning_rate": 4.9555043948722625e-06, "loss": 1.0815, "step": 780 }, { "epoch": 0.41083640189374016, "grad_norm": 2.2189719676971436, "learning_rate": 4.9553738202471264e-06, "loss": 1.0559, "step": 781 }, { "epoch": 0.41136244082062073, "grad_norm": 2.2313179969787598, "learning_rate": 4.955243056038986e-06, "loss": 1.046, "step": 782 }, { "epoch": 0.4118884797475013, "grad_norm": 1.9563003778457642, "learning_rate": 4.955112102257939e-06, "loss": 1.0735, "step": 783 }, { "epoch": 0.4124145186743819, "grad_norm": 1.99479341506958, "learning_rate": 4.954980958914093e-06, "loss": 1.0657, "step": 784 }, { "epoch": 0.41294055760126247, "grad_norm": 2.029634714126587, "learning_rate": 4.954849626017577e-06, "loss": 1.0811, "step": 785 }, { "epoch": 0.4134665965281431, "grad_norm": 2.2947723865509033, "learning_rate": 4.9547181035785314e-06, "loss": 1.0807, "step": 786 }, { "epoch": 0.4139926354550237, "grad_norm": 2.0323445796966553, "learning_rate": 4.9545863916071094e-06, "loss": 1.0715, "step": 787 }, { "epoch": 0.41451867438190426, "grad_norm": 2.0068464279174805, "learning_rate": 4.954454490113482e-06, "loss": 1.0447, "step": 788 }, { "epoch": 0.41504471330878484, "grad_norm": 2.132549285888672, "learning_rate": 4.954322399107833e-06, "loss": 1.0454, "step": 789 }, { "epoch": 0.4155707522356654, "grad_norm": 2.0086755752563477, "learning_rate": 4.954190118600361e-06, "loss": 1.0724, "step": 790 }, { "epoch": 0.41609679116254605, "grad_norm": 2.1461241245269775, "learning_rate": 4.95405764860128e-06, "loss": 1.0391, "step": 791 }, { "epoch": 0.41662283008942663, "grad_norm": 2.1352107524871826, "learning_rate": 4.953924989120818e-06, "loss": 0.9898, "step": 792 }, { "epoch": 0.4171488690163072, "grad_norm": 2.0694406032562256, "learning_rate": 4.953792140169219e-06, "loss": 1.0819, "step": 793 }, { "epoch": 0.4176749079431878, "grad_norm": 2.088433027267456, "learning_rate": 4.953659101756739e-06, "loss": 1.0833, "step": 794 }, { "epoch": 0.41820094687006837, "grad_norm": 2.1760306358337402, "learning_rate": 4.95352587389365e-06, "loss": 1.0535, "step": 795 }, { "epoch": 0.41872698579694895, "grad_norm": 2.2031099796295166, "learning_rate": 4.95339245659024e-06, "loss": 1.0389, "step": 796 }, { "epoch": 0.4192530247238296, "grad_norm": 2.247276782989502, "learning_rate": 4.953258849856809e-06, "loss": 1.0839, "step": 797 }, { "epoch": 0.41977906365071016, "grad_norm": 2.24357271194458, "learning_rate": 4.953125053703674e-06, "loss": 1.0666, "step": 798 }, { "epoch": 0.42030510257759074, "grad_norm": 2.240151882171631, "learning_rate": 4.952991068141165e-06, "loss": 1.1009, "step": 799 }, { "epoch": 0.4208311415044713, "grad_norm": 2.2172327041625977, "learning_rate": 4.952856893179628e-06, "loss": 1.0928, "step": 800 }, { "epoch": 0.4213571804313519, "grad_norm": 2.377336025238037, "learning_rate": 4.952722528829422e-06, "loss": 1.0968, "step": 801 }, { "epoch": 0.42188321935823253, "grad_norm": 2.466841459274292, "learning_rate": 4.9525879751009205e-06, "loss": 1.0631, "step": 802 }, { "epoch": 0.4224092582851131, "grad_norm": 2.035644054412842, "learning_rate": 4.952453232004516e-06, "loss": 1.0609, "step": 803 }, { "epoch": 0.4229352972119937, "grad_norm": 2.2472054958343506, "learning_rate": 4.952318299550608e-06, "loss": 1.0613, "step": 804 }, { "epoch": 0.42346133613887427, "grad_norm": 2.175999879837036, "learning_rate": 4.952183177749618e-06, "loss": 1.0954, "step": 805 }, { "epoch": 0.42398737506575485, "grad_norm": 2.206052303314209, "learning_rate": 4.952047866611978e-06, "loss": 1.0965, "step": 806 }, { "epoch": 0.4245134139926355, "grad_norm": 1.9550546407699585, "learning_rate": 4.951912366148135e-06, "loss": 1.0835, "step": 807 }, { "epoch": 0.42503945291951606, "grad_norm": 2.194734811782837, "learning_rate": 4.951776676368552e-06, "loss": 1.1179, "step": 808 }, { "epoch": 0.42556549184639664, "grad_norm": 2.094862222671509, "learning_rate": 4.951640797283704e-06, "loss": 1.0634, "step": 809 }, { "epoch": 0.4260915307732772, "grad_norm": 1.980043888092041, "learning_rate": 4.951504728904085e-06, "loss": 1.0874, "step": 810 }, { "epoch": 0.4266175697001578, "grad_norm": 2.2654919624328613, "learning_rate": 4.9513684712402e-06, "loss": 1.057, "step": 811 }, { "epoch": 0.4271436086270384, "grad_norm": 2.197120189666748, "learning_rate": 4.951232024302569e-06, "loss": 1.1114, "step": 812 }, { "epoch": 0.427669647553919, "grad_norm": 2.143324375152588, "learning_rate": 4.9510953881017275e-06, "loss": 1.07, "step": 813 }, { "epoch": 0.4281956864807996, "grad_norm": 2.1920077800750732, "learning_rate": 4.950958562648226e-06, "loss": 1.0373, "step": 814 }, { "epoch": 0.42872172540768017, "grad_norm": 2.0401923656463623, "learning_rate": 4.950821547952629e-06, "loss": 1.1111, "step": 815 }, { "epoch": 0.42924776433456074, "grad_norm": 1.9541674852371216, "learning_rate": 4.950684344025515e-06, "loss": 1.0153, "step": 816 }, { "epoch": 0.4297738032614413, "grad_norm": 3.5096704959869385, "learning_rate": 4.9505469508774776e-06, "loss": 1.0435, "step": 817 }, { "epoch": 0.43029984218832196, "grad_norm": 2.0304462909698486, "learning_rate": 4.9504093685191255e-06, "loss": 1.0786, "step": 818 }, { "epoch": 0.43082588111520254, "grad_norm": 2.115224599838257, "learning_rate": 4.950271596961082e-06, "loss": 1.0854, "step": 819 }, { "epoch": 0.4313519200420831, "grad_norm": 2.176621913909912, "learning_rate": 4.950133636213984e-06, "loss": 0.9909, "step": 820 }, { "epoch": 0.4318779589689637, "grad_norm": 2.2046449184417725, "learning_rate": 4.949995486288484e-06, "loss": 1.0688, "step": 821 }, { "epoch": 0.43240399789584427, "grad_norm": 2.1462888717651367, "learning_rate": 4.949857147195249e-06, "loss": 1.0644, "step": 822 }, { "epoch": 0.4329300368227249, "grad_norm": 2.0735347270965576, "learning_rate": 4.94971861894496e-06, "loss": 1.022, "step": 823 }, { "epoch": 0.4334560757496055, "grad_norm": 2.086724042892456, "learning_rate": 4.949579901548312e-06, "loss": 1.02, "step": 824 }, { "epoch": 0.43398211467648606, "grad_norm": 2.078622341156006, "learning_rate": 4.949440995016018e-06, "loss": 1.0653, "step": 825 }, { "epoch": 0.43450815360336664, "grad_norm": 2.1504440307617188, "learning_rate": 4.949301899358801e-06, "loss": 1.0708, "step": 826 }, { "epoch": 0.4350341925302472, "grad_norm": 2.2340216636657715, "learning_rate": 4.949162614587401e-06, "loss": 1.0688, "step": 827 }, { "epoch": 0.4355602314571278, "grad_norm": 2.2017569541931152, "learning_rate": 4.949023140712574e-06, "loss": 1.0935, "step": 828 }, { "epoch": 0.43608627038400843, "grad_norm": 2.117745876312256, "learning_rate": 4.948883477745088e-06, "loss": 1.0868, "step": 829 }, { "epoch": 0.436612309310889, "grad_norm": 2.0983524322509766, "learning_rate": 4.948743625695726e-06, "loss": 1.0695, "step": 830 }, { "epoch": 0.4371383482377696, "grad_norm": 2.205693244934082, "learning_rate": 4.948603584575287e-06, "loss": 1.0541, "step": 831 }, { "epoch": 0.43766438716465017, "grad_norm": 1.9967527389526367, "learning_rate": 4.948463354394583e-06, "loss": 0.9933, "step": 832 }, { "epoch": 0.43819042609153075, "grad_norm": 2.113577127456665, "learning_rate": 4.948322935164442e-06, "loss": 1.0199, "step": 833 }, { "epoch": 0.4387164650184114, "grad_norm": 2.0825533866882324, "learning_rate": 4.948182326895705e-06, "loss": 1.0446, "step": 834 }, { "epoch": 0.43924250394529196, "grad_norm": 2.0186421871185303, "learning_rate": 4.94804152959923e-06, "loss": 1.0798, "step": 835 }, { "epoch": 0.43976854287217254, "grad_norm": 2.3025147914886475, "learning_rate": 4.947900543285888e-06, "loss": 0.9977, "step": 836 }, { "epoch": 0.4402945817990531, "grad_norm": 2.1662867069244385, "learning_rate": 4.947759367966564e-06, "loss": 1.048, "step": 837 }, { "epoch": 0.4408206207259337, "grad_norm": 2.0708656311035156, "learning_rate": 4.947618003652158e-06, "loss": 1.0715, "step": 838 }, { "epoch": 0.44134665965281433, "grad_norm": 2.2494263648986816, "learning_rate": 4.947476450353586e-06, "loss": 1.0901, "step": 839 }, { "epoch": 0.4418726985796949, "grad_norm": 2.3319430351257324, "learning_rate": 4.947334708081777e-06, "loss": 1.0308, "step": 840 }, { "epoch": 0.4423987375065755, "grad_norm": 2.134620428085327, "learning_rate": 4.947192776847676e-06, "loss": 1.0459, "step": 841 }, { "epoch": 0.44292477643345607, "grad_norm": 2.075429916381836, "learning_rate": 4.94705065666224e-06, "loss": 1.0733, "step": 842 }, { "epoch": 0.44345081536033665, "grad_norm": 2.173069953918457, "learning_rate": 4.946908347536444e-06, "loss": 1.1092, "step": 843 }, { "epoch": 0.4439768542872172, "grad_norm": 2.1481893062591553, "learning_rate": 4.946765849481274e-06, "loss": 1.0822, "step": 844 }, { "epoch": 0.44450289321409786, "grad_norm": 2.247277021408081, "learning_rate": 4.9466231625077354e-06, "loss": 1.0777, "step": 845 }, { "epoch": 0.44502893214097844, "grad_norm": 2.1181042194366455, "learning_rate": 4.946480286626842e-06, "loss": 1.1139, "step": 846 }, { "epoch": 0.445554971067859, "grad_norm": 2.05195951461792, "learning_rate": 4.946337221849628e-06, "loss": 1.0738, "step": 847 }, { "epoch": 0.4460810099947396, "grad_norm": 2.122732639312744, "learning_rate": 4.946193968187139e-06, "loss": 1.061, "step": 848 }, { "epoch": 0.4466070489216202, "grad_norm": 1.8827515840530396, "learning_rate": 4.946050525650434e-06, "loss": 1.061, "step": 849 }, { "epoch": 0.4471330878485008, "grad_norm": 2.3874471187591553, "learning_rate": 4.945906894250591e-06, "loss": 1.0667, "step": 850 }, { "epoch": 0.4476591267753814, "grad_norm": 2.274724006652832, "learning_rate": 4.945763073998699e-06, "loss": 1.0559, "step": 851 }, { "epoch": 0.44818516570226197, "grad_norm": 2.2730906009674072, "learning_rate": 4.945619064905861e-06, "loss": 1.0952, "step": 852 }, { "epoch": 0.44871120462914255, "grad_norm": 2.190969944000244, "learning_rate": 4.945474866983199e-06, "loss": 1.0816, "step": 853 }, { "epoch": 0.4492372435560231, "grad_norm": 3.6214282512664795, "learning_rate": 4.945330480241844e-06, "loss": 1.09, "step": 854 }, { "epoch": 0.44976328248290376, "grad_norm": 2.0487356185913086, "learning_rate": 4.945185904692946e-06, "loss": 1.0279, "step": 855 }, { "epoch": 0.45028932140978434, "grad_norm": 2.074282646179199, "learning_rate": 4.945041140347669e-06, "loss": 1.0514, "step": 856 }, { "epoch": 0.4508153603366649, "grad_norm": 2.126495838165283, "learning_rate": 4.944896187217187e-06, "loss": 1.0819, "step": 857 }, { "epoch": 0.4513413992635455, "grad_norm": 2.0265605449676514, "learning_rate": 4.944751045312695e-06, "loss": 1.0282, "step": 858 }, { "epoch": 0.4518674381904261, "grad_norm": 2.0557355880737305, "learning_rate": 4.944605714645399e-06, "loss": 1.1052, "step": 859 }, { "epoch": 0.4523934771173067, "grad_norm": 2.026393175125122, "learning_rate": 4.944460195226519e-06, "loss": 0.982, "step": 860 }, { "epoch": 0.4529195160441873, "grad_norm": 2.1781463623046875, "learning_rate": 4.9443144870672925e-06, "loss": 1.1251, "step": 861 }, { "epoch": 0.45344555497106787, "grad_norm": 2.053683042526245, "learning_rate": 4.944168590178968e-06, "loss": 1.0766, "step": 862 }, { "epoch": 0.45397159389794844, "grad_norm": 2.1147496700286865, "learning_rate": 4.944022504572811e-06, "loss": 1.0174, "step": 863 }, { "epoch": 0.454497632824829, "grad_norm": 2.06046199798584, "learning_rate": 4.943876230260102e-06, "loss": 1.0836, "step": 864 }, { "epoch": 0.4550236717517096, "grad_norm": 2.171419382095337, "learning_rate": 4.9437297672521345e-06, "loss": 1.0695, "step": 865 }, { "epoch": 0.45554971067859024, "grad_norm": 2.064301013946533, "learning_rate": 4.943583115560217e-06, "loss": 1.0147, "step": 866 }, { "epoch": 0.4560757496054708, "grad_norm": 2.6638195514678955, "learning_rate": 4.943436275195673e-06, "loss": 1.0565, "step": 867 }, { "epoch": 0.4566017885323514, "grad_norm": 3.9418976306915283, "learning_rate": 4.943289246169839e-06, "loss": 1.0768, "step": 868 }, { "epoch": 0.457127827459232, "grad_norm": 2.114297389984131, "learning_rate": 4.943142028494069e-06, "loss": 1.0687, "step": 869 }, { "epoch": 0.45765386638611255, "grad_norm": 2.139803171157837, "learning_rate": 4.942994622179729e-06, "loss": 1.0464, "step": 870 }, { "epoch": 0.4581799053129932, "grad_norm": 2.011474370956421, "learning_rate": 4.942847027238201e-06, "loss": 1.0181, "step": 871 }, { "epoch": 0.45870594423987376, "grad_norm": 2.1592113971710205, "learning_rate": 4.94269924368088e-06, "loss": 1.0699, "step": 872 }, { "epoch": 0.45923198316675434, "grad_norm": 2.0230283737182617, "learning_rate": 4.942551271519178e-06, "loss": 1.075, "step": 873 }, { "epoch": 0.4597580220936349, "grad_norm": 2.286768913269043, "learning_rate": 4.942403110764518e-06, "loss": 1.0604, "step": 874 }, { "epoch": 0.4602840610205155, "grad_norm": 2.305375337600708, "learning_rate": 4.942254761428343e-06, "loss": 1.0067, "step": 875 }, { "epoch": 0.46081009994739613, "grad_norm": 2.416245698928833, "learning_rate": 4.942106223522104e-06, "loss": 1.1109, "step": 876 }, { "epoch": 0.4613361388742767, "grad_norm": 2.1339962482452393, "learning_rate": 4.941957497057272e-06, "loss": 1.0708, "step": 877 }, { "epoch": 0.4618621778011573, "grad_norm": 1.9983795881271362, "learning_rate": 4.941808582045329e-06, "loss": 1.0032, "step": 878 }, { "epoch": 0.46238821672803787, "grad_norm": 2.1115024089813232, "learning_rate": 4.9416594784977735e-06, "loss": 1.0272, "step": 879 }, { "epoch": 0.46291425565491845, "grad_norm": 2.2785818576812744, "learning_rate": 4.941510186426118e-06, "loss": 1.0538, "step": 880 }, { "epoch": 0.46344029458179903, "grad_norm": 2.009938955307007, "learning_rate": 4.94136070584189e-06, "loss": 1.0432, "step": 881 }, { "epoch": 0.46396633350867966, "grad_norm": 2.119264841079712, "learning_rate": 4.94121103675663e-06, "loss": 1.063, "step": 882 }, { "epoch": 0.46449237243556024, "grad_norm": 2.267575979232788, "learning_rate": 4.941061179181896e-06, "loss": 1.0698, "step": 883 }, { "epoch": 0.4650184113624408, "grad_norm": 2.2345592975616455, "learning_rate": 4.940911133129257e-06, "loss": 1.0898, "step": 884 }, { "epoch": 0.4655444502893214, "grad_norm": 2.175180673599243, "learning_rate": 4.940760898610299e-06, "loss": 1.0915, "step": 885 }, { "epoch": 0.466070489216202, "grad_norm": 2.036628246307373, "learning_rate": 4.940610475636621e-06, "loss": 1.0981, "step": 886 }, { "epoch": 0.4665965281430826, "grad_norm": 2.193129539489746, "learning_rate": 4.9404598642198386e-06, "loss": 1.1237, "step": 887 }, { "epoch": 0.4671225670699632, "grad_norm": 1.920074462890625, "learning_rate": 4.9403090643715804e-06, "loss": 1.0358, "step": 888 }, { "epoch": 0.46764860599684377, "grad_norm": 2.0745346546173096, "learning_rate": 4.940158076103489e-06, "loss": 1.0487, "step": 889 }, { "epoch": 0.46817464492372435, "grad_norm": 1.9645469188690186, "learning_rate": 4.940006899427225e-06, "loss": 1.0256, "step": 890 }, { "epoch": 0.4687006838506049, "grad_norm": 1.9696778059005737, "learning_rate": 4.939855534354458e-06, "loss": 1.0302, "step": 891 }, { "epoch": 0.46922672277748556, "grad_norm": 2.1893057823181152, "learning_rate": 4.939703980896875e-06, "loss": 1.0391, "step": 892 }, { "epoch": 0.46975276170436614, "grad_norm": 2.0537021160125732, "learning_rate": 4.93955223906618e-06, "loss": 1.0498, "step": 893 }, { "epoch": 0.4702788006312467, "grad_norm": 2.4528138637542725, "learning_rate": 4.9394003088740875e-06, "loss": 1.0393, "step": 894 }, { "epoch": 0.4708048395581273, "grad_norm": 2.2085723876953125, "learning_rate": 4.93924819033233e-06, "loss": 1.0789, "step": 895 }, { "epoch": 0.4713308784850079, "grad_norm": 2.0029642581939697, "learning_rate": 4.9390958834526504e-06, "loss": 1.0621, "step": 896 }, { "epoch": 0.47185691741188845, "grad_norm": 2.0400004386901855, "learning_rate": 4.93894338824681e-06, "loss": 1.0426, "step": 897 }, { "epoch": 0.4723829563387691, "grad_norm": 2.3174595832824707, "learning_rate": 4.9387907047265825e-06, "loss": 1.0273, "step": 898 }, { "epoch": 0.47290899526564967, "grad_norm": 1.998889446258545, "learning_rate": 4.938637832903758e-06, "loss": 1.0401, "step": 899 }, { "epoch": 0.47343503419253025, "grad_norm": 2.0847246646881104, "learning_rate": 4.93848477279014e-06, "loss": 1.0677, "step": 900 }, { "epoch": 0.4739610731194108, "grad_norm": 2.086249351501465, "learning_rate": 4.938331524397544e-06, "loss": 1.043, "step": 901 }, { "epoch": 0.4744871120462914, "grad_norm": 2.1909382343292236, "learning_rate": 4.938178087737805e-06, "loss": 0.9977, "step": 902 }, { "epoch": 0.47501315097317204, "grad_norm": 2.066394567489624, "learning_rate": 4.938024462822769e-06, "loss": 1.044, "step": 903 }, { "epoch": 0.4755391899000526, "grad_norm": 2.1768858432769775, "learning_rate": 4.937870649664299e-06, "loss": 0.9886, "step": 904 }, { "epoch": 0.4760652288269332, "grad_norm": 2.0450236797332764, "learning_rate": 4.937716648274269e-06, "loss": 1.0471, "step": 905 }, { "epoch": 0.4765912677538138, "grad_norm": 2.218719720840454, "learning_rate": 4.937562458664571e-06, "loss": 1.0324, "step": 906 }, { "epoch": 0.47711730668069435, "grad_norm": 2.2519423961639404, "learning_rate": 4.937408080847109e-06, "loss": 1.0899, "step": 907 }, { "epoch": 0.477643345607575, "grad_norm": 2.045959234237671, "learning_rate": 4.9372535148338055e-06, "loss": 1.0383, "step": 908 }, { "epoch": 0.47816938453445557, "grad_norm": 2.1137306690216064, "learning_rate": 4.937098760636591e-06, "loss": 1.0223, "step": 909 }, { "epoch": 0.47869542346133614, "grad_norm": 2.2585835456848145, "learning_rate": 4.936943818267418e-06, "loss": 1.027, "step": 910 }, { "epoch": 0.4792214623882167, "grad_norm": 2.161625862121582, "learning_rate": 4.936788687738247e-06, "loss": 1.0318, "step": 911 }, { "epoch": 0.4797475013150973, "grad_norm": 2.0743277072906494, "learning_rate": 4.936633369061057e-06, "loss": 1.1014, "step": 912 }, { "epoch": 0.4802735402419779, "grad_norm": 2.1271307468414307, "learning_rate": 4.936477862247841e-06, "loss": 1.0403, "step": 913 }, { "epoch": 0.4807995791688585, "grad_norm": 2.0820491313934326, "learning_rate": 4.9363221673106046e-06, "loss": 1.069, "step": 914 }, { "epoch": 0.4813256180957391, "grad_norm": 2.0069093704223633, "learning_rate": 4.936166284261369e-06, "loss": 1.0752, "step": 915 }, { "epoch": 0.4818516570226197, "grad_norm": 2.2541720867156982, "learning_rate": 4.936010213112172e-06, "loss": 1.0309, "step": 916 }, { "epoch": 0.48237769594950025, "grad_norm": 2.155980110168457, "learning_rate": 4.9358539538750636e-06, "loss": 1.0078, "step": 917 }, { "epoch": 0.48290373487638083, "grad_norm": 2.217339038848877, "learning_rate": 4.935697506562107e-06, "loss": 1.0522, "step": 918 }, { "epoch": 0.48342977380326146, "grad_norm": 1.963270902633667, "learning_rate": 4.935540871185384e-06, "loss": 1.0692, "step": 919 }, { "epoch": 0.48395581273014204, "grad_norm": 1.9923917055130005, "learning_rate": 4.935384047756987e-06, "loss": 1.0926, "step": 920 }, { "epoch": 0.4844818516570226, "grad_norm": 2.177624464035034, "learning_rate": 4.935227036289026e-06, "loss": 1.0727, "step": 921 }, { "epoch": 0.4850078905839032, "grad_norm": 2.022496461868286, "learning_rate": 4.935069836793622e-06, "loss": 1.0267, "step": 922 }, { "epoch": 0.4855339295107838, "grad_norm": 2.0110666751861572, "learning_rate": 4.9349124492829155e-06, "loss": 1.0911, "step": 923 }, { "epoch": 0.4860599684376644, "grad_norm": 2.1780877113342285, "learning_rate": 4.934754873769057e-06, "loss": 1.0494, "step": 924 }, { "epoch": 0.486586007364545, "grad_norm": 2.0291390419006348, "learning_rate": 4.934597110264212e-06, "loss": 1.0485, "step": 925 }, { "epoch": 0.48711204629142557, "grad_norm": 1.947896957397461, "learning_rate": 4.9344391587805626e-06, "loss": 1.0789, "step": 926 }, { "epoch": 0.48763808521830615, "grad_norm": 1.9520971775054932, "learning_rate": 4.934281019330305e-06, "loss": 1.0644, "step": 927 }, { "epoch": 0.48816412414518673, "grad_norm": 2.0348432064056396, "learning_rate": 4.93412269192565e-06, "loss": 1.0471, "step": 928 }, { "epoch": 0.4886901630720673, "grad_norm": 2.214876651763916, "learning_rate": 4.93396417657882e-06, "loss": 1.0921, "step": 929 }, { "epoch": 0.48921620199894794, "grad_norm": 1.9910991191864014, "learning_rate": 4.933805473302057e-06, "loss": 1.0962, "step": 930 }, { "epoch": 0.4897422409258285, "grad_norm": 2.0497536659240723, "learning_rate": 4.933646582107612e-06, "loss": 1.0502, "step": 931 }, { "epoch": 0.4902682798527091, "grad_norm": 2.102994203567505, "learning_rate": 4.933487503007756e-06, "loss": 1.0676, "step": 932 }, { "epoch": 0.4907943187795897, "grad_norm": 1.885666012763977, "learning_rate": 4.933328236014768e-06, "loss": 1.0005, "step": 933 }, { "epoch": 0.49132035770647026, "grad_norm": 2.1525766849517822, "learning_rate": 4.933168781140949e-06, "loss": 1.0997, "step": 934 }, { "epoch": 0.4918463966333509, "grad_norm": 2.0346620082855225, "learning_rate": 4.9330091383986086e-06, "loss": 1.0651, "step": 935 }, { "epoch": 0.49237243556023147, "grad_norm": 2.0436878204345703, "learning_rate": 4.932849307800074e-06, "loss": 1.0539, "step": 936 }, { "epoch": 0.49289847448711205, "grad_norm": 2.1023032665252686, "learning_rate": 4.932689289357686e-06, "loss": 1.0583, "step": 937 }, { "epoch": 0.4934245134139926, "grad_norm": 2.0781443119049072, "learning_rate": 4.932529083083798e-06, "loss": 1.0753, "step": 938 }, { "epoch": 0.4939505523408732, "grad_norm": 2.0385992527008057, "learning_rate": 4.932368688990783e-06, "loss": 1.0165, "step": 939 }, { "epoch": 0.49447659126775384, "grad_norm": 2.350186586380005, "learning_rate": 4.932208107091022e-06, "loss": 1.0834, "step": 940 }, { "epoch": 0.4950026301946344, "grad_norm": 2.2009286880493164, "learning_rate": 4.932047337396917e-06, "loss": 1.0975, "step": 941 }, { "epoch": 0.495528669121515, "grad_norm": 2.389380931854248, "learning_rate": 4.931886379920878e-06, "loss": 1.0853, "step": 942 }, { "epoch": 0.4960547080483956, "grad_norm": 2.016162157058716, "learning_rate": 4.931725234675334e-06, "loss": 1.039, "step": 943 }, { "epoch": 0.49658074697527615, "grad_norm": 2.116718292236328, "learning_rate": 4.9315639016727286e-06, "loss": 1.0182, "step": 944 }, { "epoch": 0.49710678590215673, "grad_norm": 2.1381125450134277, "learning_rate": 4.931402380925517e-06, "loss": 1.1051, "step": 945 }, { "epoch": 0.49763282482903737, "grad_norm": 2.0954737663269043, "learning_rate": 4.931240672446171e-06, "loss": 1.038, "step": 946 }, { "epoch": 0.49815886375591795, "grad_norm": 2.167865037918091, "learning_rate": 4.931078776247176e-06, "loss": 1.0998, "step": 947 }, { "epoch": 0.4986849026827985, "grad_norm": 2.1278021335601807, "learning_rate": 4.930916692341034e-06, "loss": 1.0374, "step": 948 }, { "epoch": 0.4992109416096791, "grad_norm": 2.088512420654297, "learning_rate": 4.9307544207402565e-06, "loss": 1.0954, "step": 949 }, { "epoch": 0.4997369805365597, "grad_norm": 2.015916109085083, "learning_rate": 4.930591961457375e-06, "loss": 1.0163, "step": 950 }, { "epoch": 0.5002630194634403, "grad_norm": 2.0662143230438232, "learning_rate": 4.930429314504933e-06, "loss": 1.0968, "step": 951 }, { "epoch": 0.5007890583903208, "grad_norm": 2.0692410469055176, "learning_rate": 4.930266479895488e-06, "loss": 1.0772, "step": 952 }, { "epoch": 0.5013150973172015, "grad_norm": 2.0734803676605225, "learning_rate": 4.930103457641613e-06, "loss": 1.1096, "step": 953 }, { "epoch": 0.5018411362440821, "grad_norm": 2.167228937149048, "learning_rate": 4.929940247755896e-06, "loss": 1.0608, "step": 954 }, { "epoch": 0.5023671751709626, "grad_norm": 2.272087574005127, "learning_rate": 4.929776850250937e-06, "loss": 1.0825, "step": 955 }, { "epoch": 0.5028932140978433, "grad_norm": 2.0937726497650146, "learning_rate": 4.929613265139354e-06, "loss": 1.0651, "step": 956 }, { "epoch": 0.5034192530247238, "grad_norm": 2.168090343475342, "learning_rate": 4.929449492433777e-06, "loss": 1.0821, "step": 957 }, { "epoch": 0.5039452919516044, "grad_norm": 2.0708675384521484, "learning_rate": 4.92928553214685e-06, "loss": 1.0655, "step": 958 }, { "epoch": 0.5044713308784851, "grad_norm": 2.067678689956665, "learning_rate": 4.929121384291234e-06, "loss": 1.05, "step": 959 }, { "epoch": 0.5049973698053656, "grad_norm": 1.9181219339370728, "learning_rate": 4.928957048879602e-06, "loss": 0.9935, "step": 960 }, { "epoch": 0.5055234087322462, "grad_norm": 2.217785358428955, "learning_rate": 4.928792525924644e-06, "loss": 0.97, "step": 961 }, { "epoch": 0.5060494476591267, "grad_norm": 2.084656238555908, "learning_rate": 4.928627815439062e-06, "loss": 1.0541, "step": 962 }, { "epoch": 0.5065754865860074, "grad_norm": 2.035367727279663, "learning_rate": 4.928462917435574e-06, "loss": 1.0694, "step": 963 }, { "epoch": 0.507101525512888, "grad_norm": 2.001654624938965, "learning_rate": 4.928297831926912e-06, "loss": 1.0232, "step": 964 }, { "epoch": 0.5076275644397685, "grad_norm": 2.57733154296875, "learning_rate": 4.928132558925822e-06, "loss": 1.0664, "step": 965 }, { "epoch": 0.5081536033666492, "grad_norm": 2.1757423877716064, "learning_rate": 4.927967098445066e-06, "loss": 1.1119, "step": 966 }, { "epoch": 0.5086796422935297, "grad_norm": 2.089594602584839, "learning_rate": 4.927801450497417e-06, "loss": 1.0212, "step": 967 }, { "epoch": 0.5092056812204103, "grad_norm": 2.078519821166992, "learning_rate": 4.927635615095668e-06, "loss": 1.0381, "step": 968 }, { "epoch": 0.5097317201472908, "grad_norm": 2.0807132720947266, "learning_rate": 4.927469592252621e-06, "loss": 1.0272, "step": 969 }, { "epoch": 0.5102577590741715, "grad_norm": 2.1806020736694336, "learning_rate": 4.927303381981098e-06, "loss": 1.0846, "step": 970 }, { "epoch": 0.5107837980010521, "grad_norm": 2.1434948444366455, "learning_rate": 4.927136984293928e-06, "loss": 1.0775, "step": 971 }, { "epoch": 0.5113098369279326, "grad_norm": 2.000924825668335, "learning_rate": 4.926970399203962e-06, "loss": 1.0272, "step": 972 }, { "epoch": 0.5118358758548133, "grad_norm": 2.1742711067199707, "learning_rate": 4.926803626724062e-06, "loss": 1.0253, "step": 973 }, { "epoch": 0.5123619147816938, "grad_norm": 2.1074674129486084, "learning_rate": 4.926636666867103e-06, "loss": 1.0146, "step": 974 }, { "epoch": 0.5128879537085744, "grad_norm": 2.1562392711639404, "learning_rate": 4.926469519645976e-06, "loss": 1.0364, "step": 975 }, { "epoch": 0.5134139926354551, "grad_norm": 2.4177775382995605, "learning_rate": 4.926302185073591e-06, "loss": 1.0658, "step": 976 }, { "epoch": 0.5139400315623356, "grad_norm": 2.322571277618408, "learning_rate": 4.9261346631628635e-06, "loss": 1.0489, "step": 977 }, { "epoch": 0.5144660704892162, "grad_norm": 2.0937836170196533, "learning_rate": 4.925966953926729e-06, "loss": 1.0535, "step": 978 }, { "epoch": 0.5149921094160967, "grad_norm": 2.065213680267334, "learning_rate": 4.925799057378139e-06, "loss": 1.0097, "step": 979 }, { "epoch": 0.5155181483429774, "grad_norm": 2.0844249725341797, "learning_rate": 4.925630973530054e-06, "loss": 1.0719, "step": 980 }, { "epoch": 0.516044187269858, "grad_norm": 2.4148666858673096, "learning_rate": 4.925462702395454e-06, "loss": 1.0906, "step": 981 }, { "epoch": 0.5165702261967385, "grad_norm": 2.071423053741455, "learning_rate": 4.925294243987331e-06, "loss": 1.03, "step": 982 }, { "epoch": 0.5170962651236192, "grad_norm": 2.2397513389587402, "learning_rate": 4.9251255983186915e-06, "loss": 1.0412, "step": 983 }, { "epoch": 0.5176223040504997, "grad_norm": 2.171269655227661, "learning_rate": 4.924956765402557e-06, "loss": 1.1, "step": 984 }, { "epoch": 0.5181483429773803, "grad_norm": 2.1705877780914307, "learning_rate": 4.924787745251963e-06, "loss": 1.0534, "step": 985 }, { "epoch": 0.518674381904261, "grad_norm": 2.178514003753662, "learning_rate": 4.924618537879961e-06, "loss": 1.0759, "step": 986 }, { "epoch": 0.5192004208311415, "grad_norm": 2.072097063064575, "learning_rate": 4.924449143299614e-06, "loss": 1.0321, "step": 987 }, { "epoch": 0.5197264597580221, "grad_norm": 2.124030351638794, "learning_rate": 4.924279561524004e-06, "loss": 1.0465, "step": 988 }, { "epoch": 0.5202524986849026, "grad_norm": 2.0927019119262695, "learning_rate": 4.924109792566222e-06, "loss": 1.0716, "step": 989 }, { "epoch": 0.5207785376117833, "grad_norm": 2.0673232078552246, "learning_rate": 4.923939836439377e-06, "loss": 1.0628, "step": 990 }, { "epoch": 0.5213045765386639, "grad_norm": 2.2538528442382812, "learning_rate": 4.92376969315659e-06, "loss": 1.0687, "step": 991 }, { "epoch": 0.5218306154655444, "grad_norm": 2.120530366897583, "learning_rate": 4.923599362731001e-06, "loss": 1.0893, "step": 992 }, { "epoch": 0.5223566543924251, "grad_norm": 2.0750699043273926, "learning_rate": 4.92342884517576e-06, "loss": 1.0016, "step": 993 }, { "epoch": 0.5228826933193056, "grad_norm": 1.9984569549560547, "learning_rate": 4.923258140504032e-06, "loss": 1.0326, "step": 994 }, { "epoch": 0.5234087322461862, "grad_norm": 2.201758861541748, "learning_rate": 4.923087248729e-06, "loss": 1.0413, "step": 995 }, { "epoch": 0.5239347711730669, "grad_norm": 2.1322648525238037, "learning_rate": 4.922916169863855e-06, "loss": 1.0505, "step": 996 }, { "epoch": 0.5244608100999474, "grad_norm": 2.0557119846343994, "learning_rate": 4.922744903921809e-06, "loss": 0.9761, "step": 997 }, { "epoch": 0.524986849026828, "grad_norm": 2.0989720821380615, "learning_rate": 4.922573450916086e-06, "loss": 1.0436, "step": 998 }, { "epoch": 0.5255128879537085, "grad_norm": 2.152665138244629, "learning_rate": 4.922401810859922e-06, "loss": 1.0567, "step": 999 }, { "epoch": 0.5260389268805892, "grad_norm": 1.959796667098999, "learning_rate": 4.922229983766571e-06, "loss": 1.0694, "step": 1000 }, { "epoch": 0.5265649658074697, "grad_norm": 2.121493101119995, "learning_rate": 4.9220579696493e-06, "loss": 1.1024, "step": 1001 }, { "epoch": 0.5270910047343503, "grad_norm": 1.9629384279251099, "learning_rate": 4.92188576852139e-06, "loss": 1.0538, "step": 1002 }, { "epoch": 0.527617043661231, "grad_norm": 2.396224021911621, "learning_rate": 4.921713380396137e-06, "loss": 1.0711, "step": 1003 }, { "epoch": 0.5281430825881115, "grad_norm": 2.1571781635284424, "learning_rate": 4.921540805286852e-06, "loss": 1.0663, "step": 1004 }, { "epoch": 0.5286691215149921, "grad_norm": 2.032282590866089, "learning_rate": 4.921368043206858e-06, "loss": 1.0658, "step": 1005 }, { "epoch": 0.5291951604418726, "grad_norm": 1.9589232206344604, "learning_rate": 4.921195094169496e-06, "loss": 0.9755, "step": 1006 }, { "epoch": 0.5297211993687533, "grad_norm": 1.9304051399230957, "learning_rate": 4.92102195818812e-06, "loss": 1.011, "step": 1007 }, { "epoch": 0.5302472382956339, "grad_norm": 2.306674003601074, "learning_rate": 4.920848635276096e-06, "loss": 1.0626, "step": 1008 }, { "epoch": 0.5307732772225144, "grad_norm": 2.156906843185425, "learning_rate": 4.920675125446809e-06, "loss": 1.0107, "step": 1009 }, { "epoch": 0.5312993161493951, "grad_norm": 2.2959272861480713, "learning_rate": 4.9205014287136535e-06, "loss": 1.0527, "step": 1010 }, { "epoch": 0.5318253550762756, "grad_norm": 2.046900510787964, "learning_rate": 4.9203275450900426e-06, "loss": 1.0154, "step": 1011 }, { "epoch": 0.5323513940031562, "grad_norm": 1.9947476387023926, "learning_rate": 4.920153474589401e-06, "loss": 1.0456, "step": 1012 }, { "epoch": 0.5328774329300369, "grad_norm": 2.3516438007354736, "learning_rate": 4.919979217225169e-06, "loss": 1.0982, "step": 1013 }, { "epoch": 0.5334034718569174, "grad_norm": 2.5909998416900635, "learning_rate": 4.919804773010802e-06, "loss": 1.0436, "step": 1014 }, { "epoch": 0.533929510783798, "grad_norm": 2.5206117630004883, "learning_rate": 4.91963014195977e-06, "loss": 1.0405, "step": 1015 }, { "epoch": 0.5344555497106785, "grad_norm": 2.21992826461792, "learning_rate": 4.919455324085554e-06, "loss": 1.0594, "step": 1016 }, { "epoch": 0.5349815886375592, "grad_norm": 2.2773711681365967, "learning_rate": 4.919280319401654e-06, "loss": 1.0501, "step": 1017 }, { "epoch": 0.5355076275644398, "grad_norm": 2.075962543487549, "learning_rate": 4.919105127921582e-06, "loss": 1.0052, "step": 1018 }, { "epoch": 0.5360336664913203, "grad_norm": 2.108670473098755, "learning_rate": 4.9189297496588636e-06, "loss": 1.0675, "step": 1019 }, { "epoch": 0.536559705418201, "grad_norm": 2.125927209854126, "learning_rate": 4.918754184627041e-06, "loss": 1.0912, "step": 1020 }, { "epoch": 0.5370857443450815, "grad_norm": 2.1099467277526855, "learning_rate": 4.91857843283967e-06, "loss": 1.0424, "step": 1021 }, { "epoch": 0.5376117832719621, "grad_norm": 2.0880467891693115, "learning_rate": 4.918402494310319e-06, "loss": 1.061, "step": 1022 }, { "epoch": 0.5381378221988428, "grad_norm": 2.1544101238250732, "learning_rate": 4.918226369052575e-06, "loss": 1.0608, "step": 1023 }, { "epoch": 0.5386638611257233, "grad_norm": 2.213214635848999, "learning_rate": 4.918050057080036e-06, "loss": 1.1368, "step": 1024 }, { "epoch": 0.5391899000526039, "grad_norm": 2.062903642654419, "learning_rate": 4.917873558406315e-06, "loss": 1.0861, "step": 1025 }, { "epoch": 0.5397159389794844, "grad_norm": 1.9643436670303345, "learning_rate": 4.917696873045039e-06, "loss": 1.0008, "step": 1026 }, { "epoch": 0.5402419779063651, "grad_norm": 2.276639699935913, "learning_rate": 4.917520001009851e-06, "loss": 0.9812, "step": 1027 }, { "epoch": 0.5407680168332457, "grad_norm": 2.1487631797790527, "learning_rate": 4.917342942314407e-06, "loss": 1.0603, "step": 1028 }, { "epoch": 0.5412940557601262, "grad_norm": 2.1040542125701904, "learning_rate": 4.917165696972379e-06, "loss": 1.0425, "step": 1029 }, { "epoch": 0.5418200946870069, "grad_norm": 2.214475154876709, "learning_rate": 4.916988264997452e-06, "loss": 1.032, "step": 1030 }, { "epoch": 0.5423461336138874, "grad_norm": 2.154320001602173, "learning_rate": 4.916810646403325e-06, "loss": 1.0371, "step": 1031 }, { "epoch": 0.542872172540768, "grad_norm": 2.1565327644348145, "learning_rate": 4.916632841203714e-06, "loss": 1.0866, "step": 1032 }, { "epoch": 0.5433982114676486, "grad_norm": 2.197402238845825, "learning_rate": 4.916454849412344e-06, "loss": 1.0531, "step": 1033 }, { "epoch": 0.5439242503945292, "grad_norm": 2.0249993801116943, "learning_rate": 4.916276671042962e-06, "loss": 1.0485, "step": 1034 }, { "epoch": 0.5444502893214098, "grad_norm": 2.077765703201294, "learning_rate": 4.916098306109323e-06, "loss": 1.0731, "step": 1035 }, { "epoch": 0.5449763282482903, "grad_norm": 2.0669186115264893, "learning_rate": 4.915919754625199e-06, "loss": 1.0912, "step": 1036 }, { "epoch": 0.545502367175171, "grad_norm": 2.160076379776001, "learning_rate": 4.915741016604378e-06, "loss": 1.0523, "step": 1037 }, { "epoch": 0.5460284061020515, "grad_norm": 1.8992373943328857, "learning_rate": 4.915562092060659e-06, "loss": 1.0185, "step": 1038 }, { "epoch": 0.5465544450289321, "grad_norm": 2.0712900161743164, "learning_rate": 4.915382981007857e-06, "loss": 1.0581, "step": 1039 }, { "epoch": 0.5470804839558128, "grad_norm": 2.2600317001342773, "learning_rate": 4.915203683459802e-06, "loss": 1.0154, "step": 1040 }, { "epoch": 0.5476065228826933, "grad_norm": 2.050366163253784, "learning_rate": 4.915024199430338e-06, "loss": 1.0371, "step": 1041 }, { "epoch": 0.5481325618095739, "grad_norm": 2.208393096923828, "learning_rate": 4.914844528933322e-06, "loss": 1.0767, "step": 1042 }, { "epoch": 0.5486586007364544, "grad_norm": 2.1388466358184814, "learning_rate": 4.914664671982629e-06, "loss": 1.1074, "step": 1043 }, { "epoch": 0.5491846396633351, "grad_norm": 2.253007411956787, "learning_rate": 4.914484628592144e-06, "loss": 1.0455, "step": 1044 }, { "epoch": 0.5497106785902157, "grad_norm": 2.2380669116973877, "learning_rate": 4.9143043987757684e-06, "loss": 1.0581, "step": 1045 }, { "epoch": 0.5502367175170962, "grad_norm": 2.136256456375122, "learning_rate": 4.914123982547419e-06, "loss": 1.0588, "step": 1046 }, { "epoch": 0.5507627564439769, "grad_norm": 2.0044257640838623, "learning_rate": 4.913943379921025e-06, "loss": 0.9918, "step": 1047 }, { "epoch": 0.5512887953708574, "grad_norm": 2.089315414428711, "learning_rate": 4.913762590910533e-06, "loss": 1.0675, "step": 1048 }, { "epoch": 0.551814834297738, "grad_norm": 2.048976182937622, "learning_rate": 4.9135816155298985e-06, "loss": 1.0259, "step": 1049 }, { "epoch": 0.5523408732246187, "grad_norm": 2.273501396179199, "learning_rate": 4.913400453793098e-06, "loss": 1.0743, "step": 1050 }, { "epoch": 0.5528669121514992, "grad_norm": 2.0761802196502686, "learning_rate": 4.913219105714117e-06, "loss": 1.0199, "step": 1051 }, { "epoch": 0.5533929510783798, "grad_norm": 1.9552183151245117, "learning_rate": 4.913037571306961e-06, "loss": 1.0582, "step": 1052 }, { "epoch": 0.5539189900052603, "grad_norm": 2.0593061447143555, "learning_rate": 4.9128558505856425e-06, "loss": 0.9626, "step": 1053 }, { "epoch": 0.554445028932141, "grad_norm": 2.026820659637451, "learning_rate": 4.9126739435641955e-06, "loss": 1.0253, "step": 1054 }, { "epoch": 0.5549710678590216, "grad_norm": 2.22835111618042, "learning_rate": 4.9124918502566635e-06, "loss": 1.0176, "step": 1055 }, { "epoch": 0.5554971067859021, "grad_norm": 1.9653559923171997, "learning_rate": 4.9123095706771064e-06, "loss": 0.9886, "step": 1056 }, { "epoch": 0.5560231457127828, "grad_norm": 2.083310842514038, "learning_rate": 4.912127104839599e-06, "loss": 1.0105, "step": 1057 }, { "epoch": 0.5565491846396633, "grad_norm": 2.1681482791900635, "learning_rate": 4.91194445275823e-06, "loss": 1.0359, "step": 1058 }, { "epoch": 0.5570752235665439, "grad_norm": 1.990717887878418, "learning_rate": 4.911761614447101e-06, "loss": 1.0369, "step": 1059 }, { "epoch": 0.5576012624934246, "grad_norm": 2.159813642501831, "learning_rate": 4.91157858992033e-06, "loss": 1.0341, "step": 1060 }, { "epoch": 0.5581273014203051, "grad_norm": 1.9474655389785767, "learning_rate": 4.911395379192048e-06, "loss": 1.0432, "step": 1061 }, { "epoch": 0.5586533403471857, "grad_norm": 2.140634536743164, "learning_rate": 4.911211982276402e-06, "loss": 1.0485, "step": 1062 }, { "epoch": 0.5591793792740662, "grad_norm": 2.2925636768341064, "learning_rate": 4.911028399187552e-06, "loss": 1.0648, "step": 1063 }, { "epoch": 0.5597054182009469, "grad_norm": 2.037755250930786, "learning_rate": 4.910844629939672e-06, "loss": 1.0568, "step": 1064 }, { "epoch": 0.5602314571278275, "grad_norm": 1.997471809387207, "learning_rate": 4.910660674546951e-06, "loss": 1.0109, "step": 1065 }, { "epoch": 0.560757496054708, "grad_norm": 2.109219551086426, "learning_rate": 4.910476533023593e-06, "loss": 1.0658, "step": 1066 }, { "epoch": 0.5612835349815887, "grad_norm": 2.10469388961792, "learning_rate": 4.9102922053838175e-06, "loss": 1.0612, "step": 1067 }, { "epoch": 0.5618095739084692, "grad_norm": 2.2748658657073975, "learning_rate": 4.9101076916418535e-06, "loss": 1.0422, "step": 1068 }, { "epoch": 0.5623356128353498, "grad_norm": 2.0472326278686523, "learning_rate": 4.90992299181195e-06, "loss": 1.0339, "step": 1069 }, { "epoch": 0.5628616517622304, "grad_norm": 2.0694494247436523, "learning_rate": 4.909738105908367e-06, "loss": 1.0083, "step": 1070 }, { "epoch": 0.563387690689111, "grad_norm": 2.0032498836517334, "learning_rate": 4.909553033945379e-06, "loss": 0.9889, "step": 1071 }, { "epoch": 0.5639137296159916, "grad_norm": 2.091393232345581, "learning_rate": 4.909367775937278e-06, "loss": 1.0856, "step": 1072 }, { "epoch": 0.5644397685428721, "grad_norm": 2.0542173385620117, "learning_rate": 4.909182331898366e-06, "loss": 1.0422, "step": 1073 }, { "epoch": 0.5649658074697528, "grad_norm": 2.009228467941284, "learning_rate": 4.908996701842962e-06, "loss": 1.0594, "step": 1074 }, { "epoch": 0.5654918463966333, "grad_norm": 1.9546911716461182, "learning_rate": 4.9088108857853985e-06, "loss": 1.0691, "step": 1075 }, { "epoch": 0.5660178853235139, "grad_norm": 2.1657440662384033, "learning_rate": 4.908624883740023e-06, "loss": 1.0252, "step": 1076 }, { "epoch": 0.5665439242503946, "grad_norm": 2.151035785675049, "learning_rate": 4.9084386957211975e-06, "loss": 1.0587, "step": 1077 }, { "epoch": 0.5670699631772751, "grad_norm": 2.299673557281494, "learning_rate": 4.908252321743296e-06, "loss": 1.0221, "step": 1078 }, { "epoch": 0.5675960021041557, "grad_norm": 2.0144848823547363, "learning_rate": 4.908065761820711e-06, "loss": 1.0256, "step": 1079 }, { "epoch": 0.5681220410310363, "grad_norm": 2.172971725463867, "learning_rate": 4.907879015967846e-06, "loss": 1.0231, "step": 1080 }, { "epoch": 0.5686480799579169, "grad_norm": 2.0427041053771973, "learning_rate": 4.907692084199119e-06, "loss": 1.0433, "step": 1081 }, { "epoch": 0.5691741188847975, "grad_norm": 2.1561834812164307, "learning_rate": 4.907504966528966e-06, "loss": 1.0478, "step": 1082 }, { "epoch": 0.569700157811678, "grad_norm": 2.012385606765747, "learning_rate": 4.907317662971831e-06, "loss": 1.0703, "step": 1083 }, { "epoch": 0.5702261967385587, "grad_norm": 2.137075424194336, "learning_rate": 4.907130173542179e-06, "loss": 1.0527, "step": 1084 }, { "epoch": 0.5707522356654392, "grad_norm": 2.039424180984497, "learning_rate": 4.906942498254485e-06, "loss": 0.9969, "step": 1085 }, { "epoch": 0.5712782745923198, "grad_norm": 2.0207748413085938, "learning_rate": 4.90675463712324e-06, "loss": 1.0157, "step": 1086 }, { "epoch": 0.5718043135192005, "grad_norm": 2.024454116821289, "learning_rate": 4.906566590162949e-06, "loss": 1.0699, "step": 1087 }, { "epoch": 0.572330352446081, "grad_norm": 2.256537675857544, "learning_rate": 4.90637835738813e-06, "loss": 1.1083, "step": 1088 }, { "epoch": 0.5728563913729616, "grad_norm": 2.099698543548584, "learning_rate": 4.90618993881332e-06, "loss": 1.0242, "step": 1089 }, { "epoch": 0.5733824302998421, "grad_norm": 2.0367214679718018, "learning_rate": 4.906001334453064e-06, "loss": 1.0088, "step": 1090 }, { "epoch": 0.5739084692267228, "grad_norm": 1.9988690614700317, "learning_rate": 4.9058125443219245e-06, "loss": 1.044, "step": 1091 }, { "epoch": 0.5744345081536034, "grad_norm": 1.9970273971557617, "learning_rate": 4.9056235684344805e-06, "loss": 1.0847, "step": 1092 }, { "epoch": 0.5749605470804839, "grad_norm": 2.152602434158325, "learning_rate": 4.905434406805322e-06, "loss": 1.0931, "step": 1093 }, { "epoch": 0.5754865860073646, "grad_norm": 2.0728707313537598, "learning_rate": 4.905245059449053e-06, "loss": 1.0401, "step": 1094 }, { "epoch": 0.5760126249342451, "grad_norm": 1.94095778465271, "learning_rate": 4.9050555263802954e-06, "loss": 1.0262, "step": 1095 }, { "epoch": 0.5765386638611257, "grad_norm": 2.126347780227661, "learning_rate": 4.904865807613683e-06, "loss": 1.0678, "step": 1096 }, { "epoch": 0.5770647027880064, "grad_norm": 2.085378646850586, "learning_rate": 4.904675903163864e-06, "loss": 1.0665, "step": 1097 }, { "epoch": 0.5775907417148869, "grad_norm": 2.2276804447174072, "learning_rate": 4.9044858130454995e-06, "loss": 1.0718, "step": 1098 }, { "epoch": 0.5781167806417675, "grad_norm": 2.2318899631500244, "learning_rate": 4.904295537273269e-06, "loss": 1.0663, "step": 1099 }, { "epoch": 0.578642819568648, "grad_norm": 2.0555522441864014, "learning_rate": 4.904105075861864e-06, "loss": 0.9989, "step": 1100 }, { "epoch": 0.5791688584955287, "grad_norm": 2.094501256942749, "learning_rate": 4.9039144288259876e-06, "loss": 1.0802, "step": 1101 }, { "epoch": 0.5796948974224092, "grad_norm": 2.7403769493103027, "learning_rate": 4.903723596180363e-06, "loss": 1.0024, "step": 1102 }, { "epoch": 0.5802209363492898, "grad_norm": 2.1775436401367188, "learning_rate": 4.9035325779397225e-06, "loss": 1.0234, "step": 1103 }, { "epoch": 0.5807469752761705, "grad_norm": 2.2489676475524902, "learning_rate": 4.903341374118816e-06, "loss": 1.0188, "step": 1104 }, { "epoch": 0.581273014203051, "grad_norm": 2.2214367389678955, "learning_rate": 4.903149984732407e-06, "loss": 1.0835, "step": 1105 }, { "epoch": 0.5817990531299316, "grad_norm": 2.203273296356201, "learning_rate": 4.902958409795272e-06, "loss": 1.0547, "step": 1106 }, { "epoch": 0.5823250920568122, "grad_norm": 2.1076622009277344, "learning_rate": 4.902766649322204e-06, "loss": 1.0571, "step": 1107 }, { "epoch": 0.5828511309836928, "grad_norm": 2.1270394325256348, "learning_rate": 4.902574703328007e-06, "loss": 0.9863, "step": 1108 }, { "epoch": 0.5833771699105734, "grad_norm": 2.1030006408691406, "learning_rate": 4.902382571827503e-06, "loss": 1.0404, "step": 1109 }, { "epoch": 0.583903208837454, "grad_norm": 2.1046831607818604, "learning_rate": 4.9021902548355275e-06, "loss": 1.018, "step": 1110 }, { "epoch": 0.5844292477643346, "grad_norm": 2.0193376541137695, "learning_rate": 4.901997752366927e-06, "loss": 1.0035, "step": 1111 }, { "epoch": 0.5849552866912151, "grad_norm": 2.0812923908233643, "learning_rate": 4.9018050644365675e-06, "loss": 0.9928, "step": 1112 }, { "epoch": 0.5854813256180957, "grad_norm": 2.035750150680542, "learning_rate": 4.901612191059325e-06, "loss": 1.0658, "step": 1113 }, { "epoch": 0.5860073645449764, "grad_norm": 2.093606948852539, "learning_rate": 4.901419132250093e-06, "loss": 1.0019, "step": 1114 }, { "epoch": 0.5865334034718569, "grad_norm": 2.4018402099609375, "learning_rate": 4.901225888023776e-06, "loss": 1.0785, "step": 1115 }, { "epoch": 0.5870594423987375, "grad_norm": 2.1731529235839844, "learning_rate": 4.901032458395296e-06, "loss": 1.0437, "step": 1116 }, { "epoch": 0.587585481325618, "grad_norm": 2.085692882537842, "learning_rate": 4.900838843379588e-06, "loss": 1.0122, "step": 1117 }, { "epoch": 0.5881115202524987, "grad_norm": 2.272787094116211, "learning_rate": 4.900645042991601e-06, "loss": 1.0708, "step": 1118 }, { "epoch": 0.5886375591793793, "grad_norm": 2.197758913040161, "learning_rate": 4.900451057246298e-06, "loss": 1.037, "step": 1119 }, { "epoch": 0.5891635981062598, "grad_norm": 2.228980779647827, "learning_rate": 4.900256886158658e-06, "loss": 1.0306, "step": 1120 }, { "epoch": 0.5896896370331405, "grad_norm": 2.010698080062866, "learning_rate": 4.900062529743672e-06, "loss": 1.0777, "step": 1121 }, { "epoch": 0.590215675960021, "grad_norm": 2.0015103816986084, "learning_rate": 4.899867988016348e-06, "loss": 0.9991, "step": 1122 }, { "epoch": 0.5907417148869016, "grad_norm": 1.9307256937026978, "learning_rate": 4.899673260991706e-06, "loss": 1.0655, "step": 1123 }, { "epoch": 0.5912677538137823, "grad_norm": 2.339930295944214, "learning_rate": 4.899478348684782e-06, "loss": 1.0177, "step": 1124 }, { "epoch": 0.5917937927406628, "grad_norm": 2.000337839126587, "learning_rate": 4.899283251110624e-06, "loss": 1.036, "step": 1125 }, { "epoch": 0.5923198316675434, "grad_norm": 2.0116374492645264, "learning_rate": 4.899087968284297e-06, "loss": 0.9666, "step": 1126 }, { "epoch": 0.592845870594424, "grad_norm": 2.27270245552063, "learning_rate": 4.898892500220878e-06, "loss": 1.0526, "step": 1127 }, { "epoch": 0.5933719095213046, "grad_norm": 2.1844749450683594, "learning_rate": 4.89869684693546e-06, "loss": 1.0606, "step": 1128 }, { "epoch": 0.5938979484481852, "grad_norm": 2.112031936645508, "learning_rate": 4.898501008443151e-06, "loss": 1.0846, "step": 1129 }, { "epoch": 0.5944239873750657, "grad_norm": 2.251878499984741, "learning_rate": 4.898304984759069e-06, "loss": 1.023, "step": 1130 }, { "epoch": 0.5949500263019464, "grad_norm": 2.064732074737549, "learning_rate": 4.898108775898351e-06, "loss": 1.066, "step": 1131 }, { "epoch": 0.5954760652288269, "grad_norm": 2.10412335395813, "learning_rate": 4.897912381876147e-06, "loss": 1.0476, "step": 1132 }, { "epoch": 0.5960021041557075, "grad_norm": 2.1343259811401367, "learning_rate": 4.897715802707621e-06, "loss": 1.0264, "step": 1133 }, { "epoch": 0.5965281430825881, "grad_norm": 2.3453173637390137, "learning_rate": 4.89751903840795e-06, "loss": 1.076, "step": 1134 }, { "epoch": 0.5970541820094687, "grad_norm": 2.040123462677002, "learning_rate": 4.897322088992326e-06, "loss": 1.0494, "step": 1135 }, { "epoch": 0.5975802209363493, "grad_norm": 2.070585012435913, "learning_rate": 4.897124954475958e-06, "loss": 1.0904, "step": 1136 }, { "epoch": 0.5981062598632298, "grad_norm": 2.048081159591675, "learning_rate": 4.896927634874065e-06, "loss": 0.9855, "step": 1137 }, { "epoch": 0.5986322987901105, "grad_norm": 2.07633113861084, "learning_rate": 4.896730130201883e-06, "loss": 1.0848, "step": 1138 }, { "epoch": 0.599158337716991, "grad_norm": 2.233821153640747, "learning_rate": 4.8965324404746624e-06, "loss": 1.0419, "step": 1139 }, { "epoch": 0.5996843766438716, "grad_norm": 2.1806929111480713, "learning_rate": 4.896334565707666e-06, "loss": 1.0377, "step": 1140 }, { "epoch": 0.6002104155707523, "grad_norm": 2.056483268737793, "learning_rate": 4.896136505916174e-06, "loss": 1.0269, "step": 1141 }, { "epoch": 0.6007364544976328, "grad_norm": 1.9446007013320923, "learning_rate": 4.895938261115476e-06, "loss": 0.9958, "step": 1142 }, { "epoch": 0.6012624934245134, "grad_norm": 1.9170737266540527, "learning_rate": 4.8957398313208795e-06, "loss": 1.0083, "step": 1143 }, { "epoch": 0.601788532351394, "grad_norm": 2.0455801486968994, "learning_rate": 4.895541216547707e-06, "loss": 1.0819, "step": 1144 }, { "epoch": 0.6023145712782746, "grad_norm": 2.410231828689575, "learning_rate": 4.8953424168112925e-06, "loss": 1.0265, "step": 1145 }, { "epoch": 0.6028406102051552, "grad_norm": 2.0946412086486816, "learning_rate": 4.895143432126986e-06, "loss": 1.014, "step": 1146 }, { "epoch": 0.6033666491320357, "grad_norm": 1.9825836420059204, "learning_rate": 4.894944262510152e-06, "loss": 0.9721, "step": 1147 }, { "epoch": 0.6038926880589164, "grad_norm": 2.1228606700897217, "learning_rate": 4.8947449079761685e-06, "loss": 1.0971, "step": 1148 }, { "epoch": 0.6044187269857969, "grad_norm": 2.1443943977355957, "learning_rate": 4.894545368540427e-06, "loss": 0.9956, "step": 1149 }, { "epoch": 0.6049447659126775, "grad_norm": 1.9651165008544922, "learning_rate": 4.894345644218335e-06, "loss": 1.0103, "step": 1150 }, { "epoch": 0.6054708048395582, "grad_norm": 1.9829816818237305, "learning_rate": 4.8941457350253134e-06, "loss": 1.0425, "step": 1151 }, { "epoch": 0.6059968437664387, "grad_norm": 2.122873067855835, "learning_rate": 4.893945640976798e-06, "loss": 1.0532, "step": 1152 }, { "epoch": 0.6065228826933193, "grad_norm": 2.0714738368988037, "learning_rate": 4.8937453620882365e-06, "loss": 1.0307, "step": 1153 }, { "epoch": 0.6070489216201999, "grad_norm": 1.9049363136291504, "learning_rate": 4.893544898375096e-06, "loss": 0.9805, "step": 1154 }, { "epoch": 0.6075749605470805, "grad_norm": 2.432041645050049, "learning_rate": 4.893344249852851e-06, "loss": 1.0833, "step": 1155 }, { "epoch": 0.6081009994739611, "grad_norm": 2.055748224258423, "learning_rate": 4.893143416536997e-06, "loss": 1.0315, "step": 1156 }, { "epoch": 0.6086270384008416, "grad_norm": 1.9813153743743896, "learning_rate": 4.892942398443037e-06, "loss": 1.0786, "step": 1157 }, { "epoch": 0.6091530773277223, "grad_norm": 2.2038941383361816, "learning_rate": 4.892741195586496e-06, "loss": 1.0604, "step": 1158 }, { "epoch": 0.6096791162546028, "grad_norm": 2.0015673637390137, "learning_rate": 4.892539807982906e-06, "loss": 0.9863, "step": 1159 }, { "epoch": 0.6102051551814834, "grad_norm": 2.0392401218414307, "learning_rate": 4.892338235647818e-06, "loss": 1.0218, "step": 1160 }, { "epoch": 0.6107311941083641, "grad_norm": 2.0060133934020996, "learning_rate": 4.892136478596796e-06, "loss": 1.0134, "step": 1161 }, { "epoch": 0.6112572330352446, "grad_norm": 1.9645148515701294, "learning_rate": 4.8919345368454164e-06, "loss": 1.0206, "step": 1162 }, { "epoch": 0.6117832719621252, "grad_norm": 1.9299581050872803, "learning_rate": 4.8917324104092725e-06, "loss": 1.0243, "step": 1163 }, { "epoch": 0.6123093108890058, "grad_norm": 2.071143388748169, "learning_rate": 4.891530099303971e-06, "loss": 1.0466, "step": 1164 }, { "epoch": 0.6128353498158864, "grad_norm": 2.122020959854126, "learning_rate": 4.891327603545132e-06, "loss": 1.0886, "step": 1165 }, { "epoch": 0.6133613887427669, "grad_norm": 2.0861775875091553, "learning_rate": 4.891124923148391e-06, "loss": 1.0481, "step": 1166 }, { "epoch": 0.6138874276696475, "grad_norm": 2.053553581237793, "learning_rate": 4.890922058129396e-06, "loss": 1.0332, "step": 1167 }, { "epoch": 0.6144134665965282, "grad_norm": 2.0698556900024414, "learning_rate": 4.890719008503813e-06, "loss": 0.9913, "step": 1168 }, { "epoch": 0.6149395055234087, "grad_norm": 2.0626866817474365, "learning_rate": 4.890515774287317e-06, "loss": 1.0383, "step": 1169 }, { "epoch": 0.6154655444502893, "grad_norm": 2.001122236251831, "learning_rate": 4.890312355495602e-06, "loss": 0.997, "step": 1170 }, { "epoch": 0.6159915833771699, "grad_norm": 2.141261577606201, "learning_rate": 4.890108752144373e-06, "loss": 1.0139, "step": 1171 }, { "epoch": 0.6165176223040505, "grad_norm": 2.0430335998535156, "learning_rate": 4.8899049642493514e-06, "loss": 1.0177, "step": 1172 }, { "epoch": 0.6170436612309311, "grad_norm": 2.0376110076904297, "learning_rate": 4.889700991826271e-06, "loss": 1.0306, "step": 1173 }, { "epoch": 0.6175697001578117, "grad_norm": 2.0546419620513916, "learning_rate": 4.889496834890882e-06, "loss": 1.0379, "step": 1174 }, { "epoch": 0.6180957390846923, "grad_norm": 2.004117012023926, "learning_rate": 4.889292493458947e-06, "loss": 1.1014, "step": 1175 }, { "epoch": 0.6186217780115728, "grad_norm": 2.1904101371765137, "learning_rate": 4.889087967546243e-06, "loss": 1.0252, "step": 1176 }, { "epoch": 0.6191478169384534, "grad_norm": 2.2026965618133545, "learning_rate": 4.8888832571685626e-06, "loss": 1.0309, "step": 1177 }, { "epoch": 0.6196738558653341, "grad_norm": 1.9925811290740967, "learning_rate": 4.888678362341711e-06, "loss": 1.0157, "step": 1178 }, { "epoch": 0.6201998947922146, "grad_norm": 2.4098422527313232, "learning_rate": 4.88847328308151e-06, "loss": 0.9825, "step": 1179 }, { "epoch": 0.6207259337190952, "grad_norm": 1.9352220296859741, "learning_rate": 4.888268019403792e-06, "loss": 1.0235, "step": 1180 }, { "epoch": 0.6212519726459758, "grad_norm": 1.9798966646194458, "learning_rate": 4.888062571324407e-06, "loss": 1.0124, "step": 1181 }, { "epoch": 0.6217780115728564, "grad_norm": 1.9737377166748047, "learning_rate": 4.887856938859218e-06, "loss": 1.005, "step": 1182 }, { "epoch": 0.622304050499737, "grad_norm": 2.2528250217437744, "learning_rate": 4.887651122024102e-06, "loss": 1.0207, "step": 1183 }, { "epoch": 0.6228300894266176, "grad_norm": 2.01436185836792, "learning_rate": 4.887445120834949e-06, "loss": 1.0368, "step": 1184 }, { "epoch": 0.6233561283534982, "grad_norm": 2.0212924480438232, "learning_rate": 4.887238935307667e-06, "loss": 1.0136, "step": 1185 }, { "epoch": 0.6238821672803787, "grad_norm": 2.080514669418335, "learning_rate": 4.887032565458174e-06, "loss": 1.0012, "step": 1186 }, { "epoch": 0.6244082062072593, "grad_norm": 2.220168113708496, "learning_rate": 4.886826011302406e-06, "loss": 1.0055, "step": 1187 }, { "epoch": 0.62493424513414, "grad_norm": 2.042325258255005, "learning_rate": 4.886619272856309e-06, "loss": 1.0793, "step": 1188 }, { "epoch": 0.6254602840610205, "grad_norm": 2.0139427185058594, "learning_rate": 4.886412350135848e-06, "loss": 1.0853, "step": 1189 }, { "epoch": 0.6259863229879011, "grad_norm": 2.072531223297119, "learning_rate": 4.886205243156998e-06, "loss": 1.0611, "step": 1190 }, { "epoch": 0.6265123619147817, "grad_norm": 2.1070992946624756, "learning_rate": 4.8859979519357505e-06, "loss": 1.0171, "step": 1191 }, { "epoch": 0.6270384008416623, "grad_norm": 1.9750585556030273, "learning_rate": 4.885790476488111e-06, "loss": 1.01, "step": 1192 }, { "epoch": 0.6275644397685429, "grad_norm": 1.9221036434173584, "learning_rate": 4.885582816830099e-06, "loss": 1.0173, "step": 1193 }, { "epoch": 0.6280904786954234, "grad_norm": 2.0700929164886475, "learning_rate": 4.885374972977748e-06, "loss": 1.0469, "step": 1194 }, { "epoch": 0.6286165176223041, "grad_norm": 2.1358914375305176, "learning_rate": 4.885166944947106e-06, "loss": 1.0144, "step": 1195 }, { "epoch": 0.6291425565491846, "grad_norm": 2.0657570362091064, "learning_rate": 4.884958732754236e-06, "loss": 1.0278, "step": 1196 }, { "epoch": 0.6296685954760652, "grad_norm": 2.050619125366211, "learning_rate": 4.884750336415213e-06, "loss": 1.0401, "step": 1197 }, { "epoch": 0.6301946344029458, "grad_norm": 2.029069423675537, "learning_rate": 4.884541755946127e-06, "loss": 1.0265, "step": 1198 }, { "epoch": 0.6307206733298264, "grad_norm": 2.2242050170898438, "learning_rate": 4.884332991363086e-06, "loss": 1.043, "step": 1199 }, { "epoch": 0.631246712256707, "grad_norm": 1.9235576391220093, "learning_rate": 4.8841240426822056e-06, "loss": 1.0323, "step": 1200 }, { "epoch": 0.6317727511835876, "grad_norm": 2.0110039710998535, "learning_rate": 4.88391490991962e-06, "loss": 0.9861, "step": 1201 }, { "epoch": 0.6322987901104682, "grad_norm": 1.9583542346954346, "learning_rate": 4.883705593091478e-06, "loss": 1.0907, "step": 1202 }, { "epoch": 0.6328248290373487, "grad_norm": 2.046147346496582, "learning_rate": 4.88349609221394e-06, "loss": 1.0264, "step": 1203 }, { "epoch": 0.6333508679642293, "grad_norm": 2.072329521179199, "learning_rate": 4.8832864073031826e-06, "loss": 1.0273, "step": 1204 }, { "epoch": 0.63387690689111, "grad_norm": 2.163562774658203, "learning_rate": 4.883076538375395e-06, "loss": 0.9729, "step": 1205 }, { "epoch": 0.6344029458179905, "grad_norm": 2.018745183944702, "learning_rate": 4.8828664854467825e-06, "loss": 1.0349, "step": 1206 }, { "epoch": 0.6349289847448711, "grad_norm": 1.9641830921173096, "learning_rate": 4.882656248533562e-06, "loss": 1.0254, "step": 1207 }, { "epoch": 0.6354550236717517, "grad_norm": 2.189903736114502, "learning_rate": 4.8824458276519676e-06, "loss": 1.0347, "step": 1208 }, { "epoch": 0.6359810625986323, "grad_norm": 1.9000815153121948, "learning_rate": 4.882235222818245e-06, "loss": 1.0068, "step": 1209 }, { "epoch": 0.6365071015255129, "grad_norm": 2.008253335952759, "learning_rate": 4.882024434048658e-06, "loss": 0.9951, "step": 1210 }, { "epoch": 0.6370331404523935, "grad_norm": 2.254880905151367, "learning_rate": 4.881813461359479e-06, "loss": 1.0254, "step": 1211 }, { "epoch": 0.6375591793792741, "grad_norm": 2.079281806945801, "learning_rate": 4.881602304766999e-06, "loss": 1.0138, "step": 1212 }, { "epoch": 0.6380852183061546, "grad_norm": 1.9515445232391357, "learning_rate": 4.881390964287521e-06, "loss": 0.9896, "step": 1213 }, { "epoch": 0.6386112572330352, "grad_norm": 2.118746757507324, "learning_rate": 4.881179439937363e-06, "loss": 1.0554, "step": 1214 }, { "epoch": 0.6391372961599159, "grad_norm": 1.9809492826461792, "learning_rate": 4.8809677317328574e-06, "loss": 1.0327, "step": 1215 }, { "epoch": 0.6396633350867964, "grad_norm": 2.0196714401245117, "learning_rate": 4.88075583969035e-06, "loss": 1.0072, "step": 1216 }, { "epoch": 0.640189374013677, "grad_norm": 2.075596570968628, "learning_rate": 4.8805437638262024e-06, "loss": 1.0088, "step": 1217 }, { "epoch": 0.6407154129405576, "grad_norm": 1.919331431388855, "learning_rate": 4.880331504156788e-06, "loss": 0.9561, "step": 1218 }, { "epoch": 0.6412414518674382, "grad_norm": 2.1209754943847656, "learning_rate": 4.8801190606984974e-06, "loss": 1.0436, "step": 1219 }, { "epoch": 0.6417674907943188, "grad_norm": 2.1692416667938232, "learning_rate": 4.879906433467731e-06, "loss": 1.0596, "step": 1220 }, { "epoch": 0.6422935297211994, "grad_norm": 2.127383232116699, "learning_rate": 4.879693622480908e-06, "loss": 1.0527, "step": 1221 }, { "epoch": 0.64281956864808, "grad_norm": 2.0686752796173096, "learning_rate": 4.87948062775446e-06, "loss": 1.0161, "step": 1222 }, { "epoch": 0.6433456075749605, "grad_norm": 1.9912559986114502, "learning_rate": 4.879267449304831e-06, "loss": 1.0246, "step": 1223 }, { "epoch": 0.6438716465018411, "grad_norm": 1.9714523553848267, "learning_rate": 4.879054087148483e-06, "loss": 1.0669, "step": 1224 }, { "epoch": 0.6443976854287218, "grad_norm": 2.0122146606445312, "learning_rate": 4.878840541301888e-06, "loss": 1.0383, "step": 1225 }, { "epoch": 0.6449237243556023, "grad_norm": 2.191110134124756, "learning_rate": 4.878626811781536e-06, "loss": 1.0832, "step": 1226 }, { "epoch": 0.6454497632824829, "grad_norm": 2.018800735473633, "learning_rate": 4.8784128986039274e-06, "loss": 1.0588, "step": 1227 }, { "epoch": 0.6459758022093635, "grad_norm": 2.0812923908233643, "learning_rate": 4.87819880178558e-06, "loss": 1.0221, "step": 1228 }, { "epoch": 0.6465018411362441, "grad_norm": 2.110596179962158, "learning_rate": 4.877984521343025e-06, "loss": 1.0252, "step": 1229 }, { "epoch": 0.6470278800631246, "grad_norm": 2.2176296710968018, "learning_rate": 4.877770057292806e-06, "loss": 1.0575, "step": 1230 }, { "epoch": 0.6475539189900053, "grad_norm": 2.0294981002807617, "learning_rate": 4.8775554096514836e-06, "loss": 0.9862, "step": 1231 }, { "epoch": 0.6480799579168859, "grad_norm": 2.03635573387146, "learning_rate": 4.8773405784356285e-06, "loss": 1.0229, "step": 1232 }, { "epoch": 0.6486059968437664, "grad_norm": 2.2391481399536133, "learning_rate": 4.877125563661831e-06, "loss": 1.1258, "step": 1233 }, { "epoch": 0.649132035770647, "grad_norm": 2.1449427604675293, "learning_rate": 4.876910365346691e-06, "loss": 1.039, "step": 1234 }, { "epoch": 0.6496580746975276, "grad_norm": 2.075510025024414, "learning_rate": 4.876694983506826e-06, "loss": 1.047, "step": 1235 }, { "epoch": 0.6501841136244082, "grad_norm": 1.9154462814331055, "learning_rate": 4.876479418158862e-06, "loss": 0.9906, "step": 1236 }, { "epoch": 0.6507101525512888, "grad_norm": 2.2096331119537354, "learning_rate": 4.876263669319449e-06, "loss": 1.0843, "step": 1237 }, { "epoch": 0.6512361914781694, "grad_norm": 2.0682895183563232, "learning_rate": 4.87604773700524e-06, "loss": 1.0262, "step": 1238 }, { "epoch": 0.65176223040505, "grad_norm": 2.0859344005584717, "learning_rate": 4.8758316212329106e-06, "loss": 1.02, "step": 1239 }, { "epoch": 0.6522882693319305, "grad_norm": 2.060521364212036, "learning_rate": 4.875615322019146e-06, "loss": 1.0455, "step": 1240 }, { "epoch": 0.6528143082588111, "grad_norm": 2.049457311630249, "learning_rate": 4.875398839380647e-06, "loss": 1.0763, "step": 1241 }, { "epoch": 0.6533403471856918, "grad_norm": 2.2475039958953857, "learning_rate": 4.875182173334129e-06, "loss": 1.0599, "step": 1242 }, { "epoch": 0.6538663861125723, "grad_norm": 1.9375535249710083, "learning_rate": 4.874965323896321e-06, "loss": 0.9758, "step": 1243 }, { "epoch": 0.6543924250394529, "grad_norm": 2.0157570838928223, "learning_rate": 4.874748291083967e-06, "loss": 1.0491, "step": 1244 }, { "epoch": 0.6549184639663335, "grad_norm": 2.1339237689971924, "learning_rate": 4.874531074913823e-06, "loss": 0.9634, "step": 1245 }, { "epoch": 0.6554445028932141, "grad_norm": 1.946191430091858, "learning_rate": 4.874313675402662e-06, "loss": 1.0407, "step": 1246 }, { "epoch": 0.6559705418200947, "grad_norm": 1.9623258113861084, "learning_rate": 4.874096092567268e-06, "loss": 1.0662, "step": 1247 }, { "epoch": 0.6564965807469753, "grad_norm": 2.092224359512329, "learning_rate": 4.873878326424443e-06, "loss": 1.0802, "step": 1248 }, { "epoch": 0.6570226196738559, "grad_norm": 1.863853931427002, "learning_rate": 4.873660376990999e-06, "loss": 1.0789, "step": 1249 }, { "epoch": 0.6575486586007364, "grad_norm": 2.146857976913452, "learning_rate": 4.8734422442837655e-06, "loss": 1.0132, "step": 1250 }, { "epoch": 0.658074697527617, "grad_norm": 2.022573232650757, "learning_rate": 4.8732239283195844e-06, "loss": 1.0252, "step": 1251 }, { "epoch": 0.6586007364544977, "grad_norm": 2.160632848739624, "learning_rate": 4.873005429115312e-06, "loss": 1.0235, "step": 1252 }, { "epoch": 0.6591267753813782, "grad_norm": 2.0909252166748047, "learning_rate": 4.87278674668782e-06, "loss": 1.0671, "step": 1253 }, { "epoch": 0.6596528143082588, "grad_norm": 1.9689445495605469, "learning_rate": 4.872567881053991e-06, "loss": 1.0323, "step": 1254 }, { "epoch": 0.6601788532351394, "grad_norm": 2.141439914703369, "learning_rate": 4.872348832230727e-06, "loss": 1.0019, "step": 1255 }, { "epoch": 0.66070489216202, "grad_norm": 1.9927963018417358, "learning_rate": 4.872129600234938e-06, "loss": 1.0262, "step": 1256 }, { "epoch": 0.6612309310889006, "grad_norm": 2.1227667331695557, "learning_rate": 4.871910185083554e-06, "loss": 1.0341, "step": 1257 }, { "epoch": 0.6617569700157812, "grad_norm": 2.0554583072662354, "learning_rate": 4.871690586793514e-06, "loss": 1.0458, "step": 1258 }, { "epoch": 0.6622830089426618, "grad_norm": 1.9936654567718506, "learning_rate": 4.871470805381775e-06, "loss": 1.0125, "step": 1259 }, { "epoch": 0.6628090478695423, "grad_norm": 2.0953080654144287, "learning_rate": 4.871250840865306e-06, "loss": 1.0518, "step": 1260 }, { "epoch": 0.663335086796423, "grad_norm": 1.9445053339004517, "learning_rate": 4.871030693261091e-06, "loss": 0.9892, "step": 1261 }, { "epoch": 0.6638611257233035, "grad_norm": 2.054898500442505, "learning_rate": 4.870810362586127e-06, "loss": 1.0712, "step": 1262 }, { "epoch": 0.6643871646501841, "grad_norm": 2.158090114593506, "learning_rate": 4.870589848857428e-06, "loss": 0.9874, "step": 1263 }, { "epoch": 0.6649132035770647, "grad_norm": 2.081550121307373, "learning_rate": 4.870369152092019e-06, "loss": 1.0299, "step": 1264 }, { "epoch": 0.6654392425039453, "grad_norm": 1.9839400053024292, "learning_rate": 4.87014827230694e-06, "loss": 0.9997, "step": 1265 }, { "epoch": 0.6659652814308259, "grad_norm": 2.0596096515655518, "learning_rate": 4.869927209519246e-06, "loss": 1.0655, "step": 1266 }, { "epoch": 0.6664913203577064, "grad_norm": 2.3403422832489014, "learning_rate": 4.8697059637460055e-06, "loss": 1.0551, "step": 1267 }, { "epoch": 0.667017359284587, "grad_norm": 2.072814702987671, "learning_rate": 4.8694845350043004e-06, "loss": 1.0454, "step": 1268 }, { "epoch": 0.6675433982114677, "grad_norm": 2.2819271087646484, "learning_rate": 4.86926292331123e-06, "loss": 1.0076, "step": 1269 }, { "epoch": 0.6680694371383482, "grad_norm": 2.162179708480835, "learning_rate": 4.8690411286839024e-06, "loss": 1.0145, "step": 1270 }, { "epoch": 0.6685954760652288, "grad_norm": 2.1072568893432617, "learning_rate": 4.868819151139443e-06, "loss": 1.0936, "step": 1271 }, { "epoch": 0.6691215149921094, "grad_norm": 2.113056182861328, "learning_rate": 4.868596990694994e-06, "loss": 1.044, "step": 1272 }, { "epoch": 0.66964755391899, "grad_norm": 1.9856184720993042, "learning_rate": 4.868374647367705e-06, "loss": 1.0119, "step": 1273 }, { "epoch": 0.6701735928458706, "grad_norm": 2.013106346130371, "learning_rate": 4.868152121174746e-06, "loss": 1.0913, "step": 1274 }, { "epoch": 0.6706996317727512, "grad_norm": 1.8831686973571777, "learning_rate": 4.867929412133297e-06, "loss": 1.0077, "step": 1275 }, { "epoch": 0.6712256706996318, "grad_norm": 2.035214424133301, "learning_rate": 4.867706520260554e-06, "loss": 0.9683, "step": 1276 }, { "epoch": 0.6717517096265123, "grad_norm": 2.0336945056915283, "learning_rate": 4.867483445573727e-06, "loss": 1.0583, "step": 1277 }, { "epoch": 0.672277748553393, "grad_norm": 1.9241890907287598, "learning_rate": 4.867260188090041e-06, "loss": 1.0162, "step": 1278 }, { "epoch": 0.6728037874802736, "grad_norm": 2.122288942337036, "learning_rate": 4.8670367478267335e-06, "loss": 1.0633, "step": 1279 }, { "epoch": 0.6733298264071541, "grad_norm": 1.964282512664795, "learning_rate": 4.8668131248010555e-06, "loss": 1.0009, "step": 1280 }, { "epoch": 0.6738558653340347, "grad_norm": 2.075181722640991, "learning_rate": 4.866589319030273e-06, "loss": 1.0535, "step": 1281 }, { "epoch": 0.6743819042609153, "grad_norm": 2.086574077606201, "learning_rate": 4.866365330531668e-06, "loss": 1.0125, "step": 1282 }, { "epoch": 0.6749079431877959, "grad_norm": 2.176712989807129, "learning_rate": 4.866141159322535e-06, "loss": 1.0883, "step": 1283 }, { "epoch": 0.6754339821146765, "grad_norm": 2.4133596420288086, "learning_rate": 4.865916805420181e-06, "loss": 1.1115, "step": 1284 }, { "epoch": 0.6759600210415571, "grad_norm": 1.9632985591888428, "learning_rate": 4.865692268841931e-06, "loss": 0.9837, "step": 1285 }, { "epoch": 0.6764860599684377, "grad_norm": 2.320810556411743, "learning_rate": 4.865467549605119e-06, "loss": 1.0307, "step": 1286 }, { "epoch": 0.6770120988953182, "grad_norm": 2.259291172027588, "learning_rate": 4.865242647727097e-06, "loss": 1.0125, "step": 1287 }, { "epoch": 0.6775381378221988, "grad_norm": 2.069227695465088, "learning_rate": 4.8650175632252314e-06, "loss": 1.0348, "step": 1288 }, { "epoch": 0.6780641767490795, "grad_norm": 2.093912363052368, "learning_rate": 4.8647922961169e-06, "loss": 1.0628, "step": 1289 }, { "epoch": 0.67859021567596, "grad_norm": 2.0842857360839844, "learning_rate": 4.864566846419497e-06, "loss": 1.0296, "step": 1290 }, { "epoch": 0.6791162546028406, "grad_norm": 2.1448631286621094, "learning_rate": 4.864341214150428e-06, "loss": 1.0344, "step": 1291 }, { "epoch": 0.6796422935297212, "grad_norm": 2.173478841781616, "learning_rate": 4.864115399327115e-06, "loss": 1.0662, "step": 1292 }, { "epoch": 0.6801683324566018, "grad_norm": 2.1156740188598633, "learning_rate": 4.863889401966995e-06, "loss": 1.0568, "step": 1293 }, { "epoch": 0.6806943713834824, "grad_norm": 2.0641050338745117, "learning_rate": 4.863663222087515e-06, "loss": 1.0508, "step": 1294 }, { "epoch": 0.681220410310363, "grad_norm": 2.050645112991333, "learning_rate": 4.863436859706141e-06, "loss": 1.0198, "step": 1295 }, { "epoch": 0.6817464492372436, "grad_norm": 1.9624086618423462, "learning_rate": 4.86321031484035e-06, "loss": 1.012, "step": 1296 }, { "epoch": 0.6822724881641241, "grad_norm": 2.2763307094573975, "learning_rate": 4.8629835875076325e-06, "loss": 1.0208, "step": 1297 }, { "epoch": 0.6827985270910047, "grad_norm": 1.952094316482544, "learning_rate": 4.862756677725496e-06, "loss": 0.9912, "step": 1298 }, { "epoch": 0.6833245660178853, "grad_norm": 1.9964386224746704, "learning_rate": 4.862529585511461e-06, "loss": 1.0216, "step": 1299 }, { "epoch": 0.6838506049447659, "grad_norm": 2.0915441513061523, "learning_rate": 4.862302310883061e-06, "loss": 1.028, "step": 1300 }, { "epoch": 0.6843766438716465, "grad_norm": 2.239182233810425, "learning_rate": 4.862074853857843e-06, "loss": 1.1119, "step": 1301 }, { "epoch": 0.6849026827985271, "grad_norm": 2.120128870010376, "learning_rate": 4.861847214453371e-06, "loss": 1.0811, "step": 1302 }, { "epoch": 0.6854287217254077, "grad_norm": 1.8495033979415894, "learning_rate": 4.86161939268722e-06, "loss": 0.9559, "step": 1303 }, { "epoch": 0.6859547606522882, "grad_norm": 1.9767253398895264, "learning_rate": 4.861391388576982e-06, "loss": 0.9942, "step": 1304 }, { "epoch": 0.6864807995791689, "grad_norm": 1.9148463010787964, "learning_rate": 4.8611632021402605e-06, "loss": 1.0152, "step": 1305 }, { "epoch": 0.6870068385060495, "grad_norm": 2.036726474761963, "learning_rate": 4.860934833394674e-06, "loss": 1.0692, "step": 1306 }, { "epoch": 0.68753287743293, "grad_norm": 2.03383731842041, "learning_rate": 4.860706282357856e-06, "loss": 1.0429, "step": 1307 }, { "epoch": 0.6880589163598106, "grad_norm": 1.986863374710083, "learning_rate": 4.860477549047452e-06, "loss": 0.9737, "step": 1308 }, { "epoch": 0.6885849552866912, "grad_norm": 1.9917157888412476, "learning_rate": 4.860248633481124e-06, "loss": 0.9808, "step": 1309 }, { "epoch": 0.6891109942135718, "grad_norm": 1.9868308305740356, "learning_rate": 4.860019535676546e-06, "loss": 1.0001, "step": 1310 }, { "epoch": 0.6896370331404524, "grad_norm": 1.9900240898132324, "learning_rate": 4.859790255651408e-06, "loss": 1.0561, "step": 1311 }, { "epoch": 0.690163072067333, "grad_norm": 1.987703800201416, "learning_rate": 4.859560793423412e-06, "loss": 1.013, "step": 1312 }, { "epoch": 0.6906891109942136, "grad_norm": 1.9851711988449097, "learning_rate": 4.859331149010276e-06, "loss": 1.0727, "step": 1313 }, { "epoch": 0.6912151499210941, "grad_norm": 1.9733060598373413, "learning_rate": 4.8591013224297304e-06, "loss": 0.9924, "step": 1314 }, { "epoch": 0.6917411888479748, "grad_norm": 1.9737035036087036, "learning_rate": 4.85887131369952e-06, "loss": 1.0131, "step": 1315 }, { "epoch": 0.6922672277748554, "grad_norm": 2.176969528198242, "learning_rate": 4.858641122837407e-06, "loss": 1.0382, "step": 1316 }, { "epoch": 0.6927932667017359, "grad_norm": 1.951177716255188, "learning_rate": 4.858410749861161e-06, "loss": 1.011, "step": 1317 }, { "epoch": 0.6933193056286165, "grad_norm": 2.009986639022827, "learning_rate": 4.858180194788572e-06, "loss": 1.0999, "step": 1318 }, { "epoch": 0.6938453445554971, "grad_norm": 2.0470845699310303, "learning_rate": 4.857949457637441e-06, "loss": 1.0477, "step": 1319 }, { "epoch": 0.6943713834823777, "grad_norm": 2.163547992706299, "learning_rate": 4.857718538425582e-06, "loss": 1.0229, "step": 1320 }, { "epoch": 0.6948974224092583, "grad_norm": 2.0979368686676025, "learning_rate": 4.857487437170827e-06, "loss": 1.0686, "step": 1321 }, { "epoch": 0.6954234613361389, "grad_norm": 2.0388388633728027, "learning_rate": 4.857256153891017e-06, "loss": 0.991, "step": 1322 }, { "epoch": 0.6959495002630195, "grad_norm": 2.136115312576294, "learning_rate": 4.8570246886040124e-06, "loss": 1.0249, "step": 1323 }, { "epoch": 0.6964755391899, "grad_norm": 2.0932974815368652, "learning_rate": 4.8567930413276835e-06, "loss": 1.0649, "step": 1324 }, { "epoch": 0.6970015781167807, "grad_norm": 2.0559682846069336, "learning_rate": 4.856561212079916e-06, "loss": 0.9931, "step": 1325 }, { "epoch": 0.6975276170436613, "grad_norm": 1.9723689556121826, "learning_rate": 4.856329200878611e-06, "loss": 0.9628, "step": 1326 }, { "epoch": 0.6980536559705418, "grad_norm": 2.054049253463745, "learning_rate": 4.8560970077416805e-06, "loss": 1.0322, "step": 1327 }, { "epoch": 0.6985796948974224, "grad_norm": 2.100574254989624, "learning_rate": 4.855864632687055e-06, "loss": 1.0941, "step": 1328 }, { "epoch": 0.699105733824303, "grad_norm": 2.1415367126464844, "learning_rate": 4.8556320757326735e-06, "loss": 1.0341, "step": 1329 }, { "epoch": 0.6996317727511836, "grad_norm": 1.988004207611084, "learning_rate": 4.855399336896495e-06, "loss": 1.0357, "step": 1330 }, { "epoch": 0.7001578116780641, "grad_norm": 2.0249714851379395, "learning_rate": 4.855166416196487e-06, "loss": 1.0489, "step": 1331 }, { "epoch": 0.7006838506049448, "grad_norm": 1.9197039604187012, "learning_rate": 4.8549333136506356e-06, "loss": 1.0094, "step": 1332 }, { "epoch": 0.7012098895318254, "grad_norm": 2.153716564178467, "learning_rate": 4.854700029276938e-06, "loss": 1.0613, "step": 1333 }, { "epoch": 0.7017359284587059, "grad_norm": 1.9626339673995972, "learning_rate": 4.854466563093407e-06, "loss": 1.024, "step": 1334 }, { "epoch": 0.7022619673855865, "grad_norm": 2.0288281440734863, "learning_rate": 4.854232915118068e-06, "loss": 0.9778, "step": 1335 }, { "epoch": 0.7027880063124671, "grad_norm": 1.9677989482879639, "learning_rate": 4.853999085368963e-06, "loss": 0.9802, "step": 1336 }, { "epoch": 0.7033140452393477, "grad_norm": 2.054617404937744, "learning_rate": 4.853765073864144e-06, "loss": 0.9523, "step": 1337 }, { "epoch": 0.7038400841662283, "grad_norm": 2.0509955883026123, "learning_rate": 4.853530880621681e-06, "loss": 1.0324, "step": 1338 }, { "epoch": 0.7043661230931089, "grad_norm": 2.224724054336548, "learning_rate": 4.853296505659657e-06, "loss": 1.0965, "step": 1339 }, { "epoch": 0.7048921620199895, "grad_norm": 1.9698208570480347, "learning_rate": 4.8530619489961664e-06, "loss": 1.0486, "step": 1340 }, { "epoch": 0.70541820094687, "grad_norm": 2.129383087158203, "learning_rate": 4.85282721064932e-06, "loss": 1.0857, "step": 1341 }, { "epoch": 0.7059442398737507, "grad_norm": 2.2943053245544434, "learning_rate": 4.852592290637244e-06, "loss": 1.0628, "step": 1342 }, { "epoch": 0.7064702788006313, "grad_norm": 2.0792641639709473, "learning_rate": 4.852357188978075e-06, "loss": 1.0604, "step": 1343 }, { "epoch": 0.7069963177275118, "grad_norm": 2.0224812030792236, "learning_rate": 4.852121905689968e-06, "loss": 1.0687, "step": 1344 }, { "epoch": 0.7075223566543924, "grad_norm": 2.4030919075012207, "learning_rate": 4.851886440791087e-06, "loss": 1.0942, "step": 1345 }, { "epoch": 0.708048395581273, "grad_norm": 2.190215826034546, "learning_rate": 4.851650794299614e-06, "loss": 1.0393, "step": 1346 }, { "epoch": 0.7085744345081536, "grad_norm": 2.1099565029144287, "learning_rate": 4.851414966233743e-06, "loss": 1.0452, "step": 1347 }, { "epoch": 0.7091004734350342, "grad_norm": 2.156395673751831, "learning_rate": 4.851178956611682e-06, "loss": 1.0625, "step": 1348 }, { "epoch": 0.7096265123619148, "grad_norm": 2.1840314865112305, "learning_rate": 4.850942765451655e-06, "loss": 1.0467, "step": 1349 }, { "epoch": 0.7101525512887954, "grad_norm": 2.0080723762512207, "learning_rate": 4.850706392771899e-06, "loss": 1.0187, "step": 1350 }, { "epoch": 0.7106785902156759, "grad_norm": 2.1242828369140625, "learning_rate": 4.850469838590664e-06, "loss": 1.0459, "step": 1351 }, { "epoch": 0.7112046291425566, "grad_norm": 1.9652162790298462, "learning_rate": 4.8502331029262125e-06, "loss": 1.0404, "step": 1352 }, { "epoch": 0.7117306680694372, "grad_norm": 2.2363545894622803, "learning_rate": 4.849996185796827e-06, "loss": 1.0182, "step": 1353 }, { "epoch": 0.7122567069963177, "grad_norm": 2.028017044067383, "learning_rate": 4.849759087220798e-06, "loss": 1.0213, "step": 1354 }, { "epoch": 0.7127827459231983, "grad_norm": 2.265037775039673, "learning_rate": 4.849521807216432e-06, "loss": 1.0316, "step": 1355 }, { "epoch": 0.7133087848500789, "grad_norm": 2.083799362182617, "learning_rate": 4.849284345802051e-06, "loss": 1.0133, "step": 1356 }, { "epoch": 0.7138348237769595, "grad_norm": 1.9307647943496704, "learning_rate": 4.8490467029959895e-06, "loss": 1.0023, "step": 1357 }, { "epoch": 0.7143608627038401, "grad_norm": 2.1079766750335693, "learning_rate": 4.848808878816595e-06, "loss": 1.0208, "step": 1358 }, { "epoch": 0.7148869016307207, "grad_norm": 2.0214877128601074, "learning_rate": 4.8485708732822315e-06, "loss": 0.9904, "step": 1359 }, { "epoch": 0.7154129405576013, "grad_norm": 2.150768756866455, "learning_rate": 4.848332686411276e-06, "loss": 0.9969, "step": 1360 }, { "epoch": 0.7159389794844818, "grad_norm": 2.0330607891082764, "learning_rate": 4.8480943182221184e-06, "loss": 0.9865, "step": 1361 }, { "epoch": 0.7164650184113625, "grad_norm": 1.973970651626587, "learning_rate": 4.847855768733163e-06, "loss": 0.9815, "step": 1362 }, { "epoch": 0.716991057338243, "grad_norm": 2.074868679046631, "learning_rate": 4.84761703796283e-06, "loss": 1.0499, "step": 1363 }, { "epoch": 0.7175170962651236, "grad_norm": 1.9750478267669678, "learning_rate": 4.8473781259295514e-06, "loss": 0.9797, "step": 1364 }, { "epoch": 0.7180431351920042, "grad_norm": 1.971375823020935, "learning_rate": 4.847139032651774e-06, "loss": 0.9805, "step": 1365 }, { "epoch": 0.7185691741188848, "grad_norm": 2.0710880756378174, "learning_rate": 4.846899758147958e-06, "loss": 1.0143, "step": 1366 }, { "epoch": 0.7190952130457654, "grad_norm": 1.9696688652038574, "learning_rate": 4.8466603024365785e-06, "loss": 0.9869, "step": 1367 }, { "epoch": 0.7196212519726459, "grad_norm": 2.1022462844848633, "learning_rate": 4.846420665536126e-06, "loss": 1.0048, "step": 1368 }, { "epoch": 0.7201472908995266, "grad_norm": 2.164783000946045, "learning_rate": 4.8461808474651e-06, "loss": 1.0114, "step": 1369 }, { "epoch": 0.7206733298264072, "grad_norm": 2.0148744583129883, "learning_rate": 4.845940848242019e-06, "loss": 1.0232, "step": 1370 }, { "epoch": 0.7211993687532877, "grad_norm": 2.0193605422973633, "learning_rate": 4.845700667885414e-06, "loss": 0.9764, "step": 1371 }, { "epoch": 0.7217254076801684, "grad_norm": 2.005157232284546, "learning_rate": 4.845460306413829e-06, "loss": 1.0242, "step": 1372 }, { "epoch": 0.7222514466070489, "grad_norm": 2.128805637359619, "learning_rate": 4.845219763845823e-06, "loss": 0.9964, "step": 1373 }, { "epoch": 0.7227774855339295, "grad_norm": 1.9924060106277466, "learning_rate": 4.844979040199968e-06, "loss": 1.0185, "step": 1374 }, { "epoch": 0.7233035244608101, "grad_norm": 2.2126121520996094, "learning_rate": 4.844738135494851e-06, "loss": 1.0013, "step": 1375 }, { "epoch": 0.7238295633876907, "grad_norm": 2.0494630336761475, "learning_rate": 4.844497049749073e-06, "loss": 1.0628, "step": 1376 }, { "epoch": 0.7243556023145713, "grad_norm": 2.4115402698516846, "learning_rate": 4.844255782981249e-06, "loss": 1.0623, "step": 1377 }, { "epoch": 0.7248816412414518, "grad_norm": 2.062485933303833, "learning_rate": 4.8440143352100054e-06, "loss": 1.0115, "step": 1378 }, { "epoch": 0.7254076801683325, "grad_norm": 2.995894432067871, "learning_rate": 4.843772706453988e-06, "loss": 1.0805, "step": 1379 }, { "epoch": 0.7259337190952131, "grad_norm": 1.9974204301834106, "learning_rate": 4.84353089673185e-06, "loss": 1.0221, "step": 1380 }, { "epoch": 0.7264597580220936, "grad_norm": 2.1927318572998047, "learning_rate": 4.843288906062264e-06, "loss": 1.0273, "step": 1381 }, { "epoch": 0.7269857969489742, "grad_norm": 2.0213675498962402, "learning_rate": 4.8430467344639136e-06, "loss": 0.968, "step": 1382 }, { "epoch": 0.7275118358758548, "grad_norm": 2.2534306049346924, "learning_rate": 4.842804381955497e-06, "loss": 1.0457, "step": 1383 }, { "epoch": 0.7280378748027354, "grad_norm": 2.003638505935669, "learning_rate": 4.842561848555728e-06, "loss": 1.0471, "step": 1384 }, { "epoch": 0.728563913729616, "grad_norm": 2.217237949371338, "learning_rate": 4.842319134283331e-06, "loss": 1.0348, "step": 1385 }, { "epoch": 0.7290899526564966, "grad_norm": 2.1162800788879395, "learning_rate": 4.842076239157047e-06, "loss": 1.0548, "step": 1386 }, { "epoch": 0.7296159915833772, "grad_norm": 2.043252944946289, "learning_rate": 4.8418331631956325e-06, "loss": 1.0931, "step": 1387 }, { "epoch": 0.7301420305102577, "grad_norm": 2.099283218383789, "learning_rate": 4.841589906417853e-06, "loss": 1.0059, "step": 1388 }, { "epoch": 0.7306680694371384, "grad_norm": 1.9934890270233154, "learning_rate": 4.8413464688424904e-06, "loss": 1.0327, "step": 1389 }, { "epoch": 0.731194108364019, "grad_norm": 1.868202567100525, "learning_rate": 4.841102850488343e-06, "loss": 0.9622, "step": 1390 }, { "epoch": 0.7317201472908995, "grad_norm": 1.9592076539993286, "learning_rate": 4.84085905137422e-06, "loss": 1.0413, "step": 1391 }, { "epoch": 0.7322461862177801, "grad_norm": 2.0478546619415283, "learning_rate": 4.840615071518946e-06, "loss": 1.0343, "step": 1392 }, { "epoch": 0.7327722251446607, "grad_norm": 2.4996554851531982, "learning_rate": 4.840370910941358e-06, "loss": 1.1106, "step": 1393 }, { "epoch": 0.7332982640715413, "grad_norm": 2.0023233890533447, "learning_rate": 4.8401265696603085e-06, "loss": 1.0273, "step": 1394 }, { "epoch": 0.7338243029984218, "grad_norm": 2.0366029739379883, "learning_rate": 4.8398820476946625e-06, "loss": 1.0092, "step": 1395 }, { "epoch": 0.7343503419253025, "grad_norm": 2.2142248153686523, "learning_rate": 4.839637345063302e-06, "loss": 0.9884, "step": 1396 }, { "epoch": 0.7348763808521831, "grad_norm": 1.9955226182937622, "learning_rate": 4.839392461785119e-06, "loss": 1.054, "step": 1397 }, { "epoch": 0.7354024197790636, "grad_norm": 2.0607223510742188, "learning_rate": 4.839147397879023e-06, "loss": 0.9826, "step": 1398 }, { "epoch": 0.7359284587059443, "grad_norm": 2.054483652114868, "learning_rate": 4.8389021533639345e-06, "loss": 1.0738, "step": 1399 }, { "epoch": 0.7364544976328248, "grad_norm": 2.1066908836364746, "learning_rate": 4.8386567282587886e-06, "loss": 1.0937, "step": 1400 }, { "epoch": 0.7369805365597054, "grad_norm": 2.018155097961426, "learning_rate": 4.8384111225825355e-06, "loss": 0.9767, "step": 1401 }, { "epoch": 0.737506575486586, "grad_norm": 2.152189016342163, "learning_rate": 4.83816533635414e-06, "loss": 1.0062, "step": 1402 }, { "epoch": 0.7380326144134666, "grad_norm": 1.9946335554122925, "learning_rate": 4.8379193695925785e-06, "loss": 1.0724, "step": 1403 }, { "epoch": 0.7385586533403472, "grad_norm": 2.077017307281494, "learning_rate": 4.837673222316843e-06, "loss": 1.0991, "step": 1404 }, { "epoch": 0.7390846922672277, "grad_norm": 2.0850563049316406, "learning_rate": 4.837426894545938e-06, "loss": 1.0527, "step": 1405 }, { "epoch": 0.7396107311941084, "grad_norm": 1.9786406755447388, "learning_rate": 4.837180386298883e-06, "loss": 0.9666, "step": 1406 }, { "epoch": 0.740136770120989, "grad_norm": 2.0060155391693115, "learning_rate": 4.836933697594711e-06, "loss": 1.0795, "step": 1407 }, { "epoch": 0.7406628090478695, "grad_norm": 2.086906909942627, "learning_rate": 4.836686828452471e-06, "loss": 0.9925, "step": 1408 }, { "epoch": 0.7411888479747502, "grad_norm": 2.0125632286071777, "learning_rate": 4.836439778891223e-06, "loss": 0.9706, "step": 1409 }, { "epoch": 0.7417148869016307, "grad_norm": 1.8921434879302979, "learning_rate": 4.836192548930041e-06, "loss": 1.0237, "step": 1410 }, { "epoch": 0.7422409258285113, "grad_norm": 1.9400858879089355, "learning_rate": 4.835945138588015e-06, "loss": 1.0444, "step": 1411 }, { "epoch": 0.742766964755392, "grad_norm": 2.083749294281006, "learning_rate": 4.835697547884248e-06, "loss": 1.0136, "step": 1412 }, { "epoch": 0.7432930036822725, "grad_norm": 2.0750844478607178, "learning_rate": 4.8354497768378575e-06, "loss": 1.0863, "step": 1413 }, { "epoch": 0.7438190426091531, "grad_norm": 2.137214183807373, "learning_rate": 4.835201825467973e-06, "loss": 1.0095, "step": 1414 }, { "epoch": 0.7443450815360336, "grad_norm": 2.06549072265625, "learning_rate": 4.834953693793739e-06, "loss": 1.0449, "step": 1415 }, { "epoch": 0.7448711204629143, "grad_norm": 2.0396728515625, "learning_rate": 4.834705381834315e-06, "loss": 1.0093, "step": 1416 }, { "epoch": 0.7453971593897949, "grad_norm": 1.993697166442871, "learning_rate": 4.834456889608874e-06, "loss": 1.0075, "step": 1417 }, { "epoch": 0.7459231983166754, "grad_norm": 2.1017816066741943, "learning_rate": 4.834208217136601e-06, "loss": 1.0687, "step": 1418 }, { "epoch": 0.746449237243556, "grad_norm": 2.0740413665771484, "learning_rate": 4.833959364436698e-06, "loss": 0.9777, "step": 1419 }, { "epoch": 0.7469752761704366, "grad_norm": 2.0858206748962402, "learning_rate": 4.833710331528377e-06, "loss": 1.044, "step": 1420 }, { "epoch": 0.7475013150973172, "grad_norm": 2.33298921585083, "learning_rate": 4.833461118430869e-06, "loss": 1.0602, "step": 1421 }, { "epoch": 0.7480273540241978, "grad_norm": 2.1458897590637207, "learning_rate": 4.833211725163414e-06, "loss": 0.9903, "step": 1422 }, { "epoch": 0.7485533929510784, "grad_norm": 2.15071177482605, "learning_rate": 4.8329621517452685e-06, "loss": 1.011, "step": 1423 }, { "epoch": 0.749079431877959, "grad_norm": 2.0375895500183105, "learning_rate": 4.8327123981957025e-06, "loss": 1.0021, "step": 1424 }, { "epoch": 0.7496054708048395, "grad_norm": 1.9808685779571533, "learning_rate": 4.832462464534e-06, "loss": 1.025, "step": 1425 }, { "epoch": 0.7501315097317202, "grad_norm": 2.046558380126953, "learning_rate": 4.832212350779459e-06, "loss": 1.0435, "step": 1426 }, { "epoch": 0.7506575486586007, "grad_norm": 2.0020248889923096, "learning_rate": 4.831962056951392e-06, "loss": 1.0207, "step": 1427 }, { "epoch": 0.7511835875854813, "grad_norm": 1.9901740550994873, "learning_rate": 4.831711583069122e-06, "loss": 1.0505, "step": 1428 }, { "epoch": 0.751709626512362, "grad_norm": 2.112236738204956, "learning_rate": 4.83146092915199e-06, "loss": 1.0353, "step": 1429 }, { "epoch": 0.7522356654392425, "grad_norm": 2.0244028568267822, "learning_rate": 4.831210095219349e-06, "loss": 1.0169, "step": 1430 }, { "epoch": 0.7527617043661231, "grad_norm": 2.298645257949829, "learning_rate": 4.830959081290567e-06, "loss": 1.0498, "step": 1431 }, { "epoch": 0.7532877432930036, "grad_norm": 2.1593234539031982, "learning_rate": 4.8307078873850244e-06, "loss": 1.0954, "step": 1432 }, { "epoch": 0.7538137822198843, "grad_norm": 1.9387123584747314, "learning_rate": 4.830456513522117e-06, "loss": 0.9784, "step": 1433 }, { "epoch": 0.7543398211467649, "grad_norm": 2.1634531021118164, "learning_rate": 4.830204959721253e-06, "loss": 1.0516, "step": 1434 }, { "epoch": 0.7548658600736454, "grad_norm": 1.9310704469680786, "learning_rate": 4.829953226001855e-06, "loss": 0.9648, "step": 1435 }, { "epoch": 0.7553918990005261, "grad_norm": 2.0547149181365967, "learning_rate": 4.8297013123833605e-06, "loss": 1.0734, "step": 1436 }, { "epoch": 0.7559179379274066, "grad_norm": 2.222872734069824, "learning_rate": 4.829449218885219e-06, "loss": 0.9645, "step": 1437 }, { "epoch": 0.7564439768542872, "grad_norm": 2.128120183944702, "learning_rate": 4.829196945526897e-06, "loss": 1.046, "step": 1438 }, { "epoch": 0.7569700157811678, "grad_norm": 2.0309526920318604, "learning_rate": 4.828944492327872e-06, "loss": 1.0596, "step": 1439 }, { "epoch": 0.7574960547080484, "grad_norm": 2.0946176052093506, "learning_rate": 4.828691859307635e-06, "loss": 1.0134, "step": 1440 }, { "epoch": 0.758022093634929, "grad_norm": 1.9159823656082153, "learning_rate": 4.828439046485693e-06, "loss": 1.0081, "step": 1441 }, { "epoch": 0.7585481325618095, "grad_norm": 2.203627586364746, "learning_rate": 4.828186053881566e-06, "loss": 1.0451, "step": 1442 }, { "epoch": 0.7590741714886902, "grad_norm": 2.065521240234375, "learning_rate": 4.8279328815147895e-06, "loss": 1.0289, "step": 1443 }, { "epoch": 0.7596002104155708, "grad_norm": 2.1597719192504883, "learning_rate": 4.827679529404909e-06, "loss": 1.0373, "step": 1444 }, { "epoch": 0.7601262493424513, "grad_norm": 2.6100237369537354, "learning_rate": 4.827425997571488e-06, "loss": 1.0254, "step": 1445 }, { "epoch": 0.760652288269332, "grad_norm": 2.1975550651550293, "learning_rate": 4.8271722860341e-06, "loss": 1.0254, "step": 1446 }, { "epoch": 0.7611783271962125, "grad_norm": 2.019261360168457, "learning_rate": 4.826918394812336e-06, "loss": 1.0823, "step": 1447 }, { "epoch": 0.7617043661230931, "grad_norm": 1.9351961612701416, "learning_rate": 4.8266643239257996e-06, "loss": 1.0248, "step": 1448 }, { "epoch": 0.7622304050499737, "grad_norm": 1.9437129497528076, "learning_rate": 4.826410073394106e-06, "loss": 0.984, "step": 1449 }, { "epoch": 0.7627564439768543, "grad_norm": 2.277479887008667, "learning_rate": 4.826155643236889e-06, "loss": 1.0264, "step": 1450 }, { "epoch": 0.7632824829037349, "grad_norm": 2.2033772468566895, "learning_rate": 4.825901033473791e-06, "loss": 1.0249, "step": 1451 }, { "epoch": 0.7638085218306154, "grad_norm": 2.1912593841552734, "learning_rate": 4.825646244124472e-06, "loss": 1.0366, "step": 1452 }, { "epoch": 0.7643345607574961, "grad_norm": 2.0046746730804443, "learning_rate": 4.825391275208606e-06, "loss": 1.0411, "step": 1453 }, { "epoch": 0.7648605996843767, "grad_norm": 2.0601322650909424, "learning_rate": 4.825136126745877e-06, "loss": 1.052, "step": 1454 }, { "epoch": 0.7653866386112572, "grad_norm": 2.148794651031494, "learning_rate": 4.824880798755986e-06, "loss": 1.04, "step": 1455 }, { "epoch": 0.7659126775381379, "grad_norm": 2.027374505996704, "learning_rate": 4.824625291258649e-06, "loss": 1.005, "step": 1456 }, { "epoch": 0.7664387164650184, "grad_norm": 2.0703351497650146, "learning_rate": 4.824369604273592e-06, "loss": 1.0157, "step": 1457 }, { "epoch": 0.766964755391899, "grad_norm": 2.1002986431121826, "learning_rate": 4.8241137378205575e-06, "loss": 1.0355, "step": 1458 }, { "epoch": 0.7674907943187795, "grad_norm": 1.9970546960830688, "learning_rate": 4.823857691919302e-06, "loss": 0.9833, "step": 1459 }, { "epoch": 0.7680168332456602, "grad_norm": 2.0489771366119385, "learning_rate": 4.823601466589595e-06, "loss": 1.0351, "step": 1460 }, { "epoch": 0.7685428721725408, "grad_norm": 2.0190834999084473, "learning_rate": 4.823345061851219e-06, "loss": 1.0406, "step": 1461 }, { "epoch": 0.7690689110994213, "grad_norm": 2.0567877292633057, "learning_rate": 4.823088477723973e-06, "loss": 1.0593, "step": 1462 }, { "epoch": 0.769594950026302, "grad_norm": 1.883132815361023, "learning_rate": 4.822831714227667e-06, "loss": 1.0055, "step": 1463 }, { "epoch": 0.7701209889531825, "grad_norm": 1.9520277976989746, "learning_rate": 4.822574771382127e-06, "loss": 0.9831, "step": 1464 }, { "epoch": 0.7706470278800631, "grad_norm": 2.0123813152313232, "learning_rate": 4.822317649207191e-06, "loss": 0.9841, "step": 1465 }, { "epoch": 0.7711730668069438, "grad_norm": 2.089940309524536, "learning_rate": 4.8220603477227124e-06, "loss": 1.0121, "step": 1466 }, { "epoch": 0.7716991057338243, "grad_norm": 1.9485499858856201, "learning_rate": 4.8218028669485585e-06, "loss": 0.9744, "step": 1467 }, { "epoch": 0.7722251446607049, "grad_norm": 2.2764859199523926, "learning_rate": 4.821545206904608e-06, "loss": 1.0018, "step": 1468 }, { "epoch": 0.7727511835875854, "grad_norm": 2.039769411087036, "learning_rate": 4.821287367610756e-06, "loss": 1.0256, "step": 1469 }, { "epoch": 0.7732772225144661, "grad_norm": 2.0036065578460693, "learning_rate": 4.821029349086911e-06, "loss": 1.0399, "step": 1470 }, { "epoch": 0.7738032614413467, "grad_norm": 2.056286573410034, "learning_rate": 4.820771151352996e-06, "loss": 1.0077, "step": 1471 }, { "epoch": 0.7743293003682272, "grad_norm": 2.0001938343048096, "learning_rate": 4.820512774428944e-06, "loss": 1.0109, "step": 1472 }, { "epoch": 0.7748553392951079, "grad_norm": 2.007289409637451, "learning_rate": 4.820254218334707e-06, "loss": 1.0223, "step": 1473 }, { "epoch": 0.7753813782219884, "grad_norm": 2.079768657684326, "learning_rate": 4.8199954830902465e-06, "loss": 1.0565, "step": 1474 }, { "epoch": 0.775907417148869, "grad_norm": 2.030198097229004, "learning_rate": 4.819736568715543e-06, "loss": 1.033, "step": 1475 }, { "epoch": 0.7764334560757497, "grad_norm": 2.6482961177825928, "learning_rate": 4.819477475230584e-06, "loss": 1.0595, "step": 1476 }, { "epoch": 0.7769594950026302, "grad_norm": 2.160472869873047, "learning_rate": 4.8192182026553775e-06, "loss": 1.0214, "step": 1477 }, { "epoch": 0.7774855339295108, "grad_norm": 2.1956963539123535, "learning_rate": 4.818958751009941e-06, "loss": 1.0647, "step": 1478 }, { "epoch": 0.7780115728563913, "grad_norm": 2.346040725708008, "learning_rate": 4.818699120314306e-06, "loss": 1.0289, "step": 1479 }, { "epoch": 0.778537611783272, "grad_norm": 2.049593448638916, "learning_rate": 4.818439310588521e-06, "loss": 1.0188, "step": 1480 }, { "epoch": 0.7790636507101526, "grad_norm": 1.9567065238952637, "learning_rate": 4.818179321852646e-06, "loss": 1.0645, "step": 1481 }, { "epoch": 0.7795896896370331, "grad_norm": 2.0995101928710938, "learning_rate": 4.817919154126753e-06, "loss": 1.0283, "step": 1482 }, { "epoch": 0.7801157285639138, "grad_norm": 2.117649555206299, "learning_rate": 4.817658807430933e-06, "loss": 0.9973, "step": 1483 }, { "epoch": 0.7806417674907943, "grad_norm": 2.058525800704956, "learning_rate": 4.817398281785286e-06, "loss": 1.0278, "step": 1484 }, { "epoch": 0.7811678064176749, "grad_norm": 1.9914313554763794, "learning_rate": 4.817137577209927e-06, "loss": 0.9591, "step": 1485 }, { "epoch": 0.7816938453445555, "grad_norm": 1.9432276487350464, "learning_rate": 4.816876693724987e-06, "loss": 0.9964, "step": 1486 }, { "epoch": 0.7822198842714361, "grad_norm": 2.011399507522583, "learning_rate": 4.816615631350608e-06, "loss": 0.9963, "step": 1487 }, { "epoch": 0.7827459231983167, "grad_norm": 1.9606966972351074, "learning_rate": 4.816354390106947e-06, "loss": 0.9756, "step": 1488 }, { "epoch": 0.7832719621251972, "grad_norm": 2.011887788772583, "learning_rate": 4.816092970014176e-06, "loss": 1.0194, "step": 1489 }, { "epoch": 0.7837980010520779, "grad_norm": 2.0520918369293213, "learning_rate": 4.815831371092478e-06, "loss": 1.02, "step": 1490 }, { "epoch": 0.7843240399789585, "grad_norm": 2.018293619155884, "learning_rate": 4.815569593362053e-06, "loss": 1.0289, "step": 1491 }, { "epoch": 0.784850078905839, "grad_norm": 2.016738176345825, "learning_rate": 4.815307636843112e-06, "loss": 1.0523, "step": 1492 }, { "epoch": 0.7853761178327197, "grad_norm": 2.063619375228882, "learning_rate": 4.815045501555882e-06, "loss": 1.0099, "step": 1493 }, { "epoch": 0.7859021567596002, "grad_norm": 2.122360944747925, "learning_rate": 4.814783187520602e-06, "loss": 1.0346, "step": 1494 }, { "epoch": 0.7864281956864808, "grad_norm": 2.040095329284668, "learning_rate": 4.814520694757526e-06, "loss": 1.0017, "step": 1495 }, { "epoch": 0.7869542346133613, "grad_norm": 2.003471612930298, "learning_rate": 4.814258023286922e-06, "loss": 0.975, "step": 1496 }, { "epoch": 0.787480273540242, "grad_norm": 1.905517816543579, "learning_rate": 4.81399517312907e-06, "loss": 0.9899, "step": 1497 }, { "epoch": 0.7880063124671226, "grad_norm": 2.047112226486206, "learning_rate": 4.813732144304266e-06, "loss": 0.9558, "step": 1498 }, { "epoch": 0.7885323513940031, "grad_norm": 1.9621355533599854, "learning_rate": 4.8134689368328194e-06, "loss": 1.0668, "step": 1499 }, { "epoch": 0.7890583903208838, "grad_norm": 1.9221957921981812, "learning_rate": 4.813205550735052e-06, "loss": 1.0082, "step": 1500 }, { "epoch": 0.7895844292477643, "grad_norm": 2.002659797668457, "learning_rate": 4.812941986031299e-06, "loss": 1.0192, "step": 1501 }, { "epoch": 0.7901104681746449, "grad_norm": 2.1077136993408203, "learning_rate": 4.812678242741913e-06, "loss": 1.0316, "step": 1502 }, { "epoch": 0.7906365071015256, "grad_norm": 2.0782320499420166, "learning_rate": 4.812414320887256e-06, "loss": 1.058, "step": 1503 }, { "epoch": 0.7911625460284061, "grad_norm": 2.049888849258423, "learning_rate": 4.812150220487708e-06, "loss": 1.0033, "step": 1504 }, { "epoch": 0.7916885849552867, "grad_norm": 2.025468587875366, "learning_rate": 4.811885941563659e-06, "loss": 1.0066, "step": 1505 }, { "epoch": 0.7922146238821672, "grad_norm": 2.0612878799438477, "learning_rate": 4.8116214841355145e-06, "loss": 0.9783, "step": 1506 }, { "epoch": 0.7927406628090479, "grad_norm": 1.9370075464248657, "learning_rate": 4.811356848223693e-06, "loss": 1.0171, "step": 1507 }, { "epoch": 0.7932667017359285, "grad_norm": 2.069326877593994, "learning_rate": 4.8110920338486285e-06, "loss": 1.0283, "step": 1508 }, { "epoch": 0.793792740662809, "grad_norm": 2.076786518096924, "learning_rate": 4.810827041030768e-06, "loss": 0.9942, "step": 1509 }, { "epoch": 0.7943187795896897, "grad_norm": 1.8861708641052246, "learning_rate": 4.810561869790571e-06, "loss": 0.9909, "step": 1510 }, { "epoch": 0.7948448185165702, "grad_norm": 2.064493417739868, "learning_rate": 4.810296520148513e-06, "loss": 1.0302, "step": 1511 }, { "epoch": 0.7953708574434508, "grad_norm": 2.0212459564208984, "learning_rate": 4.810030992125081e-06, "loss": 0.9912, "step": 1512 }, { "epoch": 0.7958968963703315, "grad_norm": 2.047384023666382, "learning_rate": 4.809765285740776e-06, "loss": 1.0165, "step": 1513 }, { "epoch": 0.796422935297212, "grad_norm": 2.2222740650177, "learning_rate": 4.809499401016115e-06, "loss": 1.0295, "step": 1514 }, { "epoch": 0.7969489742240926, "grad_norm": 1.9516112804412842, "learning_rate": 4.809233337971627e-06, "loss": 0.9562, "step": 1515 }, { "epoch": 0.7974750131509731, "grad_norm": 2.0002121925354004, "learning_rate": 4.808967096627855e-06, "loss": 1.0076, "step": 1516 }, { "epoch": 0.7980010520778538, "grad_norm": 2.182039260864258, "learning_rate": 4.808700677005357e-06, "loss": 0.9925, "step": 1517 }, { "epoch": 0.7985270910047344, "grad_norm": 2.0578761100769043, "learning_rate": 4.808434079124701e-06, "loss": 0.9831, "step": 1518 }, { "epoch": 0.7990531299316149, "grad_norm": 1.8856642246246338, "learning_rate": 4.8081673030064735e-06, "loss": 1.0309, "step": 1519 }, { "epoch": 0.7995791688584956, "grad_norm": 2.1273880004882812, "learning_rate": 4.807900348671272e-06, "loss": 1.0581, "step": 1520 }, { "epoch": 0.8001052077853761, "grad_norm": 2.0696675777435303, "learning_rate": 4.8076332161397085e-06, "loss": 1.0402, "step": 1521 }, { "epoch": 0.8006312467122567, "grad_norm": 2.034176826477051, "learning_rate": 4.80736590543241e-06, "loss": 1.018, "step": 1522 }, { "epoch": 0.8011572856391374, "grad_norm": 1.9405510425567627, "learning_rate": 4.807098416570014e-06, "loss": 1.037, "step": 1523 }, { "epoch": 0.8016833245660179, "grad_norm": 2.0185844898223877, "learning_rate": 4.806830749573174e-06, "loss": 1.0817, "step": 1524 }, { "epoch": 0.8022093634928985, "grad_norm": 2.0617692470550537, "learning_rate": 4.806562904462559e-06, "loss": 0.989, "step": 1525 }, { "epoch": 0.802735402419779, "grad_norm": 2.022000789642334, "learning_rate": 4.806294881258846e-06, "loss": 1.0245, "step": 1526 }, { "epoch": 0.8032614413466597, "grad_norm": 2.189361572265625, "learning_rate": 4.806026679982733e-06, "loss": 1.0537, "step": 1527 }, { "epoch": 0.8037874802735402, "grad_norm": 2.0094563961029053, "learning_rate": 4.805758300654926e-06, "loss": 1.0437, "step": 1528 }, { "epoch": 0.8043135192004208, "grad_norm": 1.8940585851669312, "learning_rate": 4.805489743296148e-06, "loss": 0.9811, "step": 1529 }, { "epoch": 0.8048395581273015, "grad_norm": 2.0169241428375244, "learning_rate": 4.805221007927134e-06, "loss": 1.0354, "step": 1530 }, { "epoch": 0.805365597054182, "grad_norm": 2.1269545555114746, "learning_rate": 4.804952094568635e-06, "loss": 1.0439, "step": 1531 }, { "epoch": 0.8058916359810626, "grad_norm": 1.99850332736969, "learning_rate": 4.804683003241413e-06, "loss": 1.0313, "step": 1532 }, { "epoch": 0.8064176749079431, "grad_norm": 2.0577683448791504, "learning_rate": 4.804413733966244e-06, "loss": 1.0319, "step": 1533 }, { "epoch": 0.8069437138348238, "grad_norm": 1.993945837020874, "learning_rate": 4.804144286763921e-06, "loss": 1.0571, "step": 1534 }, { "epoch": 0.8074697527617044, "grad_norm": 2.00144624710083, "learning_rate": 4.803874661655246e-06, "loss": 1.0136, "step": 1535 }, { "epoch": 0.8079957916885849, "grad_norm": 2.114583969116211, "learning_rate": 4.8036048586610394e-06, "loss": 0.9996, "step": 1536 }, { "epoch": 0.8085218306154656, "grad_norm": 2.019767999649048, "learning_rate": 4.803334877802131e-06, "loss": 0.9812, "step": 1537 }, { "epoch": 0.8090478695423461, "grad_norm": 2.1253437995910645, "learning_rate": 4.803064719099368e-06, "loss": 1.041, "step": 1538 }, { "epoch": 0.8095739084692267, "grad_norm": 2.055514335632324, "learning_rate": 4.802794382573609e-06, "loss": 0.9733, "step": 1539 }, { "epoch": 0.8100999473961074, "grad_norm": 2.0274434089660645, "learning_rate": 4.802523868245727e-06, "loss": 1.0222, "step": 1540 }, { "epoch": 0.8106259863229879, "grad_norm": 2.1663291454315186, "learning_rate": 4.80225317613661e-06, "loss": 1.0308, "step": 1541 }, { "epoch": 0.8111520252498685, "grad_norm": 1.8864918947219849, "learning_rate": 4.801982306267156e-06, "loss": 0.9551, "step": 1542 }, { "epoch": 0.811678064176749, "grad_norm": 2.1302011013031006, "learning_rate": 4.801711258658281e-06, "loss": 1.0188, "step": 1543 }, { "epoch": 0.8122041031036297, "grad_norm": 1.9002829790115356, "learning_rate": 4.801440033330914e-06, "loss": 1.0278, "step": 1544 }, { "epoch": 0.8127301420305103, "grad_norm": 2.1114113330841064, "learning_rate": 4.801168630305995e-06, "loss": 1.0616, "step": 1545 }, { "epoch": 0.8132561809573908, "grad_norm": 1.9383304119110107, "learning_rate": 4.800897049604479e-06, "loss": 0.9977, "step": 1546 }, { "epoch": 0.8137822198842715, "grad_norm": 1.9206221103668213, "learning_rate": 4.800625291247338e-06, "loss": 0.9758, "step": 1547 }, { "epoch": 0.814308258811152, "grad_norm": 1.9258513450622559, "learning_rate": 4.800353355255552e-06, "loss": 0.985, "step": 1548 }, { "epoch": 0.8148342977380326, "grad_norm": 1.9767898321151733, "learning_rate": 4.800081241650117e-06, "loss": 0.9802, "step": 1549 }, { "epoch": 0.8153603366649133, "grad_norm": 1.9899487495422363, "learning_rate": 4.799808950452047e-06, "loss": 1.0104, "step": 1550 }, { "epoch": 0.8158863755917938, "grad_norm": 1.9970616102218628, "learning_rate": 4.799536481682362e-06, "loss": 1.0125, "step": 1551 }, { "epoch": 0.8164124145186744, "grad_norm": 1.9914542436599731, "learning_rate": 4.799263835362103e-06, "loss": 1.0458, "step": 1552 }, { "epoch": 0.8169384534455549, "grad_norm": 2.072939157485962, "learning_rate": 4.798991011512319e-06, "loss": 1.0663, "step": 1553 }, { "epoch": 0.8174644923724356, "grad_norm": 1.9783833026885986, "learning_rate": 4.798718010154076e-06, "loss": 1.0281, "step": 1554 }, { "epoch": 0.8179905312993162, "grad_norm": 2.4431405067443848, "learning_rate": 4.798444831308454e-06, "loss": 1.0667, "step": 1555 }, { "epoch": 0.8185165702261967, "grad_norm": 2.1270408630371094, "learning_rate": 4.798171474996543e-06, "loss": 1.0217, "step": 1556 }, { "epoch": 0.8190426091530774, "grad_norm": 2.091042995452881, "learning_rate": 4.797897941239452e-06, "loss": 1.0126, "step": 1557 }, { "epoch": 0.8195686480799579, "grad_norm": 2.016575336456299, "learning_rate": 4.797624230058299e-06, "loss": 1.0269, "step": 1558 }, { "epoch": 0.8200946870068385, "grad_norm": 2.1780738830566406, "learning_rate": 4.797350341474218e-06, "loss": 1.0405, "step": 1559 }, { "epoch": 0.820620725933719, "grad_norm": 2.0331525802612305, "learning_rate": 4.797076275508358e-06, "loss": 1.0452, "step": 1560 }, { "epoch": 0.8211467648605997, "grad_norm": 2.0023865699768066, "learning_rate": 4.796802032181877e-06, "loss": 0.9752, "step": 1561 }, { "epoch": 0.8216728037874803, "grad_norm": 2.11030912399292, "learning_rate": 4.796527611515952e-06, "loss": 1.0675, "step": 1562 }, { "epoch": 0.8221988427143608, "grad_norm": 2.0733113288879395, "learning_rate": 4.7962530135317705e-06, "loss": 1.0511, "step": 1563 }, { "epoch": 0.8227248816412415, "grad_norm": 2.0920655727386475, "learning_rate": 4.795978238250535e-06, "loss": 1.0797, "step": 1564 }, { "epoch": 0.823250920568122, "grad_norm": 2.218693256378174, "learning_rate": 4.795703285693461e-06, "loss": 1.0385, "step": 1565 }, { "epoch": 0.8237769594950026, "grad_norm": 1.9661623239517212, "learning_rate": 4.795428155881779e-06, "loss": 1.001, "step": 1566 }, { "epoch": 0.8243029984218833, "grad_norm": 2.1669209003448486, "learning_rate": 4.795152848836731e-06, "loss": 1.0317, "step": 1567 }, { "epoch": 0.8248290373487638, "grad_norm": 1.9323532581329346, "learning_rate": 4.794877364579573e-06, "loss": 1.0182, "step": 1568 }, { "epoch": 0.8253550762756444, "grad_norm": 1.9551295042037964, "learning_rate": 4.794601703131579e-06, "loss": 1.0048, "step": 1569 }, { "epoch": 0.8258811152025249, "grad_norm": 1.9809366464614868, "learning_rate": 4.7943258645140285e-06, "loss": 1.0377, "step": 1570 }, { "epoch": 0.8264071541294056, "grad_norm": 2.0074756145477295, "learning_rate": 4.794049848748224e-06, "loss": 1.0218, "step": 1571 }, { "epoch": 0.8269331930562862, "grad_norm": 2.0177736282348633, "learning_rate": 4.793773655855474e-06, "loss": 1.0402, "step": 1572 }, { "epoch": 0.8274592319831667, "grad_norm": 2.0348360538482666, "learning_rate": 4.7934972858571035e-06, "loss": 1.0312, "step": 1573 }, { "epoch": 0.8279852709100474, "grad_norm": 2.097808599472046, "learning_rate": 4.793220738774455e-06, "loss": 1.0618, "step": 1574 }, { "epoch": 0.8285113098369279, "grad_norm": 2.061023473739624, "learning_rate": 4.792944014628877e-06, "loss": 1.0464, "step": 1575 }, { "epoch": 0.8290373487638085, "grad_norm": 2.1510798931121826, "learning_rate": 4.792667113441738e-06, "loss": 1.0102, "step": 1576 }, { "epoch": 0.8295633876906892, "grad_norm": 2.1446409225463867, "learning_rate": 4.7923900352344185e-06, "loss": 1.0577, "step": 1577 }, { "epoch": 0.8300894266175697, "grad_norm": 2.2582831382751465, "learning_rate": 4.79211278002831e-06, "loss": 1.1042, "step": 1578 }, { "epoch": 0.8306154655444503, "grad_norm": 2.0069401264190674, "learning_rate": 4.791835347844821e-06, "loss": 0.9835, "step": 1579 }, { "epoch": 0.8311415044713308, "grad_norm": 2.0074360370635986, "learning_rate": 4.791557738705372e-06, "loss": 1.0596, "step": 1580 }, { "epoch": 0.8316675433982115, "grad_norm": 2.2237892150878906, "learning_rate": 4.791279952631399e-06, "loss": 1.0162, "step": 1581 }, { "epoch": 0.8321935823250921, "grad_norm": 2.0037453174591064, "learning_rate": 4.791001989644349e-06, "loss": 0.9879, "step": 1582 }, { "epoch": 0.8327196212519726, "grad_norm": 1.994869351387024, "learning_rate": 4.790723849765684e-06, "loss": 0.9908, "step": 1583 }, { "epoch": 0.8332456601788533, "grad_norm": 2.1808955669403076, "learning_rate": 4.790445533016879e-06, "loss": 0.9896, "step": 1584 }, { "epoch": 0.8337716991057338, "grad_norm": 1.9274131059646606, "learning_rate": 4.790167039419424e-06, "loss": 0.9383, "step": 1585 }, { "epoch": 0.8342977380326144, "grad_norm": 2.0095322132110596, "learning_rate": 4.789888368994823e-06, "loss": 1.0282, "step": 1586 }, { "epoch": 0.8348237769594951, "grad_norm": 1.957546353340149, "learning_rate": 4.7896095217645895e-06, "loss": 0.9559, "step": 1587 }, { "epoch": 0.8353498158863756, "grad_norm": 2.1231918334960938, "learning_rate": 4.789330497750258e-06, "loss": 1.0414, "step": 1588 }, { "epoch": 0.8358758548132562, "grad_norm": 2.0618984699249268, "learning_rate": 4.789051296973368e-06, "loss": 0.9931, "step": 1589 }, { "epoch": 0.8364018937401367, "grad_norm": 2.023416042327881, "learning_rate": 4.78877191945548e-06, "loss": 0.963, "step": 1590 }, { "epoch": 0.8369279326670174, "grad_norm": 2.0902810096740723, "learning_rate": 4.788492365218164e-06, "loss": 1.076, "step": 1591 }, { "epoch": 0.8374539715938979, "grad_norm": 1.9094164371490479, "learning_rate": 4.788212634283005e-06, "loss": 0.9444, "step": 1592 }, { "epoch": 0.8379800105207785, "grad_norm": 1.9887592792510986, "learning_rate": 4.7879327266716e-06, "loss": 1.0364, "step": 1593 }, { "epoch": 0.8385060494476592, "grad_norm": 2.0019707679748535, "learning_rate": 4.787652642405564e-06, "loss": 1.0544, "step": 1594 }, { "epoch": 0.8390320883745397, "grad_norm": 2.0776329040527344, "learning_rate": 4.787372381506521e-06, "loss": 0.9949, "step": 1595 }, { "epoch": 0.8395581273014203, "grad_norm": 2.0091662406921387, "learning_rate": 4.7870919439961094e-06, "loss": 1.0165, "step": 1596 }, { "epoch": 0.8400841662283008, "grad_norm": 2.0458288192749023, "learning_rate": 4.786811329895984e-06, "loss": 1.0341, "step": 1597 }, { "epoch": 0.8406102051551815, "grad_norm": 2.0741751194000244, "learning_rate": 4.78653053922781e-06, "loss": 1.0509, "step": 1598 }, { "epoch": 0.8411362440820621, "grad_norm": 2.141406774520874, "learning_rate": 4.7862495720132695e-06, "loss": 1.0665, "step": 1599 }, { "epoch": 0.8416622830089426, "grad_norm": 2.2400975227355957, "learning_rate": 4.785968428274055e-06, "loss": 0.93, "step": 1600 }, { "epoch": 0.8421883219358233, "grad_norm": 1.929742455482483, "learning_rate": 4.785687108031875e-06, "loss": 1.0339, "step": 1601 }, { "epoch": 0.8427143608627038, "grad_norm": 2.012728452682495, "learning_rate": 4.785405611308448e-06, "loss": 0.9945, "step": 1602 }, { "epoch": 0.8432403997895844, "grad_norm": 2.0826306343078613, "learning_rate": 4.785123938125511e-06, "loss": 1.0322, "step": 1603 }, { "epoch": 0.8437664387164651, "grad_norm": 2.0303595066070557, "learning_rate": 4.784842088504813e-06, "loss": 1.0304, "step": 1604 }, { "epoch": 0.8442924776433456, "grad_norm": 2.0710513591766357, "learning_rate": 4.7845600624681145e-06, "loss": 1.0358, "step": 1605 }, { "epoch": 0.8448185165702262, "grad_norm": 2.052515983581543, "learning_rate": 4.784277860037192e-06, "loss": 1.0316, "step": 1606 }, { "epoch": 0.8453445554971067, "grad_norm": 2.1331636905670166, "learning_rate": 4.783995481233835e-06, "loss": 1.0139, "step": 1607 }, { "epoch": 0.8458705944239874, "grad_norm": 1.9738709926605225, "learning_rate": 4.783712926079846e-06, "loss": 1.034, "step": 1608 }, { "epoch": 0.846396633350868, "grad_norm": 2.059412956237793, "learning_rate": 4.78343019459704e-06, "loss": 1.0468, "step": 1609 }, { "epoch": 0.8469226722777485, "grad_norm": 2.027773141860962, "learning_rate": 4.783147286807249e-06, "loss": 1.0028, "step": 1610 }, { "epoch": 0.8474487112046292, "grad_norm": 2.1288933753967285, "learning_rate": 4.782864202732317e-06, "loss": 1.0177, "step": 1611 }, { "epoch": 0.8479747501315097, "grad_norm": 2.160947322845459, "learning_rate": 4.7825809423941e-06, "loss": 0.9814, "step": 1612 }, { "epoch": 0.8485007890583903, "grad_norm": 2.021970272064209, "learning_rate": 4.782297505814469e-06, "loss": 1.0198, "step": 1613 }, { "epoch": 0.849026827985271, "grad_norm": 1.9154043197631836, "learning_rate": 4.7820138930153106e-06, "loss": 1.0044, "step": 1614 }, { "epoch": 0.8495528669121515, "grad_norm": 2.0858964920043945, "learning_rate": 4.781730104018521e-06, "loss": 0.9932, "step": 1615 }, { "epoch": 0.8500789058390321, "grad_norm": 2.236711025238037, "learning_rate": 4.7814461388460105e-06, "loss": 1.0495, "step": 1616 }, { "epoch": 0.8506049447659126, "grad_norm": 2.0810344219207764, "learning_rate": 4.781161997519707e-06, "loss": 1.0617, "step": 1617 }, { "epoch": 0.8511309836927933, "grad_norm": 2.224187135696411, "learning_rate": 4.780877680061551e-06, "loss": 0.9911, "step": 1618 }, { "epoch": 0.8516570226196739, "grad_norm": 1.8846218585968018, "learning_rate": 4.780593186493491e-06, "loss": 1.0185, "step": 1619 }, { "epoch": 0.8521830615465544, "grad_norm": 2.0876333713531494, "learning_rate": 4.780308516837495e-06, "loss": 1.0173, "step": 1620 }, { "epoch": 0.8527091004734351, "grad_norm": 1.942492961883545, "learning_rate": 4.780023671115544e-06, "loss": 1.0154, "step": 1621 }, { "epoch": 0.8532351394003156, "grad_norm": 1.9483400583267212, "learning_rate": 4.779738649349629e-06, "loss": 1.0492, "step": 1622 }, { "epoch": 0.8537611783271962, "grad_norm": 1.8866205215454102, "learning_rate": 4.7794534515617586e-06, "loss": 0.9896, "step": 1623 }, { "epoch": 0.8542872172540767, "grad_norm": 2.146117687225342, "learning_rate": 4.779168077773953e-06, "loss": 1.0391, "step": 1624 }, { "epoch": 0.8548132561809574, "grad_norm": 2.099858283996582, "learning_rate": 4.778882528008245e-06, "loss": 1.0185, "step": 1625 }, { "epoch": 0.855339295107838, "grad_norm": 2.0597662925720215, "learning_rate": 4.7785968022866846e-06, "loss": 1.0373, "step": 1626 }, { "epoch": 0.8558653340347185, "grad_norm": 2.0234663486480713, "learning_rate": 4.7783109006313316e-06, "loss": 1.0471, "step": 1627 }, { "epoch": 0.8563913729615992, "grad_norm": 1.9113049507141113, "learning_rate": 4.778024823064261e-06, "loss": 1.01, "step": 1628 }, { "epoch": 0.8569174118884797, "grad_norm": 2.4924910068511963, "learning_rate": 4.777738569607562e-06, "loss": 1.0267, "step": 1629 }, { "epoch": 0.8574434508153603, "grad_norm": 1.9605613946914673, "learning_rate": 4.777452140283336e-06, "loss": 1.0237, "step": 1630 }, { "epoch": 0.857969489742241, "grad_norm": 2.1404225826263428, "learning_rate": 4.7771655351136996e-06, "loss": 1.0353, "step": 1631 }, { "epoch": 0.8584955286691215, "grad_norm": 2.1174509525299072, "learning_rate": 4.776878754120781e-06, "loss": 1.0517, "step": 1632 }, { "epoch": 0.8590215675960021, "grad_norm": 1.895843267440796, "learning_rate": 4.7765917973267226e-06, "loss": 0.9479, "step": 1633 }, { "epoch": 0.8595476065228826, "grad_norm": 2.080152988433838, "learning_rate": 4.776304664753682e-06, "loss": 1.0642, "step": 1634 }, { "epoch": 0.8600736454497633, "grad_norm": 1.9730490446090698, "learning_rate": 4.776017356423827e-06, "loss": 1.0059, "step": 1635 }, { "epoch": 0.8605996843766439, "grad_norm": 2.19085693359375, "learning_rate": 4.775729872359343e-06, "loss": 1.0368, "step": 1636 }, { "epoch": 0.8611257233035244, "grad_norm": 2.14911150932312, "learning_rate": 4.775442212582428e-06, "loss": 1.0583, "step": 1637 }, { "epoch": 0.8616517622304051, "grad_norm": 1.9603419303894043, "learning_rate": 4.775154377115291e-06, "loss": 1.0336, "step": 1638 }, { "epoch": 0.8621778011572856, "grad_norm": 1.9417442083358765, "learning_rate": 4.774866365980156e-06, "loss": 0.9885, "step": 1639 }, { "epoch": 0.8627038400841662, "grad_norm": 2.092170000076294, "learning_rate": 4.774578179199261e-06, "loss": 1.0496, "step": 1640 }, { "epoch": 0.8632298790110469, "grad_norm": 2.0614163875579834, "learning_rate": 4.774289816794858e-06, "loss": 1.0011, "step": 1641 }, { "epoch": 0.8637559179379274, "grad_norm": 2.168977975845337, "learning_rate": 4.774001278789211e-06, "loss": 1.0342, "step": 1642 }, { "epoch": 0.864281956864808, "grad_norm": 2.0560708045959473, "learning_rate": 4.773712565204599e-06, "loss": 1.0239, "step": 1643 }, { "epoch": 0.8648079957916885, "grad_norm": 1.9980727434158325, "learning_rate": 4.773423676063314e-06, "loss": 1.0312, "step": 1644 }, { "epoch": 0.8653340347185692, "grad_norm": 2.0650413036346436, "learning_rate": 4.773134611387661e-06, "loss": 1.0468, "step": 1645 }, { "epoch": 0.8658600736454498, "grad_norm": 1.954148530960083, "learning_rate": 4.77284537119996e-06, "loss": 1.0138, "step": 1646 }, { "epoch": 0.8663861125723303, "grad_norm": 2.092515468597412, "learning_rate": 4.772555955522543e-06, "loss": 0.987, "step": 1647 }, { "epoch": 0.866912151499211, "grad_norm": 2.007941246032715, "learning_rate": 4.772266364377757e-06, "loss": 0.9918, "step": 1648 }, { "epoch": 0.8674381904260915, "grad_norm": 1.9608757495880127, "learning_rate": 4.77197659778796e-06, "loss": 1.0502, "step": 1649 }, { "epoch": 0.8679642293529721, "grad_norm": 2.0067436695098877, "learning_rate": 4.771686655775527e-06, "loss": 1.0335, "step": 1650 }, { "epoch": 0.8684902682798528, "grad_norm": 2.079745292663574, "learning_rate": 4.771396538362845e-06, "loss": 1.043, "step": 1651 }, { "epoch": 0.8690163072067333, "grad_norm": 1.9542405605316162, "learning_rate": 4.771106245572313e-06, "loss": 0.984, "step": 1652 }, { "epoch": 0.8695423461336139, "grad_norm": 2.028416872024536, "learning_rate": 4.770815777426346e-06, "loss": 0.9933, "step": 1653 }, { "epoch": 0.8700683850604944, "grad_norm": 1.9436818361282349, "learning_rate": 4.77052513394737e-06, "loss": 1.0118, "step": 1654 }, { "epoch": 0.8705944239873751, "grad_norm": 2.028409004211426, "learning_rate": 4.770234315157828e-06, "loss": 1.0494, "step": 1655 }, { "epoch": 0.8711204629142556, "grad_norm": 2.0709540843963623, "learning_rate": 4.769943321080174e-06, "loss": 1.0542, "step": 1656 }, { "epoch": 0.8716465018411362, "grad_norm": 2.0256619453430176, "learning_rate": 4.7696521517368755e-06, "loss": 1.0011, "step": 1657 }, { "epoch": 0.8721725407680169, "grad_norm": 2.0937297344207764, "learning_rate": 4.769360807150414e-06, "loss": 0.9974, "step": 1658 }, { "epoch": 0.8726985796948974, "grad_norm": 2.2346062660217285, "learning_rate": 4.769069287343285e-06, "loss": 1.0128, "step": 1659 }, { "epoch": 0.873224618621778, "grad_norm": 2.1082491874694824, "learning_rate": 4.7687775923379975e-06, "loss": 1.0321, "step": 1660 }, { "epoch": 0.8737506575486585, "grad_norm": 2.0769453048706055, "learning_rate": 4.768485722157074e-06, "loss": 0.973, "step": 1661 }, { "epoch": 0.8742766964755392, "grad_norm": 2.0329558849334717, "learning_rate": 4.768193676823048e-06, "loss": 1.0102, "step": 1662 }, { "epoch": 0.8748027354024198, "grad_norm": 2.0758261680603027, "learning_rate": 4.767901456358471e-06, "loss": 1.0125, "step": 1663 }, { "epoch": 0.8753287743293003, "grad_norm": 2.12320613861084, "learning_rate": 4.767609060785905e-06, "loss": 1.0294, "step": 1664 }, { "epoch": 0.875854813256181, "grad_norm": 1.9771841764450073, "learning_rate": 4.767316490127927e-06, "loss": 0.9886, "step": 1665 }, { "epoch": 0.8763808521830615, "grad_norm": 1.9373329877853394, "learning_rate": 4.7670237444071255e-06, "loss": 0.994, "step": 1666 }, { "epoch": 0.8769068911099421, "grad_norm": 2.0343801975250244, "learning_rate": 4.766730823646105e-06, "loss": 1.0352, "step": 1667 }, { "epoch": 0.8774329300368228, "grad_norm": 2.020343542098999, "learning_rate": 4.766437727867481e-06, "loss": 0.979, "step": 1668 }, { "epoch": 0.8779589689637033, "grad_norm": 2.107820510864258, "learning_rate": 4.766144457093886e-06, "loss": 1.0296, "step": 1669 }, { "epoch": 0.8784850078905839, "grad_norm": 2.1452198028564453, "learning_rate": 4.765851011347962e-06, "loss": 1.0438, "step": 1670 }, { "epoch": 0.8790110468174644, "grad_norm": 2.087686777114868, "learning_rate": 4.7655573906523665e-06, "loss": 0.9788, "step": 1671 }, { "epoch": 0.8795370857443451, "grad_norm": 2.083097457885742, "learning_rate": 4.765263595029771e-06, "loss": 0.9921, "step": 1672 }, { "epoch": 0.8800631246712257, "grad_norm": 2.0001168251037598, "learning_rate": 4.76496962450286e-06, "loss": 0.9784, "step": 1673 }, { "epoch": 0.8805891635981062, "grad_norm": 1.9493898153305054, "learning_rate": 4.7646754790943315e-06, "loss": 1.0145, "step": 1674 }, { "epoch": 0.8811152025249869, "grad_norm": 2.140746831893921, "learning_rate": 4.764381158826896e-06, "loss": 1.0286, "step": 1675 }, { "epoch": 0.8816412414518674, "grad_norm": 2.0411407947540283, "learning_rate": 4.764086663723278e-06, "loss": 1.0297, "step": 1676 }, { "epoch": 0.882167280378748, "grad_norm": 2.164043664932251, "learning_rate": 4.763791993806218e-06, "loss": 1.0246, "step": 1677 }, { "epoch": 0.8826933193056287, "grad_norm": 2.0231616497039795, "learning_rate": 4.7634971490984675e-06, "loss": 0.9692, "step": 1678 }, { "epoch": 0.8832193582325092, "grad_norm": 2.0884130001068115, "learning_rate": 4.763202129622789e-06, "loss": 1.0441, "step": 1679 }, { "epoch": 0.8837453971593898, "grad_norm": 1.959078311920166, "learning_rate": 4.7629069354019654e-06, "loss": 1.0166, "step": 1680 }, { "epoch": 0.8842714360862703, "grad_norm": 1.836121916770935, "learning_rate": 4.762611566458786e-06, "loss": 1.0347, "step": 1681 }, { "epoch": 0.884797475013151, "grad_norm": 2.099907398223877, "learning_rate": 4.762316022816058e-06, "loss": 1.0309, "step": 1682 }, { "epoch": 0.8853235139400316, "grad_norm": 1.941465139389038, "learning_rate": 4.7620203044966004e-06, "loss": 1.0203, "step": 1683 }, { "epoch": 0.8858495528669121, "grad_norm": 1.893522024154663, "learning_rate": 4.761724411523247e-06, "loss": 0.9769, "step": 1684 }, { "epoch": 0.8863755917937928, "grad_norm": 1.9919662475585938, "learning_rate": 4.7614283439188426e-06, "loss": 1.0116, "step": 1685 }, { "epoch": 0.8869016307206733, "grad_norm": 1.9670614004135132, "learning_rate": 4.761132101706249e-06, "loss": 0.9719, "step": 1686 }, { "epoch": 0.8874276696475539, "grad_norm": 1.9545384645462036, "learning_rate": 4.760835684908337e-06, "loss": 0.9986, "step": 1687 }, { "epoch": 0.8879537085744345, "grad_norm": 1.9402283430099487, "learning_rate": 4.7605390935479946e-06, "loss": 0.9911, "step": 1688 }, { "epoch": 0.8884797475013151, "grad_norm": 1.954526424407959, "learning_rate": 4.760242327648122e-06, "loss": 1.0021, "step": 1689 }, { "epoch": 0.8890057864281957, "grad_norm": 1.9458253383636475, "learning_rate": 4.759945387231633e-06, "loss": 1.0346, "step": 1690 }, { "epoch": 0.8895318253550762, "grad_norm": 1.9583990573883057, "learning_rate": 4.7596482723214565e-06, "loss": 1.0509, "step": 1691 }, { "epoch": 0.8900578642819569, "grad_norm": 2.0227482318878174, "learning_rate": 4.75935098294053e-06, "loss": 1.0651, "step": 1692 }, { "epoch": 0.8905839032088374, "grad_norm": 1.977971076965332, "learning_rate": 4.7590535191118096e-06, "loss": 1.0609, "step": 1693 }, { "epoch": 0.891109942135718, "grad_norm": 2.0564186573028564, "learning_rate": 4.758755880858262e-06, "loss": 1.0125, "step": 1694 }, { "epoch": 0.8916359810625987, "grad_norm": 1.9081783294677734, "learning_rate": 4.75845806820287e-06, "loss": 1.007, "step": 1695 }, { "epoch": 0.8921620199894792, "grad_norm": 2.0456745624542236, "learning_rate": 4.758160081168626e-06, "loss": 1.0116, "step": 1696 }, { "epoch": 0.8926880589163598, "grad_norm": 1.9237746000289917, "learning_rate": 4.757861919778539e-06, "loss": 1.0023, "step": 1697 }, { "epoch": 0.8932140978432404, "grad_norm": 1.9402356147766113, "learning_rate": 4.75756358405563e-06, "loss": 1.0264, "step": 1698 }, { "epoch": 0.893740136770121, "grad_norm": 1.9538573026657104, "learning_rate": 4.757265074022935e-06, "loss": 0.9582, "step": 1699 }, { "epoch": 0.8942661756970016, "grad_norm": 2.09053897857666, "learning_rate": 4.756966389703501e-06, "loss": 1.0245, "step": 1700 }, { "epoch": 0.8947922146238821, "grad_norm": 2.071685552597046, "learning_rate": 4.756667531120391e-06, "loss": 1.0124, "step": 1701 }, { "epoch": 0.8953182535507628, "grad_norm": 2.0141103267669678, "learning_rate": 4.75636849829668e-06, "loss": 0.9852, "step": 1702 }, { "epoch": 0.8958442924776433, "grad_norm": 1.9167203903198242, "learning_rate": 4.756069291255456e-06, "loss": 1.0194, "step": 1703 }, { "epoch": 0.8963703314045239, "grad_norm": 2.011918067932129, "learning_rate": 4.755769910019823e-06, "loss": 1.0029, "step": 1704 }, { "epoch": 0.8968963703314046, "grad_norm": 2.1252031326293945, "learning_rate": 4.755470354612895e-06, "loss": 1.0071, "step": 1705 }, { "epoch": 0.8974224092582851, "grad_norm": 2.0214016437530518, "learning_rate": 4.755170625057801e-06, "loss": 1.0371, "step": 1706 }, { "epoch": 0.8979484481851657, "grad_norm": 2.4289193153381348, "learning_rate": 4.754870721377685e-06, "loss": 1.0581, "step": 1707 }, { "epoch": 0.8984744871120462, "grad_norm": 2.1093404293060303, "learning_rate": 4.754570643595702e-06, "loss": 1.0017, "step": 1708 }, { "epoch": 0.8990005260389269, "grad_norm": 2.0420546531677246, "learning_rate": 4.7542703917350215e-06, "loss": 1.0642, "step": 1709 }, { "epoch": 0.8995265649658075, "grad_norm": 1.9818446636199951, "learning_rate": 4.753969965818827e-06, "loss": 1.0313, "step": 1710 }, { "epoch": 0.900052603892688, "grad_norm": 1.897628664970398, "learning_rate": 4.753669365870313e-06, "loss": 0.9875, "step": 1711 }, { "epoch": 0.9005786428195687, "grad_norm": 2.0208487510681152, "learning_rate": 4.753368591912693e-06, "loss": 1.0271, "step": 1712 }, { "epoch": 0.9011046817464492, "grad_norm": 1.9346519708633423, "learning_rate": 4.753067643969186e-06, "loss": 1.0352, "step": 1713 }, { "epoch": 0.9016307206733298, "grad_norm": 2.0617661476135254, "learning_rate": 4.75276652206303e-06, "loss": 0.9806, "step": 1714 }, { "epoch": 0.9021567596002105, "grad_norm": 1.8809938430786133, "learning_rate": 4.752465226217477e-06, "loss": 1.0333, "step": 1715 }, { "epoch": 0.902682798527091, "grad_norm": 2.047309398651123, "learning_rate": 4.752163756455789e-06, "loss": 1.0614, "step": 1716 }, { "epoch": 0.9032088374539716, "grad_norm": 2.1308083534240723, "learning_rate": 4.751862112801242e-06, "loss": 1.0229, "step": 1717 }, { "epoch": 0.9037348763808521, "grad_norm": 2.0333852767944336, "learning_rate": 4.751560295277127e-06, "loss": 1.0077, "step": 1718 }, { "epoch": 0.9042609153077328, "grad_norm": 1.9486128091812134, "learning_rate": 4.7512583039067485e-06, "loss": 1.0026, "step": 1719 }, { "epoch": 0.9047869542346134, "grad_norm": 2.004258394241333, "learning_rate": 4.750956138713424e-06, "loss": 0.986, "step": 1720 }, { "epoch": 0.9053129931614939, "grad_norm": 2.5763192176818848, "learning_rate": 4.750653799720483e-06, "loss": 0.979, "step": 1721 }, { "epoch": 0.9058390320883746, "grad_norm": 2.1086039543151855, "learning_rate": 4.750351286951269e-06, "loss": 1.0368, "step": 1722 }, { "epoch": 0.9063650710152551, "grad_norm": 2.0445361137390137, "learning_rate": 4.750048600429141e-06, "loss": 0.9756, "step": 1723 }, { "epoch": 0.9068911099421357, "grad_norm": 1.8900635242462158, "learning_rate": 4.7497457401774694e-06, "loss": 0.8947, "step": 1724 }, { "epoch": 0.9074171488690163, "grad_norm": 2.116900682449341, "learning_rate": 4.749442706219638e-06, "loss": 1.0502, "step": 1725 }, { "epoch": 0.9079431877958969, "grad_norm": 2.1096391677856445, "learning_rate": 4.749139498579044e-06, "loss": 1.0089, "step": 1726 }, { "epoch": 0.9084692267227775, "grad_norm": 2.2117018699645996, "learning_rate": 4.7488361172791005e-06, "loss": 1.056, "step": 1727 }, { "epoch": 0.908995265649658, "grad_norm": 2.0012335777282715, "learning_rate": 4.748532562343231e-06, "loss": 0.916, "step": 1728 }, { "epoch": 0.9095213045765387, "grad_norm": 1.8673421144485474, "learning_rate": 4.748228833794872e-06, "loss": 0.9844, "step": 1729 }, { "epoch": 0.9100473435034192, "grad_norm": 1.9152559041976929, "learning_rate": 4.747924931657477e-06, "loss": 0.9619, "step": 1730 }, { "epoch": 0.9105733824302998, "grad_norm": 2.107985496520996, "learning_rate": 4.7476208559545104e-06, "loss": 1.017, "step": 1731 }, { "epoch": 0.9110994213571805, "grad_norm": 2.162464141845703, "learning_rate": 4.7473166067094474e-06, "loss": 1.0197, "step": 1732 }, { "epoch": 0.911625460284061, "grad_norm": 2.085958480834961, "learning_rate": 4.747012183945784e-06, "loss": 1.0166, "step": 1733 }, { "epoch": 0.9121514992109416, "grad_norm": 2.0198309421539307, "learning_rate": 4.746707587687022e-06, "loss": 0.9883, "step": 1734 }, { "epoch": 0.9126775381378222, "grad_norm": 2.013784646987915, "learning_rate": 4.746402817956681e-06, "loss": 0.9775, "step": 1735 }, { "epoch": 0.9132035770647028, "grad_norm": 2.1442627906799316, "learning_rate": 4.746097874778293e-06, "loss": 1.0358, "step": 1736 }, { "epoch": 0.9137296159915834, "grad_norm": 2.143627643585205, "learning_rate": 4.745792758175402e-06, "loss": 0.9537, "step": 1737 }, { "epoch": 0.914255654918464, "grad_norm": 1.9581515789031982, "learning_rate": 4.745487468171566e-06, "loss": 0.9756, "step": 1738 }, { "epoch": 0.9147816938453446, "grad_norm": 1.9869537353515625, "learning_rate": 4.74518200479036e-06, "loss": 0.995, "step": 1739 }, { "epoch": 0.9153077327722251, "grad_norm": 1.9129465818405151, "learning_rate": 4.744876368055365e-06, "loss": 1.0088, "step": 1740 }, { "epoch": 0.9158337716991057, "grad_norm": 1.957229733467102, "learning_rate": 4.744570557990183e-06, "loss": 0.9832, "step": 1741 }, { "epoch": 0.9163598106259864, "grad_norm": 2.061002492904663, "learning_rate": 4.744264574618425e-06, "loss": 1.0338, "step": 1742 }, { "epoch": 0.9168858495528669, "grad_norm": 2.0439558029174805, "learning_rate": 4.743958417963715e-06, "loss": 1.0678, "step": 1743 }, { "epoch": 0.9174118884797475, "grad_norm": 2.0407450199127197, "learning_rate": 4.743652088049695e-06, "loss": 1.0219, "step": 1744 }, { "epoch": 0.917937927406628, "grad_norm": 2.2696166038513184, "learning_rate": 4.743345584900014e-06, "loss": 0.9909, "step": 1745 }, { "epoch": 0.9184639663335087, "grad_norm": 1.9783145189285278, "learning_rate": 4.74303890853834e-06, "loss": 0.9423, "step": 1746 }, { "epoch": 0.9189900052603893, "grad_norm": 2.019179344177246, "learning_rate": 4.74273205898835e-06, "loss": 0.9985, "step": 1747 }, { "epoch": 0.9195160441872698, "grad_norm": 1.966417670249939, "learning_rate": 4.742425036273737e-06, "loss": 1.0605, "step": 1748 }, { "epoch": 0.9200420831141505, "grad_norm": 1.9425163269042969, "learning_rate": 4.742117840418207e-06, "loss": 0.9855, "step": 1749 }, { "epoch": 0.920568122041031, "grad_norm": 1.9825159311294556, "learning_rate": 4.741810471445478e-06, "loss": 1.0214, "step": 1750 }, { "epoch": 0.9210941609679116, "grad_norm": 1.9764158725738525, "learning_rate": 4.741502929379284e-06, "loss": 1.0249, "step": 1751 }, { "epoch": 0.9216201998947923, "grad_norm": 2.0177724361419678, "learning_rate": 4.74119521424337e-06, "loss": 1.0434, "step": 1752 }, { "epoch": 0.9221462388216728, "grad_norm": 2.0949506759643555, "learning_rate": 4.740887326061495e-06, "loss": 1.0331, "step": 1753 }, { "epoch": 0.9226722777485534, "grad_norm": 1.9468920230865479, "learning_rate": 4.740579264857431e-06, "loss": 0.9212, "step": 1754 }, { "epoch": 0.923198316675434, "grad_norm": 2.2116925716400146, "learning_rate": 4.740271030654965e-06, "loss": 1.0241, "step": 1755 }, { "epoch": 0.9237243556023146, "grad_norm": 1.9227603673934937, "learning_rate": 4.739962623477896e-06, "loss": 0.98, "step": 1756 }, { "epoch": 0.9242503945291951, "grad_norm": 2.013141632080078, "learning_rate": 4.739654043350036e-06, "loss": 1.0321, "step": 1757 }, { "epoch": 0.9247764334560757, "grad_norm": 2.1053218841552734, "learning_rate": 4.739345290295211e-06, "loss": 1.0359, "step": 1758 }, { "epoch": 0.9253024723829564, "grad_norm": 2.072932243347168, "learning_rate": 4.739036364337261e-06, "loss": 0.9826, "step": 1759 }, { "epoch": 0.9258285113098369, "grad_norm": 2.104072093963623, "learning_rate": 4.738727265500037e-06, "loss": 1.0239, "step": 1760 }, { "epoch": 0.9263545502367175, "grad_norm": 2.0704009532928467, "learning_rate": 4.738417993807407e-06, "loss": 1.0235, "step": 1761 }, { "epoch": 0.9268805891635981, "grad_norm": 1.9992990493774414, "learning_rate": 4.738108549283249e-06, "loss": 0.988, "step": 1762 }, { "epoch": 0.9274066280904787, "grad_norm": 2.150501251220703, "learning_rate": 4.737798931951456e-06, "loss": 1.0574, "step": 1763 }, { "epoch": 0.9279326670173593, "grad_norm": 1.906421184539795, "learning_rate": 4.7374891418359345e-06, "loss": 1.0479, "step": 1764 }, { "epoch": 0.9284587059442398, "grad_norm": 1.8720351457595825, "learning_rate": 4.737179178960603e-06, "loss": 1.038, "step": 1765 }, { "epoch": 0.9289847448711205, "grad_norm": 1.9185991287231445, "learning_rate": 4.736869043349394e-06, "loss": 1.0632, "step": 1766 }, { "epoch": 0.929510783798001, "grad_norm": 2.040290594100952, "learning_rate": 4.736558735026255e-06, "loss": 0.9857, "step": 1767 }, { "epoch": 0.9300368227248816, "grad_norm": 1.9188529253005981, "learning_rate": 4.7362482540151445e-06, "loss": 1.0115, "step": 1768 }, { "epoch": 0.9305628616517623, "grad_norm": 2.092855215072632, "learning_rate": 4.7359376003400345e-06, "loss": 1.0318, "step": 1769 }, { "epoch": 0.9310889005786428, "grad_norm": 1.9537826776504517, "learning_rate": 4.735626774024912e-06, "loss": 1.0005, "step": 1770 }, { "epoch": 0.9316149395055234, "grad_norm": 1.8022964000701904, "learning_rate": 4.735315775093775e-06, "loss": 0.9696, "step": 1771 }, { "epoch": 0.932140978432404, "grad_norm": 2.0534324645996094, "learning_rate": 4.735004603570639e-06, "loss": 1.0647, "step": 1772 }, { "epoch": 0.9326670173592846, "grad_norm": 2.082421064376831, "learning_rate": 4.734693259479527e-06, "loss": 1.0168, "step": 1773 }, { "epoch": 0.9331930562861652, "grad_norm": 2.2331955432891846, "learning_rate": 4.734381742844481e-06, "loss": 1.0288, "step": 1774 }, { "epoch": 0.9337190952130457, "grad_norm": 1.9978649616241455, "learning_rate": 4.73407005368955e-06, "loss": 0.9542, "step": 1775 }, { "epoch": 0.9342451341399264, "grad_norm": 2.054856061935425, "learning_rate": 4.733758192038804e-06, "loss": 1.0457, "step": 1776 }, { "epoch": 0.9347711730668069, "grad_norm": 2.1446175575256348, "learning_rate": 4.733446157916319e-06, "loss": 1.0767, "step": 1777 }, { "epoch": 0.9352972119936875, "grad_norm": 2.149594783782959, "learning_rate": 4.7331339513461905e-06, "loss": 0.9975, "step": 1778 }, { "epoch": 0.9358232509205682, "grad_norm": 2.0066800117492676, "learning_rate": 4.732821572352522e-06, "loss": 1.0296, "step": 1779 }, { "epoch": 0.9363492898474487, "grad_norm": 2.4036574363708496, "learning_rate": 4.732509020959434e-06, "loss": 0.9726, "step": 1780 }, { "epoch": 0.9368753287743293, "grad_norm": 2.0901482105255127, "learning_rate": 4.73219629719106e-06, "loss": 1.0748, "step": 1781 }, { "epoch": 0.9374013677012099, "grad_norm": 2.093503713607788, "learning_rate": 4.731883401071543e-06, "loss": 1.0413, "step": 1782 }, { "epoch": 0.9379274066280905, "grad_norm": 2.1437647342681885, "learning_rate": 4.731570332625044e-06, "loss": 1.0624, "step": 1783 }, { "epoch": 0.9384534455549711, "grad_norm": 2.141866445541382, "learning_rate": 4.731257091875736e-06, "loss": 0.9547, "step": 1784 }, { "epoch": 0.9389794844818516, "grad_norm": 2.138530731201172, "learning_rate": 4.730943678847804e-06, "loss": 1.0498, "step": 1785 }, { "epoch": 0.9395055234087323, "grad_norm": 2.192941188812256, "learning_rate": 4.730630093565447e-06, "loss": 1.0426, "step": 1786 }, { "epoch": 0.9400315623356128, "grad_norm": 1.9256808757781982, "learning_rate": 4.730316336052877e-06, "loss": 0.9864, "step": 1787 }, { "epoch": 0.9405576012624934, "grad_norm": 2.1694893836975098, "learning_rate": 4.730002406334321e-06, "loss": 0.9926, "step": 1788 }, { "epoch": 0.941083640189374, "grad_norm": 1.9891979694366455, "learning_rate": 4.729688304434017e-06, "loss": 0.9835, "step": 1789 }, { "epoch": 0.9416096791162546, "grad_norm": 2.112396240234375, "learning_rate": 4.729374030376217e-06, "loss": 1.0131, "step": 1790 }, { "epoch": 0.9421357180431352, "grad_norm": 2.049139976501465, "learning_rate": 4.729059584185187e-06, "loss": 1.0176, "step": 1791 }, { "epoch": 0.9426617569700158, "grad_norm": 2.259706497192383, "learning_rate": 4.728744965885207e-06, "loss": 1.0566, "step": 1792 }, { "epoch": 0.9431877958968964, "grad_norm": 1.9924520254135132, "learning_rate": 4.728430175500567e-06, "loss": 0.9912, "step": 1793 }, { "epoch": 0.9437138348237769, "grad_norm": 2.1724114418029785, "learning_rate": 4.728115213055573e-06, "loss": 0.9919, "step": 1794 }, { "epoch": 0.9442398737506575, "grad_norm": 2.083853244781494, "learning_rate": 4.7278000785745445e-06, "loss": 1.0368, "step": 1795 }, { "epoch": 0.9447659126775382, "grad_norm": 2.089245080947876, "learning_rate": 4.727484772081814e-06, "loss": 1.0471, "step": 1796 }, { "epoch": 0.9452919516044187, "grad_norm": 1.9880348443984985, "learning_rate": 4.727169293601725e-06, "loss": 0.9752, "step": 1797 }, { "epoch": 0.9458179905312993, "grad_norm": 2.0518887042999268, "learning_rate": 4.7268536431586375e-06, "loss": 0.977, "step": 1798 }, { "epoch": 0.9463440294581799, "grad_norm": 2.3292527198791504, "learning_rate": 4.726537820776922e-06, "loss": 0.9696, "step": 1799 }, { "epoch": 0.9468700683850605, "grad_norm": 2.093759775161743, "learning_rate": 4.7262218264809656e-06, "loss": 1.028, "step": 1800 }, { "epoch": 0.9473961073119411, "grad_norm": 1.9579375982284546, "learning_rate": 4.7259056602951644e-06, "loss": 0.9797, "step": 1801 }, { "epoch": 0.9479221462388217, "grad_norm": 2.1174583435058594, "learning_rate": 4.725589322243932e-06, "loss": 0.9993, "step": 1802 }, { "epoch": 0.9484481851657023, "grad_norm": 2.167732000350952, "learning_rate": 4.725272812351692e-06, "loss": 1.0031, "step": 1803 }, { "epoch": 0.9489742240925828, "grad_norm": 2.1166253089904785, "learning_rate": 4.724956130642883e-06, "loss": 1.0029, "step": 1804 }, { "epoch": 0.9495002630194634, "grad_norm": 2.0212886333465576, "learning_rate": 4.724639277141957e-06, "loss": 1.0202, "step": 1805 }, { "epoch": 0.9500263019463441, "grad_norm": 2.1849446296691895, "learning_rate": 4.7243222518733775e-06, "loss": 0.9847, "step": 1806 }, { "epoch": 0.9505523408732246, "grad_norm": 2.019671678543091, "learning_rate": 4.724005054861623e-06, "loss": 1.0141, "step": 1807 }, { "epoch": 0.9510783798001052, "grad_norm": 2.0654826164245605, "learning_rate": 4.723687686131186e-06, "loss": 1.0266, "step": 1808 }, { "epoch": 0.9516044187269858, "grad_norm": 2.0668342113494873, "learning_rate": 4.7233701457065694e-06, "loss": 1.0249, "step": 1809 }, { "epoch": 0.9521304576538664, "grad_norm": 1.9022929668426514, "learning_rate": 4.723052433612292e-06, "loss": 1.0092, "step": 1810 }, { "epoch": 0.952656496580747, "grad_norm": 2.0411059856414795, "learning_rate": 4.722734549872884e-06, "loss": 0.9896, "step": 1811 }, { "epoch": 0.9531825355076275, "grad_norm": 2.0354626178741455, "learning_rate": 4.722416494512889e-06, "loss": 0.9529, "step": 1812 }, { "epoch": 0.9537085744345082, "grad_norm": 1.866688847541809, "learning_rate": 4.722098267556867e-06, "loss": 0.971, "step": 1813 }, { "epoch": 0.9542346133613887, "grad_norm": 1.9963386058807373, "learning_rate": 4.721779869029387e-06, "loss": 0.9931, "step": 1814 }, { "epoch": 0.9547606522882693, "grad_norm": 1.9810550212860107, "learning_rate": 4.721461298955033e-06, "loss": 1.0335, "step": 1815 }, { "epoch": 0.95528669121515, "grad_norm": 2.0094194412231445, "learning_rate": 4.721142557358402e-06, "loss": 1.0248, "step": 1816 }, { "epoch": 0.9558127301420305, "grad_norm": 2.110318183898926, "learning_rate": 4.720823644264106e-06, "loss": 0.9726, "step": 1817 }, { "epoch": 0.9563387690689111, "grad_norm": 2.051914691925049, "learning_rate": 4.720504559696768e-06, "loss": 1.0205, "step": 1818 }, { "epoch": 0.9568648079957917, "grad_norm": 2.0969302654266357, "learning_rate": 4.7201853036810245e-06, "loss": 1.0313, "step": 1819 }, { "epoch": 0.9573908469226723, "grad_norm": 2.098721742630005, "learning_rate": 4.719865876241525e-06, "loss": 1.0276, "step": 1820 }, { "epoch": 0.9579168858495528, "grad_norm": 1.9741021394729614, "learning_rate": 4.719546277402936e-06, "loss": 1.0142, "step": 1821 }, { "epoch": 0.9584429247764334, "grad_norm": 2.1097187995910645, "learning_rate": 4.71922650718993e-06, "loss": 0.9812, "step": 1822 }, { "epoch": 0.9589689637033141, "grad_norm": 2.1343348026275635, "learning_rate": 4.718906565627201e-06, "loss": 1.0126, "step": 1823 }, { "epoch": 0.9594950026301946, "grad_norm": 2.089698553085327, "learning_rate": 4.71858645273945e-06, "loss": 0.9982, "step": 1824 }, { "epoch": 0.9600210415570752, "grad_norm": 2.1942148208618164, "learning_rate": 4.7182661685513925e-06, "loss": 1.0781, "step": 1825 }, { "epoch": 0.9605470804839558, "grad_norm": 1.92880380153656, "learning_rate": 4.7179457130877605e-06, "loss": 1.0214, "step": 1826 }, { "epoch": 0.9610731194108364, "grad_norm": 2.093219518661499, "learning_rate": 4.717625086373295e-06, "loss": 1.0411, "step": 1827 }, { "epoch": 0.961599158337717, "grad_norm": 1.9406787157058716, "learning_rate": 4.7173042884327525e-06, "loss": 1.0296, "step": 1828 }, { "epoch": 0.9621251972645976, "grad_norm": 1.9737564325332642, "learning_rate": 4.7169833192909025e-06, "loss": 1.0119, "step": 1829 }, { "epoch": 0.9626512361914782, "grad_norm": 1.9281796216964722, "learning_rate": 4.7166621789725276e-06, "loss": 1.0203, "step": 1830 }, { "epoch": 0.9631772751183587, "grad_norm": 2.128120183944702, "learning_rate": 4.716340867502424e-06, "loss": 1.087, "step": 1831 }, { "epoch": 0.9637033140452393, "grad_norm": 2.1313352584838867, "learning_rate": 4.716019384905399e-06, "loss": 1.0049, "step": 1832 }, { "epoch": 0.96422935297212, "grad_norm": 1.882323980331421, "learning_rate": 4.715697731206275e-06, "loss": 1.052, "step": 1833 }, { "epoch": 0.9647553918990005, "grad_norm": 1.902729868888855, "learning_rate": 4.71537590642989e-06, "loss": 1.013, "step": 1834 }, { "epoch": 0.9652814308258811, "grad_norm": 1.9752705097198486, "learning_rate": 4.715053910601089e-06, "loss": 0.9964, "step": 1835 }, { "epoch": 0.9658074697527617, "grad_norm": 2.2092044353485107, "learning_rate": 4.714731743744736e-06, "loss": 1.0142, "step": 1836 }, { "epoch": 0.9663335086796423, "grad_norm": 1.9738699197769165, "learning_rate": 4.714409405885706e-06, "loss": 1.0431, "step": 1837 }, { "epoch": 0.9668595476065229, "grad_norm": 1.94752836227417, "learning_rate": 4.714086897048886e-06, "loss": 0.9776, "step": 1838 }, { "epoch": 0.9673855865334035, "grad_norm": 2.044384717941284, "learning_rate": 4.713764217259178e-06, "loss": 0.9428, "step": 1839 }, { "epoch": 0.9679116254602841, "grad_norm": 2.067378520965576, "learning_rate": 4.713441366541497e-06, "loss": 1.0222, "step": 1840 }, { "epoch": 0.9684376643871646, "grad_norm": 2.0729427337646484, "learning_rate": 4.71311834492077e-06, "loss": 1.0244, "step": 1841 }, { "epoch": 0.9689637033140452, "grad_norm": 1.9986896514892578, "learning_rate": 4.712795152421938e-06, "loss": 1.0246, "step": 1842 }, { "epoch": 0.9694897422409259, "grad_norm": 2.134274482727051, "learning_rate": 4.712471789069956e-06, "loss": 1.0317, "step": 1843 }, { "epoch": 0.9700157811678064, "grad_norm": 2.116116762161255, "learning_rate": 4.7121482548897896e-06, "loss": 1.0431, "step": 1844 }, { "epoch": 0.970541820094687, "grad_norm": 2.146329164505005, "learning_rate": 4.7118245499064205e-06, "loss": 1.0185, "step": 1845 }, { "epoch": 0.9710678590215676, "grad_norm": 2.2587080001831055, "learning_rate": 4.711500674144844e-06, "loss": 1.0172, "step": 1846 }, { "epoch": 0.9715938979484482, "grad_norm": 2.133565902709961, "learning_rate": 4.7111766276300645e-06, "loss": 1.0887, "step": 1847 }, { "epoch": 0.9721199368753288, "grad_norm": 2.4180047512054443, "learning_rate": 4.710852410387103e-06, "loss": 1.0686, "step": 1848 }, { "epoch": 0.9726459758022094, "grad_norm": 1.9758679866790771, "learning_rate": 4.7105280224409936e-06, "loss": 0.9851, "step": 1849 }, { "epoch": 0.97317201472909, "grad_norm": 2.0190632343292236, "learning_rate": 4.710203463816782e-06, "loss": 0.9967, "step": 1850 }, { "epoch": 0.9736980536559705, "grad_norm": 2.0636117458343506, "learning_rate": 4.709878734539527e-06, "loss": 1.0209, "step": 1851 }, { "epoch": 0.9742240925828511, "grad_norm": 2.0756478309631348, "learning_rate": 4.709553834634303e-06, "loss": 0.9793, "step": 1852 }, { "epoch": 0.9747501315097317, "grad_norm": 1.94191312789917, "learning_rate": 4.709228764126195e-06, "loss": 0.9697, "step": 1853 }, { "epoch": 0.9752761704366123, "grad_norm": 2.057345390319824, "learning_rate": 4.708903523040303e-06, "loss": 0.938, "step": 1854 }, { "epoch": 0.9758022093634929, "grad_norm": 2.1611337661743164, "learning_rate": 4.7085781114017384e-06, "loss": 1.0464, "step": 1855 }, { "epoch": 0.9763282482903735, "grad_norm": 1.9461411237716675, "learning_rate": 4.708252529235627e-06, "loss": 0.9934, "step": 1856 }, { "epoch": 0.9768542872172541, "grad_norm": 1.9107236862182617, "learning_rate": 4.707926776567108e-06, "loss": 0.9895, "step": 1857 }, { "epoch": 0.9773803261441346, "grad_norm": 2.0953640937805176, "learning_rate": 4.707600853421332e-06, "loss": 1.0009, "step": 1858 }, { "epoch": 0.9779063650710152, "grad_norm": 2.126648187637329, "learning_rate": 4.707274759823466e-06, "loss": 0.9801, "step": 1859 }, { "epoch": 0.9784324039978959, "grad_norm": 2.0868916511535645, "learning_rate": 4.706948495798687e-06, "loss": 0.9765, "step": 1860 }, { "epoch": 0.9789584429247764, "grad_norm": 2.0332181453704834, "learning_rate": 4.706622061372185e-06, "loss": 1.0216, "step": 1861 }, { "epoch": 0.979484481851657, "grad_norm": 2.05155348777771, "learning_rate": 4.706295456569167e-06, "loss": 1.0594, "step": 1862 }, { "epoch": 0.9800105207785376, "grad_norm": 2.1178739070892334, "learning_rate": 4.7059686814148485e-06, "loss": 1.0463, "step": 1863 }, { "epoch": 0.9805365597054182, "grad_norm": 1.9961886405944824, "learning_rate": 4.705641735934462e-06, "loss": 0.9658, "step": 1864 }, { "epoch": 0.9810625986322988, "grad_norm": 1.9905188083648682, "learning_rate": 4.705314620153251e-06, "loss": 0.9677, "step": 1865 }, { "epoch": 0.9815886375591794, "grad_norm": 1.9200838804244995, "learning_rate": 4.704987334096471e-06, "loss": 1.0011, "step": 1866 }, { "epoch": 0.98211467648606, "grad_norm": 2.069359302520752, "learning_rate": 4.704659877789395e-06, "loss": 1.01, "step": 1867 }, { "epoch": 0.9826407154129405, "grad_norm": 1.8069074153900146, "learning_rate": 4.704332251257304e-06, "loss": 1.037, "step": 1868 }, { "epoch": 0.9831667543398211, "grad_norm": 1.9900349378585815, "learning_rate": 4.704004454525496e-06, "loss": 1.0035, "step": 1869 }, { "epoch": 0.9836927932667018, "grad_norm": 1.902032494544983, "learning_rate": 4.70367648761928e-06, "loss": 1.0001, "step": 1870 }, { "epoch": 0.9842188321935823, "grad_norm": 2.5718839168548584, "learning_rate": 4.703348350563978e-06, "loss": 1.002, "step": 1871 }, { "epoch": 0.9847448711204629, "grad_norm": 1.90852952003479, "learning_rate": 4.703020043384927e-06, "loss": 1.0338, "step": 1872 }, { "epoch": 0.9852709100473435, "grad_norm": 2.0179872512817383, "learning_rate": 4.702691566107477e-06, "loss": 0.9724, "step": 1873 }, { "epoch": 0.9857969489742241, "grad_norm": 2.0315425395965576, "learning_rate": 4.702362918756988e-06, "loss": 1.0256, "step": 1874 }, { "epoch": 0.9863229879011047, "grad_norm": 1.898896336555481, "learning_rate": 4.702034101358837e-06, "loss": 0.9695, "step": 1875 }, { "epoch": 0.9868490268279853, "grad_norm": 2.1176962852478027, "learning_rate": 4.701705113938411e-06, "loss": 1.0217, "step": 1876 }, { "epoch": 0.9873750657548659, "grad_norm": 1.94914972782135, "learning_rate": 4.701375956521113e-06, "loss": 1.0081, "step": 1877 }, { "epoch": 0.9879011046817464, "grad_norm": 1.9665032625198364, "learning_rate": 4.701046629132358e-06, "loss": 1.0174, "step": 1878 }, { "epoch": 0.988427143608627, "grad_norm": 2.005793571472168, "learning_rate": 4.700717131797573e-06, "loss": 0.9653, "step": 1879 }, { "epoch": 0.9889531825355077, "grad_norm": 2.0769705772399902, "learning_rate": 4.700387464542199e-06, "loss": 1.0142, "step": 1880 }, { "epoch": 0.9894792214623882, "grad_norm": 1.9945422410964966, "learning_rate": 4.700057627391689e-06, "loss": 1.0225, "step": 1881 }, { "epoch": 0.9900052603892688, "grad_norm": 2.1121349334716797, "learning_rate": 4.699727620371513e-06, "loss": 1.0056, "step": 1882 }, { "epoch": 0.9905312993161494, "grad_norm": 2.156942844390869, "learning_rate": 4.699397443507148e-06, "loss": 1.0049, "step": 1883 }, { "epoch": 0.99105733824303, "grad_norm": 2.065075159072876, "learning_rate": 4.699067096824091e-06, "loss": 0.9694, "step": 1884 }, { "epoch": 0.9915833771699105, "grad_norm": 2.12490177154541, "learning_rate": 4.698736580347845e-06, "loss": 1.0268, "step": 1885 }, { "epoch": 0.9921094160967912, "grad_norm": 2.039874792098999, "learning_rate": 4.698405894103932e-06, "loss": 1.0122, "step": 1886 }, { "epoch": 0.9926354550236718, "grad_norm": 2.0004734992980957, "learning_rate": 4.698075038117884e-06, "loss": 0.9996, "step": 1887 }, { "epoch": 0.9931614939505523, "grad_norm": 1.996697187423706, "learning_rate": 4.697744012415248e-06, "loss": 1.0658, "step": 1888 }, { "epoch": 0.9936875328774329, "grad_norm": 1.9783189296722412, "learning_rate": 4.69741281702158e-06, "loss": 0.9799, "step": 1889 }, { "epoch": 0.9942135718043135, "grad_norm": 2.054898738861084, "learning_rate": 4.697081451962456e-06, "loss": 1.0302, "step": 1890 }, { "epoch": 0.9947396107311941, "grad_norm": 1.953337550163269, "learning_rate": 4.696749917263458e-06, "loss": 0.9634, "step": 1891 }, { "epoch": 0.9952656496580747, "grad_norm": 2.6126086711883545, "learning_rate": 4.6964182129501855e-06, "loss": 0.9659, "step": 1892 }, { "epoch": 0.9957916885849553, "grad_norm": 1.931026816368103, "learning_rate": 4.69608633904825e-06, "loss": 1.0456, "step": 1893 }, { "epoch": 0.9963177275118359, "grad_norm": 1.9246487617492676, "learning_rate": 4.695754295583276e-06, "loss": 1.0057, "step": 1894 }, { "epoch": 0.9968437664387164, "grad_norm": 1.9731547832489014, "learning_rate": 4.695422082580901e-06, "loss": 0.9619, "step": 1895 }, { "epoch": 0.997369805365597, "grad_norm": 2.1975600719451904, "learning_rate": 4.695089700066776e-06, "loss": 0.9667, "step": 1896 }, { "epoch": 0.9978958442924777, "grad_norm": 1.9038164615631104, "learning_rate": 4.6947571480665636e-06, "loss": 0.9564, "step": 1897 }, { "epoch": 0.9984218832193582, "grad_norm": 1.9997332096099854, "learning_rate": 4.694424426605942e-06, "loss": 0.9717, "step": 1898 }, { "epoch": 0.9989479221462388, "grad_norm": 2.0790839195251465, "learning_rate": 4.6940915357106e-06, "loss": 1.044, "step": 1899 }, { "epoch": 0.9994739610731194, "grad_norm": 2.0779690742492676, "learning_rate": 4.693758475406241e-06, "loss": 1.052, "step": 1900 }, { "epoch": 1.0, "grad_norm": 2.3423078060150146, "learning_rate": 4.693425245718581e-06, "loss": 0.9887, "step": 1901 }, { "epoch": 1.0005260389268806, "grad_norm": 1.9817070960998535, "learning_rate": 4.69309184667335e-06, "loss": 0.9254, "step": 1902 }, { "epoch": 1.0010520778537613, "grad_norm": 1.8153924942016602, "learning_rate": 4.6927582782962886e-06, "loss": 0.9244, "step": 1903 }, { "epoch": 1.0015781167806417, "grad_norm": 1.982853651046753, "learning_rate": 4.6924245406131534e-06, "loss": 0.9473, "step": 1904 }, { "epoch": 1.0021041557075223, "grad_norm": 1.8686907291412354, "learning_rate": 4.692090633649712e-06, "loss": 0.9236, "step": 1905 }, { "epoch": 1.002630194634403, "grad_norm": 2.0107433795928955, "learning_rate": 4.691756557431747e-06, "loss": 0.9597, "step": 1906 }, { "epoch": 1.0031562335612836, "grad_norm": 1.9539220333099365, "learning_rate": 4.691422311985051e-06, "loss": 0.9715, "step": 1907 }, { "epoch": 1.0036822724881642, "grad_norm": 2.040505886077881, "learning_rate": 4.691087897335434e-06, "loss": 0.9907, "step": 1908 }, { "epoch": 1.0042083114150446, "grad_norm": 1.9904555082321167, "learning_rate": 4.690753313508715e-06, "loss": 0.9367, "step": 1909 }, { "epoch": 1.0047343503419253, "grad_norm": 2.037346601486206, "learning_rate": 4.6904185605307276e-06, "loss": 0.9139, "step": 1910 }, { "epoch": 1.005260389268806, "grad_norm": 2.014995574951172, "learning_rate": 4.690083638427318e-06, "loss": 0.9179, "step": 1911 }, { "epoch": 1.0057864281956865, "grad_norm": 2.044022798538208, "learning_rate": 4.689748547224349e-06, "loss": 0.9634, "step": 1912 }, { "epoch": 1.0063124671225672, "grad_norm": 2.0251998901367188, "learning_rate": 4.689413286947691e-06, "loss": 0.9557, "step": 1913 }, { "epoch": 1.0068385060494476, "grad_norm": 2.0683465003967285, "learning_rate": 4.68907785762323e-06, "loss": 0.9334, "step": 1914 }, { "epoch": 1.0073645449763282, "grad_norm": 2.0559067726135254, "learning_rate": 4.688742259276865e-06, "loss": 0.9497, "step": 1915 }, { "epoch": 1.0078905839032088, "grad_norm": 2.0504722595214844, "learning_rate": 4.688406491934509e-06, "loss": 0.9642, "step": 1916 }, { "epoch": 1.0084166228300895, "grad_norm": 1.9897568225860596, "learning_rate": 4.6880705556220865e-06, "loss": 0.9055, "step": 1917 }, { "epoch": 1.0089426617569701, "grad_norm": 2.2071170806884766, "learning_rate": 4.6877344503655365e-06, "loss": 0.9615, "step": 1918 }, { "epoch": 1.0094687006838505, "grad_norm": 1.9466966390609741, "learning_rate": 4.687398176190808e-06, "loss": 0.9088, "step": 1919 }, { "epoch": 1.0099947396107312, "grad_norm": 2.242229461669922, "learning_rate": 4.687061733123868e-06, "loss": 0.9785, "step": 1920 }, { "epoch": 1.0105207785376118, "grad_norm": 2.1745779514312744, "learning_rate": 4.686725121190692e-06, "loss": 0.9806, "step": 1921 }, { "epoch": 1.0110468174644924, "grad_norm": 2.1215994358062744, "learning_rate": 4.686388340417271e-06, "loss": 0.9587, "step": 1922 }, { "epoch": 1.011572856391373, "grad_norm": 2.0110349655151367, "learning_rate": 4.686051390829607e-06, "loss": 0.9798, "step": 1923 }, { "epoch": 1.0120988953182535, "grad_norm": 1.906720519065857, "learning_rate": 4.685714272453717e-06, "loss": 0.9697, "step": 1924 }, { "epoch": 1.0126249342451341, "grad_norm": 2.1385791301727295, "learning_rate": 4.685376985315632e-06, "loss": 1.0257, "step": 1925 }, { "epoch": 1.0131509731720147, "grad_norm": 2.169401168823242, "learning_rate": 4.685039529441393e-06, "loss": 0.9831, "step": 1926 }, { "epoch": 1.0136770120988954, "grad_norm": 2.0535483360290527, "learning_rate": 4.684701904857055e-06, "loss": 0.9376, "step": 1927 }, { "epoch": 1.014203051025776, "grad_norm": 1.9508135318756104, "learning_rate": 4.684364111588688e-06, "loss": 0.9478, "step": 1928 }, { "epoch": 1.0147290899526564, "grad_norm": 1.9286326169967651, "learning_rate": 4.684026149662373e-06, "loss": 0.9279, "step": 1929 }, { "epoch": 1.015255128879537, "grad_norm": 2.037071466445923, "learning_rate": 4.683688019104203e-06, "loss": 0.9323, "step": 1930 }, { "epoch": 1.0157811678064177, "grad_norm": 2.027493715286255, "learning_rate": 4.683349719940288e-06, "loss": 0.9484, "step": 1931 }, { "epoch": 1.0163072067332983, "grad_norm": 2.050152540206909, "learning_rate": 4.683011252196747e-06, "loss": 0.914, "step": 1932 }, { "epoch": 1.016833245660179, "grad_norm": 2.033648729324341, "learning_rate": 4.682672615899713e-06, "loss": 0.9278, "step": 1933 }, { "epoch": 1.0173592845870594, "grad_norm": 1.9856821298599243, "learning_rate": 4.682333811075334e-06, "loss": 0.9069, "step": 1934 }, { "epoch": 1.01788532351394, "grad_norm": 2.2752439975738525, "learning_rate": 4.681994837749769e-06, "loss": 0.9808, "step": 1935 }, { "epoch": 1.0184113624408206, "grad_norm": 1.9650025367736816, "learning_rate": 4.681655695949191e-06, "loss": 0.9287, "step": 1936 }, { "epoch": 1.0189374013677013, "grad_norm": 2.068004608154297, "learning_rate": 4.681316385699786e-06, "loss": 0.9446, "step": 1937 }, { "epoch": 1.0194634402945817, "grad_norm": 1.9954777956008911, "learning_rate": 4.680976907027751e-06, "loss": 0.94, "step": 1938 }, { "epoch": 1.0199894792214623, "grad_norm": 1.9835753440856934, "learning_rate": 4.6806372599593e-06, "loss": 0.912, "step": 1939 }, { "epoch": 1.020515518148343, "grad_norm": 2.903367280960083, "learning_rate": 4.6802974445206554e-06, "loss": 0.9568, "step": 1940 }, { "epoch": 1.0210415570752236, "grad_norm": 2.0142931938171387, "learning_rate": 4.679957460738056e-06, "loss": 0.9534, "step": 1941 }, { "epoch": 1.0215675960021042, "grad_norm": 2.080303192138672, "learning_rate": 4.679617308637752e-06, "loss": 0.9518, "step": 1942 }, { "epoch": 1.0220936349289846, "grad_norm": 1.963465690612793, "learning_rate": 4.679276988246007e-06, "loss": 0.9263, "step": 1943 }, { "epoch": 1.0226196738558653, "grad_norm": 2.035710334777832, "learning_rate": 4.678936499589099e-06, "loss": 0.9576, "step": 1944 }, { "epoch": 1.023145712782746, "grad_norm": 2.036870002746582, "learning_rate": 4.678595842693316e-06, "loss": 0.9179, "step": 1945 }, { "epoch": 1.0236717517096265, "grad_norm": 2.0613720417022705, "learning_rate": 4.678255017584961e-06, "loss": 0.9778, "step": 1946 }, { "epoch": 1.0241977906365072, "grad_norm": 2.149697780609131, "learning_rate": 4.67791402429035e-06, "loss": 0.9222, "step": 1947 }, { "epoch": 1.0247238295633876, "grad_norm": 1.9258294105529785, "learning_rate": 4.677572862835811e-06, "loss": 0.903, "step": 1948 }, { "epoch": 1.0252498684902682, "grad_norm": 1.9599074125289917, "learning_rate": 4.677231533247687e-06, "loss": 0.9484, "step": 1949 }, { "epoch": 1.0257759074171489, "grad_norm": 1.917982816696167, "learning_rate": 4.67689003555233e-06, "loss": 0.8994, "step": 1950 }, { "epoch": 1.0263019463440295, "grad_norm": 2.0358879566192627, "learning_rate": 4.67654836977611e-06, "loss": 0.9367, "step": 1951 }, { "epoch": 1.0268279852709101, "grad_norm": 2.0364022254943848, "learning_rate": 4.6762065359454056e-06, "loss": 0.9272, "step": 1952 }, { "epoch": 1.0273540241977905, "grad_norm": 1.9820277690887451, "learning_rate": 4.675864534086612e-06, "loss": 0.9586, "step": 1953 }, { "epoch": 1.0278800631246712, "grad_norm": 2.1438517570495605, "learning_rate": 4.675522364226135e-06, "loss": 1.0007, "step": 1954 }, { "epoch": 1.0284061020515518, "grad_norm": 1.9437652826309204, "learning_rate": 4.675180026390393e-06, "loss": 0.9622, "step": 1955 }, { "epoch": 1.0289321409784324, "grad_norm": 2.197209596633911, "learning_rate": 4.67483752060582e-06, "loss": 0.9292, "step": 1956 }, { "epoch": 1.029458179905313, "grad_norm": 2.0781588554382324, "learning_rate": 4.674494846898861e-06, "loss": 0.9032, "step": 1957 }, { "epoch": 1.0299842188321935, "grad_norm": 2.0351462364196777, "learning_rate": 4.674152005295974e-06, "loss": 0.9644, "step": 1958 }, { "epoch": 1.0305102577590741, "grad_norm": 2.1569135189056396, "learning_rate": 4.67380899582363e-06, "loss": 0.9658, "step": 1959 }, { "epoch": 1.0310362966859548, "grad_norm": 2.0446736812591553, "learning_rate": 4.6734658185083135e-06, "loss": 0.8667, "step": 1960 }, { "epoch": 1.0315623356128354, "grad_norm": 2.2169229984283447, "learning_rate": 4.673122473376522e-06, "loss": 0.917, "step": 1961 }, { "epoch": 1.032088374539716, "grad_norm": 2.000225067138672, "learning_rate": 4.6727789604547655e-06, "loss": 0.8952, "step": 1962 }, { "epoch": 1.0326144134665964, "grad_norm": 1.986311435699463, "learning_rate": 4.672435279769567e-06, "loss": 0.9558, "step": 1963 }, { "epoch": 1.033140452393477, "grad_norm": 2.138862371444702, "learning_rate": 4.672091431347463e-06, "loss": 0.9604, "step": 1964 }, { "epoch": 1.0336664913203577, "grad_norm": 2.291006565093994, "learning_rate": 4.671747415215002e-06, "loss": 1.0001, "step": 1965 }, { "epoch": 1.0341925302472383, "grad_norm": 2.2395827770233154, "learning_rate": 4.671403231398747e-06, "loss": 0.9705, "step": 1966 }, { "epoch": 1.034718569174119, "grad_norm": 1.9645211696624756, "learning_rate": 4.671058879925271e-06, "loss": 0.9185, "step": 1967 }, { "epoch": 1.0352446081009994, "grad_norm": 2.1107375621795654, "learning_rate": 4.670714360821165e-06, "loss": 0.8937, "step": 1968 }, { "epoch": 1.03577064702788, "grad_norm": 2.1025230884552, "learning_rate": 4.670369674113026e-06, "loss": 0.9537, "step": 1969 }, { "epoch": 1.0362966859547607, "grad_norm": 2.0165746212005615, "learning_rate": 4.67002481982747e-06, "loss": 0.9679, "step": 1970 }, { "epoch": 1.0368227248816413, "grad_norm": 2.054098606109619, "learning_rate": 4.669679797991123e-06, "loss": 0.9748, "step": 1971 }, { "epoch": 1.037348763808522, "grad_norm": 2.160297393798828, "learning_rate": 4.669334608630627e-06, "loss": 0.9211, "step": 1972 }, { "epoch": 1.0378748027354023, "grad_norm": 2.1559903621673584, "learning_rate": 4.668989251772631e-06, "loss": 0.8847, "step": 1973 }, { "epoch": 1.038400841662283, "grad_norm": 1.9618030786514282, "learning_rate": 4.6686437274438025e-06, "loss": 0.9079, "step": 1974 }, { "epoch": 1.0389268805891636, "grad_norm": 1.9198315143585205, "learning_rate": 4.668298035670818e-06, "loss": 0.9138, "step": 1975 }, { "epoch": 1.0394529195160442, "grad_norm": 2.126873731613159, "learning_rate": 4.667952176480373e-06, "loss": 0.9888, "step": 1976 }, { "epoch": 1.0399789584429249, "grad_norm": 1.9329999685287476, "learning_rate": 4.667606149899168e-06, "loss": 0.9022, "step": 1977 }, { "epoch": 1.0405049973698053, "grad_norm": 2.1013362407684326, "learning_rate": 4.667259955953921e-06, "loss": 0.9787, "step": 1978 }, { "epoch": 1.041031036296686, "grad_norm": 1.891451358795166, "learning_rate": 4.666913594671363e-06, "loss": 0.9121, "step": 1979 }, { "epoch": 1.0415570752235666, "grad_norm": 2.084703207015991, "learning_rate": 4.666567066078237e-06, "loss": 0.9561, "step": 1980 }, { "epoch": 1.0420831141504472, "grad_norm": 1.9543099403381348, "learning_rate": 4.666220370201298e-06, "loss": 0.9344, "step": 1981 }, { "epoch": 1.0426091530773278, "grad_norm": 1.93330717086792, "learning_rate": 4.665873507067316e-06, "loss": 0.9802, "step": 1982 }, { "epoch": 1.0431351920042082, "grad_norm": 2.096243381500244, "learning_rate": 4.665526476703072e-06, "loss": 1.0, "step": 1983 }, { "epoch": 1.0436612309310889, "grad_norm": 2.0133352279663086, "learning_rate": 4.66517927913536e-06, "loss": 1.002, "step": 1984 }, { "epoch": 1.0441872698579695, "grad_norm": 1.921587586402893, "learning_rate": 4.6648319143909906e-06, "loss": 0.8968, "step": 1985 }, { "epoch": 1.0447133087848501, "grad_norm": 2.086876630783081, "learning_rate": 4.664484382496781e-06, "loss": 0.9112, "step": 1986 }, { "epoch": 1.0452393477117308, "grad_norm": 2.039179563522339, "learning_rate": 4.6641366834795665e-06, "loss": 0.9897, "step": 1987 }, { "epoch": 1.0457653866386112, "grad_norm": 1.9592386484146118, "learning_rate": 4.663788817366192e-06, "loss": 0.9364, "step": 1988 }, { "epoch": 1.0462914255654918, "grad_norm": 2.0162618160247803, "learning_rate": 4.66344078418352e-06, "loss": 0.9626, "step": 1989 }, { "epoch": 1.0468174644923725, "grad_norm": 1.9067870378494263, "learning_rate": 4.663092583958419e-06, "loss": 0.9235, "step": 1990 }, { "epoch": 1.047343503419253, "grad_norm": 2.151963710784912, "learning_rate": 4.662744216717775e-06, "loss": 0.9567, "step": 1991 }, { "epoch": 1.0478695423461337, "grad_norm": 2.244503974914551, "learning_rate": 4.662395682488487e-06, "loss": 0.9824, "step": 1992 }, { "epoch": 1.0483955812730141, "grad_norm": 2.0067191123962402, "learning_rate": 4.662046981297465e-06, "loss": 0.952, "step": 1993 }, { "epoch": 1.0489216201998948, "grad_norm": 2.012934923171997, "learning_rate": 4.6616981131716335e-06, "loss": 0.9217, "step": 1994 }, { "epoch": 1.0494476591267754, "grad_norm": 1.9931223392486572, "learning_rate": 4.6613490781379276e-06, "loss": 1.0257, "step": 1995 }, { "epoch": 1.049973698053656, "grad_norm": 1.9512076377868652, "learning_rate": 4.660999876223299e-06, "loss": 0.9377, "step": 1996 }, { "epoch": 1.0504997369805364, "grad_norm": 2.249743700027466, "learning_rate": 4.660650507454708e-06, "loss": 0.9249, "step": 1997 }, { "epoch": 1.051025775907417, "grad_norm": 2.1437506675720215, "learning_rate": 4.660300971859131e-06, "loss": 0.9217, "step": 1998 }, { "epoch": 1.0515518148342977, "grad_norm": 2.184690475463867, "learning_rate": 4.659951269463555e-06, "loss": 0.9518, "step": 1999 }, { "epoch": 1.0520778537611783, "grad_norm": 2.054569959640503, "learning_rate": 4.659601400294984e-06, "loss": 0.9346, "step": 2000 }, { "epoch": 1.052603892688059, "grad_norm": 2.049569845199585, "learning_rate": 4.6592513643804285e-06, "loss": 0.9472, "step": 2001 }, { "epoch": 1.0531299316149396, "grad_norm": 2.139266014099121, "learning_rate": 4.658901161746917e-06, "loss": 0.9687, "step": 2002 }, { "epoch": 1.05365597054182, "grad_norm": 2.0240185260772705, "learning_rate": 4.658550792421488e-06, "loss": 0.9403, "step": 2003 }, { "epoch": 1.0541820094687007, "grad_norm": 1.9521421194076538, "learning_rate": 4.6582002564311955e-06, "loss": 0.9716, "step": 2004 }, { "epoch": 1.0547080483955813, "grad_norm": 2.2795963287353516, "learning_rate": 4.657849553803104e-06, "loss": 0.9747, "step": 2005 }, { "epoch": 1.055234087322462, "grad_norm": 2.0353355407714844, "learning_rate": 4.657498684564292e-06, "loss": 0.9618, "step": 2006 }, { "epoch": 1.0557601262493423, "grad_norm": 2.4355268478393555, "learning_rate": 4.657147648741851e-06, "loss": 0.9846, "step": 2007 }, { "epoch": 1.056286165176223, "grad_norm": 2.16408371925354, "learning_rate": 4.656796446362883e-06, "loss": 0.9277, "step": 2008 }, { "epoch": 1.0568122041031036, "grad_norm": 2.2572405338287354, "learning_rate": 4.6564450774545085e-06, "loss": 1.0186, "step": 2009 }, { "epoch": 1.0573382430299842, "grad_norm": 2.01306414604187, "learning_rate": 4.656093542043854e-06, "loss": 0.9083, "step": 2010 }, { "epoch": 1.0578642819568649, "grad_norm": 2.0974233150482178, "learning_rate": 4.6557418401580634e-06, "loss": 0.9568, "step": 2011 }, { "epoch": 1.0583903208837453, "grad_norm": 2.685208797454834, "learning_rate": 4.655389971824292e-06, "loss": 0.9709, "step": 2012 }, { "epoch": 1.058916359810626, "grad_norm": 1.9793546199798584, "learning_rate": 4.6550379370697085e-06, "loss": 0.9482, "step": 2013 }, { "epoch": 1.0594423987375066, "grad_norm": 2.092241048812866, "learning_rate": 4.654685735921493e-06, "loss": 0.9631, "step": 2014 }, { "epoch": 1.0599684376643872, "grad_norm": 2.060007095336914, "learning_rate": 4.65433336840684e-06, "loss": 0.9416, "step": 2015 }, { "epoch": 1.0604944765912678, "grad_norm": 2.1290628910064697, "learning_rate": 4.653980834552956e-06, "loss": 0.9629, "step": 2016 }, { "epoch": 1.0610205155181482, "grad_norm": 2.187473773956299, "learning_rate": 4.6536281343870614e-06, "loss": 0.9476, "step": 2017 }, { "epoch": 1.0615465544450289, "grad_norm": 2.041978359222412, "learning_rate": 4.653275267936388e-06, "loss": 0.9479, "step": 2018 }, { "epoch": 1.0620725933719095, "grad_norm": 2.0300915241241455, "learning_rate": 4.652922235228182e-06, "loss": 0.9654, "step": 2019 }, { "epoch": 1.0625986322987901, "grad_norm": 1.9150184392929077, "learning_rate": 4.6525690362897006e-06, "loss": 0.9248, "step": 2020 }, { "epoch": 1.0631246712256708, "grad_norm": 1.8847254514694214, "learning_rate": 4.6522156711482146e-06, "loss": 0.9171, "step": 2021 }, { "epoch": 1.0636507101525512, "grad_norm": 2.097339630126953, "learning_rate": 4.651862139831008e-06, "loss": 0.9976, "step": 2022 }, { "epoch": 1.0641767490794318, "grad_norm": 2.09112286567688, "learning_rate": 4.651508442365379e-06, "loss": 0.9473, "step": 2023 }, { "epoch": 1.0647027880063125, "grad_norm": 2.2583703994750977, "learning_rate": 4.651154578778636e-06, "loss": 0.9849, "step": 2024 }, { "epoch": 1.065228826933193, "grad_norm": 2.1210403442382812, "learning_rate": 4.650800549098101e-06, "loss": 0.9396, "step": 2025 }, { "epoch": 1.0657548658600737, "grad_norm": 2.198775291442871, "learning_rate": 4.650446353351109e-06, "loss": 0.9, "step": 2026 }, { "epoch": 1.0662809047869541, "grad_norm": 2.0861377716064453, "learning_rate": 4.650091991565009e-06, "loss": 0.9248, "step": 2027 }, { "epoch": 1.0668069437138348, "grad_norm": 2.0532140731811523, "learning_rate": 4.649737463767161e-06, "loss": 0.8827, "step": 2028 }, { "epoch": 1.0673329826407154, "grad_norm": 2.1287131309509277, "learning_rate": 4.649382769984938e-06, "loss": 0.9748, "step": 2029 }, { "epoch": 1.067859021567596, "grad_norm": 2.07169771194458, "learning_rate": 4.649027910245728e-06, "loss": 0.9893, "step": 2030 }, { "epoch": 1.0683850604944767, "grad_norm": 2.0845425128936768, "learning_rate": 4.648672884576929e-06, "loss": 0.9565, "step": 2031 }, { "epoch": 1.068911099421357, "grad_norm": 2.153416395187378, "learning_rate": 4.648317693005955e-06, "loss": 0.9745, "step": 2032 }, { "epoch": 1.0694371383482377, "grad_norm": 2.0058255195617676, "learning_rate": 4.647962335560228e-06, "loss": 0.9396, "step": 2033 }, { "epoch": 1.0699631772751184, "grad_norm": 2.191807270050049, "learning_rate": 4.647606812267187e-06, "loss": 0.9702, "step": 2034 }, { "epoch": 1.070489216201999, "grad_norm": 2.1113696098327637, "learning_rate": 4.647251123154283e-06, "loss": 0.9245, "step": 2035 }, { "epoch": 1.0710152551288796, "grad_norm": 2.034909248352051, "learning_rate": 4.646895268248979e-06, "loss": 0.9886, "step": 2036 }, { "epoch": 1.07154129405576, "grad_norm": 1.967405080795288, "learning_rate": 4.646539247578751e-06, "loss": 0.9364, "step": 2037 }, { "epoch": 1.0720673329826407, "grad_norm": 2.005378484725952, "learning_rate": 4.646183061171087e-06, "loss": 0.9065, "step": 2038 }, { "epoch": 1.0725933719095213, "grad_norm": 2.1951041221618652, "learning_rate": 4.645826709053489e-06, "loss": 0.9649, "step": 2039 }, { "epoch": 1.073119410836402, "grad_norm": 2.10109543800354, "learning_rate": 4.6454701912534736e-06, "loss": 0.9455, "step": 2040 }, { "epoch": 1.0736454497632826, "grad_norm": 2.1643972396850586, "learning_rate": 4.6451135077985655e-06, "loss": 0.9892, "step": 2041 }, { "epoch": 1.074171488690163, "grad_norm": 2.028247833251953, "learning_rate": 4.644756658716305e-06, "loss": 0.9791, "step": 2042 }, { "epoch": 1.0746975276170436, "grad_norm": 2.0592141151428223, "learning_rate": 4.644399644034246e-06, "loss": 0.9557, "step": 2043 }, { "epoch": 1.0752235665439243, "grad_norm": 1.9696271419525146, "learning_rate": 4.644042463779954e-06, "loss": 0.9328, "step": 2044 }, { "epoch": 1.075749605470805, "grad_norm": 2.300461530685425, "learning_rate": 4.6436851179810075e-06, "loss": 0.947, "step": 2045 }, { "epoch": 1.0762756443976855, "grad_norm": 2.0531888008117676, "learning_rate": 4.643327606664996e-06, "loss": 0.939, "step": 2046 }, { "epoch": 1.076801683324566, "grad_norm": 2.0392346382141113, "learning_rate": 4.642969929859526e-06, "loss": 0.9779, "step": 2047 }, { "epoch": 1.0773277222514466, "grad_norm": 2.2741591930389404, "learning_rate": 4.642612087592212e-06, "loss": 0.9658, "step": 2048 }, { "epoch": 1.0778537611783272, "grad_norm": 2.1253297328948975, "learning_rate": 4.642254079890684e-06, "loss": 1.0048, "step": 2049 }, { "epoch": 1.0783798001052078, "grad_norm": 2.121539831161499, "learning_rate": 4.641895906782586e-06, "loss": 0.9418, "step": 2050 }, { "epoch": 1.0789058390320885, "grad_norm": 1.9954118728637695, "learning_rate": 4.641537568295572e-06, "loss": 0.9201, "step": 2051 }, { "epoch": 1.0794318779589689, "grad_norm": 2.434898853302002, "learning_rate": 4.64117906445731e-06, "loss": 1.0158, "step": 2052 }, { "epoch": 1.0799579168858495, "grad_norm": 2.2084853649139404, "learning_rate": 4.640820395295479e-06, "loss": 0.929, "step": 2053 }, { "epoch": 1.0804839558127302, "grad_norm": 2.0358357429504395, "learning_rate": 4.640461560837774e-06, "loss": 0.9432, "step": 2054 }, { "epoch": 1.0810099947396108, "grad_norm": 2.025202751159668, "learning_rate": 4.6401025611119025e-06, "loss": 0.9768, "step": 2055 }, { "epoch": 1.0815360336664912, "grad_norm": 1.9872761964797974, "learning_rate": 4.6397433961455805e-06, "loss": 0.9303, "step": 2056 }, { "epoch": 1.0820620725933718, "grad_norm": 2.1098103523254395, "learning_rate": 4.63938406596654e-06, "loss": 0.9847, "step": 2057 }, { "epoch": 1.0825881115202525, "grad_norm": 2.0892457962036133, "learning_rate": 4.6390245706025275e-06, "loss": 0.9349, "step": 2058 }, { "epoch": 1.083114150447133, "grad_norm": 2.0502517223358154, "learning_rate": 4.638664910081298e-06, "loss": 0.9366, "step": 2059 }, { "epoch": 1.0836401893740137, "grad_norm": 1.979395866394043, "learning_rate": 4.6383050844306234e-06, "loss": 0.9782, "step": 2060 }, { "epoch": 1.0841662283008944, "grad_norm": 1.9518492221832275, "learning_rate": 4.6379450936782854e-06, "loss": 0.9456, "step": 2061 }, { "epoch": 1.0846922672277748, "grad_norm": 2.1265623569488525, "learning_rate": 4.637584937852079e-06, "loss": 1.0054, "step": 2062 }, { "epoch": 1.0852183061546554, "grad_norm": 1.8959511518478394, "learning_rate": 4.637224616979813e-06, "loss": 0.9175, "step": 2063 }, { "epoch": 1.085744345081536, "grad_norm": 1.9260917901992798, "learning_rate": 4.636864131089307e-06, "loss": 0.942, "step": 2064 }, { "epoch": 1.0862703840084167, "grad_norm": 2.039825677871704, "learning_rate": 4.636503480208397e-06, "loss": 0.9008, "step": 2065 }, { "epoch": 1.086796422935297, "grad_norm": 1.9619112014770508, "learning_rate": 4.6361426643649265e-06, "loss": 0.9189, "step": 2066 }, { "epoch": 1.0873224618621777, "grad_norm": 1.9015494585037231, "learning_rate": 4.635781683586758e-06, "loss": 0.8762, "step": 2067 }, { "epoch": 1.0878485007890584, "grad_norm": 2.2694029808044434, "learning_rate": 4.6354205379017606e-06, "loss": 0.9519, "step": 2068 }, { "epoch": 1.088374539715939, "grad_norm": 1.8958660364151, "learning_rate": 4.635059227337819e-06, "loss": 0.972, "step": 2069 }, { "epoch": 1.0889005786428196, "grad_norm": 2.080143451690674, "learning_rate": 4.634697751922832e-06, "loss": 0.9156, "step": 2070 }, { "epoch": 1.0894266175697003, "grad_norm": 2.0264220237731934, "learning_rate": 4.634336111684709e-06, "loss": 0.9619, "step": 2071 }, { "epoch": 1.0899526564965807, "grad_norm": 2.071171998977661, "learning_rate": 4.633974306651373e-06, "loss": 0.9035, "step": 2072 }, { "epoch": 1.0904786954234613, "grad_norm": 1.926443338394165, "learning_rate": 4.633612336850759e-06, "loss": 0.9724, "step": 2073 }, { "epoch": 1.091004734350342, "grad_norm": 2.2006890773773193, "learning_rate": 4.633250202310815e-06, "loss": 0.941, "step": 2074 }, { "epoch": 1.0915307732772226, "grad_norm": 1.9194120168685913, "learning_rate": 4.632887903059503e-06, "loss": 0.9307, "step": 2075 }, { "epoch": 1.092056812204103, "grad_norm": 2.0963993072509766, "learning_rate": 4.632525439124796e-06, "loss": 0.9356, "step": 2076 }, { "epoch": 1.0925828511309836, "grad_norm": 2.040682792663574, "learning_rate": 4.63216281053468e-06, "loss": 0.968, "step": 2077 }, { "epoch": 1.0931088900578643, "grad_norm": 2.061224937438965, "learning_rate": 4.631800017317154e-06, "loss": 0.9381, "step": 2078 }, { "epoch": 1.093634928984745, "grad_norm": 2.0894832611083984, "learning_rate": 4.6314370595002315e-06, "loss": 0.9246, "step": 2079 }, { "epoch": 1.0941609679116255, "grad_norm": 2.0694963932037354, "learning_rate": 4.631073937111936e-06, "loss": 0.9059, "step": 2080 }, { "epoch": 1.094687006838506, "grad_norm": 2.2278406620025635, "learning_rate": 4.6307106501803035e-06, "loss": 0.9149, "step": 2081 }, { "epoch": 1.0952130457653866, "grad_norm": 2.0981225967407227, "learning_rate": 4.630347198733385e-06, "loss": 0.9965, "step": 2082 }, { "epoch": 1.0957390846922672, "grad_norm": 2.153197765350342, "learning_rate": 4.629983582799243e-06, "loss": 0.9471, "step": 2083 }, { "epoch": 1.0962651236191479, "grad_norm": 2.0150113105773926, "learning_rate": 4.629619802405953e-06, "loss": 0.9694, "step": 2084 }, { "epoch": 1.0967911625460285, "grad_norm": 2.080817222595215, "learning_rate": 4.6292558575816035e-06, "loss": 0.9275, "step": 2085 }, { "epoch": 1.097317201472909, "grad_norm": 2.024019956588745, "learning_rate": 4.6288917483542935e-06, "loss": 0.9466, "step": 2086 }, { "epoch": 1.0978432403997895, "grad_norm": 1.9700965881347656, "learning_rate": 4.628527474752138e-06, "loss": 0.9202, "step": 2087 }, { "epoch": 1.0983692793266702, "grad_norm": 1.9840142726898193, "learning_rate": 4.628163036803263e-06, "loss": 0.9836, "step": 2088 }, { "epoch": 1.0988953182535508, "grad_norm": 2.0773203372955322, "learning_rate": 4.627798434535807e-06, "loss": 0.9819, "step": 2089 }, { "epoch": 1.0994213571804314, "grad_norm": 2.5213847160339355, "learning_rate": 4.627433667977921e-06, "loss": 0.917, "step": 2090 }, { "epoch": 1.0999473961073118, "grad_norm": 2.071960687637329, "learning_rate": 4.627068737157769e-06, "loss": 0.9579, "step": 2091 }, { "epoch": 1.1004734350341925, "grad_norm": 2.091217517852783, "learning_rate": 4.626703642103528e-06, "loss": 0.9201, "step": 2092 }, { "epoch": 1.1009994739610731, "grad_norm": 2.1105289459228516, "learning_rate": 4.62633838284339e-06, "loss": 0.9008, "step": 2093 }, { "epoch": 1.1015255128879538, "grad_norm": 2.0329155921936035, "learning_rate": 4.625972959405553e-06, "loss": 0.9629, "step": 2094 }, { "epoch": 1.1020515518148344, "grad_norm": 2.2218475341796875, "learning_rate": 4.625607371818235e-06, "loss": 0.9771, "step": 2095 }, { "epoch": 1.1025775907417148, "grad_norm": 1.9372378587722778, "learning_rate": 4.625241620109662e-06, "loss": 0.8767, "step": 2096 }, { "epoch": 1.1031036296685954, "grad_norm": 2.11558198928833, "learning_rate": 4.624875704308075e-06, "loss": 0.9399, "step": 2097 }, { "epoch": 1.103629668595476, "grad_norm": 2.135223627090454, "learning_rate": 4.624509624441726e-06, "loss": 0.9942, "step": 2098 }, { "epoch": 1.1041557075223567, "grad_norm": 1.995465636253357, "learning_rate": 4.624143380538881e-06, "loss": 0.9041, "step": 2099 }, { "epoch": 1.1046817464492373, "grad_norm": 2.067887783050537, "learning_rate": 4.6237769726278195e-06, "loss": 0.985, "step": 2100 }, { "epoch": 1.1052077853761177, "grad_norm": 2.1130104064941406, "learning_rate": 4.623410400736831e-06, "loss": 0.9802, "step": 2101 }, { "epoch": 1.1057338243029984, "grad_norm": 2.0574796199798584, "learning_rate": 4.623043664894219e-06, "loss": 0.9311, "step": 2102 }, { "epoch": 1.106259863229879, "grad_norm": 2.0544402599334717, "learning_rate": 4.6226767651283e-06, "loss": 0.9446, "step": 2103 }, { "epoch": 1.1067859021567596, "grad_norm": 2.1335291862487793, "learning_rate": 4.622309701467403e-06, "loss": 0.9573, "step": 2104 }, { "epoch": 1.1073119410836403, "grad_norm": 2.5629992485046387, "learning_rate": 4.62194247393987e-06, "loss": 0.9915, "step": 2105 }, { "epoch": 1.1078379800105207, "grad_norm": 2.0296285152435303, "learning_rate": 4.6215750825740545e-06, "loss": 0.9257, "step": 2106 }, { "epoch": 1.1083640189374013, "grad_norm": 2.0325889587402344, "learning_rate": 4.621207527398324e-06, "loss": 0.8907, "step": 2107 }, { "epoch": 1.108890057864282, "grad_norm": 2.0547173023223877, "learning_rate": 4.620839808441056e-06, "loss": 0.9733, "step": 2108 }, { "epoch": 1.1094160967911626, "grad_norm": 1.995935082435608, "learning_rate": 4.620471925730645e-06, "loss": 0.9106, "step": 2109 }, { "epoch": 1.1099421357180432, "grad_norm": 2.371973991394043, "learning_rate": 4.6201038792954945e-06, "loss": 0.9309, "step": 2110 }, { "epoch": 1.1104681746449236, "grad_norm": 2.0574443340301514, "learning_rate": 4.6197356691640225e-06, "loss": 0.9848, "step": 2111 }, { "epoch": 1.1109942135718043, "grad_norm": 2.0146894454956055, "learning_rate": 4.6193672953646585e-06, "loss": 0.973, "step": 2112 }, { "epoch": 1.111520252498685, "grad_norm": 2.1383960247039795, "learning_rate": 4.618998757925846e-06, "loss": 0.9147, "step": 2113 }, { "epoch": 1.1120462914255655, "grad_norm": 2.035088539123535, "learning_rate": 4.618630056876039e-06, "loss": 0.9245, "step": 2114 }, { "epoch": 1.1125723303524462, "grad_norm": 2.038917064666748, "learning_rate": 4.618261192243706e-06, "loss": 1.0075, "step": 2115 }, { "epoch": 1.1130983692793266, "grad_norm": 1.9224687814712524, "learning_rate": 4.617892164057328e-06, "loss": 0.9209, "step": 2116 }, { "epoch": 1.1136244082062072, "grad_norm": 1.9721827507019043, "learning_rate": 4.617522972345398e-06, "loss": 0.8663, "step": 2117 }, { "epoch": 1.1141504471330879, "grad_norm": 1.9861339330673218, "learning_rate": 4.6171536171364225e-06, "loss": 0.9306, "step": 2118 }, { "epoch": 1.1146764860599685, "grad_norm": 2.097487688064575, "learning_rate": 4.616784098458918e-06, "loss": 0.9262, "step": 2119 }, { "epoch": 1.1152025249868491, "grad_norm": 2.0926623344421387, "learning_rate": 4.616414416341418e-06, "loss": 0.945, "step": 2120 }, { "epoch": 1.1157285639137295, "grad_norm": 2.019235849380493, "learning_rate": 4.616044570812465e-06, "loss": 0.9446, "step": 2121 }, { "epoch": 1.1162546028406102, "grad_norm": 2.095885992050171, "learning_rate": 4.615674561900615e-06, "loss": 0.9503, "step": 2122 }, { "epoch": 1.1167806417674908, "grad_norm": 2.0200493335723877, "learning_rate": 4.615304389634437e-06, "loss": 0.9141, "step": 2123 }, { "epoch": 1.1173066806943714, "grad_norm": 2.2150073051452637, "learning_rate": 4.614934054042514e-06, "loss": 0.945, "step": 2124 }, { "epoch": 1.1178327196212519, "grad_norm": 2.169135570526123, "learning_rate": 4.614563555153437e-06, "loss": 1.0375, "step": 2125 }, { "epoch": 1.1183587585481325, "grad_norm": 2.796290874481201, "learning_rate": 4.614192892995817e-06, "loss": 0.8948, "step": 2126 }, { "epoch": 1.1188847974750131, "grad_norm": 2.06923508644104, "learning_rate": 4.613822067598269e-06, "loss": 0.9306, "step": 2127 }, { "epoch": 1.1194108364018938, "grad_norm": 2.1803252696990967, "learning_rate": 4.613451078989428e-06, "loss": 0.9354, "step": 2128 }, { "epoch": 1.1199368753287744, "grad_norm": 1.9848898649215698, "learning_rate": 4.6130799271979385e-06, "loss": 0.9225, "step": 2129 }, { "epoch": 1.120462914255655, "grad_norm": 2.0420773029327393, "learning_rate": 4.612708612252456e-06, "loss": 0.958, "step": 2130 }, { "epoch": 1.1209889531825354, "grad_norm": 2.1113882064819336, "learning_rate": 4.6123371341816506e-06, "loss": 0.9673, "step": 2131 }, { "epoch": 1.121514992109416, "grad_norm": 2.2430622577667236, "learning_rate": 4.611965493014206e-06, "loss": 0.922, "step": 2132 }, { "epoch": 1.1220410310362967, "grad_norm": 2.1610355377197266, "learning_rate": 4.611593688778816e-06, "loss": 0.9566, "step": 2133 }, { "epoch": 1.1225670699631773, "grad_norm": 2.0127573013305664, "learning_rate": 4.611221721504189e-06, "loss": 0.9371, "step": 2134 }, { "epoch": 1.1230931088900578, "grad_norm": 2.0172536373138428, "learning_rate": 4.6108495912190435e-06, "loss": 0.9495, "step": 2135 }, { "epoch": 1.1236191478169384, "grad_norm": 2.1512820720672607, "learning_rate": 4.610477297952114e-06, "loss": 0.9608, "step": 2136 }, { "epoch": 1.124145186743819, "grad_norm": 1.8832893371582031, "learning_rate": 4.610104841732145e-06, "loss": 0.9251, "step": 2137 }, { "epoch": 1.1246712256706997, "grad_norm": 1.8792293071746826, "learning_rate": 4.6097322225878945e-06, "loss": 0.9487, "step": 2138 }, { "epoch": 1.1251972645975803, "grad_norm": 2.035464286804199, "learning_rate": 4.609359440548133e-06, "loss": 0.9327, "step": 2139 }, { "epoch": 1.125723303524461, "grad_norm": 2.070833683013916, "learning_rate": 4.6089864956416445e-06, "loss": 0.9014, "step": 2140 }, { "epoch": 1.1262493424513413, "grad_norm": 2.0978243350982666, "learning_rate": 4.608613387897223e-06, "loss": 0.9477, "step": 2141 }, { "epoch": 1.126775381378222, "grad_norm": 2.3318967819213867, "learning_rate": 4.608240117343677e-06, "loss": 0.9646, "step": 2142 }, { "epoch": 1.1273014203051026, "grad_norm": 2.1404736042022705, "learning_rate": 4.6078666840098275e-06, "loss": 0.9688, "step": 2143 }, { "epoch": 1.1278274592319832, "grad_norm": 2.244619846343994, "learning_rate": 4.607493087924508e-06, "loss": 0.9718, "step": 2144 }, { "epoch": 1.1283534981588637, "grad_norm": 2.287501335144043, "learning_rate": 4.607119329116565e-06, "loss": 0.9263, "step": 2145 }, { "epoch": 1.1288795370857443, "grad_norm": 1.9234886169433594, "learning_rate": 4.606745407614856e-06, "loss": 0.8622, "step": 2146 }, { "epoch": 1.129405576012625, "grad_norm": 2.1752331256866455, "learning_rate": 4.606371323448252e-06, "loss": 1.0183, "step": 2147 }, { "epoch": 1.1299316149395056, "grad_norm": 2.0048513412475586, "learning_rate": 4.605997076645638e-06, "loss": 0.9563, "step": 2148 }, { "epoch": 1.1304576538663862, "grad_norm": 2.272096633911133, "learning_rate": 4.605622667235907e-06, "loss": 1.0203, "step": 2149 }, { "epoch": 1.1309836927932668, "grad_norm": 2.023000717163086, "learning_rate": 4.6052480952479715e-06, "loss": 0.9408, "step": 2150 }, { "epoch": 1.1315097317201472, "grad_norm": 2.0389719009399414, "learning_rate": 4.604873360710751e-06, "loss": 0.9345, "step": 2151 }, { "epoch": 1.1320357706470279, "grad_norm": 2.1599810123443604, "learning_rate": 4.604498463653179e-06, "loss": 0.9609, "step": 2152 }, { "epoch": 1.1325618095739085, "grad_norm": 2.0829246044158936, "learning_rate": 4.604123404104202e-06, "loss": 0.9489, "step": 2153 }, { "epoch": 1.1330878485007891, "grad_norm": 2.0154478549957275, "learning_rate": 4.603748182092779e-06, "loss": 0.9404, "step": 2154 }, { "epoch": 1.1336138874276696, "grad_norm": 1.971787452697754, "learning_rate": 4.603372797647882e-06, "loss": 0.889, "step": 2155 }, { "epoch": 1.1341399263545502, "grad_norm": 2.367708206176758, "learning_rate": 4.602997250798494e-06, "loss": 0.9641, "step": 2156 }, { "epoch": 1.1346659652814308, "grad_norm": 1.9592807292938232, "learning_rate": 4.602621541573613e-06, "loss": 0.9394, "step": 2157 }, { "epoch": 1.1351920042083115, "grad_norm": 2.111332416534424, "learning_rate": 4.602245670002246e-06, "loss": 0.9458, "step": 2158 }, { "epoch": 1.135718043135192, "grad_norm": 2.0517547130584717, "learning_rate": 4.601869636113416e-06, "loss": 0.9487, "step": 2159 }, { "epoch": 1.1362440820620725, "grad_norm": 2.054208993911743, "learning_rate": 4.601493439936156e-06, "loss": 0.9189, "step": 2160 }, { "epoch": 1.1367701209889531, "grad_norm": 2.1155214309692383, "learning_rate": 4.601117081499515e-06, "loss": 0.9308, "step": 2161 }, { "epoch": 1.1372961599158338, "grad_norm": 2.1389827728271484, "learning_rate": 4.600740560832551e-06, "loss": 0.9746, "step": 2162 }, { "epoch": 1.1378221988427144, "grad_norm": 1.9715708494186401, "learning_rate": 4.600363877964334e-06, "loss": 0.9658, "step": 2163 }, { "epoch": 1.138348237769595, "grad_norm": 2.169759750366211, "learning_rate": 4.599987032923949e-06, "loss": 0.9543, "step": 2164 }, { "epoch": 1.1388742766964755, "grad_norm": 1.9511892795562744, "learning_rate": 4.599610025740494e-06, "loss": 0.972, "step": 2165 }, { "epoch": 1.139400315623356, "grad_norm": 2.095147132873535, "learning_rate": 4.599232856443078e-06, "loss": 0.9247, "step": 2166 }, { "epoch": 1.1399263545502367, "grad_norm": 2.0037872791290283, "learning_rate": 4.5988555250608225e-06, "loss": 0.9414, "step": 2167 }, { "epoch": 1.1404523934771174, "grad_norm": 1.9899228811264038, "learning_rate": 4.598478031622862e-06, "loss": 0.9716, "step": 2168 }, { "epoch": 1.140978432403998, "grad_norm": 2.0828182697296143, "learning_rate": 4.598100376158342e-06, "loss": 0.9108, "step": 2169 }, { "epoch": 1.1415044713308784, "grad_norm": 2.2328367233276367, "learning_rate": 4.597722558696424e-06, "loss": 0.9618, "step": 2170 }, { "epoch": 1.142030510257759, "grad_norm": 1.9552658796310425, "learning_rate": 4.5973445792662776e-06, "loss": 0.9713, "step": 2171 }, { "epoch": 1.1425565491846397, "grad_norm": 1.9701144695281982, "learning_rate": 4.596966437897089e-06, "loss": 0.9383, "step": 2172 }, { "epoch": 1.1430825881115203, "grad_norm": 2.198983907699585, "learning_rate": 4.596588134618054e-06, "loss": 0.9355, "step": 2173 }, { "epoch": 1.143608627038401, "grad_norm": 2.20420503616333, "learning_rate": 4.596209669458383e-06, "loss": 0.9992, "step": 2174 }, { "epoch": 1.1441346659652813, "grad_norm": 2.103262186050415, "learning_rate": 4.595831042447296e-06, "loss": 0.966, "step": 2175 }, { "epoch": 1.144660704892162, "grad_norm": 2.0620105266571045, "learning_rate": 4.595452253614029e-06, "loss": 0.9393, "step": 2176 }, { "epoch": 1.1451867438190426, "grad_norm": 2.0859262943267822, "learning_rate": 4.595073302987828e-06, "loss": 0.9824, "step": 2177 }, { "epoch": 1.1457127827459233, "grad_norm": 2.04518985748291, "learning_rate": 4.594694190597953e-06, "loss": 0.9389, "step": 2178 }, { "epoch": 1.1462388216728039, "grad_norm": 1.9948362112045288, "learning_rate": 4.594314916473676e-06, "loss": 0.9434, "step": 2179 }, { "epoch": 1.1467648605996843, "grad_norm": 2.104213237762451, "learning_rate": 4.59393548064428e-06, "loss": 0.9765, "step": 2180 }, { "epoch": 1.147290899526565, "grad_norm": 2.026655673980713, "learning_rate": 4.593555883139062e-06, "loss": 0.9489, "step": 2181 }, { "epoch": 1.1478169384534456, "grad_norm": 2.111635446548462, "learning_rate": 4.593176123987333e-06, "loss": 0.9549, "step": 2182 }, { "epoch": 1.1483429773803262, "grad_norm": 2.0284945964813232, "learning_rate": 4.592796203218413e-06, "loss": 0.9621, "step": 2183 }, { "epoch": 1.1488690163072066, "grad_norm": 2.026057243347168, "learning_rate": 4.592416120861637e-06, "loss": 0.9412, "step": 2184 }, { "epoch": 1.1493950552340872, "grad_norm": 2.0450279712677, "learning_rate": 4.592035876946351e-06, "loss": 0.939, "step": 2185 }, { "epoch": 1.1499210941609679, "grad_norm": 2.177182674407959, "learning_rate": 4.591655471501915e-06, "loss": 0.9546, "step": 2186 }, { "epoch": 1.1504471330878485, "grad_norm": 2.2185943126678467, "learning_rate": 4.591274904557701e-06, "loss": 0.9264, "step": 2187 }, { "epoch": 1.1509731720147292, "grad_norm": 2.103032350540161, "learning_rate": 4.590894176143092e-06, "loss": 0.9784, "step": 2188 }, { "epoch": 1.1514992109416098, "grad_norm": 2.1525919437408447, "learning_rate": 4.590513286287485e-06, "loss": 0.9442, "step": 2189 }, { "epoch": 1.1520252498684902, "grad_norm": 2.0014309883117676, "learning_rate": 4.5901322350202894e-06, "loss": 0.9326, "step": 2190 }, { "epoch": 1.1525512887953708, "grad_norm": 2.0484871864318848, "learning_rate": 4.589751022370926e-06, "loss": 0.9489, "step": 2191 }, { "epoch": 1.1530773277222515, "grad_norm": 2.1447575092315674, "learning_rate": 4.58936964836883e-06, "loss": 0.9073, "step": 2192 }, { "epoch": 1.153603366649132, "grad_norm": 2.328009605407715, "learning_rate": 4.588988113043448e-06, "loss": 0.9433, "step": 2193 }, { "epoch": 1.1541294055760125, "grad_norm": 2.051511526107788, "learning_rate": 4.5886064164242364e-06, "loss": 0.954, "step": 2194 }, { "epoch": 1.1546554445028931, "grad_norm": 2.137183904647827, "learning_rate": 4.588224558540668e-06, "loss": 0.9705, "step": 2195 }, { "epoch": 1.1551814834297738, "grad_norm": 2.0305912494659424, "learning_rate": 4.587842539422228e-06, "loss": 0.988, "step": 2196 }, { "epoch": 1.1557075223566544, "grad_norm": 2.0433285236358643, "learning_rate": 4.587460359098411e-06, "loss": 0.9428, "step": 2197 }, { "epoch": 1.156233561283535, "grad_norm": 2.0450432300567627, "learning_rate": 4.587078017598726e-06, "loss": 0.9209, "step": 2198 }, { "epoch": 1.1567596002104157, "grad_norm": 2.0686779022216797, "learning_rate": 4.5866955149526945e-06, "loss": 0.937, "step": 2199 }, { "epoch": 1.157285639137296, "grad_norm": 2.069709300994873, "learning_rate": 4.58631285118985e-06, "loss": 0.9374, "step": 2200 }, { "epoch": 1.1578116780641767, "grad_norm": 1.9677889347076416, "learning_rate": 4.585930026339738e-06, "loss": 0.9577, "step": 2201 }, { "epoch": 1.1583377169910574, "grad_norm": 2.0871002674102783, "learning_rate": 4.585547040431918e-06, "loss": 0.9573, "step": 2202 }, { "epoch": 1.158863755917938, "grad_norm": 2.0991642475128174, "learning_rate": 4.585163893495961e-06, "loss": 0.9567, "step": 2203 }, { "epoch": 1.1593897948448184, "grad_norm": 2.0081803798675537, "learning_rate": 4.584780585561448e-06, "loss": 0.9739, "step": 2204 }, { "epoch": 1.159915833771699, "grad_norm": 2.147531032562256, "learning_rate": 4.584397116657977e-06, "loss": 0.989, "step": 2205 }, { "epoch": 1.1604418726985797, "grad_norm": 2.0847082138061523, "learning_rate": 4.584013486815155e-06, "loss": 0.9587, "step": 2206 }, { "epoch": 1.1609679116254603, "grad_norm": 2.165778160095215, "learning_rate": 4.583629696062604e-06, "loss": 0.9763, "step": 2207 }, { "epoch": 1.161493950552341, "grad_norm": 1.9898335933685303, "learning_rate": 4.583245744429956e-06, "loss": 0.9406, "step": 2208 }, { "epoch": 1.1620199894792216, "grad_norm": 2.101166248321533, "learning_rate": 4.582861631946857e-06, "loss": 1.0025, "step": 2209 }, { "epoch": 1.162546028406102, "grad_norm": 2.094820022583008, "learning_rate": 4.582477358642964e-06, "loss": 0.9349, "step": 2210 }, { "epoch": 1.1630720673329826, "grad_norm": 2.1106255054473877, "learning_rate": 4.582092924547948e-06, "loss": 0.9975, "step": 2211 }, { "epoch": 1.1635981062598633, "grad_norm": 2.1793222427368164, "learning_rate": 4.581708329691493e-06, "loss": 0.9982, "step": 2212 }, { "epoch": 1.164124145186744, "grad_norm": 2.1435718536376953, "learning_rate": 4.581323574103291e-06, "loss": 0.8832, "step": 2213 }, { "epoch": 1.1646501841136243, "grad_norm": 1.996934175491333, "learning_rate": 4.580938657813052e-06, "loss": 0.8712, "step": 2214 }, { "epoch": 1.165176223040505, "grad_norm": 1.928216814994812, "learning_rate": 4.580553580850495e-06, "loss": 0.9186, "step": 2215 }, { "epoch": 1.1657022619673856, "grad_norm": 2.129814386367798, "learning_rate": 4.580168343245354e-06, "loss": 0.9258, "step": 2216 }, { "epoch": 1.1662283008942662, "grad_norm": 2.2137105464935303, "learning_rate": 4.579782945027371e-06, "loss": 0.9124, "step": 2217 }, { "epoch": 1.1667543398211468, "grad_norm": 2.036886692047119, "learning_rate": 4.5793973862263045e-06, "loss": 0.9172, "step": 2218 }, { "epoch": 1.1672803787480273, "grad_norm": 2.0155868530273438, "learning_rate": 4.579011666871924e-06, "loss": 0.9145, "step": 2219 }, { "epoch": 1.167806417674908, "grad_norm": 2.001176118850708, "learning_rate": 4.5786257869940125e-06, "loss": 0.9237, "step": 2220 }, { "epoch": 1.1683324566017885, "grad_norm": 2.418034791946411, "learning_rate": 4.578239746622363e-06, "loss": 0.9415, "step": 2221 }, { "epoch": 1.1688584955286692, "grad_norm": 2.069413423538208, "learning_rate": 4.577853545786782e-06, "loss": 0.9585, "step": 2222 }, { "epoch": 1.1693845344555498, "grad_norm": 2.1006906032562256, "learning_rate": 4.577467184517089e-06, "loss": 1.0138, "step": 2223 }, { "epoch": 1.1699105733824302, "grad_norm": 2.1174235343933105, "learning_rate": 4.577080662843117e-06, "loss": 0.9108, "step": 2224 }, { "epoch": 1.1704366123093108, "grad_norm": 2.51448392868042, "learning_rate": 4.576693980794708e-06, "loss": 1.0006, "step": 2225 }, { "epoch": 1.1709626512361915, "grad_norm": 2.03072190284729, "learning_rate": 4.57630713840172e-06, "loss": 0.9314, "step": 2226 }, { "epoch": 1.171488690163072, "grad_norm": 2.079005002975464, "learning_rate": 4.575920135694019e-06, "loss": 0.9023, "step": 2227 }, { "epoch": 1.1720147290899527, "grad_norm": 1.9575252532958984, "learning_rate": 4.575532972701488e-06, "loss": 0.9384, "step": 2228 }, { "epoch": 1.1725407680168332, "grad_norm": 2.0194175243377686, "learning_rate": 4.575145649454021e-06, "loss": 0.969, "step": 2229 }, { "epoch": 1.1730668069437138, "grad_norm": 2.0641977787017822, "learning_rate": 4.574758165981523e-06, "loss": 0.9374, "step": 2230 }, { "epoch": 1.1735928458705944, "grad_norm": 2.7739837169647217, "learning_rate": 4.5743705223139115e-06, "loss": 0.9322, "step": 2231 }, { "epoch": 1.174118884797475, "grad_norm": 2.1928372383117676, "learning_rate": 4.573982718481117e-06, "loss": 0.8982, "step": 2232 }, { "epoch": 1.1746449237243557, "grad_norm": 1.8924100399017334, "learning_rate": 4.573594754513083e-06, "loss": 0.9313, "step": 2233 }, { "epoch": 1.175170962651236, "grad_norm": 2.0933282375335693, "learning_rate": 4.573206630439766e-06, "loss": 0.9171, "step": 2234 }, { "epoch": 1.1756970015781167, "grad_norm": 2.0586800575256348, "learning_rate": 4.572818346291133e-06, "loss": 0.9756, "step": 2235 }, { "epoch": 1.1762230405049974, "grad_norm": 1.9426237344741821, "learning_rate": 4.572429902097161e-06, "loss": 0.8887, "step": 2236 }, { "epoch": 1.176749079431878, "grad_norm": 1.9896386861801147, "learning_rate": 4.572041297887846e-06, "loss": 0.927, "step": 2237 }, { "epoch": 1.1772751183587586, "grad_norm": 2.138934373855591, "learning_rate": 4.571652533693192e-06, "loss": 0.9835, "step": 2238 }, { "epoch": 1.177801157285639, "grad_norm": 2.141348361968994, "learning_rate": 4.571263609543215e-06, "loss": 0.9423, "step": 2239 }, { "epoch": 1.1783271962125197, "grad_norm": 2.3880257606506348, "learning_rate": 4.570874525467945e-06, "loss": 0.9188, "step": 2240 }, { "epoch": 1.1788532351394003, "grad_norm": 2.104442834854126, "learning_rate": 4.570485281497423e-06, "loss": 0.937, "step": 2241 }, { "epoch": 1.179379274066281, "grad_norm": 2.215341329574585, "learning_rate": 4.570095877661704e-06, "loss": 0.9318, "step": 2242 }, { "epoch": 1.1799053129931614, "grad_norm": 2.0575337409973145, "learning_rate": 4.569706313990854e-06, "loss": 0.9187, "step": 2243 }, { "epoch": 1.180431351920042, "grad_norm": 2.2835347652435303, "learning_rate": 4.569316590514952e-06, "loss": 0.9305, "step": 2244 }, { "epoch": 1.1809573908469226, "grad_norm": 2.066976308822632, "learning_rate": 4.56892670726409e-06, "loss": 0.9258, "step": 2245 }, { "epoch": 1.1814834297738033, "grad_norm": 2.0150883197784424, "learning_rate": 4.568536664268369e-06, "loss": 0.9259, "step": 2246 }, { "epoch": 1.182009468700684, "grad_norm": 1.9549976587295532, "learning_rate": 4.568146461557908e-06, "loss": 0.9753, "step": 2247 }, { "epoch": 1.1825355076275645, "grad_norm": 2.251574993133545, "learning_rate": 4.5677560991628326e-06, "loss": 1.0101, "step": 2248 }, { "epoch": 1.183061546554445, "grad_norm": 2.047912836074829, "learning_rate": 4.5673655771132835e-06, "loss": 0.9074, "step": 2249 }, { "epoch": 1.1835875854813256, "grad_norm": 2.1108009815216064, "learning_rate": 4.566974895439414e-06, "loss": 0.9252, "step": 2250 }, { "epoch": 1.1841136244082062, "grad_norm": 2.06037974357605, "learning_rate": 4.566584054171391e-06, "loss": 0.9745, "step": 2251 }, { "epoch": 1.1846396633350869, "grad_norm": 2.164132595062256, "learning_rate": 4.566193053339389e-06, "loss": 0.945, "step": 2252 }, { "epoch": 1.1851657022619673, "grad_norm": 2.1002984046936035, "learning_rate": 4.565801892973599e-06, "loss": 0.9341, "step": 2253 }, { "epoch": 1.185691741188848, "grad_norm": 2.0745604038238525, "learning_rate": 4.565410573104223e-06, "loss": 0.9163, "step": 2254 }, { "epoch": 1.1862177801157285, "grad_norm": 2.0689234733581543, "learning_rate": 4.565019093761476e-06, "loss": 0.9619, "step": 2255 }, { "epoch": 1.1867438190426092, "grad_norm": 1.8939684629440308, "learning_rate": 4.564627454975583e-06, "loss": 0.9117, "step": 2256 }, { "epoch": 1.1872698579694898, "grad_norm": 2.0398480892181396, "learning_rate": 4.564235656776784e-06, "loss": 0.9354, "step": 2257 }, { "epoch": 1.1877958968963704, "grad_norm": 2.2121827602386475, "learning_rate": 4.563843699195331e-06, "loss": 0.9898, "step": 2258 }, { "epoch": 1.1883219358232509, "grad_norm": 2.0941290855407715, "learning_rate": 4.563451582261488e-06, "loss": 0.9359, "step": 2259 }, { "epoch": 1.1888479747501315, "grad_norm": 1.958443522453308, "learning_rate": 4.5630593060055285e-06, "loss": 0.8915, "step": 2260 }, { "epoch": 1.1893740136770121, "grad_norm": 2.125643014907837, "learning_rate": 4.562666870457742e-06, "loss": 0.9695, "step": 2261 }, { "epoch": 1.1899000526038928, "grad_norm": 2.0711584091186523, "learning_rate": 4.56227427564843e-06, "loss": 0.9895, "step": 2262 }, { "epoch": 1.1904260915307732, "grad_norm": 1.9734212160110474, "learning_rate": 4.561881521607905e-06, "loss": 0.9276, "step": 2263 }, { "epoch": 1.1909521304576538, "grad_norm": 1.9757225513458252, "learning_rate": 4.561488608366491e-06, "loss": 0.933, "step": 2264 }, { "epoch": 1.1914781693845344, "grad_norm": 2.0021965503692627, "learning_rate": 4.561095535954526e-06, "loss": 0.9517, "step": 2265 }, { "epoch": 1.192004208311415, "grad_norm": 2.054388999938965, "learning_rate": 4.560702304402359e-06, "loss": 0.9494, "step": 2266 }, { "epoch": 1.1925302472382957, "grad_norm": 2.0927810668945312, "learning_rate": 4.560308913740354e-06, "loss": 0.9814, "step": 2267 }, { "epoch": 1.1930562861651763, "grad_norm": 2.0837948322296143, "learning_rate": 4.559915363998883e-06, "loss": 0.9452, "step": 2268 }, { "epoch": 1.1935823250920568, "grad_norm": 2.0685925483703613, "learning_rate": 4.559521655208334e-06, "loss": 0.9683, "step": 2269 }, { "epoch": 1.1941083640189374, "grad_norm": 2.033249855041504, "learning_rate": 4.559127787399106e-06, "loss": 0.9251, "step": 2270 }, { "epoch": 1.194634402945818, "grad_norm": 2.0225706100463867, "learning_rate": 4.558733760601608e-06, "loss": 0.944, "step": 2271 }, { "epoch": 1.1951604418726987, "grad_norm": 2.263442039489746, "learning_rate": 4.558339574846265e-06, "loss": 0.9721, "step": 2272 }, { "epoch": 1.195686480799579, "grad_norm": 1.9405720233917236, "learning_rate": 4.557945230163514e-06, "loss": 0.9235, "step": 2273 }, { "epoch": 1.1962125197264597, "grad_norm": 2.0711405277252197, "learning_rate": 4.5575507265838e-06, "loss": 0.9106, "step": 2274 }, { "epoch": 1.1967385586533403, "grad_norm": 1.9209696054458618, "learning_rate": 4.557156064137585e-06, "loss": 0.9504, "step": 2275 }, { "epoch": 1.197264597580221, "grad_norm": 1.9152365922927856, "learning_rate": 4.5567612428553414e-06, "loss": 0.9134, "step": 2276 }, { "epoch": 1.1977906365071016, "grad_norm": 2.214308738708496, "learning_rate": 4.556366262767554e-06, "loss": 1.0137, "step": 2277 }, { "epoch": 1.1983166754339822, "grad_norm": 1.9821407794952393, "learning_rate": 4.555971123904719e-06, "loss": 0.9552, "step": 2278 }, { "epoch": 1.1988427143608626, "grad_norm": 1.909018874168396, "learning_rate": 4.555575826297346e-06, "loss": 0.911, "step": 2279 }, { "epoch": 1.1993687532877433, "grad_norm": 1.9589500427246094, "learning_rate": 4.555180369975956e-06, "loss": 0.9533, "step": 2280 }, { "epoch": 1.199894792214624, "grad_norm": 1.9503285884857178, "learning_rate": 4.554784754971085e-06, "loss": 0.8835, "step": 2281 }, { "epoch": 1.2004208311415046, "grad_norm": 2.0783655643463135, "learning_rate": 4.554388981313275e-06, "loss": 0.9974, "step": 2282 }, { "epoch": 1.200946870068385, "grad_norm": 2.1446335315704346, "learning_rate": 4.553993049033088e-06, "loss": 0.9428, "step": 2283 }, { "epoch": 1.2014729089952656, "grad_norm": 1.9783402681350708, "learning_rate": 4.553596958161093e-06, "loss": 0.8851, "step": 2284 }, { "epoch": 1.2019989479221462, "grad_norm": 2.1446151733398438, "learning_rate": 4.553200708727873e-06, "loss": 0.9231, "step": 2285 }, { "epoch": 1.2025249868490269, "grad_norm": 1.9833011627197266, "learning_rate": 4.552804300764022e-06, "loss": 0.9165, "step": 2286 }, { "epoch": 1.2030510257759075, "grad_norm": 2.1723484992980957, "learning_rate": 4.55240773430015e-06, "loss": 0.9263, "step": 2287 }, { "epoch": 1.203577064702788, "grad_norm": 2.1445460319519043, "learning_rate": 4.552011009366873e-06, "loss": 0.9686, "step": 2288 }, { "epoch": 1.2041031036296685, "grad_norm": 2.0098588466644287, "learning_rate": 4.551614125994824e-06, "loss": 0.9675, "step": 2289 }, { "epoch": 1.2046291425565492, "grad_norm": 2.2001070976257324, "learning_rate": 4.551217084214649e-06, "loss": 1.0049, "step": 2290 }, { "epoch": 1.2051551814834298, "grad_norm": 1.9142197370529175, "learning_rate": 4.550819884057001e-06, "loss": 0.8986, "step": 2291 }, { "epoch": 1.2056812204103105, "grad_norm": 2.1119284629821777, "learning_rate": 4.55042252555255e-06, "loss": 0.9571, "step": 2292 }, { "epoch": 1.2062072593371909, "grad_norm": 2.2181107997894287, "learning_rate": 4.550025008731977e-06, "loss": 0.9761, "step": 2293 }, { "epoch": 1.2067332982640715, "grad_norm": 1.9644392728805542, "learning_rate": 4.549627333625975e-06, "loss": 0.9717, "step": 2294 }, { "epoch": 1.2072593371909521, "grad_norm": 2.106912851333618, "learning_rate": 4.549229500265246e-06, "loss": 1.0127, "step": 2295 }, { "epoch": 1.2077853761178328, "grad_norm": 2.0657334327697754, "learning_rate": 4.548831508680511e-06, "loss": 0.9563, "step": 2296 }, { "epoch": 1.2083114150447134, "grad_norm": 1.9914659261703491, "learning_rate": 4.548433358902499e-06, "loss": 0.9604, "step": 2297 }, { "epoch": 1.2088374539715938, "grad_norm": 2.0038280487060547, "learning_rate": 4.54803505096195e-06, "loss": 0.9058, "step": 2298 }, { "epoch": 1.2093634928984744, "grad_norm": 2.0413012504577637, "learning_rate": 4.547636584889619e-06, "loss": 0.9904, "step": 2299 }, { "epoch": 1.209889531825355, "grad_norm": 2.122159719467163, "learning_rate": 4.547237960716272e-06, "loss": 0.9509, "step": 2300 }, { "epoch": 1.2104155707522357, "grad_norm": 1.907065510749817, "learning_rate": 4.546839178472688e-06, "loss": 0.8814, "step": 2301 }, { "epoch": 1.2109416096791163, "grad_norm": 1.9999891519546509, "learning_rate": 4.5464402381896565e-06, "loss": 0.9119, "step": 2302 }, { "epoch": 1.2114676486059968, "grad_norm": 2.095425844192505, "learning_rate": 4.546041139897981e-06, "loss": 0.9855, "step": 2303 }, { "epoch": 1.2119936875328774, "grad_norm": 2.1287622451782227, "learning_rate": 4.545641883628475e-06, "loss": 0.9959, "step": 2304 }, { "epoch": 1.212519726459758, "grad_norm": 2.00107479095459, "learning_rate": 4.545242469411968e-06, "loss": 0.9404, "step": 2305 }, { "epoch": 1.2130457653866387, "grad_norm": 2.1348764896392822, "learning_rate": 4.544842897279298e-06, "loss": 0.9103, "step": 2306 }, { "epoch": 1.2135718043135193, "grad_norm": 2.061293125152588, "learning_rate": 4.544443167261317e-06, "loss": 0.9234, "step": 2307 }, { "epoch": 1.2140978432403997, "grad_norm": 2.0079479217529297, "learning_rate": 4.544043279388887e-06, "loss": 0.962, "step": 2308 }, { "epoch": 1.2146238821672803, "grad_norm": 2.0040595531463623, "learning_rate": 4.543643233692887e-06, "loss": 0.9185, "step": 2309 }, { "epoch": 1.215149921094161, "grad_norm": 1.9917961359024048, "learning_rate": 4.543243030204203e-06, "loss": 0.9079, "step": 2310 }, { "epoch": 1.2156759600210416, "grad_norm": 2.262044906616211, "learning_rate": 4.5428426689537355e-06, "loss": 0.9455, "step": 2311 }, { "epoch": 1.216201998947922, "grad_norm": 2.0358335971832275, "learning_rate": 4.5424421499723974e-06, "loss": 0.9794, "step": 2312 }, { "epoch": 1.2167280378748027, "grad_norm": 2.1631860733032227, "learning_rate": 4.542041473291113e-06, "loss": 0.966, "step": 2313 }, { "epoch": 1.2172540768016833, "grad_norm": 2.0367043018341064, "learning_rate": 4.54164063894082e-06, "loss": 0.9377, "step": 2314 }, { "epoch": 1.217780115728564, "grad_norm": 2.015761613845825, "learning_rate": 4.541239646952466e-06, "loss": 0.9207, "step": 2315 }, { "epoch": 1.2183061546554446, "grad_norm": 1.893565058708191, "learning_rate": 4.540838497357014e-06, "loss": 0.963, "step": 2316 }, { "epoch": 1.2188321935823252, "grad_norm": 2.026319742202759, "learning_rate": 4.540437190185435e-06, "loss": 0.9695, "step": 2317 }, { "epoch": 1.2193582325092056, "grad_norm": 2.119520664215088, "learning_rate": 4.540035725468718e-06, "loss": 0.9977, "step": 2318 }, { "epoch": 1.2198842714360862, "grad_norm": 1.9087727069854736, "learning_rate": 4.5396341032378564e-06, "loss": 0.9048, "step": 2319 }, { "epoch": 1.2204103103629669, "grad_norm": 1.9850622415542603, "learning_rate": 4.539232323523862e-06, "loss": 0.9819, "step": 2320 }, { "epoch": 1.2209363492898475, "grad_norm": 1.9811348915100098, "learning_rate": 4.538830386357759e-06, "loss": 0.9397, "step": 2321 }, { "epoch": 1.221462388216728, "grad_norm": 2.0679426193237305, "learning_rate": 4.538428291770578e-06, "loss": 0.9311, "step": 2322 }, { "epoch": 1.2219884271436086, "grad_norm": 2.18583345413208, "learning_rate": 4.538026039793366e-06, "loss": 0.9666, "step": 2323 }, { "epoch": 1.2225144660704892, "grad_norm": 2.1667392253875732, "learning_rate": 4.537623630457184e-06, "loss": 0.9855, "step": 2324 }, { "epoch": 1.2230405049973698, "grad_norm": 2.158278465270996, "learning_rate": 4.5372210637931e-06, "loss": 0.9479, "step": 2325 }, { "epoch": 1.2235665439242505, "grad_norm": 2.0774621963500977, "learning_rate": 4.536818339832197e-06, "loss": 0.9234, "step": 2326 }, { "epoch": 1.224092582851131, "grad_norm": 2.0122995376586914, "learning_rate": 4.536415458605572e-06, "loss": 0.982, "step": 2327 }, { "epoch": 1.2246186217780115, "grad_norm": 1.9868342876434326, "learning_rate": 4.53601242014433e-06, "loss": 0.9153, "step": 2328 }, { "epoch": 1.2251446607048921, "grad_norm": 1.9843225479125977, "learning_rate": 4.535609224479591e-06, "loss": 0.944, "step": 2329 }, { "epoch": 1.2256706996317728, "grad_norm": 2.0100514888763428, "learning_rate": 4.5352058716424855e-06, "loss": 0.9057, "step": 2330 }, { "epoch": 1.2261967385586534, "grad_norm": 1.9764091968536377, "learning_rate": 4.534802361664158e-06, "loss": 0.8985, "step": 2331 }, { "epoch": 1.2267227774855338, "grad_norm": 2.1023499965667725, "learning_rate": 4.534398694575764e-06, "loss": 0.8647, "step": 2332 }, { "epoch": 1.2272488164124145, "grad_norm": 2.141122579574585, "learning_rate": 4.53399487040847e-06, "loss": 0.9913, "step": 2333 }, { "epoch": 1.227774855339295, "grad_norm": 2.193509817123413, "learning_rate": 4.533590889193457e-06, "loss": 0.9396, "step": 2334 }, { "epoch": 1.2283008942661757, "grad_norm": 2.07718563079834, "learning_rate": 4.533186750961917e-06, "loss": 0.9662, "step": 2335 }, { "epoch": 1.2288269331930564, "grad_norm": 2.5485520362854004, "learning_rate": 4.532782455745054e-06, "loss": 0.9923, "step": 2336 }, { "epoch": 1.229352972119937, "grad_norm": 2.1097700595855713, "learning_rate": 4.532378003574084e-06, "loss": 0.9689, "step": 2337 }, { "epoch": 1.2298790110468174, "grad_norm": 2.0095794200897217, "learning_rate": 4.5319733944802345e-06, "loss": 0.956, "step": 2338 }, { "epoch": 1.230405049973698, "grad_norm": 1.9793727397918701, "learning_rate": 4.531568628494748e-06, "loss": 0.8748, "step": 2339 }, { "epoch": 1.2309310889005787, "grad_norm": 1.988430142402649, "learning_rate": 4.531163705648875e-06, "loss": 0.9903, "step": 2340 }, { "epoch": 1.2314571278274593, "grad_norm": 2.1825473308563232, "learning_rate": 4.530758625973882e-06, "loss": 0.9517, "step": 2341 }, { "epoch": 1.2319831667543397, "grad_norm": 2.120129108428955, "learning_rate": 4.530353389501045e-06, "loss": 0.9399, "step": 2342 }, { "epoch": 1.2325092056812204, "grad_norm": 2.168637990951538, "learning_rate": 4.529947996261652e-06, "loss": 0.9493, "step": 2343 }, { "epoch": 1.233035244608101, "grad_norm": 1.9897968769073486, "learning_rate": 4.529542446287005e-06, "loss": 0.963, "step": 2344 }, { "epoch": 1.2335612835349816, "grad_norm": 2.204454183578491, "learning_rate": 4.529136739608418e-06, "loss": 0.9177, "step": 2345 }, { "epoch": 1.2340873224618623, "grad_norm": 1.9778000116348267, "learning_rate": 4.5287308762572135e-06, "loss": 0.9425, "step": 2346 }, { "epoch": 1.234613361388743, "grad_norm": 2.2127273082733154, "learning_rate": 4.528324856264731e-06, "loss": 0.9161, "step": 2347 }, { "epoch": 1.2351394003156233, "grad_norm": 2.1865501403808594, "learning_rate": 4.52791867966232e-06, "loss": 0.9622, "step": 2348 }, { "epoch": 1.235665439242504, "grad_norm": 2.1409499645233154, "learning_rate": 4.527512346481341e-06, "loss": 0.9043, "step": 2349 }, { "epoch": 1.2361914781693846, "grad_norm": 2.0245275497436523, "learning_rate": 4.527105856753168e-06, "loss": 0.8997, "step": 2350 }, { "epoch": 1.2367175170962652, "grad_norm": 2.031557559967041, "learning_rate": 4.526699210509186e-06, "loss": 0.9467, "step": 2351 }, { "epoch": 1.2372435560231456, "grad_norm": 1.9724940061569214, "learning_rate": 4.5262924077807936e-06, "loss": 0.8992, "step": 2352 }, { "epoch": 1.2377695949500263, "grad_norm": 2.120136022567749, "learning_rate": 4.5258854485994e-06, "loss": 0.9568, "step": 2353 }, { "epoch": 1.2382956338769069, "grad_norm": 2.007608413696289, "learning_rate": 4.525478332996428e-06, "loss": 0.928, "step": 2354 }, { "epoch": 1.2388216728037875, "grad_norm": 2.1604855060577393, "learning_rate": 4.525071061003311e-06, "loss": 0.9795, "step": 2355 }, { "epoch": 1.2393477117306682, "grad_norm": 2.2880077362060547, "learning_rate": 4.524663632651495e-06, "loss": 0.96, "step": 2356 }, { "epoch": 1.2398737506575486, "grad_norm": 2.1377370357513428, "learning_rate": 4.524256047972438e-06, "loss": 0.9128, "step": 2357 }, { "epoch": 1.2403997895844292, "grad_norm": 1.9751715660095215, "learning_rate": 4.523848306997611e-06, "loss": 0.9054, "step": 2358 }, { "epoch": 1.2409258285113098, "grad_norm": 2.1510539054870605, "learning_rate": 4.523440409758495e-06, "loss": 0.9007, "step": 2359 }, { "epoch": 1.2414518674381905, "grad_norm": 1.988323450088501, "learning_rate": 4.523032356286587e-06, "loss": 0.8735, "step": 2360 }, { "epoch": 1.241977906365071, "grad_norm": 2.0008699893951416, "learning_rate": 4.522624146613389e-06, "loss": 0.9891, "step": 2361 }, { "epoch": 1.2425039452919515, "grad_norm": 2.041987180709839, "learning_rate": 4.522215780770424e-06, "loss": 0.9366, "step": 2362 }, { "epoch": 1.2430299842188322, "grad_norm": 1.9101216793060303, "learning_rate": 4.521807258789219e-06, "loss": 0.8801, "step": 2363 }, { "epoch": 1.2435560231457128, "grad_norm": 2.129915475845337, "learning_rate": 4.521398580701319e-06, "loss": 0.9313, "step": 2364 }, { "epoch": 1.2440820620725934, "grad_norm": 2.062591314315796, "learning_rate": 4.520989746538277e-06, "loss": 0.9736, "step": 2365 }, { "epoch": 1.244608100999474, "grad_norm": 2.126099109649658, "learning_rate": 4.5205807563316604e-06, "loss": 1.0002, "step": 2366 }, { "epoch": 1.2451341399263545, "grad_norm": 2.038667678833008, "learning_rate": 4.520171610113049e-06, "loss": 0.9156, "step": 2367 }, { "epoch": 1.245660178853235, "grad_norm": 2.0621137619018555, "learning_rate": 4.519762307914032e-06, "loss": 0.9685, "step": 2368 }, { "epoch": 1.2461862177801157, "grad_norm": 2.080672025680542, "learning_rate": 4.519352849766212e-06, "loss": 0.9618, "step": 2369 }, { "epoch": 1.2467122567069964, "grad_norm": 2.064520835876465, "learning_rate": 4.5189432357012055e-06, "loss": 0.9454, "step": 2370 }, { "epoch": 1.247238295633877, "grad_norm": 2.018462657928467, "learning_rate": 4.518533465750638e-06, "loss": 0.9063, "step": 2371 }, { "epoch": 1.2477643345607574, "grad_norm": 1.985949158668518, "learning_rate": 4.5181235399461484e-06, "loss": 0.9448, "step": 2372 }, { "epoch": 1.248290373487638, "grad_norm": 2.068023920059204, "learning_rate": 4.517713458319389e-06, "loss": 0.9186, "step": 2373 }, { "epoch": 1.2488164124145187, "grad_norm": 2.0476534366607666, "learning_rate": 4.517303220902022e-06, "loss": 0.8944, "step": 2374 }, { "epoch": 1.2493424513413993, "grad_norm": 2.1194283962249756, "learning_rate": 4.516892827725722e-06, "loss": 0.9273, "step": 2375 }, { "epoch": 1.24986849026828, "grad_norm": 2.097421884536743, "learning_rate": 4.516482278822177e-06, "loss": 0.9247, "step": 2376 }, { "epoch": 1.2503945291951604, "grad_norm": 2.0427608489990234, "learning_rate": 4.516071574223085e-06, "loss": 0.9542, "step": 2377 }, { "epoch": 1.250920568122041, "grad_norm": 2.206160068511963, "learning_rate": 4.515660713960158e-06, "loss": 0.9822, "step": 2378 }, { "epoch": 1.2514466070489216, "grad_norm": 2.2046313285827637, "learning_rate": 4.515249698065118e-06, "loss": 0.966, "step": 2379 }, { "epoch": 1.2519726459758023, "grad_norm": 2.1513445377349854, "learning_rate": 4.514838526569702e-06, "loss": 0.9193, "step": 2380 }, { "epoch": 1.2524986849026827, "grad_norm": 2.2664003372192383, "learning_rate": 4.514427199505655e-06, "loss": 0.9657, "step": 2381 }, { "epoch": 1.2530247238295633, "grad_norm": 2.037461280822754, "learning_rate": 4.514015716904739e-06, "loss": 0.9666, "step": 2382 }, { "epoch": 1.253550762756444, "grad_norm": 2.117765188217163, "learning_rate": 4.5136040787987225e-06, "loss": 0.9408, "step": 2383 }, { "epoch": 1.2540768016833246, "grad_norm": 2.0627427101135254, "learning_rate": 4.5131922852193884e-06, "loss": 0.9265, "step": 2384 }, { "epoch": 1.2546028406102052, "grad_norm": 2.0914230346679688, "learning_rate": 4.512780336198534e-06, "loss": 0.9483, "step": 2385 }, { "epoch": 1.2551288795370859, "grad_norm": 2.2735514640808105, "learning_rate": 4.5123682317679665e-06, "loss": 0.9732, "step": 2386 }, { "epoch": 1.2556549184639663, "grad_norm": 2.0221076011657715, "learning_rate": 4.511955971959503e-06, "loss": 0.9524, "step": 2387 }, { "epoch": 1.256180957390847, "grad_norm": 2.1264171600341797, "learning_rate": 4.511543556804977e-06, "loss": 0.9613, "step": 2388 }, { "epoch": 1.2567069963177275, "grad_norm": 2.041597604751587, "learning_rate": 4.51113098633623e-06, "loss": 0.9337, "step": 2389 }, { "epoch": 1.2572330352446082, "grad_norm": 1.8505622148513794, "learning_rate": 4.510718260585116e-06, "loss": 0.954, "step": 2390 }, { "epoch": 1.2577590741714886, "grad_norm": 1.9987319707870483, "learning_rate": 4.510305379583506e-06, "loss": 0.942, "step": 2391 }, { "epoch": 1.2582851130983692, "grad_norm": 2.174147844314575, "learning_rate": 4.509892343363276e-06, "loss": 0.9528, "step": 2392 }, { "epoch": 1.2588111520252498, "grad_norm": 2.0779876708984375, "learning_rate": 4.509479151956319e-06, "loss": 0.9426, "step": 2393 }, { "epoch": 1.2593371909521305, "grad_norm": 2.073341131210327, "learning_rate": 4.509065805394535e-06, "loss": 0.9534, "step": 2394 }, { "epoch": 1.2598632298790111, "grad_norm": 1.9890536069869995, "learning_rate": 4.508652303709844e-06, "loss": 0.9873, "step": 2395 }, { "epoch": 1.2603892688058917, "grad_norm": 1.9639276266098022, "learning_rate": 4.508238646934169e-06, "loss": 0.915, "step": 2396 }, { "epoch": 1.2609153077327722, "grad_norm": 2.0988283157348633, "learning_rate": 4.507824835099451e-06, "loss": 0.9225, "step": 2397 }, { "epoch": 1.2614413466596528, "grad_norm": 2.0186917781829834, "learning_rate": 4.50741086823764e-06, "loss": 0.9535, "step": 2398 }, { "epoch": 1.2619673855865334, "grad_norm": 2.1138482093811035, "learning_rate": 4.5069967463807e-06, "loss": 0.9428, "step": 2399 }, { "epoch": 1.262493424513414, "grad_norm": 2.1341164112091064, "learning_rate": 4.5065824695606045e-06, "loss": 0.9368, "step": 2400 }, { "epoch": 1.2630194634402945, "grad_norm": 2.0811543464660645, "learning_rate": 4.506168037809342e-06, "loss": 0.9558, "step": 2401 }, { "epoch": 1.263545502367175, "grad_norm": 2.067427158355713, "learning_rate": 4.505753451158911e-06, "loss": 0.8918, "step": 2402 }, { "epoch": 1.2640715412940557, "grad_norm": 2.081106185913086, "learning_rate": 4.505338709641321e-06, "loss": 0.9488, "step": 2403 }, { "epoch": 1.2645975802209364, "grad_norm": 1.9976449012756348, "learning_rate": 4.5049238132885966e-06, "loss": 0.956, "step": 2404 }, { "epoch": 1.265123619147817, "grad_norm": 1.9591141939163208, "learning_rate": 4.504508762132772e-06, "loss": 0.9195, "step": 2405 }, { "epoch": 1.2656496580746976, "grad_norm": 2.0610501766204834, "learning_rate": 4.504093556205893e-06, "loss": 0.9174, "step": 2406 }, { "epoch": 1.266175697001578, "grad_norm": 2.0975704193115234, "learning_rate": 4.503678195540019e-06, "loss": 0.9364, "step": 2407 }, { "epoch": 1.2667017359284587, "grad_norm": 2.036421775817871, "learning_rate": 4.503262680167221e-06, "loss": 0.855, "step": 2408 }, { "epoch": 1.2672277748553393, "grad_norm": 1.947721242904663, "learning_rate": 4.502847010119581e-06, "loss": 0.9063, "step": 2409 }, { "epoch": 1.26775381378222, "grad_norm": 2.057432174682617, "learning_rate": 4.5024311854291935e-06, "loss": 0.9356, "step": 2410 }, { "epoch": 1.2682798527091004, "grad_norm": 2.3818373680114746, "learning_rate": 4.502015206128165e-06, "loss": 0.9896, "step": 2411 }, { "epoch": 1.268805891635981, "grad_norm": 1.9357292652130127, "learning_rate": 4.501599072248614e-06, "loss": 0.927, "step": 2412 }, { "epoch": 1.2693319305628616, "grad_norm": 2.0625691413879395, "learning_rate": 4.501182783822671e-06, "loss": 0.9391, "step": 2413 }, { "epoch": 1.2698579694897423, "grad_norm": 2.3002283573150635, "learning_rate": 4.5007663408824775e-06, "loss": 0.9798, "step": 2414 }, { "epoch": 1.270384008416623, "grad_norm": 2.030616283416748, "learning_rate": 4.500349743460188e-06, "loss": 0.9863, "step": 2415 }, { "epoch": 1.2709100473435035, "grad_norm": 2.094247817993164, "learning_rate": 4.4999329915879694e-06, "loss": 0.9504, "step": 2416 }, { "epoch": 1.271436086270384, "grad_norm": 2.10866117477417, "learning_rate": 4.499516085297998e-06, "loss": 0.9585, "step": 2417 }, { "epoch": 1.2719621251972646, "grad_norm": 2.2691314220428467, "learning_rate": 4.4990990246224656e-06, "loss": 0.8979, "step": 2418 }, { "epoch": 1.2724881641241452, "grad_norm": 2.1009504795074463, "learning_rate": 4.498681809593574e-06, "loss": 0.9392, "step": 2419 }, { "epoch": 1.2730142030510256, "grad_norm": 2.042738199234009, "learning_rate": 4.498264440243534e-06, "loss": 0.8967, "step": 2420 }, { "epoch": 1.2735402419779063, "grad_norm": 2.00138521194458, "learning_rate": 4.497846916604576e-06, "loss": 0.9642, "step": 2421 }, { "epoch": 1.274066280904787, "grad_norm": 2.086472988128662, "learning_rate": 4.4974292387089334e-06, "loss": 0.9474, "step": 2422 }, { "epoch": 1.2745923198316675, "grad_norm": 1.9719960689544678, "learning_rate": 4.4970114065888585e-06, "loss": 0.9121, "step": 2423 }, { "epoch": 1.2751183587585482, "grad_norm": 2.027043342590332, "learning_rate": 4.49659342027661e-06, "loss": 0.9262, "step": 2424 }, { "epoch": 1.2756443976854288, "grad_norm": 2.1368863582611084, "learning_rate": 4.4961752798044645e-06, "loss": 0.9439, "step": 2425 }, { "epoch": 1.2761704366123094, "grad_norm": 2.1782031059265137, "learning_rate": 4.495756985204705e-06, "loss": 0.9222, "step": 2426 }, { "epoch": 1.2766964755391899, "grad_norm": 2.1297860145568848, "learning_rate": 4.49533853650963e-06, "loss": 0.8776, "step": 2427 }, { "epoch": 1.2772225144660705, "grad_norm": 2.205399990081787, "learning_rate": 4.494919933751548e-06, "loss": 0.9438, "step": 2428 }, { "epoch": 1.2777485533929511, "grad_norm": 2.076138496398926, "learning_rate": 4.494501176962779e-06, "loss": 0.94, "step": 2429 }, { "epoch": 1.2782745923198315, "grad_norm": 2.0347354412078857, "learning_rate": 4.4940822661756566e-06, "loss": 0.9011, "step": 2430 }, { "epoch": 1.2788006312467122, "grad_norm": 2.0521199703216553, "learning_rate": 4.493663201422526e-06, "loss": 0.9039, "step": 2431 }, { "epoch": 1.2793266701735928, "grad_norm": 2.13946795463562, "learning_rate": 4.493243982735742e-06, "loss": 0.9488, "step": 2432 }, { "epoch": 1.2798527091004734, "grad_norm": 2.1136245727539062, "learning_rate": 4.492824610147676e-06, "loss": 0.9265, "step": 2433 }, { "epoch": 1.280378748027354, "grad_norm": 2.0845329761505127, "learning_rate": 4.4924050836907065e-06, "loss": 0.9652, "step": 2434 }, { "epoch": 1.2809047869542347, "grad_norm": 2.2292892932891846, "learning_rate": 4.4919854033972254e-06, "loss": 0.9731, "step": 2435 }, { "epoch": 1.2814308258811151, "grad_norm": 2.179656505584717, "learning_rate": 4.491565569299637e-06, "loss": 0.9456, "step": 2436 }, { "epoch": 1.2819568648079958, "grad_norm": 2.0454165935516357, "learning_rate": 4.49114558143036e-06, "loss": 0.877, "step": 2437 }, { "epoch": 1.2824829037348764, "grad_norm": 2.105604648590088, "learning_rate": 4.490725439821817e-06, "loss": 0.9817, "step": 2438 }, { "epoch": 1.283008942661757, "grad_norm": 2.1095452308654785, "learning_rate": 4.490305144506453e-06, "loss": 0.9423, "step": 2439 }, { "epoch": 1.2835349815886374, "grad_norm": 1.8949556350708008, "learning_rate": 4.489884695516716e-06, "loss": 0.9731, "step": 2440 }, { "epoch": 1.284061020515518, "grad_norm": 1.9607486724853516, "learning_rate": 4.489464092885072e-06, "loss": 0.8632, "step": 2441 }, { "epoch": 1.2845870594423987, "grad_norm": 1.9174182415008545, "learning_rate": 4.489043336643994e-06, "loss": 0.916, "step": 2442 }, { "epoch": 1.2851130983692793, "grad_norm": 2.09295654296875, "learning_rate": 4.488622426825972e-06, "loss": 0.919, "step": 2443 }, { "epoch": 1.28563913729616, "grad_norm": 2.0831458568573, "learning_rate": 4.488201363463503e-06, "loss": 0.9494, "step": 2444 }, { "epoch": 1.2861651762230406, "grad_norm": 2.1123828887939453, "learning_rate": 4.487780146589098e-06, "loss": 0.9913, "step": 2445 }, { "epoch": 1.286691215149921, "grad_norm": 2.1118533611297607, "learning_rate": 4.48735877623528e-06, "loss": 0.9224, "step": 2446 }, { "epoch": 1.2872172540768017, "grad_norm": 1.9321138858795166, "learning_rate": 4.486937252434584e-06, "loss": 0.9135, "step": 2447 }, { "epoch": 1.2877432930036823, "grad_norm": 2.107614755630493, "learning_rate": 4.4865155752195565e-06, "loss": 0.9449, "step": 2448 }, { "epoch": 1.288269331930563, "grad_norm": 2.1365113258361816, "learning_rate": 4.486093744622756e-06, "loss": 0.9571, "step": 2449 }, { "epoch": 1.2887953708574433, "grad_norm": 2.0298421382904053, "learning_rate": 4.4856717606767515e-06, "loss": 0.9162, "step": 2450 }, { "epoch": 1.289321409784324, "grad_norm": 2.0793778896331787, "learning_rate": 4.485249623414125e-06, "loss": 0.9568, "step": 2451 }, { "epoch": 1.2898474487112046, "grad_norm": 2.1142778396606445, "learning_rate": 4.484827332867473e-06, "loss": 1.0266, "step": 2452 }, { "epoch": 1.2903734876380852, "grad_norm": 2.5684921741485596, "learning_rate": 4.484404889069398e-06, "loss": 0.952, "step": 2453 }, { "epoch": 1.2908995265649659, "grad_norm": 1.9950506687164307, "learning_rate": 4.483982292052519e-06, "loss": 0.8993, "step": 2454 }, { "epoch": 1.2914255654918465, "grad_norm": 2.0234720706939697, "learning_rate": 4.483559541849465e-06, "loss": 0.9031, "step": 2455 }, { "epoch": 1.291951604418727, "grad_norm": 2.147324800491333, "learning_rate": 4.483136638492877e-06, "loss": 1.0002, "step": 2456 }, { "epoch": 1.2924776433456076, "grad_norm": 2.3617396354675293, "learning_rate": 4.482713582015409e-06, "loss": 0.9163, "step": 2457 }, { "epoch": 1.2930036822724882, "grad_norm": 2.0739989280700684, "learning_rate": 4.482290372449725e-06, "loss": 0.9832, "step": 2458 }, { "epoch": 1.2935297211993688, "grad_norm": 2.0278656482696533, "learning_rate": 4.4818670098285e-06, "loss": 0.8862, "step": 2459 }, { "epoch": 1.2940557601262492, "grad_norm": 2.222480058670044, "learning_rate": 4.481443494184426e-06, "loss": 0.9608, "step": 2460 }, { "epoch": 1.2945817990531299, "grad_norm": 2.0249786376953125, "learning_rate": 4.481019825550201e-06, "loss": 0.9937, "step": 2461 }, { "epoch": 1.2951078379800105, "grad_norm": 2.045994520187378, "learning_rate": 4.480596003958537e-06, "loss": 0.9731, "step": 2462 }, { "epoch": 1.2956338769068911, "grad_norm": 2.124706268310547, "learning_rate": 4.480172029442158e-06, "loss": 0.9634, "step": 2463 }, { "epoch": 1.2961599158337718, "grad_norm": 1.9319393634796143, "learning_rate": 4.479747902033801e-06, "loss": 0.9429, "step": 2464 }, { "epoch": 1.2966859547606524, "grad_norm": 2.0962777137756348, "learning_rate": 4.479323621766212e-06, "loss": 0.9516, "step": 2465 }, { "epoch": 1.2972119936875328, "grad_norm": 2.1256442070007324, "learning_rate": 4.478899188672151e-06, "loss": 0.948, "step": 2466 }, { "epoch": 1.2977380326144135, "grad_norm": 1.9735897779464722, "learning_rate": 4.4784746027843885e-06, "loss": 0.9133, "step": 2467 }, { "epoch": 1.298264071541294, "grad_norm": 1.9956401586532593, "learning_rate": 4.478049864135708e-06, "loss": 0.9491, "step": 2468 }, { "epoch": 1.2987901104681747, "grad_norm": 1.9241101741790771, "learning_rate": 4.477624972758905e-06, "loss": 0.8982, "step": 2469 }, { "epoch": 1.2993161493950551, "grad_norm": 2.131563425064087, "learning_rate": 4.477199928686784e-06, "loss": 0.9395, "step": 2470 }, { "epoch": 1.2998421883219358, "grad_norm": 2.090785264968872, "learning_rate": 4.476774731952164e-06, "loss": 0.9385, "step": 2471 }, { "epoch": 1.3003682272488164, "grad_norm": 2.0358545780181885, "learning_rate": 4.476349382587876e-06, "loss": 0.9396, "step": 2472 }, { "epoch": 1.300894266175697, "grad_norm": 2.1488404273986816, "learning_rate": 4.475923880626761e-06, "loss": 0.956, "step": 2473 }, { "epoch": 1.3014203051025777, "grad_norm": 2.1079018115997314, "learning_rate": 4.475498226101673e-06, "loss": 0.924, "step": 2474 }, { "epoch": 1.3019463440294583, "grad_norm": 2.0726771354675293, "learning_rate": 4.475072419045477e-06, "loss": 0.8923, "step": 2475 }, { "epoch": 1.3024723829563387, "grad_norm": 2.2027525901794434, "learning_rate": 4.474646459491051e-06, "loss": 0.9192, "step": 2476 }, { "epoch": 1.3029984218832193, "grad_norm": 2.288954019546509, "learning_rate": 4.474220347471282e-06, "loss": 0.9775, "step": 2477 }, { "epoch": 1.3035244608101, "grad_norm": 2.2008302211761475, "learning_rate": 4.473794083019073e-06, "loss": 0.9722, "step": 2478 }, { "epoch": 1.3040504997369806, "grad_norm": 2.234795570373535, "learning_rate": 4.473367666167335e-06, "loss": 0.9365, "step": 2479 }, { "epoch": 1.304576538663861, "grad_norm": 2.0062167644500732, "learning_rate": 4.472941096948994e-06, "loss": 0.9607, "step": 2480 }, { "epoch": 1.3051025775907417, "grad_norm": 1.9761099815368652, "learning_rate": 4.472514375396985e-06, "loss": 0.8885, "step": 2481 }, { "epoch": 1.3056286165176223, "grad_norm": 2.0999045372009277, "learning_rate": 4.4720875015442545e-06, "loss": 0.9752, "step": 2482 }, { "epoch": 1.306154655444503, "grad_norm": 2.0382134914398193, "learning_rate": 4.471660475423764e-06, "loss": 0.9698, "step": 2483 }, { "epoch": 1.3066806943713836, "grad_norm": 1.9761773347854614, "learning_rate": 4.471233297068484e-06, "loss": 0.9182, "step": 2484 }, { "epoch": 1.3072067332982642, "grad_norm": 2.030435800552368, "learning_rate": 4.4708059665113964e-06, "loss": 0.9615, "step": 2485 }, { "epoch": 1.3077327722251446, "grad_norm": 2.6968722343444824, "learning_rate": 4.470378483785499e-06, "loss": 0.8918, "step": 2486 }, { "epoch": 1.3082588111520252, "grad_norm": 1.9564226865768433, "learning_rate": 4.469950848923796e-06, "loss": 0.8998, "step": 2487 }, { "epoch": 1.3087848500789059, "grad_norm": 1.9719346761703491, "learning_rate": 4.469523061959305e-06, "loss": 0.9288, "step": 2488 }, { "epoch": 1.3093108890057863, "grad_norm": 2.0258467197418213, "learning_rate": 4.46909512292506e-06, "loss": 0.8893, "step": 2489 }, { "epoch": 1.309836927932667, "grad_norm": 2.0949339866638184, "learning_rate": 4.4686670318540985e-06, "loss": 0.9722, "step": 2490 }, { "epoch": 1.3103629668595476, "grad_norm": 1.9772303104400635, "learning_rate": 4.468238788779476e-06, "loss": 0.9216, "step": 2491 }, { "epoch": 1.3108890057864282, "grad_norm": 2.2911159992218018, "learning_rate": 4.467810393734258e-06, "loss": 0.9934, "step": 2492 }, { "epoch": 1.3114150447133088, "grad_norm": 2.1708860397338867, "learning_rate": 4.46738184675152e-06, "loss": 0.9508, "step": 2493 }, { "epoch": 1.3119410836401895, "grad_norm": 1.987321138381958, "learning_rate": 4.466953147864352e-06, "loss": 0.9174, "step": 2494 }, { "epoch": 1.31246712256707, "grad_norm": 2.016629695892334, "learning_rate": 4.466524297105855e-06, "loss": 0.9283, "step": 2495 }, { "epoch": 1.3129931614939505, "grad_norm": 2.141136407852173, "learning_rate": 4.46609529450914e-06, "loss": 0.9569, "step": 2496 }, { "epoch": 1.3135192004208311, "grad_norm": 2.0675323009490967, "learning_rate": 4.465666140107331e-06, "loss": 0.9624, "step": 2497 }, { "epoch": 1.3140452393477118, "grad_norm": 2.1058006286621094, "learning_rate": 4.465236833933565e-06, "loss": 0.9828, "step": 2498 }, { "epoch": 1.3145712782745922, "grad_norm": 2.0386290550231934, "learning_rate": 4.464807376020987e-06, "loss": 0.9602, "step": 2499 }, { "epoch": 1.3150973172014728, "grad_norm": 2.048245429992676, "learning_rate": 4.464377766402757e-06, "loss": 0.9539, "step": 2500 }, { "epoch": 1.3156233561283535, "grad_norm": 2.112946033477783, "learning_rate": 4.463948005112048e-06, "loss": 0.9845, "step": 2501 }, { "epoch": 1.316149395055234, "grad_norm": 2.1279125213623047, "learning_rate": 4.46351809218204e-06, "loss": 0.9949, "step": 2502 }, { "epoch": 1.3166754339821147, "grad_norm": 2.1278843879699707, "learning_rate": 4.463088027645927e-06, "loss": 0.9024, "step": 2503 }, { "epoch": 1.3172014729089954, "grad_norm": 2.043644428253174, "learning_rate": 4.462657811536917e-06, "loss": 0.9018, "step": 2504 }, { "epoch": 1.3177275118358758, "grad_norm": 2.12113356590271, "learning_rate": 4.462227443888227e-06, "loss": 0.9548, "step": 2505 }, { "epoch": 1.3182535507627564, "grad_norm": 2.0414116382598877, "learning_rate": 4.461796924733084e-06, "loss": 0.9442, "step": 2506 }, { "epoch": 1.318779589689637, "grad_norm": 2.12378191947937, "learning_rate": 4.4613662541047305e-06, "loss": 0.9504, "step": 2507 }, { "epoch": 1.3193056286165177, "grad_norm": 2.197679281234741, "learning_rate": 4.4609354320364204e-06, "loss": 1.0346, "step": 2508 }, { "epoch": 1.319831667543398, "grad_norm": 2.0174758434295654, "learning_rate": 4.4605044585614174e-06, "loss": 0.9338, "step": 2509 }, { "epoch": 1.3203577064702787, "grad_norm": 2.1691973209381104, "learning_rate": 4.460073333712997e-06, "loss": 0.9806, "step": 2510 }, { "epoch": 1.3208837453971594, "grad_norm": 2.1645984649658203, "learning_rate": 4.459642057524448e-06, "loss": 1.0004, "step": 2511 }, { "epoch": 1.32140978432404, "grad_norm": 2.0355732440948486, "learning_rate": 4.459210630029068e-06, "loss": 0.9044, "step": 2512 }, { "epoch": 1.3219358232509206, "grad_norm": 2.1421124935150146, "learning_rate": 4.45877905126017e-06, "loss": 0.9928, "step": 2513 }, { "epoch": 1.3224618621778013, "grad_norm": 1.9635089635849, "learning_rate": 4.458347321251076e-06, "loss": 0.893, "step": 2514 }, { "epoch": 1.3229879011046817, "grad_norm": 2.012868881225586, "learning_rate": 4.457915440035121e-06, "loss": 0.9701, "step": 2515 }, { "epoch": 1.3235139400315623, "grad_norm": 2.090686559677124, "learning_rate": 4.457483407645651e-06, "loss": 0.9606, "step": 2516 }, { "epoch": 1.324039978958443, "grad_norm": 2.0503368377685547, "learning_rate": 4.457051224116023e-06, "loss": 0.9566, "step": 2517 }, { "epoch": 1.3245660178853236, "grad_norm": 1.9777687788009644, "learning_rate": 4.456618889479608e-06, "loss": 0.986, "step": 2518 }, { "epoch": 1.325092056812204, "grad_norm": 2.0104753971099854, "learning_rate": 4.456186403769786e-06, "loss": 0.9983, "step": 2519 }, { "epoch": 1.3256180957390846, "grad_norm": 2.1226019859313965, "learning_rate": 4.4557537670199505e-06, "loss": 0.9709, "step": 2520 }, { "epoch": 1.3261441346659653, "grad_norm": 2.061079263687134, "learning_rate": 4.4553209792635055e-06, "loss": 0.9499, "step": 2521 }, { "epoch": 1.326670173592846, "grad_norm": 2.1253788471221924, "learning_rate": 4.454888040533867e-06, "loss": 0.9375, "step": 2522 }, { "epoch": 1.3271962125197265, "grad_norm": 2.1977858543395996, "learning_rate": 4.454454950864464e-06, "loss": 0.8861, "step": 2523 }, { "epoch": 1.3277222514466072, "grad_norm": 2.219135284423828, "learning_rate": 4.454021710288735e-06, "loss": 0.9265, "step": 2524 }, { "epoch": 1.3282482903734876, "grad_norm": 2.074660539627075, "learning_rate": 4.4535883188401315e-06, "loss": 0.9912, "step": 2525 }, { "epoch": 1.3287743293003682, "grad_norm": 2.1656036376953125, "learning_rate": 4.453154776552117e-06, "loss": 0.9446, "step": 2526 }, { "epoch": 1.3293003682272488, "grad_norm": 1.962275505065918, "learning_rate": 4.452721083458164e-06, "loss": 0.9025, "step": 2527 }, { "epoch": 1.3298264071541295, "grad_norm": 2.0680644512176514, "learning_rate": 4.45228723959176e-06, "loss": 0.9606, "step": 2528 }, { "epoch": 1.3303524460810099, "grad_norm": 2.117361068725586, "learning_rate": 4.451853244986403e-06, "loss": 0.9622, "step": 2529 }, { "epoch": 1.3308784850078905, "grad_norm": 2.1107771396636963, "learning_rate": 4.4514190996756005e-06, "loss": 0.9792, "step": 2530 }, { "epoch": 1.3314045239347712, "grad_norm": 2.094346761703491, "learning_rate": 4.450984803692876e-06, "loss": 0.9317, "step": 2531 }, { "epoch": 1.3319305628616518, "grad_norm": 2.103429079055786, "learning_rate": 4.45055035707176e-06, "loss": 0.9472, "step": 2532 }, { "epoch": 1.3324566017885324, "grad_norm": 2.033013105392456, "learning_rate": 4.450115759845799e-06, "loss": 0.9561, "step": 2533 }, { "epoch": 1.332982640715413, "grad_norm": 2.102220058441162, "learning_rate": 4.449681012048547e-06, "loss": 0.8936, "step": 2534 }, { "epoch": 1.3335086796422935, "grad_norm": 1.9735665321350098, "learning_rate": 4.4492461137135715e-06, "loss": 0.9421, "step": 2535 }, { "epoch": 1.334034718569174, "grad_norm": 2.1749532222747803, "learning_rate": 4.448811064874453e-06, "loss": 0.9439, "step": 2536 }, { "epoch": 1.3345607574960547, "grad_norm": 1.955908179283142, "learning_rate": 4.448375865564781e-06, "loss": 0.9124, "step": 2537 }, { "epoch": 1.3350867964229354, "grad_norm": 2.041921377182007, "learning_rate": 4.447940515818158e-06, "loss": 0.9186, "step": 2538 }, { "epoch": 1.3356128353498158, "grad_norm": 1.945713996887207, "learning_rate": 4.447505015668199e-06, "loss": 0.9246, "step": 2539 }, { "epoch": 1.3361388742766964, "grad_norm": 2.085697889328003, "learning_rate": 4.447069365148529e-06, "loss": 0.9051, "step": 2540 }, { "epoch": 1.336664913203577, "grad_norm": 2.116046667098999, "learning_rate": 4.446633564292785e-06, "loss": 0.9515, "step": 2541 }, { "epoch": 1.3371909521304577, "grad_norm": 2.1968820095062256, "learning_rate": 4.446197613134616e-06, "loss": 0.9967, "step": 2542 }, { "epoch": 1.3377169910573383, "grad_norm": 2.061464786529541, "learning_rate": 4.445761511707682e-06, "loss": 0.9259, "step": 2543 }, { "epoch": 1.338243029984219, "grad_norm": 1.9848337173461914, "learning_rate": 4.445325260045656e-06, "loss": 0.9775, "step": 2544 }, { "epoch": 1.3387690689110994, "grad_norm": 2.1794087886810303, "learning_rate": 4.444888858182222e-06, "loss": 0.9738, "step": 2545 }, { "epoch": 1.33929510783798, "grad_norm": 2.2552716732025146, "learning_rate": 4.4444523061510725e-06, "loss": 0.9776, "step": 2546 }, { "epoch": 1.3398211467648606, "grad_norm": 2.059893846511841, "learning_rate": 4.444015603985917e-06, "loss": 0.9579, "step": 2547 }, { "epoch": 1.3403471856917413, "grad_norm": 2.019697666168213, "learning_rate": 4.443578751720472e-06, "loss": 0.9773, "step": 2548 }, { "epoch": 1.3408732246186217, "grad_norm": 2.0542571544647217, "learning_rate": 4.44314174938847e-06, "loss": 0.9466, "step": 2549 }, { "epoch": 1.3413992635455023, "grad_norm": 2.059788942337036, "learning_rate": 4.44270459702365e-06, "loss": 0.9203, "step": 2550 }, { "epoch": 1.341925302472383, "grad_norm": 2.0966968536376953, "learning_rate": 4.442267294659767e-06, "loss": 0.9394, "step": 2551 }, { "epoch": 1.3424513413992636, "grad_norm": 1.9987400770187378, "learning_rate": 4.441829842330585e-06, "loss": 0.9614, "step": 2552 }, { "epoch": 1.3429773803261442, "grad_norm": 2.062717914581299, "learning_rate": 4.44139224006988e-06, "loss": 0.9811, "step": 2553 }, { "epoch": 1.3435034192530249, "grad_norm": 2.2932512760162354, "learning_rate": 4.440954487911441e-06, "loss": 0.9232, "step": 2554 }, { "epoch": 1.3440294581799053, "grad_norm": 2.021737813949585, "learning_rate": 4.440516585889066e-06, "loss": 0.984, "step": 2555 }, { "epoch": 1.344555497106786, "grad_norm": 2.053077459335327, "learning_rate": 4.440078534036567e-06, "loss": 1.0011, "step": 2556 }, { "epoch": 1.3450815360336665, "grad_norm": 2.045336961746216, "learning_rate": 4.439640332387767e-06, "loss": 0.9304, "step": 2557 }, { "epoch": 1.345607574960547, "grad_norm": 2.1932125091552734, "learning_rate": 4.439201980976498e-06, "loss": 0.9421, "step": 2558 }, { "epoch": 1.3461336138874276, "grad_norm": 1.9581273794174194, "learning_rate": 4.438763479836609e-06, "loss": 0.9252, "step": 2559 }, { "epoch": 1.3466596528143082, "grad_norm": 1.9648422002792358, "learning_rate": 4.4383248290019555e-06, "loss": 0.932, "step": 2560 }, { "epoch": 1.3471856917411889, "grad_norm": 2.0493264198303223, "learning_rate": 4.437886028506406e-06, "loss": 0.9754, "step": 2561 }, { "epoch": 1.3477117306680695, "grad_norm": 2.2594149112701416, "learning_rate": 4.437447078383843e-06, "loss": 0.9004, "step": 2562 }, { "epoch": 1.3482377695949501, "grad_norm": 2.0411531925201416, "learning_rate": 4.437007978668156e-06, "loss": 0.9635, "step": 2563 }, { "epoch": 1.3487638085218308, "grad_norm": 2.0295286178588867, "learning_rate": 4.436568729393249e-06, "loss": 0.9224, "step": 2564 }, { "epoch": 1.3492898474487112, "grad_norm": 2.0805602073669434, "learning_rate": 4.436129330593038e-06, "loss": 0.9636, "step": 2565 }, { "epoch": 1.3498158863755918, "grad_norm": 2.0708534717559814, "learning_rate": 4.435689782301449e-06, "loss": 0.9452, "step": 2566 }, { "epoch": 1.3503419253024724, "grad_norm": 2.005274534225464, "learning_rate": 4.435250084552422e-06, "loss": 0.9832, "step": 2567 }, { "epoch": 1.3508679642293528, "grad_norm": 2.108915328979492, "learning_rate": 4.434810237379903e-06, "loss": 0.9682, "step": 2568 }, { "epoch": 1.3513940031562335, "grad_norm": 2.290133476257324, "learning_rate": 4.434370240817856e-06, "loss": 0.9577, "step": 2569 }, { "epoch": 1.3519200420831141, "grad_norm": 1.9996196031570435, "learning_rate": 4.433930094900253e-06, "loss": 0.9494, "step": 2570 }, { "epoch": 1.3524460810099947, "grad_norm": 2.068378210067749, "learning_rate": 4.4334897996610795e-06, "loss": 0.9632, "step": 2571 }, { "epoch": 1.3529721199368754, "grad_norm": 2.2263996601104736, "learning_rate": 4.4330493551343286e-06, "loss": 0.9812, "step": 2572 }, { "epoch": 1.353498158863756, "grad_norm": 2.0052454471588135, "learning_rate": 4.432608761354011e-06, "loss": 0.9448, "step": 2573 }, { "epoch": 1.3540241977906364, "grad_norm": 1.876429557800293, "learning_rate": 4.432168018354143e-06, "loss": 0.9026, "step": 2574 }, { "epoch": 1.354550236717517, "grad_norm": 2.1408214569091797, "learning_rate": 4.431727126168757e-06, "loss": 0.9356, "step": 2575 }, { "epoch": 1.3550762756443977, "grad_norm": 1.9882752895355225, "learning_rate": 4.431286084831892e-06, "loss": 0.9186, "step": 2576 }, { "epoch": 1.3556023145712783, "grad_norm": 2.128145456314087, "learning_rate": 4.430844894377605e-06, "loss": 0.9405, "step": 2577 }, { "epoch": 1.3561283534981587, "grad_norm": 2.059305429458618, "learning_rate": 4.4304035548399585e-06, "loss": 0.9209, "step": 2578 }, { "epoch": 1.3566543924250394, "grad_norm": 2.2779366970062256, "learning_rate": 4.429962066253029e-06, "loss": 1.0281, "step": 2579 }, { "epoch": 1.35718043135192, "grad_norm": 1.9767261743545532, "learning_rate": 4.429520428650907e-06, "loss": 0.9581, "step": 2580 }, { "epoch": 1.3577064702788006, "grad_norm": 2.1368567943573, "learning_rate": 4.42907864206769e-06, "loss": 0.952, "step": 2581 }, { "epoch": 1.3582325092056813, "grad_norm": 2.1179497241973877, "learning_rate": 4.428636706537488e-06, "loss": 0.9442, "step": 2582 }, { "epoch": 1.358758548132562, "grad_norm": 2.0669925212860107, "learning_rate": 4.428194622094425e-06, "loss": 0.9727, "step": 2583 }, { "epoch": 1.3592845870594423, "grad_norm": 2.081160068511963, "learning_rate": 4.427752388772635e-06, "loss": 0.9751, "step": 2584 }, { "epoch": 1.359810625986323, "grad_norm": 1.9884616136550903, "learning_rate": 4.427310006606264e-06, "loss": 0.9226, "step": 2585 }, { "epoch": 1.3603366649132036, "grad_norm": 2.130305528640747, "learning_rate": 4.426867475629466e-06, "loss": 0.9072, "step": 2586 }, { "epoch": 1.3608627038400842, "grad_norm": 2.2059152126312256, "learning_rate": 4.426424795876414e-06, "loss": 0.9469, "step": 2587 }, { "epoch": 1.3613887427669646, "grad_norm": 2.043848752975464, "learning_rate": 4.425981967381284e-06, "loss": 0.9374, "step": 2588 }, { "epoch": 1.3619147816938453, "grad_norm": 2.1981630325317383, "learning_rate": 4.4255389901782695e-06, "loss": 0.924, "step": 2589 }, { "epoch": 1.362440820620726, "grad_norm": 2.0194413661956787, "learning_rate": 4.425095864301573e-06, "loss": 0.9838, "step": 2590 }, { "epoch": 1.3629668595476065, "grad_norm": 2.1409754753112793, "learning_rate": 4.424652589785409e-06, "loss": 0.9054, "step": 2591 }, { "epoch": 1.3634928984744872, "grad_norm": 2.040001153945923, "learning_rate": 4.424209166664002e-06, "loss": 0.9419, "step": 2592 }, { "epoch": 1.3640189374013678, "grad_norm": 2.015530824661255, "learning_rate": 4.423765594971592e-06, "loss": 0.8718, "step": 2593 }, { "epoch": 1.3645449763282482, "grad_norm": 2.042180061340332, "learning_rate": 4.423321874742425e-06, "loss": 0.926, "step": 2594 }, { "epoch": 1.3650710152551289, "grad_norm": 2.10259747505188, "learning_rate": 4.422878006010764e-06, "loss": 0.9064, "step": 2595 }, { "epoch": 1.3655970541820095, "grad_norm": 1.9849088191986084, "learning_rate": 4.4224339888108785e-06, "loss": 0.9026, "step": 2596 }, { "epoch": 1.3661230931088901, "grad_norm": 2.0189146995544434, "learning_rate": 4.421989823177053e-06, "loss": 0.9334, "step": 2597 }, { "epoch": 1.3666491320357705, "grad_norm": 2.3741374015808105, "learning_rate": 4.421545509143581e-06, "loss": 0.9743, "step": 2598 }, { "epoch": 1.3671751709626512, "grad_norm": 2.226306438446045, "learning_rate": 4.42110104674477e-06, "loss": 0.9548, "step": 2599 }, { "epoch": 1.3677012098895318, "grad_norm": 2.2016761302948, "learning_rate": 4.420656436014937e-06, "loss": 0.9332, "step": 2600 }, { "epoch": 1.3682272488164124, "grad_norm": 2.193507671356201, "learning_rate": 4.420211676988412e-06, "loss": 0.9941, "step": 2601 }, { "epoch": 1.368753287743293, "grad_norm": 2.064352035522461, "learning_rate": 4.419766769699534e-06, "loss": 0.9172, "step": 2602 }, { "epoch": 1.3692793266701737, "grad_norm": 2.1775360107421875, "learning_rate": 4.419321714182654e-06, "loss": 0.9771, "step": 2603 }, { "epoch": 1.3698053655970541, "grad_norm": 2.454411745071411, "learning_rate": 4.4188765104721386e-06, "loss": 0.9139, "step": 2604 }, { "epoch": 1.3703314045239348, "grad_norm": 2.157207727432251, "learning_rate": 4.41843115860236e-06, "loss": 0.9697, "step": 2605 }, { "epoch": 1.3708574434508154, "grad_norm": 2.005450963973999, "learning_rate": 4.417985658607706e-06, "loss": 0.9366, "step": 2606 }, { "epoch": 1.371383482377696, "grad_norm": 1.9498809576034546, "learning_rate": 4.417540010522573e-06, "loss": 0.8898, "step": 2607 }, { "epoch": 1.3719095213045764, "grad_norm": 2.0493645668029785, "learning_rate": 4.4170942143813715e-06, "loss": 0.9417, "step": 2608 }, { "epoch": 1.372435560231457, "grad_norm": 2.289781093597412, "learning_rate": 4.416648270218521e-06, "loss": 0.9575, "step": 2609 }, { "epoch": 1.3729615991583377, "grad_norm": 2.0118207931518555, "learning_rate": 4.416202178068454e-06, "loss": 0.9282, "step": 2610 }, { "epoch": 1.3734876380852183, "grad_norm": 1.9697855710983276, "learning_rate": 4.415755937965615e-06, "loss": 0.9277, "step": 2611 }, { "epoch": 1.374013677012099, "grad_norm": 1.9677777290344238, "learning_rate": 4.415309549944456e-06, "loss": 0.9322, "step": 2612 }, { "epoch": 1.3745397159389796, "grad_norm": 2.0683321952819824, "learning_rate": 4.414863014039445e-06, "loss": 0.9361, "step": 2613 }, { "epoch": 1.37506575486586, "grad_norm": 2.1321773529052734, "learning_rate": 4.414416330285061e-06, "loss": 0.9093, "step": 2614 }, { "epoch": 1.3755917937927407, "grad_norm": 1.9600199460983276, "learning_rate": 4.413969498715791e-06, "loss": 0.9428, "step": 2615 }, { "epoch": 1.3761178327196213, "grad_norm": 1.8668115139007568, "learning_rate": 4.413522519366136e-06, "loss": 0.9093, "step": 2616 }, { "epoch": 1.3766438716465017, "grad_norm": 1.9672036170959473, "learning_rate": 4.413075392270608e-06, "loss": 0.9426, "step": 2617 }, { "epoch": 1.3771699105733823, "grad_norm": 1.9827178716659546, "learning_rate": 4.412628117463731e-06, "loss": 0.9616, "step": 2618 }, { "epoch": 1.377695949500263, "grad_norm": 2.099527597427368, "learning_rate": 4.4121806949800395e-06, "loss": 0.938, "step": 2619 }, { "epoch": 1.3782219884271436, "grad_norm": 2.0840420722961426, "learning_rate": 4.411733124854079e-06, "loss": 0.9304, "step": 2620 }, { "epoch": 1.3787480273540242, "grad_norm": 1.967368245124817, "learning_rate": 4.411285407120407e-06, "loss": 0.9662, "step": 2621 }, { "epoch": 1.3792740662809049, "grad_norm": 2.0552897453308105, "learning_rate": 4.410837541813594e-06, "loss": 0.929, "step": 2622 }, { "epoch": 1.3798001052077855, "grad_norm": 2.1046664714813232, "learning_rate": 4.410389528968218e-06, "loss": 0.9127, "step": 2623 }, { "epoch": 1.380326144134666, "grad_norm": 2.147519111633301, "learning_rate": 4.409941368618873e-06, "loss": 0.9379, "step": 2624 }, { "epoch": 1.3808521830615466, "grad_norm": 1.9555045366287231, "learning_rate": 4.409493060800161e-06, "loss": 0.9662, "step": 2625 }, { "epoch": 1.3813782219884272, "grad_norm": 2.1241092681884766, "learning_rate": 4.409044605546697e-06, "loss": 0.9444, "step": 2626 }, { "epoch": 1.3819042609153076, "grad_norm": 2.022423028945923, "learning_rate": 4.408596002893105e-06, "loss": 0.9405, "step": 2627 }, { "epoch": 1.3824302998421882, "grad_norm": 1.9746357202529907, "learning_rate": 4.4081472528740254e-06, "loss": 0.9266, "step": 2628 }, { "epoch": 1.3829563387690689, "grad_norm": 2.0098395347595215, "learning_rate": 4.4076983555241036e-06, "loss": 0.9917, "step": 2629 }, { "epoch": 1.3834823776959495, "grad_norm": 2.0168423652648926, "learning_rate": 4.4072493108780024e-06, "loss": 0.9402, "step": 2630 }, { "epoch": 1.3840084166228301, "grad_norm": 2.041032552719116, "learning_rate": 4.406800118970391e-06, "loss": 0.8898, "step": 2631 }, { "epoch": 1.3845344555497108, "grad_norm": 1.9849998950958252, "learning_rate": 4.406350779835955e-06, "loss": 0.9602, "step": 2632 }, { "epoch": 1.3850604944765912, "grad_norm": 2.139587640762329, "learning_rate": 4.405901293509384e-06, "loss": 0.9802, "step": 2633 }, { "epoch": 1.3855865334034718, "grad_norm": 2.1281473636627197, "learning_rate": 4.405451660025388e-06, "loss": 0.9788, "step": 2634 }, { "epoch": 1.3861125723303525, "grad_norm": 2.1347475051879883, "learning_rate": 4.405001879418682e-06, "loss": 0.9587, "step": 2635 }, { "epoch": 1.386638611257233, "grad_norm": 2.028740882873535, "learning_rate": 4.404551951723995e-06, "loss": 0.9376, "step": 2636 }, { "epoch": 1.3871646501841135, "grad_norm": 2.101991891860962, "learning_rate": 4.404101876976064e-06, "loss": 0.9696, "step": 2637 }, { "epoch": 1.3876906891109941, "grad_norm": 2.0458288192749023, "learning_rate": 4.403651655209643e-06, "loss": 1.0081, "step": 2638 }, { "epoch": 1.3882167280378748, "grad_norm": 1.973692536354065, "learning_rate": 4.403201286459493e-06, "loss": 0.8829, "step": 2639 }, { "epoch": 1.3887427669647554, "grad_norm": 2.2197206020355225, "learning_rate": 4.402750770760387e-06, "loss": 1.0196, "step": 2640 }, { "epoch": 1.389268805891636, "grad_norm": 2.0076587200164795, "learning_rate": 4.40230010814711e-06, "loss": 0.911, "step": 2641 }, { "epoch": 1.3897948448185167, "grad_norm": 2.0094308853149414, "learning_rate": 4.4018492986544615e-06, "loss": 0.9587, "step": 2642 }, { "epoch": 1.390320883745397, "grad_norm": 2.03900146484375, "learning_rate": 4.401398342317244e-06, "loss": 0.9515, "step": 2643 }, { "epoch": 1.3908469226722777, "grad_norm": 2.0408756732940674, "learning_rate": 4.400947239170281e-06, "loss": 0.9081, "step": 2644 }, { "epoch": 1.3913729615991584, "grad_norm": 2.020554304122925, "learning_rate": 4.4004959892484e-06, "loss": 0.9323, "step": 2645 }, { "epoch": 1.391899000526039, "grad_norm": 2.05672287940979, "learning_rate": 4.400044592586444e-06, "loss": 0.9031, "step": 2646 }, { "epoch": 1.3924250394529194, "grad_norm": 1.9206362962722778, "learning_rate": 4.399593049219265e-06, "loss": 0.8898, "step": 2647 }, { "epoch": 1.3929510783798, "grad_norm": 2.1542060375213623, "learning_rate": 4.39914135918173e-06, "loss": 0.9092, "step": 2648 }, { "epoch": 1.3934771173066807, "grad_norm": 2.160116195678711, "learning_rate": 4.39868952250871e-06, "loss": 0.9383, "step": 2649 }, { "epoch": 1.3940031562335613, "grad_norm": 2.011389970779419, "learning_rate": 4.398237539235096e-06, "loss": 0.9644, "step": 2650 }, { "epoch": 1.394529195160442, "grad_norm": 2.059589147567749, "learning_rate": 4.397785409395785e-06, "loss": 0.9255, "step": 2651 }, { "epoch": 1.3950552340873226, "grad_norm": 1.9272221326828003, "learning_rate": 4.3973331330256855e-06, "loss": 0.9214, "step": 2652 }, { "epoch": 1.395581273014203, "grad_norm": 1.9866399765014648, "learning_rate": 4.39688071015972e-06, "loss": 0.9349, "step": 2653 }, { "epoch": 1.3961073119410836, "grad_norm": 1.939637303352356, "learning_rate": 4.3964281408328205e-06, "loss": 0.9356, "step": 2654 }, { "epoch": 1.3966333508679643, "grad_norm": 2.0816657543182373, "learning_rate": 4.3959754250799295e-06, "loss": 0.937, "step": 2655 }, { "epoch": 1.3971593897948449, "grad_norm": 2.040522336959839, "learning_rate": 4.395522562936004e-06, "loss": 0.9206, "step": 2656 }, { "epoch": 1.3976854287217253, "grad_norm": 2.075200080871582, "learning_rate": 4.395069554436007e-06, "loss": 0.9118, "step": 2657 }, { "epoch": 1.398211467648606, "grad_norm": 1.8599414825439453, "learning_rate": 4.394616399614918e-06, "loss": 0.9123, "step": 2658 }, { "epoch": 1.3987375065754866, "grad_norm": 1.982853889465332, "learning_rate": 4.394163098507725e-06, "loss": 0.9036, "step": 2659 }, { "epoch": 1.3992635455023672, "grad_norm": 1.9821887016296387, "learning_rate": 4.39370965114943e-06, "loss": 0.979, "step": 2660 }, { "epoch": 1.3997895844292478, "grad_norm": 2.0990328788757324, "learning_rate": 4.393256057575041e-06, "loss": 0.9789, "step": 2661 }, { "epoch": 1.4003156233561285, "grad_norm": 2.0919525623321533, "learning_rate": 4.392802317819582e-06, "loss": 0.9322, "step": 2662 }, { "epoch": 1.4008416622830089, "grad_norm": 2.128913640975952, "learning_rate": 4.392348431918088e-06, "loss": 0.9616, "step": 2663 }, { "epoch": 1.4013677012098895, "grad_norm": 2.1351349353790283, "learning_rate": 4.391894399905604e-06, "loss": 0.9615, "step": 2664 }, { "epoch": 1.4018937401367702, "grad_norm": 2.0462634563446045, "learning_rate": 4.391440221817186e-06, "loss": 0.9211, "step": 2665 }, { "epoch": 1.4024197790636508, "grad_norm": 2.055590867996216, "learning_rate": 4.390985897687902e-06, "loss": 0.9131, "step": 2666 }, { "epoch": 1.4029458179905312, "grad_norm": 2.18259596824646, "learning_rate": 4.390531427552829e-06, "loss": 0.95, "step": 2667 }, { "epoch": 1.4034718569174118, "grad_norm": 2.030566453933716, "learning_rate": 4.39007681144706e-06, "loss": 0.9661, "step": 2668 }, { "epoch": 1.4039978958442925, "grad_norm": 2.1099185943603516, "learning_rate": 4.389622049405696e-06, "loss": 0.9214, "step": 2669 }, { "epoch": 1.404523934771173, "grad_norm": 2.0410983562469482, "learning_rate": 4.389167141463849e-06, "loss": 0.951, "step": 2670 }, { "epoch": 1.4050499736980537, "grad_norm": 2.051135778427124, "learning_rate": 4.388712087656644e-06, "loss": 0.9663, "step": 2671 }, { "epoch": 1.4055760126249344, "grad_norm": 1.989233374595642, "learning_rate": 4.388256888019215e-06, "loss": 0.9677, "step": 2672 }, { "epoch": 1.4061020515518148, "grad_norm": 2.050755262374878, "learning_rate": 4.3878015425867105e-06, "loss": 0.8911, "step": 2673 }, { "epoch": 1.4066280904786954, "grad_norm": 2.1822195053100586, "learning_rate": 4.387346051394287e-06, "loss": 0.922, "step": 2674 }, { "epoch": 1.407154129405576, "grad_norm": 1.9840717315673828, "learning_rate": 4.386890414477114e-06, "loss": 0.9505, "step": 2675 }, { "epoch": 1.4076801683324567, "grad_norm": 2.111574172973633, "learning_rate": 4.386434631870372e-06, "loss": 0.9313, "step": 2676 }, { "epoch": 1.408206207259337, "grad_norm": 2.0208487510681152, "learning_rate": 4.3859787036092525e-06, "loss": 0.9173, "step": 2677 }, { "epoch": 1.4087322461862177, "grad_norm": 2.087120771408081, "learning_rate": 4.3855226297289595e-06, "loss": 0.9596, "step": 2678 }, { "epoch": 1.4092582851130984, "grad_norm": 2.1067450046539307, "learning_rate": 4.385066410264706e-06, "loss": 0.9707, "step": 2679 }, { "epoch": 1.409784324039979, "grad_norm": 1.8783036470413208, "learning_rate": 4.3846100452517156e-06, "loss": 0.9167, "step": 2680 }, { "epoch": 1.4103103629668596, "grad_norm": 2.2781338691711426, "learning_rate": 4.3841535347252284e-06, "loss": 0.8218, "step": 2681 }, { "epoch": 1.4108364018937403, "grad_norm": 1.961376428604126, "learning_rate": 4.38369687872049e-06, "loss": 0.9083, "step": 2682 }, { "epoch": 1.4113624408206207, "grad_norm": 1.8851439952850342, "learning_rate": 4.383240077272761e-06, "loss": 0.932, "step": 2683 }, { "epoch": 1.4118884797475013, "grad_norm": 2.1675922870635986, "learning_rate": 4.3827831304173105e-06, "loss": 0.9404, "step": 2684 }, { "epoch": 1.412414518674382, "grad_norm": 2.210278272628784, "learning_rate": 4.3823260381894205e-06, "loss": 0.9639, "step": 2685 }, { "epoch": 1.4129405576012624, "grad_norm": 2.1129822731018066, "learning_rate": 4.381868800624383e-06, "loss": 0.9686, "step": 2686 }, { "epoch": 1.413466596528143, "grad_norm": 2.0460870265960693, "learning_rate": 4.381411417757503e-06, "loss": 0.9283, "step": 2687 }, { "epoch": 1.4139926354550236, "grad_norm": 1.9752156734466553, "learning_rate": 4.3809538896240956e-06, "loss": 0.9529, "step": 2688 }, { "epoch": 1.4145186743819043, "grad_norm": 1.92936110496521, "learning_rate": 4.3804962162594865e-06, "loss": 0.9012, "step": 2689 }, { "epoch": 1.415044713308785, "grad_norm": 1.996164083480835, "learning_rate": 4.380038397699015e-06, "loss": 0.9451, "step": 2690 }, { "epoch": 1.4155707522356655, "grad_norm": 2.04750394821167, "learning_rate": 4.379580433978027e-06, "loss": 0.8968, "step": 2691 }, { "epoch": 1.4160967911625462, "grad_norm": 2.072307586669922, "learning_rate": 4.379122325131884e-06, "loss": 0.9479, "step": 2692 }, { "epoch": 1.4166228300894266, "grad_norm": 2.035949468612671, "learning_rate": 4.378664071195959e-06, "loss": 0.9246, "step": 2693 }, { "epoch": 1.4171488690163072, "grad_norm": 2.0732228755950928, "learning_rate": 4.3782056722056334e-06, "loss": 0.935, "step": 2694 }, { "epoch": 1.4176749079431878, "grad_norm": 1.9315673112869263, "learning_rate": 4.377747128196299e-06, "loss": 0.9209, "step": 2695 }, { "epoch": 1.4182009468700683, "grad_norm": 2.1663613319396973, "learning_rate": 4.377288439203362e-06, "loss": 0.9496, "step": 2696 }, { "epoch": 1.418726985796949, "grad_norm": 1.9999171495437622, "learning_rate": 4.37682960526224e-06, "loss": 0.9614, "step": 2697 }, { "epoch": 1.4192530247238295, "grad_norm": 2.052579879760742, "learning_rate": 4.376370626408357e-06, "loss": 0.9347, "step": 2698 }, { "epoch": 1.4197790636507102, "grad_norm": 2.0660557746887207, "learning_rate": 4.375911502677154e-06, "loss": 0.922, "step": 2699 }, { "epoch": 1.4203051025775908, "grad_norm": 1.9994977712631226, "learning_rate": 4.37545223410408e-06, "loss": 0.942, "step": 2700 }, { "epoch": 1.4208311415044714, "grad_norm": 2.145674228668213, "learning_rate": 4.3749928207245954e-06, "loss": 0.9308, "step": 2701 }, { "epoch": 1.4213571804313518, "grad_norm": 2.1065597534179688, "learning_rate": 4.374533262574172e-06, "loss": 0.9852, "step": 2702 }, { "epoch": 1.4218832193582325, "grad_norm": 2.435072422027588, "learning_rate": 4.374073559688294e-06, "loss": 0.9525, "step": 2703 }, { "epoch": 1.422409258285113, "grad_norm": 2.01636004447937, "learning_rate": 4.373613712102455e-06, "loss": 0.944, "step": 2704 }, { "epoch": 1.4229352972119937, "grad_norm": 2.146397829055786, "learning_rate": 4.373153719852161e-06, "loss": 0.9631, "step": 2705 }, { "epoch": 1.4234613361388742, "grad_norm": 1.9630359411239624, "learning_rate": 4.372693582972928e-06, "loss": 0.9303, "step": 2706 }, { "epoch": 1.4239873750657548, "grad_norm": 1.8797608613967896, "learning_rate": 4.372233301500284e-06, "loss": 0.9177, "step": 2707 }, { "epoch": 1.4245134139926354, "grad_norm": 1.992043137550354, "learning_rate": 4.371772875469767e-06, "loss": 0.9622, "step": 2708 }, { "epoch": 1.425039452919516, "grad_norm": 1.9467499256134033, "learning_rate": 4.371312304916929e-06, "loss": 0.9322, "step": 2709 }, { "epoch": 1.4255654918463967, "grad_norm": 2.274771213531494, "learning_rate": 4.370851589877332e-06, "loss": 0.9428, "step": 2710 }, { "epoch": 1.4260915307732773, "grad_norm": 2.126898765563965, "learning_rate": 4.370390730386546e-06, "loss": 0.9545, "step": 2711 }, { "epoch": 1.4266175697001577, "grad_norm": 2.096545457839966, "learning_rate": 4.369929726480156e-06, "loss": 0.9079, "step": 2712 }, { "epoch": 1.4271436086270384, "grad_norm": 2.1355035305023193, "learning_rate": 4.369468578193755e-06, "loss": 0.9582, "step": 2713 }, { "epoch": 1.427669647553919, "grad_norm": 2.243448257446289, "learning_rate": 4.369007285562952e-06, "loss": 0.9886, "step": 2714 }, { "epoch": 1.4281956864807996, "grad_norm": 2.0589983463287354, "learning_rate": 4.368545848623362e-06, "loss": 0.9882, "step": 2715 }, { "epoch": 1.42872172540768, "grad_norm": 2.072906732559204, "learning_rate": 4.368084267410614e-06, "loss": 0.9577, "step": 2716 }, { "epoch": 1.4292477643345607, "grad_norm": 2.1737213134765625, "learning_rate": 4.367622541960347e-06, "loss": 0.9759, "step": 2717 }, { "epoch": 1.4297738032614413, "grad_norm": 2.2150609493255615, "learning_rate": 4.367160672308211e-06, "loss": 0.9581, "step": 2718 }, { "epoch": 1.430299842188322, "grad_norm": 2.2187082767486572, "learning_rate": 4.366698658489869e-06, "loss": 1.0004, "step": 2719 }, { "epoch": 1.4308258811152026, "grad_norm": 2.127653121948242, "learning_rate": 4.366236500540992e-06, "loss": 0.9436, "step": 2720 }, { "epoch": 1.4313519200420832, "grad_norm": 2.180147171020508, "learning_rate": 4.365774198497266e-06, "loss": 0.9978, "step": 2721 }, { "epoch": 1.4318779589689636, "grad_norm": 1.9998571872711182, "learning_rate": 4.365311752394384e-06, "loss": 0.9294, "step": 2722 }, { "epoch": 1.4324039978958443, "grad_norm": 1.970004916191101, "learning_rate": 4.364849162268054e-06, "loss": 0.9654, "step": 2723 }, { "epoch": 1.432930036822725, "grad_norm": 2.1497817039489746, "learning_rate": 4.364386428153992e-06, "loss": 0.9413, "step": 2724 }, { "epoch": 1.4334560757496055, "grad_norm": 1.9739991426467896, "learning_rate": 4.363923550087926e-06, "loss": 0.9488, "step": 2725 }, { "epoch": 1.433982114676486, "grad_norm": 1.930517315864563, "learning_rate": 4.363460528105598e-06, "loss": 0.9284, "step": 2726 }, { "epoch": 1.4345081536033666, "grad_norm": 2.066530227661133, "learning_rate": 4.3629973622427545e-06, "loss": 0.965, "step": 2727 }, { "epoch": 1.4350341925302472, "grad_norm": 2.2125234603881836, "learning_rate": 4.362534052535161e-06, "loss": 1.0175, "step": 2728 }, { "epoch": 1.4355602314571279, "grad_norm": 2.2930169105529785, "learning_rate": 4.362070599018587e-06, "loss": 0.9545, "step": 2729 }, { "epoch": 1.4360862703840085, "grad_norm": 1.9766452312469482, "learning_rate": 4.36160700172882e-06, "loss": 0.9357, "step": 2730 }, { "epoch": 1.4366123093108891, "grad_norm": 2.060267210006714, "learning_rate": 4.3611432607016536e-06, "loss": 0.9723, "step": 2731 }, { "epoch": 1.4371383482377695, "grad_norm": 2.0135252475738525, "learning_rate": 4.360679375972893e-06, "loss": 0.9515, "step": 2732 }, { "epoch": 1.4376643871646502, "grad_norm": 1.9998459815979004, "learning_rate": 4.360215347578355e-06, "loss": 0.8722, "step": 2733 }, { "epoch": 1.4381904260915308, "grad_norm": 2.0200717449188232, "learning_rate": 4.3597511755538695e-06, "loss": 0.9758, "step": 2734 }, { "epoch": 1.4387164650184114, "grad_norm": 2.147653341293335, "learning_rate": 4.359286859935276e-06, "loss": 0.964, "step": 2735 }, { "epoch": 1.4392425039452919, "grad_norm": 1.997365951538086, "learning_rate": 4.358822400758424e-06, "loss": 0.9303, "step": 2736 }, { "epoch": 1.4397685428721725, "grad_norm": 2.0461249351501465, "learning_rate": 4.358357798059175e-06, "loss": 0.9045, "step": 2737 }, { "epoch": 1.4402945817990531, "grad_norm": 2.1255886554718018, "learning_rate": 4.357893051873402e-06, "loss": 0.9565, "step": 2738 }, { "epoch": 1.4408206207259338, "grad_norm": 2.0816283226013184, "learning_rate": 4.357428162236988e-06, "loss": 0.9459, "step": 2739 }, { "epoch": 1.4413466596528144, "grad_norm": 2.100107192993164, "learning_rate": 4.35696312918583e-06, "loss": 0.962, "step": 2740 }, { "epoch": 1.441872698579695, "grad_norm": 2.2955291271209717, "learning_rate": 4.356497952755832e-06, "loss": 1.0057, "step": 2741 }, { "epoch": 1.4423987375065754, "grad_norm": 2.0716357231140137, "learning_rate": 4.3560326329829115e-06, "loss": 0.9997, "step": 2742 }, { "epoch": 1.442924776433456, "grad_norm": 1.953810453414917, "learning_rate": 4.3555671699029965e-06, "loss": 0.9002, "step": 2743 }, { "epoch": 1.4434508153603367, "grad_norm": 2.075598955154419, "learning_rate": 4.355101563552026e-06, "loss": 1.0174, "step": 2744 }, { "epoch": 1.4439768542872171, "grad_norm": 2.031865119934082, "learning_rate": 4.35463581396595e-06, "loss": 0.9218, "step": 2745 }, { "epoch": 1.4445028932140977, "grad_norm": 2.2142674922943115, "learning_rate": 4.354169921180729e-06, "loss": 0.9674, "step": 2746 }, { "epoch": 1.4450289321409784, "grad_norm": 1.9982706308364868, "learning_rate": 4.353703885232337e-06, "loss": 0.9983, "step": 2747 }, { "epoch": 1.445554971067859, "grad_norm": 2.209235429763794, "learning_rate": 4.3532377061567564e-06, "loss": 0.95, "step": 2748 }, { "epoch": 1.4460810099947397, "grad_norm": 2.1034419536590576, "learning_rate": 4.352771383989982e-06, "loss": 0.9972, "step": 2749 }, { "epoch": 1.4466070489216203, "grad_norm": 2.0965964794158936, "learning_rate": 4.3523049187680176e-06, "loss": 0.9163, "step": 2750 }, { "epoch": 1.447133087848501, "grad_norm": 2.135223627090454, "learning_rate": 4.351838310526882e-06, "loss": 0.921, "step": 2751 }, { "epoch": 1.4476591267753813, "grad_norm": 2.009145498275757, "learning_rate": 4.351371559302601e-06, "loss": 0.9618, "step": 2752 }, { "epoch": 1.448185165702262, "grad_norm": 2.0639655590057373, "learning_rate": 4.350904665131214e-06, "loss": 0.9415, "step": 2753 }, { "epoch": 1.4487112046291426, "grad_norm": 2.023287773132324, "learning_rate": 4.35043762804877e-06, "loss": 0.934, "step": 2754 }, { "epoch": 1.449237243556023, "grad_norm": 2.6245598793029785, "learning_rate": 4.349970448091331e-06, "loss": 0.98, "step": 2755 }, { "epoch": 1.4497632824829036, "grad_norm": 1.9980888366699219, "learning_rate": 4.349503125294966e-06, "loss": 0.926, "step": 2756 }, { "epoch": 1.4502893214097843, "grad_norm": 2.0972256660461426, "learning_rate": 4.3490356596957606e-06, "loss": 0.9526, "step": 2757 }, { "epoch": 1.450815360336665, "grad_norm": 1.9339475631713867, "learning_rate": 4.348568051329807e-06, "loss": 0.946, "step": 2758 }, { "epoch": 1.4513413992635456, "grad_norm": 2.0820112228393555, "learning_rate": 4.348100300233209e-06, "loss": 0.9795, "step": 2759 }, { "epoch": 1.4518674381904262, "grad_norm": 1.9987796545028687, "learning_rate": 4.347632406442085e-06, "loss": 0.9559, "step": 2760 }, { "epoch": 1.4523934771173068, "grad_norm": 2.133943557739258, "learning_rate": 4.3471643699925594e-06, "loss": 0.9414, "step": 2761 }, { "epoch": 1.4529195160441872, "grad_norm": 1.9641112089157104, "learning_rate": 4.346696190920771e-06, "loss": 0.8977, "step": 2762 }, { "epoch": 1.4534455549710679, "grad_norm": 1.9633978605270386, "learning_rate": 4.3462278692628675e-06, "loss": 0.951, "step": 2763 }, { "epoch": 1.4539715938979485, "grad_norm": 2.073227643966675, "learning_rate": 4.34575940505501e-06, "loss": 0.9436, "step": 2764 }, { "epoch": 1.454497632824829, "grad_norm": 2.080355167388916, "learning_rate": 4.345290798333369e-06, "loss": 0.9302, "step": 2765 }, { "epoch": 1.4550236717517095, "grad_norm": 2.071218967437744, "learning_rate": 4.344822049134126e-06, "loss": 0.979, "step": 2766 }, { "epoch": 1.4555497106785902, "grad_norm": 1.9839423894882202, "learning_rate": 4.344353157493475e-06, "loss": 0.9477, "step": 2767 }, { "epoch": 1.4560757496054708, "grad_norm": 1.9391570091247559, "learning_rate": 4.343884123447618e-06, "loss": 0.9042, "step": 2768 }, { "epoch": 1.4566017885323514, "grad_norm": 2.070699453353882, "learning_rate": 4.343414947032771e-06, "loss": 0.8974, "step": 2769 }, { "epoch": 1.457127827459232, "grad_norm": 1.9143608808517456, "learning_rate": 4.342945628285159e-06, "loss": 1.0173, "step": 2770 }, { "epoch": 1.4576538663861125, "grad_norm": 2.040562868118286, "learning_rate": 4.342476167241019e-06, "loss": 0.9201, "step": 2771 }, { "epoch": 1.4581799053129931, "grad_norm": 2.0661396980285645, "learning_rate": 4.342006563936599e-06, "loss": 0.9206, "step": 2772 }, { "epoch": 1.4587059442398738, "grad_norm": 2.0306687355041504, "learning_rate": 4.341536818408158e-06, "loss": 0.9346, "step": 2773 }, { "epoch": 1.4592319831667544, "grad_norm": 2.020789384841919, "learning_rate": 4.3410669306919666e-06, "loss": 0.9565, "step": 2774 }, { "epoch": 1.4597580220936348, "grad_norm": 2.044773817062378, "learning_rate": 4.340596900824303e-06, "loss": 0.9584, "step": 2775 }, { "epoch": 1.4602840610205154, "grad_norm": 2.112931728363037, "learning_rate": 4.340126728841461e-06, "loss": 0.9221, "step": 2776 }, { "epoch": 1.460810099947396, "grad_norm": 1.990900993347168, "learning_rate": 4.339656414779742e-06, "loss": 0.9116, "step": 2777 }, { "epoch": 1.4613361388742767, "grad_norm": 2.0449421405792236, "learning_rate": 4.33918595867546e-06, "loss": 0.9581, "step": 2778 }, { "epoch": 1.4618621778011573, "grad_norm": 1.8929235935211182, "learning_rate": 4.33871536056494e-06, "loss": 0.9274, "step": 2779 }, { "epoch": 1.462388216728038, "grad_norm": 2.001241445541382, "learning_rate": 4.338244620484517e-06, "loss": 0.92, "step": 2780 }, { "epoch": 1.4629142556549184, "grad_norm": 1.966840147972107, "learning_rate": 4.337773738470539e-06, "loss": 0.9073, "step": 2781 }, { "epoch": 1.463440294581799, "grad_norm": 2.086106777191162, "learning_rate": 4.337302714559361e-06, "loss": 0.8889, "step": 2782 }, { "epoch": 1.4639663335086797, "grad_norm": 1.8999865055084229, "learning_rate": 4.336831548787354e-06, "loss": 0.9005, "step": 2783 }, { "epoch": 1.4644923724355603, "grad_norm": 2.0190553665161133, "learning_rate": 4.336360241190896e-06, "loss": 0.9016, "step": 2784 }, { "epoch": 1.4650184113624407, "grad_norm": 2.081059694290161, "learning_rate": 4.335888791806377e-06, "loss": 0.9042, "step": 2785 }, { "epoch": 1.4655444502893213, "grad_norm": 2.0061800479888916, "learning_rate": 4.3354172006701985e-06, "loss": 0.9404, "step": 2786 }, { "epoch": 1.466070489216202, "grad_norm": 2.066344976425171, "learning_rate": 4.334945467818774e-06, "loss": 1.0027, "step": 2787 }, { "epoch": 1.4665965281430826, "grad_norm": 2.0009186267852783, "learning_rate": 4.3344735932885265e-06, "loss": 0.953, "step": 2788 }, { "epoch": 1.4671225670699632, "grad_norm": 2.0518672466278076, "learning_rate": 4.334001577115888e-06, "loss": 0.9466, "step": 2789 }, { "epoch": 1.4676486059968439, "grad_norm": 2.108964204788208, "learning_rate": 4.333529419337306e-06, "loss": 0.9264, "step": 2790 }, { "epoch": 1.4681746449237243, "grad_norm": 2.1517839431762695, "learning_rate": 4.333057119989235e-06, "loss": 0.9676, "step": 2791 }, { "epoch": 1.468700683850605, "grad_norm": 2.0158376693725586, "learning_rate": 4.332584679108141e-06, "loss": 0.9285, "step": 2792 }, { "epoch": 1.4692267227774856, "grad_norm": 1.9635580778121948, "learning_rate": 4.332112096730505e-06, "loss": 0.9108, "step": 2793 }, { "epoch": 1.4697527617043662, "grad_norm": 4.2303948402404785, "learning_rate": 4.3316393728928145e-06, "loss": 0.9461, "step": 2794 }, { "epoch": 1.4702788006312466, "grad_norm": 2.1729414463043213, "learning_rate": 4.331166507631567e-06, "loss": 0.9954, "step": 2795 }, { "epoch": 1.4708048395581272, "grad_norm": 2.0636355876922607, "learning_rate": 4.330693500983275e-06, "loss": 0.9424, "step": 2796 }, { "epoch": 1.4713308784850079, "grad_norm": 2.0802760124206543, "learning_rate": 4.330220352984461e-06, "loss": 0.9575, "step": 2797 }, { "epoch": 1.4718569174118885, "grad_norm": 1.989311695098877, "learning_rate": 4.329747063671656e-06, "loss": 0.9357, "step": 2798 }, { "epoch": 1.4723829563387691, "grad_norm": 1.855409860610962, "learning_rate": 4.329273633081403e-06, "loss": 0.8957, "step": 2799 }, { "epoch": 1.4729089952656498, "grad_norm": 2.1010360717773438, "learning_rate": 4.328800061250258e-06, "loss": 0.9646, "step": 2800 }, { "epoch": 1.4734350341925302, "grad_norm": 2.0032832622528076, "learning_rate": 4.328326348214784e-06, "loss": 0.8992, "step": 2801 }, { "epoch": 1.4739610731194108, "grad_norm": 2.0246925354003906, "learning_rate": 4.327852494011559e-06, "loss": 0.964, "step": 2802 }, { "epoch": 1.4744871120462915, "grad_norm": 2.136380434036255, "learning_rate": 4.327378498677169e-06, "loss": 0.9054, "step": 2803 }, { "epoch": 1.475013150973172, "grad_norm": 2.098071813583374, "learning_rate": 4.326904362248212e-06, "loss": 1.0004, "step": 2804 }, { "epoch": 1.4755391899000525, "grad_norm": 1.933727741241455, "learning_rate": 4.326430084761296e-06, "loss": 0.9402, "step": 2805 }, { "epoch": 1.4760652288269331, "grad_norm": 1.9036450386047363, "learning_rate": 4.325955666253043e-06, "loss": 0.9109, "step": 2806 }, { "epoch": 1.4765912677538138, "grad_norm": 2.2040369510650635, "learning_rate": 4.325481106760081e-06, "loss": 0.9325, "step": 2807 }, { "epoch": 1.4771173066806944, "grad_norm": 2.009504795074463, "learning_rate": 4.325006406319053e-06, "loss": 0.9026, "step": 2808 }, { "epoch": 1.477643345607575, "grad_norm": 2.3199565410614014, "learning_rate": 4.324531564966611e-06, "loss": 0.9873, "step": 2809 }, { "epoch": 1.4781693845344557, "grad_norm": 2.0439627170562744, "learning_rate": 4.3240565827394175e-06, "loss": 0.9246, "step": 2810 }, { "epoch": 1.478695423461336, "grad_norm": 2.013406753540039, "learning_rate": 4.323581459674147e-06, "loss": 0.9511, "step": 2811 }, { "epoch": 1.4792214623882167, "grad_norm": 1.95964515209198, "learning_rate": 4.323106195807484e-06, "loss": 0.9417, "step": 2812 }, { "epoch": 1.4797475013150974, "grad_norm": 2.0100247859954834, "learning_rate": 4.322630791176125e-06, "loss": 0.9274, "step": 2813 }, { "epoch": 1.4802735402419778, "grad_norm": 2.192779302597046, "learning_rate": 4.322155245816777e-06, "loss": 0.9878, "step": 2814 }, { "epoch": 1.4807995791688584, "grad_norm": 2.030768871307373, "learning_rate": 4.321679559766156e-06, "loss": 0.9025, "step": 2815 }, { "epoch": 1.481325618095739, "grad_norm": 1.9836081266403198, "learning_rate": 4.321203733060993e-06, "loss": 0.949, "step": 2816 }, { "epoch": 1.4818516570226197, "grad_norm": 2.0333588123321533, "learning_rate": 4.3207277657380255e-06, "loss": 0.9459, "step": 2817 }, { "epoch": 1.4823776959495003, "grad_norm": 2.035921812057495, "learning_rate": 4.3202516578340024e-06, "loss": 0.9468, "step": 2818 }, { "epoch": 1.482903734876381, "grad_norm": 1.989184856414795, "learning_rate": 4.319775409385688e-06, "loss": 0.9563, "step": 2819 }, { "epoch": 1.4834297738032616, "grad_norm": 2.0952956676483154, "learning_rate": 4.319299020429851e-06, "loss": 0.9089, "step": 2820 }, { "epoch": 1.483955812730142, "grad_norm": 2.2449991703033447, "learning_rate": 4.318822491003276e-06, "loss": 1.0142, "step": 2821 }, { "epoch": 1.4844818516570226, "grad_norm": 2.065350294113159, "learning_rate": 4.3183458211427554e-06, "loss": 0.9017, "step": 2822 }, { "epoch": 1.4850078905839033, "grad_norm": 2.016639471054077, "learning_rate": 4.317869010885094e-06, "loss": 0.951, "step": 2823 }, { "epoch": 1.4855339295107837, "grad_norm": 2.2200684547424316, "learning_rate": 4.317392060267108e-06, "loss": 0.9102, "step": 2824 }, { "epoch": 1.4860599684376643, "grad_norm": 2.0607798099517822, "learning_rate": 4.316914969325622e-06, "loss": 0.9037, "step": 2825 }, { "epoch": 1.486586007364545, "grad_norm": 2.1300361156463623, "learning_rate": 4.316437738097473e-06, "loss": 0.988, "step": 2826 }, { "epoch": 1.4871120462914256, "grad_norm": 2.0302493572235107, "learning_rate": 4.315960366619511e-06, "loss": 0.9148, "step": 2827 }, { "epoch": 1.4876380852183062, "grad_norm": 2.10941481590271, "learning_rate": 4.315482854928591e-06, "loss": 0.9566, "step": 2828 }, { "epoch": 1.4881641241451868, "grad_norm": 2.1145081520080566, "learning_rate": 4.315005203061584e-06, "loss": 0.9662, "step": 2829 }, { "epoch": 1.4886901630720673, "grad_norm": 2.1365935802459717, "learning_rate": 4.314527411055371e-06, "loss": 0.9707, "step": 2830 }, { "epoch": 1.4892162019989479, "grad_norm": 2.082724094390869, "learning_rate": 4.314049478946842e-06, "loss": 0.99, "step": 2831 }, { "epoch": 1.4897422409258285, "grad_norm": 2.1307926177978516, "learning_rate": 4.313571406772899e-06, "loss": 0.9417, "step": 2832 }, { "epoch": 1.4902682798527092, "grad_norm": 1.963191032409668, "learning_rate": 4.3130931945704554e-06, "loss": 0.9214, "step": 2833 }, { "epoch": 1.4907943187795896, "grad_norm": 2.2791664600372314, "learning_rate": 4.312614842376434e-06, "loss": 0.9229, "step": 2834 }, { "epoch": 1.4913203577064702, "grad_norm": 2.023608922958374, "learning_rate": 4.312136350227769e-06, "loss": 0.9386, "step": 2835 }, { "epoch": 1.4918463966333508, "grad_norm": 2.239856481552124, "learning_rate": 4.311657718161405e-06, "loss": 0.9091, "step": 2836 }, { "epoch": 1.4923724355602315, "grad_norm": 1.9821600914001465, "learning_rate": 4.311178946214299e-06, "loss": 0.9533, "step": 2837 }, { "epoch": 1.492898474487112, "grad_norm": 1.967998743057251, "learning_rate": 4.310700034423417e-06, "loss": 0.9011, "step": 2838 }, { "epoch": 1.4934245134139927, "grad_norm": 1.9766757488250732, "learning_rate": 4.310220982825738e-06, "loss": 0.9295, "step": 2839 }, { "epoch": 1.4939505523408732, "grad_norm": 1.9775986671447754, "learning_rate": 4.3097417914582475e-06, "loss": 0.914, "step": 2840 }, { "epoch": 1.4944765912677538, "grad_norm": 2.0174460411071777, "learning_rate": 4.309262460357946e-06, "loss": 0.9597, "step": 2841 }, { "epoch": 1.4950026301946344, "grad_norm": 2.358562707901001, "learning_rate": 4.308782989561844e-06, "loss": 0.9703, "step": 2842 }, { "epoch": 1.495528669121515, "grad_norm": 2.00032639503479, "learning_rate": 4.308303379106962e-06, "loss": 0.957, "step": 2843 }, { "epoch": 1.4960547080483955, "grad_norm": 2.1930489540100098, "learning_rate": 4.30782362903033e-06, "loss": 0.9834, "step": 2844 }, { "epoch": 1.496580746975276, "grad_norm": 2.0691113471984863, "learning_rate": 4.307343739368991e-06, "loss": 0.9799, "step": 2845 }, { "epoch": 1.4971067859021567, "grad_norm": 2.1371543407440186, "learning_rate": 4.306863710159999e-06, "loss": 0.9436, "step": 2846 }, { "epoch": 1.4976328248290374, "grad_norm": 2.1186442375183105, "learning_rate": 4.306383541440415e-06, "loss": 0.9843, "step": 2847 }, { "epoch": 1.498158863755918, "grad_norm": 2.091587543487549, "learning_rate": 4.3059032332473174e-06, "loss": 0.9357, "step": 2848 }, { "epoch": 1.4986849026827986, "grad_norm": 2.022662401199341, "learning_rate": 4.305422785617789e-06, "loss": 0.9234, "step": 2849 }, { "epoch": 1.499210941609679, "grad_norm": 2.015878915786743, "learning_rate": 4.304942198588926e-06, "loss": 0.9361, "step": 2850 }, { "epoch": 1.4997369805365597, "grad_norm": 1.9839386940002441, "learning_rate": 4.304461472197836e-06, "loss": 0.9103, "step": 2851 }, { "epoch": 1.5002630194634403, "grad_norm": 1.995928168296814, "learning_rate": 4.303980606481636e-06, "loss": 0.9094, "step": 2852 }, { "epoch": 1.5007890583903207, "grad_norm": 2.102646827697754, "learning_rate": 4.303499601477455e-06, "loss": 0.9397, "step": 2853 }, { "epoch": 1.5013150973172014, "grad_norm": 2.030672550201416, "learning_rate": 4.30301845722243e-06, "loss": 0.9101, "step": 2854 }, { "epoch": 1.501841136244082, "grad_norm": 2.064537286758423, "learning_rate": 4.302537173753714e-06, "loss": 0.9662, "step": 2855 }, { "epoch": 1.5023671751709626, "grad_norm": 2.000596284866333, "learning_rate": 4.302055751108465e-06, "loss": 0.9134, "step": 2856 }, { "epoch": 1.5028932140978433, "grad_norm": 2.0901315212249756, "learning_rate": 4.301574189323857e-06, "loss": 0.9528, "step": 2857 }, { "epoch": 1.503419253024724, "grad_norm": 1.9879727363586426, "learning_rate": 4.30109248843707e-06, "loss": 0.8697, "step": 2858 }, { "epoch": 1.5039452919516045, "grad_norm": 1.9458873271942139, "learning_rate": 4.300610648485296e-06, "loss": 0.9542, "step": 2859 }, { "epoch": 1.5044713308784852, "grad_norm": 1.984338402748108, "learning_rate": 4.300128669505741e-06, "loss": 0.9782, "step": 2860 }, { "epoch": 1.5049973698053656, "grad_norm": 1.9819425344467163, "learning_rate": 4.299646551535618e-06, "loss": 0.9478, "step": 2861 }, { "epoch": 1.5055234087322462, "grad_norm": 1.972513198852539, "learning_rate": 4.299164294612153e-06, "loss": 0.9438, "step": 2862 }, { "epoch": 1.5060494476591266, "grad_norm": 1.9727842807769775, "learning_rate": 4.29868189877258e-06, "loss": 0.9008, "step": 2863 }, { "epoch": 1.5065754865860073, "grad_norm": 1.9885272979736328, "learning_rate": 4.298199364054147e-06, "loss": 0.9472, "step": 2864 }, { "epoch": 1.507101525512888, "grad_norm": 3.059291362762451, "learning_rate": 4.2977166904941105e-06, "loss": 0.9379, "step": 2865 }, { "epoch": 1.5076275644397685, "grad_norm": 2.0359585285186768, "learning_rate": 4.297233878129739e-06, "loss": 0.9033, "step": 2866 }, { "epoch": 1.5081536033666492, "grad_norm": 2.0338592529296875, "learning_rate": 4.296750926998311e-06, "loss": 0.9086, "step": 2867 }, { "epoch": 1.5086796422935298, "grad_norm": 2.0874390602111816, "learning_rate": 4.296267837137115e-06, "loss": 0.9548, "step": 2868 }, { "epoch": 1.5092056812204104, "grad_norm": 1.9003558158874512, "learning_rate": 4.295784608583451e-06, "loss": 0.9116, "step": 2869 }, { "epoch": 1.5097317201472908, "grad_norm": 2.1565816402435303, "learning_rate": 4.295301241374632e-06, "loss": 0.9157, "step": 2870 }, { "epoch": 1.5102577590741715, "grad_norm": 1.9842712879180908, "learning_rate": 4.294817735547977e-06, "loss": 0.9291, "step": 2871 }, { "epoch": 1.5107837980010521, "grad_norm": 1.9991481304168701, "learning_rate": 4.29433409114082e-06, "loss": 0.9047, "step": 2872 }, { "epoch": 1.5113098369279325, "grad_norm": 1.9973630905151367, "learning_rate": 4.293850308190502e-06, "loss": 0.9625, "step": 2873 }, { "epoch": 1.5118358758548132, "grad_norm": 1.8854007720947266, "learning_rate": 4.2933663867343785e-06, "loss": 0.9208, "step": 2874 }, { "epoch": 1.5123619147816938, "grad_norm": 2.1708757877349854, "learning_rate": 4.292882326809814e-06, "loss": 1.0043, "step": 2875 }, { "epoch": 1.5128879537085744, "grad_norm": 2.030937671661377, "learning_rate": 4.2923981284541805e-06, "loss": 0.9343, "step": 2876 }, { "epoch": 1.513413992635455, "grad_norm": 1.9324885606765747, "learning_rate": 4.291913791704867e-06, "loss": 0.9047, "step": 2877 }, { "epoch": 1.5139400315623357, "grad_norm": 2.183217763900757, "learning_rate": 4.2914293165992675e-06, "loss": 0.9516, "step": 2878 }, { "epoch": 1.5144660704892163, "grad_norm": 2.1086974143981934, "learning_rate": 4.290944703174791e-06, "loss": 0.9303, "step": 2879 }, { "epoch": 1.5149921094160967, "grad_norm": 2.1121649742126465, "learning_rate": 4.290459951468853e-06, "loss": 0.9018, "step": 2880 }, { "epoch": 1.5155181483429774, "grad_norm": 1.879559874534607, "learning_rate": 4.289975061518884e-06, "loss": 0.9525, "step": 2881 }, { "epoch": 1.516044187269858, "grad_norm": 2.0514028072357178, "learning_rate": 4.289490033362322e-06, "loss": 0.9741, "step": 2882 }, { "epoch": 1.5165702261967384, "grad_norm": 2.1021642684936523, "learning_rate": 4.2890048670366154e-06, "loss": 0.9898, "step": 2883 }, { "epoch": 1.517096265123619, "grad_norm": 1.877241849899292, "learning_rate": 4.2885195625792275e-06, "loss": 0.8746, "step": 2884 }, { "epoch": 1.5176223040504997, "grad_norm": 1.9616249799728394, "learning_rate": 4.288034120027628e-06, "loss": 0.8823, "step": 2885 }, { "epoch": 1.5181483429773803, "grad_norm": 2.1360700130462646, "learning_rate": 4.287548539419298e-06, "loss": 0.9771, "step": 2886 }, { "epoch": 1.518674381904261, "grad_norm": 1.9651004076004028, "learning_rate": 4.287062820791731e-06, "loss": 0.9203, "step": 2887 }, { "epoch": 1.5192004208311416, "grad_norm": 1.9593781232833862, "learning_rate": 4.286576964182429e-06, "loss": 0.9295, "step": 2888 }, { "epoch": 1.5197264597580222, "grad_norm": 2.0677568912506104, "learning_rate": 4.286090969628907e-06, "loss": 0.937, "step": 2889 }, { "epoch": 1.5202524986849026, "grad_norm": 2.019524335861206, "learning_rate": 4.285604837168689e-06, "loss": 0.9655, "step": 2890 }, { "epoch": 1.5207785376117833, "grad_norm": 2.0417678356170654, "learning_rate": 4.285118566839308e-06, "loss": 0.9564, "step": 2891 }, { "epoch": 1.521304576538664, "grad_norm": 2.0766942501068115, "learning_rate": 4.284632158678312e-06, "loss": 0.9795, "step": 2892 }, { "epoch": 1.5218306154655443, "grad_norm": 2.1768674850463867, "learning_rate": 4.284145612723256e-06, "loss": 0.9203, "step": 2893 }, { "epoch": 1.522356654392425, "grad_norm": 2.027491569519043, "learning_rate": 4.283658929011708e-06, "loss": 0.9526, "step": 2894 }, { "epoch": 1.5228826933193056, "grad_norm": 2.1974449157714844, "learning_rate": 4.283172107581245e-06, "loss": 0.9475, "step": 2895 }, { "epoch": 1.5234087322461862, "grad_norm": 2.0966193675994873, "learning_rate": 4.282685148469454e-06, "loss": 0.9261, "step": 2896 }, { "epoch": 1.5239347711730669, "grad_norm": 2.052272319793701, "learning_rate": 4.282198051713936e-06, "loss": 0.9781, "step": 2897 }, { "epoch": 1.5244608100999475, "grad_norm": 2.1015512943267822, "learning_rate": 4.281710817352299e-06, "loss": 0.8729, "step": 2898 }, { "epoch": 1.5249868490268281, "grad_norm": 2.110563039779663, "learning_rate": 4.281223445422165e-06, "loss": 0.9974, "step": 2899 }, { "epoch": 1.5255128879537085, "grad_norm": 2.1721880435943604, "learning_rate": 4.280735935961161e-06, "loss": 0.9293, "step": 2900 }, { "epoch": 1.5260389268805892, "grad_norm": 2.011016845703125, "learning_rate": 4.2802482890069315e-06, "loss": 0.9336, "step": 2901 }, { "epoch": 1.5265649658074696, "grad_norm": 2.1780524253845215, "learning_rate": 4.279760504597128e-06, "loss": 0.9383, "step": 2902 }, { "epoch": 1.5270910047343502, "grad_norm": 2.0925979614257812, "learning_rate": 4.279272582769412e-06, "loss": 0.9651, "step": 2903 }, { "epoch": 1.5276170436612309, "grad_norm": 2.1651055812835693, "learning_rate": 4.278784523561458e-06, "loss": 0.99, "step": 2904 }, { "epoch": 1.5281430825881115, "grad_norm": 1.9780375957489014, "learning_rate": 4.278296327010948e-06, "loss": 0.9006, "step": 2905 }, { "epoch": 1.5286691215149921, "grad_norm": 2.053251028060913, "learning_rate": 4.2778079931555775e-06, "loss": 0.9746, "step": 2906 }, { "epoch": 1.5291951604418728, "grad_norm": 2.0084316730499268, "learning_rate": 4.277319522033051e-06, "loss": 0.9276, "step": 2907 }, { "epoch": 1.5297211993687534, "grad_norm": 2.1033427715301514, "learning_rate": 4.276830913681085e-06, "loss": 0.9612, "step": 2908 }, { "epoch": 1.530247238295634, "grad_norm": 3.971057176589966, "learning_rate": 4.2763421681374045e-06, "loss": 0.9064, "step": 2909 }, { "epoch": 1.5307732772225144, "grad_norm": 2.1207163333892822, "learning_rate": 4.275853285439747e-06, "loss": 0.9463, "step": 2910 }, { "epoch": 1.531299316149395, "grad_norm": 2.7853474617004395, "learning_rate": 4.27536426562586e-06, "loss": 0.9335, "step": 2911 }, { "epoch": 1.5318253550762755, "grad_norm": 2.0105173587799072, "learning_rate": 4.2748751087335e-06, "loss": 0.9734, "step": 2912 }, { "epoch": 1.5323513940031561, "grad_norm": 2.0165185928344727, "learning_rate": 4.274385814800438e-06, "loss": 0.9229, "step": 2913 }, { "epoch": 1.5328774329300368, "grad_norm": 1.9177309274673462, "learning_rate": 4.273896383864451e-06, "loss": 0.9276, "step": 2914 }, { "epoch": 1.5334034718569174, "grad_norm": 2.082285165786743, "learning_rate": 4.273406815963329e-06, "loss": 0.9122, "step": 2915 }, { "epoch": 1.533929510783798, "grad_norm": 1.9629693031311035, "learning_rate": 4.272917111134873e-06, "loss": 0.9476, "step": 2916 }, { "epoch": 1.5344555497106787, "grad_norm": 2.0855002403259277, "learning_rate": 4.272427269416893e-06, "loss": 0.8956, "step": 2917 }, { "epoch": 1.5349815886375593, "grad_norm": 2.0533595085144043, "learning_rate": 4.271937290847212e-06, "loss": 0.9518, "step": 2918 }, { "epoch": 1.53550762756444, "grad_norm": 2.0000126361846924, "learning_rate": 4.27144717546366e-06, "loss": 0.8994, "step": 2919 }, { "epoch": 1.5360336664913203, "grad_norm": 2.0330493450164795, "learning_rate": 4.2709569233040806e-06, "loss": 0.9349, "step": 2920 }, { "epoch": 1.536559705418201, "grad_norm": 2.2399914264678955, "learning_rate": 4.270466534406326e-06, "loss": 0.9755, "step": 2921 }, { "epoch": 1.5370857443450814, "grad_norm": 2.0268807411193848, "learning_rate": 4.269976008808261e-06, "loss": 0.9097, "step": 2922 }, { "epoch": 1.537611783271962, "grad_norm": 1.9109435081481934, "learning_rate": 4.26948534654776e-06, "loss": 0.8683, "step": 2923 }, { "epoch": 1.5381378221988427, "grad_norm": 2.0855066776275635, "learning_rate": 4.268994547662705e-06, "loss": 0.958, "step": 2924 }, { "epoch": 1.5386638611257233, "grad_norm": 2.0536108016967773, "learning_rate": 4.268503612190995e-06, "loss": 0.9176, "step": 2925 }, { "epoch": 1.539189900052604, "grad_norm": 2.137848138809204, "learning_rate": 4.268012540170533e-06, "loss": 0.9535, "step": 2926 }, { "epoch": 1.5397159389794846, "grad_norm": 2.0253493785858154, "learning_rate": 4.267521331639237e-06, "loss": 0.8924, "step": 2927 }, { "epoch": 1.5402419779063652, "grad_norm": 1.972090244293213, "learning_rate": 4.267029986635034e-06, "loss": 0.9428, "step": 2928 }, { "epoch": 1.5407680168332458, "grad_norm": 2.040821075439453, "learning_rate": 4.266538505195861e-06, "loss": 0.9717, "step": 2929 }, { "epoch": 1.5412940557601262, "grad_norm": 1.9857984781265259, "learning_rate": 4.266046887359665e-06, "loss": 0.9474, "step": 2930 }, { "epoch": 1.5418200946870069, "grad_norm": 2.050723075866699, "learning_rate": 4.265555133164406e-06, "loss": 0.9506, "step": 2931 }, { "epoch": 1.5423461336138873, "grad_norm": 1.996460199356079, "learning_rate": 4.265063242648052e-06, "loss": 0.9111, "step": 2932 }, { "epoch": 1.542872172540768, "grad_norm": 2.0638771057128906, "learning_rate": 4.264571215848584e-06, "loss": 0.939, "step": 2933 }, { "epoch": 1.5433982114676486, "grad_norm": 1.990309476852417, "learning_rate": 4.264079052803991e-06, "loss": 0.8726, "step": 2934 }, { "epoch": 1.5439242503945292, "grad_norm": 1.9167861938476562, "learning_rate": 4.263586753552274e-06, "loss": 0.9005, "step": 2935 }, { "epoch": 1.5444502893214098, "grad_norm": 2.025639533996582, "learning_rate": 4.263094318131443e-06, "loss": 0.9234, "step": 2936 }, { "epoch": 1.5449763282482905, "grad_norm": 2.042449474334717, "learning_rate": 4.262601746579521e-06, "loss": 0.9545, "step": 2937 }, { "epoch": 1.545502367175171, "grad_norm": 1.97848641872406, "learning_rate": 4.26210903893454e-06, "loss": 0.8961, "step": 2938 }, { "epoch": 1.5460284061020515, "grad_norm": 1.9675039052963257, "learning_rate": 4.261616195234544e-06, "loss": 1.0115, "step": 2939 }, { "epoch": 1.5465544450289321, "grad_norm": 2.0312752723693848, "learning_rate": 4.261123215517583e-06, "loss": 0.9335, "step": 2940 }, { "epoch": 1.5470804839558128, "grad_norm": 2.0146288871765137, "learning_rate": 4.260630099821722e-06, "loss": 0.9767, "step": 2941 }, { "epoch": 1.5476065228826932, "grad_norm": 1.9374853372573853, "learning_rate": 4.260136848185036e-06, "loss": 0.9363, "step": 2942 }, { "epoch": 1.5481325618095738, "grad_norm": 2.024721384048462, "learning_rate": 4.2596434606456106e-06, "loss": 0.9664, "step": 2943 }, { "epoch": 1.5486586007364544, "grad_norm": 1.9361040592193604, "learning_rate": 4.259149937241538e-06, "loss": 0.9168, "step": 2944 }, { "epoch": 1.549184639663335, "grad_norm": 1.9720629453659058, "learning_rate": 4.258656278010926e-06, "loss": 0.9508, "step": 2945 }, { "epoch": 1.5497106785902157, "grad_norm": 2.2427873611450195, "learning_rate": 4.25816248299189e-06, "loss": 0.929, "step": 2946 }, { "epoch": 1.5502367175170964, "grad_norm": 2.058295965194702, "learning_rate": 4.257668552222558e-06, "loss": 0.9301, "step": 2947 }, { "epoch": 1.550762756443977, "grad_norm": 2.1682353019714355, "learning_rate": 4.257174485741064e-06, "loss": 0.9307, "step": 2948 }, { "epoch": 1.5512887953708574, "grad_norm": 1.8824107646942139, "learning_rate": 4.256680283585559e-06, "loss": 0.9651, "step": 2949 }, { "epoch": 1.551814834297738, "grad_norm": 1.9697431325912476, "learning_rate": 4.2561859457942e-06, "loss": 0.956, "step": 2950 }, { "epoch": 1.5523408732246187, "grad_norm": 2.080841302871704, "learning_rate": 4.255691472405155e-06, "loss": 0.9525, "step": 2951 }, { "epoch": 1.552866912151499, "grad_norm": 2.130826950073242, "learning_rate": 4.255196863456602e-06, "loss": 0.9365, "step": 2952 }, { "epoch": 1.5533929510783797, "grad_norm": 2.101513385772705, "learning_rate": 4.254702118986732e-06, "loss": 0.8871, "step": 2953 }, { "epoch": 1.5539189900052603, "grad_norm": 2.055342197418213, "learning_rate": 4.254207239033746e-06, "loss": 0.9333, "step": 2954 }, { "epoch": 1.554445028932141, "grad_norm": 1.9468308687210083, "learning_rate": 4.253712223635852e-06, "loss": 0.9198, "step": 2955 }, { "epoch": 1.5549710678590216, "grad_norm": 2.011702299118042, "learning_rate": 4.253217072831272e-06, "loss": 0.936, "step": 2956 }, { "epoch": 1.5554971067859023, "grad_norm": 1.9965795278549194, "learning_rate": 4.252721786658237e-06, "loss": 0.8836, "step": 2957 }, { "epoch": 1.5560231457127829, "grad_norm": 2.023535966873169, "learning_rate": 4.252226365154989e-06, "loss": 0.9256, "step": 2958 }, { "epoch": 1.5565491846396633, "grad_norm": 1.9498130083084106, "learning_rate": 4.251730808359781e-06, "loss": 0.9481, "step": 2959 }, { "epoch": 1.557075223566544, "grad_norm": 1.9864977598190308, "learning_rate": 4.251235116310874e-06, "loss": 0.8909, "step": 2960 }, { "epoch": 1.5576012624934246, "grad_norm": 2.0688984394073486, "learning_rate": 4.250739289046542e-06, "loss": 0.9526, "step": 2961 }, { "epoch": 1.558127301420305, "grad_norm": 1.9319058656692505, "learning_rate": 4.250243326605069e-06, "loss": 0.9905, "step": 2962 }, { "epoch": 1.5586533403471856, "grad_norm": 2.342468738555908, "learning_rate": 4.249747229024748e-06, "loss": 0.9612, "step": 2963 }, { "epoch": 1.5591793792740662, "grad_norm": 2.1509134769439697, "learning_rate": 4.249250996343884e-06, "loss": 0.9446, "step": 2964 }, { "epoch": 1.5597054182009469, "grad_norm": 2.132373571395874, "learning_rate": 4.248754628600793e-06, "loss": 0.9379, "step": 2965 }, { "epoch": 1.5602314571278275, "grad_norm": 2.0995333194732666, "learning_rate": 4.248258125833797e-06, "loss": 0.9239, "step": 2966 }, { "epoch": 1.5607574960547081, "grad_norm": 2.122519016265869, "learning_rate": 4.247761488081236e-06, "loss": 0.917, "step": 2967 }, { "epoch": 1.5612835349815888, "grad_norm": 1.9754114151000977, "learning_rate": 4.247264715381453e-06, "loss": 0.9197, "step": 2968 }, { "epoch": 1.5618095739084692, "grad_norm": 2.0227582454681396, "learning_rate": 4.2467678077728044e-06, "loss": 0.9128, "step": 2969 }, { "epoch": 1.5623356128353498, "grad_norm": 1.9663293361663818, "learning_rate": 4.246270765293659e-06, "loss": 0.9485, "step": 2970 }, { "epoch": 1.5628616517622302, "grad_norm": 1.953763484954834, "learning_rate": 4.245773587982394e-06, "loss": 0.9392, "step": 2971 }, { "epoch": 1.5633876906891109, "grad_norm": 2.063044309616089, "learning_rate": 4.245276275877396e-06, "loss": 0.9448, "step": 2972 }, { "epoch": 1.5639137296159915, "grad_norm": 2.148085117340088, "learning_rate": 4.244778829017063e-06, "loss": 0.9352, "step": 2973 }, { "epoch": 1.5644397685428721, "grad_norm": 2.1457817554473877, "learning_rate": 4.244281247439805e-06, "loss": 0.9651, "step": 2974 }, { "epoch": 1.5649658074697528, "grad_norm": 2.127017021179199, "learning_rate": 4.243783531184041e-06, "loss": 0.9157, "step": 2975 }, { "epoch": 1.5654918463966334, "grad_norm": 1.941909670829773, "learning_rate": 4.2432856802882e-06, "loss": 0.93, "step": 2976 }, { "epoch": 1.566017885323514, "grad_norm": 2.045715093612671, "learning_rate": 4.24278769479072e-06, "loss": 0.8973, "step": 2977 }, { "epoch": 1.5665439242503947, "grad_norm": 2.024904727935791, "learning_rate": 4.242289574730053e-06, "loss": 0.9165, "step": 2978 }, { "epoch": 1.567069963177275, "grad_norm": 1.9800280332565308, "learning_rate": 4.241791320144661e-06, "loss": 0.9598, "step": 2979 }, { "epoch": 1.5675960021041557, "grad_norm": 2.0844175815582275, "learning_rate": 4.241292931073012e-06, "loss": 0.9064, "step": 2980 }, { "epoch": 1.5681220410310361, "grad_norm": 1.9466551542282104, "learning_rate": 4.240794407553589e-06, "loss": 0.8825, "step": 2981 }, { "epoch": 1.5686480799579168, "grad_norm": 1.9948161840438843, "learning_rate": 4.240295749624883e-06, "loss": 0.889, "step": 2982 }, { "epoch": 1.5691741188847974, "grad_norm": 2.0572495460510254, "learning_rate": 4.2397969573253965e-06, "loss": 0.9171, "step": 2983 }, { "epoch": 1.569700157811678, "grad_norm": 1.970253348350525, "learning_rate": 4.239298030693643e-06, "loss": 0.9645, "step": 2984 }, { "epoch": 1.5702261967385587, "grad_norm": 2.019176959991455, "learning_rate": 4.238798969768143e-06, "loss": 0.9488, "step": 2985 }, { "epoch": 1.5707522356654393, "grad_norm": 2.239705801010132, "learning_rate": 4.238299774587432e-06, "loss": 0.8997, "step": 2986 }, { "epoch": 1.57127827459232, "grad_norm": 2.076761245727539, "learning_rate": 4.2378004451900515e-06, "loss": 0.9058, "step": 2987 }, { "epoch": 1.5718043135192006, "grad_norm": 2.033193588256836, "learning_rate": 4.237300981614557e-06, "loss": 0.8999, "step": 2988 }, { "epoch": 1.572330352446081, "grad_norm": 2.116682291030884, "learning_rate": 4.236801383899514e-06, "loss": 0.9616, "step": 2989 }, { "epoch": 1.5728563913729616, "grad_norm": 1.901473879814148, "learning_rate": 4.236301652083493e-06, "loss": 0.876, "step": 2990 }, { "epoch": 1.573382430299842, "grad_norm": 2.0186798572540283, "learning_rate": 4.235801786205083e-06, "loss": 0.9549, "step": 2991 }, { "epoch": 1.5739084692267227, "grad_norm": 1.9795666933059692, "learning_rate": 4.235301786302878e-06, "loss": 0.9298, "step": 2992 }, { "epoch": 1.5744345081536033, "grad_norm": 1.9244426488876343, "learning_rate": 4.234801652415484e-06, "loss": 0.9036, "step": 2993 }, { "epoch": 1.574960547080484, "grad_norm": 2.013563394546509, "learning_rate": 4.234301384581516e-06, "loss": 0.9765, "step": 2994 }, { "epoch": 1.5754865860073646, "grad_norm": 2.078366994857788, "learning_rate": 4.2338009828396015e-06, "loss": 0.9556, "step": 2995 }, { "epoch": 1.5760126249342452, "grad_norm": 2.0059587955474854, "learning_rate": 4.233300447228376e-06, "loss": 0.9333, "step": 2996 }, { "epoch": 1.5765386638611258, "grad_norm": 1.874704122543335, "learning_rate": 4.2327997777864895e-06, "loss": 0.8841, "step": 2997 }, { "epoch": 1.5770647027880065, "grad_norm": 2.0818729400634766, "learning_rate": 4.232298974552596e-06, "loss": 0.9115, "step": 2998 }, { "epoch": 1.577590741714887, "grad_norm": 2.0479767322540283, "learning_rate": 4.231798037565365e-06, "loss": 0.9048, "step": 2999 }, { "epoch": 1.5781167806417675, "grad_norm": 1.973334789276123, "learning_rate": 4.2312969668634745e-06, "loss": 0.9308, "step": 3000 }, { "epoch": 1.578642819568648, "grad_norm": 1.9209327697753906, "learning_rate": 4.230795762485612e-06, "loss": 0.9196, "step": 3001 }, { "epoch": 1.5791688584955286, "grad_norm": 2.012134552001953, "learning_rate": 4.230294424470478e-06, "loss": 0.9649, "step": 3002 }, { "epoch": 1.5796948974224092, "grad_norm": 2.057433605194092, "learning_rate": 4.229792952856779e-06, "loss": 0.9747, "step": 3003 }, { "epoch": 1.5802209363492898, "grad_norm": 1.8826886415481567, "learning_rate": 4.2292913476832375e-06, "loss": 0.962, "step": 3004 }, { "epoch": 1.5807469752761705, "grad_norm": 2.063971519470215, "learning_rate": 4.22878960898858e-06, "loss": 0.9498, "step": 3005 }, { "epoch": 1.581273014203051, "grad_norm": 2.021562099456787, "learning_rate": 4.228287736811548e-06, "loss": 0.9084, "step": 3006 }, { "epoch": 1.5817990531299317, "grad_norm": 2.029219388961792, "learning_rate": 4.227785731190893e-06, "loss": 0.9443, "step": 3007 }, { "epoch": 1.5823250920568122, "grad_norm": 1.8719290494918823, "learning_rate": 4.227283592165373e-06, "loss": 0.9135, "step": 3008 }, { "epoch": 1.5828511309836928, "grad_norm": 1.9184664487838745, "learning_rate": 4.226781319773761e-06, "loss": 0.9267, "step": 3009 }, { "epoch": 1.5833771699105734, "grad_norm": 2.0591418743133545, "learning_rate": 4.226278914054837e-06, "loss": 0.9522, "step": 3010 }, { "epoch": 1.5839032088374538, "grad_norm": 1.992311954498291, "learning_rate": 4.225776375047394e-06, "loss": 0.913, "step": 3011 }, { "epoch": 1.5844292477643345, "grad_norm": 1.8933684825897217, "learning_rate": 4.225273702790231e-06, "loss": 0.9486, "step": 3012 }, { "epoch": 1.584955286691215, "grad_norm": 1.999765157699585, "learning_rate": 4.224770897322162e-06, "loss": 0.9097, "step": 3013 }, { "epoch": 1.5854813256180957, "grad_norm": 2.038668632507324, "learning_rate": 4.224267958682009e-06, "loss": 0.9389, "step": 3014 }, { "epoch": 1.5860073645449764, "grad_norm": 1.9329705238342285, "learning_rate": 4.2237648869086055e-06, "loss": 0.9617, "step": 3015 }, { "epoch": 1.586533403471857, "grad_norm": 2.012115955352783, "learning_rate": 4.223261682040793e-06, "loss": 0.9012, "step": 3016 }, { "epoch": 1.5870594423987376, "grad_norm": 2.1576831340789795, "learning_rate": 4.222758344117424e-06, "loss": 0.9212, "step": 3017 }, { "epoch": 1.587585481325618, "grad_norm": 1.9946175813674927, "learning_rate": 4.222254873177366e-06, "loss": 0.9553, "step": 3018 }, { "epoch": 1.5881115202524987, "grad_norm": 2.050882339477539, "learning_rate": 4.221751269259488e-06, "loss": 0.9352, "step": 3019 }, { "epoch": 1.5886375591793793, "grad_norm": 1.9702178239822388, "learning_rate": 4.2212475324026765e-06, "loss": 0.8958, "step": 3020 }, { "epoch": 1.5891635981062597, "grad_norm": 2.0302233695983887, "learning_rate": 4.220743662645825e-06, "loss": 0.96, "step": 3021 }, { "epoch": 1.5896896370331404, "grad_norm": 2.007420063018799, "learning_rate": 4.220239660027839e-06, "loss": 0.9651, "step": 3022 }, { "epoch": 1.590215675960021, "grad_norm": 1.9850428104400635, "learning_rate": 4.219735524587632e-06, "loss": 0.9015, "step": 3023 }, { "epoch": 1.5907417148869016, "grad_norm": 2.006885051727295, "learning_rate": 4.2192312563641285e-06, "loss": 0.9478, "step": 3024 }, { "epoch": 1.5912677538137823, "grad_norm": 1.8564372062683105, "learning_rate": 4.218726855396265e-06, "loss": 0.8902, "step": 3025 }, { "epoch": 1.591793792740663, "grad_norm": 2.064643383026123, "learning_rate": 4.218222321722988e-06, "loss": 0.9758, "step": 3026 }, { "epoch": 1.5923198316675435, "grad_norm": 1.9374020099639893, "learning_rate": 4.217717655383252e-06, "loss": 0.8865, "step": 3027 }, { "epoch": 1.592845870594424, "grad_norm": 2.206101655960083, "learning_rate": 4.217212856416023e-06, "loss": 0.9711, "step": 3028 }, { "epoch": 1.5933719095213046, "grad_norm": 1.9797239303588867, "learning_rate": 4.216707924860277e-06, "loss": 0.9303, "step": 3029 }, { "epoch": 1.5938979484481852, "grad_norm": 2.039867401123047, "learning_rate": 4.216202860755001e-06, "loss": 0.9324, "step": 3030 }, { "epoch": 1.5944239873750656, "grad_norm": 2.0361459255218506, "learning_rate": 4.215697664139192e-06, "loss": 0.9229, "step": 3031 }, { "epoch": 1.5949500263019463, "grad_norm": 2.052764654159546, "learning_rate": 4.215192335051857e-06, "loss": 0.9114, "step": 3032 }, { "epoch": 1.595476065228827, "grad_norm": 2.084118127822876, "learning_rate": 4.214686873532013e-06, "loss": 0.9188, "step": 3033 }, { "epoch": 1.5960021041557075, "grad_norm": 2.352452278137207, "learning_rate": 4.214181279618686e-06, "loss": 0.944, "step": 3034 }, { "epoch": 1.5965281430825882, "grad_norm": 2.076841354370117, "learning_rate": 4.213675553350915e-06, "loss": 0.9896, "step": 3035 }, { "epoch": 1.5970541820094688, "grad_norm": 1.9557950496673584, "learning_rate": 4.213169694767749e-06, "loss": 0.935, "step": 3036 }, { "epoch": 1.5975802209363494, "grad_norm": 2.0926027297973633, "learning_rate": 4.212663703908244e-06, "loss": 0.9745, "step": 3037 }, { "epoch": 1.5981062598632298, "grad_norm": 2.065674066543579, "learning_rate": 4.212157580811469e-06, "loss": 0.9136, "step": 3038 }, { "epoch": 1.5986322987901105, "grad_norm": 1.9909225702285767, "learning_rate": 4.211651325516504e-06, "loss": 0.9501, "step": 3039 }, { "epoch": 1.599158337716991, "grad_norm": 2.153087854385376, "learning_rate": 4.211144938062434e-06, "loss": 0.9155, "step": 3040 }, { "epoch": 1.5996843766438715, "grad_norm": 2.061505079269409, "learning_rate": 4.210638418488363e-06, "loss": 0.9457, "step": 3041 }, { "epoch": 1.6002104155707522, "grad_norm": 1.9815692901611328, "learning_rate": 4.210131766833396e-06, "loss": 0.9528, "step": 3042 }, { "epoch": 1.6007364544976328, "grad_norm": 2.0386016368865967, "learning_rate": 4.2096249831366535e-06, "loss": 0.9551, "step": 3043 }, { "epoch": 1.6012624934245134, "grad_norm": 2.04970645904541, "learning_rate": 4.209118067437266e-06, "loss": 0.9282, "step": 3044 }, { "epoch": 1.601788532351394, "grad_norm": 2.013005018234253, "learning_rate": 4.208611019774372e-06, "loss": 0.9468, "step": 3045 }, { "epoch": 1.6023145712782747, "grad_norm": 1.9524039030075073, "learning_rate": 4.208103840187121e-06, "loss": 0.9581, "step": 3046 }, { "epoch": 1.6028406102051553, "grad_norm": 2.1470460891723633, "learning_rate": 4.207596528714675e-06, "loss": 0.9304, "step": 3047 }, { "epoch": 1.6033666491320357, "grad_norm": 2.1197633743286133, "learning_rate": 4.207089085396203e-06, "loss": 0.9498, "step": 3048 }, { "epoch": 1.6038926880589164, "grad_norm": 1.953028678894043, "learning_rate": 4.206581510270885e-06, "loss": 0.9385, "step": 3049 }, { "epoch": 1.6044187269857968, "grad_norm": 2.1204376220703125, "learning_rate": 4.206073803377913e-06, "loss": 0.9772, "step": 3050 }, { "epoch": 1.6049447659126774, "grad_norm": 2.153193712234497, "learning_rate": 4.2055659647564875e-06, "loss": 0.9488, "step": 3051 }, { "epoch": 1.605470804839558, "grad_norm": 2.1081886291503906, "learning_rate": 4.205057994445818e-06, "loss": 0.9291, "step": 3052 }, { "epoch": 1.6059968437664387, "grad_norm": 2.0400824546813965, "learning_rate": 4.2045498924851266e-06, "loss": 0.9512, "step": 3053 }, { "epoch": 1.6065228826933193, "grad_norm": 2.0144882202148438, "learning_rate": 4.2040416589136445e-06, "loss": 0.9566, "step": 3054 }, { "epoch": 1.6070489216202, "grad_norm": 2.105506181716919, "learning_rate": 4.2035332937706145e-06, "loss": 0.9245, "step": 3055 }, { "epoch": 1.6075749605470806, "grad_norm": 1.857825517654419, "learning_rate": 4.203024797095286e-06, "loss": 0.916, "step": 3056 }, { "epoch": 1.6081009994739612, "grad_norm": 2.0820188522338867, "learning_rate": 4.202516168926921e-06, "loss": 0.9359, "step": 3057 }, { "epoch": 1.6086270384008416, "grad_norm": 2.287496328353882, "learning_rate": 4.202007409304793e-06, "loss": 0.9798, "step": 3058 }, { "epoch": 1.6091530773277223, "grad_norm": 2.085906982421875, "learning_rate": 4.201498518268184e-06, "loss": 0.9623, "step": 3059 }, { "epoch": 1.6096791162546027, "grad_norm": 1.9612935781478882, "learning_rate": 4.200989495856383e-06, "loss": 0.9044, "step": 3060 }, { "epoch": 1.6102051551814833, "grad_norm": 1.9369161128997803, "learning_rate": 4.200480342108698e-06, "loss": 0.9829, "step": 3061 }, { "epoch": 1.610731194108364, "grad_norm": 2.031216621398926, "learning_rate": 4.1999710570644354e-06, "loss": 0.9347, "step": 3062 }, { "epoch": 1.6112572330352446, "grad_norm": 1.9881701469421387, "learning_rate": 4.1994616407629225e-06, "loss": 0.9293, "step": 3063 }, { "epoch": 1.6117832719621252, "grad_norm": 2.299139976501465, "learning_rate": 4.198952093243491e-06, "loss": 1.0016, "step": 3064 }, { "epoch": 1.6123093108890059, "grad_norm": 1.9135477542877197, "learning_rate": 4.198442414545482e-06, "loss": 0.8458, "step": 3065 }, { "epoch": 1.6128353498158865, "grad_norm": 1.994346022605896, "learning_rate": 4.19793260470825e-06, "loss": 0.9244, "step": 3066 }, { "epoch": 1.613361388742767, "grad_norm": 1.923024296760559, "learning_rate": 4.197422663771158e-06, "loss": 0.9403, "step": 3067 }, { "epoch": 1.6138874276696475, "grad_norm": 2.1085264682769775, "learning_rate": 4.196912591773578e-06, "loss": 0.9221, "step": 3068 }, { "epoch": 1.6144134665965282, "grad_norm": 2.141895055770874, "learning_rate": 4.196402388754897e-06, "loss": 0.9547, "step": 3069 }, { "epoch": 1.6149395055234086, "grad_norm": 1.9833990335464478, "learning_rate": 4.195892054754504e-06, "loss": 0.9117, "step": 3070 }, { "epoch": 1.6154655444502892, "grad_norm": 2.216627359390259, "learning_rate": 4.195381589811805e-06, "loss": 0.9246, "step": 3071 }, { "epoch": 1.6159915833771699, "grad_norm": 2.049163579940796, "learning_rate": 4.194870993966214e-06, "loss": 0.9732, "step": 3072 }, { "epoch": 1.6165176223040505, "grad_norm": 2.0937061309814453, "learning_rate": 4.194360267257155e-06, "loss": 0.9848, "step": 3073 }, { "epoch": 1.6170436612309311, "grad_norm": 2.0454630851745605, "learning_rate": 4.19384940972406e-06, "loss": 0.9469, "step": 3074 }, { "epoch": 1.6175697001578118, "grad_norm": 2.1442503929138184, "learning_rate": 4.193338421406375e-06, "loss": 0.9344, "step": 3075 }, { "epoch": 1.6180957390846924, "grad_norm": 2.012234926223755, "learning_rate": 4.192827302343553e-06, "loss": 0.964, "step": 3076 }, { "epoch": 1.6186217780115728, "grad_norm": 2.215226411819458, "learning_rate": 4.192316052575059e-06, "loss": 0.9623, "step": 3077 }, { "epoch": 1.6191478169384534, "grad_norm": 2.119033098220825, "learning_rate": 4.191804672140367e-06, "loss": 0.9899, "step": 3078 }, { "epoch": 1.619673855865334, "grad_norm": 2.088515281677246, "learning_rate": 4.191293161078962e-06, "loss": 0.9402, "step": 3079 }, { "epoch": 1.6201998947922145, "grad_norm": 2.0898115634918213, "learning_rate": 4.190781519430337e-06, "loss": 0.9665, "step": 3080 }, { "epoch": 1.6207259337190951, "grad_norm": 1.9977632761001587, "learning_rate": 4.190269747233998e-06, "loss": 0.9353, "step": 3081 }, { "epoch": 1.6212519726459758, "grad_norm": 2.0618317127227783, "learning_rate": 4.18975784452946e-06, "loss": 0.9394, "step": 3082 }, { "epoch": 1.6217780115728564, "grad_norm": 2.0050265789031982, "learning_rate": 4.189245811356246e-06, "loss": 0.9242, "step": 3083 }, { "epoch": 1.622304050499737, "grad_norm": 2.0560061931610107, "learning_rate": 4.188733647753893e-06, "loss": 0.9436, "step": 3084 }, { "epoch": 1.6228300894266177, "grad_norm": 1.9926953315734863, "learning_rate": 4.188221353761944e-06, "loss": 0.9097, "step": 3085 }, { "epoch": 1.6233561283534983, "grad_norm": 2.0866425037384033, "learning_rate": 4.187708929419956e-06, "loss": 0.9619, "step": 3086 }, { "epoch": 1.6238821672803787, "grad_norm": 2.047328472137451, "learning_rate": 4.1871963747674916e-06, "loss": 0.9068, "step": 3087 }, { "epoch": 1.6244082062072593, "grad_norm": 1.9664018154144287, "learning_rate": 4.1866836898441265e-06, "loss": 0.9652, "step": 3088 }, { "epoch": 1.62493424513414, "grad_norm": 2.173121213912964, "learning_rate": 4.186170874689448e-06, "loss": 0.9887, "step": 3089 }, { "epoch": 1.6254602840610204, "grad_norm": 2.1074130535125732, "learning_rate": 4.185657929343049e-06, "loss": 0.9538, "step": 3090 }, { "epoch": 1.625986322987901, "grad_norm": 2.1423840522766113, "learning_rate": 4.185144853844535e-06, "loss": 0.9835, "step": 3091 }, { "epoch": 1.6265123619147817, "grad_norm": 2.026642084121704, "learning_rate": 4.184631648233523e-06, "loss": 0.9129, "step": 3092 }, { "epoch": 1.6270384008416623, "grad_norm": 1.9563450813293457, "learning_rate": 4.184118312549636e-06, "loss": 0.9475, "step": 3093 }, { "epoch": 1.627564439768543, "grad_norm": 2.0315756797790527, "learning_rate": 4.1836048468325115e-06, "loss": 0.8751, "step": 3094 }, { "epoch": 1.6280904786954236, "grad_norm": 2.045595407485962, "learning_rate": 4.1830912511217935e-06, "loss": 0.892, "step": 3095 }, { "epoch": 1.6286165176223042, "grad_norm": 2.065134048461914, "learning_rate": 4.182577525457138e-06, "loss": 0.9238, "step": 3096 }, { "epoch": 1.6291425565491846, "grad_norm": 1.991685390472412, "learning_rate": 4.182063669878211e-06, "loss": 0.9295, "step": 3097 }, { "epoch": 1.6296685954760652, "grad_norm": 2.017810106277466, "learning_rate": 4.181549684424687e-06, "loss": 0.8972, "step": 3098 }, { "epoch": 1.6301946344029457, "grad_norm": 2.072643756866455, "learning_rate": 4.181035569136252e-06, "loss": 0.8713, "step": 3099 }, { "epoch": 1.6307206733298263, "grad_norm": 2.0642311573028564, "learning_rate": 4.180521324052602e-06, "loss": 0.9312, "step": 3100 }, { "epoch": 1.631246712256707, "grad_norm": 2.052084445953369, "learning_rate": 4.1800069492134425e-06, "loss": 0.889, "step": 3101 }, { "epoch": 1.6317727511835876, "grad_norm": 1.9940053224563599, "learning_rate": 4.179492444658488e-06, "loss": 0.9606, "step": 3102 }, { "epoch": 1.6322987901104682, "grad_norm": 1.9448810815811157, "learning_rate": 4.178977810427466e-06, "loss": 0.9042, "step": 3103 }, { "epoch": 1.6328248290373488, "grad_norm": 2.0380942821502686, "learning_rate": 4.1784630465601114e-06, "loss": 0.8952, "step": 3104 }, { "epoch": 1.6333508679642295, "grad_norm": 2.0342228412628174, "learning_rate": 4.1779481530961695e-06, "loss": 0.8834, "step": 3105 }, { "epoch": 1.63387690689111, "grad_norm": 2.086081027984619, "learning_rate": 4.177433130075397e-06, "loss": 0.949, "step": 3106 }, { "epoch": 1.6344029458179905, "grad_norm": 2.1689419746398926, "learning_rate": 4.176917977537558e-06, "loss": 0.9563, "step": 3107 }, { "epoch": 1.6349289847448711, "grad_norm": 1.9891860485076904, "learning_rate": 4.1764026955224276e-06, "loss": 0.9233, "step": 3108 }, { "epoch": 1.6354550236717516, "grad_norm": 1.9214919805526733, "learning_rate": 4.175887284069795e-06, "loss": 0.8776, "step": 3109 }, { "epoch": 1.6359810625986322, "grad_norm": 1.9802346229553223, "learning_rate": 4.175371743219453e-06, "loss": 0.9275, "step": 3110 }, { "epoch": 1.6365071015255128, "grad_norm": 1.956176519393921, "learning_rate": 4.174856073011208e-06, "loss": 0.9669, "step": 3111 }, { "epoch": 1.6370331404523935, "grad_norm": 1.9850467443466187, "learning_rate": 4.174340273484875e-06, "loss": 0.9446, "step": 3112 }, { "epoch": 1.637559179379274, "grad_norm": 2.1451969146728516, "learning_rate": 4.173824344680281e-06, "loss": 1.0022, "step": 3113 }, { "epoch": 1.6380852183061547, "grad_norm": 2.0854461193084717, "learning_rate": 4.173308286637261e-06, "loss": 1.0051, "step": 3114 }, { "epoch": 1.6386112572330354, "grad_norm": 2.2442920207977295, "learning_rate": 4.1727920993956604e-06, "loss": 0.9611, "step": 3115 }, { "epoch": 1.639137296159916, "grad_norm": 2.0998659133911133, "learning_rate": 4.172275782995334e-06, "loss": 0.9306, "step": 3116 }, { "epoch": 1.6396633350867964, "grad_norm": 2.0424299240112305, "learning_rate": 4.171759337476149e-06, "loss": 0.9085, "step": 3117 }, { "epoch": 1.640189374013677, "grad_norm": 2.0824828147888184, "learning_rate": 4.171242762877981e-06, "loss": 0.9236, "step": 3118 }, { "epoch": 1.6407154129405574, "grad_norm": 2.1286230087280273, "learning_rate": 4.170726059240713e-06, "loss": 1.0158, "step": 3119 }, { "epoch": 1.641241451867438, "grad_norm": 2.1157569885253906, "learning_rate": 4.1702092266042425e-06, "loss": 0.9332, "step": 3120 }, { "epoch": 1.6417674907943187, "grad_norm": 2.1147117614746094, "learning_rate": 4.169692265008475e-06, "loss": 0.9673, "step": 3121 }, { "epoch": 1.6422935297211994, "grad_norm": 2.021984100341797, "learning_rate": 4.169175174493325e-06, "loss": 0.9808, "step": 3122 }, { "epoch": 1.64281956864808, "grad_norm": 1.9503893852233887, "learning_rate": 4.168657955098718e-06, "loss": 0.9293, "step": 3123 }, { "epoch": 1.6433456075749606, "grad_norm": 2.125218629837036, "learning_rate": 4.1681406068645895e-06, "loss": 0.9082, "step": 3124 }, { "epoch": 1.6438716465018413, "grad_norm": 2.25494647026062, "learning_rate": 4.167623129830884e-06, "loss": 0.9515, "step": 3125 }, { "epoch": 1.644397685428722, "grad_norm": 2.164952516555786, "learning_rate": 4.167105524037558e-06, "loss": 0.9344, "step": 3126 }, { "epoch": 1.6449237243556023, "grad_norm": 2.136427164077759, "learning_rate": 4.166587789524576e-06, "loss": 0.9056, "step": 3127 }, { "epoch": 1.645449763282483, "grad_norm": 1.9986604452133179, "learning_rate": 4.166069926331912e-06, "loss": 0.9076, "step": 3128 }, { "epoch": 1.6459758022093633, "grad_norm": 2.1529273986816406, "learning_rate": 4.165551934499553e-06, "loss": 0.9151, "step": 3129 }, { "epoch": 1.646501841136244, "grad_norm": 2.1443097591400146, "learning_rate": 4.1650338140674916e-06, "loss": 0.9907, "step": 3130 }, { "epoch": 1.6470278800631246, "grad_norm": 2.0582191944122314, "learning_rate": 4.164515565075735e-06, "loss": 0.9531, "step": 3131 }, { "epoch": 1.6475539189900053, "grad_norm": 2.032761573791504, "learning_rate": 4.163997187564296e-06, "loss": 0.8946, "step": 3132 }, { "epoch": 1.6480799579168859, "grad_norm": 2.1963999271392822, "learning_rate": 4.163478681573201e-06, "loss": 0.955, "step": 3133 }, { "epoch": 1.6486059968437665, "grad_norm": 2.0777454376220703, "learning_rate": 4.162960047142482e-06, "loss": 0.9267, "step": 3134 }, { "epoch": 1.6491320357706472, "grad_norm": 2.2676467895507812, "learning_rate": 4.162441284312186e-06, "loss": 1.021, "step": 3135 }, { "epoch": 1.6496580746975276, "grad_norm": 2.0743093490600586, "learning_rate": 4.161922393122368e-06, "loss": 0.9285, "step": 3136 }, { "epoch": 1.6501841136244082, "grad_norm": 2.017014980316162, "learning_rate": 4.161403373613089e-06, "loss": 0.932, "step": 3137 }, { "epoch": 1.6507101525512888, "grad_norm": 1.935449242591858, "learning_rate": 4.160884225824427e-06, "loss": 0.8828, "step": 3138 }, { "epoch": 1.6512361914781692, "grad_norm": 2.1415903568267822, "learning_rate": 4.160364949796462e-06, "loss": 0.8867, "step": 3139 }, { "epoch": 1.6517622304050499, "grad_norm": 2.27426815032959, "learning_rate": 4.1598455455692924e-06, "loss": 0.9527, "step": 3140 }, { "epoch": 1.6522882693319305, "grad_norm": 2.1137592792510986, "learning_rate": 4.15932601318302e-06, "loss": 0.9242, "step": 3141 }, { "epoch": 1.6528143082588111, "grad_norm": 2.1169378757476807, "learning_rate": 4.1588063526777586e-06, "loss": 0.9069, "step": 3142 }, { "epoch": 1.6533403471856918, "grad_norm": 2.0287654399871826, "learning_rate": 4.158286564093632e-06, "loss": 0.9316, "step": 3143 }, { "epoch": 1.6538663861125724, "grad_norm": 2.131171226501465, "learning_rate": 4.157766647470774e-06, "loss": 0.9938, "step": 3144 }, { "epoch": 1.654392425039453, "grad_norm": 1.9909673929214478, "learning_rate": 4.1572466028493285e-06, "loss": 0.8913, "step": 3145 }, { "epoch": 1.6549184639663335, "grad_norm": 1.9932284355163574, "learning_rate": 4.1567264302694495e-06, "loss": 0.8966, "step": 3146 }, { "epoch": 1.655444502893214, "grad_norm": 2.0377657413482666, "learning_rate": 4.156206129771298e-06, "loss": 0.9106, "step": 3147 }, { "epoch": 1.6559705418200947, "grad_norm": 2.0442769527435303, "learning_rate": 4.155685701395049e-06, "loss": 1.0005, "step": 3148 }, { "epoch": 1.6564965807469751, "grad_norm": 1.9996798038482666, "learning_rate": 4.155165145180885e-06, "loss": 0.8855, "step": 3149 }, { "epoch": 1.6570226196738558, "grad_norm": 2.041524887084961, "learning_rate": 4.154644461168999e-06, "loss": 0.9461, "step": 3150 }, { "epoch": 1.6575486586007364, "grad_norm": 1.9929088354110718, "learning_rate": 4.154123649399594e-06, "loss": 0.9425, "step": 3151 }, { "epoch": 1.658074697527617, "grad_norm": 1.9028164148330688, "learning_rate": 4.153602709912882e-06, "loss": 0.8973, "step": 3152 }, { "epoch": 1.6586007364544977, "grad_norm": 1.9024345874786377, "learning_rate": 4.1530816427490865e-06, "loss": 0.9084, "step": 3153 }, { "epoch": 1.6591267753813783, "grad_norm": 2.1555325984954834, "learning_rate": 4.152560447948438e-06, "loss": 0.933, "step": 3154 }, { "epoch": 1.659652814308259, "grad_norm": 2.1050522327423096, "learning_rate": 4.152039125551182e-06, "loss": 0.9663, "step": 3155 }, { "epoch": 1.6601788532351394, "grad_norm": 1.9364120960235596, "learning_rate": 4.151517675597566e-06, "loss": 0.8771, "step": 3156 }, { "epoch": 1.66070489216202, "grad_norm": 2.0937752723693848, "learning_rate": 4.150996098127856e-06, "loss": 0.9343, "step": 3157 }, { "epoch": 1.6612309310889006, "grad_norm": 1.9322218894958496, "learning_rate": 4.150474393182322e-06, "loss": 0.9277, "step": 3158 }, { "epoch": 1.661756970015781, "grad_norm": 1.9301400184631348, "learning_rate": 4.149952560801246e-06, "loss": 0.8937, "step": 3159 }, { "epoch": 1.6622830089426617, "grad_norm": 2.0601139068603516, "learning_rate": 4.149430601024919e-06, "loss": 0.9411, "step": 3160 }, { "epoch": 1.6628090478695423, "grad_norm": 2.0804789066314697, "learning_rate": 4.148908513893643e-06, "loss": 0.9271, "step": 3161 }, { "epoch": 1.663335086796423, "grad_norm": 2.0413947105407715, "learning_rate": 4.148386299447728e-06, "loss": 0.9446, "step": 3162 }, { "epoch": 1.6638611257233036, "grad_norm": 2.0702428817749023, "learning_rate": 4.1478639577274956e-06, "loss": 0.9527, "step": 3163 }, { "epoch": 1.6643871646501842, "grad_norm": 2.1859652996063232, "learning_rate": 4.147341488773277e-06, "loss": 0.9219, "step": 3164 }, { "epoch": 1.6649132035770648, "grad_norm": 1.9993692636489868, "learning_rate": 4.146818892625412e-06, "loss": 0.9205, "step": 3165 }, { "epoch": 1.6654392425039453, "grad_norm": 1.9294373989105225, "learning_rate": 4.146296169324251e-06, "loss": 0.9163, "step": 3166 }, { "epoch": 1.665965281430826, "grad_norm": 2.0685832500457764, "learning_rate": 4.145773318910156e-06, "loss": 0.9098, "step": 3167 }, { "epoch": 1.6664913203577063, "grad_norm": 2.0580673217773438, "learning_rate": 4.145250341423494e-06, "loss": 0.9478, "step": 3168 }, { "epoch": 1.667017359284587, "grad_norm": 1.9835233688354492, "learning_rate": 4.144727236904647e-06, "loss": 0.9843, "step": 3169 }, { "epoch": 1.6675433982114676, "grad_norm": 1.9416699409484863, "learning_rate": 4.144204005394005e-06, "loss": 0.8998, "step": 3170 }, { "epoch": 1.6680694371383482, "grad_norm": 2.109788179397583, "learning_rate": 4.143680646931966e-06, "loss": 1.012, "step": 3171 }, { "epoch": 1.6685954760652288, "grad_norm": 1.9718213081359863, "learning_rate": 4.143157161558939e-06, "loss": 0.9549, "step": 3172 }, { "epoch": 1.6691215149921095, "grad_norm": 2.1271800994873047, "learning_rate": 4.142633549315345e-06, "loss": 0.9119, "step": 3173 }, { "epoch": 1.6696475539189901, "grad_norm": 1.9742629528045654, "learning_rate": 4.142109810241611e-06, "loss": 0.9187, "step": 3174 }, { "epoch": 1.6701735928458707, "grad_norm": 2.1130025386810303, "learning_rate": 4.1415859443781775e-06, "loss": 0.9665, "step": 3175 }, { "epoch": 1.6706996317727512, "grad_norm": 2.1361539363861084, "learning_rate": 4.141061951765491e-06, "loss": 0.9287, "step": 3176 }, { "epoch": 1.6712256706996318, "grad_norm": 1.9771889448165894, "learning_rate": 4.140537832444012e-06, "loss": 0.9135, "step": 3177 }, { "epoch": 1.6717517096265122, "grad_norm": 2.1832566261291504, "learning_rate": 4.1400135864542054e-06, "loss": 0.8852, "step": 3178 }, { "epoch": 1.6722777485533928, "grad_norm": 2.119732618331909, "learning_rate": 4.139489213836552e-06, "loss": 0.9804, "step": 3179 }, { "epoch": 1.6728037874802735, "grad_norm": 2.2114458084106445, "learning_rate": 4.138964714631538e-06, "loss": 0.9262, "step": 3180 }, { "epoch": 1.673329826407154, "grad_norm": 2.1006972789764404, "learning_rate": 4.1384400888796604e-06, "loss": 0.9234, "step": 3181 }, { "epoch": 1.6738558653340347, "grad_norm": 2.1315839290618896, "learning_rate": 4.137915336621428e-06, "loss": 0.9986, "step": 3182 }, { "epoch": 1.6743819042609154, "grad_norm": 1.92546546459198, "learning_rate": 4.137390457897356e-06, "loss": 0.9263, "step": 3183 }, { "epoch": 1.674907943187796, "grad_norm": 2.1958208084106445, "learning_rate": 4.136865452747971e-06, "loss": 0.9483, "step": 3184 }, { "epoch": 1.6754339821146766, "grad_norm": 2.2576241493225098, "learning_rate": 4.13634032121381e-06, "loss": 0.9555, "step": 3185 }, { "epoch": 1.675960021041557, "grad_norm": 2.141944646835327, "learning_rate": 4.1358150633354195e-06, "loss": 0.9573, "step": 3186 }, { "epoch": 1.6764860599684377, "grad_norm": 2.1498608589172363, "learning_rate": 4.135289679153356e-06, "loss": 0.9284, "step": 3187 }, { "epoch": 1.677012098895318, "grad_norm": 1.8684433698654175, "learning_rate": 4.134764168708183e-06, "loss": 0.8987, "step": 3188 }, { "epoch": 1.6775381378221987, "grad_norm": 2.0984723567962646, "learning_rate": 4.134238532040479e-06, "loss": 0.9444, "step": 3189 }, { "epoch": 1.6780641767490794, "grad_norm": 1.9849915504455566, "learning_rate": 4.1337127691908255e-06, "loss": 0.961, "step": 3190 }, { "epoch": 1.67859021567596, "grad_norm": 1.9701485633850098, "learning_rate": 4.13318688019982e-06, "loss": 0.9391, "step": 3191 }, { "epoch": 1.6791162546028406, "grad_norm": 2.1401596069335938, "learning_rate": 4.132660865108065e-06, "loss": 0.9557, "step": 3192 }, { "epoch": 1.6796422935297213, "grad_norm": 1.9514589309692383, "learning_rate": 4.132134723956178e-06, "loss": 0.9181, "step": 3193 }, { "epoch": 1.680168332456602, "grad_norm": 1.9245766401290894, "learning_rate": 4.131608456784782e-06, "loss": 0.897, "step": 3194 }, { "epoch": 1.6806943713834825, "grad_norm": 1.9949605464935303, "learning_rate": 4.13108206363451e-06, "loss": 0.9441, "step": 3195 }, { "epoch": 1.681220410310363, "grad_norm": 2.337113857269287, "learning_rate": 4.130555544546005e-06, "loss": 0.8951, "step": 3196 }, { "epoch": 1.6817464492372436, "grad_norm": 1.9845924377441406, "learning_rate": 4.130028899559922e-06, "loss": 0.934, "step": 3197 }, { "epoch": 1.682272488164124, "grad_norm": 1.9929417371749878, "learning_rate": 4.129502128716922e-06, "loss": 0.969, "step": 3198 }, { "epoch": 1.6827985270910046, "grad_norm": 1.9864381551742554, "learning_rate": 4.12897523205768e-06, "loss": 0.9729, "step": 3199 }, { "epoch": 1.6833245660178853, "grad_norm": 2.0064280033111572, "learning_rate": 4.128448209622878e-06, "loss": 0.8764, "step": 3200 }, { "epoch": 1.683850604944766, "grad_norm": 1.9478604793548584, "learning_rate": 4.1279210614532075e-06, "loss": 0.9274, "step": 3201 }, { "epoch": 1.6843766438716465, "grad_norm": 2.097722053527832, "learning_rate": 4.12739378758937e-06, "loss": 0.9318, "step": 3202 }, { "epoch": 1.6849026827985272, "grad_norm": 2.1442971229553223, "learning_rate": 4.126866388072078e-06, "loss": 0.921, "step": 3203 }, { "epoch": 1.6854287217254078, "grad_norm": 2.283609390258789, "learning_rate": 4.126338862942053e-06, "loss": 0.8789, "step": 3204 }, { "epoch": 1.6859547606522882, "grad_norm": 2.0093772411346436, "learning_rate": 4.125811212240024e-06, "loss": 0.9492, "step": 3205 }, { "epoch": 1.6864807995791689, "grad_norm": 2.0583980083465576, "learning_rate": 4.125283436006734e-06, "loss": 0.9086, "step": 3206 }, { "epoch": 1.6870068385060495, "grad_norm": 2.033256769180298, "learning_rate": 4.1247555342829324e-06, "loss": 0.9349, "step": 3207 }, { "epoch": 1.68753287743293, "grad_norm": 2.022223472595215, "learning_rate": 4.124227507109379e-06, "loss": 0.9331, "step": 3208 }, { "epoch": 1.6880589163598105, "grad_norm": 2.046595811843872, "learning_rate": 4.1236993545268435e-06, "loss": 0.9894, "step": 3209 }, { "epoch": 1.6885849552866912, "grad_norm": 1.9218565225601196, "learning_rate": 4.123171076576107e-06, "loss": 0.9271, "step": 3210 }, { "epoch": 1.6891109942135718, "grad_norm": 2.1003799438476562, "learning_rate": 4.122642673297956e-06, "loss": 0.949, "step": 3211 }, { "epoch": 1.6896370331404524, "grad_norm": 1.9997978210449219, "learning_rate": 4.122114144733191e-06, "loss": 0.9741, "step": 3212 }, { "epoch": 1.690163072067333, "grad_norm": 2.0700809955596924, "learning_rate": 4.121585490922621e-06, "loss": 0.9892, "step": 3213 }, { "epoch": 1.6906891109942137, "grad_norm": 1.9868223667144775, "learning_rate": 4.121056711907062e-06, "loss": 0.8757, "step": 3214 }, { "epoch": 1.6912151499210941, "grad_norm": 2.085045337677002, "learning_rate": 4.120527807727343e-06, "loss": 0.9595, "step": 3215 }, { "epoch": 1.6917411888479748, "grad_norm": 1.824670433998108, "learning_rate": 4.1199987784243025e-06, "loss": 0.9565, "step": 3216 }, { "epoch": 1.6922672277748554, "grad_norm": 1.9989205598831177, "learning_rate": 4.119469624038786e-06, "loss": 0.959, "step": 3217 }, { "epoch": 1.6927932667017358, "grad_norm": 1.9225279092788696, "learning_rate": 4.11894034461165e-06, "loss": 0.9166, "step": 3218 }, { "epoch": 1.6933193056286164, "grad_norm": 2.031460762023926, "learning_rate": 4.1184109401837635e-06, "loss": 0.9507, "step": 3219 }, { "epoch": 1.693845344555497, "grad_norm": 2.001452922821045, "learning_rate": 4.117881410795999e-06, "loss": 0.9006, "step": 3220 }, { "epoch": 1.6943713834823777, "grad_norm": 1.879812240600586, "learning_rate": 4.117351756489246e-06, "loss": 0.861, "step": 3221 }, { "epoch": 1.6948974224092583, "grad_norm": 2.122471809387207, "learning_rate": 4.116821977304398e-06, "loss": 0.8968, "step": 3222 }, { "epoch": 1.695423461336139, "grad_norm": 2.114089012145996, "learning_rate": 4.1162920732823595e-06, "loss": 0.9038, "step": 3223 }, { "epoch": 1.6959495002630196, "grad_norm": 1.99599289894104, "learning_rate": 4.115762044464046e-06, "loss": 0.9623, "step": 3224 }, { "epoch": 1.6964755391899, "grad_norm": 1.9803627729415894, "learning_rate": 4.115231890890381e-06, "loss": 0.9592, "step": 3225 }, { "epoch": 1.6970015781167807, "grad_norm": 1.9647092819213867, "learning_rate": 4.114701612602299e-06, "loss": 0.9371, "step": 3226 }, { "epoch": 1.6975276170436613, "grad_norm": 2.1353018283843994, "learning_rate": 4.1141712096407436e-06, "loss": 0.8824, "step": 3227 }, { "epoch": 1.6980536559705417, "grad_norm": 1.999079704284668, "learning_rate": 4.113640682046667e-06, "loss": 0.9609, "step": 3228 }, { "epoch": 1.6985796948974223, "grad_norm": 2.0933756828308105, "learning_rate": 4.113110029861035e-06, "loss": 0.9286, "step": 3229 }, { "epoch": 1.699105733824303, "grad_norm": 2.275855302810669, "learning_rate": 4.1125792531248165e-06, "loss": 0.9584, "step": 3230 }, { "epoch": 1.6996317727511836, "grad_norm": 2.0643019676208496, "learning_rate": 4.112048351878995e-06, "loss": 0.9332, "step": 3231 }, { "epoch": 1.7001578116780642, "grad_norm": 1.9502769708633423, "learning_rate": 4.111517326164562e-06, "loss": 0.9221, "step": 3232 }, { "epoch": 1.7006838506049449, "grad_norm": 2.0422542095184326, "learning_rate": 4.11098617602252e-06, "loss": 0.9784, "step": 3233 }, { "epoch": 1.7012098895318255, "grad_norm": 1.9388501644134521, "learning_rate": 4.110454901493878e-06, "loss": 0.9744, "step": 3234 }, { "epoch": 1.701735928458706, "grad_norm": 1.9232968091964722, "learning_rate": 4.109923502619657e-06, "loss": 0.8971, "step": 3235 }, { "epoch": 1.7022619673855865, "grad_norm": 2.1951730251312256, "learning_rate": 4.109391979440889e-06, "loss": 0.985, "step": 3236 }, { "epoch": 1.702788006312467, "grad_norm": 1.9620068073272705, "learning_rate": 4.108860331998611e-06, "loss": 0.8322, "step": 3237 }, { "epoch": 1.7033140452393476, "grad_norm": 1.961638331413269, "learning_rate": 4.108328560333873e-06, "loss": 0.9242, "step": 3238 }, { "epoch": 1.7038400841662282, "grad_norm": 2.0044515132904053, "learning_rate": 4.107796664487734e-06, "loss": 0.8943, "step": 3239 }, { "epoch": 1.7043661230931089, "grad_norm": 1.8574254512786865, "learning_rate": 4.107264644501264e-06, "loss": 0.9227, "step": 3240 }, { "epoch": 1.7048921620199895, "grad_norm": 1.9833990335464478, "learning_rate": 4.106732500415539e-06, "loss": 0.9405, "step": 3241 }, { "epoch": 1.7054182009468701, "grad_norm": 1.9433012008666992, "learning_rate": 4.106200232271647e-06, "loss": 0.8671, "step": 3242 }, { "epoch": 1.7059442398737508, "grad_norm": 2.0191597938537598, "learning_rate": 4.105667840110686e-06, "loss": 0.9877, "step": 3243 }, { "epoch": 1.7064702788006314, "grad_norm": 2.02719783782959, "learning_rate": 4.105135323973762e-06, "loss": 0.9427, "step": 3244 }, { "epoch": 1.7069963177275118, "grad_norm": 2.0298643112182617, "learning_rate": 4.104602683901991e-06, "loss": 0.9013, "step": 3245 }, { "epoch": 1.7075223566543924, "grad_norm": 2.04894757270813, "learning_rate": 4.104069919936501e-06, "loss": 0.8836, "step": 3246 }, { "epoch": 1.7080483955812729, "grad_norm": 1.8774256706237793, "learning_rate": 4.103537032118426e-06, "loss": 0.9109, "step": 3247 }, { "epoch": 1.7085744345081535, "grad_norm": 1.989170789718628, "learning_rate": 4.10300402048891e-06, "loss": 0.9529, "step": 3248 }, { "epoch": 1.7091004734350341, "grad_norm": 1.9783909320831299, "learning_rate": 4.102470885089109e-06, "loss": 0.9024, "step": 3249 }, { "epoch": 1.7096265123619148, "grad_norm": 2.037511110305786, "learning_rate": 4.101937625960187e-06, "loss": 0.9438, "step": 3250 }, { "epoch": 1.7101525512887954, "grad_norm": 1.983834147453308, "learning_rate": 4.101404243143318e-06, "loss": 0.9009, "step": 3251 }, { "epoch": 1.710678590215676, "grad_norm": 1.9569228887557983, "learning_rate": 4.100870736679684e-06, "loss": 0.9031, "step": 3252 }, { "epoch": 1.7112046291425567, "grad_norm": 2.137965440750122, "learning_rate": 4.100337106610479e-06, "loss": 0.9367, "step": 3253 }, { "epoch": 1.7117306680694373, "grad_norm": 2.038703203201294, "learning_rate": 4.099803352976906e-06, "loss": 0.9436, "step": 3254 }, { "epoch": 1.7122567069963177, "grad_norm": 2.0265614986419678, "learning_rate": 4.0992694758201754e-06, "loss": 0.9541, "step": 3255 }, { "epoch": 1.7127827459231983, "grad_norm": 1.9582267999649048, "learning_rate": 4.098735475181509e-06, "loss": 0.9345, "step": 3256 }, { "epoch": 1.7133087848500788, "grad_norm": 2.1450209617614746, "learning_rate": 4.098201351102138e-06, "loss": 0.9859, "step": 3257 }, { "epoch": 1.7138348237769594, "grad_norm": 2.063988447189331, "learning_rate": 4.097667103623303e-06, "loss": 0.9701, "step": 3258 }, { "epoch": 1.71436086270384, "grad_norm": 2.0069267749786377, "learning_rate": 4.097132732786253e-06, "loss": 0.9743, "step": 3259 }, { "epoch": 1.7148869016307207, "grad_norm": 1.9959112405776978, "learning_rate": 4.096598238632249e-06, "loss": 0.9029, "step": 3260 }, { "epoch": 1.7154129405576013, "grad_norm": 1.9694600105285645, "learning_rate": 4.09606362120256e-06, "loss": 0.9015, "step": 3261 }, { "epoch": 1.715938979484482, "grad_norm": 2.4184322357177734, "learning_rate": 4.0955288805384645e-06, "loss": 0.8851, "step": 3262 }, { "epoch": 1.7164650184113626, "grad_norm": 1.9955581426620483, "learning_rate": 4.094994016681248e-06, "loss": 0.964, "step": 3263 }, { "epoch": 1.716991057338243, "grad_norm": 1.981042504310608, "learning_rate": 4.094459029672213e-06, "loss": 0.931, "step": 3264 }, { "epoch": 1.7175170962651236, "grad_norm": 1.9434010982513428, "learning_rate": 4.093923919552663e-06, "loss": 0.8766, "step": 3265 }, { "epoch": 1.7180431351920042, "grad_norm": 1.9982467889785767, "learning_rate": 4.0933886863639145e-06, "loss": 0.8826, "step": 3266 }, { "epoch": 1.7185691741188847, "grad_norm": 1.9993340969085693, "learning_rate": 4.092853330147296e-06, "loss": 0.9527, "step": 3267 }, { "epoch": 1.7190952130457653, "grad_norm": 2.0537142753601074, "learning_rate": 4.092317850944141e-06, "loss": 0.9147, "step": 3268 }, { "epoch": 1.719621251972646, "grad_norm": 1.9260109663009644, "learning_rate": 4.091782248795796e-06, "loss": 0.8483, "step": 3269 }, { "epoch": 1.7201472908995266, "grad_norm": 2.032158136367798, "learning_rate": 4.091246523743615e-06, "loss": 0.929, "step": 3270 }, { "epoch": 1.7206733298264072, "grad_norm": 2.10296368598938, "learning_rate": 4.090710675828963e-06, "loss": 0.9243, "step": 3271 }, { "epoch": 1.7211993687532878, "grad_norm": 2.0934739112854004, "learning_rate": 4.090174705093212e-06, "loss": 0.9266, "step": 3272 }, { "epoch": 1.7217254076801685, "grad_norm": 1.994555115699768, "learning_rate": 4.089638611577745e-06, "loss": 0.9328, "step": 3273 }, { "epoch": 1.7222514466070489, "grad_norm": 2.169400215148926, "learning_rate": 4.089102395323957e-06, "loss": 0.9347, "step": 3274 }, { "epoch": 1.7227774855339295, "grad_norm": 2.2973196506500244, "learning_rate": 4.088566056373248e-06, "loss": 0.946, "step": 3275 }, { "epoch": 1.7233035244608101, "grad_norm": 1.9845727682113647, "learning_rate": 4.08802959476703e-06, "loss": 0.8932, "step": 3276 }, { "epoch": 1.7238295633876906, "grad_norm": 1.9923732280731201, "learning_rate": 4.087493010546725e-06, "loss": 0.9289, "step": 3277 }, { "epoch": 1.7243556023145712, "grad_norm": 3.3909382820129395, "learning_rate": 4.086956303753761e-06, "loss": 0.9671, "step": 3278 }, { "epoch": 1.7248816412414518, "grad_norm": 1.9383569955825806, "learning_rate": 4.08641947442958e-06, "loss": 0.9332, "step": 3279 }, { "epoch": 1.7254076801683325, "grad_norm": 1.9901137351989746, "learning_rate": 4.085882522615631e-06, "loss": 0.9043, "step": 3280 }, { "epoch": 1.725933719095213, "grad_norm": 1.969484806060791, "learning_rate": 4.085345448353373e-06, "loss": 0.8957, "step": 3281 }, { "epoch": 1.7264597580220937, "grad_norm": 1.9522175788879395, "learning_rate": 4.084808251684274e-06, "loss": 0.9594, "step": 3282 }, { "epoch": 1.7269857969489744, "grad_norm": 1.9766138792037964, "learning_rate": 4.08427093264981e-06, "loss": 0.919, "step": 3283 }, { "epoch": 1.7275118358758548, "grad_norm": 2.008881092071533, "learning_rate": 4.083733491291471e-06, "loss": 0.942, "step": 3284 }, { "epoch": 1.7280378748027354, "grad_norm": 2.085505247116089, "learning_rate": 4.083195927650752e-06, "loss": 0.8945, "step": 3285 }, { "epoch": 1.728563913729616, "grad_norm": 1.9567203521728516, "learning_rate": 4.0826582417691605e-06, "loss": 0.9192, "step": 3286 }, { "epoch": 1.7290899526564965, "grad_norm": 2.1172187328338623, "learning_rate": 4.08212043368821e-06, "loss": 0.8969, "step": 3287 }, { "epoch": 1.729615991583377, "grad_norm": 1.9815617799758911, "learning_rate": 4.081582503449427e-06, "loss": 0.9183, "step": 3288 }, { "epoch": 1.7301420305102577, "grad_norm": 1.9883172512054443, "learning_rate": 4.081044451094346e-06, "loss": 0.9549, "step": 3289 }, { "epoch": 1.7306680694371384, "grad_norm": 1.9001867771148682, "learning_rate": 4.080506276664509e-06, "loss": 0.9247, "step": 3290 }, { "epoch": 1.731194108364019, "grad_norm": 2.0134174823760986, "learning_rate": 4.07996798020147e-06, "loss": 0.9058, "step": 3291 }, { "epoch": 1.7317201472908996, "grad_norm": 2.0924973487854004, "learning_rate": 4.079429561746794e-06, "loss": 0.9548, "step": 3292 }, { "epoch": 1.7322461862177803, "grad_norm": 1.994047999382019, "learning_rate": 4.07889102134205e-06, "loss": 0.9381, "step": 3293 }, { "epoch": 1.7327722251446607, "grad_norm": 1.980707049369812, "learning_rate": 4.078352359028821e-06, "loss": 0.9435, "step": 3294 }, { "epoch": 1.7332982640715413, "grad_norm": 2.001751661300659, "learning_rate": 4.077813574848698e-06, "loss": 0.9294, "step": 3295 }, { "epoch": 1.7338243029984217, "grad_norm": 1.9710110425949097, "learning_rate": 4.07727466884328e-06, "loss": 0.9307, "step": 3296 }, { "epoch": 1.7343503419253024, "grad_norm": 2.128279209136963, "learning_rate": 4.076735641054177e-06, "loss": 0.9091, "step": 3297 }, { "epoch": 1.734876380852183, "grad_norm": 2.0614635944366455, "learning_rate": 4.07619649152301e-06, "loss": 0.9059, "step": 3298 }, { "epoch": 1.7354024197790636, "grad_norm": 2.0709080696105957, "learning_rate": 4.075657220291406e-06, "loss": 0.9665, "step": 3299 }, { "epoch": 1.7359284587059443, "grad_norm": 2.1249303817749023, "learning_rate": 4.075117827401003e-06, "loss": 0.9429, "step": 3300 }, { "epoch": 1.736454497632825, "grad_norm": 2.183804988861084, "learning_rate": 4.074578312893449e-06, "loss": 0.9603, "step": 3301 }, { "epoch": 1.7369805365597055, "grad_norm": 1.9954277276992798, "learning_rate": 4.074038676810399e-06, "loss": 0.8813, "step": 3302 }, { "epoch": 1.7375065754865862, "grad_norm": 2.0698442459106445, "learning_rate": 4.073498919193522e-06, "loss": 0.93, "step": 3303 }, { "epoch": 1.7380326144134666, "grad_norm": 1.9365150928497314, "learning_rate": 4.07295904008449e-06, "loss": 0.9466, "step": 3304 }, { "epoch": 1.7385586533403472, "grad_norm": 2.223771572113037, "learning_rate": 4.0724190395249905e-06, "loss": 0.9252, "step": 3305 }, { "epoch": 1.7390846922672276, "grad_norm": 1.9846419095993042, "learning_rate": 4.071878917556716e-06, "loss": 0.9207, "step": 3306 }, { "epoch": 1.7396107311941083, "grad_norm": 2.1234962940216064, "learning_rate": 4.071338674221373e-06, "loss": 0.9907, "step": 3307 }, { "epoch": 1.7401367701209889, "grad_norm": 2.0506014823913574, "learning_rate": 4.07079830956067e-06, "loss": 0.9653, "step": 3308 }, { "epoch": 1.7406628090478695, "grad_norm": 2.043147563934326, "learning_rate": 4.070257823616332e-06, "loss": 0.9406, "step": 3309 }, { "epoch": 1.7411888479747502, "grad_norm": 2.0823752880096436, "learning_rate": 4.069717216430093e-06, "loss": 0.9736, "step": 3310 }, { "epoch": 1.7417148869016308, "grad_norm": 1.9480175971984863, "learning_rate": 4.069176488043689e-06, "loss": 0.9258, "step": 3311 }, { "epoch": 1.7422409258285114, "grad_norm": 2.002666473388672, "learning_rate": 4.068635638498874e-06, "loss": 0.9734, "step": 3312 }, { "epoch": 1.742766964755392, "grad_norm": 2.2887017726898193, "learning_rate": 4.068094667837407e-06, "loss": 0.9505, "step": 3313 }, { "epoch": 1.7432930036822725, "grad_norm": 2.0582175254821777, "learning_rate": 4.067553576101056e-06, "loss": 0.9222, "step": 3314 }, { "epoch": 1.743819042609153, "grad_norm": 2.223386287689209, "learning_rate": 4.067012363331601e-06, "loss": 0.9363, "step": 3315 }, { "epoch": 1.7443450815360335, "grad_norm": 2.11930251121521, "learning_rate": 4.066471029570829e-06, "loss": 0.9246, "step": 3316 }, { "epoch": 1.7448711204629141, "grad_norm": 2.291123628616333, "learning_rate": 4.0659295748605374e-06, "loss": 0.9042, "step": 3317 }, { "epoch": 1.7453971593897948, "grad_norm": 1.9040521383285522, "learning_rate": 4.065387999242533e-06, "loss": 0.9317, "step": 3318 }, { "epoch": 1.7459231983166754, "grad_norm": 2.213954210281372, "learning_rate": 4.0648463027586316e-06, "loss": 0.9399, "step": 3319 }, { "epoch": 1.746449237243556, "grad_norm": 2.166907787322998, "learning_rate": 4.064304485450657e-06, "loss": 0.9521, "step": 3320 }, { "epoch": 1.7469752761704367, "grad_norm": 2.1662700176239014, "learning_rate": 4.063762547360446e-06, "loss": 0.9706, "step": 3321 }, { "epoch": 1.7475013150973173, "grad_norm": 2.051248073577881, "learning_rate": 4.06322048852984e-06, "loss": 0.9348, "step": 3322 }, { "epoch": 1.748027354024198, "grad_norm": 2.1157615184783936, "learning_rate": 4.062678309000695e-06, "loss": 0.9516, "step": 3323 }, { "epoch": 1.7485533929510784, "grad_norm": 1.9936703443527222, "learning_rate": 4.06213600881487e-06, "loss": 0.9134, "step": 3324 }, { "epoch": 1.749079431877959, "grad_norm": 1.8598010540008545, "learning_rate": 4.0615935880142406e-06, "loss": 0.8601, "step": 3325 }, { "epoch": 1.7496054708048394, "grad_norm": 2.115668773651123, "learning_rate": 4.061051046640685e-06, "loss": 0.9157, "step": 3326 }, { "epoch": 1.75013150973172, "grad_norm": 1.9370783567428589, "learning_rate": 4.060508384736095e-06, "loss": 0.9661, "step": 3327 }, { "epoch": 1.7506575486586007, "grad_norm": 2.0576865673065186, "learning_rate": 4.0599656023423695e-06, "loss": 0.8764, "step": 3328 }, { "epoch": 1.7511835875854813, "grad_norm": 2.078967809677124, "learning_rate": 4.059422699501418e-06, "loss": 0.9192, "step": 3329 }, { "epoch": 1.751709626512362, "grad_norm": 2.1120834350585938, "learning_rate": 4.058879676255158e-06, "loss": 0.9515, "step": 3330 }, { "epoch": 1.7522356654392426, "grad_norm": 1.9745832681655884, "learning_rate": 4.058336532645519e-06, "loss": 0.8756, "step": 3331 }, { "epoch": 1.7527617043661232, "grad_norm": 2.1134536266326904, "learning_rate": 4.057793268714438e-06, "loss": 0.9737, "step": 3332 }, { "epoch": 1.7532877432930036, "grad_norm": 2.158564329147339, "learning_rate": 4.0572498845038575e-06, "loss": 0.8804, "step": 3333 }, { "epoch": 1.7538137822198843, "grad_norm": 2.121643543243408, "learning_rate": 4.056706380055737e-06, "loss": 0.9237, "step": 3334 }, { "epoch": 1.754339821146765, "grad_norm": 1.9843847751617432, "learning_rate": 4.056162755412038e-06, "loss": 0.9501, "step": 3335 }, { "epoch": 1.7548658600736453, "grad_norm": 1.998321294784546, "learning_rate": 4.055619010614738e-06, "loss": 0.9545, "step": 3336 }, { "epoch": 1.755391899000526, "grad_norm": 2.104769229888916, "learning_rate": 4.055075145705819e-06, "loss": 0.9272, "step": 3337 }, { "epoch": 1.7559179379274066, "grad_norm": 2.0450971126556396, "learning_rate": 4.054531160727272e-06, "loss": 0.9261, "step": 3338 }, { "epoch": 1.7564439768542872, "grad_norm": 2.0160062313079834, "learning_rate": 4.053987055721102e-06, "loss": 0.8997, "step": 3339 }, { "epoch": 1.7569700157811678, "grad_norm": 2.213660478591919, "learning_rate": 4.053442830729316e-06, "loss": 0.923, "step": 3340 }, { "epoch": 1.7574960547080485, "grad_norm": 2.109800100326538, "learning_rate": 4.052898485793938e-06, "loss": 0.9012, "step": 3341 }, { "epoch": 1.7580220936349291, "grad_norm": 2.068057060241699, "learning_rate": 4.052354020956995e-06, "loss": 0.9589, "step": 3342 }, { "epoch": 1.7585481325618095, "grad_norm": 1.9910136461257935, "learning_rate": 4.0518094362605285e-06, "loss": 0.9209, "step": 3343 }, { "epoch": 1.7590741714886902, "grad_norm": 2.1302149295806885, "learning_rate": 4.0512647317465856e-06, "loss": 0.9447, "step": 3344 }, { "epoch": 1.7596002104155708, "grad_norm": 2.0052196979522705, "learning_rate": 4.050719907457222e-06, "loss": 0.9138, "step": 3345 }, { "epoch": 1.7601262493424512, "grad_norm": 1.9952465295791626, "learning_rate": 4.050174963434508e-06, "loss": 0.9339, "step": 3346 }, { "epoch": 1.7606522882693318, "grad_norm": 2.0861756801605225, "learning_rate": 4.049629899720516e-06, "loss": 0.913, "step": 3347 }, { "epoch": 1.7611783271962125, "grad_norm": 2.0138020515441895, "learning_rate": 4.0490847163573335e-06, "loss": 0.9264, "step": 3348 }, { "epoch": 1.7617043661230931, "grad_norm": 2.0093681812286377, "learning_rate": 4.048539413387053e-06, "loss": 0.9597, "step": 3349 }, { "epoch": 1.7622304050499737, "grad_norm": 2.0494930744171143, "learning_rate": 4.047993990851781e-06, "loss": 0.974, "step": 3350 }, { "epoch": 1.7627564439768544, "grad_norm": 2.0490882396698, "learning_rate": 4.0474484487936275e-06, "loss": 0.9241, "step": 3351 }, { "epoch": 1.763282482903735, "grad_norm": 1.9500864744186401, "learning_rate": 4.046902787254717e-06, "loss": 0.9448, "step": 3352 }, { "epoch": 1.7638085218306154, "grad_norm": 2.1233692169189453, "learning_rate": 4.046357006277177e-06, "loss": 0.9255, "step": 3353 }, { "epoch": 1.764334560757496, "grad_norm": 2.1407630443573, "learning_rate": 4.0458111059031525e-06, "loss": 0.9697, "step": 3354 }, { "epoch": 1.7648605996843767, "grad_norm": 2.052542209625244, "learning_rate": 4.04526508617479e-06, "loss": 0.9533, "step": 3355 }, { "epoch": 1.765386638611257, "grad_norm": 1.98019540309906, "learning_rate": 4.044718947134251e-06, "loss": 0.9118, "step": 3356 }, { "epoch": 1.7659126775381377, "grad_norm": 1.932976245880127, "learning_rate": 4.0441726888237025e-06, "loss": 0.9289, "step": 3357 }, { "epoch": 1.7664387164650184, "grad_norm": 2.0778164863586426, "learning_rate": 4.0436263112853205e-06, "loss": 0.9055, "step": 3358 }, { "epoch": 1.766964755391899, "grad_norm": 1.998678207397461, "learning_rate": 4.043079814561294e-06, "loss": 0.9465, "step": 3359 }, { "epoch": 1.7674907943187796, "grad_norm": 1.97935950756073, "learning_rate": 4.042533198693818e-06, "loss": 0.9617, "step": 3360 }, { "epoch": 1.7680168332456603, "grad_norm": 2.1482112407684326, "learning_rate": 4.041986463725097e-06, "loss": 0.9664, "step": 3361 }, { "epoch": 1.768542872172541, "grad_norm": 1.997873306274414, "learning_rate": 4.041439609697345e-06, "loss": 0.8919, "step": 3362 }, { "epoch": 1.7690689110994213, "grad_norm": 2.037944793701172, "learning_rate": 4.040892636652787e-06, "loss": 0.9327, "step": 3363 }, { "epoch": 1.769594950026302, "grad_norm": 2.235060691833496, "learning_rate": 4.040345544633654e-06, "loss": 0.9484, "step": 3364 }, { "epoch": 1.7701209889531824, "grad_norm": 2.0589184761047363, "learning_rate": 4.039798333682188e-06, "loss": 0.9324, "step": 3365 }, { "epoch": 1.770647027880063, "grad_norm": 1.9436758756637573, "learning_rate": 4.039251003840641e-06, "loss": 0.9637, "step": 3366 }, { "epoch": 1.7711730668069436, "grad_norm": 1.9861931800842285, "learning_rate": 4.038703555151271e-06, "loss": 0.9415, "step": 3367 }, { "epoch": 1.7716991057338243, "grad_norm": 1.8725347518920898, "learning_rate": 4.03815598765635e-06, "loss": 0.8893, "step": 3368 }, { "epoch": 1.772225144660705, "grad_norm": 2.049994945526123, "learning_rate": 4.037608301398155e-06, "loss": 0.9413, "step": 3369 }, { "epoch": 1.7727511835875855, "grad_norm": 1.93240225315094, "learning_rate": 4.037060496418973e-06, "loss": 0.866, "step": 3370 }, { "epoch": 1.7732772225144662, "grad_norm": 2.157377243041992, "learning_rate": 4.036512572761103e-06, "loss": 0.9135, "step": 3371 }, { "epoch": 1.7738032614413468, "grad_norm": 2.0129494667053223, "learning_rate": 4.03596453046685e-06, "loss": 0.9028, "step": 3372 }, { "epoch": 1.7743293003682272, "grad_norm": 2.0105013847351074, "learning_rate": 4.035416369578527e-06, "loss": 0.9115, "step": 3373 }, { "epoch": 1.7748553392951079, "grad_norm": 2.2003941535949707, "learning_rate": 4.0348680901384604e-06, "loss": 0.9599, "step": 3374 }, { "epoch": 1.7753813782219883, "grad_norm": 2.059209108352661, "learning_rate": 4.034319692188984e-06, "loss": 0.9076, "step": 3375 }, { "epoch": 1.775907417148869, "grad_norm": 1.91313898563385, "learning_rate": 4.033771175772439e-06, "loss": 0.9006, "step": 3376 }, { "epoch": 1.7764334560757495, "grad_norm": 2.1229684352874756, "learning_rate": 4.0332225409311795e-06, "loss": 0.9237, "step": 3377 }, { "epoch": 1.7769594950026302, "grad_norm": 1.9086703062057495, "learning_rate": 4.032673787707564e-06, "loss": 0.9071, "step": 3378 }, { "epoch": 1.7774855339295108, "grad_norm": 1.9538326263427734, "learning_rate": 4.0321249161439635e-06, "loss": 0.8935, "step": 3379 }, { "epoch": 1.7780115728563914, "grad_norm": 2.0689868927001953, "learning_rate": 4.031575926282757e-06, "loss": 0.9726, "step": 3380 }, { "epoch": 1.778537611783272, "grad_norm": 2.095792531967163, "learning_rate": 4.031026818166332e-06, "loss": 0.9441, "step": 3381 }, { "epoch": 1.7790636507101527, "grad_norm": 2.124300003051758, "learning_rate": 4.030477591837088e-06, "loss": 0.9116, "step": 3382 }, { "epoch": 1.7795896896370331, "grad_norm": 2.085632801055908, "learning_rate": 4.02992824733743e-06, "loss": 0.92, "step": 3383 }, { "epoch": 1.7801157285639138, "grad_norm": 1.9192638397216797, "learning_rate": 4.029378784709774e-06, "loss": 0.9261, "step": 3384 }, { "epoch": 1.7806417674907942, "grad_norm": 1.8831740617752075, "learning_rate": 4.028829203996544e-06, "loss": 0.942, "step": 3385 }, { "epoch": 1.7811678064176748, "grad_norm": 2.037264823913574, "learning_rate": 4.028279505240176e-06, "loss": 0.8794, "step": 3386 }, { "epoch": 1.7816938453445554, "grad_norm": 2.0986642837524414, "learning_rate": 4.027729688483112e-06, "loss": 0.9363, "step": 3387 }, { "epoch": 1.782219884271436, "grad_norm": 2.0402517318725586, "learning_rate": 4.027179753767803e-06, "loss": 0.9687, "step": 3388 }, { "epoch": 1.7827459231983167, "grad_norm": 2.0788092613220215, "learning_rate": 4.0266297011367126e-06, "loss": 0.9769, "step": 3389 }, { "epoch": 1.7832719621251973, "grad_norm": 2.0980873107910156, "learning_rate": 4.026079530632309e-06, "loss": 0.9048, "step": 3390 }, { "epoch": 1.783798001052078, "grad_norm": 1.9436454772949219, "learning_rate": 4.025529242297071e-06, "loss": 0.8717, "step": 3391 }, { "epoch": 1.7843240399789586, "grad_norm": 2.1506896018981934, "learning_rate": 4.024978836173491e-06, "loss": 0.9504, "step": 3392 }, { "epoch": 1.784850078905839, "grad_norm": 2.014055013656616, "learning_rate": 4.024428312304064e-06, "loss": 0.9186, "step": 3393 }, { "epoch": 1.7853761178327197, "grad_norm": 2.0455405712127686, "learning_rate": 4.023877670731296e-06, "loss": 0.89, "step": 3394 }, { "epoch": 1.7859021567596, "grad_norm": 2.091071605682373, "learning_rate": 4.023326911497705e-06, "loss": 0.9457, "step": 3395 }, { "epoch": 1.7864281956864807, "grad_norm": 1.9758284091949463, "learning_rate": 4.022776034645814e-06, "loss": 0.9166, "step": 3396 }, { "epoch": 1.7869542346133613, "grad_norm": 2.020355463027954, "learning_rate": 4.022225040218158e-06, "loss": 0.9448, "step": 3397 }, { "epoch": 1.787480273540242, "grad_norm": 2.024291515350342, "learning_rate": 4.021673928257281e-06, "loss": 0.9847, "step": 3398 }, { "epoch": 1.7880063124671226, "grad_norm": 2.0182437896728516, "learning_rate": 4.021122698805733e-06, "loss": 0.9613, "step": 3399 }, { "epoch": 1.7885323513940032, "grad_norm": 2.1047723293304443, "learning_rate": 4.020571351906077e-06, "loss": 0.8924, "step": 3400 }, { "epoch": 1.7890583903208839, "grad_norm": 2.1373345851898193, "learning_rate": 4.020019887600882e-06, "loss": 0.8715, "step": 3401 }, { "epoch": 1.7895844292477643, "grad_norm": 2.0742130279541016, "learning_rate": 4.019468305932728e-06, "loss": 0.9074, "step": 3402 }, { "epoch": 1.790110468174645, "grad_norm": 1.9964598417282104, "learning_rate": 4.018916606944205e-06, "loss": 0.9133, "step": 3403 }, { "epoch": 1.7906365071015256, "grad_norm": 2.060347080230713, "learning_rate": 4.018364790677907e-06, "loss": 0.954, "step": 3404 }, { "epoch": 1.791162546028406, "grad_norm": 1.9850090742111206, "learning_rate": 4.017812857176443e-06, "loss": 0.9305, "step": 3405 }, { "epoch": 1.7916885849552866, "grad_norm": 1.9255491495132446, "learning_rate": 4.0172608064824284e-06, "loss": 0.9121, "step": 3406 }, { "epoch": 1.7922146238821672, "grad_norm": 2.1255195140838623, "learning_rate": 4.016708638638487e-06, "loss": 0.9093, "step": 3407 }, { "epoch": 1.7927406628090479, "grad_norm": 2.060781240463257, "learning_rate": 4.016156353687253e-06, "loss": 0.9531, "step": 3408 }, { "epoch": 1.7932667017359285, "grad_norm": 2.0284042358398438, "learning_rate": 4.01560395167137e-06, "loss": 0.9552, "step": 3409 }, { "epoch": 1.7937927406628091, "grad_norm": 2.0162978172302246, "learning_rate": 4.015051432633487e-06, "loss": 1.0171, "step": 3410 }, { "epoch": 1.7943187795896898, "grad_norm": 1.9135181903839111, "learning_rate": 4.014498796616269e-06, "loss": 0.9062, "step": 3411 }, { "epoch": 1.7948448185165702, "grad_norm": 2.0738272666931152, "learning_rate": 4.013946043662382e-06, "loss": 0.9397, "step": 3412 }, { "epoch": 1.7953708574434508, "grad_norm": 2.1009130477905273, "learning_rate": 4.013393173814507e-06, "loss": 0.9524, "step": 3413 }, { "epoch": 1.7958968963703315, "grad_norm": 2.2217934131622314, "learning_rate": 4.012840187115331e-06, "loss": 0.9275, "step": 3414 }, { "epoch": 1.7964229352972119, "grad_norm": 1.976807951927185, "learning_rate": 4.012287083607552e-06, "loss": 0.9285, "step": 3415 }, { "epoch": 1.7969489742240925, "grad_norm": 2.154989242553711, "learning_rate": 4.011733863333874e-06, "loss": 0.9834, "step": 3416 }, { "epoch": 1.7974750131509731, "grad_norm": 2.0007622241973877, "learning_rate": 4.011180526337014e-06, "loss": 0.9589, "step": 3417 }, { "epoch": 1.7980010520778538, "grad_norm": 2.1359214782714844, "learning_rate": 4.010627072659694e-06, "loss": 0.931, "step": 3418 }, { "epoch": 1.7985270910047344, "grad_norm": 2.0183515548706055, "learning_rate": 4.010073502344648e-06, "loss": 0.9644, "step": 3419 }, { "epoch": 1.799053129931615, "grad_norm": 2.0154995918273926, "learning_rate": 4.009519815434619e-06, "loss": 0.9341, "step": 3420 }, { "epoch": 1.7995791688584957, "grad_norm": 2.030836820602417, "learning_rate": 4.008966011972357e-06, "loss": 0.9219, "step": 3421 }, { "epoch": 1.800105207785376, "grad_norm": 2.099733829498291, "learning_rate": 4.008412092000621e-06, "loss": 0.9248, "step": 3422 }, { "epoch": 1.8006312467122567, "grad_norm": 1.9904857873916626, "learning_rate": 4.007858055562181e-06, "loss": 0.9101, "step": 3423 }, { "epoch": 1.8011572856391374, "grad_norm": 2.0814883708953857, "learning_rate": 4.007303902699815e-06, "loss": 0.9311, "step": 3424 }, { "epoch": 1.8016833245660178, "grad_norm": 2.056016445159912, "learning_rate": 4.00674963345631e-06, "loss": 0.9385, "step": 3425 }, { "epoch": 1.8022093634928984, "grad_norm": 2.0093741416931152, "learning_rate": 4.006195247874462e-06, "loss": 0.9388, "step": 3426 }, { "epoch": 1.802735402419779, "grad_norm": 2.0039663314819336, "learning_rate": 4.005640745997075e-06, "loss": 0.9341, "step": 3427 }, { "epoch": 1.8032614413466597, "grad_norm": 2.116356611251831, "learning_rate": 4.005086127866964e-06, "loss": 0.9732, "step": 3428 }, { "epoch": 1.8037874802735403, "grad_norm": 1.9771263599395752, "learning_rate": 4.004531393526951e-06, "loss": 0.9142, "step": 3429 }, { "epoch": 1.804313519200421, "grad_norm": 2.0317890644073486, "learning_rate": 4.003976543019868e-06, "loss": 0.9494, "step": 3430 }, { "epoch": 1.8048395581273016, "grad_norm": 1.9724228382110596, "learning_rate": 4.003421576388557e-06, "loss": 0.9429, "step": 3431 }, { "epoch": 1.805365597054182, "grad_norm": 2.042203903198242, "learning_rate": 4.002866493675867e-06, "loss": 0.9109, "step": 3432 }, { "epoch": 1.8058916359810626, "grad_norm": 2.16092848777771, "learning_rate": 4.002311294924656e-06, "loss": 0.9762, "step": 3433 }, { "epoch": 1.806417674907943, "grad_norm": 2.238441228866577, "learning_rate": 4.001755980177793e-06, "loss": 0.8959, "step": 3434 }, { "epoch": 1.8069437138348237, "grad_norm": 1.994781494140625, "learning_rate": 4.001200549478153e-06, "loss": 0.9076, "step": 3435 }, { "epoch": 1.8074697527617043, "grad_norm": 2.1131508350372314, "learning_rate": 4.000645002868624e-06, "loss": 0.9805, "step": 3436 }, { "epoch": 1.807995791688585, "grad_norm": 1.9825927019119263, "learning_rate": 4.000089340392098e-06, "loss": 0.9148, "step": 3437 }, { "epoch": 1.8085218306154656, "grad_norm": 1.9437118768692017, "learning_rate": 3.9995335620914795e-06, "loss": 0.8814, "step": 3438 }, { "epoch": 1.8090478695423462, "grad_norm": 2.112123489379883, "learning_rate": 3.9989776680096825e-06, "loss": 0.9005, "step": 3439 }, { "epoch": 1.8095739084692268, "grad_norm": 1.907055139541626, "learning_rate": 3.998421658189626e-06, "loss": 0.9416, "step": 3440 }, { "epoch": 1.8100999473961075, "grad_norm": 2.0174427032470703, "learning_rate": 3.997865532674242e-06, "loss": 0.9194, "step": 3441 }, { "epoch": 1.8106259863229879, "grad_norm": 1.9886671304702759, "learning_rate": 3.997309291506469e-06, "loss": 0.8964, "step": 3442 }, { "epoch": 1.8111520252498685, "grad_norm": 1.8449174165725708, "learning_rate": 3.996752934729256e-06, "loss": 0.9377, "step": 3443 }, { "epoch": 1.811678064176749, "grad_norm": 2.189577341079712, "learning_rate": 3.9961964623855584e-06, "loss": 0.9184, "step": 3444 }, { "epoch": 1.8122041031036296, "grad_norm": 1.9280545711517334, "learning_rate": 3.9956398745183434e-06, "loss": 0.9141, "step": 3445 }, { "epoch": 1.8127301420305102, "grad_norm": 2.119157075881958, "learning_rate": 3.995083171170586e-06, "loss": 0.9477, "step": 3446 }, { "epoch": 1.8132561809573908, "grad_norm": 2.035979747772217, "learning_rate": 3.99452635238527e-06, "loss": 0.8714, "step": 3447 }, { "epoch": 1.8137822198842715, "grad_norm": 2.0809593200683594, "learning_rate": 3.993969418205389e-06, "loss": 0.9573, "step": 3448 }, { "epoch": 1.814308258811152, "grad_norm": 1.9352896213531494, "learning_rate": 3.993412368673942e-06, "loss": 0.8903, "step": 3449 }, { "epoch": 1.8148342977380327, "grad_norm": 2.013093948364258, "learning_rate": 3.992855203833943e-06, "loss": 0.94, "step": 3450 }, { "epoch": 1.8153603366649134, "grad_norm": 2.097919464111328, "learning_rate": 3.9922979237284094e-06, "loss": 0.9373, "step": 3451 }, { "epoch": 1.8158863755917938, "grad_norm": 1.9477840662002563, "learning_rate": 3.99174052840037e-06, "loss": 0.9429, "step": 3452 }, { "epoch": 1.8164124145186744, "grad_norm": 2.093498945236206, "learning_rate": 3.991183017892863e-06, "loss": 0.9699, "step": 3453 }, { "epoch": 1.8169384534455548, "grad_norm": 2.1500258445739746, "learning_rate": 3.990625392248935e-06, "loss": 0.9855, "step": 3454 }, { "epoch": 1.8174644923724355, "grad_norm": 1.9574027061462402, "learning_rate": 3.990067651511637e-06, "loss": 0.983, "step": 3455 }, { "epoch": 1.817990531299316, "grad_norm": 2.1163482666015625, "learning_rate": 3.989509795724038e-06, "loss": 0.84, "step": 3456 }, { "epoch": 1.8185165702261967, "grad_norm": 1.987338662147522, "learning_rate": 3.9889518249292095e-06, "loss": 0.92, "step": 3457 }, { "epoch": 1.8190426091530774, "grad_norm": 2.0121538639068604, "learning_rate": 3.988393739170231e-06, "loss": 0.9392, "step": 3458 }, { "epoch": 1.819568648079958, "grad_norm": 2.9024999141693115, "learning_rate": 3.987835538490194e-06, "loss": 0.9129, "step": 3459 }, { "epoch": 1.8200946870068386, "grad_norm": 2.004316806793213, "learning_rate": 3.9872772229322e-06, "loss": 0.9544, "step": 3460 }, { "epoch": 1.820620725933719, "grad_norm": 2.0067389011383057, "learning_rate": 3.9867187925393566e-06, "loss": 0.9239, "step": 3461 }, { "epoch": 1.8211467648605997, "grad_norm": 2.09218692779541, "learning_rate": 3.986160247354779e-06, "loss": 0.9387, "step": 3462 }, { "epoch": 1.8216728037874803, "grad_norm": 2.061206579208374, "learning_rate": 3.985601587421596e-06, "loss": 0.9259, "step": 3463 }, { "epoch": 1.8221988427143607, "grad_norm": 2.206636428833008, "learning_rate": 3.985042812782941e-06, "loss": 0.929, "step": 3464 }, { "epoch": 1.8227248816412414, "grad_norm": 2.2978951930999756, "learning_rate": 3.9844839234819575e-06, "loss": 0.9518, "step": 3465 }, { "epoch": 1.823250920568122, "grad_norm": 2.1015002727508545, "learning_rate": 3.983924919561799e-06, "loss": 0.9238, "step": 3466 }, { "epoch": 1.8237769594950026, "grad_norm": 2.033895969390869, "learning_rate": 3.983365801065627e-06, "loss": 0.9068, "step": 3467 }, { "epoch": 1.8243029984218833, "grad_norm": 1.9699472188949585, "learning_rate": 3.9828065680366125e-06, "loss": 0.9226, "step": 3468 }, { "epoch": 1.824829037348764, "grad_norm": 2.005648136138916, "learning_rate": 3.982247220517933e-06, "loss": 0.8994, "step": 3469 }, { "epoch": 1.8253550762756445, "grad_norm": 1.9914512634277344, "learning_rate": 3.981687758552777e-06, "loss": 0.9424, "step": 3470 }, { "epoch": 1.825881115202525, "grad_norm": 2.1137712001800537, "learning_rate": 3.981128182184342e-06, "loss": 0.9184, "step": 3471 }, { "epoch": 1.8264071541294056, "grad_norm": 2.2346999645233154, "learning_rate": 3.980568491455834e-06, "loss": 0.8919, "step": 3472 }, { "epoch": 1.8269331930562862, "grad_norm": 2.0023348331451416, "learning_rate": 3.980008686410467e-06, "loss": 0.9461, "step": 3473 }, { "epoch": 1.8274592319831666, "grad_norm": 2.0820887088775635, "learning_rate": 3.979448767091464e-06, "loss": 0.9131, "step": 3474 }, { "epoch": 1.8279852709100473, "grad_norm": 1.8880040645599365, "learning_rate": 3.978888733542058e-06, "loss": 0.9383, "step": 3475 }, { "epoch": 1.828511309836928, "grad_norm": 1.9987270832061768, "learning_rate": 3.978328585805488e-06, "loss": 0.9356, "step": 3476 }, { "epoch": 1.8290373487638085, "grad_norm": 1.984171986579895, "learning_rate": 3.977768323925006e-06, "loss": 0.8697, "step": 3477 }, { "epoch": 1.8295633876906892, "grad_norm": 1.9593433141708374, "learning_rate": 3.977207947943872e-06, "loss": 0.9397, "step": 3478 }, { "epoch": 1.8300894266175698, "grad_norm": 2.1641416549682617, "learning_rate": 3.976647457905349e-06, "loss": 0.9444, "step": 3479 }, { "epoch": 1.8306154655444504, "grad_norm": 2.08544921875, "learning_rate": 3.976086853852718e-06, "loss": 0.9853, "step": 3480 }, { "epoch": 1.8311415044713308, "grad_norm": 2.1902668476104736, "learning_rate": 3.9755261358292604e-06, "loss": 0.8569, "step": 3481 }, { "epoch": 1.8316675433982115, "grad_norm": 2.251384735107422, "learning_rate": 3.9749653038782725e-06, "loss": 0.9372, "step": 3482 }, { "epoch": 1.832193582325092, "grad_norm": 2.038652181625366, "learning_rate": 3.974404358043054e-06, "loss": 0.9624, "step": 3483 }, { "epoch": 1.8327196212519725, "grad_norm": 2.1229982376098633, "learning_rate": 3.9738432983669195e-06, "loss": 0.9698, "step": 3484 }, { "epoch": 1.8332456601788532, "grad_norm": 2.0407466888427734, "learning_rate": 3.9732821248931874e-06, "loss": 0.8918, "step": 3485 }, { "epoch": 1.8337716991057338, "grad_norm": 2.099503993988037, "learning_rate": 3.972720837665188e-06, "loss": 0.9041, "step": 3486 }, { "epoch": 1.8342977380326144, "grad_norm": 1.93677818775177, "learning_rate": 3.972159436726259e-06, "loss": 0.8743, "step": 3487 }, { "epoch": 1.834823776959495, "grad_norm": 2.3074264526367188, "learning_rate": 3.971597922119745e-06, "loss": 0.9377, "step": 3488 }, { "epoch": 1.8353498158863757, "grad_norm": 2.1103453636169434, "learning_rate": 3.971036293889004e-06, "loss": 0.9421, "step": 3489 }, { "epoch": 1.8358758548132563, "grad_norm": 2.43330717086792, "learning_rate": 3.970474552077399e-06, "loss": 0.8997, "step": 3490 }, { "epoch": 1.8364018937401367, "grad_norm": 1.9432933330535889, "learning_rate": 3.969912696728302e-06, "loss": 0.9191, "step": 3491 }, { "epoch": 1.8369279326670174, "grad_norm": 2.0006496906280518, "learning_rate": 3.969350727885095e-06, "loss": 0.9202, "step": 3492 }, { "epoch": 1.8374539715938978, "grad_norm": 2.0259854793548584, "learning_rate": 3.968788645591171e-06, "loss": 0.9747, "step": 3493 }, { "epoch": 1.8379800105207784, "grad_norm": 2.106186866760254, "learning_rate": 3.968226449889926e-06, "loss": 0.9078, "step": 3494 }, { "epoch": 1.838506049447659, "grad_norm": 2.0630154609680176, "learning_rate": 3.967664140824769e-06, "loss": 0.8738, "step": 3495 }, { "epoch": 1.8390320883745397, "grad_norm": 2.037552833557129, "learning_rate": 3.9671017184391165e-06, "loss": 1.0377, "step": 3496 }, { "epoch": 1.8395581273014203, "grad_norm": 2.241241693496704, "learning_rate": 3.966539182776395e-06, "loss": 0.9448, "step": 3497 }, { "epoch": 1.840084166228301, "grad_norm": 2.006606340408325, "learning_rate": 3.965976533880037e-06, "loss": 0.928, "step": 3498 }, { "epoch": 1.8406102051551816, "grad_norm": 1.9464002847671509, "learning_rate": 3.965413771793487e-06, "loss": 0.97, "step": 3499 }, { "epoch": 1.8411362440820622, "grad_norm": 2.0451865196228027, "learning_rate": 3.964850896560196e-06, "loss": 0.9165, "step": 3500 }, { "epoch": 1.8416622830089426, "grad_norm": 1.9204659461975098, "learning_rate": 3.964287908223624e-06, "loss": 0.9142, "step": 3501 }, { "epoch": 1.8421883219358233, "grad_norm": 2.125077486038208, "learning_rate": 3.9637248068272414e-06, "loss": 0.988, "step": 3502 }, { "epoch": 1.8427143608627037, "grad_norm": 1.939674735069275, "learning_rate": 3.963161592414526e-06, "loss": 0.862, "step": 3503 }, { "epoch": 1.8432403997895843, "grad_norm": 1.897651195526123, "learning_rate": 3.9625982650289625e-06, "loss": 0.8733, "step": 3504 }, { "epoch": 1.843766438716465, "grad_norm": 2.00661039352417, "learning_rate": 3.962034824714048e-06, "loss": 0.9172, "step": 3505 }, { "epoch": 1.8442924776433456, "grad_norm": 1.98454749584198, "learning_rate": 3.961471271513286e-06, "loss": 0.9424, "step": 3506 }, { "epoch": 1.8448185165702262, "grad_norm": 2.035205125808716, "learning_rate": 3.960907605470189e-06, "loss": 0.8885, "step": 3507 }, { "epoch": 1.8453445554971069, "grad_norm": 2.179349422454834, "learning_rate": 3.960343826628279e-06, "loss": 0.9111, "step": 3508 }, { "epoch": 1.8458705944239875, "grad_norm": 2.022300958633423, "learning_rate": 3.9597799350310865e-06, "loss": 0.9136, "step": 3509 }, { "epoch": 1.8463966333508681, "grad_norm": 2.047797918319702, "learning_rate": 3.959215930722149e-06, "loss": 0.9462, "step": 3510 }, { "epoch": 1.8469226722777485, "grad_norm": 2.07372784614563, "learning_rate": 3.958651813745015e-06, "loss": 0.9396, "step": 3511 }, { "epoch": 1.8474487112046292, "grad_norm": 2.019566774368286, "learning_rate": 3.958087584143241e-06, "loss": 0.9722, "step": 3512 }, { "epoch": 1.8479747501315096, "grad_norm": 1.9272618293762207, "learning_rate": 3.957523241960391e-06, "loss": 0.8958, "step": 3513 }, { "epoch": 1.8485007890583902, "grad_norm": 2.054652690887451, "learning_rate": 3.956958787240042e-06, "loss": 0.9224, "step": 3514 }, { "epoch": 1.8490268279852708, "grad_norm": 2.078669309616089, "learning_rate": 3.9563942200257715e-06, "loss": 0.9243, "step": 3515 }, { "epoch": 1.8495528669121515, "grad_norm": 2.0470850467681885, "learning_rate": 3.9558295403611735e-06, "loss": 0.9303, "step": 3516 }, { "epoch": 1.8500789058390321, "grad_norm": 2.0248045921325684, "learning_rate": 3.955264748289847e-06, "loss": 0.9309, "step": 3517 }, { "epoch": 1.8506049447659128, "grad_norm": 1.88893723487854, "learning_rate": 3.954699843855401e-06, "loss": 0.8972, "step": 3518 }, { "epoch": 1.8511309836927934, "grad_norm": 2.1490895748138428, "learning_rate": 3.954134827101451e-06, "loss": 0.9634, "step": 3519 }, { "epoch": 1.851657022619674, "grad_norm": 1.9519559144973755, "learning_rate": 3.953569698071625e-06, "loss": 0.9785, "step": 3520 }, { "epoch": 1.8521830615465544, "grad_norm": 1.9176199436187744, "learning_rate": 3.953004456809556e-06, "loss": 0.9059, "step": 3521 }, { "epoch": 1.852709100473435, "grad_norm": 1.8604464530944824, "learning_rate": 3.9524391033588876e-06, "loss": 0.8784, "step": 3522 }, { "epoch": 1.8532351394003155, "grad_norm": 2.101825475692749, "learning_rate": 3.95187363776327e-06, "loss": 0.9172, "step": 3523 }, { "epoch": 1.8537611783271961, "grad_norm": 1.93021559715271, "learning_rate": 3.9513080600663665e-06, "loss": 0.9763, "step": 3524 }, { "epoch": 1.8542872172540767, "grad_norm": 2.113861322402954, "learning_rate": 3.950742370311846e-06, "loss": 0.9769, "step": 3525 }, { "epoch": 1.8548132561809574, "grad_norm": 2.099055051803589, "learning_rate": 3.950176568543382e-06, "loss": 0.8805, "step": 3526 }, { "epoch": 1.855339295107838, "grad_norm": 2.029845952987671, "learning_rate": 3.949610654804666e-06, "loss": 0.9417, "step": 3527 }, { "epoch": 1.8558653340347187, "grad_norm": 2.2449495792388916, "learning_rate": 3.94904462913939e-06, "loss": 0.8459, "step": 3528 }, { "epoch": 1.8563913729615993, "grad_norm": 1.970975399017334, "learning_rate": 3.948478491591259e-06, "loss": 0.9175, "step": 3529 }, { "epoch": 1.8569174118884797, "grad_norm": 2.04496431350708, "learning_rate": 3.947912242203984e-06, "loss": 0.8819, "step": 3530 }, { "epoch": 1.8574434508153603, "grad_norm": 2.012033462524414, "learning_rate": 3.947345881021287e-06, "loss": 0.9318, "step": 3531 }, { "epoch": 1.857969489742241, "grad_norm": 2.1508708000183105, "learning_rate": 3.946779408086898e-06, "loss": 0.9753, "step": 3532 }, { "epoch": 1.8584955286691214, "grad_norm": 1.9989691972732544, "learning_rate": 3.946212823444555e-06, "loss": 0.933, "step": 3533 }, { "epoch": 1.859021567596002, "grad_norm": 2.150106191635132, "learning_rate": 3.945646127138003e-06, "loss": 0.9417, "step": 3534 }, { "epoch": 1.8595476065228826, "grad_norm": 2.0419235229492188, "learning_rate": 3.945079319211001e-06, "loss": 0.9687, "step": 3535 }, { "epoch": 1.8600736454497633, "grad_norm": 2.0383622646331787, "learning_rate": 3.944512399707309e-06, "loss": 0.9552, "step": 3536 }, { "epoch": 1.860599684376644, "grad_norm": 2.070049524307251, "learning_rate": 3.943945368670702e-06, "loss": 0.9677, "step": 3537 }, { "epoch": 1.8611257233035245, "grad_norm": 1.9403014183044434, "learning_rate": 3.94337822614496e-06, "loss": 0.905, "step": 3538 }, { "epoch": 1.8616517622304052, "grad_norm": 1.9676467180252075, "learning_rate": 3.942810972173875e-06, "loss": 0.8961, "step": 3539 }, { "epoch": 1.8621778011572856, "grad_norm": 2.0308592319488525, "learning_rate": 3.942243606801244e-06, "loss": 0.974, "step": 3540 }, { "epoch": 1.8627038400841662, "grad_norm": 2.3764054775238037, "learning_rate": 3.941676130070874e-06, "loss": 0.9501, "step": 3541 }, { "epoch": 1.8632298790110469, "grad_norm": 1.9631781578063965, "learning_rate": 3.941108542026582e-06, "loss": 0.9582, "step": 3542 }, { "epoch": 1.8637559179379273, "grad_norm": 2.015774726867676, "learning_rate": 3.940540842712192e-06, "loss": 0.9663, "step": 3543 }, { "epoch": 1.864281956864808, "grad_norm": 1.9868817329406738, "learning_rate": 3.939973032171534e-06, "loss": 0.9296, "step": 3544 }, { "epoch": 1.8648079957916885, "grad_norm": 2.0739920139312744, "learning_rate": 3.939405110448454e-06, "loss": 0.9182, "step": 3545 }, { "epoch": 1.8653340347185692, "grad_norm": 2.1579036712646484, "learning_rate": 3.938837077586799e-06, "loss": 0.9168, "step": 3546 }, { "epoch": 1.8658600736454498, "grad_norm": 1.8793091773986816, "learning_rate": 3.938268933630428e-06, "loss": 0.8804, "step": 3547 }, { "epoch": 1.8663861125723304, "grad_norm": 2.020940065383911, "learning_rate": 3.937700678623209e-06, "loss": 0.9249, "step": 3548 }, { "epoch": 1.866912151499211, "grad_norm": 2.0007359981536865, "learning_rate": 3.937132312609019e-06, "loss": 0.9348, "step": 3549 }, { "epoch": 1.8674381904260915, "grad_norm": 2.0313973426818848, "learning_rate": 3.936563835631739e-06, "loss": 0.9433, "step": 3550 }, { "epoch": 1.8679642293529721, "grad_norm": 1.9634615182876587, "learning_rate": 3.935995247735265e-06, "loss": 0.9424, "step": 3551 }, { "epoch": 1.8684902682798528, "grad_norm": 2.065763235092163, "learning_rate": 3.935426548963497e-06, "loss": 0.9016, "step": 3552 }, { "epoch": 1.8690163072067332, "grad_norm": 2.0466232299804688, "learning_rate": 3.934857739360345e-06, "loss": 0.9697, "step": 3553 }, { "epoch": 1.8695423461336138, "grad_norm": 2.0537874698638916, "learning_rate": 3.9342888189697295e-06, "loss": 0.9518, "step": 3554 }, { "epoch": 1.8700683850604944, "grad_norm": 1.9860124588012695, "learning_rate": 3.933719787835575e-06, "loss": 0.9465, "step": 3555 }, { "epoch": 1.870594423987375, "grad_norm": 2.1217265129089355, "learning_rate": 3.9331506460018175e-06, "loss": 0.9013, "step": 3556 }, { "epoch": 1.8711204629142557, "grad_norm": 2.0979650020599365, "learning_rate": 3.932581393512404e-06, "loss": 0.966, "step": 3557 }, { "epoch": 1.8716465018411363, "grad_norm": 2.1109416484832764, "learning_rate": 3.932012030411285e-06, "loss": 0.8636, "step": 3558 }, { "epoch": 1.872172540768017, "grad_norm": 1.9996652603149414, "learning_rate": 3.931442556742422e-06, "loss": 0.914, "step": 3559 }, { "epoch": 1.8726985796948974, "grad_norm": 1.9707735776901245, "learning_rate": 3.930872972549786e-06, "loss": 0.9091, "step": 3560 }, { "epoch": 1.873224618621778, "grad_norm": 2.126910924911499, "learning_rate": 3.930303277877354e-06, "loss": 0.9279, "step": 3561 }, { "epoch": 1.8737506575486584, "grad_norm": 1.9631305932998657, "learning_rate": 3.929733472769114e-06, "loss": 0.9808, "step": 3562 }, { "epoch": 1.874276696475539, "grad_norm": 2.0501370429992676, "learning_rate": 3.929163557269061e-06, "loss": 0.9403, "step": 3563 }, { "epoch": 1.8748027354024197, "grad_norm": 1.8951704502105713, "learning_rate": 3.9285935314212e-06, "loss": 0.9056, "step": 3564 }, { "epoch": 1.8753287743293003, "grad_norm": 2.058051109313965, "learning_rate": 3.928023395269543e-06, "loss": 0.9108, "step": 3565 }, { "epoch": 1.875854813256181, "grad_norm": 2.1401350498199463, "learning_rate": 3.927453148858109e-06, "loss": 0.9322, "step": 3566 }, { "epoch": 1.8763808521830616, "grad_norm": 2.0684049129486084, "learning_rate": 3.926882792230929e-06, "loss": 0.946, "step": 3567 }, { "epoch": 1.8769068911099422, "grad_norm": 2.124798059463501, "learning_rate": 3.926312325432043e-06, "loss": 0.908, "step": 3568 }, { "epoch": 1.8774329300368229, "grad_norm": 2.369767665863037, "learning_rate": 3.925741748505496e-06, "loss": 0.9851, "step": 3569 }, { "epoch": 1.8779589689637033, "grad_norm": 2.009350538253784, "learning_rate": 3.925171061495342e-06, "loss": 0.9367, "step": 3570 }, { "epoch": 1.878485007890584, "grad_norm": 2.1008574962615967, "learning_rate": 3.9246002644456475e-06, "loss": 0.9569, "step": 3571 }, { "epoch": 1.8790110468174643, "grad_norm": 2.0691325664520264, "learning_rate": 3.92402935740048e-06, "loss": 0.9193, "step": 3572 }, { "epoch": 1.879537085744345, "grad_norm": 2.1765453815460205, "learning_rate": 3.9234583404039245e-06, "loss": 0.8742, "step": 3573 }, { "epoch": 1.8800631246712256, "grad_norm": 2.004166603088379, "learning_rate": 3.922887213500067e-06, "loss": 0.923, "step": 3574 }, { "epoch": 1.8805891635981062, "grad_norm": 2.0143234729766846, "learning_rate": 3.922315976733008e-06, "loss": 0.9512, "step": 3575 }, { "epoch": 1.8811152025249869, "grad_norm": 2.1664822101593018, "learning_rate": 3.921744630146851e-06, "loss": 0.9997, "step": 3576 }, { "epoch": 1.8816412414518675, "grad_norm": 2.0898940563201904, "learning_rate": 3.921173173785712e-06, "loss": 0.9678, "step": 3577 }, { "epoch": 1.8821672803787481, "grad_norm": 2.0758349895477295, "learning_rate": 3.920601607693712e-06, "loss": 0.8768, "step": 3578 }, { "epoch": 1.8826933193056288, "grad_norm": 2.125844717025757, "learning_rate": 3.920029931914986e-06, "loss": 0.8709, "step": 3579 }, { "epoch": 1.8832193582325092, "grad_norm": 2.0366671085357666, "learning_rate": 3.91945814649367e-06, "loss": 0.8688, "step": 3580 }, { "epoch": 1.8837453971593898, "grad_norm": 2.055398941040039, "learning_rate": 3.918886251473914e-06, "loss": 0.9797, "step": 3581 }, { "epoch": 1.8842714360862702, "grad_norm": 2.0487844944000244, "learning_rate": 3.918314246899876e-06, "loss": 0.9574, "step": 3582 }, { "epoch": 1.8847974750131509, "grad_norm": 1.9624133110046387, "learning_rate": 3.91774213281572e-06, "loss": 0.918, "step": 3583 }, { "epoch": 1.8853235139400315, "grad_norm": 2.0293092727661133, "learning_rate": 3.91716990926562e-06, "loss": 0.9201, "step": 3584 }, { "epoch": 1.8858495528669121, "grad_norm": 2.2962074279785156, "learning_rate": 3.916597576293759e-06, "loss": 0.9452, "step": 3585 }, { "epoch": 1.8863755917937928, "grad_norm": 2.127540349960327, "learning_rate": 3.9160251339443256e-06, "loss": 0.9648, "step": 3586 }, { "epoch": 1.8869016307206734, "grad_norm": 1.877824306488037, "learning_rate": 3.915452582261521e-06, "loss": 0.9235, "step": 3587 }, { "epoch": 1.887427669647554, "grad_norm": 2.057386875152588, "learning_rate": 3.914879921289551e-06, "loss": 0.9292, "step": 3588 }, { "epoch": 1.8879537085744345, "grad_norm": 3.2312283515930176, "learning_rate": 3.914307151072635e-06, "loss": 0.9328, "step": 3589 }, { "epoch": 1.888479747501315, "grad_norm": 1.950832724571228, "learning_rate": 3.9137342716549934e-06, "loss": 0.9605, "step": 3590 }, { "epoch": 1.8890057864281957, "grad_norm": 1.954448938369751, "learning_rate": 3.913161283080861e-06, "loss": 0.9513, "step": 3591 }, { "epoch": 1.8895318253550761, "grad_norm": 2.27455735206604, "learning_rate": 3.912588185394478e-06, "loss": 0.973, "step": 3592 }, { "epoch": 1.8900578642819568, "grad_norm": 2.0080385208129883, "learning_rate": 3.912014978640095e-06, "loss": 0.921, "step": 3593 }, { "epoch": 1.8905839032088374, "grad_norm": 2.099836826324463, "learning_rate": 3.911441662861971e-06, "loss": 0.9357, "step": 3594 }, { "epoch": 1.891109942135718, "grad_norm": 2.0644240379333496, "learning_rate": 3.910868238104371e-06, "loss": 0.9077, "step": 3595 }, { "epoch": 1.8916359810625987, "grad_norm": 2.009563446044922, "learning_rate": 3.910294704411571e-06, "loss": 0.9689, "step": 3596 }, { "epoch": 1.8921620199894793, "grad_norm": 2.054391860961914, "learning_rate": 3.909721061827854e-06, "loss": 0.9415, "step": 3597 }, { "epoch": 1.89268805891636, "grad_norm": 2.0017175674438477, "learning_rate": 3.90914731039751e-06, "loss": 0.9376, "step": 3598 }, { "epoch": 1.8932140978432404, "grad_norm": 2.0345120429992676, "learning_rate": 3.908573450164843e-06, "loss": 0.9262, "step": 3599 }, { "epoch": 1.893740136770121, "grad_norm": 2.150937557220459, "learning_rate": 3.907999481174158e-06, "loss": 0.9221, "step": 3600 }, { "epoch": 1.8942661756970016, "grad_norm": 2.1007256507873535, "learning_rate": 3.907425403469775e-06, "loss": 0.9695, "step": 3601 }, { "epoch": 1.894792214623882, "grad_norm": 1.9815309047698975, "learning_rate": 3.906851217096016e-06, "loss": 0.9216, "step": 3602 }, { "epoch": 1.8953182535507627, "grad_norm": 2.0300369262695312, "learning_rate": 3.9062769220972175e-06, "loss": 0.9402, "step": 3603 }, { "epoch": 1.8958442924776433, "grad_norm": 2.0138251781463623, "learning_rate": 3.905702518517721e-06, "loss": 0.9359, "step": 3604 }, { "epoch": 1.896370331404524, "grad_norm": 1.9801183938980103, "learning_rate": 3.905128006401876e-06, "loss": 0.8944, "step": 3605 }, { "epoch": 1.8968963703314046, "grad_norm": 1.9915120601654053, "learning_rate": 3.904553385794043e-06, "loss": 0.949, "step": 3606 }, { "epoch": 1.8974224092582852, "grad_norm": 2.2659802436828613, "learning_rate": 3.903978656738588e-06, "loss": 0.9494, "step": 3607 }, { "epoch": 1.8979484481851658, "grad_norm": 1.9523696899414062, "learning_rate": 3.903403819279887e-06, "loss": 0.9271, "step": 3608 }, { "epoch": 1.8984744871120462, "grad_norm": 1.9705463647842407, "learning_rate": 3.902828873462325e-06, "loss": 0.9143, "step": 3609 }, { "epoch": 1.8990005260389269, "grad_norm": 2.035841703414917, "learning_rate": 3.902253819330294e-06, "loss": 0.9478, "step": 3610 }, { "epoch": 1.8995265649658075, "grad_norm": 2.091041326522827, "learning_rate": 3.901678656928193e-06, "loss": 0.9618, "step": 3611 }, { "epoch": 1.900052603892688, "grad_norm": 2.0914456844329834, "learning_rate": 3.901103386300433e-06, "loss": 0.938, "step": 3612 }, { "epoch": 1.9005786428195686, "grad_norm": 2.0624167919158936, "learning_rate": 3.900528007491431e-06, "loss": 0.9657, "step": 3613 }, { "epoch": 1.9011046817464492, "grad_norm": 2.2461578845977783, "learning_rate": 3.899952520545613e-06, "loss": 0.9332, "step": 3614 }, { "epoch": 1.9016307206733298, "grad_norm": 2.151547908782959, "learning_rate": 3.899376925507415e-06, "loss": 0.9012, "step": 3615 }, { "epoch": 1.9021567596002105, "grad_norm": 1.9578272104263306, "learning_rate": 3.898801222421275e-06, "loss": 0.9635, "step": 3616 }, { "epoch": 1.902682798527091, "grad_norm": 2.041421890258789, "learning_rate": 3.898225411331647e-06, "loss": 0.9166, "step": 3617 }, { "epoch": 1.9032088374539717, "grad_norm": 2.0090739727020264, "learning_rate": 3.89764949228299e-06, "loss": 0.8683, "step": 3618 }, { "epoch": 1.9037348763808521, "grad_norm": 2.2205519676208496, "learning_rate": 3.89707346531977e-06, "loss": 0.8927, "step": 3619 }, { "epoch": 1.9042609153077328, "grad_norm": 2.2684524059295654, "learning_rate": 3.896497330486466e-06, "loss": 0.9509, "step": 3620 }, { "epoch": 1.9047869542346134, "grad_norm": 2.0757508277893066, "learning_rate": 3.89592108782756e-06, "loss": 0.9806, "step": 3621 }, { "epoch": 1.9053129931614938, "grad_norm": 2.0777761936187744, "learning_rate": 3.8953447373875444e-06, "loss": 0.9496, "step": 3622 }, { "epoch": 1.9058390320883745, "grad_norm": 2.0333797931671143, "learning_rate": 3.89476827921092e-06, "loss": 0.9306, "step": 3623 }, { "epoch": 1.906365071015255, "grad_norm": 1.8876526355743408, "learning_rate": 3.894191713342197e-06, "loss": 0.8991, "step": 3624 }, { "epoch": 1.9068911099421357, "grad_norm": 1.9477458000183105, "learning_rate": 3.893615039825893e-06, "loss": 0.8891, "step": 3625 }, { "epoch": 1.9074171488690164, "grad_norm": 3.033585786819458, "learning_rate": 3.893038258706533e-06, "loss": 0.9388, "step": 3626 }, { "epoch": 1.907943187795897, "grad_norm": 1.9571001529693604, "learning_rate": 3.892461370028651e-06, "loss": 0.9243, "step": 3627 }, { "epoch": 1.9084692267227776, "grad_norm": 2.1075046062469482, "learning_rate": 3.89188437383679e-06, "loss": 0.9102, "step": 3628 }, { "epoch": 1.908995265649658, "grad_norm": 1.987993597984314, "learning_rate": 3.891307270175499e-06, "loss": 0.976, "step": 3629 }, { "epoch": 1.9095213045765387, "grad_norm": 2.0590975284576416, "learning_rate": 3.8907300590893405e-06, "loss": 0.9873, "step": 3630 }, { "epoch": 1.910047343503419, "grad_norm": 1.9899734258651733, "learning_rate": 3.890152740622879e-06, "loss": 0.9302, "step": 3631 }, { "epoch": 1.9105733824302997, "grad_norm": 1.9252071380615234, "learning_rate": 3.88957531482069e-06, "loss": 0.8609, "step": 3632 }, { "epoch": 1.9110994213571804, "grad_norm": 2.0324370861053467, "learning_rate": 3.88899778172736e-06, "loss": 0.959, "step": 3633 }, { "epoch": 1.911625460284061, "grad_norm": 1.955954670906067, "learning_rate": 3.888420141387479e-06, "loss": 0.9463, "step": 3634 }, { "epoch": 1.9121514992109416, "grad_norm": 2.011570930480957, "learning_rate": 3.887842393845647e-06, "loss": 0.9082, "step": 3635 }, { "epoch": 1.9126775381378223, "grad_norm": 1.8616174459457397, "learning_rate": 3.887264539146475e-06, "loss": 0.822, "step": 3636 }, { "epoch": 1.913203577064703, "grad_norm": 1.9932093620300293, "learning_rate": 3.8866865773345775e-06, "loss": 0.9408, "step": 3637 }, { "epoch": 1.9137296159915835, "grad_norm": 2.084038496017456, "learning_rate": 3.8861085084545804e-06, "loss": 0.9423, "step": 3638 }, { "epoch": 1.914255654918464, "grad_norm": 2.0028975009918213, "learning_rate": 3.885530332551119e-06, "loss": 0.9405, "step": 3639 }, { "epoch": 1.9147816938453446, "grad_norm": 2.0257554054260254, "learning_rate": 3.884952049668834e-06, "loss": 0.9229, "step": 3640 }, { "epoch": 1.915307732772225, "grad_norm": 2.041738271713257, "learning_rate": 3.884373659852375e-06, "loss": 0.9212, "step": 3641 }, { "epoch": 1.9158337716991056, "grad_norm": 2.1852664947509766, "learning_rate": 3.883795163146401e-06, "loss": 0.9516, "step": 3642 }, { "epoch": 1.9163598106259863, "grad_norm": 2.118609666824341, "learning_rate": 3.883216559595578e-06, "loss": 0.8979, "step": 3643 }, { "epoch": 1.916885849552867, "grad_norm": 2.0641415119171143, "learning_rate": 3.88263784924458e-06, "loss": 0.9353, "step": 3644 }, { "epoch": 1.9174118884797475, "grad_norm": 2.07556414604187, "learning_rate": 3.882059032138093e-06, "loss": 0.9462, "step": 3645 }, { "epoch": 1.9179379274066282, "grad_norm": 1.8993345499038696, "learning_rate": 3.8814801083208055e-06, "loss": 0.881, "step": 3646 }, { "epoch": 1.9184639663335088, "grad_norm": 2.1719818115234375, "learning_rate": 3.880901077837417e-06, "loss": 0.9887, "step": 3647 }, { "epoch": 1.9189900052603894, "grad_norm": 1.9939392805099487, "learning_rate": 3.880321940732637e-06, "loss": 0.9207, "step": 3648 }, { "epoch": 1.9195160441872698, "grad_norm": 2.0594565868377686, "learning_rate": 3.879742697051182e-06, "loss": 0.9562, "step": 3649 }, { "epoch": 1.9200420831141505, "grad_norm": 2.0248348712921143, "learning_rate": 3.879163346837774e-06, "loss": 0.9588, "step": 3650 }, { "epoch": 1.920568122041031, "grad_norm": 1.984763264656067, "learning_rate": 3.878583890137147e-06, "loss": 0.8868, "step": 3651 }, { "epoch": 1.9210941609679115, "grad_norm": 2.1007652282714844, "learning_rate": 3.878004326994041e-06, "loss": 0.9158, "step": 3652 }, { "epoch": 1.9216201998947922, "grad_norm": 2.0460150241851807, "learning_rate": 3.877424657453206e-06, "loss": 0.9232, "step": 3653 }, { "epoch": 1.9221462388216728, "grad_norm": 2.0245115756988525, "learning_rate": 3.876844881559397e-06, "loss": 0.8974, "step": 3654 }, { "epoch": 1.9226722777485534, "grad_norm": 2.114398956298828, "learning_rate": 3.876264999357381e-06, "loss": 0.9799, "step": 3655 }, { "epoch": 1.923198316675434, "grad_norm": 2.089329481124878, "learning_rate": 3.875685010891932e-06, "loss": 0.9099, "step": 3656 }, { "epoch": 1.9237243556023147, "grad_norm": 2.0064103603363037, "learning_rate": 3.8751049162078306e-06, "loss": 0.9408, "step": 3657 }, { "epoch": 1.924250394529195, "grad_norm": 2.079148769378662, "learning_rate": 3.874524715349867e-06, "loss": 0.883, "step": 3658 }, { "epoch": 1.9247764334560757, "grad_norm": 2.3152129650115967, "learning_rate": 3.873944408362839e-06, "loss": 0.9905, "step": 3659 }, { "epoch": 1.9253024723829564, "grad_norm": 2.098961114883423, "learning_rate": 3.873363995291555e-06, "loss": 0.9313, "step": 3660 }, { "epoch": 1.9258285113098368, "grad_norm": 2.1047542095184326, "learning_rate": 3.8727834761808265e-06, "loss": 0.933, "step": 3661 }, { "epoch": 1.9263545502367174, "grad_norm": 1.9624619483947754, "learning_rate": 3.872202851075478e-06, "loss": 0.9063, "step": 3662 }, { "epoch": 1.926880589163598, "grad_norm": 2.1317026615142822, "learning_rate": 3.87162212002034e-06, "loss": 1.0107, "step": 3663 }, { "epoch": 1.9274066280904787, "grad_norm": 1.983315348625183, "learning_rate": 3.871041283060252e-06, "loss": 0.9195, "step": 3664 }, { "epoch": 1.9279326670173593, "grad_norm": 1.998632550239563, "learning_rate": 3.870460340240061e-06, "loss": 0.908, "step": 3665 }, { "epoch": 1.92845870594424, "grad_norm": 2.03670072555542, "learning_rate": 3.869879291604623e-06, "loss": 0.9694, "step": 3666 }, { "epoch": 1.9289847448711206, "grad_norm": 2.0909924507141113, "learning_rate": 3.869298137198799e-06, "loss": 0.9395, "step": 3667 }, { "epoch": 1.929510783798001, "grad_norm": 1.8925726413726807, "learning_rate": 3.868716877067465e-06, "loss": 0.9029, "step": 3668 }, { "epoch": 1.9300368227248816, "grad_norm": 2.022911548614502, "learning_rate": 3.868135511255498e-06, "loss": 0.9254, "step": 3669 }, { "epoch": 1.9305628616517623, "grad_norm": 2.0514330863952637, "learning_rate": 3.867554039807787e-06, "loss": 0.9092, "step": 3670 }, { "epoch": 1.9310889005786427, "grad_norm": 1.999674677848816, "learning_rate": 3.8669724627692286e-06, "loss": 0.9699, "step": 3671 }, { "epoch": 1.9316149395055233, "grad_norm": 2.05678129196167, "learning_rate": 3.866390780184726e-06, "loss": 0.9735, "step": 3672 }, { "epoch": 1.932140978432404, "grad_norm": 2.085590124130249, "learning_rate": 3.8658089920991925e-06, "loss": 0.8801, "step": 3673 }, { "epoch": 1.9326670173592846, "grad_norm": 1.9885491132736206, "learning_rate": 3.865227098557549e-06, "loss": 0.9209, "step": 3674 }, { "epoch": 1.9331930562861652, "grad_norm": 2.196415662765503, "learning_rate": 3.864645099604724e-06, "loss": 0.9276, "step": 3675 }, { "epoch": 1.9337190952130459, "grad_norm": 1.8344975709915161, "learning_rate": 3.864062995285655e-06, "loss": 0.909, "step": 3676 }, { "epoch": 1.9342451341399265, "grad_norm": 2.0901834964752197, "learning_rate": 3.863480785645286e-06, "loss": 0.9277, "step": 3677 }, { "epoch": 1.934771173066807, "grad_norm": 2.1690406799316406, "learning_rate": 3.8628984707285725e-06, "loss": 0.8746, "step": 3678 }, { "epoch": 1.9352972119936875, "grad_norm": 2.040762424468994, "learning_rate": 3.862316050580473e-06, "loss": 0.9166, "step": 3679 }, { "epoch": 1.9358232509205682, "grad_norm": 1.9970393180847168, "learning_rate": 3.86173352524596e-06, "loss": 0.8848, "step": 3680 }, { "epoch": 1.9363492898474486, "grad_norm": 1.9791969060897827, "learning_rate": 3.861150894770008e-06, "loss": 0.8964, "step": 3681 }, { "epoch": 1.9368753287743292, "grad_norm": 2.1223771572113037, "learning_rate": 3.860568159197606e-06, "loss": 0.9815, "step": 3682 }, { "epoch": 1.9374013677012099, "grad_norm": 1.9766583442687988, "learning_rate": 3.859985318573745e-06, "loss": 0.9412, "step": 3683 }, { "epoch": 1.9379274066280905, "grad_norm": 1.9654362201690674, "learning_rate": 3.859402372943428e-06, "loss": 0.9381, "step": 3684 }, { "epoch": 1.9384534455549711, "grad_norm": 2.3252549171447754, "learning_rate": 3.858819322351667e-06, "loss": 0.947, "step": 3685 }, { "epoch": 1.9389794844818518, "grad_norm": 1.995221495628357, "learning_rate": 3.858236166843476e-06, "loss": 0.9367, "step": 3686 }, { "epoch": 1.9395055234087324, "grad_norm": 1.9967231750488281, "learning_rate": 3.857652906463886e-06, "loss": 0.9408, "step": 3687 }, { "epoch": 1.9400315623356128, "grad_norm": 2.0427780151367188, "learning_rate": 3.857069541257928e-06, "loss": 0.9301, "step": 3688 }, { "epoch": 1.9405576012624934, "grad_norm": 2.0469813346862793, "learning_rate": 3.856486071270645e-06, "loss": 0.9251, "step": 3689 }, { "epoch": 1.9410836401893738, "grad_norm": 2.1560215950012207, "learning_rate": 3.85590249654709e-06, "loss": 0.9303, "step": 3690 }, { "epoch": 1.9416096791162545, "grad_norm": 2.150679111480713, "learning_rate": 3.855318817132318e-06, "loss": 0.9144, "step": 3691 }, { "epoch": 1.9421357180431351, "grad_norm": 2.093212366104126, "learning_rate": 3.854735033071398e-06, "loss": 0.9746, "step": 3692 }, { "epoch": 1.9426617569700158, "grad_norm": 2.0641937255859375, "learning_rate": 3.854151144409405e-06, "loss": 0.8845, "step": 3693 }, { "epoch": 1.9431877958968964, "grad_norm": 2.0783212184906006, "learning_rate": 3.853567151191421e-06, "loss": 0.9351, "step": 3694 }, { "epoch": 1.943713834823777, "grad_norm": 2.0518221855163574, "learning_rate": 3.852983053462536e-06, "loss": 0.9608, "step": 3695 }, { "epoch": 1.9442398737506577, "grad_norm": 1.9672813415527344, "learning_rate": 3.852398851267853e-06, "loss": 0.8898, "step": 3696 }, { "epoch": 1.9447659126775383, "grad_norm": 1.9930319786071777, "learning_rate": 3.851814544652475e-06, "loss": 0.8886, "step": 3697 }, { "epoch": 1.9452919516044187, "grad_norm": 2.096101999282837, "learning_rate": 3.851230133661518e-06, "loss": 0.9094, "step": 3698 }, { "epoch": 1.9458179905312993, "grad_norm": 2.036329746246338, "learning_rate": 3.850645618340107e-06, "loss": 0.9404, "step": 3699 }, { "epoch": 1.9463440294581797, "grad_norm": 2.061896562576294, "learning_rate": 3.850060998733371e-06, "loss": 0.9542, "step": 3700 }, { "epoch": 1.9468700683850604, "grad_norm": 2.0338451862335205, "learning_rate": 3.849476274886451e-06, "loss": 0.887, "step": 3701 }, { "epoch": 1.947396107311941, "grad_norm": 1.9912186861038208, "learning_rate": 3.848891446844494e-06, "loss": 0.896, "step": 3702 }, { "epoch": 1.9479221462388217, "grad_norm": 2.1454694271087646, "learning_rate": 3.848306514652655e-06, "loss": 0.9176, "step": 3703 }, { "epoch": 1.9484481851657023, "grad_norm": 2.037468194961548, "learning_rate": 3.8477214783560975e-06, "loss": 0.9542, "step": 3704 }, { "epoch": 1.948974224092583, "grad_norm": 1.9898576736450195, "learning_rate": 3.8471363379999935e-06, "loss": 0.9174, "step": 3705 }, { "epoch": 1.9495002630194636, "grad_norm": 1.9927200078964233, "learning_rate": 3.846551093629522e-06, "loss": 0.9024, "step": 3706 }, { "epoch": 1.9500263019463442, "grad_norm": 2.0695693492889404, "learning_rate": 3.8459657452898714e-06, "loss": 0.9337, "step": 3707 }, { "epoch": 1.9505523408732246, "grad_norm": 2.1777498722076416, "learning_rate": 3.845380293026236e-06, "loss": 0.8862, "step": 3708 }, { "epoch": 1.9510783798001052, "grad_norm": 1.9665758609771729, "learning_rate": 3.84479473688382e-06, "loss": 0.8591, "step": 3709 }, { "epoch": 1.9516044187269856, "grad_norm": 2.0922954082489014, "learning_rate": 3.844209076907836e-06, "loss": 0.9255, "step": 3710 }, { "epoch": 1.9521304576538663, "grad_norm": 2.1177494525909424, "learning_rate": 3.843623313143503e-06, "loss": 0.9439, "step": 3711 }, { "epoch": 1.952656496580747, "grad_norm": 2.041552782058716, "learning_rate": 3.8430374456360486e-06, "loss": 0.8789, "step": 3712 }, { "epoch": 1.9531825355076275, "grad_norm": 2.0672214031219482, "learning_rate": 3.842451474430707e-06, "loss": 0.9426, "step": 3713 }, { "epoch": 1.9537085744345082, "grad_norm": 2.0467536449432373, "learning_rate": 3.841865399572725e-06, "loss": 0.894, "step": 3714 }, { "epoch": 1.9542346133613888, "grad_norm": 2.0909125804901123, "learning_rate": 3.841279221107352e-06, "loss": 0.8702, "step": 3715 }, { "epoch": 1.9547606522882695, "grad_norm": 2.0857174396514893, "learning_rate": 3.840692939079848e-06, "loss": 0.9279, "step": 3716 }, { "epoch": 1.95528669121515, "grad_norm": 2.0140902996063232, "learning_rate": 3.840106553535482e-06, "loss": 0.9305, "step": 3717 }, { "epoch": 1.9558127301420305, "grad_norm": 1.952136516571045, "learning_rate": 3.839520064519528e-06, "loss": 0.8755, "step": 3718 }, { "epoch": 1.9563387690689111, "grad_norm": 2.0122833251953125, "learning_rate": 3.838933472077271e-06, "loss": 0.8571, "step": 3719 }, { "epoch": 1.9568648079957915, "grad_norm": 1.9882973432540894, "learning_rate": 3.838346776254e-06, "loss": 0.9303, "step": 3720 }, { "epoch": 1.9573908469226722, "grad_norm": 2.374667167663574, "learning_rate": 3.837759977095019e-06, "loss": 0.9209, "step": 3721 }, { "epoch": 1.9579168858495528, "grad_norm": 2.002048969268799, "learning_rate": 3.837173074645632e-06, "loss": 0.9023, "step": 3722 }, { "epoch": 1.9584429247764334, "grad_norm": 2.130359172821045, "learning_rate": 3.836586068951157e-06, "loss": 0.9033, "step": 3723 }, { "epoch": 1.958968963703314, "grad_norm": 2.143298387527466, "learning_rate": 3.835998960056915e-06, "loss": 0.9437, "step": 3724 }, { "epoch": 1.9594950026301947, "grad_norm": 2.0639281272888184, "learning_rate": 3.835411748008241e-06, "loss": 0.9239, "step": 3725 }, { "epoch": 1.9600210415570753, "grad_norm": 2.1186087131500244, "learning_rate": 3.834824432850471e-06, "loss": 0.9517, "step": 3726 }, { "epoch": 1.9605470804839558, "grad_norm": 2.086833953857422, "learning_rate": 3.834237014628954e-06, "loss": 0.9462, "step": 3727 }, { "epoch": 1.9610731194108364, "grad_norm": 2.0423338413238525, "learning_rate": 3.8336494933890455e-06, "loss": 0.9423, "step": 3728 }, { "epoch": 1.961599158337717, "grad_norm": 2.1271145343780518, "learning_rate": 3.833061869176109e-06, "loss": 0.8833, "step": 3729 }, { "epoch": 1.9621251972645974, "grad_norm": 2.020315170288086, "learning_rate": 3.832474142035516e-06, "loss": 0.9602, "step": 3730 }, { "epoch": 1.962651236191478, "grad_norm": 1.994666576385498, "learning_rate": 3.8318863120126445e-06, "loss": 0.9176, "step": 3731 }, { "epoch": 1.9631772751183587, "grad_norm": 2.020279884338379, "learning_rate": 3.831298379152884e-06, "loss": 0.9439, "step": 3732 }, { "epoch": 1.9637033140452393, "grad_norm": 1.978032112121582, "learning_rate": 3.830710343501627e-06, "loss": 0.8713, "step": 3733 }, { "epoch": 1.96422935297212, "grad_norm": 2.230541229248047, "learning_rate": 3.830122205104277e-06, "loss": 0.9199, "step": 3734 }, { "epoch": 1.9647553918990006, "grad_norm": 2.0482237339019775, "learning_rate": 3.829533964006248e-06, "loss": 0.9111, "step": 3735 }, { "epoch": 1.9652814308258812, "grad_norm": 2.1555042266845703, "learning_rate": 3.828945620252956e-06, "loss": 0.9417, "step": 3736 }, { "epoch": 1.9658074697527617, "grad_norm": 1.9371111392974854, "learning_rate": 3.828357173889828e-06, "loss": 0.9222, "step": 3737 }, { "epoch": 1.9663335086796423, "grad_norm": 1.9611694812774658, "learning_rate": 3.8277686249622994e-06, "loss": 0.8849, "step": 3738 }, { "epoch": 1.966859547606523, "grad_norm": 2.1161680221557617, "learning_rate": 3.827179973515815e-06, "loss": 0.9615, "step": 3739 }, { "epoch": 1.9673855865334033, "grad_norm": 2.0131423473358154, "learning_rate": 3.826591219595821e-06, "loss": 0.8836, "step": 3740 }, { "epoch": 1.967911625460284, "grad_norm": 1.9942444562911987, "learning_rate": 3.82600236324778e-06, "loss": 0.9373, "step": 3741 }, { "epoch": 1.9684376643871646, "grad_norm": 1.8967758417129517, "learning_rate": 3.825413404517156e-06, "loss": 0.9021, "step": 3742 }, { "epoch": 1.9689637033140452, "grad_norm": 1.8794455528259277, "learning_rate": 3.824824343449425e-06, "loss": 0.9309, "step": 3743 }, { "epoch": 1.9694897422409259, "grad_norm": 1.946311116218567, "learning_rate": 3.8242351800900674e-06, "loss": 0.8965, "step": 3744 }, { "epoch": 1.9700157811678065, "grad_norm": 2.0435941219329834, "learning_rate": 3.823645914484575e-06, "loss": 0.9217, "step": 3745 }, { "epoch": 1.9705418200946871, "grad_norm": 1.9912713766098022, "learning_rate": 3.823056546678445e-06, "loss": 0.9325, "step": 3746 }, { "epoch": 1.9710678590215676, "grad_norm": 1.891200304031372, "learning_rate": 3.822467076717184e-06, "loss": 0.9494, "step": 3747 }, { "epoch": 1.9715938979484482, "grad_norm": 1.9701929092407227, "learning_rate": 3.821877504646306e-06, "loss": 0.9224, "step": 3748 }, { "epoch": 1.9721199368753288, "grad_norm": 2.028235673904419, "learning_rate": 3.821287830511332e-06, "loss": 0.9088, "step": 3749 }, { "epoch": 1.9726459758022092, "grad_norm": 1.9580707550048828, "learning_rate": 3.820698054357792e-06, "loss": 0.9471, "step": 3750 }, { "epoch": 1.9731720147290899, "grad_norm": 2.0986881256103516, "learning_rate": 3.820108176231223e-06, "loss": 0.8719, "step": 3751 }, { "epoch": 1.9736980536559705, "grad_norm": 1.945340633392334, "learning_rate": 3.81951819617717e-06, "loss": 0.8948, "step": 3752 }, { "epoch": 1.9742240925828511, "grad_norm": 2.095881462097168, "learning_rate": 3.8189281142411885e-06, "loss": 0.893, "step": 3753 }, { "epoch": 1.9747501315097318, "grad_norm": 2.002525568008423, "learning_rate": 3.8183379304688375e-06, "loss": 0.8472, "step": 3754 }, { "epoch": 1.9752761704366124, "grad_norm": 2.4062154293060303, "learning_rate": 3.817747644905685e-06, "loss": 0.9429, "step": 3755 }, { "epoch": 1.975802209363493, "grad_norm": 2.005993604660034, "learning_rate": 3.817157257597311e-06, "loss": 0.935, "step": 3756 }, { "epoch": 1.9763282482903735, "grad_norm": 2.0487518310546875, "learning_rate": 3.8165667685892975e-06, "loss": 0.8916, "step": 3757 }, { "epoch": 1.976854287217254, "grad_norm": 1.9535064697265625, "learning_rate": 3.8159761779272375e-06, "loss": 0.94, "step": 3758 }, { "epoch": 1.9773803261441345, "grad_norm": 2.1313724517822266, "learning_rate": 3.815385485656732e-06, "loss": 0.9342, "step": 3759 }, { "epoch": 1.9779063650710151, "grad_norm": 2.204428195953369, "learning_rate": 3.81479469182339e-06, "loss": 0.9595, "step": 3760 }, { "epoch": 1.9784324039978958, "grad_norm": 2.049039602279663, "learning_rate": 3.814203796472826e-06, "loss": 0.9878, "step": 3761 }, { "epoch": 1.9789584429247764, "grad_norm": 2.3214313983917236, "learning_rate": 3.8136127996506646e-06, "loss": 0.9841, "step": 3762 }, { "epoch": 1.979484481851657, "grad_norm": 2.0491721630096436, "learning_rate": 3.813021701402537e-06, "loss": 0.9442, "step": 3763 }, { "epoch": 1.9800105207785377, "grad_norm": 2.259814500808716, "learning_rate": 3.8124305017740836e-06, "loss": 0.9934, "step": 3764 }, { "epoch": 1.9805365597054183, "grad_norm": 2.1149747371673584, "learning_rate": 3.811839200810952e-06, "loss": 0.901, "step": 3765 }, { "epoch": 1.981062598632299, "grad_norm": 2.0713698863983154, "learning_rate": 3.811247798558796e-06, "loss": 0.8785, "step": 3766 }, { "epoch": 1.9815886375591794, "grad_norm": 1.9660581350326538, "learning_rate": 3.810656295063279e-06, "loss": 0.8642, "step": 3767 }, { "epoch": 1.98211467648606, "grad_norm": 2.0408098697662354, "learning_rate": 3.8100646903700723e-06, "loss": 0.8872, "step": 3768 }, { "epoch": 1.9826407154129404, "grad_norm": 2.0717782974243164, "learning_rate": 3.8094729845248546e-06, "loss": 0.9656, "step": 3769 }, { "epoch": 1.983166754339821, "grad_norm": 2.035109043121338, "learning_rate": 3.8088811775733124e-06, "loss": 0.9504, "step": 3770 }, { "epoch": 1.9836927932667017, "grad_norm": 2.251352071762085, "learning_rate": 3.8082892695611407e-06, "loss": 0.9709, "step": 3771 }, { "epoch": 1.9842188321935823, "grad_norm": 2.101968765258789, "learning_rate": 3.8076972605340397e-06, "loss": 0.9324, "step": 3772 }, { "epoch": 1.984744871120463, "grad_norm": 1.9815831184387207, "learning_rate": 3.807105150537721e-06, "loss": 0.8799, "step": 3773 }, { "epoch": 1.9852709100473436, "grad_norm": 2.0454111099243164, "learning_rate": 3.8065129396179007e-06, "loss": 0.9219, "step": 3774 }, { "epoch": 1.9857969489742242, "grad_norm": 2.024466037750244, "learning_rate": 3.805920627820306e-06, "loss": 0.9341, "step": 3775 }, { "epoch": 1.9863229879011048, "grad_norm": 1.9955847263336182, "learning_rate": 3.805328215190669e-06, "loss": 0.9092, "step": 3776 }, { "epoch": 1.9868490268279853, "grad_norm": 2.1025545597076416, "learning_rate": 3.804735701774731e-06, "loss": 0.9297, "step": 3777 }, { "epoch": 1.9873750657548659, "grad_norm": 2.141676425933838, "learning_rate": 3.804143087618241e-06, "loss": 0.9414, "step": 3778 }, { "epoch": 1.9879011046817463, "grad_norm": 2.0731565952301025, "learning_rate": 3.803550372766956e-06, "loss": 0.9677, "step": 3779 }, { "epoch": 1.988427143608627, "grad_norm": 2.244962453842163, "learning_rate": 3.8029575572666387e-06, "loss": 0.9456, "step": 3780 }, { "epoch": 1.9889531825355076, "grad_norm": 2.0196962356567383, "learning_rate": 3.8023646411630633e-06, "loss": 0.9449, "step": 3781 }, { "epoch": 1.9894792214623882, "grad_norm": 2.038806200027466, "learning_rate": 3.801771624502009e-06, "loss": 0.9313, "step": 3782 }, { "epoch": 1.9900052603892688, "grad_norm": 2.0902819633483887, "learning_rate": 3.8011785073292627e-06, "loss": 0.9408, "step": 3783 }, { "epoch": 1.9905312993161495, "grad_norm": 2.00836181640625, "learning_rate": 3.8005852896906215e-06, "loss": 0.9369, "step": 3784 }, { "epoch": 1.99105733824303, "grad_norm": 2.069345712661743, "learning_rate": 3.7999919716318877e-06, "loss": 0.9748, "step": 3785 }, { "epoch": 1.9915833771699105, "grad_norm": 2.071449041366577, "learning_rate": 3.7993985531988708e-06, "loss": 0.9252, "step": 3786 }, { "epoch": 1.9921094160967912, "grad_norm": 2.245893955230713, "learning_rate": 3.7988050344373915e-06, "loss": 0.9503, "step": 3787 }, { "epoch": 1.9926354550236718, "grad_norm": 1.9542043209075928, "learning_rate": 3.798211415393276e-06, "loss": 0.8913, "step": 3788 }, { "epoch": 1.9931614939505522, "grad_norm": 2.013779401779175, "learning_rate": 3.7976176961123578e-06, "loss": 0.9097, "step": 3789 }, { "epoch": 1.9936875328774328, "grad_norm": 1.9830703735351562, "learning_rate": 3.797023876640479e-06, "loss": 0.9301, "step": 3790 }, { "epoch": 1.9942135718043135, "grad_norm": 1.9405882358551025, "learning_rate": 3.79642995702349e-06, "loss": 0.9477, "step": 3791 }, { "epoch": 1.994739610731194, "grad_norm": 2.159137725830078, "learning_rate": 3.7958359373072468e-06, "loss": 0.9709, "step": 3792 }, { "epoch": 1.9952656496580747, "grad_norm": 2.1361355781555176, "learning_rate": 3.7952418175376153e-06, "loss": 0.921, "step": 3793 }, { "epoch": 1.9957916885849554, "grad_norm": 1.8702547550201416, "learning_rate": 3.794647597760469e-06, "loss": 0.915, "step": 3794 }, { "epoch": 1.996317727511836, "grad_norm": 2.100464105606079, "learning_rate": 3.794053278021688e-06, "loss": 0.9085, "step": 3795 }, { "epoch": 1.9968437664387164, "grad_norm": 2.0043835639953613, "learning_rate": 3.7934588583671594e-06, "loss": 0.8704, "step": 3796 }, { "epoch": 1.997369805365597, "grad_norm": 2.139164924621582, "learning_rate": 3.7928643388427814e-06, "loss": 0.9234, "step": 3797 }, { "epoch": 1.9978958442924777, "grad_norm": 2.084885597229004, "learning_rate": 3.7922697194944564e-06, "loss": 0.9498, "step": 3798 }, { "epoch": 1.998421883219358, "grad_norm": 2.1158130168914795, "learning_rate": 3.791675000368096e-06, "loss": 0.9519, "step": 3799 }, { "epoch": 1.9989479221462387, "grad_norm": 2.0837512016296387, "learning_rate": 3.7910801815096186e-06, "loss": 0.9091, "step": 3800 }, { "epoch": 1.9994739610731194, "grad_norm": 1.8967145681381226, "learning_rate": 3.7904852629649523e-06, "loss": 0.8711, "step": 3801 }, { "epoch": 2.0, "grad_norm": 2.044868230819702, "learning_rate": 3.789890244780031e-06, "loss": 0.9276, "step": 3802 }, { "epoch": 2.0005260389268806, "grad_norm": 1.9324544668197632, "learning_rate": 3.7892951270007984e-06, "loss": 0.8819, "step": 3803 }, { "epoch": 2.0010520778537613, "grad_norm": 2.1012792587280273, "learning_rate": 3.7886999096732017e-06, "loss": 0.8686, "step": 3804 }, { "epoch": 2.001578116780642, "grad_norm": 1.8515793085098267, "learning_rate": 3.7881045928432005e-06, "loss": 0.8108, "step": 3805 }, { "epoch": 2.0021041557075225, "grad_norm": 1.8814657926559448, "learning_rate": 3.7875091765567586e-06, "loss": 0.8421, "step": 3806 }, { "epoch": 2.0026301946344027, "grad_norm": 1.9697299003601074, "learning_rate": 3.786913660859851e-06, "loss": 0.8688, "step": 3807 }, { "epoch": 2.0031562335612834, "grad_norm": 2.0397582054138184, "learning_rate": 3.7863180457984562e-06, "loss": 0.9052, "step": 3808 }, { "epoch": 2.003682272488164, "grad_norm": 2.0169777870178223, "learning_rate": 3.785722331418564e-06, "loss": 0.8732, "step": 3809 }, { "epoch": 2.0042083114150446, "grad_norm": 2.1055283546447754, "learning_rate": 3.78512651776617e-06, "loss": 0.8435, "step": 3810 }, { "epoch": 2.0047343503419253, "grad_norm": 2.158822774887085, "learning_rate": 3.784530604887277e-06, "loss": 0.8896, "step": 3811 }, { "epoch": 2.005260389268806, "grad_norm": 1.8128688335418701, "learning_rate": 3.783934592827897e-06, "loss": 0.8685, "step": 3812 }, { "epoch": 2.0057864281956865, "grad_norm": 2.16433048248291, "learning_rate": 3.783338481634049e-06, "loss": 0.8653, "step": 3813 }, { "epoch": 2.006312467122567, "grad_norm": 2.2747700214385986, "learning_rate": 3.7827422713517604e-06, "loss": 0.9177, "step": 3814 }, { "epoch": 2.006838506049448, "grad_norm": 1.9674586057662964, "learning_rate": 3.7821459620270646e-06, "loss": 0.8832, "step": 3815 }, { "epoch": 2.0073645449763284, "grad_norm": 2.0820772647857666, "learning_rate": 3.781549553706003e-06, "loss": 0.8656, "step": 3816 }, { "epoch": 2.0078905839032086, "grad_norm": 1.9695658683776855, "learning_rate": 3.780953046434626e-06, "loss": 0.8726, "step": 3817 }, { "epoch": 2.0084166228300893, "grad_norm": 2.081181764602661, "learning_rate": 3.78035644025899e-06, "loss": 0.8627, "step": 3818 }, { "epoch": 2.00894266175697, "grad_norm": 2.0753157138824463, "learning_rate": 3.7797597352251604e-06, "loss": 0.886, "step": 3819 }, { "epoch": 2.0094687006838505, "grad_norm": 2.215916156768799, "learning_rate": 3.77916293137921e-06, "loss": 0.8416, "step": 3820 }, { "epoch": 2.009994739610731, "grad_norm": 2.3293678760528564, "learning_rate": 3.778566028767219e-06, "loss": 0.8581, "step": 3821 }, { "epoch": 2.010520778537612, "grad_norm": 2.0302600860595703, "learning_rate": 3.777969027435274e-06, "loss": 0.9372, "step": 3822 }, { "epoch": 2.0110468174644924, "grad_norm": 2.6738765239715576, "learning_rate": 3.7773719274294706e-06, "loss": 0.906, "step": 3823 }, { "epoch": 2.011572856391373, "grad_norm": 2.242668390274048, "learning_rate": 3.7767747287959126e-06, "loss": 0.8623, "step": 3824 }, { "epoch": 2.0120988953182537, "grad_norm": 1.9181817770004272, "learning_rate": 3.776177431580711e-06, "loss": 0.8829, "step": 3825 }, { "epoch": 2.0126249342451343, "grad_norm": 2.0382919311523438, "learning_rate": 3.775580035829982e-06, "loss": 0.8742, "step": 3826 }, { "epoch": 2.0131509731720145, "grad_norm": 2.21323823928833, "learning_rate": 3.774982541589853e-06, "loss": 0.8822, "step": 3827 }, { "epoch": 2.013677012098895, "grad_norm": 2.1555404663085938, "learning_rate": 3.7743849489064564e-06, "loss": 0.9112, "step": 3828 }, { "epoch": 2.014203051025776, "grad_norm": 2.1244027614593506, "learning_rate": 3.7737872578259342e-06, "loss": 0.8583, "step": 3829 }, { "epoch": 2.0147290899526564, "grad_norm": 2.1133501529693604, "learning_rate": 3.773189468394435e-06, "loss": 0.8622, "step": 3830 }, { "epoch": 2.015255128879537, "grad_norm": 2.0445339679718018, "learning_rate": 3.7725915806581133e-06, "loss": 0.8352, "step": 3831 }, { "epoch": 2.0157811678064177, "grad_norm": 2.298804759979248, "learning_rate": 3.771993594663135e-06, "loss": 0.8996, "step": 3832 }, { "epoch": 2.0163072067332983, "grad_norm": 2.401646375656128, "learning_rate": 3.77139551045567e-06, "loss": 0.8622, "step": 3833 }, { "epoch": 2.016833245660179, "grad_norm": 2.059013605117798, "learning_rate": 3.7707973280818988e-06, "loss": 0.8702, "step": 3834 }, { "epoch": 2.0173592845870596, "grad_norm": 2.2547876834869385, "learning_rate": 3.770199047588007e-06, "loss": 0.8986, "step": 3835 }, { "epoch": 2.0178853235139402, "grad_norm": 2.0395021438598633, "learning_rate": 3.7696006690201883e-06, "loss": 0.8569, "step": 3836 }, { "epoch": 2.0184113624408204, "grad_norm": 2.150195360183716, "learning_rate": 3.769002192424643e-06, "loss": 0.9162, "step": 3837 }, { "epoch": 2.018937401367701, "grad_norm": 2.2160632610321045, "learning_rate": 3.768403617847585e-06, "loss": 0.9093, "step": 3838 }, { "epoch": 2.0194634402945817, "grad_norm": 2.338444709777832, "learning_rate": 3.7678049453352266e-06, "loss": 0.8675, "step": 3839 }, { "epoch": 2.0199894792214623, "grad_norm": 2.0220580101013184, "learning_rate": 3.7672061749337944e-06, "loss": 0.838, "step": 3840 }, { "epoch": 2.020515518148343, "grad_norm": 2.1409084796905518, "learning_rate": 3.7666073066895197e-06, "loss": 0.8751, "step": 3841 }, { "epoch": 2.0210415570752236, "grad_norm": 1.9475165605545044, "learning_rate": 3.7660083406486424e-06, "loss": 0.8301, "step": 3842 }, { "epoch": 2.0215675960021042, "grad_norm": 2.4445762634277344, "learning_rate": 3.7654092768574084e-06, "loss": 0.8483, "step": 3843 }, { "epoch": 2.022093634928985, "grad_norm": 2.325181007385254, "learning_rate": 3.764810115362073e-06, "loss": 0.8857, "step": 3844 }, { "epoch": 2.0226196738558655, "grad_norm": 2.216928720474243, "learning_rate": 3.7642108562089e-06, "loss": 0.9326, "step": 3845 }, { "epoch": 2.023145712782746, "grad_norm": 2.0433201789855957, "learning_rate": 3.7636114994441563e-06, "loss": 0.8447, "step": 3846 }, { "epoch": 2.0236717517096263, "grad_norm": 2.037320852279663, "learning_rate": 3.7630120451141205e-06, "loss": 0.8002, "step": 3847 }, { "epoch": 2.024197790636507, "grad_norm": 2.1208412647247314, "learning_rate": 3.7624124932650773e-06, "loss": 0.884, "step": 3848 }, { "epoch": 2.0247238295633876, "grad_norm": 2.1287434101104736, "learning_rate": 3.7618128439433184e-06, "loss": 0.8845, "step": 3849 }, { "epoch": 2.0252498684902682, "grad_norm": 2.219541311264038, "learning_rate": 3.7612130971951446e-06, "loss": 0.9237, "step": 3850 }, { "epoch": 2.025775907417149, "grad_norm": 1.8505560159683228, "learning_rate": 3.760613253066862e-06, "loss": 0.8287, "step": 3851 }, { "epoch": 2.0263019463440295, "grad_norm": 2.011486291885376, "learning_rate": 3.760013311604787e-06, "loss": 0.845, "step": 3852 }, { "epoch": 2.02682798527091, "grad_norm": 1.9631990194320679, "learning_rate": 3.7594132728552414e-06, "loss": 0.8731, "step": 3853 }, { "epoch": 2.0273540241977908, "grad_norm": 2.1568896770477295, "learning_rate": 3.758813136864553e-06, "loss": 0.8445, "step": 3854 }, { "epoch": 2.0278800631246714, "grad_norm": 2.0318989753723145, "learning_rate": 3.7582129036790623e-06, "loss": 0.8461, "step": 3855 }, { "epoch": 2.028406102051552, "grad_norm": 2.295093297958374, "learning_rate": 3.7576125733451117e-06, "loss": 0.8986, "step": 3856 }, { "epoch": 2.028932140978432, "grad_norm": 2.1023261547088623, "learning_rate": 3.7570121459090558e-06, "loss": 0.8241, "step": 3857 }, { "epoch": 2.029458179905313, "grad_norm": 2.1455793380737305, "learning_rate": 3.7564116214172534e-06, "loss": 0.8888, "step": 3858 }, { "epoch": 2.0299842188321935, "grad_norm": 1.9220783710479736, "learning_rate": 3.755810999916072e-06, "loss": 0.8169, "step": 3859 }, { "epoch": 2.030510257759074, "grad_norm": 2.126554012298584, "learning_rate": 3.7552102814518853e-06, "loss": 0.8677, "step": 3860 }, { "epoch": 2.0310362966859548, "grad_norm": 2.0620853900909424, "learning_rate": 3.7546094660710773e-06, "loss": 0.9048, "step": 3861 }, { "epoch": 2.0315623356128354, "grad_norm": 2.0701792240142822, "learning_rate": 3.7540085538200367e-06, "loss": 0.9256, "step": 3862 }, { "epoch": 2.032088374539716, "grad_norm": 2.1357316970825195, "learning_rate": 3.7534075447451613e-06, "loss": 0.8887, "step": 3863 }, { "epoch": 2.0326144134665967, "grad_norm": 2.0946543216705322, "learning_rate": 3.7528064388928576e-06, "loss": 0.9104, "step": 3864 }, { "epoch": 2.0331404523934773, "grad_norm": 2.193769693374634, "learning_rate": 3.752205236309534e-06, "loss": 0.877, "step": 3865 }, { "epoch": 2.033666491320358, "grad_norm": 2.128945827484131, "learning_rate": 3.7516039370416134e-06, "loss": 0.8863, "step": 3866 }, { "epoch": 2.034192530247238, "grad_norm": 2.130593776702881, "learning_rate": 3.751002541135522e-06, "loss": 0.8587, "step": 3867 }, { "epoch": 2.0347185691741188, "grad_norm": 2.088643789291382, "learning_rate": 3.750401048637694e-06, "loss": 0.8756, "step": 3868 }, { "epoch": 2.0352446081009994, "grad_norm": 2.0018200874328613, "learning_rate": 3.7497994595945717e-06, "loss": 0.8793, "step": 3869 }, { "epoch": 2.03577064702788, "grad_norm": 2.118602991104126, "learning_rate": 3.749197774052606e-06, "loss": 0.8787, "step": 3870 }, { "epoch": 2.0362966859547607, "grad_norm": 2.177366256713867, "learning_rate": 3.748595992058252e-06, "loss": 0.8674, "step": 3871 }, { "epoch": 2.0368227248816413, "grad_norm": 2.0989413261413574, "learning_rate": 3.7479941136579757e-06, "loss": 0.8855, "step": 3872 }, { "epoch": 2.037348763808522, "grad_norm": 2.0205960273742676, "learning_rate": 3.747392138898247e-06, "loss": 0.8382, "step": 3873 }, { "epoch": 2.0378748027354026, "grad_norm": 2.1133978366851807, "learning_rate": 3.7467900678255475e-06, "loss": 0.8707, "step": 3874 }, { "epoch": 2.038400841662283, "grad_norm": 2.065305233001709, "learning_rate": 3.7461879004863633e-06, "loss": 0.7913, "step": 3875 }, { "epoch": 2.0389268805891634, "grad_norm": 2.0252208709716797, "learning_rate": 3.7455856369271887e-06, "loss": 0.8429, "step": 3876 }, { "epoch": 2.039452919516044, "grad_norm": 1.981558918952942, "learning_rate": 3.7449832771945244e-06, "loss": 0.8631, "step": 3877 }, { "epoch": 2.0399789584429247, "grad_norm": 2.0610392093658447, "learning_rate": 3.7443808213348803e-06, "loss": 0.8431, "step": 3878 }, { "epoch": 2.0405049973698053, "grad_norm": 1.9911890029907227, "learning_rate": 3.7437782693947723e-06, "loss": 0.8556, "step": 3879 }, { "epoch": 2.041031036296686, "grad_norm": 2.075105667114258, "learning_rate": 3.743175621420725e-06, "loss": 0.8474, "step": 3880 }, { "epoch": 2.0415570752235666, "grad_norm": 2.196594715118408, "learning_rate": 3.7425728774592695e-06, "loss": 0.8377, "step": 3881 }, { "epoch": 2.042083114150447, "grad_norm": 2.1999762058258057, "learning_rate": 3.7419700375569445e-06, "loss": 0.8548, "step": 3882 }, { "epoch": 2.042609153077328, "grad_norm": 2.1714982986450195, "learning_rate": 3.7413671017602966e-06, "loss": 0.8597, "step": 3883 }, { "epoch": 2.0431351920042085, "grad_norm": 2.151400327682495, "learning_rate": 3.740764070115879e-06, "loss": 0.9017, "step": 3884 }, { "epoch": 2.043661230931089, "grad_norm": 2.214592933654785, "learning_rate": 3.740160942670252e-06, "loss": 0.8998, "step": 3885 }, { "epoch": 2.0441872698579693, "grad_norm": 2.211949586868286, "learning_rate": 3.739557719469984e-06, "loss": 0.8464, "step": 3886 }, { "epoch": 2.04471330878485, "grad_norm": 2.0692906379699707, "learning_rate": 3.7389544005616518e-06, "loss": 0.8451, "step": 3887 }, { "epoch": 2.0452393477117305, "grad_norm": 2.3382716178894043, "learning_rate": 3.7383509859918384e-06, "loss": 0.8473, "step": 3888 }, { "epoch": 2.045765386638611, "grad_norm": 2.0335793495178223, "learning_rate": 3.737747475807134e-06, "loss": 0.8403, "step": 3889 }, { "epoch": 2.046291425565492, "grad_norm": 2.1546132564544678, "learning_rate": 3.737143870054136e-06, "loss": 0.8816, "step": 3890 }, { "epoch": 2.0468174644923725, "grad_norm": 1.949670672416687, "learning_rate": 3.7365401687794496e-06, "loss": 0.8602, "step": 3891 }, { "epoch": 2.047343503419253, "grad_norm": 2.1397831439971924, "learning_rate": 3.735936372029689e-06, "loss": 0.8974, "step": 3892 }, { "epoch": 2.0478695423461337, "grad_norm": 2.037754535675049, "learning_rate": 3.7353324798514725e-06, "loss": 0.8962, "step": 3893 }, { "epoch": 2.0483955812730144, "grad_norm": 2.1237149238586426, "learning_rate": 3.7347284922914285e-06, "loss": 0.8731, "step": 3894 }, { "epoch": 2.048921620199895, "grad_norm": 2.0275261402130127, "learning_rate": 3.7341244093961915e-06, "loss": 0.8576, "step": 3895 }, { "epoch": 2.049447659126775, "grad_norm": 2.074918270111084, "learning_rate": 3.7335202312124034e-06, "loss": 0.8824, "step": 3896 }, { "epoch": 2.049973698053656, "grad_norm": 2.1301252841949463, "learning_rate": 3.7329159577867137e-06, "loss": 0.888, "step": 3897 }, { "epoch": 2.0504997369805364, "grad_norm": 2.0343546867370605, "learning_rate": 3.7323115891657794e-06, "loss": 0.8322, "step": 3898 }, { "epoch": 2.051025775907417, "grad_norm": 2.1331627368927, "learning_rate": 3.731707125396265e-06, "loss": 0.8893, "step": 3899 }, { "epoch": 2.0515518148342977, "grad_norm": 2.0007245540618896, "learning_rate": 3.731102566524842e-06, "loss": 0.8507, "step": 3900 }, { "epoch": 2.0520778537611783, "grad_norm": 1.974657416343689, "learning_rate": 3.730497912598189e-06, "loss": 0.8589, "step": 3901 }, { "epoch": 2.052603892688059, "grad_norm": 2.2127580642700195, "learning_rate": 3.7298931636629917e-06, "loss": 0.8863, "step": 3902 }, { "epoch": 2.0531299316149396, "grad_norm": 2.1541671752929688, "learning_rate": 3.729288319765944e-06, "loss": 0.906, "step": 3903 }, { "epoch": 2.0536559705418203, "grad_norm": 1.910448431968689, "learning_rate": 3.728683380953747e-06, "loss": 0.825, "step": 3904 }, { "epoch": 2.054182009468701, "grad_norm": 2.2508139610290527, "learning_rate": 3.728078347273109e-06, "loss": 0.8417, "step": 3905 }, { "epoch": 2.054708048395581, "grad_norm": 2.2990832328796387, "learning_rate": 3.7274732187707457e-06, "loss": 0.8987, "step": 3906 }, { "epoch": 2.0552340873224617, "grad_norm": 2.2015087604522705, "learning_rate": 3.72686799549338e-06, "loss": 0.9045, "step": 3907 }, { "epoch": 2.0557601262493423, "grad_norm": 2.1351354122161865, "learning_rate": 3.726262677487741e-06, "loss": 0.8545, "step": 3908 }, { "epoch": 2.056286165176223, "grad_norm": 1.9964978694915771, "learning_rate": 3.7256572648005666e-06, "loss": 0.8471, "step": 3909 }, { "epoch": 2.0568122041031036, "grad_norm": 2.250133514404297, "learning_rate": 3.7250517574786028e-06, "loss": 0.8668, "step": 3910 }, { "epoch": 2.0573382430299842, "grad_norm": 2.1591506004333496, "learning_rate": 3.7244461555686e-06, "loss": 0.8779, "step": 3911 }, { "epoch": 2.057864281956865, "grad_norm": 2.1623170375823975, "learning_rate": 3.723840459117319e-06, "loss": 0.8348, "step": 3912 }, { "epoch": 2.0583903208837455, "grad_norm": 2.141899824142456, "learning_rate": 3.723234668171526e-06, "loss": 0.8536, "step": 3913 }, { "epoch": 2.058916359810626, "grad_norm": 1.982509732246399, "learning_rate": 3.7226287827779944e-06, "loss": 0.8429, "step": 3914 }, { "epoch": 2.059442398737507, "grad_norm": 2.1034417152404785, "learning_rate": 3.7220228029835065e-06, "loss": 0.8445, "step": 3915 }, { "epoch": 2.059968437664387, "grad_norm": 2.147608757019043, "learning_rate": 3.7214167288348494e-06, "loss": 0.8631, "step": 3916 }, { "epoch": 2.0604944765912676, "grad_norm": 2.1992483139038086, "learning_rate": 3.7208105603788213e-06, "loss": 0.8891, "step": 3917 }, { "epoch": 2.0610205155181482, "grad_norm": 2.1651837825775146, "learning_rate": 3.7202042976622233e-06, "loss": 0.8767, "step": 3918 }, { "epoch": 2.061546554445029, "grad_norm": 2.074176549911499, "learning_rate": 3.719597940731867e-06, "loss": 0.8566, "step": 3919 }, { "epoch": 2.0620725933719095, "grad_norm": 2.3077428340911865, "learning_rate": 3.71899148963457e-06, "loss": 0.8688, "step": 3920 }, { "epoch": 2.06259863229879, "grad_norm": 2.0964107513427734, "learning_rate": 3.718384944417156e-06, "loss": 0.846, "step": 3921 }, { "epoch": 2.063124671225671, "grad_norm": 2.1837058067321777, "learning_rate": 3.717778305126459e-06, "loss": 0.915, "step": 3922 }, { "epoch": 2.0636507101525514, "grad_norm": 2.1159892082214355, "learning_rate": 3.717171571809317e-06, "loss": 0.8977, "step": 3923 }, { "epoch": 2.064176749079432, "grad_norm": 2.0657877922058105, "learning_rate": 3.716564744512578e-06, "loss": 0.8627, "step": 3924 }, { "epoch": 2.0647027880063122, "grad_norm": 2.0964975357055664, "learning_rate": 3.7159578232830954e-06, "loss": 0.8305, "step": 3925 }, { "epoch": 2.065228826933193, "grad_norm": 2.0189549922943115, "learning_rate": 3.71535080816773e-06, "loss": 0.8338, "step": 3926 }, { "epoch": 2.0657548658600735, "grad_norm": 2.055124521255493, "learning_rate": 3.714743699213352e-06, "loss": 0.9213, "step": 3927 }, { "epoch": 2.066280904786954, "grad_norm": 2.0454020500183105, "learning_rate": 3.714136496466836e-06, "loss": 0.8554, "step": 3928 }, { "epoch": 2.0668069437138348, "grad_norm": 1.972805142402649, "learning_rate": 3.713529199975064e-06, "loss": 0.822, "step": 3929 }, { "epoch": 2.0673329826407154, "grad_norm": 2.0066187381744385, "learning_rate": 3.7129218097849275e-06, "loss": 0.8832, "step": 3930 }, { "epoch": 2.067859021567596, "grad_norm": 2.2114450931549072, "learning_rate": 3.7123143259433247e-06, "loss": 0.8851, "step": 3931 }, { "epoch": 2.0683850604944767, "grad_norm": 2.2430622577667236, "learning_rate": 3.711706748497159e-06, "loss": 0.8206, "step": 3932 }, { "epoch": 2.0689110994213573, "grad_norm": 2.0854556560516357, "learning_rate": 3.7110990774933423e-06, "loss": 0.7914, "step": 3933 }, { "epoch": 2.069437138348238, "grad_norm": 2.202176570892334, "learning_rate": 3.7104913129787946e-06, "loss": 0.8455, "step": 3934 }, { "epoch": 2.0699631772751186, "grad_norm": 2.041372537612915, "learning_rate": 3.7098834550004426e-06, "loss": 0.846, "step": 3935 }, { "epoch": 2.0704892162019988, "grad_norm": 2.090280532836914, "learning_rate": 3.709275503605218e-06, "loss": 0.8917, "step": 3936 }, { "epoch": 2.0710152551288794, "grad_norm": 2.0236968994140625, "learning_rate": 3.708667458840064e-06, "loss": 0.8485, "step": 3937 }, { "epoch": 2.07154129405576, "grad_norm": 2.0521340370178223, "learning_rate": 3.7080593207519268e-06, "loss": 0.8584, "step": 3938 }, { "epoch": 2.0720673329826407, "grad_norm": 2.187636375427246, "learning_rate": 3.7074510893877624e-06, "loss": 0.9023, "step": 3939 }, { "epoch": 2.0725933719095213, "grad_norm": 2.1443138122558594, "learning_rate": 3.7068427647945334e-06, "loss": 0.8728, "step": 3940 }, { "epoch": 2.073119410836402, "grad_norm": 2.211897134780884, "learning_rate": 3.7062343470192085e-06, "loss": 0.942, "step": 3941 }, { "epoch": 2.0736454497632826, "grad_norm": 2.1046040058135986, "learning_rate": 3.705625836108766e-06, "loss": 0.8817, "step": 3942 }, { "epoch": 2.074171488690163, "grad_norm": 2.2636232376098633, "learning_rate": 3.7050172321101883e-06, "loss": 0.8572, "step": 3943 }, { "epoch": 2.074697527617044, "grad_norm": 2.212718963623047, "learning_rate": 3.7044085350704683e-06, "loss": 0.8943, "step": 3944 }, { "epoch": 2.075223566543924, "grad_norm": 2.480024576187134, "learning_rate": 3.703799745036603e-06, "loss": 0.8288, "step": 3945 }, { "epoch": 2.0757496054708047, "grad_norm": 2.1955480575561523, "learning_rate": 3.703190862055599e-06, "loss": 0.8626, "step": 3946 }, { "epoch": 2.0762756443976853, "grad_norm": 2.0615439414978027, "learning_rate": 3.7025818861744677e-06, "loss": 0.8519, "step": 3947 }, { "epoch": 2.076801683324566, "grad_norm": 2.0665676593780518, "learning_rate": 3.7019728174402304e-06, "loss": 0.874, "step": 3948 }, { "epoch": 2.0773277222514466, "grad_norm": 2.118298053741455, "learning_rate": 3.701363655899914e-06, "loss": 0.8765, "step": 3949 }, { "epoch": 2.077853761178327, "grad_norm": 2.081981658935547, "learning_rate": 3.700754401600552e-06, "loss": 0.8925, "step": 3950 }, { "epoch": 2.078379800105208, "grad_norm": 2.002983808517456, "learning_rate": 3.700145054589186e-06, "loss": 0.8012, "step": 3951 }, { "epoch": 2.0789058390320885, "grad_norm": 2.095959424972534, "learning_rate": 3.699535614912865e-06, "loss": 0.8893, "step": 3952 }, { "epoch": 2.079431877958969, "grad_norm": 2.0297133922576904, "learning_rate": 3.698926082618644e-06, "loss": 0.8477, "step": 3953 }, { "epoch": 2.0799579168858497, "grad_norm": 2.1438112258911133, "learning_rate": 3.6983164577535867e-06, "loss": 0.8463, "step": 3954 }, { "epoch": 2.08048395581273, "grad_norm": 2.154857873916626, "learning_rate": 3.6977067403647634e-06, "loss": 0.902, "step": 3955 }, { "epoch": 2.0810099947396106, "grad_norm": 2.0178000926971436, "learning_rate": 3.69709693049925e-06, "loss": 0.8971, "step": 3956 }, { "epoch": 2.081536033666491, "grad_norm": 2.2135653495788574, "learning_rate": 3.6964870282041316e-06, "loss": 0.8518, "step": 3957 }, { "epoch": 2.082062072593372, "grad_norm": 2.2675933837890625, "learning_rate": 3.6958770335264992e-06, "loss": 0.9153, "step": 3958 }, { "epoch": 2.0825881115202525, "grad_norm": 2.047260284423828, "learning_rate": 3.695266946513452e-06, "loss": 0.7989, "step": 3959 }, { "epoch": 2.083114150447133, "grad_norm": 2.1332545280456543, "learning_rate": 3.6946567672120957e-06, "loss": 0.8658, "step": 3960 }, { "epoch": 2.0836401893740137, "grad_norm": 2.223459482192993, "learning_rate": 3.6940464956695426e-06, "loss": 0.8855, "step": 3961 }, { "epoch": 2.0841662283008944, "grad_norm": 2.3312764167785645, "learning_rate": 3.6934361319329136e-06, "loss": 0.8355, "step": 3962 }, { "epoch": 2.084692267227775, "grad_norm": 2.0176711082458496, "learning_rate": 3.692825676049334e-06, "loss": 0.8409, "step": 3963 }, { "epoch": 2.0852183061546556, "grad_norm": 2.0182061195373535, "learning_rate": 3.69221512806594e-06, "loss": 0.8272, "step": 3964 }, { "epoch": 2.085744345081536, "grad_norm": 2.083758592605591, "learning_rate": 3.691604488029872e-06, "loss": 0.8708, "step": 3965 }, { "epoch": 2.0862703840084165, "grad_norm": 2.5022287368774414, "learning_rate": 3.6909937559882776e-06, "loss": 0.8657, "step": 3966 }, { "epoch": 2.086796422935297, "grad_norm": 2.2183871269226074, "learning_rate": 3.6903829319883137e-06, "loss": 0.8871, "step": 3967 }, { "epoch": 2.0873224618621777, "grad_norm": 2.054117202758789, "learning_rate": 3.6897720160771426e-06, "loss": 0.8982, "step": 3968 }, { "epoch": 2.0878485007890584, "grad_norm": 2.3053927421569824, "learning_rate": 3.689161008301933e-06, "loss": 0.8504, "step": 3969 }, { "epoch": 2.088374539715939, "grad_norm": 2.162670850753784, "learning_rate": 3.688549908709863e-06, "loss": 0.8672, "step": 3970 }, { "epoch": 2.0889005786428196, "grad_norm": 2.1603682041168213, "learning_rate": 3.6879387173481158e-06, "loss": 0.8901, "step": 3971 }, { "epoch": 2.0894266175697003, "grad_norm": 1.9814544916152954, "learning_rate": 3.687327434263882e-06, "loss": 0.8767, "step": 3972 }, { "epoch": 2.089952656496581, "grad_norm": 2.0629725456237793, "learning_rate": 3.6867160595043612e-06, "loss": 0.8724, "step": 3973 }, { "epoch": 2.0904786954234615, "grad_norm": 2.0546772480010986, "learning_rate": 3.6861045931167573e-06, "loss": 0.863, "step": 3974 }, { "epoch": 2.0910047343503417, "grad_norm": 2.0917580127716064, "learning_rate": 3.685493035148283e-06, "loss": 0.9035, "step": 3975 }, { "epoch": 2.0915307732772224, "grad_norm": 2.177222967147827, "learning_rate": 3.6848813856461563e-06, "loss": 0.8678, "step": 3976 }, { "epoch": 2.092056812204103, "grad_norm": 2.0584640502929688, "learning_rate": 3.6842696446576046e-06, "loss": 0.8294, "step": 3977 }, { "epoch": 2.0925828511309836, "grad_norm": 2.240145444869995, "learning_rate": 3.683657812229862e-06, "loss": 0.9105, "step": 3978 }, { "epoch": 2.0931088900578643, "grad_norm": 2.050130844116211, "learning_rate": 3.6830458884101682e-06, "loss": 0.8613, "step": 3979 }, { "epoch": 2.093634928984745, "grad_norm": 2.072331428527832, "learning_rate": 3.682433873245771e-06, "loss": 0.8322, "step": 3980 }, { "epoch": 2.0941609679116255, "grad_norm": 2.209925413131714, "learning_rate": 3.681821766783924e-06, "loss": 0.8855, "step": 3981 }, { "epoch": 2.094687006838506, "grad_norm": 2.2131872177124023, "learning_rate": 3.681209569071891e-06, "loss": 0.9247, "step": 3982 }, { "epoch": 2.095213045765387, "grad_norm": 2.2380175590515137, "learning_rate": 3.6805972801569377e-06, "loss": 0.963, "step": 3983 }, { "epoch": 2.0957390846922674, "grad_norm": 2.089524030685425, "learning_rate": 3.679984900086342e-06, "loss": 0.8187, "step": 3984 }, { "epoch": 2.0962651236191476, "grad_norm": 2.1329755783081055, "learning_rate": 3.6793724289073864e-06, "loss": 0.8338, "step": 3985 }, { "epoch": 2.0967911625460283, "grad_norm": 2.1133809089660645, "learning_rate": 3.678759866667361e-06, "loss": 0.8668, "step": 3986 }, { "epoch": 2.097317201472909, "grad_norm": 2.397538185119629, "learning_rate": 3.678147213413561e-06, "loss": 0.8357, "step": 3987 }, { "epoch": 2.0978432403997895, "grad_norm": 2.2058184146881104, "learning_rate": 3.677534469193291e-06, "loss": 0.8543, "step": 3988 }, { "epoch": 2.09836927932667, "grad_norm": 2.1603333950042725, "learning_rate": 3.6769216340538626e-06, "loss": 0.8753, "step": 3989 }, { "epoch": 2.098895318253551, "grad_norm": 2.1441192626953125, "learning_rate": 3.6763087080425936e-06, "loss": 0.9109, "step": 3990 }, { "epoch": 2.0994213571804314, "grad_norm": 2.2109951972961426, "learning_rate": 3.675695691206808e-06, "loss": 0.9116, "step": 3991 }, { "epoch": 2.099947396107312, "grad_norm": 2.166762590408325, "learning_rate": 3.6750825835938385e-06, "loss": 0.8639, "step": 3992 }, { "epoch": 2.1004734350341927, "grad_norm": 2.196349859237671, "learning_rate": 3.6744693852510245e-06, "loss": 0.8802, "step": 3993 }, { "epoch": 2.100999473961073, "grad_norm": 2.1084911823272705, "learning_rate": 3.6738560962257102e-06, "loss": 0.9133, "step": 3994 }, { "epoch": 2.1015255128879535, "grad_norm": 2.1162121295928955, "learning_rate": 3.6732427165652503e-06, "loss": 0.8663, "step": 3995 }, { "epoch": 2.102051551814834, "grad_norm": 2.081663131713867, "learning_rate": 3.672629246317004e-06, "loss": 0.828, "step": 3996 }, { "epoch": 2.102577590741715, "grad_norm": 2.1558377742767334, "learning_rate": 3.672015685528338e-06, "loss": 0.8974, "step": 3997 }, { "epoch": 2.1031036296685954, "grad_norm": 1.8951877355575562, "learning_rate": 3.671402034246627e-06, "loss": 0.8406, "step": 3998 }, { "epoch": 2.103629668595476, "grad_norm": 2.024456024169922, "learning_rate": 3.6707882925192506e-06, "loss": 0.8512, "step": 3999 }, { "epoch": 2.1041557075223567, "grad_norm": 2.200667381286621, "learning_rate": 3.670174460393598e-06, "loss": 0.866, "step": 4000 }, { "epoch": 2.1046817464492373, "grad_norm": 2.02412748336792, "learning_rate": 3.6695605379170634e-06, "loss": 0.8929, "step": 4001 }, { "epoch": 2.105207785376118, "grad_norm": 2.0692760944366455, "learning_rate": 3.668946525137049e-06, "loss": 0.8981, "step": 4002 }, { "epoch": 2.1057338243029986, "grad_norm": 2.0676326751708984, "learning_rate": 3.6683324221009635e-06, "loss": 0.8498, "step": 4003 }, { "epoch": 2.1062598632298792, "grad_norm": 2.031890392303467, "learning_rate": 3.6677182288562225e-06, "loss": 0.8458, "step": 4004 }, { "epoch": 2.1067859021567594, "grad_norm": 2.065512180328369, "learning_rate": 3.6671039454502486e-06, "loss": 0.8372, "step": 4005 }, { "epoch": 2.10731194108364, "grad_norm": 2.148578643798828, "learning_rate": 3.666489571930473e-06, "loss": 0.8937, "step": 4006 }, { "epoch": 2.1078379800105207, "grad_norm": 2.1452410221099854, "learning_rate": 3.6658751083443306e-06, "loss": 0.901, "step": 4007 }, { "epoch": 2.1083640189374013, "grad_norm": 2.2177367210388184, "learning_rate": 3.665260554739265e-06, "loss": 0.9104, "step": 4008 }, { "epoch": 2.108890057864282, "grad_norm": 2.176065444946289, "learning_rate": 3.664645911162728e-06, "loss": 0.8532, "step": 4009 }, { "epoch": 2.1094160967911626, "grad_norm": 2.004549264907837, "learning_rate": 3.664031177662177e-06, "loss": 0.8616, "step": 4010 }, { "epoch": 2.1099421357180432, "grad_norm": 2.108278274536133, "learning_rate": 3.6634163542850755e-06, "loss": 0.9093, "step": 4011 }, { "epoch": 2.110468174644924, "grad_norm": 2.043325662612915, "learning_rate": 3.6628014410788955e-06, "loss": 0.8466, "step": 4012 }, { "epoch": 2.1109942135718045, "grad_norm": 2.20375394821167, "learning_rate": 3.6621864380911153e-06, "loss": 0.8845, "step": 4013 }, { "epoch": 2.1115202524986847, "grad_norm": 2.020378351211548, "learning_rate": 3.6615713453692203e-06, "loss": 0.851, "step": 4014 }, { "epoch": 2.1120462914255653, "grad_norm": 2.0112192630767822, "learning_rate": 3.660956162960703e-06, "loss": 0.874, "step": 4015 }, { "epoch": 2.112572330352446, "grad_norm": 1.9552867412567139, "learning_rate": 3.660340890913061e-06, "loss": 0.8123, "step": 4016 }, { "epoch": 2.1130983692793266, "grad_norm": 2.1251490116119385, "learning_rate": 3.6597255292738026e-06, "loss": 0.8469, "step": 4017 }, { "epoch": 2.1136244082062072, "grad_norm": 2.0359630584716797, "learning_rate": 3.6591100780904387e-06, "loss": 0.8761, "step": 4018 }, { "epoch": 2.114150447133088, "grad_norm": 2.0877702236175537, "learning_rate": 3.6584945374104906e-06, "loss": 0.8212, "step": 4019 }, { "epoch": 2.1146764860599685, "grad_norm": 1.9993816614151, "learning_rate": 3.6578789072814847e-06, "loss": 0.8196, "step": 4020 }, { "epoch": 2.115202524986849, "grad_norm": 2.107799768447876, "learning_rate": 3.6572631877509545e-06, "loss": 0.9037, "step": 4021 }, { "epoch": 2.1157285639137298, "grad_norm": 2.134258985519409, "learning_rate": 3.656647378866441e-06, "loss": 0.8481, "step": 4022 }, { "epoch": 2.1162546028406104, "grad_norm": 2.151359796524048, "learning_rate": 3.6560314806754915e-06, "loss": 0.8317, "step": 4023 }, { "epoch": 2.1167806417674906, "grad_norm": 2.0561530590057373, "learning_rate": 3.65541549322566e-06, "loss": 0.868, "step": 4024 }, { "epoch": 2.1173066806943712, "grad_norm": 2.149268865585327, "learning_rate": 3.6547994165645086e-06, "loss": 0.8443, "step": 4025 }, { "epoch": 2.117832719621252, "grad_norm": 2.1969377994537354, "learning_rate": 3.6541832507396043e-06, "loss": 0.8962, "step": 4026 }, { "epoch": 2.1183587585481325, "grad_norm": 2.158400535583496, "learning_rate": 3.6535669957985232e-06, "loss": 0.9221, "step": 4027 }, { "epoch": 2.118884797475013, "grad_norm": 2.032106637954712, "learning_rate": 3.6529506517888467e-06, "loss": 0.8127, "step": 4028 }, { "epoch": 2.1194108364018938, "grad_norm": 2.0377964973449707, "learning_rate": 3.6523342187581644e-06, "loss": 0.7708, "step": 4029 }, { "epoch": 2.1199368753287744, "grad_norm": 2.0704598426818848, "learning_rate": 3.6517176967540714e-06, "loss": 0.893, "step": 4030 }, { "epoch": 2.120462914255655, "grad_norm": 1.9751828908920288, "learning_rate": 3.6511010858241695e-06, "loss": 0.8661, "step": 4031 }, { "epoch": 2.1209889531825357, "grad_norm": 2.092008113861084, "learning_rate": 3.65048438601607e-06, "loss": 0.8477, "step": 4032 }, { "epoch": 2.1215149921094163, "grad_norm": 2.7124805450439453, "learning_rate": 3.649867597377388e-06, "loss": 0.8585, "step": 4033 }, { "epoch": 2.1220410310362965, "grad_norm": 2.2475826740264893, "learning_rate": 3.649250719955746e-06, "loss": 0.8851, "step": 4034 }, { "epoch": 2.122567069963177, "grad_norm": 2.1466259956359863, "learning_rate": 3.6486337537987754e-06, "loss": 0.9088, "step": 4035 }, { "epoch": 2.1230931088900578, "grad_norm": 1.996488332748413, "learning_rate": 3.6480166989541126e-06, "loss": 0.8443, "step": 4036 }, { "epoch": 2.1236191478169384, "grad_norm": 2.0911123752593994, "learning_rate": 3.647399555469401e-06, "loss": 0.8784, "step": 4037 }, { "epoch": 2.124145186743819, "grad_norm": 1.9540727138519287, "learning_rate": 3.6467823233922905e-06, "loss": 0.8875, "step": 4038 }, { "epoch": 2.1246712256706997, "grad_norm": 2.074430465698242, "learning_rate": 3.6461650027704402e-06, "loss": 0.8414, "step": 4039 }, { "epoch": 2.1251972645975803, "grad_norm": 2.1090662479400635, "learning_rate": 3.6455475936515134e-06, "loss": 0.8496, "step": 4040 }, { "epoch": 2.125723303524461, "grad_norm": 2.3682987689971924, "learning_rate": 3.644930096083181e-06, "loss": 0.8964, "step": 4041 }, { "epoch": 2.1262493424513416, "grad_norm": 2.1032111644744873, "learning_rate": 3.6443125101131215e-06, "loss": 0.8315, "step": 4042 }, { "epoch": 2.126775381378222, "grad_norm": 2.150630235671997, "learning_rate": 3.6436948357890188e-06, "loss": 0.8624, "step": 4043 }, { "epoch": 2.1273014203051024, "grad_norm": 2.242189407348633, "learning_rate": 3.6430770731585646e-06, "loss": 0.8599, "step": 4044 }, { "epoch": 2.127827459231983, "grad_norm": 2.108157157897949, "learning_rate": 3.6424592222694576e-06, "loss": 0.8276, "step": 4045 }, { "epoch": 2.1283534981588637, "grad_norm": 2.1490418910980225, "learning_rate": 3.641841283169403e-06, "loss": 0.8334, "step": 4046 }, { "epoch": 2.1288795370857443, "grad_norm": 2.1062753200531006, "learning_rate": 3.6412232559061132e-06, "loss": 0.8423, "step": 4047 }, { "epoch": 2.129405576012625, "grad_norm": 2.106274366378784, "learning_rate": 3.640605140527306e-06, "loss": 0.8791, "step": 4048 }, { "epoch": 2.1299316149395056, "grad_norm": 1.988842487335205, "learning_rate": 3.6399869370807077e-06, "loss": 0.8671, "step": 4049 }, { "epoch": 2.130457653866386, "grad_norm": 2.1451900005340576, "learning_rate": 3.6393686456140497e-06, "loss": 0.8645, "step": 4050 }, { "epoch": 2.130983692793267, "grad_norm": 2.235856294631958, "learning_rate": 3.6387502661750724e-06, "loss": 0.8391, "step": 4051 }, { "epoch": 2.1315097317201475, "grad_norm": 2.088796615600586, "learning_rate": 3.6381317988115206e-06, "loss": 0.8207, "step": 4052 }, { "epoch": 2.132035770647028, "grad_norm": 2.3086771965026855, "learning_rate": 3.637513243571149e-06, "loss": 0.8816, "step": 4053 }, { "epoch": 2.1325618095739083, "grad_norm": 2.134464979171753, "learning_rate": 3.636894600501716e-06, "loss": 0.8527, "step": 4054 }, { "epoch": 2.133087848500789, "grad_norm": 2.041529417037964, "learning_rate": 3.6362758696509866e-06, "loss": 0.8895, "step": 4055 }, { "epoch": 2.1336138874276696, "grad_norm": 2.0979433059692383, "learning_rate": 3.6356570510667355e-06, "loss": 0.8996, "step": 4056 }, { "epoch": 2.13413992635455, "grad_norm": 2.3879358768463135, "learning_rate": 3.635038144796743e-06, "loss": 0.87, "step": 4057 }, { "epoch": 2.134665965281431, "grad_norm": 2.153602361679077, "learning_rate": 3.634419150888794e-06, "loss": 0.8652, "step": 4058 }, { "epoch": 2.1351920042083115, "grad_norm": 2.0157840251922607, "learning_rate": 3.6338000693906838e-06, "loss": 0.8654, "step": 4059 }, { "epoch": 2.135718043135192, "grad_norm": 2.2252182960510254, "learning_rate": 3.6331809003502117e-06, "loss": 0.9218, "step": 4060 }, { "epoch": 2.1362440820620727, "grad_norm": 2.1406612396240234, "learning_rate": 3.6325616438151846e-06, "loss": 0.8722, "step": 4061 }, { "epoch": 2.1367701209889534, "grad_norm": 2.0403623580932617, "learning_rate": 3.6319422998334153e-06, "loss": 0.8513, "step": 4062 }, { "epoch": 2.1372961599158335, "grad_norm": 2.2241806983947754, "learning_rate": 3.6313228684527258e-06, "loss": 0.8843, "step": 4063 }, { "epoch": 2.137822198842714, "grad_norm": 2.1331231594085693, "learning_rate": 3.630703349720943e-06, "loss": 0.9061, "step": 4064 }, { "epoch": 2.138348237769595, "grad_norm": 2.1365530490875244, "learning_rate": 3.6300837436859004e-06, "loss": 0.8785, "step": 4065 }, { "epoch": 2.1388742766964755, "grad_norm": 2.10040545463562, "learning_rate": 3.6294640503954394e-06, "loss": 0.8409, "step": 4066 }, { "epoch": 2.139400315623356, "grad_norm": 2.2356724739074707, "learning_rate": 3.6288442698974067e-06, "loss": 0.9169, "step": 4067 }, { "epoch": 2.1399263545502367, "grad_norm": 2.1221671104431152, "learning_rate": 3.628224402239656e-06, "loss": 0.9446, "step": 4068 }, { "epoch": 2.1404523934771174, "grad_norm": 2.1039652824401855, "learning_rate": 3.6276044474700494e-06, "loss": 0.8597, "step": 4069 }, { "epoch": 2.140978432403998, "grad_norm": 2.051757335662842, "learning_rate": 3.626984405636454e-06, "loss": 0.8271, "step": 4070 }, { "epoch": 2.1415044713308786, "grad_norm": 2.2209742069244385, "learning_rate": 3.6263642767867442e-06, "loss": 0.875, "step": 4071 }, { "epoch": 2.1420305102577593, "grad_norm": 2.124410629272461, "learning_rate": 3.625744060968801e-06, "loss": 0.8232, "step": 4072 }, { "epoch": 2.14255654918464, "grad_norm": 2.250659942626953, "learning_rate": 3.6251237582305114e-06, "loss": 0.8578, "step": 4073 }, { "epoch": 2.14308258811152, "grad_norm": 2.2641327381134033, "learning_rate": 3.6245033686197716e-06, "loss": 0.8666, "step": 4074 }, { "epoch": 2.1436086270384007, "grad_norm": 2.11293625831604, "learning_rate": 3.623882892184481e-06, "loss": 0.8321, "step": 4075 }, { "epoch": 2.1441346659652813, "grad_norm": 2.0726466178894043, "learning_rate": 3.6232623289725482e-06, "loss": 0.8412, "step": 4076 }, { "epoch": 2.144660704892162, "grad_norm": 2.1450514793395996, "learning_rate": 3.6226416790318886e-06, "loss": 0.8309, "step": 4077 }, { "epoch": 2.1451867438190426, "grad_norm": 2.139995574951172, "learning_rate": 3.6220209424104224e-06, "loss": 0.8233, "step": 4078 }, { "epoch": 2.1457127827459233, "grad_norm": 2.283642292022705, "learning_rate": 3.6214001191560775e-06, "loss": 0.8747, "step": 4079 }, { "epoch": 2.146238821672804, "grad_norm": 2.2206647396087646, "learning_rate": 3.62077920931679e-06, "loss": 0.866, "step": 4080 }, { "epoch": 2.1467648605996845, "grad_norm": 2.1933889389038086, "learning_rate": 3.6201582129404993e-06, "loss": 0.8654, "step": 4081 }, { "epoch": 2.147290899526565, "grad_norm": 1.9683947563171387, "learning_rate": 3.619537130075155e-06, "loss": 0.8248, "step": 4082 }, { "epoch": 2.1478169384534453, "grad_norm": 2.130875825881958, "learning_rate": 3.6189159607687113e-06, "loss": 0.8157, "step": 4083 }, { "epoch": 2.148342977380326, "grad_norm": 2.1500020027160645, "learning_rate": 3.6182947050691296e-06, "loss": 0.8901, "step": 4084 }, { "epoch": 2.1488690163072066, "grad_norm": 2.1434788703918457, "learning_rate": 3.6176733630243778e-06, "loss": 0.9081, "step": 4085 }, { "epoch": 2.1493950552340872, "grad_norm": 2.034954071044922, "learning_rate": 3.6170519346824307e-06, "loss": 0.8313, "step": 4086 }, { "epoch": 2.149921094160968, "grad_norm": 2.0177066326141357, "learning_rate": 3.616430420091269e-06, "loss": 0.8595, "step": 4087 }, { "epoch": 2.1504471330878485, "grad_norm": 2.1270992755889893, "learning_rate": 3.6158088192988816e-06, "loss": 0.8529, "step": 4088 }, { "epoch": 2.150973172014729, "grad_norm": 2.3170366287231445, "learning_rate": 3.615187132353264e-06, "loss": 0.9284, "step": 4089 }, { "epoch": 2.15149921094161, "grad_norm": 2.096381664276123, "learning_rate": 3.6145653593024165e-06, "loss": 0.8664, "step": 4090 }, { "epoch": 2.1520252498684904, "grad_norm": 2.1424918174743652, "learning_rate": 3.6139435001943467e-06, "loss": 0.8319, "step": 4091 }, { "epoch": 2.152551288795371, "grad_norm": 2.1311798095703125, "learning_rate": 3.61332155507707e-06, "loss": 0.8498, "step": 4092 }, { "epoch": 2.1530773277222512, "grad_norm": 2.193495035171509, "learning_rate": 3.612699523998607e-06, "loss": 0.8553, "step": 4093 }, { "epoch": 2.153603366649132, "grad_norm": 2.1302406787872314, "learning_rate": 3.6120774070069868e-06, "loss": 0.8606, "step": 4094 }, { "epoch": 2.1541294055760125, "grad_norm": 2.2360782623291016, "learning_rate": 3.6114552041502427e-06, "loss": 0.8993, "step": 4095 }, { "epoch": 2.154655444502893, "grad_norm": 2.113185405731201, "learning_rate": 3.610832915476418e-06, "loss": 0.9025, "step": 4096 }, { "epoch": 2.155181483429774, "grad_norm": 2.103240489959717, "learning_rate": 3.6102105410335575e-06, "loss": 0.8856, "step": 4097 }, { "epoch": 2.1557075223566544, "grad_norm": 2.2705283164978027, "learning_rate": 3.609588080869717e-06, "loss": 0.9022, "step": 4098 }, { "epoch": 2.156233561283535, "grad_norm": 2.1705024242401123, "learning_rate": 3.608965535032958e-06, "loss": 0.848, "step": 4099 }, { "epoch": 2.1567596002104157, "grad_norm": 1.983557105064392, "learning_rate": 3.608342903571348e-06, "loss": 0.8534, "step": 4100 }, { "epoch": 2.1572856391372963, "grad_norm": 2.151183843612671, "learning_rate": 3.6077201865329607e-06, "loss": 0.8793, "step": 4101 }, { "epoch": 2.157811678064177, "grad_norm": 2.118394374847412, "learning_rate": 3.6070973839658785e-06, "loss": 0.9056, "step": 4102 }, { "epoch": 2.158337716991057, "grad_norm": 2.1328799724578857, "learning_rate": 3.6064744959181867e-06, "loss": 0.8802, "step": 4103 }, { "epoch": 2.1588637559179378, "grad_norm": 2.2392377853393555, "learning_rate": 3.605851522437981e-06, "loss": 0.8932, "step": 4104 }, { "epoch": 2.1593897948448184, "grad_norm": 2.1332662105560303, "learning_rate": 3.6052284635733615e-06, "loss": 0.8335, "step": 4105 }, { "epoch": 2.159915833771699, "grad_norm": 2.2517380714416504, "learning_rate": 3.6046053193724363e-06, "loss": 0.8599, "step": 4106 }, { "epoch": 2.1604418726985797, "grad_norm": 2.238494873046875, "learning_rate": 3.6039820898833176e-06, "loss": 0.8876, "step": 4107 }, { "epoch": 2.1609679116254603, "grad_norm": 2.181370735168457, "learning_rate": 3.6033587751541275e-06, "loss": 0.8377, "step": 4108 }, { "epoch": 2.161493950552341, "grad_norm": 2.244814872741699, "learning_rate": 3.602735375232992e-06, "loss": 0.9002, "step": 4109 }, { "epoch": 2.1620199894792216, "grad_norm": 2.1054112911224365, "learning_rate": 3.6021118901680453e-06, "loss": 0.8341, "step": 4110 }, { "epoch": 2.162546028406102, "grad_norm": 2.2180449962615967, "learning_rate": 3.601488320007427e-06, "loss": 0.8624, "step": 4111 }, { "epoch": 2.1630720673329824, "grad_norm": 2.0056049823760986, "learning_rate": 3.6008646647992847e-06, "loss": 0.8472, "step": 4112 }, { "epoch": 2.163598106259863, "grad_norm": 2.117708683013916, "learning_rate": 3.6002409245917707e-06, "loss": 0.8612, "step": 4113 }, { "epoch": 2.1641241451867437, "grad_norm": 2.0889832973480225, "learning_rate": 3.5996170994330465e-06, "loss": 0.8531, "step": 4114 }, { "epoch": 2.1646501841136243, "grad_norm": 2.0072662830352783, "learning_rate": 3.598993189371277e-06, "loss": 0.8617, "step": 4115 }, { "epoch": 2.165176223040505, "grad_norm": 1.9730743169784546, "learning_rate": 3.5983691944546356e-06, "loss": 0.8455, "step": 4116 }, { "epoch": 2.1657022619673856, "grad_norm": 2.1501071453094482, "learning_rate": 3.5977451147313027e-06, "loss": 0.8706, "step": 4117 }, { "epoch": 2.166228300894266, "grad_norm": 2.1422548294067383, "learning_rate": 3.597120950249463e-06, "loss": 0.9415, "step": 4118 }, { "epoch": 2.166754339821147, "grad_norm": 2.0429635047912598, "learning_rate": 3.59649670105731e-06, "loss": 0.8828, "step": 4119 }, { "epoch": 2.1672803787480275, "grad_norm": 2.095120429992676, "learning_rate": 3.5958723672030427e-06, "loss": 0.8692, "step": 4120 }, { "epoch": 2.167806417674908, "grad_norm": 2.0914809703826904, "learning_rate": 3.5952479487348673e-06, "loss": 0.8759, "step": 4121 }, { "epoch": 2.1683324566017887, "grad_norm": 2.08974289894104, "learning_rate": 3.5946234457009952e-06, "loss": 0.8983, "step": 4122 }, { "epoch": 2.168858495528669, "grad_norm": 2.1366801261901855, "learning_rate": 3.593998858149646e-06, "loss": 0.878, "step": 4123 }, { "epoch": 2.1693845344555496, "grad_norm": 2.001303195953369, "learning_rate": 3.593374186129045e-06, "loss": 0.8599, "step": 4124 }, { "epoch": 2.16991057338243, "grad_norm": 2.156665325164795, "learning_rate": 3.5927494296874227e-06, "loss": 0.9572, "step": 4125 }, { "epoch": 2.170436612309311, "grad_norm": 2.0636799335479736, "learning_rate": 3.592124588873019e-06, "loss": 0.8697, "step": 4126 }, { "epoch": 2.1709626512361915, "grad_norm": 2.0708532333374023, "learning_rate": 3.591499663734078e-06, "loss": 0.8569, "step": 4127 }, { "epoch": 2.171488690163072, "grad_norm": 2.0519070625305176, "learning_rate": 3.5908746543188524e-06, "loss": 0.8948, "step": 4128 }, { "epoch": 2.1720147290899527, "grad_norm": 2.040027141571045, "learning_rate": 3.590249560675598e-06, "loss": 0.8536, "step": 4129 }, { "epoch": 2.1725407680168334, "grad_norm": 1.9483141899108887, "learning_rate": 3.58962438285258e-06, "loss": 0.844, "step": 4130 }, { "epoch": 2.173066806943714, "grad_norm": 2.143894910812378, "learning_rate": 3.58899912089807e-06, "loss": 0.9013, "step": 4131 }, { "epoch": 2.173592845870594, "grad_norm": 2.520326852798462, "learning_rate": 3.5883737748603447e-06, "loss": 0.841, "step": 4132 }, { "epoch": 2.174118884797475, "grad_norm": 2.1002626419067383, "learning_rate": 3.5877483447876886e-06, "loss": 0.8828, "step": 4133 }, { "epoch": 2.1746449237243555, "grad_norm": 2.0630040168762207, "learning_rate": 3.587122830728391e-06, "loss": 0.8529, "step": 4134 }, { "epoch": 2.175170962651236, "grad_norm": 2.2008252143859863, "learning_rate": 3.5864972327307506e-06, "loss": 0.8513, "step": 4135 }, { "epoch": 2.1756970015781167, "grad_norm": 2.001927137374878, "learning_rate": 3.5858715508430686e-06, "loss": 0.8322, "step": 4136 }, { "epoch": 2.1762230405049974, "grad_norm": 2.253263235092163, "learning_rate": 3.5852457851136557e-06, "loss": 0.8914, "step": 4137 }, { "epoch": 2.176749079431878, "grad_norm": 2.1944315433502197, "learning_rate": 3.5846199355908283e-06, "loss": 0.8626, "step": 4138 }, { "epoch": 2.1772751183587586, "grad_norm": 2.0573890209198, "learning_rate": 3.5839940023229097e-06, "loss": 0.8426, "step": 4139 }, { "epoch": 2.1778011572856393, "grad_norm": 2.1906769275665283, "learning_rate": 3.583367985358228e-06, "loss": 0.8841, "step": 4140 }, { "epoch": 2.17832719621252, "grad_norm": 2.1218936443328857, "learning_rate": 3.582741884745119e-06, "loss": 0.8172, "step": 4141 }, { "epoch": 2.1788532351394005, "grad_norm": 2.7154877185821533, "learning_rate": 3.5821157005319263e-06, "loss": 0.857, "step": 4142 }, { "epoch": 2.1793792740662807, "grad_norm": 2.24946928024292, "learning_rate": 3.5814894327669965e-06, "loss": 0.9255, "step": 4143 }, { "epoch": 2.1799053129931614, "grad_norm": 2.0119271278381348, "learning_rate": 3.5808630814986857e-06, "loss": 0.8361, "step": 4144 }, { "epoch": 2.180431351920042, "grad_norm": 2.141295909881592, "learning_rate": 3.580236646775356e-06, "loss": 0.8547, "step": 4145 }, { "epoch": 2.1809573908469226, "grad_norm": 2.1024622917175293, "learning_rate": 3.579610128645374e-06, "loss": 0.8116, "step": 4146 }, { "epoch": 2.1814834297738033, "grad_norm": 1.9815858602523804, "learning_rate": 3.578983527157115e-06, "loss": 0.8625, "step": 4147 }, { "epoch": 2.182009468700684, "grad_norm": 2.2920022010803223, "learning_rate": 3.5783568423589593e-06, "loss": 0.8766, "step": 4148 }, { "epoch": 2.1825355076275645, "grad_norm": 2.0788581371307373, "learning_rate": 3.577730074299295e-06, "loss": 0.8372, "step": 4149 }, { "epoch": 2.183061546554445, "grad_norm": 2.0075607299804688, "learning_rate": 3.5771032230265146e-06, "loss": 0.8428, "step": 4150 }, { "epoch": 2.183587585481326, "grad_norm": 2.2261738777160645, "learning_rate": 3.576476288589019e-06, "loss": 0.8515, "step": 4151 }, { "epoch": 2.184113624408206, "grad_norm": 2.3486971855163574, "learning_rate": 3.5758492710352144e-06, "loss": 0.8768, "step": 4152 }, { "epoch": 2.1846396633350866, "grad_norm": 2.1998422145843506, "learning_rate": 3.575222170413514e-06, "loss": 0.8686, "step": 4153 }, { "epoch": 2.1851657022619673, "grad_norm": 2.0592329502105713, "learning_rate": 3.5745949867723368e-06, "loss": 0.837, "step": 4154 }, { "epoch": 2.185691741188848, "grad_norm": 2.2015960216522217, "learning_rate": 3.5739677201601085e-06, "loss": 0.8608, "step": 4155 }, { "epoch": 2.1862177801157285, "grad_norm": 2.045452117919922, "learning_rate": 3.573340370625262e-06, "loss": 0.8603, "step": 4156 }, { "epoch": 2.186743819042609, "grad_norm": 2.2400918006896973, "learning_rate": 3.5727129382162355e-06, "loss": 0.8863, "step": 4157 }, { "epoch": 2.18726985796949, "grad_norm": 2.2280173301696777, "learning_rate": 3.572085422981474e-06, "loss": 0.8809, "step": 4158 }, { "epoch": 2.1877958968963704, "grad_norm": 2.100773334503174, "learning_rate": 3.5714578249694274e-06, "loss": 0.8807, "step": 4159 }, { "epoch": 2.188321935823251, "grad_norm": 2.187225341796875, "learning_rate": 3.5708301442285564e-06, "loss": 0.8448, "step": 4160 }, { "epoch": 2.1888479747501317, "grad_norm": 2.1500113010406494, "learning_rate": 3.5702023808073227e-06, "loss": 0.8525, "step": 4161 }, { "epoch": 2.189374013677012, "grad_norm": 2.1455650329589844, "learning_rate": 3.5695745347541984e-06, "loss": 0.8843, "step": 4162 }, { "epoch": 2.1899000526038925, "grad_norm": 2.107534170150757, "learning_rate": 3.5689466061176594e-06, "loss": 0.8502, "step": 4163 }, { "epoch": 2.190426091530773, "grad_norm": 1.9831626415252686, "learning_rate": 3.5683185949461897e-06, "loss": 0.8898, "step": 4164 }, { "epoch": 2.190952130457654, "grad_norm": 2.0310144424438477, "learning_rate": 3.567690501288278e-06, "loss": 0.8853, "step": 4165 }, { "epoch": 2.1914781693845344, "grad_norm": 2.0457518100738525, "learning_rate": 3.56706232519242e-06, "loss": 0.8411, "step": 4166 }, { "epoch": 2.192004208311415, "grad_norm": 2.1799845695495605, "learning_rate": 3.566434066707121e-06, "loss": 0.864, "step": 4167 }, { "epoch": 2.1925302472382957, "grad_norm": 2.10738468170166, "learning_rate": 3.565805725880886e-06, "loss": 0.904, "step": 4168 }, { "epoch": 2.1930562861651763, "grad_norm": 2.0865280628204346, "learning_rate": 3.565177302762233e-06, "loss": 0.8882, "step": 4169 }, { "epoch": 2.193582325092057, "grad_norm": 2.122330904006958, "learning_rate": 3.5645487973996816e-06, "loss": 0.8578, "step": 4170 }, { "epoch": 2.1941083640189376, "grad_norm": 2.141927480697632, "learning_rate": 3.563920209841761e-06, "loss": 0.8538, "step": 4171 }, { "epoch": 2.194634402945818, "grad_norm": 2.1214215755462646, "learning_rate": 3.563291540137004e-06, "loss": 0.8717, "step": 4172 }, { "epoch": 2.1951604418726984, "grad_norm": 2.218599319458008, "learning_rate": 3.5626627883339515e-06, "loss": 0.9027, "step": 4173 }, { "epoch": 2.195686480799579, "grad_norm": 2.1105246543884277, "learning_rate": 3.562033954481152e-06, "loss": 0.8819, "step": 4174 }, { "epoch": 2.1962125197264597, "grad_norm": 2.157323122024536, "learning_rate": 3.561405038627156e-06, "loss": 0.8753, "step": 4175 }, { "epoch": 2.1967385586533403, "grad_norm": 2.0801522731781006, "learning_rate": 3.5607760408205257e-06, "loss": 0.8573, "step": 4176 }, { "epoch": 2.197264597580221, "grad_norm": 2.1710987091064453, "learning_rate": 3.560146961109824e-06, "loss": 0.8475, "step": 4177 }, { "epoch": 2.1977906365071016, "grad_norm": 2.22119402885437, "learning_rate": 3.5595177995436264e-06, "loss": 0.8424, "step": 4178 }, { "epoch": 2.1983166754339822, "grad_norm": 2.13811993598938, "learning_rate": 3.5588885561705085e-06, "loss": 0.8741, "step": 4179 }, { "epoch": 2.198842714360863, "grad_norm": 2.0428004264831543, "learning_rate": 3.558259231039056e-06, "loss": 0.8614, "step": 4180 }, { "epoch": 2.199368753287743, "grad_norm": 2.2283437252044678, "learning_rate": 3.557629824197861e-06, "loss": 0.8765, "step": 4181 }, { "epoch": 2.1998947922146237, "grad_norm": 2.0320839881896973, "learning_rate": 3.557000335695521e-06, "loss": 0.9032, "step": 4182 }, { "epoch": 2.2004208311415043, "grad_norm": 2.188304901123047, "learning_rate": 3.5563707655806374e-06, "loss": 0.9129, "step": 4183 }, { "epoch": 2.200946870068385, "grad_norm": 2.272050619125366, "learning_rate": 3.5557411139018216e-06, "loss": 0.931, "step": 4184 }, { "epoch": 2.2014729089952656, "grad_norm": 2.6149709224700928, "learning_rate": 3.5551113807076916e-06, "loss": 0.8608, "step": 4185 }, { "epoch": 2.2019989479221462, "grad_norm": 2.2002363204956055, "learning_rate": 3.5544815660468666e-06, "loss": 0.853, "step": 4186 }, { "epoch": 2.202524986849027, "grad_norm": 2.087035894393921, "learning_rate": 3.5538516699679794e-06, "loss": 0.8624, "step": 4187 }, { "epoch": 2.2030510257759075, "grad_norm": 2.0335192680358887, "learning_rate": 3.5532216925196615e-06, "loss": 0.9082, "step": 4188 }, { "epoch": 2.203577064702788, "grad_norm": 2.1938936710357666, "learning_rate": 3.5525916337505573e-06, "loss": 0.8695, "step": 4189 }, { "epoch": 2.2041031036296688, "grad_norm": 2.0097696781158447, "learning_rate": 3.5519614937093126e-06, "loss": 0.8179, "step": 4190 }, { "epoch": 2.2046291425565494, "grad_norm": 2.2551372051239014, "learning_rate": 3.551331272444582e-06, "loss": 0.9156, "step": 4191 }, { "epoch": 2.2051551814834296, "grad_norm": 2.141223907470703, "learning_rate": 3.5507009700050264e-06, "loss": 0.8587, "step": 4192 }, { "epoch": 2.2056812204103102, "grad_norm": 2.1000430583953857, "learning_rate": 3.550070586439312e-06, "loss": 0.9162, "step": 4193 }, { "epoch": 2.206207259337191, "grad_norm": 2.1592979431152344, "learning_rate": 3.5494401217961115e-06, "loss": 0.8297, "step": 4194 }, { "epoch": 2.2067332982640715, "grad_norm": 2.0791616439819336, "learning_rate": 3.548809576124104e-06, "loss": 0.9032, "step": 4195 }, { "epoch": 2.207259337190952, "grad_norm": 2.1275134086608887, "learning_rate": 3.5481789494719756e-06, "loss": 0.8911, "step": 4196 }, { "epoch": 2.2077853761178328, "grad_norm": 2.2034800052642822, "learning_rate": 3.5475482418884168e-06, "loss": 0.8859, "step": 4197 }, { "epoch": 2.2083114150447134, "grad_norm": 2.0067138671875, "learning_rate": 3.546917453422125e-06, "loss": 0.8524, "step": 4198 }, { "epoch": 2.208837453971594, "grad_norm": 2.051272392272949, "learning_rate": 3.546286584121806e-06, "loss": 0.8533, "step": 4199 }, { "epoch": 2.2093634928984747, "grad_norm": 2.062000274658203, "learning_rate": 3.5456556340361696e-06, "loss": 0.824, "step": 4200 }, { "epoch": 2.209889531825355, "grad_norm": 2.1374127864837646, "learning_rate": 3.5450246032139317e-06, "loss": 0.883, "step": 4201 }, { "epoch": 2.2104155707522355, "grad_norm": 2.5353798866271973, "learning_rate": 3.544393491703816e-06, "loss": 0.8784, "step": 4202 }, { "epoch": 2.210941609679116, "grad_norm": 2.0371382236480713, "learning_rate": 3.5437622995545508e-06, "loss": 0.9068, "step": 4203 }, { "epoch": 2.2114676486059968, "grad_norm": 2.0304863452911377, "learning_rate": 3.5431310268148712e-06, "loss": 0.8415, "step": 4204 }, { "epoch": 2.2119936875328774, "grad_norm": 2.2806100845336914, "learning_rate": 3.5424996735335193e-06, "loss": 0.9174, "step": 4205 }, { "epoch": 2.212519726459758, "grad_norm": 2.052002429962158, "learning_rate": 3.541868239759243e-06, "loss": 0.8608, "step": 4206 }, { "epoch": 2.2130457653866387, "grad_norm": 2.1530139446258545, "learning_rate": 3.5412367255407964e-06, "loss": 0.8584, "step": 4207 }, { "epoch": 2.2135718043135193, "grad_norm": 2.1331427097320557, "learning_rate": 3.5406051309269384e-06, "loss": 0.8351, "step": 4208 }, { "epoch": 2.2140978432404, "grad_norm": 2.160874843597412, "learning_rate": 3.5399734559664357e-06, "loss": 0.822, "step": 4209 }, { "epoch": 2.2146238821672806, "grad_norm": 2.0492167472839355, "learning_rate": 3.539341700708062e-06, "loss": 0.8842, "step": 4210 }, { "epoch": 2.215149921094161, "grad_norm": 2.0056469440460205, "learning_rate": 3.5387098652005945e-06, "loss": 0.8449, "step": 4211 }, { "epoch": 2.2156759600210414, "grad_norm": 2.2672998905181885, "learning_rate": 3.53807794949282e-06, "loss": 0.8855, "step": 4212 }, { "epoch": 2.216201998947922, "grad_norm": 2.2707412242889404, "learning_rate": 3.5374459536335272e-06, "loss": 0.9064, "step": 4213 }, { "epoch": 2.2167280378748027, "grad_norm": 2.1340043544769287, "learning_rate": 3.5368138776715154e-06, "loss": 0.8837, "step": 4214 }, { "epoch": 2.2172540768016833, "grad_norm": 2.1326889991760254, "learning_rate": 3.536181721655587e-06, "loss": 0.8819, "step": 4215 }, { "epoch": 2.217780115728564, "grad_norm": 2.0954203605651855, "learning_rate": 3.5355494856345527e-06, "loss": 0.9368, "step": 4216 }, { "epoch": 2.2183061546554446, "grad_norm": 1.9655033349990845, "learning_rate": 3.5349171696572283e-06, "loss": 0.8537, "step": 4217 }, { "epoch": 2.218832193582325, "grad_norm": 2.101762533187866, "learning_rate": 3.5342847737724347e-06, "loss": 0.8513, "step": 4218 }, { "epoch": 2.219358232509206, "grad_norm": 2.019273042678833, "learning_rate": 3.5336522980290004e-06, "loss": 0.8533, "step": 4219 }, { "epoch": 2.2198842714360865, "grad_norm": 2.1068456172943115, "learning_rate": 3.5330197424757608e-06, "loss": 0.8603, "step": 4220 }, { "epoch": 2.2204103103629667, "grad_norm": 2.2011494636535645, "learning_rate": 3.5323871071615556e-06, "loss": 0.9115, "step": 4221 }, { "epoch": 2.2209363492898473, "grad_norm": 1.9718877077102661, "learning_rate": 3.5317543921352317e-06, "loss": 0.8072, "step": 4222 }, { "epoch": 2.221462388216728, "grad_norm": 2.0835483074188232, "learning_rate": 3.5311215974456412e-06, "loss": 0.8521, "step": 4223 }, { "epoch": 2.2219884271436086, "grad_norm": 2.0352232456207275, "learning_rate": 3.5304887231416453e-06, "loss": 0.8445, "step": 4224 }, { "epoch": 2.222514466070489, "grad_norm": 2.183950185775757, "learning_rate": 3.529855769272107e-06, "loss": 0.8703, "step": 4225 }, { "epoch": 2.22304050499737, "grad_norm": 2.041753053665161, "learning_rate": 3.5292227358858982e-06, "loss": 0.8214, "step": 4226 }, { "epoch": 2.2235665439242505, "grad_norm": 2.1767640113830566, "learning_rate": 3.528589623031896e-06, "loss": 0.8779, "step": 4227 }, { "epoch": 2.224092582851131, "grad_norm": 2.2264418601989746, "learning_rate": 3.527956430758985e-06, "loss": 0.8812, "step": 4228 }, { "epoch": 2.2246186217780117, "grad_norm": 2.1695525646209717, "learning_rate": 3.527323159116054e-06, "loss": 0.8762, "step": 4229 }, { "epoch": 2.2251446607048924, "grad_norm": 2.0562469959259033, "learning_rate": 3.5266898081519996e-06, "loss": 0.8384, "step": 4230 }, { "epoch": 2.2256706996317726, "grad_norm": 2.2022643089294434, "learning_rate": 3.5260563779157236e-06, "loss": 0.8711, "step": 4231 }, { "epoch": 2.226196738558653, "grad_norm": 2.013554573059082, "learning_rate": 3.5254228684561333e-06, "loss": 0.8919, "step": 4232 }, { "epoch": 2.226722777485534, "grad_norm": 2.1985723972320557, "learning_rate": 3.5247892798221428e-06, "loss": 0.8869, "step": 4233 }, { "epoch": 2.2272488164124145, "grad_norm": 2.2243082523345947, "learning_rate": 3.524155612062673e-06, "loss": 0.872, "step": 4234 }, { "epoch": 2.227774855339295, "grad_norm": 2.297344923019409, "learning_rate": 3.5235218652266506e-06, "loss": 0.8303, "step": 4235 }, { "epoch": 2.2283008942661757, "grad_norm": 2.1908962726593018, "learning_rate": 3.5228880393630075e-06, "loss": 0.8779, "step": 4236 }, { "epoch": 2.2288269331930564, "grad_norm": 2.2931530475616455, "learning_rate": 3.5222541345206834e-06, "loss": 0.9185, "step": 4237 }, { "epoch": 2.229352972119937, "grad_norm": 2.1484785079956055, "learning_rate": 3.5216201507486216e-06, "loss": 0.8786, "step": 4238 }, { "epoch": 2.2298790110468176, "grad_norm": 2.0589776039123535, "learning_rate": 3.5209860880957738e-06, "loss": 0.8021, "step": 4239 }, { "epoch": 2.2304050499736983, "grad_norm": 2.135418176651001, "learning_rate": 3.5203519466110963e-06, "loss": 0.865, "step": 4240 }, { "epoch": 2.2309310889005785, "grad_norm": 2.0229220390319824, "learning_rate": 3.5197177263435524e-06, "loss": 0.8998, "step": 4241 }, { "epoch": 2.231457127827459, "grad_norm": 2.319934606552124, "learning_rate": 3.5190834273421116e-06, "loss": 0.8415, "step": 4242 }, { "epoch": 2.2319831667543397, "grad_norm": 2.1715381145477295, "learning_rate": 3.5184490496557488e-06, "loss": 0.9046, "step": 4243 }, { "epoch": 2.2325092056812204, "grad_norm": 1.9889158010482788, "learning_rate": 3.517814593333445e-06, "loss": 0.8588, "step": 4244 }, { "epoch": 2.233035244608101, "grad_norm": 2.1952552795410156, "learning_rate": 3.517180058424187e-06, "loss": 0.8656, "step": 4245 }, { "epoch": 2.2335612835349816, "grad_norm": 2.0684115886688232, "learning_rate": 3.5165454449769697e-06, "loss": 0.8688, "step": 4246 }, { "epoch": 2.2340873224618623, "grad_norm": 1.9831233024597168, "learning_rate": 3.515910753040791e-06, "loss": 0.8253, "step": 4247 }, { "epoch": 2.234613361388743, "grad_norm": 2.3787081241607666, "learning_rate": 3.515275982664657e-06, "loss": 0.9229, "step": 4248 }, { "epoch": 2.2351394003156235, "grad_norm": 2.2091002464294434, "learning_rate": 3.51464113389758e-06, "loss": 0.8911, "step": 4249 }, { "epoch": 2.2356654392425037, "grad_norm": 2.0995006561279297, "learning_rate": 3.5140062067885762e-06, "loss": 0.8078, "step": 4250 }, { "epoch": 2.2361914781693844, "grad_norm": 2.121108293533325, "learning_rate": 3.5133712013866694e-06, "loss": 0.8477, "step": 4251 }, { "epoch": 2.236717517096265, "grad_norm": 2.0082993507385254, "learning_rate": 3.5127361177408903e-06, "loss": 0.8045, "step": 4252 }, { "epoch": 2.2372435560231456, "grad_norm": 2.1125218868255615, "learning_rate": 3.512100955900274e-06, "loss": 0.843, "step": 4253 }, { "epoch": 2.2377695949500263, "grad_norm": 2.154398202896118, "learning_rate": 3.5114657159138626e-06, "loss": 0.9035, "step": 4254 }, { "epoch": 2.238295633876907, "grad_norm": 2.012594699859619, "learning_rate": 3.5108303978307034e-06, "loss": 0.8774, "step": 4255 }, { "epoch": 2.2388216728037875, "grad_norm": 2.03456711769104, "learning_rate": 3.5101950016998515e-06, "loss": 0.8351, "step": 4256 }, { "epoch": 2.239347711730668, "grad_norm": 2.0587034225463867, "learning_rate": 3.509559527570365e-06, "loss": 0.8943, "step": 4257 }, { "epoch": 2.239873750657549, "grad_norm": 2.1251728534698486, "learning_rate": 3.5089239754913107e-06, "loss": 0.8217, "step": 4258 }, { "epoch": 2.2403997895844294, "grad_norm": 2.0433340072631836, "learning_rate": 3.50828834551176e-06, "loss": 0.8874, "step": 4259 }, { "epoch": 2.24092582851131, "grad_norm": 1.9678043127059937, "learning_rate": 3.5076526376807916e-06, "loss": 0.8453, "step": 4260 }, { "epoch": 2.2414518674381902, "grad_norm": 2.17108416557312, "learning_rate": 3.5070168520474895e-06, "loss": 0.893, "step": 4261 }, { "epoch": 2.241977906365071, "grad_norm": 2.1280593872070312, "learning_rate": 3.5063809886609424e-06, "loss": 0.8557, "step": 4262 }, { "epoch": 2.2425039452919515, "grad_norm": 2.067106246948242, "learning_rate": 3.5057450475702478e-06, "loss": 0.8292, "step": 4263 }, { "epoch": 2.243029984218832, "grad_norm": 2.162203311920166, "learning_rate": 3.5051090288245064e-06, "loss": 0.856, "step": 4264 }, { "epoch": 2.243556023145713, "grad_norm": 1.9915986061096191, "learning_rate": 3.5044729324728266e-06, "loss": 0.7898, "step": 4265 }, { "epoch": 2.2440820620725934, "grad_norm": 2.1245481967926025, "learning_rate": 3.5038367585643225e-06, "loss": 0.8498, "step": 4266 }, { "epoch": 2.244608100999474, "grad_norm": 2.1004154682159424, "learning_rate": 3.5032005071481147e-06, "loss": 0.8838, "step": 4267 }, { "epoch": 2.2451341399263547, "grad_norm": 2.017887830734253, "learning_rate": 3.5025641782733274e-06, "loss": 0.9011, "step": 4268 }, { "epoch": 2.2456601788532353, "grad_norm": 2.2040300369262695, "learning_rate": 3.5019277719890926e-06, "loss": 0.8892, "step": 4269 }, { "epoch": 2.2461862177801155, "grad_norm": 1.999802827835083, "learning_rate": 3.5012912883445497e-06, "loss": 0.873, "step": 4270 }, { "epoch": 2.246712256706996, "grad_norm": 2.0048611164093018, "learning_rate": 3.500654727388842e-06, "loss": 0.8634, "step": 4271 }, { "epoch": 2.247238295633877, "grad_norm": 2.111542224884033, "learning_rate": 3.5000180891711187e-06, "loss": 0.8586, "step": 4272 }, { "epoch": 2.2477643345607574, "grad_norm": 2.0371601581573486, "learning_rate": 3.4993813737405363e-06, "loss": 0.9076, "step": 4273 }, { "epoch": 2.248290373487638, "grad_norm": 2.0389769077301025, "learning_rate": 3.4987445811462562e-06, "loss": 0.9, "step": 4274 }, { "epoch": 2.2488164124145187, "grad_norm": 2.1538991928100586, "learning_rate": 3.4981077114374456e-06, "loss": 0.8637, "step": 4275 }, { "epoch": 2.2493424513413993, "grad_norm": 1.9246553182601929, "learning_rate": 3.4974707646632783e-06, "loss": 0.8846, "step": 4276 }, { "epoch": 2.24986849026828, "grad_norm": 2.1794588565826416, "learning_rate": 3.4968337408729345e-06, "loss": 0.8666, "step": 4277 }, { "epoch": 2.2503945291951606, "grad_norm": 2.088866949081421, "learning_rate": 3.4961966401156e-06, "loss": 0.8905, "step": 4278 }, { "epoch": 2.250920568122041, "grad_norm": 2.1613283157348633, "learning_rate": 3.4955594624404653e-06, "loss": 0.911, "step": 4279 }, { "epoch": 2.251446607048922, "grad_norm": 2.006049156188965, "learning_rate": 3.4949222078967275e-06, "loss": 0.8533, "step": 4280 }, { "epoch": 2.251972645975802, "grad_norm": 2.0219216346740723, "learning_rate": 3.4942848765335917e-06, "loss": 0.9122, "step": 4281 }, { "epoch": 2.2524986849026827, "grad_norm": 2.258366346359253, "learning_rate": 3.493647468400265e-06, "loss": 0.8683, "step": 4282 }, { "epoch": 2.2530247238295633, "grad_norm": 2.131554365158081, "learning_rate": 3.4930099835459646e-06, "loss": 0.9059, "step": 4283 }, { "epoch": 2.253550762756444, "grad_norm": 2.0112369060516357, "learning_rate": 3.4923724220199107e-06, "loss": 0.8644, "step": 4284 }, { "epoch": 2.2540768016833246, "grad_norm": 2.1222331523895264, "learning_rate": 3.4917347838713305e-06, "loss": 0.8387, "step": 4285 }, { "epoch": 2.254602840610205, "grad_norm": 2.0844948291778564, "learning_rate": 3.491097069149457e-06, "loss": 0.8793, "step": 4286 }, { "epoch": 2.255128879537086, "grad_norm": 2.0851430892944336, "learning_rate": 3.490459277903529e-06, "loss": 0.8978, "step": 4287 }, { "epoch": 2.2556549184639665, "grad_norm": 2.2222628593444824, "learning_rate": 3.489821410182791e-06, "loss": 0.8701, "step": 4288 }, { "epoch": 2.256180957390847, "grad_norm": 2.1356382369995117, "learning_rate": 3.4891834660364943e-06, "loss": 0.9172, "step": 4289 }, { "epoch": 2.2567069963177273, "grad_norm": 2.265181064605713, "learning_rate": 3.488545445513895e-06, "loss": 0.8495, "step": 4290 }, { "epoch": 2.257233035244608, "grad_norm": 1.9859869480133057, "learning_rate": 3.487907348664256e-06, "loss": 0.8307, "step": 4291 }, { "epoch": 2.2577590741714886, "grad_norm": 2.245936632156372, "learning_rate": 3.487269175536846e-06, "loss": 0.9128, "step": 4292 }, { "epoch": 2.258285113098369, "grad_norm": 2.280730962753296, "learning_rate": 3.4866309261809385e-06, "loss": 0.8705, "step": 4293 }, { "epoch": 2.25881115202525, "grad_norm": 2.0300488471984863, "learning_rate": 3.4859926006458135e-06, "loss": 0.8601, "step": 4294 }, { "epoch": 2.2593371909521305, "grad_norm": 1.9570188522338867, "learning_rate": 3.485354198980757e-06, "loss": 0.8389, "step": 4295 }, { "epoch": 2.259863229879011, "grad_norm": 2.137993812561035, "learning_rate": 3.4847157212350623e-06, "loss": 0.8279, "step": 4296 }, { "epoch": 2.2603892688058917, "grad_norm": 2.162769079208374, "learning_rate": 3.4840771674580272e-06, "loss": 0.8713, "step": 4297 }, { "epoch": 2.2609153077327724, "grad_norm": 2.270376682281494, "learning_rate": 3.4834385376989533e-06, "loss": 0.8174, "step": 4298 }, { "epoch": 2.2614413466596526, "grad_norm": 2.18310809135437, "learning_rate": 3.4827998320071517e-06, "loss": 0.8727, "step": 4299 }, { "epoch": 2.2619673855865337, "grad_norm": 2.229448080062866, "learning_rate": 3.482161050431937e-06, "loss": 0.8977, "step": 4300 }, { "epoch": 2.262493424513414, "grad_norm": 2.0726687908172607, "learning_rate": 3.4815221930226306e-06, "loss": 0.8857, "step": 4301 }, { "epoch": 2.2630194634402945, "grad_norm": 2.1234445571899414, "learning_rate": 3.4808832598285604e-06, "loss": 0.8684, "step": 4302 }, { "epoch": 2.263545502367175, "grad_norm": 2.1012253761291504, "learning_rate": 3.4802442508990594e-06, "loss": 0.842, "step": 4303 }, { "epoch": 2.2640715412940557, "grad_norm": 1.9456729888916016, "learning_rate": 3.4796051662834655e-06, "loss": 0.8478, "step": 4304 }, { "epoch": 2.2645975802209364, "grad_norm": 2.091815710067749, "learning_rate": 3.4789660060311236e-06, "loss": 0.8601, "step": 4305 }, { "epoch": 2.265123619147817, "grad_norm": 1.9662847518920898, "learning_rate": 3.4783267701913847e-06, "loss": 0.8426, "step": 4306 }, { "epoch": 2.2656496580746976, "grad_norm": 2.113997220993042, "learning_rate": 3.4776874588136046e-06, "loss": 0.8924, "step": 4307 }, { "epoch": 2.2661756970015783, "grad_norm": 1.9844086170196533, "learning_rate": 3.4770480719471454e-06, "loss": 0.8485, "step": 4308 }, { "epoch": 2.266701735928459, "grad_norm": 2.032829999923706, "learning_rate": 3.4764086096413755e-06, "loss": 0.8551, "step": 4309 }, { "epoch": 2.267227774855339, "grad_norm": 2.2652835845947266, "learning_rate": 3.4757690719456695e-06, "loss": 0.8961, "step": 4310 }, { "epoch": 2.2677538137822197, "grad_norm": 2.078303337097168, "learning_rate": 3.475129458909405e-06, "loss": 0.8693, "step": 4311 }, { "epoch": 2.2682798527091004, "grad_norm": 2.039674758911133, "learning_rate": 3.474489770581968e-06, "loss": 0.8742, "step": 4312 }, { "epoch": 2.268805891635981, "grad_norm": 2.0421817302703857, "learning_rate": 3.4738500070127516e-06, "loss": 0.846, "step": 4313 }, { "epoch": 2.2693319305628616, "grad_norm": 2.069044351577759, "learning_rate": 3.473210168251151e-06, "loss": 0.9097, "step": 4314 }, { "epoch": 2.2698579694897423, "grad_norm": 2.2852792739868164, "learning_rate": 3.47257025434657e-06, "loss": 0.8906, "step": 4315 }, { "epoch": 2.270384008416623, "grad_norm": 2.149278163909912, "learning_rate": 3.471930265348417e-06, "loss": 0.9044, "step": 4316 }, { "epoch": 2.2709100473435035, "grad_norm": 2.2667994499206543, "learning_rate": 3.4712902013061067e-06, "loss": 0.8948, "step": 4317 }, { "epoch": 2.271436086270384, "grad_norm": 2.2345080375671387, "learning_rate": 3.4706500622690587e-06, "loss": 0.8889, "step": 4318 }, { "epoch": 2.2719621251972644, "grad_norm": 2.0733249187469482, "learning_rate": 3.4700098482866994e-06, "loss": 0.8801, "step": 4319 }, { "epoch": 2.272488164124145, "grad_norm": 2.1901328563690186, "learning_rate": 3.469369559408461e-06, "loss": 0.9064, "step": 4320 }, { "epoch": 2.2730142030510256, "grad_norm": 2.0984387397766113, "learning_rate": 3.468729195683781e-06, "loss": 0.8593, "step": 4321 }, { "epoch": 2.2735402419779063, "grad_norm": 2.0511350631713867, "learning_rate": 3.468088757162103e-06, "loss": 0.8095, "step": 4322 }, { "epoch": 2.274066280904787, "grad_norm": 2.12673020362854, "learning_rate": 3.467448243892876e-06, "loss": 0.842, "step": 4323 }, { "epoch": 2.2745923198316675, "grad_norm": 2.0762014389038086, "learning_rate": 3.466807655925555e-06, "loss": 0.8738, "step": 4324 }, { "epoch": 2.275118358758548, "grad_norm": 2.242586851119995, "learning_rate": 3.4661669933096e-06, "loss": 0.9088, "step": 4325 }, { "epoch": 2.275644397685429, "grad_norm": 2.1537024974823, "learning_rate": 3.465526256094478e-06, "loss": 0.8866, "step": 4326 }, { "epoch": 2.2761704366123094, "grad_norm": 2.121854066848755, "learning_rate": 3.464885444329662e-06, "loss": 0.8992, "step": 4327 }, { "epoch": 2.27669647553919, "grad_norm": 2.0761935710906982, "learning_rate": 3.4642445580646293e-06, "loss": 0.8587, "step": 4328 }, { "epoch": 2.2772225144660707, "grad_norm": 2.070582628250122, "learning_rate": 3.4636035973488636e-06, "loss": 0.8811, "step": 4329 }, { "epoch": 2.277748553392951, "grad_norm": 2.040220260620117, "learning_rate": 3.462962562231854e-06, "loss": 0.8279, "step": 4330 }, { "epoch": 2.2782745923198315, "grad_norm": 2.0260262489318848, "learning_rate": 3.4623214527630977e-06, "loss": 0.8581, "step": 4331 }, { "epoch": 2.278800631246712, "grad_norm": 2.0644946098327637, "learning_rate": 3.4616802689920935e-06, "loss": 0.8562, "step": 4332 }, { "epoch": 2.279326670173593, "grad_norm": 2.1459715366363525, "learning_rate": 3.461039010968349e-06, "loss": 0.8501, "step": 4333 }, { "epoch": 2.2798527091004734, "grad_norm": 2.0744025707244873, "learning_rate": 3.4603976787413772e-06, "loss": 0.9056, "step": 4334 }, { "epoch": 2.280378748027354, "grad_norm": 2.158862590789795, "learning_rate": 3.459756272360696e-06, "loss": 0.8587, "step": 4335 }, { "epoch": 2.2809047869542347, "grad_norm": 2.1113994121551514, "learning_rate": 3.4591147918758283e-06, "loss": 0.83, "step": 4336 }, { "epoch": 2.2814308258811153, "grad_norm": 2.066450595855713, "learning_rate": 3.4584732373363045e-06, "loss": 0.8597, "step": 4337 }, { "epoch": 2.281956864807996, "grad_norm": 2.01365327835083, "learning_rate": 3.4578316087916608e-06, "loss": 0.8499, "step": 4338 }, { "epoch": 2.282482903734876, "grad_norm": 2.143472671508789, "learning_rate": 3.4571899062914366e-06, "loss": 0.8319, "step": 4339 }, { "epoch": 2.283008942661757, "grad_norm": 2.162968873977661, "learning_rate": 3.4565481298851798e-06, "loss": 0.9011, "step": 4340 }, { "epoch": 2.2835349815886374, "grad_norm": 2.0251502990722656, "learning_rate": 3.455906279622443e-06, "loss": 0.8347, "step": 4341 }, { "epoch": 2.284061020515518, "grad_norm": 2.066408395767212, "learning_rate": 3.4552643555527845e-06, "loss": 0.8271, "step": 4342 }, { "epoch": 2.2845870594423987, "grad_norm": 2.213531970977783, "learning_rate": 3.4546223577257667e-06, "loss": 0.9173, "step": 4343 }, { "epoch": 2.2851130983692793, "grad_norm": 2.2408361434936523, "learning_rate": 3.45398028619096e-06, "loss": 0.8767, "step": 4344 }, { "epoch": 2.28563913729616, "grad_norm": 2.097219705581665, "learning_rate": 3.4533381409979407e-06, "loss": 0.8766, "step": 4345 }, { "epoch": 2.2861651762230406, "grad_norm": 2.1763646602630615, "learning_rate": 3.452695922196289e-06, "loss": 0.8917, "step": 4346 }, { "epoch": 2.2866912151499212, "grad_norm": 2.232351064682007, "learning_rate": 3.452053629835592e-06, "loss": 0.8474, "step": 4347 }, { "epoch": 2.287217254076802, "grad_norm": 2.093715190887451, "learning_rate": 3.4514112639654405e-06, "loss": 0.856, "step": 4348 }, { "epoch": 2.2877432930036825, "grad_norm": 2.174497604370117, "learning_rate": 3.4507688246354344e-06, "loss": 0.8883, "step": 4349 }, { "epoch": 2.2882693319305627, "grad_norm": 2.0189294815063477, "learning_rate": 3.4501263118951762e-06, "loss": 0.8157, "step": 4350 }, { "epoch": 2.2887953708574433, "grad_norm": 2.049172878265381, "learning_rate": 3.4494837257942754e-06, "loss": 0.8211, "step": 4351 }, { "epoch": 2.289321409784324, "grad_norm": 2.1016745567321777, "learning_rate": 3.448841066382348e-06, "loss": 0.907, "step": 4352 }, { "epoch": 2.2898474487112046, "grad_norm": 2.1344809532165527, "learning_rate": 3.4481983337090137e-06, "loss": 0.8721, "step": 4353 }, { "epoch": 2.2903734876380852, "grad_norm": 2.0472795963287354, "learning_rate": 3.4475555278238987e-06, "loss": 0.8947, "step": 4354 }, { "epoch": 2.290899526564966, "grad_norm": 2.07641863822937, "learning_rate": 3.4469126487766353e-06, "loss": 0.8442, "step": 4355 }, { "epoch": 2.2914255654918465, "grad_norm": 2.1421542167663574, "learning_rate": 3.446269696616861e-06, "loss": 0.8065, "step": 4356 }, { "epoch": 2.291951604418727, "grad_norm": 2.029745101928711, "learning_rate": 3.44562667139422e-06, "loss": 0.8495, "step": 4357 }, { "epoch": 2.2924776433456078, "grad_norm": 2.2116329669952393, "learning_rate": 3.4449835731583597e-06, "loss": 0.9188, "step": 4358 }, { "epoch": 2.293003682272488, "grad_norm": 2.302241563796997, "learning_rate": 3.4443404019589354e-06, "loss": 0.9028, "step": 4359 }, { "epoch": 2.2935297211993686, "grad_norm": 2.095918893814087, "learning_rate": 3.443697157845608e-06, "loss": 0.9446, "step": 4360 }, { "epoch": 2.2940557601262492, "grad_norm": 2.2808518409729004, "learning_rate": 3.443053840868042e-06, "loss": 0.8409, "step": 4361 }, { "epoch": 2.29458179905313, "grad_norm": 2.1732470989227295, "learning_rate": 3.442410451075909e-06, "loss": 0.8907, "step": 4362 }, { "epoch": 2.2951078379800105, "grad_norm": 2.1883485317230225, "learning_rate": 3.4417669885188876e-06, "loss": 0.8519, "step": 4363 }, { "epoch": 2.295633876906891, "grad_norm": 2.183263063430786, "learning_rate": 3.441123453246659e-06, "loss": 0.913, "step": 4364 }, { "epoch": 2.2961599158337718, "grad_norm": 2.0992751121520996, "learning_rate": 3.440479845308912e-06, "loss": 0.85, "step": 4365 }, { "epoch": 2.2966859547606524, "grad_norm": 2.0351672172546387, "learning_rate": 3.4398361647553403e-06, "loss": 0.8109, "step": 4366 }, { "epoch": 2.297211993687533, "grad_norm": 2.1656272411346436, "learning_rate": 3.439192411635644e-06, "loss": 0.9196, "step": 4367 }, { "epoch": 2.2977380326144132, "grad_norm": 2.10567569732666, "learning_rate": 3.438548585999527e-06, "loss": 0.88, "step": 4368 }, { "epoch": 2.298264071541294, "grad_norm": 1.969288945198059, "learning_rate": 3.4379046878967003e-06, "loss": 0.852, "step": 4369 }, { "epoch": 2.2987901104681745, "grad_norm": 2.2979350090026855, "learning_rate": 3.4372607173768825e-06, "loss": 0.8498, "step": 4370 }, { "epoch": 2.299316149395055, "grad_norm": 2.0931737422943115, "learning_rate": 3.4366166744897925e-06, "loss": 0.9, "step": 4371 }, { "epoch": 2.2998421883219358, "grad_norm": 2.1737639904022217, "learning_rate": 3.435972559285159e-06, "loss": 0.9037, "step": 4372 }, { "epoch": 2.3003682272488164, "grad_norm": 2.103475570678711, "learning_rate": 3.435328371812715e-06, "loss": 0.8586, "step": 4373 }, { "epoch": 2.300894266175697, "grad_norm": 2.0488972663879395, "learning_rate": 3.4346841121221996e-06, "loss": 0.8097, "step": 4374 }, { "epoch": 2.3014203051025777, "grad_norm": 2.2580041885375977, "learning_rate": 3.4340397802633565e-06, "loss": 0.8367, "step": 4375 }, { "epoch": 2.3019463440294583, "grad_norm": 2.215437412261963, "learning_rate": 3.4333953762859352e-06, "loss": 0.8574, "step": 4376 }, { "epoch": 2.302472382956339, "grad_norm": 2.009531021118164, "learning_rate": 3.4327509002396923e-06, "loss": 0.8548, "step": 4377 }, { "epoch": 2.3029984218832196, "grad_norm": 2.016782283782959, "learning_rate": 3.432106352174388e-06, "loss": 0.8126, "step": 4378 }, { "epoch": 2.3035244608100998, "grad_norm": 2.3308098316192627, "learning_rate": 3.431461732139788e-06, "loss": 0.8575, "step": 4379 }, { "epoch": 2.3040504997369804, "grad_norm": 2.0387165546417236, "learning_rate": 3.4308170401856656e-06, "loss": 0.8751, "step": 4380 }, { "epoch": 2.304576538663861, "grad_norm": 1.974931001663208, "learning_rate": 3.4301722763617983e-06, "loss": 0.842, "step": 4381 }, { "epoch": 2.3051025775907417, "grad_norm": 2.030056953430176, "learning_rate": 3.429527440717968e-06, "loss": 0.8691, "step": 4382 }, { "epoch": 2.3056286165176223, "grad_norm": 2.18574595451355, "learning_rate": 3.4288825333039654e-06, "loss": 0.813, "step": 4383 }, { "epoch": 2.306154655444503, "grad_norm": 2.281344413757324, "learning_rate": 3.428237554169583e-06, "loss": 0.8641, "step": 4384 }, { "epoch": 2.3066806943713836, "grad_norm": 2.3100666999816895, "learning_rate": 3.427592503364621e-06, "loss": 0.8977, "step": 4385 }, { "epoch": 2.307206733298264, "grad_norm": 2.0826337337493896, "learning_rate": 3.4269473809388853e-06, "loss": 0.8241, "step": 4386 }, { "epoch": 2.307732772225145, "grad_norm": 2.151582717895508, "learning_rate": 3.426302186942186e-06, "loss": 0.8527, "step": 4387 }, { "epoch": 2.308258811152025, "grad_norm": 2.3485841751098633, "learning_rate": 3.4256569214243406e-06, "loss": 0.8857, "step": 4388 }, { "epoch": 2.3087848500789057, "grad_norm": 2.19370436668396, "learning_rate": 3.42501158443517e-06, "loss": 0.898, "step": 4389 }, { "epoch": 2.3093108890057863, "grad_norm": 2.1616735458374023, "learning_rate": 3.424366176024501e-06, "loss": 0.8509, "step": 4390 }, { "epoch": 2.309836927932667, "grad_norm": 2.1148264408111572, "learning_rate": 3.423720696242168e-06, "loss": 0.8895, "step": 4391 }, { "epoch": 2.3103629668595476, "grad_norm": 2.074716567993164, "learning_rate": 3.4230751451380094e-06, "loss": 0.8452, "step": 4392 }, { "epoch": 2.310889005786428, "grad_norm": 2.2700624465942383, "learning_rate": 3.4224295227618674e-06, "loss": 0.9152, "step": 4393 }, { "epoch": 2.311415044713309, "grad_norm": 2.120661497116089, "learning_rate": 3.4217838291635935e-06, "loss": 0.8612, "step": 4394 }, { "epoch": 2.3119410836401895, "grad_norm": 2.0461814403533936, "learning_rate": 3.421138064393042e-06, "loss": 0.8334, "step": 4395 }, { "epoch": 2.31246712256707, "grad_norm": 2.1634323596954346, "learning_rate": 3.420492228500073e-06, "loss": 0.8999, "step": 4396 }, { "epoch": 2.3129931614939507, "grad_norm": 2.0591623783111572, "learning_rate": 3.4198463215345524e-06, "loss": 0.9078, "step": 4397 }, { "epoch": 2.3135192004208314, "grad_norm": 2.106152296066284, "learning_rate": 3.419200343546351e-06, "loss": 0.8679, "step": 4398 }, { "epoch": 2.3140452393477116, "grad_norm": 2.2311220169067383, "learning_rate": 3.4185542945853474e-06, "loss": 0.874, "step": 4399 }, { "epoch": 2.314571278274592, "grad_norm": 2.1792402267456055, "learning_rate": 3.4179081747014226e-06, "loss": 0.8731, "step": 4400 }, { "epoch": 2.315097317201473, "grad_norm": 2.08194899559021, "learning_rate": 3.4172619839444656e-06, "loss": 0.8727, "step": 4401 }, { "epoch": 2.3156233561283535, "grad_norm": 2.0430803298950195, "learning_rate": 3.4166157223643683e-06, "loss": 0.9036, "step": 4402 }, { "epoch": 2.316149395055234, "grad_norm": 2.065828800201416, "learning_rate": 3.41596939001103e-06, "loss": 0.8262, "step": 4403 }, { "epoch": 2.3166754339821147, "grad_norm": 2.294121265411377, "learning_rate": 3.4153229869343564e-06, "loss": 0.9023, "step": 4404 }, { "epoch": 2.3172014729089954, "grad_norm": 2.033903121948242, "learning_rate": 3.4146765131842546e-06, "loss": 0.9002, "step": 4405 }, { "epoch": 2.317727511835876, "grad_norm": 2.248424768447876, "learning_rate": 3.414029968810643e-06, "loss": 0.8531, "step": 4406 }, { "epoch": 2.3182535507627566, "grad_norm": 2.1871066093444824, "learning_rate": 3.4133833538634386e-06, "loss": 0.8674, "step": 4407 }, { "epoch": 2.318779589689637, "grad_norm": 2.1818161010742188, "learning_rate": 3.412736668392571e-06, "loss": 0.8508, "step": 4408 }, { "epoch": 2.3193056286165175, "grad_norm": 2.1951756477355957, "learning_rate": 3.4120899124479688e-06, "loss": 0.8627, "step": 4409 }, { "epoch": 2.319831667543398, "grad_norm": 2.2190048694610596, "learning_rate": 3.411443086079571e-06, "loss": 0.8425, "step": 4410 }, { "epoch": 2.3203577064702787, "grad_norm": 2.2467074394226074, "learning_rate": 3.410796189337319e-06, "loss": 0.8757, "step": 4411 }, { "epoch": 2.3208837453971594, "grad_norm": 2.2147939205169678, "learning_rate": 3.410149222271161e-06, "loss": 0.9106, "step": 4412 }, { "epoch": 2.32140978432404, "grad_norm": 2.14060115814209, "learning_rate": 3.4095021849310506e-06, "loss": 0.8564, "step": 4413 }, { "epoch": 2.3219358232509206, "grad_norm": 2.088477373123169, "learning_rate": 3.408855077366946e-06, "loss": 0.8829, "step": 4414 }, { "epoch": 2.3224618621778013, "grad_norm": 2.172668933868408, "learning_rate": 3.4082078996288104e-06, "loss": 0.9138, "step": 4415 }, { "epoch": 2.322987901104682, "grad_norm": 2.1732444763183594, "learning_rate": 3.407560651766615e-06, "loss": 0.8751, "step": 4416 }, { "epoch": 2.323513940031562, "grad_norm": 2.232740879058838, "learning_rate": 3.4069133338303345e-06, "loss": 0.9061, "step": 4417 }, { "epoch": 2.324039978958443, "grad_norm": 2.3688206672668457, "learning_rate": 3.406265945869949e-06, "loss": 0.8873, "step": 4418 }, { "epoch": 2.3245660178853234, "grad_norm": 2.0248425006866455, "learning_rate": 3.4056184879354436e-06, "loss": 0.9003, "step": 4419 }, { "epoch": 2.325092056812204, "grad_norm": 2.0382091999053955, "learning_rate": 3.40497096007681e-06, "loss": 0.8948, "step": 4420 }, { "epoch": 2.3256180957390846, "grad_norm": 2.44964861869812, "learning_rate": 3.404323362344045e-06, "loss": 0.912, "step": 4421 }, { "epoch": 2.3261441346659653, "grad_norm": 2.0528979301452637, "learning_rate": 3.4036756947871497e-06, "loss": 0.8896, "step": 4422 }, { "epoch": 2.326670173592846, "grad_norm": 2.3105461597442627, "learning_rate": 3.4030279574561327e-06, "loss": 0.9121, "step": 4423 }, { "epoch": 2.3271962125197265, "grad_norm": 2.2737538814544678, "learning_rate": 3.402380150401006e-06, "loss": 0.8725, "step": 4424 }, { "epoch": 2.327722251446607, "grad_norm": 2.1879889965057373, "learning_rate": 3.4017322736717882e-06, "loss": 0.8669, "step": 4425 }, { "epoch": 2.328248290373488, "grad_norm": 2.164395570755005, "learning_rate": 3.4010843273185025e-06, "loss": 0.8804, "step": 4426 }, { "epoch": 2.3287743293003684, "grad_norm": 2.154611349105835, "learning_rate": 3.4004363113911774e-06, "loss": 0.9098, "step": 4427 }, { "epoch": 2.3293003682272486, "grad_norm": 2.2746245861053467, "learning_rate": 3.3997882259398483e-06, "loss": 0.9067, "step": 4428 }, { "epoch": 2.3298264071541293, "grad_norm": 2.181692361831665, "learning_rate": 3.3991400710145533e-06, "loss": 0.878, "step": 4429 }, { "epoch": 2.33035244608101, "grad_norm": 2.2383594512939453, "learning_rate": 3.3984918466653385e-06, "loss": 0.9176, "step": 4430 }, { "epoch": 2.3308784850078905, "grad_norm": 2.0400376319885254, "learning_rate": 3.3978435529422538e-06, "loss": 0.9183, "step": 4431 }, { "epoch": 2.331404523934771, "grad_norm": 2.1192517280578613, "learning_rate": 3.3971951898953554e-06, "loss": 0.874, "step": 4432 }, { "epoch": 2.331930562861652, "grad_norm": 2.089022159576416, "learning_rate": 3.3965467575747035e-06, "loss": 0.8664, "step": 4433 }, { "epoch": 2.3324566017885324, "grad_norm": 2.234487533569336, "learning_rate": 3.3958982560303644e-06, "loss": 0.8213, "step": 4434 }, { "epoch": 2.332982640715413, "grad_norm": 2.232316493988037, "learning_rate": 3.395249685312412e-06, "loss": 0.8906, "step": 4435 }, { "epoch": 2.3335086796422937, "grad_norm": 2.358140468597412, "learning_rate": 3.3946010454709206e-06, "loss": 0.9118, "step": 4436 }, { "epoch": 2.334034718569174, "grad_norm": 2.2153425216674805, "learning_rate": 3.393952336555974e-06, "loss": 0.8985, "step": 4437 }, { "epoch": 2.3345607574960545, "grad_norm": 2.1997649669647217, "learning_rate": 3.39330355861766e-06, "loss": 0.8256, "step": 4438 }, { "epoch": 2.335086796422935, "grad_norm": 2.1090774536132812, "learning_rate": 3.392654711706072e-06, "loss": 0.8342, "step": 4439 }, { "epoch": 2.335612835349816, "grad_norm": 2.042059898376465, "learning_rate": 3.3920057958713075e-06, "loss": 0.8444, "step": 4440 }, { "epoch": 2.3361388742766964, "grad_norm": 2.209395408630371, "learning_rate": 3.3913568111634706e-06, "loss": 0.8569, "step": 4441 }, { "epoch": 2.336664913203577, "grad_norm": 2.2019219398498535, "learning_rate": 3.3907077576326706e-06, "loss": 0.8672, "step": 4442 }, { "epoch": 2.3371909521304577, "grad_norm": 1.9959986209869385, "learning_rate": 3.3900586353290214e-06, "loss": 0.8489, "step": 4443 }, { "epoch": 2.3377169910573383, "grad_norm": 2.1159682273864746, "learning_rate": 3.3894094443026437e-06, "loss": 0.8685, "step": 4444 }, { "epoch": 2.338243029984219, "grad_norm": 2.0306484699249268, "learning_rate": 3.388760184603661e-06, "loss": 0.8538, "step": 4445 }, { "epoch": 2.3387690689110996, "grad_norm": 2.2062647342681885, "learning_rate": 3.3881108562822053e-06, "loss": 0.8228, "step": 4446 }, { "epoch": 2.3392951078379802, "grad_norm": 2.122352123260498, "learning_rate": 3.38746145938841e-06, "loss": 0.8346, "step": 4447 }, { "epoch": 2.3398211467648604, "grad_norm": 2.304992914199829, "learning_rate": 3.386811993972418e-06, "loss": 0.8572, "step": 4448 }, { "epoch": 2.340347185691741, "grad_norm": 2.2666351795196533, "learning_rate": 3.386162460084375e-06, "loss": 0.9018, "step": 4449 }, { "epoch": 2.3408732246186217, "grad_norm": 2.1409356594085693, "learning_rate": 3.3855128577744325e-06, "loss": 0.8562, "step": 4450 }, { "epoch": 2.3413992635455023, "grad_norm": 2.1681880950927734, "learning_rate": 3.3848631870927463e-06, "loss": 0.8778, "step": 4451 }, { "epoch": 2.341925302472383, "grad_norm": 2.211961507797241, "learning_rate": 3.3842134480894782e-06, "loss": 0.8438, "step": 4452 }, { "epoch": 2.3424513413992636, "grad_norm": 2.0960910320281982, "learning_rate": 3.3835636408147978e-06, "loss": 0.8869, "step": 4453 }, { "epoch": 2.342977380326144, "grad_norm": 2.1412312984466553, "learning_rate": 3.3829137653188754e-06, "loss": 0.8699, "step": 4454 }, { "epoch": 2.343503419253025, "grad_norm": 2.3611090183258057, "learning_rate": 3.38226382165189e-06, "loss": 0.9005, "step": 4455 }, { "epoch": 2.3440294581799055, "grad_norm": 2.6216659545898438, "learning_rate": 3.3816138098640244e-06, "loss": 0.9284, "step": 4456 }, { "epoch": 2.3445554971067857, "grad_norm": 2.3666319847106934, "learning_rate": 3.380963730005467e-06, "loss": 0.8018, "step": 4457 }, { "epoch": 2.3450815360336663, "grad_norm": 2.145164966583252, "learning_rate": 3.380313582126411e-06, "loss": 0.8844, "step": 4458 }, { "epoch": 2.345607574960547, "grad_norm": 2.1737453937530518, "learning_rate": 3.3796633662770555e-06, "loss": 0.8334, "step": 4459 }, { "epoch": 2.3461336138874276, "grad_norm": 2.2430126667022705, "learning_rate": 3.3790130825076052e-06, "loss": 0.846, "step": 4460 }, { "epoch": 2.346659652814308, "grad_norm": 2.117422342300415, "learning_rate": 3.3783627308682687e-06, "loss": 0.8927, "step": 4461 }, { "epoch": 2.347185691741189, "grad_norm": 2.0387628078460693, "learning_rate": 3.3777123114092615e-06, "loss": 0.8837, "step": 4462 }, { "epoch": 2.3477117306680695, "grad_norm": 2.1376733779907227, "learning_rate": 3.3770618241808024e-06, "loss": 0.9102, "step": 4463 }, { "epoch": 2.34823776959495, "grad_norm": 2.0879666805267334, "learning_rate": 3.3764112692331163e-06, "loss": 0.8523, "step": 4464 }, { "epoch": 2.3487638085218308, "grad_norm": 2.1453332901000977, "learning_rate": 3.3757606466164343e-06, "loss": 0.8577, "step": 4465 }, { "epoch": 2.3492898474487114, "grad_norm": 2.0041863918304443, "learning_rate": 3.375109956380992e-06, "loss": 0.8848, "step": 4466 }, { "epoch": 2.349815886375592, "grad_norm": 2.226841926574707, "learning_rate": 3.3744591985770302e-06, "loss": 0.9057, "step": 4467 }, { "epoch": 2.350341925302472, "grad_norm": 2.207286834716797, "learning_rate": 3.373808373254794e-06, "loss": 0.8617, "step": 4468 }, { "epoch": 2.350867964229353, "grad_norm": 2.113358736038208, "learning_rate": 3.373157480464536e-06, "loss": 0.8774, "step": 4469 }, { "epoch": 2.3513940031562335, "grad_norm": 2.1797287464141846, "learning_rate": 3.3725065202565106e-06, "loss": 0.8665, "step": 4470 }, { "epoch": 2.351920042083114, "grad_norm": 2.2577271461486816, "learning_rate": 3.371855492680981e-06, "loss": 0.842, "step": 4471 }, { "epoch": 2.3524460810099947, "grad_norm": 2.064201831817627, "learning_rate": 3.371204397788213e-06, "loss": 0.86, "step": 4472 }, { "epoch": 2.3529721199368754, "grad_norm": 2.1605470180511475, "learning_rate": 3.3705532356284792e-06, "loss": 0.8843, "step": 4473 }, { "epoch": 2.353498158863756, "grad_norm": 2.073932647705078, "learning_rate": 3.369902006252057e-06, "loss": 0.835, "step": 4474 }, { "epoch": 2.3540241977906367, "grad_norm": 2.0544135570526123, "learning_rate": 3.3692507097092286e-06, "loss": 0.8693, "step": 4475 }, { "epoch": 2.3545502367175173, "grad_norm": 2.28818678855896, "learning_rate": 3.368599346050281e-06, "loss": 0.927, "step": 4476 }, { "epoch": 2.3550762756443975, "grad_norm": 2.1185436248779297, "learning_rate": 3.367947915325507e-06, "loss": 0.8669, "step": 4477 }, { "epoch": 2.355602314571278, "grad_norm": 2.255263566970825, "learning_rate": 3.3672964175852053e-06, "loss": 0.8773, "step": 4478 }, { "epoch": 2.3561283534981587, "grad_norm": 2.085987091064453, "learning_rate": 3.366644852879678e-06, "loss": 0.862, "step": 4479 }, { "epoch": 2.3566543924250394, "grad_norm": 2.046147584915161, "learning_rate": 3.365993221259235e-06, "loss": 0.8823, "step": 4480 }, { "epoch": 2.35718043135192, "grad_norm": 2.081066846847534, "learning_rate": 3.365341522774188e-06, "loss": 0.849, "step": 4481 }, { "epoch": 2.3577064702788006, "grad_norm": 2.2208640575408936, "learning_rate": 3.364689757474856e-06, "loss": 0.8513, "step": 4482 }, { "epoch": 2.3582325092056813, "grad_norm": 2.182530403137207, "learning_rate": 3.364037925411563e-06, "loss": 0.8849, "step": 4483 }, { "epoch": 2.358758548132562, "grad_norm": 2.1046009063720703, "learning_rate": 3.3633860266346375e-06, "loss": 0.8702, "step": 4484 }, { "epoch": 2.3592845870594426, "grad_norm": 2.1490073204040527, "learning_rate": 3.3627340611944146e-06, "loss": 0.8818, "step": 4485 }, { "epoch": 2.3598106259863227, "grad_norm": 2.211876630783081, "learning_rate": 3.3620820291412325e-06, "loss": 0.8895, "step": 4486 }, { "epoch": 2.360336664913204, "grad_norm": 2.0112290382385254, "learning_rate": 3.361429930525436e-06, "loss": 0.8614, "step": 4487 }, { "epoch": 2.360862703840084, "grad_norm": 2.1341376304626465, "learning_rate": 3.360777765397375e-06, "loss": 0.878, "step": 4488 }, { "epoch": 2.3613887427669646, "grad_norm": 2.4266395568847656, "learning_rate": 3.360125533807403e-06, "loss": 0.8609, "step": 4489 }, { "epoch": 2.3619147816938453, "grad_norm": 2.177643299102783, "learning_rate": 3.35947323580588e-06, "loss": 0.904, "step": 4490 }, { "epoch": 2.362440820620726, "grad_norm": 2.1336145401000977, "learning_rate": 3.3588208714431714e-06, "loss": 0.8282, "step": 4491 }, { "epoch": 2.3629668595476065, "grad_norm": 2.180192232131958, "learning_rate": 3.358168440769648e-06, "loss": 0.8985, "step": 4492 }, { "epoch": 2.363492898474487, "grad_norm": 2.260572910308838, "learning_rate": 3.3575159438356835e-06, "loss": 0.8993, "step": 4493 }, { "epoch": 2.364018937401368, "grad_norm": 2.202387571334839, "learning_rate": 3.3568633806916584e-06, "loss": 0.8409, "step": 4494 }, { "epoch": 2.3645449763282484, "grad_norm": 2.154290199279785, "learning_rate": 3.3562107513879583e-06, "loss": 0.9045, "step": 4495 }, { "epoch": 2.365071015255129, "grad_norm": 2.1294844150543213, "learning_rate": 3.3555580559749736e-06, "loss": 0.8671, "step": 4496 }, { "epoch": 2.3655970541820093, "grad_norm": 2.152261734008789, "learning_rate": 3.3549052945030992e-06, "loss": 0.8511, "step": 4497 }, { "epoch": 2.36612309310889, "grad_norm": 2.0878777503967285, "learning_rate": 3.354252467022737e-06, "loss": 0.8379, "step": 4498 }, { "epoch": 2.3666491320357705, "grad_norm": 2.3093135356903076, "learning_rate": 3.353599573584293e-06, "loss": 0.8321, "step": 4499 }, { "epoch": 2.367175170962651, "grad_norm": 2.1622679233551025, "learning_rate": 3.3529466142381768e-06, "loss": 0.8959, "step": 4500 }, { "epoch": 2.367701209889532, "grad_norm": 2.1361749172210693, "learning_rate": 3.352293589034804e-06, "loss": 0.8339, "step": 4501 }, { "epoch": 2.3682272488164124, "grad_norm": 2.2278809547424316, "learning_rate": 3.3516404980245974e-06, "loss": 0.913, "step": 4502 }, { "epoch": 2.368753287743293, "grad_norm": 2.078442335128784, "learning_rate": 3.350987341257982e-06, "loss": 0.8533, "step": 4503 }, { "epoch": 2.3692793266701737, "grad_norm": 2.1720800399780273, "learning_rate": 3.3503341187853887e-06, "loss": 0.8535, "step": 4504 }, { "epoch": 2.3698053655970543, "grad_norm": 2.231580972671509, "learning_rate": 3.3496808306572552e-06, "loss": 0.8647, "step": 4505 }, { "epoch": 2.3703314045239345, "grad_norm": 2.081709861755371, "learning_rate": 3.3490274769240216e-06, "loss": 0.8112, "step": 4506 }, { "epoch": 2.370857443450815, "grad_norm": 2.1474642753601074, "learning_rate": 3.348374057636134e-06, "loss": 0.8819, "step": 4507 }, { "epoch": 2.371383482377696, "grad_norm": 2.08227801322937, "learning_rate": 3.3477205728440444e-06, "loss": 0.8637, "step": 4508 }, { "epoch": 2.3719095213045764, "grad_norm": 2.1230804920196533, "learning_rate": 3.3470670225982095e-06, "loss": 0.8106, "step": 4509 }, { "epoch": 2.372435560231457, "grad_norm": 2.1904079914093018, "learning_rate": 3.346413406949091e-06, "loss": 0.7999, "step": 4510 }, { "epoch": 2.3729615991583377, "grad_norm": 2.1369755268096924, "learning_rate": 3.3457597259471553e-06, "loss": 0.8866, "step": 4511 }, { "epoch": 2.3734876380852183, "grad_norm": 2.046910285949707, "learning_rate": 3.3451059796428736e-06, "loss": 0.8171, "step": 4512 }, { "epoch": 2.374013677012099, "grad_norm": 2.0299670696258545, "learning_rate": 3.3444521680867235e-06, "loss": 0.8002, "step": 4513 }, { "epoch": 2.3745397159389796, "grad_norm": 2.0571177005767822, "learning_rate": 3.3437982913291857e-06, "loss": 0.837, "step": 4514 }, { "epoch": 2.3750657548658602, "grad_norm": 2.149925470352173, "learning_rate": 3.343144349420747e-06, "loss": 0.9192, "step": 4515 }, { "epoch": 2.375591793792741, "grad_norm": 2.315999746322632, "learning_rate": 3.3424903424119004e-06, "loss": 0.9212, "step": 4516 }, { "epoch": 2.376117832719621, "grad_norm": 2.247075080871582, "learning_rate": 3.3418362703531424e-06, "loss": 0.8975, "step": 4517 }, { "epoch": 2.3766438716465017, "grad_norm": 2.0656683444976807, "learning_rate": 3.3411821332949747e-06, "loss": 0.8564, "step": 4518 }, { "epoch": 2.3771699105733823, "grad_norm": 2.0697195529937744, "learning_rate": 3.3405279312879034e-06, "loss": 0.8627, "step": 4519 }, { "epoch": 2.377695949500263, "grad_norm": 2.202467679977417, "learning_rate": 3.3398736643824415e-06, "loss": 0.881, "step": 4520 }, { "epoch": 2.3782219884271436, "grad_norm": 4.204126358032227, "learning_rate": 3.3392193326291045e-06, "loss": 0.8623, "step": 4521 }, { "epoch": 2.3787480273540242, "grad_norm": 2.225133180618286, "learning_rate": 3.3385649360784157e-06, "loss": 0.8549, "step": 4522 }, { "epoch": 2.379274066280905, "grad_norm": 2.041497230529785, "learning_rate": 3.337910474780902e-06, "loss": 0.8679, "step": 4523 }, { "epoch": 2.3798001052077855, "grad_norm": 2.0643181800842285, "learning_rate": 3.337255948787095e-06, "loss": 0.922, "step": 4524 }, { "epoch": 2.380326144134666, "grad_norm": 2.0735743045806885, "learning_rate": 3.336601358147531e-06, "loss": 0.8521, "step": 4525 }, { "epoch": 2.3808521830615463, "grad_norm": 2.2833971977233887, "learning_rate": 3.3359467029127516e-06, "loss": 0.9174, "step": 4526 }, { "epoch": 2.381378221988427, "grad_norm": 2.1576030254364014, "learning_rate": 3.335291983133305e-06, "loss": 0.9119, "step": 4527 }, { "epoch": 2.3819042609153076, "grad_norm": 2.127601146697998, "learning_rate": 3.334637198859743e-06, "loss": 0.8954, "step": 4528 }, { "epoch": 2.3824302998421882, "grad_norm": 2.0619876384735107, "learning_rate": 3.333982350142622e-06, "loss": 0.8922, "step": 4529 }, { "epoch": 2.382956338769069, "grad_norm": 2.240267515182495, "learning_rate": 3.333327437032503e-06, "loss": 0.895, "step": 4530 }, { "epoch": 2.3834823776959495, "grad_norm": 2.309187650680542, "learning_rate": 3.3326724595799546e-06, "loss": 0.886, "step": 4531 }, { "epoch": 2.38400841662283, "grad_norm": 2.063678741455078, "learning_rate": 3.3320174178355468e-06, "loss": 0.8953, "step": 4532 }, { "epoch": 2.3845344555497108, "grad_norm": 2.086050271987915, "learning_rate": 3.331362311849857e-06, "loss": 0.8586, "step": 4533 }, { "epoch": 2.3850604944765914, "grad_norm": 2.1293606758117676, "learning_rate": 3.3307071416734675e-06, "loss": 0.8446, "step": 4534 }, { "epoch": 2.385586533403472, "grad_norm": 2.3072867393493652, "learning_rate": 3.330051907356965e-06, "loss": 0.9111, "step": 4535 }, { "epoch": 2.3861125723303527, "grad_norm": 2.17637300491333, "learning_rate": 3.32939660895094e-06, "loss": 0.873, "step": 4536 }, { "epoch": 2.386638611257233, "grad_norm": 2.3507750034332275, "learning_rate": 3.3287412465059886e-06, "loss": 0.8886, "step": 4537 }, { "epoch": 2.3871646501841135, "grad_norm": 3.0440115928649902, "learning_rate": 3.328085820072715e-06, "loss": 0.8406, "step": 4538 }, { "epoch": 2.387690689110994, "grad_norm": 2.295194625854492, "learning_rate": 3.3274303297017222e-06, "loss": 0.8537, "step": 4539 }, { "epoch": 2.3882167280378748, "grad_norm": 2.06528377532959, "learning_rate": 3.3267747754436243e-06, "loss": 0.8742, "step": 4540 }, { "epoch": 2.3887427669647554, "grad_norm": 2.3181843757629395, "learning_rate": 3.326119157349037e-06, "loss": 0.8495, "step": 4541 }, { "epoch": 2.389268805891636, "grad_norm": 2.0612070560455322, "learning_rate": 3.3254634754685807e-06, "loss": 0.9161, "step": 4542 }, { "epoch": 2.3897948448185167, "grad_norm": 2.0258734226226807, "learning_rate": 3.324807729852881e-06, "loss": 0.8252, "step": 4543 }, { "epoch": 2.3903208837453973, "grad_norm": 2.0916411876678467, "learning_rate": 3.3241519205525708e-06, "loss": 0.9009, "step": 4544 }, { "epoch": 2.390846922672278, "grad_norm": 2.2885091304779053, "learning_rate": 3.323496047618286e-06, "loss": 0.9323, "step": 4545 }, { "epoch": 2.391372961599158, "grad_norm": 2.122157573699951, "learning_rate": 3.3228401111006657e-06, "loss": 0.8486, "step": 4546 }, { "epoch": 2.3918990005260388, "grad_norm": 2.141904830932617, "learning_rate": 3.3221841110503567e-06, "loss": 0.8687, "step": 4547 }, { "epoch": 2.3924250394529194, "grad_norm": 2.142369508743286, "learning_rate": 3.3215280475180107e-06, "loss": 0.8935, "step": 4548 }, { "epoch": 2.3929510783798, "grad_norm": 2.4064347743988037, "learning_rate": 3.3208719205542815e-06, "loss": 0.8671, "step": 4549 }, { "epoch": 2.3934771173066807, "grad_norm": 2.0880796909332275, "learning_rate": 3.3202157302098313e-06, "loss": 0.8329, "step": 4550 }, { "epoch": 2.3940031562335613, "grad_norm": 2.1675336360931396, "learning_rate": 3.319559476535324e-06, "loss": 0.872, "step": 4551 }, { "epoch": 2.394529195160442, "grad_norm": 2.2019832134246826, "learning_rate": 3.3189031595814308e-06, "loss": 0.9305, "step": 4552 }, { "epoch": 2.3950552340873226, "grad_norm": 2.174999713897705, "learning_rate": 3.318246779398828e-06, "loss": 0.8739, "step": 4553 }, { "epoch": 2.395581273014203, "grad_norm": 2.1488232612609863, "learning_rate": 3.3175903360381935e-06, "loss": 0.8177, "step": 4554 }, { "epoch": 2.3961073119410834, "grad_norm": 2.2104954719543457, "learning_rate": 3.316933829550213e-06, "loss": 0.822, "step": 4555 }, { "epoch": 2.3966333508679645, "grad_norm": 2.0240249633789062, "learning_rate": 3.3162772599855767e-06, "loss": 0.8395, "step": 4556 }, { "epoch": 2.3971593897948447, "grad_norm": 2.176832675933838, "learning_rate": 3.3156206273949787e-06, "loss": 0.9108, "step": 4557 }, { "epoch": 2.3976854287217253, "grad_norm": 2.0302371978759766, "learning_rate": 3.314963931829119e-06, "loss": 0.8473, "step": 4558 }, { "epoch": 2.398211467648606, "grad_norm": 2.183255434036255, "learning_rate": 3.3143071733387033e-06, "loss": 0.8757, "step": 4559 }, { "epoch": 2.3987375065754866, "grad_norm": 2.2155511379241943, "learning_rate": 3.313650351974439e-06, "loss": 0.8694, "step": 4560 }, { "epoch": 2.399263545502367, "grad_norm": 2.197721004486084, "learning_rate": 3.31299346778704e-06, "loss": 0.8951, "step": 4561 }, { "epoch": 2.399789584429248, "grad_norm": 2.1071231365203857, "learning_rate": 3.3123365208272263e-06, "loss": 0.8345, "step": 4562 }, { "epoch": 2.4003156233561285, "grad_norm": 2.101775884628296, "learning_rate": 3.311679511145722e-06, "loss": 0.895, "step": 4563 }, { "epoch": 2.400841662283009, "grad_norm": 2.1427109241485596, "learning_rate": 3.3110224387932553e-06, "loss": 0.893, "step": 4564 }, { "epoch": 2.4013677012098897, "grad_norm": 2.2999653816223145, "learning_rate": 3.3103653038205595e-06, "loss": 0.8911, "step": 4565 }, { "epoch": 2.40189374013677, "grad_norm": 2.0130341053009033, "learning_rate": 3.309708106278374e-06, "loss": 0.8379, "step": 4566 }, { "epoch": 2.4024197790636506, "grad_norm": 2.2768304347991943, "learning_rate": 3.30905084621744e-06, "loss": 0.9008, "step": 4567 }, { "epoch": 2.402945817990531, "grad_norm": 2.239983081817627, "learning_rate": 3.308393523688508e-06, "loss": 0.8437, "step": 4568 }, { "epoch": 2.403471856917412, "grad_norm": 2.081622362136841, "learning_rate": 3.307736138742328e-06, "loss": 0.8928, "step": 4569 }, { "epoch": 2.4039978958442925, "grad_norm": 2.067183017730713, "learning_rate": 3.3070786914296604e-06, "loss": 0.8373, "step": 4570 }, { "epoch": 2.404523934771173, "grad_norm": 2.119030475616455, "learning_rate": 3.306421181801266e-06, "loss": 0.8428, "step": 4571 }, { "epoch": 2.4050499736980537, "grad_norm": 2.0294859409332275, "learning_rate": 3.305763609907913e-06, "loss": 0.8289, "step": 4572 }, { "epoch": 2.4055760126249344, "grad_norm": 2.230595350265503, "learning_rate": 3.3051059758003717e-06, "loss": 0.867, "step": 4573 }, { "epoch": 2.406102051551815, "grad_norm": 2.1302363872528076, "learning_rate": 3.3044482795294214e-06, "loss": 0.8532, "step": 4574 }, { "epoch": 2.406628090478695, "grad_norm": 2.0594003200531006, "learning_rate": 3.303790521145842e-06, "loss": 0.8931, "step": 4575 }, { "epoch": 2.407154129405576, "grad_norm": 2.2251973152160645, "learning_rate": 3.303132700700421e-06, "loss": 0.8409, "step": 4576 }, { "epoch": 2.4076801683324565, "grad_norm": 2.2774264812469482, "learning_rate": 3.3024748182439494e-06, "loss": 0.8617, "step": 4577 }, { "epoch": 2.408206207259337, "grad_norm": 2.0260679721832275, "learning_rate": 3.301816873827224e-06, "loss": 0.8688, "step": 4578 }, { "epoch": 2.4087322461862177, "grad_norm": 2.4303534030914307, "learning_rate": 3.3011588675010444e-06, "loss": 0.8853, "step": 4579 }, { "epoch": 2.4092582851130984, "grad_norm": 2.1305887699127197, "learning_rate": 3.3005007993162156e-06, "loss": 0.8651, "step": 4580 }, { "epoch": 2.409784324039979, "grad_norm": 2.184699296951294, "learning_rate": 3.2998426693235507e-06, "loss": 0.8651, "step": 4581 }, { "epoch": 2.4103103629668596, "grad_norm": 2.0676052570343018, "learning_rate": 3.2991844775738623e-06, "loss": 0.8883, "step": 4582 }, { "epoch": 2.4108364018937403, "grad_norm": 2.138193130493164, "learning_rate": 3.2985262241179715e-06, "loss": 0.8697, "step": 4583 }, { "epoch": 2.411362440820621, "grad_norm": 2.2230002880096436, "learning_rate": 3.2978679090067036e-06, "loss": 0.8161, "step": 4584 }, { "epoch": 2.4118884797475015, "grad_norm": 2.0622825622558594, "learning_rate": 3.297209532290887e-06, "loss": 0.883, "step": 4585 }, { "epoch": 2.4124145186743817, "grad_norm": 2.0601229667663574, "learning_rate": 3.2965510940213556e-06, "loss": 0.8573, "step": 4586 }, { "epoch": 2.4129405576012624, "grad_norm": 2.019260883331299, "learning_rate": 3.2958925942489494e-06, "loss": 0.8431, "step": 4587 }, { "epoch": 2.413466596528143, "grad_norm": 2.1321792602539062, "learning_rate": 3.2952340330245118e-06, "loss": 0.8498, "step": 4588 }, { "epoch": 2.4139926354550236, "grad_norm": 2.01550555229187, "learning_rate": 3.2945754103988912e-06, "loss": 0.8861, "step": 4589 }, { "epoch": 2.4145186743819043, "grad_norm": 1.9152880907058716, "learning_rate": 3.2939167264229416e-06, "loss": 0.8473, "step": 4590 }, { "epoch": 2.415044713308785, "grad_norm": 2.0729894638061523, "learning_rate": 3.293257981147519e-06, "loss": 0.895, "step": 4591 }, { "epoch": 2.4155707522356655, "grad_norm": 2.1401994228363037, "learning_rate": 3.2925991746234885e-06, "loss": 0.8299, "step": 4592 }, { "epoch": 2.416096791162546, "grad_norm": 2.2747154235839844, "learning_rate": 3.2919403069017154e-06, "loss": 0.8362, "step": 4593 }, { "epoch": 2.416622830089427, "grad_norm": 2.1981728076934814, "learning_rate": 3.2912813780330726e-06, "loss": 0.9001, "step": 4594 }, { "epoch": 2.417148869016307, "grad_norm": 2.0812315940856934, "learning_rate": 3.290622388068438e-06, "loss": 0.8487, "step": 4595 }, { "epoch": 2.4176749079431876, "grad_norm": 2.067906618118286, "learning_rate": 3.289963337058692e-06, "loss": 0.8549, "step": 4596 }, { "epoch": 2.4182009468700683, "grad_norm": 2.0333797931671143, "learning_rate": 3.2893042250547213e-06, "loss": 0.8914, "step": 4597 }, { "epoch": 2.418726985796949, "grad_norm": 2.139587163925171, "learning_rate": 3.2886450521074166e-06, "loss": 0.8603, "step": 4598 }, { "epoch": 2.4192530247238295, "grad_norm": 2.1783828735351562, "learning_rate": 3.287985818267674e-06, "loss": 0.8755, "step": 4599 }, { "epoch": 2.41977906365071, "grad_norm": 2.033174753189087, "learning_rate": 3.2873265235863933e-06, "loss": 0.849, "step": 4600 }, { "epoch": 2.420305102577591, "grad_norm": 2.148143768310547, "learning_rate": 3.2866671681144796e-06, "loss": 0.8558, "step": 4601 }, { "epoch": 2.4208311415044714, "grad_norm": 2.2645652294158936, "learning_rate": 3.286007751902844e-06, "loss": 0.8687, "step": 4602 }, { "epoch": 2.421357180431352, "grad_norm": 2.120403289794922, "learning_rate": 3.2853482750024e-06, "loss": 0.8128, "step": 4603 }, { "epoch": 2.4218832193582327, "grad_norm": 2.0981850624084473, "learning_rate": 3.2846887374640667e-06, "loss": 0.8925, "step": 4604 }, { "epoch": 2.4224092582851133, "grad_norm": 2.1903598308563232, "learning_rate": 3.284029139338768e-06, "loss": 0.8422, "step": 4605 }, { "epoch": 2.4229352972119935, "grad_norm": 2.165849208831787, "learning_rate": 3.283369480677433e-06, "loss": 0.8681, "step": 4606 }, { "epoch": 2.423461336138874, "grad_norm": 2.386314630508423, "learning_rate": 3.2827097615309944e-06, "loss": 0.9176, "step": 4607 }, { "epoch": 2.423987375065755, "grad_norm": 2.18986177444458, "learning_rate": 3.2820499819503896e-06, "loss": 0.8499, "step": 4608 }, { "epoch": 2.4245134139926354, "grad_norm": 2.010610818862915, "learning_rate": 3.2813901419865623e-06, "loss": 0.8322, "step": 4609 }, { "epoch": 2.425039452919516, "grad_norm": 2.074674129486084, "learning_rate": 3.2807302416904594e-06, "loss": 0.8615, "step": 4610 }, { "epoch": 2.4255654918463967, "grad_norm": 2.3297247886657715, "learning_rate": 3.280070281113032e-06, "loss": 0.9498, "step": 4611 }, { "epoch": 2.4260915307732773, "grad_norm": 2.200709104537964, "learning_rate": 3.279410260305237e-06, "loss": 0.843, "step": 4612 }, { "epoch": 2.426617569700158, "grad_norm": 2.074005126953125, "learning_rate": 3.2787501793180364e-06, "loss": 0.8088, "step": 4613 }, { "epoch": 2.4271436086270386, "grad_norm": 2.2149908542633057, "learning_rate": 3.2780900382023944e-06, "loss": 0.9314, "step": 4614 }, { "epoch": 2.427669647553919, "grad_norm": 2.083340644836426, "learning_rate": 3.277429837009283e-06, "loss": 0.8263, "step": 4615 }, { "epoch": 2.4281956864807994, "grad_norm": 2.216353416442871, "learning_rate": 3.276769575789676e-06, "loss": 0.8484, "step": 4616 }, { "epoch": 2.42872172540768, "grad_norm": 2.2071335315704346, "learning_rate": 3.276109254594555e-06, "loss": 0.8451, "step": 4617 }, { "epoch": 2.4292477643345607, "grad_norm": 2.1416165828704834, "learning_rate": 3.275448873474902e-06, "loss": 0.8529, "step": 4618 }, { "epoch": 2.4297738032614413, "grad_norm": 2.1632590293884277, "learning_rate": 3.2747884324817076e-06, "loss": 0.8719, "step": 4619 }, { "epoch": 2.430299842188322, "grad_norm": 2.1609015464782715, "learning_rate": 3.2741279316659647e-06, "loss": 0.8537, "step": 4620 }, { "epoch": 2.4308258811152026, "grad_norm": 2.2424097061157227, "learning_rate": 3.2734673710786724e-06, "loss": 0.847, "step": 4621 }, { "epoch": 2.4313519200420832, "grad_norm": 2.118997573852539, "learning_rate": 3.272806750770833e-06, "loss": 0.8706, "step": 4622 }, { "epoch": 2.431877958968964, "grad_norm": 2.0217978954315186, "learning_rate": 3.2721460707934526e-06, "loss": 0.8332, "step": 4623 }, { "epoch": 2.432403997895844, "grad_norm": 2.348512649536133, "learning_rate": 3.271485331197546e-06, "loss": 0.9027, "step": 4624 }, { "epoch": 2.432930036822725, "grad_norm": 2.173116683959961, "learning_rate": 3.270824532034128e-06, "loss": 0.8809, "step": 4625 }, { "epoch": 2.4334560757496053, "grad_norm": 1.977131962776184, "learning_rate": 3.2701636733542197e-06, "loss": 0.8667, "step": 4626 }, { "epoch": 2.433982114676486, "grad_norm": 2.119617223739624, "learning_rate": 3.269502755208849e-06, "loss": 0.8972, "step": 4627 }, { "epoch": 2.4345081536033666, "grad_norm": 2.1332132816314697, "learning_rate": 3.2688417776490443e-06, "loss": 0.8434, "step": 4628 }, { "epoch": 2.435034192530247, "grad_norm": 2.255924701690674, "learning_rate": 3.268180740725841e-06, "loss": 0.8786, "step": 4629 }, { "epoch": 2.435560231457128, "grad_norm": 2.2864255905151367, "learning_rate": 3.267519644490279e-06, "loss": 0.8776, "step": 4630 }, { "epoch": 2.4360862703840085, "grad_norm": 2.114379405975342, "learning_rate": 3.266858488993403e-06, "loss": 0.848, "step": 4631 }, { "epoch": 2.436612309310889, "grad_norm": 2.0397727489471436, "learning_rate": 3.26619727428626e-06, "loss": 0.7787, "step": 4632 }, { "epoch": 2.4371383482377698, "grad_norm": 2.3769612312316895, "learning_rate": 3.2655360004199065e-06, "loss": 0.8811, "step": 4633 }, { "epoch": 2.4376643871646504, "grad_norm": 2.168011426925659, "learning_rate": 3.2648746674453974e-06, "loss": 0.9032, "step": 4634 }, { "epoch": 2.4381904260915306, "grad_norm": 2.3379416465759277, "learning_rate": 3.264213275413797e-06, "loss": 0.8811, "step": 4635 }, { "epoch": 2.438716465018411, "grad_norm": 2.1434385776519775, "learning_rate": 3.2635518243761717e-06, "loss": 0.8654, "step": 4636 }, { "epoch": 2.439242503945292, "grad_norm": 2.137521743774414, "learning_rate": 3.2628903143835924e-06, "loss": 0.8879, "step": 4637 }, { "epoch": 2.4397685428721725, "grad_norm": 2.1275484561920166, "learning_rate": 3.2622287454871365e-06, "loss": 0.8758, "step": 4638 }, { "epoch": 2.440294581799053, "grad_norm": 2.071444034576416, "learning_rate": 3.2615671177378837e-06, "loss": 0.8593, "step": 4639 }, { "epoch": 2.4408206207259338, "grad_norm": 2.1014246940612793, "learning_rate": 3.260905431186921e-06, "loss": 0.8256, "step": 4640 }, { "epoch": 2.4413466596528144, "grad_norm": 2.0950260162353516, "learning_rate": 3.260243685885336e-06, "loss": 0.8718, "step": 4641 }, { "epoch": 2.441872698579695, "grad_norm": 2.2020888328552246, "learning_rate": 3.259581881884224e-06, "loss": 0.8693, "step": 4642 }, { "epoch": 2.4423987375065757, "grad_norm": 2.1953625679016113, "learning_rate": 3.2589200192346837e-06, "loss": 0.8846, "step": 4643 }, { "epoch": 2.442924776433456, "grad_norm": 2.1466047763824463, "learning_rate": 3.258258097987819e-06, "loss": 0.8473, "step": 4644 }, { "epoch": 2.4434508153603365, "grad_norm": 2.232877731323242, "learning_rate": 3.2575961181947367e-06, "loss": 0.8755, "step": 4645 }, { "epoch": 2.443976854287217, "grad_norm": 2.170015573501587, "learning_rate": 3.2569340799065507e-06, "loss": 0.8356, "step": 4646 }, { "epoch": 2.4445028932140977, "grad_norm": 2.173382520675659, "learning_rate": 3.2562719831743767e-06, "loss": 0.8834, "step": 4647 }, { "epoch": 2.4450289321409784, "grad_norm": 1.9480879306793213, "learning_rate": 3.2556098280493366e-06, "loss": 0.8184, "step": 4648 }, { "epoch": 2.445554971067859, "grad_norm": 2.201627731323242, "learning_rate": 3.2549476145825563e-06, "loss": 0.8549, "step": 4649 }, { "epoch": 2.4460810099947397, "grad_norm": 2.091203212738037, "learning_rate": 3.2542853428251663e-06, "loss": 0.8155, "step": 4650 }, { "epoch": 2.4466070489216203, "grad_norm": 2.3306474685668945, "learning_rate": 3.2536230128283018e-06, "loss": 0.8868, "step": 4651 }, { "epoch": 2.447133087848501, "grad_norm": 2.3060033321380615, "learning_rate": 3.252960624643102e-06, "loss": 0.8647, "step": 4652 }, { "epoch": 2.4476591267753816, "grad_norm": 2.2593679428100586, "learning_rate": 3.2522981783207114e-06, "loss": 0.8631, "step": 4653 }, { "epoch": 2.448185165702262, "grad_norm": 2.033881902694702, "learning_rate": 3.2516356739122774e-06, "loss": 0.8375, "step": 4654 }, { "epoch": 2.4487112046291424, "grad_norm": 2.023622751235962, "learning_rate": 3.2509731114689537e-06, "loss": 0.8509, "step": 4655 }, { "epoch": 2.449237243556023, "grad_norm": 2.0493929386138916, "learning_rate": 3.2503104910418977e-06, "loss": 0.8242, "step": 4656 }, { "epoch": 2.4497632824829036, "grad_norm": 2.0123960971832275, "learning_rate": 3.2496478126822706e-06, "loss": 0.8514, "step": 4657 }, { "epoch": 2.4502893214097843, "grad_norm": 2.1661665439605713, "learning_rate": 3.24898507644124e-06, "loss": 0.9293, "step": 4658 }, { "epoch": 2.450815360336665, "grad_norm": 2.19235897064209, "learning_rate": 3.2483222823699756e-06, "loss": 0.8962, "step": 4659 }, { "epoch": 2.4513413992635456, "grad_norm": 2.167860269546509, "learning_rate": 3.247659430519654e-06, "loss": 0.8674, "step": 4660 }, { "epoch": 2.451867438190426, "grad_norm": 2.027833938598633, "learning_rate": 3.2469965209414535e-06, "loss": 0.8465, "step": 4661 }, { "epoch": 2.452393477117307, "grad_norm": 2.055025339126587, "learning_rate": 3.246333553686558e-06, "loss": 0.8377, "step": 4662 }, { "epoch": 2.4529195160441875, "grad_norm": 2.2224934101104736, "learning_rate": 3.2456705288061585e-06, "loss": 0.8416, "step": 4663 }, { "epoch": 2.4534455549710676, "grad_norm": 2.1015732288360596, "learning_rate": 3.2450074463514462e-06, "loss": 0.8509, "step": 4664 }, { "epoch": 2.4539715938979483, "grad_norm": 2.03589129447937, "learning_rate": 3.244344306373619e-06, "loss": 0.8236, "step": 4665 }, { "epoch": 2.454497632824829, "grad_norm": 2.124605655670166, "learning_rate": 3.243681108923879e-06, "loss": 0.8433, "step": 4666 }, { "epoch": 2.4550236717517095, "grad_norm": 2.0556154251098633, "learning_rate": 3.2430178540534337e-06, "loss": 0.8227, "step": 4667 }, { "epoch": 2.45554971067859, "grad_norm": 2.1403369903564453, "learning_rate": 3.2423545418134915e-06, "loss": 0.8529, "step": 4668 }, { "epoch": 2.456075749605471, "grad_norm": 2.096108913421631, "learning_rate": 3.2416911722552703e-06, "loss": 0.8785, "step": 4669 }, { "epoch": 2.4566017885323514, "grad_norm": 2.232759475708008, "learning_rate": 3.2410277454299884e-06, "loss": 0.8448, "step": 4670 }, { "epoch": 2.457127827459232, "grad_norm": 2.1648342609405518, "learning_rate": 3.240364261388871e-06, "loss": 0.818, "step": 4671 }, { "epoch": 2.4576538663861127, "grad_norm": 2.020411729812622, "learning_rate": 3.2397007201831443e-06, "loss": 0.871, "step": 4672 }, { "epoch": 2.4581799053129934, "grad_norm": 2.1507019996643066, "learning_rate": 3.2390371218640436e-06, "loss": 0.8578, "step": 4673 }, { "epoch": 2.458705944239874, "grad_norm": 2.252338409423828, "learning_rate": 3.238373466482807e-06, "loss": 0.8658, "step": 4674 }, { "epoch": 2.459231983166754, "grad_norm": 2.101916551589966, "learning_rate": 3.237709754090674e-06, "loss": 0.8587, "step": 4675 }, { "epoch": 2.459758022093635, "grad_norm": 2.151738405227661, "learning_rate": 3.237045984738892e-06, "loss": 0.8389, "step": 4676 }, { "epoch": 2.4602840610205154, "grad_norm": 2.071928024291992, "learning_rate": 3.2363821584787103e-06, "loss": 0.8613, "step": 4677 }, { "epoch": 2.460810099947396, "grad_norm": 2.0637142658233643, "learning_rate": 3.2357182753613863e-06, "loss": 0.8327, "step": 4678 }, { "epoch": 2.4613361388742767, "grad_norm": 2.1120190620422363, "learning_rate": 3.2350543354381776e-06, "loss": 0.8445, "step": 4679 }, { "epoch": 2.4618621778011573, "grad_norm": 2.077342987060547, "learning_rate": 3.2343903387603482e-06, "loss": 0.8198, "step": 4680 }, { "epoch": 2.462388216728038, "grad_norm": 2.181290626525879, "learning_rate": 3.2337262853791667e-06, "loss": 0.8829, "step": 4681 }, { "epoch": 2.4629142556549186, "grad_norm": 2.1638312339782715, "learning_rate": 3.2330621753459055e-06, "loss": 0.8904, "step": 4682 }, { "epoch": 2.4634402945817993, "grad_norm": 2.121673107147217, "learning_rate": 3.2323980087118416e-06, "loss": 0.858, "step": 4683 }, { "epoch": 2.4639663335086794, "grad_norm": 2.2227680683135986, "learning_rate": 3.2317337855282554e-06, "loss": 0.8711, "step": 4684 }, { "epoch": 2.46449237243556, "grad_norm": 2.3010406494140625, "learning_rate": 3.231069505846434e-06, "loss": 0.8526, "step": 4685 }, { "epoch": 2.4650184113624407, "grad_norm": 2.150268793106079, "learning_rate": 3.2304051697176665e-06, "loss": 0.8493, "step": 4686 }, { "epoch": 2.4655444502893213, "grad_norm": 2.1595466136932373, "learning_rate": 3.229740777193247e-06, "loss": 0.8461, "step": 4687 }, { "epoch": 2.466070489216202, "grad_norm": 2.1780056953430176, "learning_rate": 3.2290763283244753e-06, "loss": 0.9083, "step": 4688 }, { "epoch": 2.4665965281430826, "grad_norm": 2.043163537979126, "learning_rate": 3.2284118231626537e-06, "loss": 0.8757, "step": 4689 }, { "epoch": 2.4671225670699632, "grad_norm": 2.8763527870178223, "learning_rate": 3.2277472617590887e-06, "loss": 0.8828, "step": 4690 }, { "epoch": 2.467648605996844, "grad_norm": 2.087245225906372, "learning_rate": 3.2270826441650936e-06, "loss": 0.7906, "step": 4691 }, { "epoch": 2.4681746449237245, "grad_norm": 1.9523296356201172, "learning_rate": 3.2264179704319846e-06, "loss": 0.8351, "step": 4692 }, { "epoch": 2.4687006838506047, "grad_norm": 2.155015707015991, "learning_rate": 3.225753240611081e-06, "loss": 0.858, "step": 4693 }, { "epoch": 2.469226722777486, "grad_norm": 2.154721736907959, "learning_rate": 3.2250884547537086e-06, "loss": 0.8563, "step": 4694 }, { "epoch": 2.469752761704366, "grad_norm": 2.2699337005615234, "learning_rate": 3.2244236129111954e-06, "loss": 0.8899, "step": 4695 }, { "epoch": 2.4702788006312466, "grad_norm": 2.2659080028533936, "learning_rate": 3.223758715134876e-06, "loss": 0.8724, "step": 4696 }, { "epoch": 2.4708048395581272, "grad_norm": 2.0598866939544678, "learning_rate": 3.2230937614760865e-06, "loss": 0.8966, "step": 4697 }, { "epoch": 2.471330878485008, "grad_norm": 2.097683906555176, "learning_rate": 3.22242875198617e-06, "loss": 0.8509, "step": 4698 }, { "epoch": 2.4718569174118885, "grad_norm": 1.967423677444458, "learning_rate": 3.221763686716474e-06, "loss": 0.8594, "step": 4699 }, { "epoch": 2.472382956338769, "grad_norm": 2.1128299236297607, "learning_rate": 3.2210985657183463e-06, "loss": 0.8649, "step": 4700 }, { "epoch": 2.47290899526565, "grad_norm": 2.5545272827148438, "learning_rate": 3.220433389043145e-06, "loss": 0.8504, "step": 4701 }, { "epoch": 2.4734350341925304, "grad_norm": 2.1258761882781982, "learning_rate": 3.219768156742228e-06, "loss": 0.9031, "step": 4702 }, { "epoch": 2.473961073119411, "grad_norm": 2.1612679958343506, "learning_rate": 3.2191028688669573e-06, "loss": 0.8405, "step": 4703 }, { "epoch": 2.4744871120462912, "grad_norm": 2.2054483890533447, "learning_rate": 3.2184375254687028e-06, "loss": 0.8421, "step": 4704 }, { "epoch": 2.475013150973172, "grad_norm": 1.9889014959335327, "learning_rate": 3.217772126598836e-06, "loss": 0.8457, "step": 4705 }, { "epoch": 2.4755391899000525, "grad_norm": 1.9637149572372437, "learning_rate": 3.217106672308734e-06, "loss": 0.8769, "step": 4706 }, { "epoch": 2.476065228826933, "grad_norm": 2.204890012741089, "learning_rate": 3.2164411626497766e-06, "loss": 0.8609, "step": 4707 }, { "epoch": 2.4765912677538138, "grad_norm": 2.0342633724212646, "learning_rate": 3.2157755976733485e-06, "loss": 0.8237, "step": 4708 }, { "epoch": 2.4771173066806944, "grad_norm": 2.276935338973999, "learning_rate": 3.215109977430839e-06, "loss": 0.8957, "step": 4709 }, { "epoch": 2.477643345607575, "grad_norm": 2.1884186267852783, "learning_rate": 3.214444301973644e-06, "loss": 0.8303, "step": 4710 }, { "epoch": 2.4781693845344557, "grad_norm": 2.0602285861968994, "learning_rate": 3.2137785713531576e-06, "loss": 0.8512, "step": 4711 }, { "epoch": 2.4786954234613363, "grad_norm": 2.112170696258545, "learning_rate": 3.2131127856207844e-06, "loss": 0.867, "step": 4712 }, { "epoch": 2.4792214623882165, "grad_norm": 1.9928957223892212, "learning_rate": 3.2124469448279306e-06, "loss": 0.8274, "step": 4713 }, { "epoch": 2.479747501315097, "grad_norm": 2.246804714202881, "learning_rate": 3.211781049026005e-06, "loss": 0.8328, "step": 4714 }, { "epoch": 2.4802735402419778, "grad_norm": 2.2183032035827637, "learning_rate": 3.211115098266423e-06, "loss": 0.9038, "step": 4715 }, { "epoch": 2.4807995791688584, "grad_norm": 2.2525839805603027, "learning_rate": 3.2104490926006044e-06, "loss": 0.8633, "step": 4716 }, { "epoch": 2.481325618095739, "grad_norm": 2.1048166751861572, "learning_rate": 3.2097830320799726e-06, "loss": 0.7899, "step": 4717 }, { "epoch": 2.4818516570226197, "grad_norm": 2.1858372688293457, "learning_rate": 3.2091169167559543e-06, "loss": 0.8376, "step": 4718 }, { "epoch": 2.4823776959495003, "grad_norm": 2.1468169689178467, "learning_rate": 3.208450746679982e-06, "loss": 0.8828, "step": 4719 }, { "epoch": 2.482903734876381, "grad_norm": 2.1390464305877686, "learning_rate": 3.2077845219034907e-06, "loss": 0.9038, "step": 4720 }, { "epoch": 2.4834297738032616, "grad_norm": 2.165156841278076, "learning_rate": 3.207118242477921e-06, "loss": 0.8698, "step": 4721 }, { "epoch": 2.483955812730142, "grad_norm": 2.1569983959198, "learning_rate": 3.206451908454718e-06, "loss": 0.8492, "step": 4722 }, { "epoch": 2.484481851657023, "grad_norm": 2.4152708053588867, "learning_rate": 3.2057855198853294e-06, "loss": 0.8912, "step": 4723 }, { "epoch": 2.485007890583903, "grad_norm": 1.9276199340820312, "learning_rate": 3.205119076821209e-06, "loss": 0.8649, "step": 4724 }, { "epoch": 2.4855339295107837, "grad_norm": 2.3139569759368896, "learning_rate": 3.2044525793138126e-06, "loss": 0.8863, "step": 4725 }, { "epoch": 2.4860599684376643, "grad_norm": 2.110682249069214, "learning_rate": 3.2037860274146023e-06, "loss": 0.8484, "step": 4726 }, { "epoch": 2.486586007364545, "grad_norm": 2.0970141887664795, "learning_rate": 3.203119421175043e-06, "loss": 0.8247, "step": 4727 }, { "epoch": 2.4871120462914256, "grad_norm": 2.100579023361206, "learning_rate": 3.202452760646605e-06, "loss": 0.832, "step": 4728 }, { "epoch": 2.487638085218306, "grad_norm": 2.1293439865112305, "learning_rate": 3.2017860458807615e-06, "loss": 0.8615, "step": 4729 }, { "epoch": 2.488164124145187, "grad_norm": 2.1302785873413086, "learning_rate": 3.201119276928991e-06, "loss": 0.8638, "step": 4730 }, { "epoch": 2.4886901630720675, "grad_norm": 2.0832507610321045, "learning_rate": 3.200452453842776e-06, "loss": 0.836, "step": 4731 }, { "epoch": 2.489216201998948, "grad_norm": 2.0681936740875244, "learning_rate": 3.1997855766736017e-06, "loss": 0.8434, "step": 4732 }, { "epoch": 2.4897422409258283, "grad_norm": 2.0312297344207764, "learning_rate": 3.1991186454729597e-06, "loss": 0.8711, "step": 4733 }, { "epoch": 2.490268279852709, "grad_norm": 2.170987844467163, "learning_rate": 3.1984516602923445e-06, "loss": 0.8306, "step": 4734 }, { "epoch": 2.4907943187795896, "grad_norm": 2.069817066192627, "learning_rate": 3.197784621183255e-06, "loss": 0.8912, "step": 4735 }, { "epoch": 2.49132035770647, "grad_norm": 2.0202367305755615, "learning_rate": 3.197117528197194e-06, "loss": 0.7976, "step": 4736 }, { "epoch": 2.491846396633351, "grad_norm": 2.1273558139801025, "learning_rate": 3.196450381385669e-06, "loss": 0.8744, "step": 4737 }, { "epoch": 2.4923724355602315, "grad_norm": 2.159618854522705, "learning_rate": 3.1957831808001915e-06, "loss": 0.9, "step": 4738 }, { "epoch": 2.492898474487112, "grad_norm": 2.275146961212158, "learning_rate": 3.1951159264922766e-06, "loss": 0.8891, "step": 4739 }, { "epoch": 2.4934245134139927, "grad_norm": 2.1256372928619385, "learning_rate": 3.1944486185134437e-06, "loss": 0.8544, "step": 4740 }, { "epoch": 2.4939505523408734, "grad_norm": 2.307889938354492, "learning_rate": 3.193781256915217e-06, "loss": 0.8682, "step": 4741 }, { "epoch": 2.494476591267754, "grad_norm": 2.2047386169433594, "learning_rate": 3.1931138417491258e-06, "loss": 0.8721, "step": 4742 }, { "epoch": 2.4950026301946346, "grad_norm": 2.0408718585968018, "learning_rate": 3.192446373066701e-06, "loss": 0.8298, "step": 4743 }, { "epoch": 2.495528669121515, "grad_norm": 1.9943463802337646, "learning_rate": 3.1917788509194782e-06, "loss": 0.8391, "step": 4744 }, { "epoch": 2.4960547080483955, "grad_norm": 2.1115291118621826, "learning_rate": 3.1911112753589986e-06, "loss": 0.8517, "step": 4745 }, { "epoch": 2.496580746975276, "grad_norm": 2.0354933738708496, "learning_rate": 3.190443646436806e-06, "loss": 0.8692, "step": 4746 }, { "epoch": 2.4971067859021567, "grad_norm": 2.2668871879577637, "learning_rate": 3.1897759642044496e-06, "loss": 0.8943, "step": 4747 }, { "epoch": 2.4976328248290374, "grad_norm": 2.1765334606170654, "learning_rate": 3.1891082287134816e-06, "loss": 0.8645, "step": 4748 }, { "epoch": 2.498158863755918, "grad_norm": 2.155735731124878, "learning_rate": 3.18844044001546e-06, "loss": 0.8564, "step": 4749 }, { "epoch": 2.4986849026827986, "grad_norm": 2.03271222114563, "learning_rate": 3.1877725981619456e-06, "loss": 0.8768, "step": 4750 }, { "epoch": 2.4992109416096793, "grad_norm": 2.106574773788452, "learning_rate": 3.1871047032045015e-06, "loss": 0.8483, "step": 4751 }, { "epoch": 2.49973698053656, "grad_norm": 2.196385383605957, "learning_rate": 3.1864367551946995e-06, "loss": 0.8197, "step": 4752 }, { "epoch": 2.50026301946344, "grad_norm": 2.0804617404937744, "learning_rate": 3.1857687541841102e-06, "loss": 0.8476, "step": 4753 }, { "epoch": 2.5007890583903207, "grad_norm": 2.2193429470062256, "learning_rate": 3.1851007002243128e-06, "loss": 0.8325, "step": 4754 }, { "epoch": 2.5013150973172014, "grad_norm": 2.096031427383423, "learning_rate": 3.1844325933668884e-06, "loss": 0.8504, "step": 4755 }, { "epoch": 2.501841136244082, "grad_norm": 2.153536319732666, "learning_rate": 3.183764433663422e-06, "loss": 0.8472, "step": 4756 }, { "epoch": 2.5023671751709626, "grad_norm": 2.156170606613159, "learning_rate": 3.1830962211655037e-06, "loss": 0.8572, "step": 4757 }, { "epoch": 2.5028932140978433, "grad_norm": 2.2555110454559326, "learning_rate": 3.1824279559247263e-06, "loss": 0.881, "step": 4758 }, { "epoch": 2.503419253024724, "grad_norm": 2.2591793537139893, "learning_rate": 3.1817596379926885e-06, "loss": 0.847, "step": 4759 }, { "epoch": 2.5039452919516045, "grad_norm": 2.1686532497406006, "learning_rate": 3.1810912674209924e-06, "loss": 0.9127, "step": 4760 }, { "epoch": 2.504471330878485, "grad_norm": 2.1407623291015625, "learning_rate": 3.1804228442612427e-06, "loss": 0.8717, "step": 4761 }, { "epoch": 2.5049973698053654, "grad_norm": 2.2114436626434326, "learning_rate": 3.17975436856505e-06, "loss": 0.8747, "step": 4762 }, { "epoch": 2.5055234087322464, "grad_norm": 2.2002127170562744, "learning_rate": 3.1790858403840287e-06, "loss": 0.843, "step": 4763 }, { "epoch": 2.5060494476591266, "grad_norm": 2.1939914226531982, "learning_rate": 3.178417259769795e-06, "loss": 0.863, "step": 4764 }, { "epoch": 2.5065754865860073, "grad_norm": 2.115520477294922, "learning_rate": 3.177748626773973e-06, "loss": 0.8953, "step": 4765 }, { "epoch": 2.507101525512888, "grad_norm": 2.138334035873413, "learning_rate": 3.177079941448188e-06, "loss": 0.8257, "step": 4766 }, { "epoch": 2.5076275644397685, "grad_norm": 1.985433578491211, "learning_rate": 3.1764112038440707e-06, "loss": 0.8911, "step": 4767 }, { "epoch": 2.508153603366649, "grad_norm": 2.5756351947784424, "learning_rate": 3.1757424140132546e-06, "loss": 0.8838, "step": 4768 }, { "epoch": 2.50867964229353, "grad_norm": 2.145082712173462, "learning_rate": 3.1750735720073782e-06, "loss": 0.885, "step": 4769 }, { "epoch": 2.5092056812204104, "grad_norm": 2.102243423461914, "learning_rate": 3.1744046778780847e-06, "loss": 0.8765, "step": 4770 }, { "epoch": 2.5097317201472906, "grad_norm": 2.0892181396484375, "learning_rate": 3.1737357316770177e-06, "loss": 0.8915, "step": 4771 }, { "epoch": 2.5102577590741717, "grad_norm": 2.1610379219055176, "learning_rate": 3.173066733455831e-06, "loss": 0.9154, "step": 4772 }, { "epoch": 2.510783798001052, "grad_norm": 2.23358416557312, "learning_rate": 3.1723976832661764e-06, "loss": 0.9132, "step": 4773 }, { "epoch": 2.5113098369279325, "grad_norm": 2.1177031993865967, "learning_rate": 3.171728581159714e-06, "loss": 0.8729, "step": 4774 }, { "epoch": 2.511835875854813, "grad_norm": 2.2375974655151367, "learning_rate": 3.171059427188104e-06, "loss": 0.8793, "step": 4775 }, { "epoch": 2.512361914781694, "grad_norm": 2.3255906105041504, "learning_rate": 3.170390221403014e-06, "loss": 0.9126, "step": 4776 }, { "epoch": 2.5128879537085744, "grad_norm": 2.220562696456909, "learning_rate": 3.1697209638561155e-06, "loss": 0.8384, "step": 4777 }, { "epoch": 2.513413992635455, "grad_norm": 2.3126378059387207, "learning_rate": 3.1690516545990806e-06, "loss": 0.872, "step": 4778 }, { "epoch": 2.5139400315623357, "grad_norm": 2.105604887008667, "learning_rate": 3.1683822936835895e-06, "loss": 0.7685, "step": 4779 }, { "epoch": 2.5144660704892163, "grad_norm": 2.1912217140197754, "learning_rate": 3.167712881161324e-06, "loss": 0.849, "step": 4780 }, { "epoch": 2.514992109416097, "grad_norm": 1.9324488639831543, "learning_rate": 3.16704341708397e-06, "loss": 0.8292, "step": 4781 }, { "epoch": 2.515518148342977, "grad_norm": 2.219200372695923, "learning_rate": 3.1663739015032176e-06, "loss": 0.8375, "step": 4782 }, { "epoch": 2.5160441872698582, "grad_norm": 2.1358325481414795, "learning_rate": 3.1657043344707617e-06, "loss": 0.8741, "step": 4783 }, { "epoch": 2.5165702261967384, "grad_norm": 2.2541422843933105, "learning_rate": 3.1650347160382998e-06, "loss": 0.8555, "step": 4784 }, { "epoch": 2.517096265123619, "grad_norm": 2.091334104537964, "learning_rate": 3.164365046257536e-06, "loss": 0.8526, "step": 4785 }, { "epoch": 2.5176223040504997, "grad_norm": 2.135281562805176, "learning_rate": 3.1636953251801743e-06, "loss": 0.8374, "step": 4786 }, { "epoch": 2.5181483429773803, "grad_norm": 2.2293076515197754, "learning_rate": 3.1630255528579258e-06, "loss": 0.8959, "step": 4787 }, { "epoch": 2.518674381904261, "grad_norm": 2.3065247535705566, "learning_rate": 3.1623557293425044e-06, "loss": 0.8809, "step": 4788 }, { "epoch": 2.5192004208311416, "grad_norm": 2.134904384613037, "learning_rate": 3.161685854685628e-06, "loss": 0.8623, "step": 4789 }, { "epoch": 2.5197264597580222, "grad_norm": 2.1642343997955322, "learning_rate": 3.1610159289390195e-06, "loss": 0.8804, "step": 4790 }, { "epoch": 2.5202524986849024, "grad_norm": 2.1415600776672363, "learning_rate": 3.160345952154404e-06, "loss": 0.8618, "step": 4791 }, { "epoch": 2.5207785376117835, "grad_norm": 2.1917197704315186, "learning_rate": 3.159675924383512e-06, "loss": 0.8549, "step": 4792 }, { "epoch": 2.5213045765386637, "grad_norm": 2.0698225498199463, "learning_rate": 3.159005845678076e-06, "loss": 0.8887, "step": 4793 }, { "epoch": 2.5218306154655443, "grad_norm": 2.2009358406066895, "learning_rate": 3.158335716089835e-06, "loss": 0.8877, "step": 4794 }, { "epoch": 2.522356654392425, "grad_norm": 2.048922061920166, "learning_rate": 3.1576655356705306e-06, "loss": 0.8503, "step": 4795 }, { "epoch": 2.5228826933193056, "grad_norm": 2.1223301887512207, "learning_rate": 3.156995304471908e-06, "loss": 0.7886, "step": 4796 }, { "epoch": 2.5234087322461862, "grad_norm": 2.149343252182007, "learning_rate": 3.1563250225457163e-06, "loss": 0.8699, "step": 4797 }, { "epoch": 2.523934771173067, "grad_norm": 2.2047276496887207, "learning_rate": 3.1556546899437107e-06, "loss": 0.8585, "step": 4798 }, { "epoch": 2.5244608100999475, "grad_norm": 2.362220287322998, "learning_rate": 3.1549843067176473e-06, "loss": 0.8566, "step": 4799 }, { "epoch": 2.524986849026828, "grad_norm": 2.0104141235351562, "learning_rate": 3.154313872919287e-06, "loss": 0.8259, "step": 4800 }, { "epoch": 2.5255128879537088, "grad_norm": 2.156411647796631, "learning_rate": 3.1536433886003948e-06, "loss": 0.834, "step": 4801 }, { "epoch": 2.526038926880589, "grad_norm": 2.1166343688964844, "learning_rate": 3.1529728538127415e-06, "loss": 0.8576, "step": 4802 }, { "epoch": 2.5265649658074696, "grad_norm": 2.2476656436920166, "learning_rate": 3.1523022686080986e-06, "loss": 0.8669, "step": 4803 }, { "epoch": 2.52709100473435, "grad_norm": 2.3251049518585205, "learning_rate": 3.1516316330382434e-06, "loss": 0.877, "step": 4804 }, { "epoch": 2.527617043661231, "grad_norm": 2.0992820262908936, "learning_rate": 3.150960947154956e-06, "loss": 0.8607, "step": 4805 }, { "epoch": 2.5281430825881115, "grad_norm": 2.1324074268341064, "learning_rate": 3.1502902110100224e-06, "loss": 0.8948, "step": 4806 }, { "epoch": 2.528669121514992, "grad_norm": 2.201124429702759, "learning_rate": 3.1496194246552304e-06, "loss": 0.8772, "step": 4807 }, { "epoch": 2.5291951604418728, "grad_norm": 2.143425226211548, "learning_rate": 3.148948588142372e-06, "loss": 0.8768, "step": 4808 }, { "epoch": 2.5297211993687534, "grad_norm": 2.1666855812072754, "learning_rate": 3.1482777015232435e-06, "loss": 0.8595, "step": 4809 }, { "epoch": 2.530247238295634, "grad_norm": 2.119865655899048, "learning_rate": 3.147606764849646e-06, "loss": 0.8917, "step": 4810 }, { "epoch": 2.530773277222514, "grad_norm": 2.2413406372070312, "learning_rate": 3.1469357781733833e-06, "loss": 0.88, "step": 4811 }, { "epoch": 2.5312993161493953, "grad_norm": 2.2525837421417236, "learning_rate": 3.1462647415462623e-06, "loss": 0.8886, "step": 4812 }, { "epoch": 2.5318253550762755, "grad_norm": 2.1929705142974854, "learning_rate": 3.145593655020095e-06, "loss": 0.8238, "step": 4813 }, { "epoch": 2.532351394003156, "grad_norm": 2.118004560470581, "learning_rate": 3.144922518646697e-06, "loss": 0.8782, "step": 4814 }, { "epoch": 2.5328774329300368, "grad_norm": 2.0773844718933105, "learning_rate": 3.1442513324778885e-06, "loss": 0.8377, "step": 4815 }, { "epoch": 2.5334034718569174, "grad_norm": 2.105982780456543, "learning_rate": 3.1435800965654924e-06, "loss": 0.8568, "step": 4816 }, { "epoch": 2.533929510783798, "grad_norm": 2.263314723968506, "learning_rate": 3.1429088109613354e-06, "loss": 0.8847, "step": 4817 }, { "epoch": 2.5344555497106787, "grad_norm": 2.1269514560699463, "learning_rate": 3.1422374757172487e-06, "loss": 0.9213, "step": 4818 }, { "epoch": 2.5349815886375593, "grad_norm": 2.1588759422302246, "learning_rate": 3.1415660908850666e-06, "loss": 0.9014, "step": 4819 }, { "epoch": 2.53550762756444, "grad_norm": 2.134244203567505, "learning_rate": 3.1408946565166286e-06, "loss": 0.824, "step": 4820 }, { "epoch": 2.5360336664913206, "grad_norm": 2.3167872428894043, "learning_rate": 3.1402231726637767e-06, "loss": 0.9167, "step": 4821 }, { "epoch": 2.5365597054182007, "grad_norm": 2.069037914276123, "learning_rate": 3.139551639378357e-06, "loss": 0.8522, "step": 4822 }, { "epoch": 2.5370857443450814, "grad_norm": 2.254324197769165, "learning_rate": 3.13888005671222e-06, "loss": 0.9052, "step": 4823 }, { "epoch": 2.537611783271962, "grad_norm": 2.030669689178467, "learning_rate": 3.1382084247172183e-06, "loss": 0.8653, "step": 4824 }, { "epoch": 2.5381378221988427, "grad_norm": 2.291720151901245, "learning_rate": 3.1375367434452115e-06, "loss": 0.9004, "step": 4825 }, { "epoch": 2.5386638611257233, "grad_norm": 2.1879196166992188, "learning_rate": 3.1368650129480595e-06, "loss": 0.8745, "step": 4826 }, { "epoch": 2.539189900052604, "grad_norm": 2.4145424365997314, "learning_rate": 3.136193233277629e-06, "loss": 0.9094, "step": 4827 }, { "epoch": 2.5397159389794846, "grad_norm": 2.069216251373291, "learning_rate": 3.135521404485788e-06, "loss": 0.8539, "step": 4828 }, { "epoch": 2.540241977906365, "grad_norm": 2.210820436477661, "learning_rate": 3.1348495266244093e-06, "loss": 0.8289, "step": 4829 }, { "epoch": 2.540768016833246, "grad_norm": 2.0330371856689453, "learning_rate": 3.1341775997453705e-06, "loss": 0.8295, "step": 4830 }, { "epoch": 2.541294055760126, "grad_norm": 2.083348035812378, "learning_rate": 3.1335056239005518e-06, "loss": 0.7929, "step": 4831 }, { "epoch": 2.541820094687007, "grad_norm": 2.0453341007232666, "learning_rate": 3.132833599141837e-06, "loss": 0.871, "step": 4832 }, { "epoch": 2.5423461336138873, "grad_norm": 2.013253688812256, "learning_rate": 3.1321615255211137e-06, "loss": 0.8788, "step": 4833 }, { "epoch": 2.542872172540768, "grad_norm": 2.1001274585723877, "learning_rate": 3.131489403090275e-06, "loss": 0.91, "step": 4834 }, { "epoch": 2.5433982114676486, "grad_norm": 2.184832811355591, "learning_rate": 3.130817231901215e-06, "loss": 0.8685, "step": 4835 }, { "epoch": 2.543924250394529, "grad_norm": 2.264946222305298, "learning_rate": 3.1301450120058345e-06, "loss": 0.8603, "step": 4836 }, { "epoch": 2.54445028932141, "grad_norm": 2.0698368549346924, "learning_rate": 3.1294727434560355e-06, "loss": 0.8966, "step": 4837 }, { "epoch": 2.5449763282482905, "grad_norm": 2.056734800338745, "learning_rate": 3.1288004263037258e-06, "loss": 0.8548, "step": 4838 }, { "epoch": 2.545502367175171, "grad_norm": 2.174271821975708, "learning_rate": 3.1281280606008148e-06, "loss": 0.818, "step": 4839 }, { "epoch": 2.5460284061020513, "grad_norm": 2.0376367568969727, "learning_rate": 3.1274556463992167e-06, "loss": 0.8608, "step": 4840 }, { "epoch": 2.5465544450289324, "grad_norm": 2.056023120880127, "learning_rate": 3.1267831837508515e-06, "loss": 0.8569, "step": 4841 }, { "epoch": 2.5470804839558125, "grad_norm": 2.2890868186950684, "learning_rate": 3.1261106727076403e-06, "loss": 0.8722, "step": 4842 }, { "epoch": 2.547606522882693, "grad_norm": 2.1005306243896484, "learning_rate": 3.125438113321507e-06, "loss": 0.8241, "step": 4843 }, { "epoch": 2.548132561809574, "grad_norm": 2.1388425827026367, "learning_rate": 3.1247655056443823e-06, "loss": 0.8737, "step": 4844 }, { "epoch": 2.5486586007364544, "grad_norm": 2.190706491470337, "learning_rate": 3.124092849728199e-06, "loss": 0.8868, "step": 4845 }, { "epoch": 2.549184639663335, "grad_norm": 2.1983895301818848, "learning_rate": 3.1234201456248936e-06, "loss": 0.8665, "step": 4846 }, { "epoch": 2.5497106785902157, "grad_norm": 2.2156622409820557, "learning_rate": 3.1227473933864076e-06, "loss": 0.9052, "step": 4847 }, { "epoch": 2.5502367175170964, "grad_norm": 2.2113072872161865, "learning_rate": 3.122074593064684e-06, "loss": 0.8677, "step": 4848 }, { "epoch": 2.550762756443977, "grad_norm": 2.270080327987671, "learning_rate": 3.1214017447116714e-06, "loss": 0.8784, "step": 4849 }, { "epoch": 2.5512887953708576, "grad_norm": 2.1563305854797363, "learning_rate": 3.120728848379321e-06, "loss": 0.8326, "step": 4850 }, { "epoch": 2.551814834297738, "grad_norm": 2.1893162727355957, "learning_rate": 3.1200559041195876e-06, "loss": 0.8693, "step": 4851 }, { "epoch": 2.552340873224619, "grad_norm": 2.109708786010742, "learning_rate": 3.1193829119844315e-06, "loss": 0.8936, "step": 4852 }, { "epoch": 2.552866912151499, "grad_norm": 2.1532673835754395, "learning_rate": 3.1187098720258147e-06, "loss": 0.8842, "step": 4853 }, { "epoch": 2.5533929510783797, "grad_norm": 2.102642059326172, "learning_rate": 3.118036784295703e-06, "loss": 0.8767, "step": 4854 }, { "epoch": 2.5539189900052603, "grad_norm": 2.308022975921631, "learning_rate": 3.117363648846068e-06, "loss": 0.8819, "step": 4855 }, { "epoch": 2.554445028932141, "grad_norm": 2.2444803714752197, "learning_rate": 3.116690465728882e-06, "loss": 0.9223, "step": 4856 }, { "epoch": 2.5549710678590216, "grad_norm": 2.23453950881958, "learning_rate": 3.1160172349961234e-06, "loss": 0.8729, "step": 4857 }, { "epoch": 2.5554971067859023, "grad_norm": 2.125089406967163, "learning_rate": 3.1153439566997723e-06, "loss": 0.8551, "step": 4858 }, { "epoch": 2.556023145712783, "grad_norm": 2.0221333503723145, "learning_rate": 3.114670630891815e-06, "loss": 0.8941, "step": 4859 }, { "epoch": 2.556549184639663, "grad_norm": 2.028895378112793, "learning_rate": 3.1139972576242394e-06, "loss": 0.8675, "step": 4860 }, { "epoch": 2.557075223566544, "grad_norm": 1.9944618940353394, "learning_rate": 3.1133238369490364e-06, "loss": 0.843, "step": 4861 }, { "epoch": 2.5576012624934243, "grad_norm": 2.068620204925537, "learning_rate": 3.112650368918203e-06, "loss": 0.8808, "step": 4862 }, { "epoch": 2.558127301420305, "grad_norm": 2.190218687057495, "learning_rate": 3.1119768535837393e-06, "loss": 0.8601, "step": 4863 }, { "epoch": 2.5586533403471856, "grad_norm": 2.0900609493255615, "learning_rate": 3.1113032909976465e-06, "loss": 0.8878, "step": 4864 }, { "epoch": 2.5591793792740662, "grad_norm": 2.245415687561035, "learning_rate": 3.110629681211933e-06, "loss": 0.8351, "step": 4865 }, { "epoch": 2.559705418200947, "grad_norm": 2.1592485904693604, "learning_rate": 3.109956024278608e-06, "loss": 0.8534, "step": 4866 }, { "epoch": 2.5602314571278275, "grad_norm": 2.160346508026123, "learning_rate": 3.109282320249687e-06, "loss": 0.9141, "step": 4867 }, { "epoch": 2.560757496054708, "grad_norm": 2.039933681488037, "learning_rate": 3.108608569177186e-06, "loss": 0.8276, "step": 4868 }, { "epoch": 2.561283534981589, "grad_norm": 2.2058701515197754, "learning_rate": 3.1079347711131276e-06, "loss": 0.8828, "step": 4869 }, { "epoch": 2.5618095739084694, "grad_norm": 2.2626984119415283, "learning_rate": 3.107260926109536e-06, "loss": 0.8557, "step": 4870 }, { "epoch": 2.5623356128353496, "grad_norm": 2.1341898441314697, "learning_rate": 3.1065870342184403e-06, "loss": 0.8516, "step": 4871 }, { "epoch": 2.5628616517622302, "grad_norm": 2.2280731201171875, "learning_rate": 3.105913095491872e-06, "loss": 0.8959, "step": 4872 }, { "epoch": 2.563387690689111, "grad_norm": 2.2224960327148438, "learning_rate": 3.1052391099818673e-06, "loss": 0.876, "step": 4873 }, { "epoch": 2.5639137296159915, "grad_norm": 2.254575490951538, "learning_rate": 3.104565077740466e-06, "loss": 0.8541, "step": 4874 }, { "epoch": 2.564439768542872, "grad_norm": 2.24760103225708, "learning_rate": 3.1038909988197104e-06, "loss": 0.8461, "step": 4875 }, { "epoch": 2.564965807469753, "grad_norm": 2.067974328994751, "learning_rate": 3.103216873271647e-06, "loss": 0.8716, "step": 4876 }, { "epoch": 2.5654918463966334, "grad_norm": 2.023578643798828, "learning_rate": 3.102542701148327e-06, "loss": 0.8771, "step": 4877 }, { "epoch": 2.566017885323514, "grad_norm": 2.176870346069336, "learning_rate": 3.101868482501804e-06, "loss": 0.842, "step": 4878 }, { "epoch": 2.5665439242503947, "grad_norm": 1.9572207927703857, "learning_rate": 3.1011942173841342e-06, "loss": 0.8772, "step": 4879 }, { "epoch": 2.567069963177275, "grad_norm": 2.0724399089813232, "learning_rate": 3.10051990584738e-06, "loss": 0.855, "step": 4880 }, { "epoch": 2.567596002104156, "grad_norm": 2.1123993396759033, "learning_rate": 3.099845547943605e-06, "loss": 0.8368, "step": 4881 }, { "epoch": 2.568122041031036, "grad_norm": 2.1326355934143066, "learning_rate": 3.0991711437248785e-06, "loss": 0.8928, "step": 4882 }, { "epoch": 2.5686480799579168, "grad_norm": 2.3330674171447754, "learning_rate": 3.0984966932432715e-06, "loss": 0.9231, "step": 4883 }, { "epoch": 2.5691741188847974, "grad_norm": 2.0847697257995605, "learning_rate": 3.097822196550859e-06, "loss": 0.8774, "step": 4884 }, { "epoch": 2.569700157811678, "grad_norm": 2.150047779083252, "learning_rate": 3.09714765369972e-06, "loss": 0.8979, "step": 4885 }, { "epoch": 2.5702261967385587, "grad_norm": 1.9964009523391724, "learning_rate": 3.0964730647419365e-06, "loss": 0.8508, "step": 4886 }, { "epoch": 2.5707522356654393, "grad_norm": 2.1069588661193848, "learning_rate": 3.0957984297295963e-06, "loss": 0.853, "step": 4887 }, { "epoch": 2.57127827459232, "grad_norm": 2.0673940181732178, "learning_rate": 3.095123748714788e-06, "loss": 0.888, "step": 4888 }, { "epoch": 2.5718043135192006, "grad_norm": 2.2297050952911377, "learning_rate": 3.0944490217496032e-06, "loss": 0.8787, "step": 4889 }, { "epoch": 2.572330352446081, "grad_norm": 2.126462936401367, "learning_rate": 3.093774248886141e-06, "loss": 0.8113, "step": 4890 }, { "epoch": 2.5728563913729614, "grad_norm": 1.987273931503296, "learning_rate": 3.0930994301765e-06, "loss": 0.8132, "step": 4891 }, { "epoch": 2.573382430299842, "grad_norm": 2.0410077571868896, "learning_rate": 3.0924245656727846e-06, "loss": 0.8707, "step": 4892 }, { "epoch": 2.5739084692267227, "grad_norm": 2.041771650314331, "learning_rate": 3.091749655427102e-06, "loss": 0.8087, "step": 4893 }, { "epoch": 2.5744345081536033, "grad_norm": 2.051525354385376, "learning_rate": 3.0910746994915626e-06, "loss": 0.8152, "step": 4894 }, { "epoch": 2.574960547080484, "grad_norm": 2.1224405765533447, "learning_rate": 3.0903996979182817e-06, "loss": 0.7829, "step": 4895 }, { "epoch": 2.5754865860073646, "grad_norm": 2.114220142364502, "learning_rate": 3.0897246507593757e-06, "loss": 0.8482, "step": 4896 }, { "epoch": 2.576012624934245, "grad_norm": 2.126922130584717, "learning_rate": 3.0890495580669672e-06, "loss": 0.8978, "step": 4897 }, { "epoch": 2.576538663861126, "grad_norm": 2.131983518600464, "learning_rate": 3.0883744198931797e-06, "loss": 0.8452, "step": 4898 }, { "epoch": 2.5770647027880065, "grad_norm": 2.424879312515259, "learning_rate": 3.0876992362901442e-06, "loss": 0.8934, "step": 4899 }, { "epoch": 2.5775907417148867, "grad_norm": 2.2261946201324463, "learning_rate": 3.08702400730999e-06, "loss": 0.8709, "step": 4900 }, { "epoch": 2.5781167806417677, "grad_norm": 2.207287073135376, "learning_rate": 3.086348733004853e-06, "loss": 0.8572, "step": 4901 }, { "epoch": 2.578642819568648, "grad_norm": 2.1266067028045654, "learning_rate": 3.0856734134268745e-06, "loss": 0.8815, "step": 4902 }, { "epoch": 2.5791688584955286, "grad_norm": 2.1052074432373047, "learning_rate": 3.0849980486281933e-06, "loss": 0.8618, "step": 4903 }, { "epoch": 2.579694897422409, "grad_norm": 2.000812530517578, "learning_rate": 3.0843226386609576e-06, "loss": 0.7923, "step": 4904 }, { "epoch": 2.58022093634929, "grad_norm": 2.2113101482391357, "learning_rate": 3.083647183577316e-06, "loss": 0.918, "step": 4905 }, { "epoch": 2.5807469752761705, "grad_norm": 2.150761127471924, "learning_rate": 3.0829716834294222e-06, "loss": 0.8719, "step": 4906 }, { "epoch": 2.581273014203051, "grad_norm": 2.165925979614258, "learning_rate": 3.082296138269431e-06, "loss": 0.8729, "step": 4907 }, { "epoch": 2.5817990531299317, "grad_norm": 2.2171056270599365, "learning_rate": 3.081620548149504e-06, "loss": 0.8743, "step": 4908 }, { "epoch": 2.582325092056812, "grad_norm": 2.165393829345703, "learning_rate": 3.080944913121804e-06, "loss": 0.8973, "step": 4909 }, { "epoch": 2.582851130983693, "grad_norm": 2.1381678581237793, "learning_rate": 3.0802692332384966e-06, "loss": 0.8553, "step": 4910 }, { "epoch": 2.583377169910573, "grad_norm": 2.117344856262207, "learning_rate": 3.079593508551753e-06, "loss": 0.8987, "step": 4911 }, { "epoch": 2.583903208837454, "grad_norm": 2.106372117996216, "learning_rate": 3.0789177391137463e-06, "loss": 0.8821, "step": 4912 }, { "epoch": 2.5844292477643345, "grad_norm": 2.070113182067871, "learning_rate": 3.0782419249766553e-06, "loss": 0.8735, "step": 4913 }, { "epoch": 2.584955286691215, "grad_norm": 2.217109441757202, "learning_rate": 3.0775660661926593e-06, "loss": 0.8913, "step": 4914 }, { "epoch": 2.5854813256180957, "grad_norm": 2.1684751510620117, "learning_rate": 3.0768901628139417e-06, "loss": 0.8555, "step": 4915 }, { "epoch": 2.5860073645449764, "grad_norm": 2.249298572540283, "learning_rate": 3.076214214892691e-06, "loss": 0.8812, "step": 4916 }, { "epoch": 2.586533403471857, "grad_norm": 2.383953094482422, "learning_rate": 3.0755382224810986e-06, "loss": 0.8324, "step": 4917 }, { "epoch": 2.5870594423987376, "grad_norm": 2.2448055744171143, "learning_rate": 3.074862185631357e-06, "loss": 0.9407, "step": 4918 }, { "epoch": 2.5875854813256183, "grad_norm": 2.349682331085205, "learning_rate": 3.0741861043956657e-06, "loss": 0.9031, "step": 4919 }, { "epoch": 2.5881115202524985, "grad_norm": 2.2829387187957764, "learning_rate": 3.073509978826226e-06, "loss": 0.8426, "step": 4920 }, { "epoch": 2.5886375591793795, "grad_norm": 2.0444629192352295, "learning_rate": 3.0728338089752414e-06, "loss": 0.8301, "step": 4921 }, { "epoch": 2.5891635981062597, "grad_norm": 2.2543492317199707, "learning_rate": 3.07215759489492e-06, "loss": 0.8664, "step": 4922 }, { "epoch": 2.5896896370331404, "grad_norm": 2.256535291671753, "learning_rate": 3.0714813366374742e-06, "loss": 0.8334, "step": 4923 }, { "epoch": 2.590215675960021, "grad_norm": 2.121526002883911, "learning_rate": 3.0708050342551184e-06, "loss": 0.8839, "step": 4924 }, { "epoch": 2.5907417148869016, "grad_norm": 2.1865363121032715, "learning_rate": 3.070128687800071e-06, "loss": 0.8858, "step": 4925 }, { "epoch": 2.5912677538137823, "grad_norm": 2.0567994117736816, "learning_rate": 3.0694522973245535e-06, "loss": 0.8567, "step": 4926 }, { "epoch": 2.591793792740663, "grad_norm": 2.019508123397827, "learning_rate": 3.068775862880792e-06, "loss": 0.8199, "step": 4927 }, { "epoch": 2.5923198316675435, "grad_norm": 2.0306825637817383, "learning_rate": 3.0680993845210127e-06, "loss": 0.8389, "step": 4928 }, { "epoch": 2.5928458705944237, "grad_norm": 2.137507677078247, "learning_rate": 3.0674228622974494e-06, "loss": 0.8639, "step": 4929 }, { "epoch": 2.593371909521305, "grad_norm": 2.222081184387207, "learning_rate": 3.0667462962623367e-06, "loss": 0.8421, "step": 4930 }, { "epoch": 2.593897948448185, "grad_norm": 2.152656316757202, "learning_rate": 3.0660696864679142e-06, "loss": 0.9063, "step": 4931 }, { "epoch": 2.5944239873750656, "grad_norm": 2.4453184604644775, "learning_rate": 3.0653930329664227e-06, "loss": 0.833, "step": 4932 }, { "epoch": 2.5949500263019463, "grad_norm": 2.473618268966675, "learning_rate": 3.064716335810108e-06, "loss": 0.9288, "step": 4933 }, { "epoch": 2.595476065228827, "grad_norm": 2.2377943992614746, "learning_rate": 3.064039595051219e-06, "loss": 0.8468, "step": 4934 }, { "epoch": 2.5960021041557075, "grad_norm": 2.267429828643799, "learning_rate": 3.063362810742008e-06, "loss": 0.9255, "step": 4935 }, { "epoch": 2.596528143082588, "grad_norm": 2.1109211444854736, "learning_rate": 3.0626859829347298e-06, "loss": 0.8759, "step": 4936 }, { "epoch": 2.597054182009469, "grad_norm": 2.067333221435547, "learning_rate": 3.0620091116816436e-06, "loss": 0.8572, "step": 4937 }, { "epoch": 2.5975802209363494, "grad_norm": 2.199721336364746, "learning_rate": 3.061332197035013e-06, "loss": 0.8557, "step": 4938 }, { "epoch": 2.59810625986323, "grad_norm": 2.086308717727661, "learning_rate": 3.0606552390471015e-06, "loss": 0.8962, "step": 4939 }, { "epoch": 2.5986322987901103, "grad_norm": 2.0455543994903564, "learning_rate": 3.059978237770179e-06, "loss": 0.891, "step": 4940 }, { "epoch": 2.599158337716991, "grad_norm": 2.0469722747802734, "learning_rate": 3.0593011932565176e-06, "loss": 0.8497, "step": 4941 }, { "epoch": 2.5996843766438715, "grad_norm": 2.1133131980895996, "learning_rate": 3.0586241055583933e-06, "loss": 0.8314, "step": 4942 }, { "epoch": 2.600210415570752, "grad_norm": 2.146045207977295, "learning_rate": 3.0579469747280836e-06, "loss": 0.8942, "step": 4943 }, { "epoch": 2.600736454497633, "grad_norm": 2.103142499923706, "learning_rate": 3.057269800817873e-06, "loss": 0.8385, "step": 4944 }, { "epoch": 2.6012624934245134, "grad_norm": 1.9773310422897339, "learning_rate": 3.056592583880046e-06, "loss": 0.855, "step": 4945 }, { "epoch": 2.601788532351394, "grad_norm": 2.1176865100860596, "learning_rate": 3.0559153239668903e-06, "loss": 0.8746, "step": 4946 }, { "epoch": 2.6023145712782747, "grad_norm": 2.137795925140381, "learning_rate": 3.0552380211307e-06, "loss": 0.8661, "step": 4947 }, { "epoch": 2.6028406102051553, "grad_norm": 2.096848487854004, "learning_rate": 3.0545606754237694e-06, "loss": 0.7896, "step": 4948 }, { "epoch": 2.6033666491320355, "grad_norm": 2.1517276763916016, "learning_rate": 3.053883286898398e-06, "loss": 0.8771, "step": 4949 }, { "epoch": 2.6038926880589166, "grad_norm": 2.172236680984497, "learning_rate": 3.053205855606888e-06, "loss": 0.9153, "step": 4950 }, { "epoch": 2.604418726985797, "grad_norm": 2.2087953090667725, "learning_rate": 3.052528381601545e-06, "loss": 0.908, "step": 4951 }, { "epoch": 2.6049447659126774, "grad_norm": 2.086710214614868, "learning_rate": 3.051850864934677e-06, "loss": 0.7894, "step": 4952 }, { "epoch": 2.605470804839558, "grad_norm": 2.422354221343994, "learning_rate": 3.0511733056585966e-06, "loss": 0.8548, "step": 4953 }, { "epoch": 2.6059968437664387, "grad_norm": 2.1599812507629395, "learning_rate": 3.0504957038256187e-06, "loss": 0.8674, "step": 4954 }, { "epoch": 2.6065228826933193, "grad_norm": 2.205836772918701, "learning_rate": 3.049818059488062e-06, "loss": 0.9239, "step": 4955 }, { "epoch": 2.6070489216202, "grad_norm": 2.124905824661255, "learning_rate": 3.04914037269825e-06, "loss": 0.8637, "step": 4956 }, { "epoch": 2.6075749605470806, "grad_norm": 1.9691417217254639, "learning_rate": 3.048462643508506e-06, "loss": 0.8604, "step": 4957 }, { "epoch": 2.6081009994739612, "grad_norm": 2.2705249786376953, "learning_rate": 3.0477848719711586e-06, "loss": 0.9295, "step": 4958 }, { "epoch": 2.608627038400842, "grad_norm": 2.067674398422241, "learning_rate": 3.0471070581385403e-06, "loss": 0.8598, "step": 4959 }, { "epoch": 2.609153077327722, "grad_norm": 2.2735607624053955, "learning_rate": 3.046429202062985e-06, "loss": 0.8811, "step": 4960 }, { "epoch": 2.6096791162546027, "grad_norm": 2.1234474182128906, "learning_rate": 3.0457513037968325e-06, "loss": 0.8628, "step": 4961 }, { "epoch": 2.6102051551814833, "grad_norm": 2.025573492050171, "learning_rate": 3.045073363392423e-06, "loss": 0.8653, "step": 4962 }, { "epoch": 2.610731194108364, "grad_norm": 2.227337598800659, "learning_rate": 3.044395380902102e-06, "loss": 0.8541, "step": 4963 }, { "epoch": 2.6112572330352446, "grad_norm": 2.0316569805145264, "learning_rate": 3.0437173563782176e-06, "loss": 0.8637, "step": 4964 }, { "epoch": 2.6117832719621252, "grad_norm": 2.054654836654663, "learning_rate": 3.04303928987312e-06, "loss": 0.9102, "step": 4965 }, { "epoch": 2.612309310889006, "grad_norm": 2.0690088272094727, "learning_rate": 3.0423611814391645e-06, "loss": 0.8607, "step": 4966 }, { "epoch": 2.6128353498158865, "grad_norm": 2.213421583175659, "learning_rate": 3.04168303112871e-06, "loss": 0.8319, "step": 4967 }, { "epoch": 2.613361388742767, "grad_norm": 2.0875186920166016, "learning_rate": 3.041004838994115e-06, "loss": 0.8651, "step": 4968 }, { "epoch": 2.6138874276696473, "grad_norm": 2.1682024002075195, "learning_rate": 3.040326605087746e-06, "loss": 0.867, "step": 4969 }, { "epoch": 2.6144134665965284, "grad_norm": 2.2044677734375, "learning_rate": 3.0396483294619696e-06, "loss": 0.8725, "step": 4970 }, { "epoch": 2.6149395055234086, "grad_norm": 2.01615309715271, "learning_rate": 3.038970012169155e-06, "loss": 0.8765, "step": 4971 }, { "epoch": 2.6154655444502892, "grad_norm": 2.0782089233398438, "learning_rate": 3.038291653261678e-06, "loss": 0.9042, "step": 4972 }, { "epoch": 2.61599158337717, "grad_norm": 2.1850247383117676, "learning_rate": 3.037613252791915e-06, "loss": 0.8413, "step": 4973 }, { "epoch": 2.6165176223040505, "grad_norm": 2.149055004119873, "learning_rate": 3.0369348108122465e-06, "loss": 0.8247, "step": 4974 }, { "epoch": 2.617043661230931, "grad_norm": 2.099086284637451, "learning_rate": 3.036256327375056e-06, "loss": 0.8638, "step": 4975 }, { "epoch": 2.6175697001578118, "grad_norm": 2.175006151199341, "learning_rate": 3.03557780253273e-06, "loss": 0.8975, "step": 4976 }, { "epoch": 2.6180957390846924, "grad_norm": 2.040523052215576, "learning_rate": 3.0348992363376584e-06, "loss": 0.8358, "step": 4977 }, { "epoch": 2.6186217780115726, "grad_norm": 2.241870403289795, "learning_rate": 3.0342206288422336e-06, "loss": 0.8642, "step": 4978 }, { "epoch": 2.6191478169384537, "grad_norm": 2.235835552215576, "learning_rate": 3.033541980098853e-06, "loss": 0.8748, "step": 4979 }, { "epoch": 2.619673855865334, "grad_norm": 2.19262433052063, "learning_rate": 3.032863290159916e-06, "loss": 0.8209, "step": 4980 }, { "epoch": 2.6201998947922145, "grad_norm": 2.200974941253662, "learning_rate": 3.032184559077825e-06, "loss": 0.9218, "step": 4981 }, { "epoch": 2.620725933719095, "grad_norm": 2.0750837326049805, "learning_rate": 3.031505786904986e-06, "loss": 0.8927, "step": 4982 }, { "epoch": 2.6212519726459758, "grad_norm": 2.092756748199463, "learning_rate": 3.030826973693806e-06, "loss": 0.8485, "step": 4983 }, { "epoch": 2.6217780115728564, "grad_norm": 2.0999159812927246, "learning_rate": 3.0301481194967007e-06, "loss": 0.8454, "step": 4984 }, { "epoch": 2.622304050499737, "grad_norm": 2.009605646133423, "learning_rate": 3.0294692243660827e-06, "loss": 0.8511, "step": 4985 }, { "epoch": 2.6228300894266177, "grad_norm": 2.1283154487609863, "learning_rate": 3.028790288354371e-06, "loss": 0.8979, "step": 4986 }, { "epoch": 2.6233561283534983, "grad_norm": 2.12890362739563, "learning_rate": 3.028111311513989e-06, "loss": 0.9012, "step": 4987 }, { "epoch": 2.623882167280379, "grad_norm": 2.3051679134368896, "learning_rate": 3.0274322938973596e-06, "loss": 0.8381, "step": 4988 }, { "epoch": 2.624408206207259, "grad_norm": 1.980338454246521, "learning_rate": 3.026753235556911e-06, "loss": 0.8481, "step": 4989 }, { "epoch": 2.62493424513414, "grad_norm": 2.2021164894104004, "learning_rate": 3.026074136545074e-06, "loss": 0.8702, "step": 4990 }, { "epoch": 2.6254602840610204, "grad_norm": 2.204867362976074, "learning_rate": 3.025394996914284e-06, "loss": 0.8774, "step": 4991 }, { "epoch": 2.625986322987901, "grad_norm": 2.0833232402801514, "learning_rate": 3.0247158167169776e-06, "loss": 0.9159, "step": 4992 }, { "epoch": 2.6265123619147817, "grad_norm": 2.1655163764953613, "learning_rate": 3.0240365960055963e-06, "loss": 0.8918, "step": 4993 }, { "epoch": 2.6270384008416623, "grad_norm": 2.0999555587768555, "learning_rate": 3.023357334832582e-06, "loss": 0.8518, "step": 4994 }, { "epoch": 2.627564439768543, "grad_norm": 2.162543296813965, "learning_rate": 3.0226780332503826e-06, "loss": 0.8551, "step": 4995 }, { "epoch": 2.6280904786954236, "grad_norm": 2.093903064727783, "learning_rate": 3.021998691311447e-06, "loss": 0.8356, "step": 4996 }, { "epoch": 2.628616517622304, "grad_norm": 2.0125296115875244, "learning_rate": 3.021319309068229e-06, "loss": 0.8462, "step": 4997 }, { "epoch": 2.6291425565491844, "grad_norm": 2.1161234378814697, "learning_rate": 3.0206398865731846e-06, "loss": 0.8552, "step": 4998 }, { "epoch": 2.6296685954760655, "grad_norm": 2.1994869709014893, "learning_rate": 3.019960423878774e-06, "loss": 0.8906, "step": 4999 }, { "epoch": 2.6301946344029457, "grad_norm": 2.1337168216705322, "learning_rate": 3.019280921037458e-06, "loss": 0.8896, "step": 5000 }, { "epoch": 2.6307206733298263, "grad_norm": 2.123728036880493, "learning_rate": 3.018601378101702e-06, "loss": 0.8735, "step": 5001 }, { "epoch": 2.631246712256707, "grad_norm": 2.013857841491699, "learning_rate": 3.0179217951239755e-06, "loss": 0.8603, "step": 5002 }, { "epoch": 2.6317727511835876, "grad_norm": 2.1562891006469727, "learning_rate": 3.017242172156749e-06, "loss": 0.8409, "step": 5003 }, { "epoch": 2.632298790110468, "grad_norm": 2.2303454875946045, "learning_rate": 3.016562509252498e-06, "loss": 0.9054, "step": 5004 }, { "epoch": 2.632824829037349, "grad_norm": 2.065103530883789, "learning_rate": 3.0158828064637004e-06, "loss": 0.8445, "step": 5005 }, { "epoch": 2.6333508679642295, "grad_norm": 2.236968994140625, "learning_rate": 3.0152030638428365e-06, "loss": 0.9127, "step": 5006 }, { "epoch": 2.63387690689111, "grad_norm": 2.14941668510437, "learning_rate": 3.0145232814423902e-06, "loss": 0.9087, "step": 5007 }, { "epoch": 2.6344029458179907, "grad_norm": 2.2251391410827637, "learning_rate": 3.0138434593148485e-06, "loss": 0.8939, "step": 5008 }, { "epoch": 2.634928984744871, "grad_norm": 2.0615382194519043, "learning_rate": 3.013163597512703e-06, "loss": 0.8952, "step": 5009 }, { "epoch": 2.6354550236717516, "grad_norm": 2.113995313644409, "learning_rate": 3.012483696088444e-06, "loss": 0.8071, "step": 5010 }, { "epoch": 2.635981062598632, "grad_norm": 2.1052839756011963, "learning_rate": 3.0118037550945694e-06, "loss": 0.9187, "step": 5011 }, { "epoch": 2.636507101525513, "grad_norm": 2.038095474243164, "learning_rate": 3.0111237745835785e-06, "loss": 0.9181, "step": 5012 }, { "epoch": 2.6370331404523935, "grad_norm": 2.239121675491333, "learning_rate": 3.0104437546079733e-06, "loss": 0.8797, "step": 5013 }, { "epoch": 2.637559179379274, "grad_norm": 2.106593132019043, "learning_rate": 3.009763695220259e-06, "loss": 0.879, "step": 5014 }, { "epoch": 2.6380852183061547, "grad_norm": 2.234524726867676, "learning_rate": 3.009083596472944e-06, "loss": 0.8309, "step": 5015 }, { "epoch": 2.6386112572330354, "grad_norm": 2.1223249435424805, "learning_rate": 3.00840345841854e-06, "loss": 0.8325, "step": 5016 }, { "epoch": 2.639137296159916, "grad_norm": 1.99759042263031, "learning_rate": 3.007723281109562e-06, "loss": 0.8491, "step": 5017 }, { "epoch": 2.639663335086796, "grad_norm": 2.088017225265503, "learning_rate": 3.007043064598526e-06, "loss": 0.8601, "step": 5018 }, { "epoch": 2.6401893740136773, "grad_norm": 2.164340019226074, "learning_rate": 3.0063628089379534e-06, "loss": 0.8497, "step": 5019 }, { "epoch": 2.6407154129405574, "grad_norm": 2.1976499557495117, "learning_rate": 3.0056825141803682e-06, "loss": 0.8871, "step": 5020 }, { "epoch": 2.641241451867438, "grad_norm": 1.9878344535827637, "learning_rate": 3.0050021803782954e-06, "loss": 0.8426, "step": 5021 }, { "epoch": 2.6417674907943187, "grad_norm": 2.0330920219421387, "learning_rate": 3.0043218075842657e-06, "loss": 0.8874, "step": 5022 }, { "epoch": 2.6422935297211994, "grad_norm": 2.1295769214630127, "learning_rate": 3.0036413958508122e-06, "loss": 0.9284, "step": 5023 }, { "epoch": 2.64281956864808, "grad_norm": 2.1539742946624756, "learning_rate": 3.002960945230469e-06, "loss": 0.8902, "step": 5024 }, { "epoch": 2.6433456075749606, "grad_norm": 2.260293483734131, "learning_rate": 3.0022804557757763e-06, "loss": 0.8751, "step": 5025 }, { "epoch": 2.6438716465018413, "grad_norm": 2.1515257358551025, "learning_rate": 3.0015999275392737e-06, "loss": 0.7899, "step": 5026 }, { "epoch": 2.644397685428722, "grad_norm": 1.9256139993667603, "learning_rate": 3.000919360573508e-06, "loss": 0.8711, "step": 5027 }, { "epoch": 2.6449237243556025, "grad_norm": 1.9904085397720337, "learning_rate": 3.0002387549310254e-06, "loss": 0.8863, "step": 5028 }, { "epoch": 2.6454497632824827, "grad_norm": 2.190939426422119, "learning_rate": 2.9995581106643766e-06, "loss": 0.8893, "step": 5029 }, { "epoch": 2.6459758022093633, "grad_norm": 2.1012701988220215, "learning_rate": 2.9988774278261156e-06, "loss": 0.8831, "step": 5030 }, { "epoch": 2.646501841136244, "grad_norm": 2.2108967304229736, "learning_rate": 2.998196706468799e-06, "loss": 0.8755, "step": 5031 }, { "epoch": 2.6470278800631246, "grad_norm": 2.2387490272521973, "learning_rate": 2.9975159466449857e-06, "loss": 0.877, "step": 5032 }, { "epoch": 2.6475539189900053, "grad_norm": 2.0577619075775146, "learning_rate": 2.9968351484072376e-06, "loss": 0.865, "step": 5033 }, { "epoch": 2.648079957916886, "grad_norm": 2.1304306983947754, "learning_rate": 2.996154311808122e-06, "loss": 0.8773, "step": 5034 }, { "epoch": 2.6486059968437665, "grad_norm": 2.3746819496154785, "learning_rate": 2.995473436900205e-06, "loss": 0.8648, "step": 5035 }, { "epoch": 2.649132035770647, "grad_norm": 2.2624902725219727, "learning_rate": 2.994792523736061e-06, "loss": 0.8876, "step": 5036 }, { "epoch": 2.649658074697528, "grad_norm": 2.216461181640625, "learning_rate": 2.9941115723682606e-06, "loss": 0.8283, "step": 5037 }, { "epoch": 2.650184113624408, "grad_norm": 2.1763532161712646, "learning_rate": 2.993430582849384e-06, "loss": 0.8755, "step": 5038 }, { "epoch": 2.650710152551289, "grad_norm": 2.0555810928344727, "learning_rate": 2.99274955523201e-06, "loss": 0.86, "step": 5039 }, { "epoch": 2.6512361914781692, "grad_norm": 2.187427520751953, "learning_rate": 2.9920684895687217e-06, "loss": 0.8897, "step": 5040 }, { "epoch": 2.65176223040505, "grad_norm": 2.1367087364196777, "learning_rate": 2.9913873859121056e-06, "loss": 0.871, "step": 5041 }, { "epoch": 2.6522882693319305, "grad_norm": 2.3681488037109375, "learning_rate": 2.9907062443147517e-06, "loss": 0.8886, "step": 5042 }, { "epoch": 2.652814308258811, "grad_norm": 2.2079362869262695, "learning_rate": 2.99002506482925e-06, "loss": 0.8844, "step": 5043 }, { "epoch": 2.653340347185692, "grad_norm": 2.1367673873901367, "learning_rate": 2.9893438475081963e-06, "loss": 0.9208, "step": 5044 }, { "epoch": 2.6538663861125724, "grad_norm": 2.1451048851013184, "learning_rate": 2.988662592404189e-06, "loss": 0.8598, "step": 5045 }, { "epoch": 2.654392425039453, "grad_norm": 2.167510509490967, "learning_rate": 2.987981299569827e-06, "loss": 0.8644, "step": 5046 }, { "epoch": 2.6549184639663332, "grad_norm": 2.0434138774871826, "learning_rate": 2.9872999690577155e-06, "loss": 0.8285, "step": 5047 }, { "epoch": 2.6554445028932143, "grad_norm": 2.118492603302002, "learning_rate": 2.9866186009204606e-06, "loss": 0.849, "step": 5048 }, { "epoch": 2.6559705418200945, "grad_norm": 2.0487430095672607, "learning_rate": 2.985937195210672e-06, "loss": 0.8673, "step": 5049 }, { "epoch": 2.656496580746975, "grad_norm": 2.213047504425049, "learning_rate": 2.9852557519809613e-06, "loss": 0.8723, "step": 5050 }, { "epoch": 2.657022619673856, "grad_norm": 2.2848167419433594, "learning_rate": 2.9845742712839442e-06, "loss": 0.9177, "step": 5051 }, { "epoch": 2.6575486586007364, "grad_norm": 2.0007801055908203, "learning_rate": 2.9838927531722393e-06, "loss": 0.8762, "step": 5052 }, { "epoch": 2.658074697527617, "grad_norm": 2.223155975341797, "learning_rate": 2.9832111976984663e-06, "loss": 0.8432, "step": 5053 }, { "epoch": 2.6586007364544977, "grad_norm": 2.049638271331787, "learning_rate": 2.982529604915251e-06, "loss": 0.8473, "step": 5054 }, { "epoch": 2.6591267753813783, "grad_norm": 2.15936017036438, "learning_rate": 2.9818479748752177e-06, "loss": 0.8806, "step": 5055 }, { "epoch": 2.659652814308259, "grad_norm": 2.1343305110931396, "learning_rate": 2.9811663076309983e-06, "loss": 0.8826, "step": 5056 }, { "epoch": 2.6601788532351396, "grad_norm": 2.0846128463745117, "learning_rate": 2.9804846032352243e-06, "loss": 0.8358, "step": 5057 }, { "epoch": 2.6607048921620198, "grad_norm": 2.020618438720703, "learning_rate": 2.9798028617405308e-06, "loss": 0.8137, "step": 5058 }, { "epoch": 2.661230931088901, "grad_norm": 2.058281183242798, "learning_rate": 2.979121083199557e-06, "loss": 0.8593, "step": 5059 }, { "epoch": 2.661756970015781, "grad_norm": 2.339982271194458, "learning_rate": 2.9784392676649433e-06, "loss": 0.9079, "step": 5060 }, { "epoch": 2.6622830089426617, "grad_norm": 2.146796941757202, "learning_rate": 2.977757415189334e-06, "loss": 0.8838, "step": 5061 }, { "epoch": 2.6628090478695423, "grad_norm": 2.1282131671905518, "learning_rate": 2.977075525825376e-06, "loss": 0.8451, "step": 5062 }, { "epoch": 2.663335086796423, "grad_norm": 2.291994094848633, "learning_rate": 2.9763935996257187e-06, "loss": 0.812, "step": 5063 }, { "epoch": 2.6638611257233036, "grad_norm": 2.088050127029419, "learning_rate": 2.975711636643014e-06, "loss": 0.8368, "step": 5064 }, { "epoch": 2.664387164650184, "grad_norm": 2.123783588409424, "learning_rate": 2.9750296369299188e-06, "loss": 0.8777, "step": 5065 }, { "epoch": 2.664913203577065, "grad_norm": 2.1591906547546387, "learning_rate": 2.9743476005390902e-06, "loss": 0.8553, "step": 5066 }, { "epoch": 2.665439242503945, "grad_norm": 2.2808377742767334, "learning_rate": 2.97366552752319e-06, "loss": 0.8364, "step": 5067 }, { "epoch": 2.665965281430826, "grad_norm": 2.3841168880462646, "learning_rate": 2.9729834179348805e-06, "loss": 0.9198, "step": 5068 }, { "epoch": 2.6664913203577063, "grad_norm": 2.0192089080810547, "learning_rate": 2.9723012718268295e-06, "loss": 0.8599, "step": 5069 }, { "epoch": 2.667017359284587, "grad_norm": 2.3098342418670654, "learning_rate": 2.971619089251708e-06, "loss": 0.9056, "step": 5070 }, { "epoch": 2.6675433982114676, "grad_norm": 2.067403554916382, "learning_rate": 2.970936870262185e-06, "loss": 0.9142, "step": 5071 }, { "epoch": 2.668069437138348, "grad_norm": 2.0295233726501465, "learning_rate": 2.970254614910938e-06, "loss": 0.8117, "step": 5072 }, { "epoch": 2.668595476065229, "grad_norm": 2.143162727355957, "learning_rate": 2.9695723232506442e-06, "loss": 0.8544, "step": 5073 }, { "epoch": 2.6691215149921095, "grad_norm": 2.087739944458008, "learning_rate": 2.968889995333985e-06, "loss": 0.8148, "step": 5074 }, { "epoch": 2.66964755391899, "grad_norm": 2.111665964126587, "learning_rate": 2.9682076312136425e-06, "loss": 0.865, "step": 5075 }, { "epoch": 2.6701735928458707, "grad_norm": 2.046574354171753, "learning_rate": 2.967525230942304e-06, "loss": 0.823, "step": 5076 }, { "epoch": 2.6706996317727514, "grad_norm": 2.0451266765594482, "learning_rate": 2.966842794572659e-06, "loss": 0.8631, "step": 5077 }, { "epoch": 2.6712256706996316, "grad_norm": 2.222809314727783, "learning_rate": 2.9661603221573986e-06, "loss": 0.9108, "step": 5078 }, { "epoch": 2.671751709626512, "grad_norm": 2.195380926132202, "learning_rate": 2.9654778137492185e-06, "loss": 0.9067, "step": 5079 }, { "epoch": 2.672277748553393, "grad_norm": 2.113037347793579, "learning_rate": 2.964795269400815e-06, "loss": 0.8431, "step": 5080 }, { "epoch": 2.6728037874802735, "grad_norm": 2.139397144317627, "learning_rate": 2.9641126891648884e-06, "loss": 0.8272, "step": 5081 }, { "epoch": 2.673329826407154, "grad_norm": 2.0172653198242188, "learning_rate": 2.963430073094143e-06, "loss": 0.8412, "step": 5082 }, { "epoch": 2.6738558653340347, "grad_norm": 2.067352771759033, "learning_rate": 2.9627474212412833e-06, "loss": 0.8785, "step": 5083 }, { "epoch": 2.6743819042609154, "grad_norm": 2.215958595275879, "learning_rate": 2.9620647336590186e-06, "loss": 0.8918, "step": 5084 }, { "epoch": 2.674907943187796, "grad_norm": 2.2911486625671387, "learning_rate": 2.9613820104000603e-06, "loss": 0.9126, "step": 5085 }, { "epoch": 2.6754339821146766, "grad_norm": 2.048518657684326, "learning_rate": 2.9606992515171206e-06, "loss": 0.8785, "step": 5086 }, { "epoch": 2.675960021041557, "grad_norm": 2.205204963684082, "learning_rate": 2.960016457062919e-06, "loss": 0.9363, "step": 5087 }, { "epoch": 2.676486059968438, "grad_norm": 2.0942041873931885, "learning_rate": 2.9593336270901733e-06, "loss": 0.855, "step": 5088 }, { "epoch": 2.677012098895318, "grad_norm": 2.1659207344055176, "learning_rate": 2.9586507616516065e-06, "loss": 0.8488, "step": 5089 }, { "epoch": 2.6775381378221987, "grad_norm": 2.340712070465088, "learning_rate": 2.9579678607999428e-06, "loss": 0.8111, "step": 5090 }, { "epoch": 2.6780641767490794, "grad_norm": 2.1929757595062256, "learning_rate": 2.957284924587912e-06, "loss": 0.8851, "step": 5091 }, { "epoch": 2.67859021567596, "grad_norm": 2.186767339706421, "learning_rate": 2.9566019530682433e-06, "loss": 0.8388, "step": 5092 }, { "epoch": 2.6791162546028406, "grad_norm": 2.0876657962799072, "learning_rate": 2.9559189462936687e-06, "loss": 0.8477, "step": 5093 }, { "epoch": 2.6796422935297213, "grad_norm": 2.0844547748565674, "learning_rate": 2.9552359043169253e-06, "loss": 0.8149, "step": 5094 }, { "epoch": 2.680168332456602, "grad_norm": 2.1256513595581055, "learning_rate": 2.9545528271907532e-06, "loss": 0.8389, "step": 5095 }, { "epoch": 2.6806943713834825, "grad_norm": 2.152540683746338, "learning_rate": 2.9538697149678912e-06, "loss": 0.872, "step": 5096 }, { "epoch": 2.681220410310363, "grad_norm": 2.0682032108306885, "learning_rate": 2.953186567701085e-06, "loss": 0.8931, "step": 5097 }, { "epoch": 2.6817464492372434, "grad_norm": 2.103710889816284, "learning_rate": 2.952503385443081e-06, "loss": 0.8918, "step": 5098 }, { "epoch": 2.682272488164124, "grad_norm": 2.189635992050171, "learning_rate": 2.951820168246629e-06, "loss": 0.8837, "step": 5099 }, { "epoch": 2.6827985270910046, "grad_norm": 1.9977314472198486, "learning_rate": 2.9511369161644803e-06, "loss": 0.8701, "step": 5100 }, { "epoch": 2.6833245660178853, "grad_norm": 2.0539143085479736, "learning_rate": 2.95045362924939e-06, "loss": 0.839, "step": 5101 }, { "epoch": 2.683850604944766, "grad_norm": 2.0580382347106934, "learning_rate": 2.949770307554117e-06, "loss": 0.9505, "step": 5102 }, { "epoch": 2.6843766438716465, "grad_norm": 2.1524507999420166, "learning_rate": 2.94908695113142e-06, "loss": 0.8425, "step": 5103 }, { "epoch": 2.684902682798527, "grad_norm": 2.024958848953247, "learning_rate": 2.9484035600340636e-06, "loss": 0.8396, "step": 5104 }, { "epoch": 2.685428721725408, "grad_norm": 2.134732246398926, "learning_rate": 2.9477201343148114e-06, "loss": 0.8564, "step": 5105 }, { "epoch": 2.6859547606522884, "grad_norm": 2.3130898475646973, "learning_rate": 2.9470366740264335e-06, "loss": 0.8334, "step": 5106 }, { "epoch": 2.6864807995791686, "grad_norm": 2.1652069091796875, "learning_rate": 2.9463531792216994e-06, "loss": 0.8631, "step": 5107 }, { "epoch": 2.6870068385060497, "grad_norm": 2.1992437839508057, "learning_rate": 2.9456696499533834e-06, "loss": 0.8733, "step": 5108 }, { "epoch": 2.68753287743293, "grad_norm": 1.9541934728622437, "learning_rate": 2.9449860862742624e-06, "loss": 0.8247, "step": 5109 }, { "epoch": 2.6880589163598105, "grad_norm": 2.234273672103882, "learning_rate": 2.9443024882371146e-06, "loss": 0.8471, "step": 5110 }, { "epoch": 2.688584955286691, "grad_norm": 2.1924798488616943, "learning_rate": 2.9436188558947217e-06, "loss": 0.8851, "step": 5111 }, { "epoch": 2.689110994213572, "grad_norm": 2.033834934234619, "learning_rate": 2.9429351892998677e-06, "loss": 0.8601, "step": 5112 }, { "epoch": 2.6896370331404524, "grad_norm": 2.3147642612457275, "learning_rate": 2.94225148850534e-06, "loss": 0.9099, "step": 5113 }, { "epoch": 2.690163072067333, "grad_norm": 2.159672498703003, "learning_rate": 2.9415677535639276e-06, "loss": 0.8617, "step": 5114 }, { "epoch": 2.6906891109942137, "grad_norm": 2.153672695159912, "learning_rate": 2.9408839845284236e-06, "loss": 0.8792, "step": 5115 }, { "epoch": 2.691215149921094, "grad_norm": 2.126248836517334, "learning_rate": 2.9402001814516222e-06, "loss": 0.8507, "step": 5116 }, { "epoch": 2.691741188847975, "grad_norm": 2.1768035888671875, "learning_rate": 2.9395163443863207e-06, "loss": 0.861, "step": 5117 }, { "epoch": 2.692267227774855, "grad_norm": 2.080192804336548, "learning_rate": 2.938832473385319e-06, "loss": 0.8487, "step": 5118 }, { "epoch": 2.692793266701736, "grad_norm": 2.2531867027282715, "learning_rate": 2.93814856850142e-06, "loss": 0.8905, "step": 5119 }, { "epoch": 2.6933193056286164, "grad_norm": 2.1240313053131104, "learning_rate": 2.93746462978743e-06, "loss": 0.8337, "step": 5120 }, { "epoch": 2.693845344555497, "grad_norm": 2.1786365509033203, "learning_rate": 2.9367806572961553e-06, "loss": 0.8701, "step": 5121 }, { "epoch": 2.6943713834823777, "grad_norm": 2.063542127609253, "learning_rate": 2.936096651080408e-06, "loss": 0.8615, "step": 5122 }, { "epoch": 2.6948974224092583, "grad_norm": 2.592956066131592, "learning_rate": 2.9354126111929994e-06, "loss": 0.8684, "step": 5123 }, { "epoch": 2.695423461336139, "grad_norm": 2.168936252593994, "learning_rate": 2.934728537686746e-06, "loss": 0.8793, "step": 5124 }, { "epoch": 2.6959495002630196, "grad_norm": 1.976986050605774, "learning_rate": 2.934044430614467e-06, "loss": 0.8237, "step": 5125 }, { "epoch": 2.6964755391899002, "grad_norm": 2.2964651584625244, "learning_rate": 2.933360290028982e-06, "loss": 0.8572, "step": 5126 }, { "epoch": 2.6970015781167804, "grad_norm": 2.1017110347747803, "learning_rate": 2.9326761159831157e-06, "loss": 0.893, "step": 5127 }, { "epoch": 2.6975276170436615, "grad_norm": 2.205183506011963, "learning_rate": 2.9319919085296937e-06, "loss": 0.9078, "step": 5128 }, { "epoch": 2.6980536559705417, "grad_norm": 2.040782928466797, "learning_rate": 2.931307667721544e-06, "loss": 0.8432, "step": 5129 }, { "epoch": 2.6985796948974223, "grad_norm": 2.218319892883301, "learning_rate": 2.9306233936114985e-06, "loss": 0.8844, "step": 5130 }, { "epoch": 2.699105733824303, "grad_norm": 2.1565723419189453, "learning_rate": 2.9299390862523915e-06, "loss": 0.8466, "step": 5131 }, { "epoch": 2.6996317727511836, "grad_norm": 2.0803558826446533, "learning_rate": 2.929254745697058e-06, "loss": 0.8377, "step": 5132 }, { "epoch": 2.7001578116780642, "grad_norm": 2.0779879093170166, "learning_rate": 2.928570371998337e-06, "loss": 0.8137, "step": 5133 }, { "epoch": 2.700683850604945, "grad_norm": 2.0836880207061768, "learning_rate": 2.9278859652090725e-06, "loss": 0.8113, "step": 5134 }, { "epoch": 2.7012098895318255, "grad_norm": 2.140331506729126, "learning_rate": 2.927201525382105e-06, "loss": 0.8525, "step": 5135 }, { "epoch": 2.7017359284587057, "grad_norm": 2.102402687072754, "learning_rate": 2.9265170525702834e-06, "loss": 0.8268, "step": 5136 }, { "epoch": 2.7022619673855868, "grad_norm": 2.2917346954345703, "learning_rate": 2.9258325468264565e-06, "loss": 0.9084, "step": 5137 }, { "epoch": 2.702788006312467, "grad_norm": 2.0138742923736572, "learning_rate": 2.9251480082034754e-06, "loss": 0.8188, "step": 5138 }, { "epoch": 2.7033140452393476, "grad_norm": 2.259744882583618, "learning_rate": 2.9244634367541952e-06, "loss": 0.8509, "step": 5139 }, { "epoch": 2.7038400841662282, "grad_norm": 2.235116481781006, "learning_rate": 2.923778832531472e-06, "loss": 0.8789, "step": 5140 }, { "epoch": 2.704366123093109, "grad_norm": 2.0997676849365234, "learning_rate": 2.9230941955881655e-06, "loss": 0.8729, "step": 5141 }, { "epoch": 2.7048921620199895, "grad_norm": 2.160753011703491, "learning_rate": 2.9224095259771367e-06, "loss": 0.8469, "step": 5142 }, { "epoch": 2.70541820094687, "grad_norm": 2.389517307281494, "learning_rate": 2.9217248237512503e-06, "loss": 0.7855, "step": 5143 }, { "epoch": 2.7059442398737508, "grad_norm": 2.060338258743286, "learning_rate": 2.921040088963374e-06, "loss": 0.8534, "step": 5144 }, { "epoch": 2.7064702788006314, "grad_norm": 2.0122106075286865, "learning_rate": 2.920355321666376e-06, "loss": 0.8554, "step": 5145 }, { "epoch": 2.706996317727512, "grad_norm": 2.1708900928497314, "learning_rate": 2.9196705219131293e-06, "loss": 0.9477, "step": 5146 }, { "epoch": 2.7075223566543922, "grad_norm": 2.289604663848877, "learning_rate": 2.918985689756507e-06, "loss": 0.8637, "step": 5147 }, { "epoch": 2.708048395581273, "grad_norm": 2.1472413539886475, "learning_rate": 2.9183008252493873e-06, "loss": 0.8571, "step": 5148 }, { "epoch": 2.7085744345081535, "grad_norm": 2.258530855178833, "learning_rate": 2.917615928444649e-06, "loss": 0.8719, "step": 5149 }, { "epoch": 2.709100473435034, "grad_norm": 2.22092604637146, "learning_rate": 2.9169309993951734e-06, "loss": 0.8765, "step": 5150 }, { "epoch": 2.7096265123619148, "grad_norm": 2.5041797161102295, "learning_rate": 2.916246038153846e-06, "loss": 0.8413, "step": 5151 }, { "epoch": 2.7101525512887954, "grad_norm": 2.192054271697998, "learning_rate": 2.9155610447735535e-06, "loss": 0.8907, "step": 5152 }, { "epoch": 2.710678590215676, "grad_norm": 2.2294297218322754, "learning_rate": 2.914876019307184e-06, "loss": 0.8946, "step": 5153 }, { "epoch": 2.7112046291425567, "grad_norm": 2.2202858924865723, "learning_rate": 2.9141909618076304e-06, "loss": 0.8668, "step": 5154 }, { "epoch": 2.7117306680694373, "grad_norm": 2.016706705093384, "learning_rate": 2.9135058723277863e-06, "loss": 0.8333, "step": 5155 }, { "epoch": 2.7122567069963175, "grad_norm": 2.1792125701904297, "learning_rate": 2.9128207509205503e-06, "loss": 0.8724, "step": 5156 }, { "epoch": 2.7127827459231986, "grad_norm": 1.922990083694458, "learning_rate": 2.912135597638819e-06, "loss": 0.8625, "step": 5157 }, { "epoch": 2.7133087848500788, "grad_norm": 2.206533670425415, "learning_rate": 2.911450412535496e-06, "loss": 0.8839, "step": 5158 }, { "epoch": 2.7138348237769594, "grad_norm": 2.019646406173706, "learning_rate": 2.9107651956634854e-06, "loss": 0.8758, "step": 5159 }, { "epoch": 2.71436086270384, "grad_norm": 2.312981128692627, "learning_rate": 2.910079947075692e-06, "loss": 0.9004, "step": 5160 }, { "epoch": 2.7148869016307207, "grad_norm": 2.159842014312744, "learning_rate": 2.9093946668250257e-06, "loss": 0.838, "step": 5161 }, { "epoch": 2.7154129405576013, "grad_norm": 2.031733274459839, "learning_rate": 2.9087093549643987e-06, "loss": 0.8492, "step": 5162 }, { "epoch": 2.715938979484482, "grad_norm": 2.8198091983795166, "learning_rate": 2.908024011546725e-06, "loss": 0.8612, "step": 5163 }, { "epoch": 2.7164650184113626, "grad_norm": 2.2509584426879883, "learning_rate": 2.907338636624921e-06, "loss": 0.9234, "step": 5164 }, { "epoch": 2.7169910573382428, "grad_norm": 2.2310569286346436, "learning_rate": 2.9066532302519042e-06, "loss": 0.8904, "step": 5165 }, { "epoch": 2.717517096265124, "grad_norm": 2.339999198913574, "learning_rate": 2.9059677924805966e-06, "loss": 0.8711, "step": 5166 }, { "epoch": 2.718043135192004, "grad_norm": 2.0996506214141846, "learning_rate": 2.905282323363922e-06, "loss": 0.8471, "step": 5167 }, { "epoch": 2.7185691741188847, "grad_norm": 2.114731788635254, "learning_rate": 2.9045968229548067e-06, "loss": 0.8782, "step": 5168 }, { "epoch": 2.7190952130457653, "grad_norm": 2.101783037185669, "learning_rate": 2.9039112913061777e-06, "loss": 0.8538, "step": 5169 }, { "epoch": 2.719621251972646, "grad_norm": 1.9470703601837158, "learning_rate": 2.9032257284709687e-06, "loss": 0.8279, "step": 5170 }, { "epoch": 2.7201472908995266, "grad_norm": 2.1497299671173096, "learning_rate": 2.902540134502111e-06, "loss": 0.8898, "step": 5171 }, { "epoch": 2.720673329826407, "grad_norm": 2.296560764312744, "learning_rate": 2.9018545094525402e-06, "loss": 0.8597, "step": 5172 }, { "epoch": 2.721199368753288, "grad_norm": 2.2587788105010986, "learning_rate": 2.9011688533751948e-06, "loss": 0.8758, "step": 5173 }, { "epoch": 2.7217254076801685, "grad_norm": 2.053093671798706, "learning_rate": 2.900483166323016e-06, "loss": 0.879, "step": 5174 }, { "epoch": 2.722251446607049, "grad_norm": 2.1514482498168945, "learning_rate": 2.8997974483489456e-06, "loss": 0.8536, "step": 5175 }, { "epoch": 2.7227774855339293, "grad_norm": 3.1339778900146484, "learning_rate": 2.89911169950593e-06, "loss": 0.8535, "step": 5176 }, { "epoch": 2.7233035244608104, "grad_norm": 2.0730018615722656, "learning_rate": 2.8984259198469154e-06, "loss": 0.8358, "step": 5177 }, { "epoch": 2.7238295633876906, "grad_norm": 2.13045072555542, "learning_rate": 2.897740109424853e-06, "loss": 0.8828, "step": 5178 }, { "epoch": 2.724355602314571, "grad_norm": 2.1560544967651367, "learning_rate": 2.897054268292695e-06, "loss": 0.9285, "step": 5179 }, { "epoch": 2.724881641241452, "grad_norm": 2.2103872299194336, "learning_rate": 2.8963683965033964e-06, "loss": 0.8474, "step": 5180 }, { "epoch": 2.7254076801683325, "grad_norm": 2.159815788269043, "learning_rate": 2.895682494109914e-06, "loss": 0.8413, "step": 5181 }, { "epoch": 2.725933719095213, "grad_norm": 2.168814182281494, "learning_rate": 2.894996561165207e-06, "loss": 0.8944, "step": 5182 }, { "epoch": 2.7264597580220937, "grad_norm": 2.1619069576263428, "learning_rate": 2.894310597722238e-06, "loss": 0.882, "step": 5183 }, { "epoch": 2.7269857969489744, "grad_norm": 2.188507318496704, "learning_rate": 2.8936246038339714e-06, "loss": 0.8922, "step": 5184 }, { "epoch": 2.7275118358758546, "grad_norm": 2.3349661827087402, "learning_rate": 2.8929385795533727e-06, "loss": 0.8887, "step": 5185 }, { "epoch": 2.7280378748027356, "grad_norm": 2.04679799079895, "learning_rate": 2.8922525249334116e-06, "loss": 0.8246, "step": 5186 }, { "epoch": 2.728563913729616, "grad_norm": 2.0342459678649902, "learning_rate": 2.8915664400270595e-06, "loss": 0.864, "step": 5187 }, { "epoch": 2.7290899526564965, "grad_norm": 2.1347031593322754, "learning_rate": 2.89088032488729e-06, "loss": 0.8108, "step": 5188 }, { "epoch": 2.729615991583377, "grad_norm": 1.9798531532287598, "learning_rate": 2.8901941795670784e-06, "loss": 0.8466, "step": 5189 }, { "epoch": 2.7301420305102577, "grad_norm": 2.371927499771118, "learning_rate": 2.8895080041194035e-06, "loss": 0.8797, "step": 5190 }, { "epoch": 2.7306680694371384, "grad_norm": 2.4939346313476562, "learning_rate": 2.888821798597246e-06, "loss": 0.9124, "step": 5191 }, { "epoch": 2.731194108364019, "grad_norm": 2.1278979778289795, "learning_rate": 2.8881355630535883e-06, "loss": 0.9385, "step": 5192 }, { "epoch": 2.7317201472908996, "grad_norm": 2.082120895385742, "learning_rate": 2.887449297541416e-06, "loss": 0.8806, "step": 5193 }, { "epoch": 2.7322461862177803, "grad_norm": 2.098106622695923, "learning_rate": 2.886763002113717e-06, "loss": 0.8844, "step": 5194 }, { "epoch": 2.732772225144661, "grad_norm": 2.030982494354248, "learning_rate": 2.8860766768234815e-06, "loss": 0.8752, "step": 5195 }, { "epoch": 2.733298264071541, "grad_norm": 2.1818740367889404, "learning_rate": 2.8853903217237e-06, "loss": 0.9106, "step": 5196 }, { "epoch": 2.7338243029984217, "grad_norm": 2.2460193634033203, "learning_rate": 2.8847039368673685e-06, "loss": 0.8802, "step": 5197 }, { "epoch": 2.7343503419253024, "grad_norm": 2.178748846054077, "learning_rate": 2.8840175223074828e-06, "loss": 0.8783, "step": 5198 }, { "epoch": 2.734876380852183, "grad_norm": 2.1220059394836426, "learning_rate": 2.883331078097043e-06, "loss": 0.8742, "step": 5199 }, { "epoch": 2.7354024197790636, "grad_norm": 2.1040196418762207, "learning_rate": 2.8826446042890493e-06, "loss": 0.8141, "step": 5200 }, { "epoch": 2.7359284587059443, "grad_norm": 2.150376319885254, "learning_rate": 2.8819581009365073e-06, "loss": 0.8617, "step": 5201 }, { "epoch": 2.736454497632825, "grad_norm": 3.347476005554199, "learning_rate": 2.8812715680924207e-06, "loss": 0.8534, "step": 5202 }, { "epoch": 2.7369805365597055, "grad_norm": 2.2195258140563965, "learning_rate": 2.8805850058097984e-06, "loss": 0.836, "step": 5203 }, { "epoch": 2.737506575486586, "grad_norm": 2.0441200733184814, "learning_rate": 2.8798984141416507e-06, "loss": 0.8807, "step": 5204 }, { "epoch": 2.7380326144134663, "grad_norm": 2.1395087242126465, "learning_rate": 2.8792117931409914e-06, "loss": 0.8675, "step": 5205 }, { "epoch": 2.7385586533403474, "grad_norm": 2.1000423431396484, "learning_rate": 2.8785251428608353e-06, "loss": 0.8829, "step": 5206 }, { "epoch": 2.7390846922672276, "grad_norm": 2.275969982147217, "learning_rate": 2.8778384633541988e-06, "loss": 0.8668, "step": 5207 }, { "epoch": 2.7396107311941083, "grad_norm": 2.0071678161621094, "learning_rate": 2.8771517546741013e-06, "loss": 0.8614, "step": 5208 }, { "epoch": 2.740136770120989, "grad_norm": 2.1880736351013184, "learning_rate": 2.8764650168735663e-06, "loss": 0.899, "step": 5209 }, { "epoch": 2.7406628090478695, "grad_norm": 2.1351335048675537, "learning_rate": 2.875778250005615e-06, "loss": 0.8632, "step": 5210 }, { "epoch": 2.74118884797475, "grad_norm": 2.0884382724761963, "learning_rate": 2.8750914541232767e-06, "loss": 0.8285, "step": 5211 }, { "epoch": 2.741714886901631, "grad_norm": 2.292912006378174, "learning_rate": 2.874404629279578e-06, "loss": 0.7969, "step": 5212 }, { "epoch": 2.7422409258285114, "grad_norm": 2.0256457328796387, "learning_rate": 2.8737177755275502e-06, "loss": 0.8671, "step": 5213 }, { "epoch": 2.742766964755392, "grad_norm": 2.063638210296631, "learning_rate": 2.8730308929202264e-06, "loss": 0.8511, "step": 5214 }, { "epoch": 2.7432930036822727, "grad_norm": 2.0310566425323486, "learning_rate": 2.872343981510642e-06, "loss": 0.8508, "step": 5215 }, { "epoch": 2.743819042609153, "grad_norm": 2.109600067138672, "learning_rate": 2.871657041351834e-06, "loss": 0.8436, "step": 5216 }, { "epoch": 2.7443450815360335, "grad_norm": 2.178868055343628, "learning_rate": 2.8709700724968416e-06, "loss": 0.911, "step": 5217 }, { "epoch": 2.744871120462914, "grad_norm": 2.049928665161133, "learning_rate": 2.8702830749987074e-06, "loss": 0.846, "step": 5218 }, { "epoch": 2.745397159389795, "grad_norm": 2.1712396144866943, "learning_rate": 2.869596048910476e-06, "loss": 0.8592, "step": 5219 }, { "epoch": 2.7459231983166754, "grad_norm": 2.2142767906188965, "learning_rate": 2.8689089942851926e-06, "loss": 0.9041, "step": 5220 }, { "epoch": 2.746449237243556, "grad_norm": 2.1801950931549072, "learning_rate": 2.868221911175906e-06, "loss": 0.9449, "step": 5221 }, { "epoch": 2.7469752761704367, "grad_norm": 1.9918333292007446, "learning_rate": 2.867534799635667e-06, "loss": 0.8454, "step": 5222 }, { "epoch": 2.7475013150973173, "grad_norm": 2.0739688873291016, "learning_rate": 2.866847659717529e-06, "loss": 0.8804, "step": 5223 }, { "epoch": 2.748027354024198, "grad_norm": 2.0737762451171875, "learning_rate": 2.866160491474546e-06, "loss": 0.8318, "step": 5224 }, { "epoch": 2.748553392951078, "grad_norm": 2.111987352371216, "learning_rate": 2.8654732949597762e-06, "loss": 0.8346, "step": 5225 }, { "epoch": 2.7490794318779592, "grad_norm": 2.0488698482513428, "learning_rate": 2.8647860702262787e-06, "loss": 0.8858, "step": 5226 }, { "epoch": 2.7496054708048394, "grad_norm": 2.0360288619995117, "learning_rate": 2.864098817327115e-06, "loss": 0.8683, "step": 5227 }, { "epoch": 2.75013150973172, "grad_norm": 1.9535809755325317, "learning_rate": 2.8634115363153486e-06, "loss": 0.8094, "step": 5228 }, { "epoch": 2.7506575486586007, "grad_norm": 2.2190473079681396, "learning_rate": 2.862724227244046e-06, "loss": 0.8818, "step": 5229 }, { "epoch": 2.7511835875854813, "grad_norm": 2.1683144569396973, "learning_rate": 2.8620368901662756e-06, "loss": 0.8054, "step": 5230 }, { "epoch": 2.751709626512362, "grad_norm": 2.085204839706421, "learning_rate": 2.861349525135107e-06, "loss": 0.8802, "step": 5231 }, { "epoch": 2.7522356654392426, "grad_norm": 2.1819474697113037, "learning_rate": 2.860662132203613e-06, "loss": 0.8762, "step": 5232 }, { "epoch": 2.752761704366123, "grad_norm": 2.148897886276245, "learning_rate": 2.8599747114248688e-06, "loss": 0.8266, "step": 5233 }, { "epoch": 2.7532877432930034, "grad_norm": 2.249756336212158, "learning_rate": 2.8592872628519504e-06, "loss": 0.8957, "step": 5234 }, { "epoch": 2.7538137822198845, "grad_norm": 2.1569700241088867, "learning_rate": 2.858599786537936e-06, "loss": 0.8587, "step": 5235 }, { "epoch": 2.7543398211467647, "grad_norm": 2.169220209121704, "learning_rate": 2.857912282535908e-06, "loss": 0.8183, "step": 5236 }, { "epoch": 2.7548658600736453, "grad_norm": 2.2666780948638916, "learning_rate": 2.85722475089895e-06, "loss": 0.8632, "step": 5237 }, { "epoch": 2.755391899000526, "grad_norm": 2.1614632606506348, "learning_rate": 2.8565371916801454e-06, "loss": 0.8443, "step": 5238 }, { "epoch": 2.7559179379274066, "grad_norm": 2.275144100189209, "learning_rate": 2.855849604932583e-06, "loss": 0.8757, "step": 5239 }, { "epoch": 2.756443976854287, "grad_norm": 2.644984483718872, "learning_rate": 2.855161990709352e-06, "loss": 0.876, "step": 5240 }, { "epoch": 2.756970015781168, "grad_norm": 2.1550536155700684, "learning_rate": 2.854474349063544e-06, "loss": 0.8316, "step": 5241 }, { "epoch": 2.7574960547080485, "grad_norm": 2.129417657852173, "learning_rate": 2.8537866800482534e-06, "loss": 0.824, "step": 5242 }, { "epoch": 2.758022093634929, "grad_norm": 2.015564203262329, "learning_rate": 2.853098983716575e-06, "loss": 0.8789, "step": 5243 }, { "epoch": 2.7585481325618098, "grad_norm": 2.1063907146453857, "learning_rate": 2.8524112601216087e-06, "loss": 0.9, "step": 5244 }, { "epoch": 2.75907417148869, "grad_norm": 2.0672435760498047, "learning_rate": 2.851723509316453e-06, "loss": 0.8275, "step": 5245 }, { "epoch": 2.759600210415571, "grad_norm": 2.304270029067993, "learning_rate": 2.85103573135421e-06, "loss": 0.8896, "step": 5246 }, { "epoch": 2.760126249342451, "grad_norm": 2.201354742050171, "learning_rate": 2.8503479262879853e-06, "loss": 0.9124, "step": 5247 }, { "epoch": 2.760652288269332, "grad_norm": 2.089156150817871, "learning_rate": 2.849660094170884e-06, "loss": 0.8249, "step": 5248 }, { "epoch": 2.7611783271962125, "grad_norm": 2.2052321434020996, "learning_rate": 2.8489722350560168e-06, "loss": 0.83, "step": 5249 }, { "epoch": 2.761704366123093, "grad_norm": 2.2449488639831543, "learning_rate": 2.848284348996492e-06, "loss": 0.8945, "step": 5250 }, { "epoch": 2.7622304050499737, "grad_norm": 2.080833673477173, "learning_rate": 2.8475964360454233e-06, "loss": 0.8633, "step": 5251 }, { "epoch": 2.7627564439768544, "grad_norm": 2.0980384349823, "learning_rate": 2.846908496255925e-06, "loss": 0.8597, "step": 5252 }, { "epoch": 2.763282482903735, "grad_norm": 2.3277063369750977, "learning_rate": 2.8462205296811146e-06, "loss": 0.8923, "step": 5253 }, { "epoch": 2.763808521830615, "grad_norm": 1.9724496603012085, "learning_rate": 2.8455325363741103e-06, "loss": 0.8161, "step": 5254 }, { "epoch": 2.7643345607574963, "grad_norm": 1.9394125938415527, "learning_rate": 2.844844516388034e-06, "loss": 0.8021, "step": 5255 }, { "epoch": 2.7648605996843765, "grad_norm": 2.432300090789795, "learning_rate": 2.844156469776008e-06, "loss": 0.8321, "step": 5256 }, { "epoch": 2.765386638611257, "grad_norm": 2.2546980381011963, "learning_rate": 2.843468396591157e-06, "loss": 0.8666, "step": 5257 }, { "epoch": 2.7659126775381377, "grad_norm": 2.1843931674957275, "learning_rate": 2.8427802968866087e-06, "loss": 0.8405, "step": 5258 }, { "epoch": 2.7664387164650184, "grad_norm": 2.3195505142211914, "learning_rate": 2.842092170715493e-06, "loss": 0.8539, "step": 5259 }, { "epoch": 2.766964755391899, "grad_norm": 2.011634349822998, "learning_rate": 2.84140401813094e-06, "loss": 0.8486, "step": 5260 }, { "epoch": 2.7674907943187796, "grad_norm": 2.083970308303833, "learning_rate": 2.8407158391860827e-06, "loss": 0.8779, "step": 5261 }, { "epoch": 2.7680168332456603, "grad_norm": 2.17366361618042, "learning_rate": 2.840027633934058e-06, "loss": 0.9122, "step": 5262 }, { "epoch": 2.768542872172541, "grad_norm": 2.0477182865142822, "learning_rate": 2.8393394024280024e-06, "loss": 0.8895, "step": 5263 }, { "epoch": 2.7690689110994215, "grad_norm": 2.1812543869018555, "learning_rate": 2.838651144721055e-06, "loss": 0.8814, "step": 5264 }, { "epoch": 2.7695949500263017, "grad_norm": 2.241117238998413, "learning_rate": 2.837962860866356e-06, "loss": 0.891, "step": 5265 }, { "epoch": 2.7701209889531824, "grad_norm": 2.2945120334625244, "learning_rate": 2.837274550917052e-06, "loss": 0.9053, "step": 5266 }, { "epoch": 2.770647027880063, "grad_norm": 2.0876104831695557, "learning_rate": 2.836586214926285e-06, "loss": 0.8648, "step": 5267 }, { "epoch": 2.7711730668069436, "grad_norm": 2.1584441661834717, "learning_rate": 2.8358978529472053e-06, "loss": 0.8701, "step": 5268 }, { "epoch": 2.7716991057338243, "grad_norm": 2.1225497722625732, "learning_rate": 2.8352094650329605e-06, "loss": 0.8505, "step": 5269 }, { "epoch": 2.772225144660705, "grad_norm": 2.221593141555786, "learning_rate": 2.834521051236703e-06, "loss": 0.8754, "step": 5270 }, { "epoch": 2.7727511835875855, "grad_norm": 2.0214076042175293, "learning_rate": 2.833832611611585e-06, "loss": 0.8392, "step": 5271 }, { "epoch": 2.773277222514466, "grad_norm": 2.1419191360473633, "learning_rate": 2.833144146210763e-06, "loss": 0.8747, "step": 5272 }, { "epoch": 2.773803261441347, "grad_norm": 2.104393482208252, "learning_rate": 2.8324556550873945e-06, "loss": 0.8555, "step": 5273 }, { "epoch": 2.774329300368227, "grad_norm": 2.0118255615234375, "learning_rate": 2.8317671382946376e-06, "loss": 0.8482, "step": 5274 }, { "epoch": 2.774855339295108, "grad_norm": 2.3004519939422607, "learning_rate": 2.831078595885656e-06, "loss": 0.8335, "step": 5275 }, { "epoch": 2.7753813782219883, "grad_norm": 2.2494354248046875, "learning_rate": 2.830390027913611e-06, "loss": 0.8577, "step": 5276 }, { "epoch": 2.775907417148869, "grad_norm": 2.0864474773406982, "learning_rate": 2.829701434431669e-06, "loss": 0.8467, "step": 5277 }, { "epoch": 2.7764334560757495, "grad_norm": 2.2120447158813477, "learning_rate": 2.8290128154929964e-06, "loss": 0.8327, "step": 5278 }, { "epoch": 2.77695949500263, "grad_norm": 2.294074058532715, "learning_rate": 2.828324171150763e-06, "loss": 0.8808, "step": 5279 }, { "epoch": 2.777485533929511, "grad_norm": 2.5046465396881104, "learning_rate": 2.8276355014581404e-06, "loss": 0.8675, "step": 5280 }, { "epoch": 2.7780115728563914, "grad_norm": 2.031752824783325, "learning_rate": 2.826946806468302e-06, "loss": 0.8336, "step": 5281 }, { "epoch": 2.778537611783272, "grad_norm": 1.954582691192627, "learning_rate": 2.826258086234421e-06, "loss": 0.8594, "step": 5282 }, { "epoch": 2.7790636507101527, "grad_norm": 2.0650768280029297, "learning_rate": 2.8255693408096774e-06, "loss": 0.8819, "step": 5283 }, { "epoch": 2.7795896896370333, "grad_norm": 2.2039341926574707, "learning_rate": 2.8248805702472484e-06, "loss": 0.8578, "step": 5284 }, { "epoch": 2.7801157285639135, "grad_norm": 2.084015130996704, "learning_rate": 2.8241917746003143e-06, "loss": 0.8659, "step": 5285 }, { "epoch": 2.780641767490794, "grad_norm": 2.391941547393799, "learning_rate": 2.8235029539220603e-06, "loss": 0.8481, "step": 5286 }, { "epoch": 2.781167806417675, "grad_norm": 2.067767381668091, "learning_rate": 2.8228141082656696e-06, "loss": 0.8211, "step": 5287 }, { "epoch": 2.7816938453445554, "grad_norm": 2.178492546081543, "learning_rate": 2.8221252376843294e-06, "loss": 0.786, "step": 5288 }, { "epoch": 2.782219884271436, "grad_norm": 2.002418279647827, "learning_rate": 2.8214363422312287e-06, "loss": 0.8198, "step": 5289 }, { "epoch": 2.7827459231983167, "grad_norm": 1.9593043327331543, "learning_rate": 2.8207474219595565e-06, "loss": 0.8458, "step": 5290 }, { "epoch": 2.7832719621251973, "grad_norm": 2.442033529281616, "learning_rate": 2.8200584769225083e-06, "loss": 0.8548, "step": 5291 }, { "epoch": 2.783798001052078, "grad_norm": 2.1798665523529053, "learning_rate": 2.819369507173276e-06, "loss": 0.8433, "step": 5292 }, { "epoch": 2.7843240399789586, "grad_norm": 2.018627643585205, "learning_rate": 2.8186805127650574e-06, "loss": 0.8003, "step": 5293 }, { "epoch": 2.784850078905839, "grad_norm": 2.350266218185425, "learning_rate": 2.81799149375105e-06, "loss": 0.8595, "step": 5294 }, { "epoch": 2.78537611783272, "grad_norm": 2.188613176345825, "learning_rate": 2.8173024501844544e-06, "loss": 0.8624, "step": 5295 }, { "epoch": 2.7859021567596, "grad_norm": 2.1122357845306396, "learning_rate": 2.8166133821184714e-06, "loss": 0.9251, "step": 5296 }, { "epoch": 2.7864281956864807, "grad_norm": 2.2160654067993164, "learning_rate": 2.815924289606307e-06, "loss": 0.8284, "step": 5297 }, { "epoch": 2.7869542346133613, "grad_norm": 2.2467801570892334, "learning_rate": 2.815235172701166e-06, "loss": 0.8764, "step": 5298 }, { "epoch": 2.787480273540242, "grad_norm": 2.138960123062134, "learning_rate": 2.8145460314562565e-06, "loss": 0.8588, "step": 5299 }, { "epoch": 2.7880063124671226, "grad_norm": 2.109307289123535, "learning_rate": 2.8138568659247876e-06, "loss": 0.855, "step": 5300 }, { "epoch": 2.7885323513940032, "grad_norm": 2.5954320430755615, "learning_rate": 2.8131676761599707e-06, "loss": 0.8671, "step": 5301 }, { "epoch": 2.789058390320884, "grad_norm": 2.022606134414673, "learning_rate": 2.81247846221502e-06, "loss": 0.8356, "step": 5302 }, { "epoch": 2.789584429247764, "grad_norm": 2.1255509853363037, "learning_rate": 2.811789224143149e-06, "loss": 0.8952, "step": 5303 }, { "epoch": 2.790110468174645, "grad_norm": 2.069589376449585, "learning_rate": 2.8110999619975764e-06, "loss": 0.8306, "step": 5304 }, { "epoch": 2.7906365071015253, "grad_norm": 2.103966236114502, "learning_rate": 2.8104106758315213e-06, "loss": 0.8637, "step": 5305 }, { "epoch": 2.791162546028406, "grad_norm": 2.103513240814209, "learning_rate": 2.8097213656982036e-06, "loss": 0.8481, "step": 5306 }, { "epoch": 2.7916885849552866, "grad_norm": 2.0879805088043213, "learning_rate": 2.8090320316508457e-06, "loss": 0.8702, "step": 5307 }, { "epoch": 2.7922146238821672, "grad_norm": 2.028095006942749, "learning_rate": 2.808342673742673e-06, "loss": 0.8733, "step": 5308 }, { "epoch": 2.792740662809048, "grad_norm": 2.2439136505126953, "learning_rate": 2.8076532920269117e-06, "loss": 0.8587, "step": 5309 }, { "epoch": 2.7932667017359285, "grad_norm": 2.1774730682373047, "learning_rate": 2.8069638865567887e-06, "loss": 0.7907, "step": 5310 }, { "epoch": 2.793792740662809, "grad_norm": 2.0717687606811523, "learning_rate": 2.806274457385536e-06, "loss": 0.9092, "step": 5311 }, { "epoch": 2.7943187795896898, "grad_norm": 2.1468582153320312, "learning_rate": 2.805585004566384e-06, "loss": 0.8452, "step": 5312 }, { "epoch": 2.7948448185165704, "grad_norm": 2.262733221054077, "learning_rate": 2.804895528152567e-06, "loss": 0.8423, "step": 5313 }, { "epoch": 2.7953708574434506, "grad_norm": 2.130955457687378, "learning_rate": 2.8042060281973197e-06, "loss": 0.8284, "step": 5314 }, { "epoch": 2.7958968963703317, "grad_norm": 2.129607915878296, "learning_rate": 2.8035165047538806e-06, "loss": 0.8817, "step": 5315 }, { "epoch": 2.796422935297212, "grad_norm": 2.0715420246124268, "learning_rate": 2.802826957875488e-06, "loss": 0.8241, "step": 5316 }, { "epoch": 2.7969489742240925, "grad_norm": 2.1744885444641113, "learning_rate": 2.8021373876153834e-06, "loss": 0.8859, "step": 5317 }, { "epoch": 2.797475013150973, "grad_norm": 2.2784790992736816, "learning_rate": 2.8014477940268085e-06, "loss": 0.8639, "step": 5318 }, { "epoch": 2.7980010520778538, "grad_norm": 2.6049916744232178, "learning_rate": 2.8007581771630087e-06, "loss": 0.8512, "step": 5319 }, { "epoch": 2.7985270910047344, "grad_norm": 2.1853280067443848, "learning_rate": 2.80006853707723e-06, "loss": 0.8891, "step": 5320 }, { "epoch": 2.799053129931615, "grad_norm": 2.333702802658081, "learning_rate": 2.7993788738227207e-06, "loss": 0.8182, "step": 5321 }, { "epoch": 2.7995791688584957, "grad_norm": 2.255486249923706, "learning_rate": 2.7986891874527305e-06, "loss": 0.8759, "step": 5322 }, { "epoch": 2.800105207785376, "grad_norm": 2.1415271759033203, "learning_rate": 2.7979994780205115e-06, "loss": 0.8675, "step": 5323 }, { "epoch": 2.800631246712257, "grad_norm": 2.1210906505584717, "learning_rate": 2.7973097455793167e-06, "loss": 0.8799, "step": 5324 }, { "epoch": 2.801157285639137, "grad_norm": 2.123589277267456, "learning_rate": 2.796619990182402e-06, "loss": 0.8902, "step": 5325 }, { "epoch": 2.8016833245660178, "grad_norm": 2.1194303035736084, "learning_rate": 2.7959302118830227e-06, "loss": 0.9113, "step": 5326 }, { "epoch": 2.8022093634928984, "grad_norm": 2.114001512527466, "learning_rate": 2.7952404107344404e-06, "loss": 0.8482, "step": 5327 }, { "epoch": 2.802735402419779, "grad_norm": 2.0249998569488525, "learning_rate": 2.7945505867899133e-06, "loss": 0.8324, "step": 5328 }, { "epoch": 2.8032614413466597, "grad_norm": 2.089390277862549, "learning_rate": 2.7938607401027044e-06, "loss": 0.8508, "step": 5329 }, { "epoch": 2.8037874802735403, "grad_norm": 2.1537797451019287, "learning_rate": 2.7931708707260785e-06, "loss": 0.8733, "step": 5330 }, { "epoch": 2.804313519200421, "grad_norm": 2.057251453399658, "learning_rate": 2.7924809787133006e-06, "loss": 0.8009, "step": 5331 }, { "epoch": 2.8048395581273016, "grad_norm": 2.0963268280029297, "learning_rate": 2.7917910641176384e-06, "loss": 0.8732, "step": 5332 }, { "epoch": 2.805365597054182, "grad_norm": 2.1280508041381836, "learning_rate": 2.7911011269923604e-06, "loss": 0.8998, "step": 5333 }, { "epoch": 2.8058916359810624, "grad_norm": 2.0244622230529785, "learning_rate": 2.79041116739074e-06, "loss": 0.8647, "step": 5334 }, { "epoch": 2.806417674907943, "grad_norm": 2.2100234031677246, "learning_rate": 2.7897211853660485e-06, "loss": 0.8624, "step": 5335 }, { "epoch": 2.8069437138348237, "grad_norm": 2.1573469638824463, "learning_rate": 2.78903118097156e-06, "loss": 0.844, "step": 5336 }, { "epoch": 2.8074697527617043, "grad_norm": 2.0012574195861816, "learning_rate": 2.7883411542605503e-06, "loss": 0.8499, "step": 5337 }, { "epoch": 2.807995791688585, "grad_norm": 2.0572893619537354, "learning_rate": 2.7876511052862997e-06, "loss": 0.8169, "step": 5338 }, { "epoch": 2.8085218306154656, "grad_norm": 2.2022910118103027, "learning_rate": 2.7869610341020857e-06, "loss": 0.8737, "step": 5339 }, { "epoch": 2.809047869542346, "grad_norm": 2.1151015758514404, "learning_rate": 2.7862709407611903e-06, "loss": 0.8787, "step": 5340 }, { "epoch": 2.809573908469227, "grad_norm": 2.0495872497558594, "learning_rate": 2.785580825316898e-06, "loss": 0.8482, "step": 5341 }, { "epoch": 2.8100999473961075, "grad_norm": 2.2020013332366943, "learning_rate": 2.784890687822492e-06, "loss": 0.8417, "step": 5342 }, { "epoch": 2.8106259863229877, "grad_norm": 2.129423141479492, "learning_rate": 2.784200528331259e-06, "loss": 0.8223, "step": 5343 }, { "epoch": 2.8111520252498687, "grad_norm": 2.167365550994873, "learning_rate": 2.7835103468964867e-06, "loss": 0.8688, "step": 5344 }, { "epoch": 2.811678064176749, "grad_norm": 2.2323029041290283, "learning_rate": 2.782820143571467e-06, "loss": 0.8932, "step": 5345 }, { "epoch": 2.8122041031036296, "grad_norm": 2.1835832595825195, "learning_rate": 2.7821299184094895e-06, "loss": 0.8796, "step": 5346 }, { "epoch": 2.81273014203051, "grad_norm": 2.0453941822052, "learning_rate": 2.781439671463849e-06, "loss": 0.8426, "step": 5347 }, { "epoch": 2.813256180957391, "grad_norm": 2.0816233158111572, "learning_rate": 2.78074940278784e-06, "loss": 0.7997, "step": 5348 }, { "epoch": 2.8137822198842715, "grad_norm": 2.1493337154388428, "learning_rate": 2.780059112434758e-06, "loss": 0.8535, "step": 5349 }, { "epoch": 2.814308258811152, "grad_norm": 2.0851123332977295, "learning_rate": 2.779368800457902e-06, "loss": 0.8628, "step": 5350 }, { "epoch": 2.8148342977380327, "grad_norm": 2.0202455520629883, "learning_rate": 2.7786784669105733e-06, "loss": 0.8586, "step": 5351 }, { "epoch": 2.8153603366649134, "grad_norm": 2.252342700958252, "learning_rate": 2.777988111846072e-06, "loss": 0.9041, "step": 5352 }, { "epoch": 2.815886375591794, "grad_norm": 2.1639673709869385, "learning_rate": 2.777297735317702e-06, "loss": 0.8408, "step": 5353 }, { "epoch": 2.816412414518674, "grad_norm": 2.1529381275177, "learning_rate": 2.776607337378769e-06, "loss": 0.8634, "step": 5354 }, { "epoch": 2.816938453445555, "grad_norm": 2.1796717643737793, "learning_rate": 2.7759169180825774e-06, "loss": 0.8339, "step": 5355 }, { "epoch": 2.8174644923724355, "grad_norm": 2.054490327835083, "learning_rate": 2.775226477482438e-06, "loss": 0.8676, "step": 5356 }, { "epoch": 2.817990531299316, "grad_norm": 2.1466805934906006, "learning_rate": 2.7745360156316593e-06, "loss": 0.8333, "step": 5357 }, { "epoch": 2.8185165702261967, "grad_norm": 2.1574718952178955, "learning_rate": 2.773845532583553e-06, "loss": 0.8718, "step": 5358 }, { "epoch": 2.8190426091530774, "grad_norm": 2.317553997039795, "learning_rate": 2.7731550283914333e-06, "loss": 0.8827, "step": 5359 }, { "epoch": 2.819568648079958, "grad_norm": 2.229602098464966, "learning_rate": 2.7724645031086144e-06, "loss": 0.8519, "step": 5360 }, { "epoch": 2.8200946870068386, "grad_norm": 2.2534587383270264, "learning_rate": 2.7717739567884118e-06, "loss": 0.863, "step": 5361 }, { "epoch": 2.8206207259337193, "grad_norm": 2.0465610027313232, "learning_rate": 2.7710833894841444e-06, "loss": 0.8388, "step": 5362 }, { "epoch": 2.8211467648605995, "grad_norm": 2.142087459564209, "learning_rate": 2.7703928012491325e-06, "loss": 0.8309, "step": 5363 }, { "epoch": 2.8216728037874805, "grad_norm": 2.2084968090057373, "learning_rate": 2.769702192136697e-06, "loss": 0.8605, "step": 5364 }, { "epoch": 2.8221988427143607, "grad_norm": 2.127042293548584, "learning_rate": 2.7690115622001605e-06, "loss": 0.8482, "step": 5365 }, { "epoch": 2.8227248816412414, "grad_norm": 1.9140255451202393, "learning_rate": 2.7683209114928478e-06, "loss": 0.8721, "step": 5366 }, { "epoch": 2.823250920568122, "grad_norm": 2.391310453414917, "learning_rate": 2.767630240068085e-06, "loss": 0.8176, "step": 5367 }, { "epoch": 2.8237769594950026, "grad_norm": 2.1941120624542236, "learning_rate": 2.7669395479791994e-06, "loss": 0.8648, "step": 5368 }, { "epoch": 2.8243029984218833, "grad_norm": 2.3753623962402344, "learning_rate": 2.766248835279521e-06, "loss": 0.872, "step": 5369 }, { "epoch": 2.824829037348764, "grad_norm": 1.9920499324798584, "learning_rate": 2.765558102022381e-06, "loss": 0.8079, "step": 5370 }, { "epoch": 2.8253550762756445, "grad_norm": 2.2767977714538574, "learning_rate": 2.764867348261111e-06, "loss": 0.7868, "step": 5371 }, { "epoch": 2.8258811152025247, "grad_norm": 2.1764142513275146, "learning_rate": 2.7641765740490467e-06, "loss": 0.8649, "step": 5372 }, { "epoch": 2.826407154129406, "grad_norm": 2.0671937465667725, "learning_rate": 2.763485779439522e-06, "loss": 0.8442, "step": 5373 }, { "epoch": 2.826933193056286, "grad_norm": 2.023116111755371, "learning_rate": 2.762794964485874e-06, "loss": 0.9023, "step": 5374 }, { "epoch": 2.8274592319831666, "grad_norm": 2.1761550903320312, "learning_rate": 2.762104129241443e-06, "loss": 0.8679, "step": 5375 }, { "epoch": 2.8279852709100473, "grad_norm": 2.348499298095703, "learning_rate": 2.761413273759569e-06, "loss": 0.8918, "step": 5376 }, { "epoch": 2.828511309836928, "grad_norm": 2.194767713546753, "learning_rate": 2.7607223980935936e-06, "loss": 0.9192, "step": 5377 }, { "epoch": 2.8290373487638085, "grad_norm": 2.2609801292419434, "learning_rate": 2.760031502296861e-06, "loss": 0.8319, "step": 5378 }, { "epoch": 2.829563387690689, "grad_norm": 2.070990800857544, "learning_rate": 2.7593405864227152e-06, "loss": 0.8522, "step": 5379 }, { "epoch": 2.83008942661757, "grad_norm": 2.081516981124878, "learning_rate": 2.758649650524503e-06, "loss": 0.8407, "step": 5380 }, { "epoch": 2.8306154655444504, "grad_norm": 2.0562057495117188, "learning_rate": 2.757958694655574e-06, "loss": 0.8478, "step": 5381 }, { "epoch": 2.831141504471331, "grad_norm": 2.1489832401275635, "learning_rate": 2.7572677188692755e-06, "loss": 0.8547, "step": 5382 }, { "epoch": 2.8316675433982113, "grad_norm": 1.9609392881393433, "learning_rate": 2.7565767232189605e-06, "loss": 0.8289, "step": 5383 }, { "epoch": 2.8321935823250923, "grad_norm": 2.256464958190918, "learning_rate": 2.755885707757982e-06, "loss": 0.8541, "step": 5384 }, { "epoch": 2.8327196212519725, "grad_norm": 2.0667736530303955, "learning_rate": 2.755194672539694e-06, "loss": 0.8538, "step": 5385 }, { "epoch": 2.833245660178853, "grad_norm": 2.154191732406616, "learning_rate": 2.7545036176174505e-06, "loss": 0.8679, "step": 5386 }, { "epoch": 2.833771699105734, "grad_norm": 2.1617021560668945, "learning_rate": 2.753812543044611e-06, "loss": 0.8737, "step": 5387 }, { "epoch": 2.8342977380326144, "grad_norm": 2.225125789642334, "learning_rate": 2.7531214488745347e-06, "loss": 0.8412, "step": 5388 }, { "epoch": 2.834823776959495, "grad_norm": 2.1514060497283936, "learning_rate": 2.7524303351605802e-06, "loss": 0.9023, "step": 5389 }, { "epoch": 2.8353498158863757, "grad_norm": 2.1900529861450195, "learning_rate": 2.7517392019561105e-06, "loss": 0.9267, "step": 5390 }, { "epoch": 2.8358758548132563, "grad_norm": 2.0870845317840576, "learning_rate": 2.751048049314489e-06, "loss": 0.8419, "step": 5391 }, { "epoch": 2.8364018937401365, "grad_norm": 2.0744030475616455, "learning_rate": 2.7503568772890798e-06, "loss": 0.8446, "step": 5392 }, { "epoch": 2.8369279326670176, "grad_norm": 2.026177167892456, "learning_rate": 2.7496656859332504e-06, "loss": 0.8268, "step": 5393 }, { "epoch": 2.837453971593898, "grad_norm": 2.1333305835723877, "learning_rate": 2.7489744753003677e-06, "loss": 0.8233, "step": 5394 }, { "epoch": 2.8379800105207784, "grad_norm": 2.1306352615356445, "learning_rate": 2.7482832454438026e-06, "loss": 0.8812, "step": 5395 }, { "epoch": 2.838506049447659, "grad_norm": 2.2063076496124268, "learning_rate": 2.747591996416924e-06, "loss": 0.8355, "step": 5396 }, { "epoch": 2.8390320883745397, "grad_norm": 2.006971597671509, "learning_rate": 2.7469007282731055e-06, "loss": 0.838, "step": 5397 }, { "epoch": 2.8395581273014203, "grad_norm": 2.162226915359497, "learning_rate": 2.7462094410657203e-06, "loss": 0.8704, "step": 5398 }, { "epoch": 2.840084166228301, "grad_norm": 2.070786476135254, "learning_rate": 2.745518134848144e-06, "loss": 0.8637, "step": 5399 }, { "epoch": 2.8406102051551816, "grad_norm": 2.0638136863708496, "learning_rate": 2.7448268096737534e-06, "loss": 0.8973, "step": 5400 }, { "epoch": 2.8411362440820622, "grad_norm": 2.134835958480835, "learning_rate": 2.744135465595927e-06, "loss": 0.9224, "step": 5401 }, { "epoch": 2.841662283008943, "grad_norm": 2.1903038024902344, "learning_rate": 2.7434441026680443e-06, "loss": 0.8589, "step": 5402 }, { "epoch": 2.842188321935823, "grad_norm": 2.039916515350342, "learning_rate": 2.742752720943486e-06, "loss": 0.8172, "step": 5403 }, { "epoch": 2.8427143608627037, "grad_norm": 2.346670389175415, "learning_rate": 2.742061320475635e-06, "loss": 0.8433, "step": 5404 }, { "epoch": 2.8432403997895843, "grad_norm": 2.188767671585083, "learning_rate": 2.741369901317875e-06, "loss": 0.8706, "step": 5405 }, { "epoch": 2.843766438716465, "grad_norm": 2.1683034896850586, "learning_rate": 2.7406784635235928e-06, "loss": 0.9149, "step": 5406 }, { "epoch": 2.8442924776433456, "grad_norm": 2.1542139053344727, "learning_rate": 2.7399870071461738e-06, "loss": 0.8992, "step": 5407 }, { "epoch": 2.844818516570226, "grad_norm": 2.14772891998291, "learning_rate": 2.739295532239007e-06, "loss": 0.875, "step": 5408 }, { "epoch": 2.845344555497107, "grad_norm": 2.0942463874816895, "learning_rate": 2.7386040388554825e-06, "loss": 0.8123, "step": 5409 }, { "epoch": 2.8458705944239875, "grad_norm": 2.295013427734375, "learning_rate": 2.73791252704899e-06, "loss": 0.864, "step": 5410 }, { "epoch": 2.846396633350868, "grad_norm": 2.2136921882629395, "learning_rate": 2.737220996872924e-06, "loss": 0.924, "step": 5411 }, { "epoch": 2.8469226722777483, "grad_norm": 2.0614030361175537, "learning_rate": 2.7365294483806783e-06, "loss": 0.8539, "step": 5412 }, { "epoch": 2.8474487112046294, "grad_norm": 2.233582019805908, "learning_rate": 2.7358378816256476e-06, "loss": 0.8993, "step": 5413 }, { "epoch": 2.8479747501315096, "grad_norm": 2.3411264419555664, "learning_rate": 2.7351462966612287e-06, "loss": 0.9088, "step": 5414 }, { "epoch": 2.84850078905839, "grad_norm": 2.22340726852417, "learning_rate": 2.7344546935408205e-06, "loss": 0.8976, "step": 5415 }, { "epoch": 2.849026827985271, "grad_norm": 2.0445773601531982, "learning_rate": 2.7337630723178227e-06, "loss": 0.8794, "step": 5416 }, { "epoch": 2.8495528669121515, "grad_norm": 2.196671962738037, "learning_rate": 2.7330714330456358e-06, "loss": 0.9139, "step": 5417 }, { "epoch": 2.850078905839032, "grad_norm": 2.0170178413391113, "learning_rate": 2.732379775777662e-06, "loss": 0.8343, "step": 5418 }, { "epoch": 2.8506049447659128, "grad_norm": 2.2108583450317383, "learning_rate": 2.7316881005673067e-06, "loss": 0.8739, "step": 5419 }, { "epoch": 2.8511309836927934, "grad_norm": 2.305912733078003, "learning_rate": 2.730996407467974e-06, "loss": 0.8834, "step": 5420 }, { "epoch": 2.851657022619674, "grad_norm": 2.1281137466430664, "learning_rate": 2.7303046965330713e-06, "loss": 0.8599, "step": 5421 }, { "epoch": 2.8521830615465547, "grad_norm": 2.0392191410064697, "learning_rate": 2.7296129678160055e-06, "loss": 0.8348, "step": 5422 }, { "epoch": 2.852709100473435, "grad_norm": 2.233207941055298, "learning_rate": 2.7289212213701873e-06, "loss": 0.8621, "step": 5423 }, { "epoch": 2.8532351394003155, "grad_norm": 2.118955612182617, "learning_rate": 2.728229457249025e-06, "loss": 0.8641, "step": 5424 }, { "epoch": 2.853761178327196, "grad_norm": 2.1662497520446777, "learning_rate": 2.7275376755059333e-06, "loss": 0.8467, "step": 5425 }, { "epoch": 2.8542872172540767, "grad_norm": 2.1501529216766357, "learning_rate": 2.726845876194326e-06, "loss": 0.8558, "step": 5426 }, { "epoch": 2.8548132561809574, "grad_norm": 2.195068597793579, "learning_rate": 2.7261540593676163e-06, "loss": 0.9397, "step": 5427 }, { "epoch": 2.855339295107838, "grad_norm": 2.2474489212036133, "learning_rate": 2.72546222507922e-06, "loss": 0.8476, "step": 5428 }, { "epoch": 2.8558653340347187, "grad_norm": 2.242422580718994, "learning_rate": 2.724770373382556e-06, "loss": 0.8253, "step": 5429 }, { "epoch": 2.8563913729615993, "grad_norm": 2.041154623031616, "learning_rate": 2.724078504331042e-06, "loss": 0.8447, "step": 5430 }, { "epoch": 2.85691741188848, "grad_norm": 2.1431264877319336, "learning_rate": 2.7233866179781e-06, "loss": 0.8458, "step": 5431 }, { "epoch": 2.85744345081536, "grad_norm": 2.131866216659546, "learning_rate": 2.7226947143771498e-06, "loss": 0.8971, "step": 5432 }, { "epoch": 2.857969489742241, "grad_norm": 2.057859182357788, "learning_rate": 2.722002793581615e-06, "loss": 0.8423, "step": 5433 }, { "epoch": 2.8584955286691214, "grad_norm": 2.2008755207061768, "learning_rate": 2.72131085564492e-06, "loss": 0.8395, "step": 5434 }, { "epoch": 2.859021567596002, "grad_norm": 2.138016700744629, "learning_rate": 2.7206189006204904e-06, "loss": 0.926, "step": 5435 }, { "epoch": 2.8595476065228826, "grad_norm": 2.056736707687378, "learning_rate": 2.719926928561752e-06, "loss": 0.8634, "step": 5436 }, { "epoch": 2.8600736454497633, "grad_norm": 2.150646924972534, "learning_rate": 2.719234939522134e-06, "loss": 0.8899, "step": 5437 }, { "epoch": 2.860599684376644, "grad_norm": 2.081115961074829, "learning_rate": 2.7185429335550654e-06, "loss": 0.8553, "step": 5438 }, { "epoch": 2.8611257233035245, "grad_norm": 2.122917890548706, "learning_rate": 2.7178509107139783e-06, "loss": 0.8288, "step": 5439 }, { "epoch": 2.861651762230405, "grad_norm": 2.4551570415496826, "learning_rate": 2.717158871052303e-06, "loss": 0.8326, "step": 5440 }, { "epoch": 2.8621778011572854, "grad_norm": 2.469022512435913, "learning_rate": 2.7164668146234737e-06, "loss": 0.852, "step": 5441 }, { "epoch": 2.8627038400841665, "grad_norm": 2.1991848945617676, "learning_rate": 2.715774741480924e-06, "loss": 0.8895, "step": 5442 }, { "epoch": 2.8632298790110466, "grad_norm": 2.1287388801574707, "learning_rate": 2.715082651678092e-06, "loss": 0.9055, "step": 5443 }, { "epoch": 2.8637559179379273, "grad_norm": 2.2542526721954346, "learning_rate": 2.7143905452684134e-06, "loss": 0.9259, "step": 5444 }, { "epoch": 2.864281956864808, "grad_norm": 2.1360507011413574, "learning_rate": 2.7136984223053274e-06, "loss": 0.8804, "step": 5445 }, { "epoch": 2.8648079957916885, "grad_norm": 2.2134244441986084, "learning_rate": 2.7130062828422737e-06, "loss": 0.866, "step": 5446 }, { "epoch": 2.865334034718569, "grad_norm": 1.996119499206543, "learning_rate": 2.712314126932693e-06, "loss": 0.8935, "step": 5447 }, { "epoch": 2.86586007364545, "grad_norm": 2.1010327339172363, "learning_rate": 2.711621954630029e-06, "loss": 0.8721, "step": 5448 }, { "epoch": 2.8663861125723304, "grad_norm": 2.184575080871582, "learning_rate": 2.710929765987723e-06, "loss": 0.9289, "step": 5449 }, { "epoch": 2.866912151499211, "grad_norm": 2.200265407562256, "learning_rate": 2.7102375610592213e-06, "loss": 0.8197, "step": 5450 }, { "epoch": 2.8674381904260917, "grad_norm": 2.0980114936828613, "learning_rate": 2.709545339897971e-06, "loss": 0.8259, "step": 5451 }, { "epoch": 2.867964229352972, "grad_norm": 2.0895886421203613, "learning_rate": 2.7088531025574183e-06, "loss": 0.8201, "step": 5452 }, { "epoch": 2.868490268279853, "grad_norm": 2.1912758350372314, "learning_rate": 2.708160849091011e-06, "loss": 0.885, "step": 5453 }, { "epoch": 2.869016307206733, "grad_norm": 2.1283578872680664, "learning_rate": 2.7074685795522006e-06, "loss": 0.8634, "step": 5454 }, { "epoch": 2.869542346133614, "grad_norm": 2.156942844390869, "learning_rate": 2.7067762939944374e-06, "loss": 0.8644, "step": 5455 }, { "epoch": 2.8700683850604944, "grad_norm": 2.1844921112060547, "learning_rate": 2.706083992471175e-06, "loss": 0.8888, "step": 5456 }, { "epoch": 2.870594423987375, "grad_norm": 2.1616597175598145, "learning_rate": 2.7053916750358654e-06, "loss": 0.8436, "step": 5457 }, { "epoch": 2.8711204629142557, "grad_norm": 2.204432487487793, "learning_rate": 2.704699341741964e-06, "loss": 0.8833, "step": 5458 }, { "epoch": 2.8716465018411363, "grad_norm": 1.9942139387130737, "learning_rate": 2.704006992642928e-06, "loss": 0.8965, "step": 5459 }, { "epoch": 2.872172540768017, "grad_norm": 2.1086528301239014, "learning_rate": 2.703314627792213e-06, "loss": 0.8393, "step": 5460 }, { "epoch": 2.872698579694897, "grad_norm": 2.311593532562256, "learning_rate": 2.7026222472432773e-06, "loss": 0.831, "step": 5461 }, { "epoch": 2.8732246186217782, "grad_norm": 1.9757003784179688, "learning_rate": 2.7019298510495822e-06, "loss": 0.8388, "step": 5462 }, { "epoch": 2.8737506575486584, "grad_norm": 2.0773825645446777, "learning_rate": 2.7012374392645886e-06, "loss": 0.8277, "step": 5463 }, { "epoch": 2.874276696475539, "grad_norm": 2.1253254413604736, "learning_rate": 2.7005450119417576e-06, "loss": 0.8707, "step": 5464 }, { "epoch": 2.8748027354024197, "grad_norm": 2.1837973594665527, "learning_rate": 2.6998525691345524e-06, "loss": 0.8589, "step": 5465 }, { "epoch": 2.8753287743293003, "grad_norm": 2.4215993881225586, "learning_rate": 2.699160110896438e-06, "loss": 0.8539, "step": 5466 }, { "epoch": 2.875854813256181, "grad_norm": 2.0694470405578613, "learning_rate": 2.6984676372808803e-06, "loss": 0.8482, "step": 5467 }, { "epoch": 2.8763808521830616, "grad_norm": 2.1323864459991455, "learning_rate": 2.697775148341346e-06, "loss": 0.8544, "step": 5468 }, { "epoch": 2.8769068911099422, "grad_norm": 2.1615917682647705, "learning_rate": 2.6970826441313038e-06, "loss": 0.8334, "step": 5469 }, { "epoch": 2.877432930036823, "grad_norm": 2.0499861240386963, "learning_rate": 2.696390124704222e-06, "loss": 0.8335, "step": 5470 }, { "epoch": 2.8779589689637035, "grad_norm": 2.226217746734619, "learning_rate": 2.695697590113571e-06, "loss": 0.8885, "step": 5471 }, { "epoch": 2.8784850078905837, "grad_norm": 2.0294673442840576, "learning_rate": 2.6950050404128224e-06, "loss": 0.8642, "step": 5472 }, { "epoch": 2.8790110468174643, "grad_norm": 2.074906349182129, "learning_rate": 2.6943124756554504e-06, "loss": 0.8689, "step": 5473 }, { "epoch": 2.879537085744345, "grad_norm": 2.305142879486084, "learning_rate": 2.6936198958949274e-06, "loss": 0.8737, "step": 5474 }, { "epoch": 2.8800631246712256, "grad_norm": 2.1322145462036133, "learning_rate": 2.692927301184729e-06, "loss": 0.8386, "step": 5475 }, { "epoch": 2.8805891635981062, "grad_norm": 2.0141525268554688, "learning_rate": 2.6922346915783316e-06, "loss": 0.8535, "step": 5476 }, { "epoch": 2.881115202524987, "grad_norm": 2.101836681365967, "learning_rate": 2.691542067129213e-06, "loss": 0.8383, "step": 5477 }, { "epoch": 2.8816412414518675, "grad_norm": 2.093751907348633, "learning_rate": 2.6908494278908504e-06, "loss": 0.8331, "step": 5478 }, { "epoch": 2.882167280378748, "grad_norm": 2.124910593032837, "learning_rate": 2.6901567739167246e-06, "loss": 0.8627, "step": 5479 }, { "epoch": 2.8826933193056288, "grad_norm": 2.074191093444824, "learning_rate": 2.689464105260316e-06, "loss": 0.801, "step": 5480 }, { "epoch": 2.883219358232509, "grad_norm": 2.2795424461364746, "learning_rate": 2.6887714219751064e-06, "loss": 0.8456, "step": 5481 }, { "epoch": 2.88374539715939, "grad_norm": 2.201787233352661, "learning_rate": 2.68807872411458e-06, "loss": 0.8937, "step": 5482 }, { "epoch": 2.8842714360862702, "grad_norm": 2.156515598297119, "learning_rate": 2.6873860117322194e-06, "loss": 0.8545, "step": 5483 }, { "epoch": 2.884797475013151, "grad_norm": 2.004132032394409, "learning_rate": 2.6866932848815112e-06, "loss": 0.8582, "step": 5484 }, { "epoch": 2.8853235139400315, "grad_norm": 2.041616201400757, "learning_rate": 2.6860005436159413e-06, "loss": 0.8644, "step": 5485 }, { "epoch": 2.885849552866912, "grad_norm": 2.154232978820801, "learning_rate": 2.6853077879889973e-06, "loss": 0.9074, "step": 5486 }, { "epoch": 2.8863755917937928, "grad_norm": 2.387528657913208, "learning_rate": 2.684615018054168e-06, "loss": 0.8715, "step": 5487 }, { "epoch": 2.8869016307206734, "grad_norm": 2.173146963119507, "learning_rate": 2.6839222338649434e-06, "loss": 0.8761, "step": 5488 }, { "epoch": 2.887427669647554, "grad_norm": 2.1991143226623535, "learning_rate": 2.6832294354748134e-06, "loss": 0.9385, "step": 5489 }, { "epoch": 2.8879537085744342, "grad_norm": 2.0519092082977295, "learning_rate": 2.6825366229372706e-06, "loss": 0.8152, "step": 5490 }, { "epoch": 2.8884797475013153, "grad_norm": 2.097606658935547, "learning_rate": 2.6818437963058085e-06, "loss": 0.8726, "step": 5491 }, { "epoch": 2.8890057864281955, "grad_norm": 2.3182342052459717, "learning_rate": 2.681150955633921e-06, "loss": 0.8441, "step": 5492 }, { "epoch": 2.889531825355076, "grad_norm": 2.119159460067749, "learning_rate": 2.680458100975103e-06, "loss": 0.88, "step": 5493 }, { "epoch": 2.8900578642819568, "grad_norm": 2.030351161956787, "learning_rate": 2.6797652323828515e-06, "loss": 0.8531, "step": 5494 }, { "epoch": 2.8905839032088374, "grad_norm": 2.080674409866333, "learning_rate": 2.679072349910663e-06, "loss": 0.885, "step": 5495 }, { "epoch": 2.891109942135718, "grad_norm": 2.0554535388946533, "learning_rate": 2.6783794536120367e-06, "loss": 0.8832, "step": 5496 }, { "epoch": 2.8916359810625987, "grad_norm": 2.095919609069824, "learning_rate": 2.6776865435404716e-06, "loss": 0.8742, "step": 5497 }, { "epoch": 2.8921620199894793, "grad_norm": 2.2091104984283447, "learning_rate": 2.6769936197494685e-06, "loss": 0.8867, "step": 5498 }, { "epoch": 2.89268805891636, "grad_norm": 2.0908563137054443, "learning_rate": 2.676300682292529e-06, "loss": 0.8869, "step": 5499 }, { "epoch": 2.8932140978432406, "grad_norm": 2.0515379905700684, "learning_rate": 2.6756077312231564e-06, "loss": 0.9042, "step": 5500 }, { "epoch": 2.8937401367701208, "grad_norm": 2.233567953109741, "learning_rate": 2.674914766594854e-06, "loss": 0.841, "step": 5501 }, { "epoch": 2.894266175697002, "grad_norm": 2.1680727005004883, "learning_rate": 2.6742217884611264e-06, "loss": 0.8536, "step": 5502 }, { "epoch": 2.894792214623882, "grad_norm": 2.1029632091522217, "learning_rate": 2.67352879687548e-06, "loss": 0.8654, "step": 5503 }, { "epoch": 2.8953182535507627, "grad_norm": 2.2548248767852783, "learning_rate": 2.67283579189142e-06, "loss": 0.8549, "step": 5504 }, { "epoch": 2.8958442924776433, "grad_norm": 2.086467981338501, "learning_rate": 2.6721427735624572e-06, "loss": 0.859, "step": 5505 }, { "epoch": 2.896370331404524, "grad_norm": 1.9999185800552368, "learning_rate": 2.6714497419420983e-06, "loss": 0.7951, "step": 5506 }, { "epoch": 2.8968963703314046, "grad_norm": 2.1137852668762207, "learning_rate": 2.6707566970838544e-06, "loss": 0.8649, "step": 5507 }, { "epoch": 2.897422409258285, "grad_norm": 2.2565581798553467, "learning_rate": 2.670063639041236e-06, "loss": 0.8291, "step": 5508 }, { "epoch": 2.897948448185166, "grad_norm": 2.156967878341675, "learning_rate": 2.6693705678677555e-06, "loss": 0.8815, "step": 5509 }, { "epoch": 2.898474487112046, "grad_norm": 2.1462979316711426, "learning_rate": 2.6686774836169256e-06, "loss": 0.8504, "step": 5510 }, { "epoch": 2.899000526038927, "grad_norm": 2.2066869735717773, "learning_rate": 2.6679843863422594e-06, "loss": 0.8886, "step": 5511 }, { "epoch": 2.8995265649658073, "grad_norm": 2.2202091217041016, "learning_rate": 2.6672912760972745e-06, "loss": 0.9103, "step": 5512 }, { "epoch": 2.900052603892688, "grad_norm": 2.1201012134552, "learning_rate": 2.666598152935485e-06, "loss": 0.8241, "step": 5513 }, { "epoch": 2.9005786428195686, "grad_norm": 2.14327335357666, "learning_rate": 2.6659050169104085e-06, "loss": 0.8659, "step": 5514 }, { "epoch": 2.901104681746449, "grad_norm": 2.154996871948242, "learning_rate": 2.6652118680755623e-06, "loss": 0.8722, "step": 5515 }, { "epoch": 2.90163072067333, "grad_norm": 2.3348562717437744, "learning_rate": 2.664518706484467e-06, "loss": 0.8839, "step": 5516 }, { "epoch": 2.9021567596002105, "grad_norm": 2.037233352661133, "learning_rate": 2.663825532190641e-06, "loss": 0.8853, "step": 5517 }, { "epoch": 2.902682798527091, "grad_norm": 2.5984809398651123, "learning_rate": 2.6631323452476067e-06, "loss": 0.8548, "step": 5518 }, { "epoch": 2.9032088374539717, "grad_norm": 2.0724360942840576, "learning_rate": 2.6624391457088846e-06, "loss": 0.8659, "step": 5519 }, { "epoch": 2.9037348763808524, "grad_norm": 2.136324167251587, "learning_rate": 2.6617459336279986e-06, "loss": 0.8624, "step": 5520 }, { "epoch": 2.9042609153077326, "grad_norm": 2.424056053161621, "learning_rate": 2.6610527090584725e-06, "loss": 0.8998, "step": 5521 }, { "epoch": 2.9047869542346136, "grad_norm": 2.2181196212768555, "learning_rate": 2.660359472053832e-06, "loss": 0.8789, "step": 5522 }, { "epoch": 2.905312993161494, "grad_norm": 2.128340244293213, "learning_rate": 2.659666222667601e-06, "loss": 0.8363, "step": 5523 }, { "epoch": 2.9058390320883745, "grad_norm": 2.184786558151245, "learning_rate": 2.6589729609533077e-06, "loss": 0.9366, "step": 5524 }, { "epoch": 2.906365071015255, "grad_norm": 2.0402917861938477, "learning_rate": 2.65827968696448e-06, "loss": 0.867, "step": 5525 }, { "epoch": 2.9068911099421357, "grad_norm": 2.0968940258026123, "learning_rate": 2.6575864007546453e-06, "loss": 0.8685, "step": 5526 }, { "epoch": 2.9074171488690164, "grad_norm": 2.206838846206665, "learning_rate": 2.6568931023773355e-06, "loss": 0.8623, "step": 5527 }, { "epoch": 2.907943187795897, "grad_norm": 2.1626570224761963, "learning_rate": 2.656199791886078e-06, "loss": 0.8768, "step": 5528 }, { "epoch": 2.9084692267227776, "grad_norm": 2.0420351028442383, "learning_rate": 2.655506469334407e-06, "loss": 0.849, "step": 5529 }, { "epoch": 2.908995265649658, "grad_norm": 2.16514253616333, "learning_rate": 2.654813134775854e-06, "loss": 0.8554, "step": 5530 }, { "epoch": 2.909521304576539, "grad_norm": 2.141601800918579, "learning_rate": 2.654119788263953e-06, "loss": 0.8485, "step": 5531 }, { "epoch": 2.910047343503419, "grad_norm": 2.0859155654907227, "learning_rate": 2.6534264298522366e-06, "loss": 0.8506, "step": 5532 }, { "epoch": 2.9105733824302997, "grad_norm": 2.4352457523345947, "learning_rate": 2.6527330595942412e-06, "loss": 0.86, "step": 5533 }, { "epoch": 2.9110994213571804, "grad_norm": 2.1577723026275635, "learning_rate": 2.652039677543504e-06, "loss": 0.9261, "step": 5534 }, { "epoch": 2.911625460284061, "grad_norm": 2.159653663635254, "learning_rate": 2.6513462837535597e-06, "loss": 0.8389, "step": 5535 }, { "epoch": 2.9121514992109416, "grad_norm": 2.2490360736846924, "learning_rate": 2.6506528782779478e-06, "loss": 0.8483, "step": 5536 }, { "epoch": 2.9126775381378223, "grad_norm": 2.0082390308380127, "learning_rate": 2.649959461170207e-06, "loss": 0.8179, "step": 5537 }, { "epoch": 2.913203577064703, "grad_norm": 2.1046338081359863, "learning_rate": 2.649266032483877e-06, "loss": 0.8564, "step": 5538 }, { "epoch": 2.9137296159915835, "grad_norm": 2.0772643089294434, "learning_rate": 2.6485725922724973e-06, "loss": 0.8698, "step": 5539 }, { "epoch": 2.914255654918464, "grad_norm": 2.2684783935546875, "learning_rate": 2.6478791405896114e-06, "loss": 0.8677, "step": 5540 }, { "epoch": 2.9147816938453444, "grad_norm": 2.076192855834961, "learning_rate": 2.6471856774887607e-06, "loss": 0.8049, "step": 5541 }, { "epoch": 2.915307732772225, "grad_norm": 2.21669340133667, "learning_rate": 2.646492203023488e-06, "loss": 0.8055, "step": 5542 }, { "epoch": 2.9158337716991056, "grad_norm": 2.0618045330047607, "learning_rate": 2.6457987172473387e-06, "loss": 0.8481, "step": 5543 }, { "epoch": 2.9163598106259863, "grad_norm": 2.1812984943389893, "learning_rate": 2.6451052202138565e-06, "loss": 0.8869, "step": 5544 }, { "epoch": 2.916885849552867, "grad_norm": 2.2167868614196777, "learning_rate": 2.644411711976588e-06, "loss": 0.9019, "step": 5545 }, { "epoch": 2.9174118884797475, "grad_norm": 2.3041064739227295, "learning_rate": 2.64371819258908e-06, "loss": 0.8914, "step": 5546 }, { "epoch": 2.917937927406628, "grad_norm": 2.20035719871521, "learning_rate": 2.6430246621048807e-06, "loss": 0.9026, "step": 5547 }, { "epoch": 2.918463966333509, "grad_norm": 2.225909948348999, "learning_rate": 2.6423311205775383e-06, "loss": 0.8806, "step": 5548 }, { "epoch": 2.9189900052603894, "grad_norm": 2.165020704269409, "learning_rate": 2.641637568060601e-06, "loss": 0.8764, "step": 5549 }, { "epoch": 2.9195160441872696, "grad_norm": 2.14363956451416, "learning_rate": 2.6409440046076203e-06, "loss": 0.8613, "step": 5550 }, { "epoch": 2.9200420831141507, "grad_norm": 2.069394111633301, "learning_rate": 2.6402504302721465e-06, "loss": 0.8479, "step": 5551 }, { "epoch": 2.920568122041031, "grad_norm": 2.487534999847412, "learning_rate": 2.6395568451077317e-06, "loss": 0.8993, "step": 5552 }, { "epoch": 2.9210941609679115, "grad_norm": 2.0706708431243896, "learning_rate": 2.6388632491679296e-06, "loss": 0.8741, "step": 5553 }, { "epoch": 2.921620199894792, "grad_norm": 1.9984725713729858, "learning_rate": 2.638169642506292e-06, "loss": 0.8637, "step": 5554 }, { "epoch": 2.922146238821673, "grad_norm": 2.198622226715088, "learning_rate": 2.6374760251763746e-06, "loss": 0.86, "step": 5555 }, { "epoch": 2.9226722777485534, "grad_norm": 2.043895959854126, "learning_rate": 2.6367823972317324e-06, "loss": 0.8358, "step": 5556 }, { "epoch": 2.923198316675434, "grad_norm": 2.281885862350464, "learning_rate": 2.636088758725921e-06, "loss": 0.8444, "step": 5557 }, { "epoch": 2.9237243556023147, "grad_norm": 2.2699151039123535, "learning_rate": 2.6353951097124967e-06, "loss": 0.8878, "step": 5558 }, { "epoch": 2.924250394529195, "grad_norm": 2.1686975955963135, "learning_rate": 2.634701450245019e-06, "loss": 0.8426, "step": 5559 }, { "epoch": 2.924776433456076, "grad_norm": 2.115002393722534, "learning_rate": 2.634007780377045e-06, "loss": 0.8686, "step": 5560 }, { "epoch": 2.925302472382956, "grad_norm": 2.110584020614624, "learning_rate": 2.633314100162134e-06, "loss": 0.8842, "step": 5561 }, { "epoch": 2.925828511309837, "grad_norm": 2.2194478511810303, "learning_rate": 2.6326204096538466e-06, "loss": 0.8335, "step": 5562 }, { "epoch": 2.9263545502367174, "grad_norm": 2.08543062210083, "learning_rate": 2.631926708905744e-06, "loss": 0.8651, "step": 5563 }, { "epoch": 2.926880589163598, "grad_norm": 2.207033634185791, "learning_rate": 2.6312329979713863e-06, "loss": 0.8767, "step": 5564 }, { "epoch": 2.9274066280904787, "grad_norm": 2.1411428451538086, "learning_rate": 2.6305392769043362e-06, "loss": 0.9042, "step": 5565 }, { "epoch": 2.9279326670173593, "grad_norm": 2.037659168243408, "learning_rate": 2.629845545758159e-06, "loss": 0.8557, "step": 5566 }, { "epoch": 2.92845870594424, "grad_norm": 2.1294867992401123, "learning_rate": 2.6291518045864173e-06, "loss": 0.8501, "step": 5567 }, { "epoch": 2.9289847448711206, "grad_norm": 2.2772789001464844, "learning_rate": 2.628458053442675e-06, "loss": 0.836, "step": 5568 }, { "epoch": 2.9295107837980012, "grad_norm": 2.089571952819824, "learning_rate": 2.6277642923804988e-06, "loss": 0.8603, "step": 5569 }, { "epoch": 2.9300368227248814, "grad_norm": 2.0076773166656494, "learning_rate": 2.6270705214534548e-06, "loss": 0.841, "step": 5570 }, { "epoch": 2.9305628616517625, "grad_norm": 2.137336492538452, "learning_rate": 2.626376740715109e-06, "loss": 0.8633, "step": 5571 }, { "epoch": 2.9310889005786427, "grad_norm": 2.1234207153320312, "learning_rate": 2.625682950219031e-06, "loss": 0.8745, "step": 5572 }, { "epoch": 2.9316149395055233, "grad_norm": 2.0331742763519287, "learning_rate": 2.624989150018789e-06, "loss": 0.8523, "step": 5573 }, { "epoch": 2.932140978432404, "grad_norm": 2.1377944946289062, "learning_rate": 2.6242953401679514e-06, "loss": 0.8815, "step": 5574 }, { "epoch": 2.9326670173592846, "grad_norm": 2.2025632858276367, "learning_rate": 2.6236015207200887e-06, "loss": 0.8795, "step": 5575 }, { "epoch": 2.9331930562861652, "grad_norm": 2.1662251949310303, "learning_rate": 2.6229076917287723e-06, "loss": 0.8931, "step": 5576 }, { "epoch": 2.933719095213046, "grad_norm": 2.1432271003723145, "learning_rate": 2.6222138532475727e-06, "loss": 0.8428, "step": 5577 }, { "epoch": 2.9342451341399265, "grad_norm": 2.225395679473877, "learning_rate": 2.6215200053300627e-06, "loss": 0.8558, "step": 5578 }, { "epoch": 2.9347711730668067, "grad_norm": 2.3703887462615967, "learning_rate": 2.620826148029816e-06, "loss": 0.8831, "step": 5579 }, { "epoch": 2.9352972119936878, "grad_norm": 2.2526228427886963, "learning_rate": 2.6201322814004054e-06, "loss": 0.8613, "step": 5580 }, { "epoch": 2.935823250920568, "grad_norm": 3.4306552410125732, "learning_rate": 2.619438405495405e-06, "loss": 0.8258, "step": 5581 }, { "epoch": 2.9363492898474486, "grad_norm": 2.1361422538757324, "learning_rate": 2.618744520368391e-06, "loss": 0.8189, "step": 5582 }, { "epoch": 2.936875328774329, "grad_norm": 2.1845200061798096, "learning_rate": 2.6180506260729382e-06, "loss": 0.8852, "step": 5583 }, { "epoch": 2.93740136770121, "grad_norm": 2.1148431301116943, "learning_rate": 2.617356722662625e-06, "loss": 0.8581, "step": 5584 }, { "epoch": 2.9379274066280905, "grad_norm": 1.9638389348983765, "learning_rate": 2.616662810191027e-06, "loss": 0.8373, "step": 5585 }, { "epoch": 2.938453445554971, "grad_norm": 2.095846176147461, "learning_rate": 2.6159688887117235e-06, "loss": 0.8463, "step": 5586 }, { "epoch": 2.9389794844818518, "grad_norm": 2.4141805171966553, "learning_rate": 2.6152749582782916e-06, "loss": 0.862, "step": 5587 }, { "epoch": 2.9395055234087324, "grad_norm": 2.073904037475586, "learning_rate": 2.614581018944312e-06, "loss": 0.7763, "step": 5588 }, { "epoch": 2.940031562335613, "grad_norm": 2.1504011154174805, "learning_rate": 2.613887070763364e-06, "loss": 0.858, "step": 5589 }, { "epoch": 2.940557601262493, "grad_norm": 2.120551109313965, "learning_rate": 2.6131931137890283e-06, "loss": 0.8769, "step": 5590 }, { "epoch": 2.941083640189374, "grad_norm": 2.204704761505127, "learning_rate": 2.6124991480748873e-06, "loss": 0.8793, "step": 5591 }, { "epoch": 2.9416096791162545, "grad_norm": 2.0500478744506836, "learning_rate": 2.611805173674523e-06, "loss": 0.841, "step": 5592 }, { "epoch": 2.942135718043135, "grad_norm": 2.145325183868408, "learning_rate": 2.611111190641517e-06, "loss": 0.8381, "step": 5593 }, { "epoch": 2.9426617569700158, "grad_norm": 2.094003915786743, "learning_rate": 2.610417199029453e-06, "loss": 0.8277, "step": 5594 }, { "epoch": 2.9431877958968964, "grad_norm": 2.2349865436553955, "learning_rate": 2.6097231988919162e-06, "loss": 0.8739, "step": 5595 }, { "epoch": 2.943713834823777, "grad_norm": 2.0360376834869385, "learning_rate": 2.6090291902824903e-06, "loss": 0.8782, "step": 5596 }, { "epoch": 2.9442398737506577, "grad_norm": 2.03725266456604, "learning_rate": 2.608335173254762e-06, "loss": 0.8481, "step": 5597 }, { "epoch": 2.9447659126775383, "grad_norm": 2.04251766204834, "learning_rate": 2.6076411478623165e-06, "loss": 0.838, "step": 5598 }, { "epoch": 2.9452919516044185, "grad_norm": 2.186295509338379, "learning_rate": 2.60694711415874e-06, "loss": 0.8725, "step": 5599 }, { "epoch": 2.9458179905312996, "grad_norm": 2.218148708343506, "learning_rate": 2.60625307219762e-06, "loss": 0.85, "step": 5600 }, { "epoch": 2.9463440294581797, "grad_norm": 2.1883676052093506, "learning_rate": 2.605559022032545e-06, "loss": 0.8752, "step": 5601 }, { "epoch": 2.9468700683850604, "grad_norm": 2.0386106967926025, "learning_rate": 2.6048649637171035e-06, "loss": 0.865, "step": 5602 }, { "epoch": 2.947396107311941, "grad_norm": 2.0840916633605957, "learning_rate": 2.604170897304885e-06, "loss": 0.8269, "step": 5603 }, { "epoch": 2.9479221462388217, "grad_norm": 2.063124418258667, "learning_rate": 2.6034768228494793e-06, "loss": 0.8238, "step": 5604 }, { "epoch": 2.9484481851657023, "grad_norm": 2.255406618118286, "learning_rate": 2.602782740404477e-06, "loss": 0.9309, "step": 5605 }, { "epoch": 2.948974224092583, "grad_norm": 2.0394771099090576, "learning_rate": 2.602088650023468e-06, "loss": 0.8525, "step": 5606 }, { "epoch": 2.9495002630194636, "grad_norm": 2.165799140930176, "learning_rate": 2.6013945517600452e-06, "loss": 0.8814, "step": 5607 }, { "epoch": 2.950026301946344, "grad_norm": 2.2064552307128906, "learning_rate": 2.600700445667801e-06, "loss": 0.8795, "step": 5608 }, { "epoch": 2.950552340873225, "grad_norm": 2.1994264125823975, "learning_rate": 2.6000063318003283e-06, "loss": 0.8789, "step": 5609 }, { "epoch": 2.951078379800105, "grad_norm": 2.234448194503784, "learning_rate": 2.5993122102112207e-06, "loss": 0.8675, "step": 5610 }, { "epoch": 2.9516044187269856, "grad_norm": 2.400737762451172, "learning_rate": 2.5986180809540716e-06, "loss": 0.8221, "step": 5611 }, { "epoch": 2.9521304576538663, "grad_norm": 2.05903959274292, "learning_rate": 2.597923944082476e-06, "loss": 0.8021, "step": 5612 }, { "epoch": 2.952656496580747, "grad_norm": 2.161031723022461, "learning_rate": 2.59722979965003e-06, "loss": 0.8628, "step": 5613 }, { "epoch": 2.9531825355076275, "grad_norm": 2.2087483406066895, "learning_rate": 2.596535647710328e-06, "loss": 0.8856, "step": 5614 }, { "epoch": 2.953708574434508, "grad_norm": 2.301504373550415, "learning_rate": 2.595841488316968e-06, "loss": 0.832, "step": 5615 }, { "epoch": 2.954234613361389, "grad_norm": 2.1061205863952637, "learning_rate": 2.595147321523547e-06, "loss": 0.8452, "step": 5616 }, { "epoch": 2.9547606522882695, "grad_norm": 2.065734386444092, "learning_rate": 2.594453147383662e-06, "loss": 0.8206, "step": 5617 }, { "epoch": 2.95528669121515, "grad_norm": 2.0582311153411865, "learning_rate": 2.5937589659509103e-06, "loss": 0.8359, "step": 5618 }, { "epoch": 2.9558127301420303, "grad_norm": 2.3873939514160156, "learning_rate": 2.5930647772788926e-06, "loss": 0.9165, "step": 5619 }, { "epoch": 2.9563387690689114, "grad_norm": 2.3410468101501465, "learning_rate": 2.5923705814212073e-06, "loss": 0.8411, "step": 5620 }, { "epoch": 2.9568648079957915, "grad_norm": 2.2544775009155273, "learning_rate": 2.591676378431454e-06, "loss": 0.9, "step": 5621 }, { "epoch": 2.957390846922672, "grad_norm": 2.212414026260376, "learning_rate": 2.5909821683632335e-06, "loss": 0.8761, "step": 5622 }, { "epoch": 2.957916885849553, "grad_norm": 2.085202932357788, "learning_rate": 2.590287951270147e-06, "loss": 0.8928, "step": 5623 }, { "epoch": 2.9584429247764334, "grad_norm": 2.196532726287842, "learning_rate": 2.589593727205795e-06, "loss": 0.8424, "step": 5624 }, { "epoch": 2.958968963703314, "grad_norm": 2.159904718399048, "learning_rate": 2.5888994962237808e-06, "loss": 0.8598, "step": 5625 }, { "epoch": 2.9594950026301947, "grad_norm": 2.239759683609009, "learning_rate": 2.5882052583777062e-06, "loss": 0.8885, "step": 5626 }, { "epoch": 2.9600210415570753, "grad_norm": 2.3337960243225098, "learning_rate": 2.587511013721175e-06, "loss": 0.9519, "step": 5627 }, { "epoch": 2.9605470804839555, "grad_norm": 2.1502795219421387, "learning_rate": 2.5868167623077906e-06, "loss": 0.855, "step": 5628 }, { "epoch": 2.9610731194108366, "grad_norm": 2.180718421936035, "learning_rate": 2.586122504191156e-06, "loss": 0.8517, "step": 5629 }, { "epoch": 2.961599158337717, "grad_norm": 2.24312424659729, "learning_rate": 2.5854282394248776e-06, "loss": 0.8784, "step": 5630 }, { "epoch": 2.9621251972645974, "grad_norm": 2.1780917644500732, "learning_rate": 2.5847339680625593e-06, "loss": 0.8659, "step": 5631 }, { "epoch": 2.962651236191478, "grad_norm": 2.3751275539398193, "learning_rate": 2.584039690157808e-06, "loss": 0.8244, "step": 5632 }, { "epoch": 2.9631772751183587, "grad_norm": 2.089909076690674, "learning_rate": 2.583345405764229e-06, "loss": 0.8494, "step": 5633 }, { "epoch": 2.9637033140452393, "grad_norm": 2.136986017227173, "learning_rate": 2.5826511149354295e-06, "loss": 0.8499, "step": 5634 }, { "epoch": 2.96422935297212, "grad_norm": 2.241116762161255, "learning_rate": 2.5819568177250165e-06, "loss": 0.8838, "step": 5635 }, { "epoch": 2.9647553918990006, "grad_norm": 2.2362160682678223, "learning_rate": 2.581262514186597e-06, "loss": 0.8864, "step": 5636 }, { "epoch": 2.9652814308258812, "grad_norm": 2.3444442749023438, "learning_rate": 2.5805682043737806e-06, "loss": 0.8383, "step": 5637 }, { "epoch": 2.965807469752762, "grad_norm": 2.0216920375823975, "learning_rate": 2.5798738883401754e-06, "loss": 0.7971, "step": 5638 }, { "epoch": 2.966333508679642, "grad_norm": 2.058112859725952, "learning_rate": 2.57917956613939e-06, "loss": 0.8344, "step": 5639 }, { "epoch": 2.966859547606523, "grad_norm": 2.165743589401245, "learning_rate": 2.5784852378250347e-06, "loss": 0.8443, "step": 5640 }, { "epoch": 2.9673855865334033, "grad_norm": 2.087465524673462, "learning_rate": 2.5777909034507196e-06, "loss": 0.8803, "step": 5641 }, { "epoch": 2.967911625460284, "grad_norm": 2.2934722900390625, "learning_rate": 2.5770965630700545e-06, "loss": 0.9435, "step": 5642 }, { "epoch": 2.9684376643871646, "grad_norm": 2.184022903442383, "learning_rate": 2.5764022167366514e-06, "loss": 0.8231, "step": 5643 }, { "epoch": 2.9689637033140452, "grad_norm": 2.2172598838806152, "learning_rate": 2.5757078645041213e-06, "loss": 0.8559, "step": 5644 }, { "epoch": 2.969489742240926, "grad_norm": 2.1483957767486572, "learning_rate": 2.575013506426076e-06, "loss": 0.8665, "step": 5645 }, { "epoch": 2.9700157811678065, "grad_norm": 2.123063564300537, "learning_rate": 2.5743191425561282e-06, "loss": 0.8564, "step": 5646 }, { "epoch": 2.970541820094687, "grad_norm": 2.2162530422210693, "learning_rate": 2.5736247729478916e-06, "loss": 0.8645, "step": 5647 }, { "epoch": 2.9710678590215673, "grad_norm": 2.0991568565368652, "learning_rate": 2.572930397654978e-06, "loss": 0.8714, "step": 5648 }, { "epoch": 2.9715938979484484, "grad_norm": 2.2134625911712646, "learning_rate": 2.572236016731001e-06, "loss": 0.8445, "step": 5649 }, { "epoch": 2.9721199368753286, "grad_norm": 2.3663439750671387, "learning_rate": 2.571541630229576e-06, "loss": 0.8849, "step": 5650 }, { "epoch": 2.9726459758022092, "grad_norm": 2.0789709091186523, "learning_rate": 2.570847238204317e-06, "loss": 0.8763, "step": 5651 }, { "epoch": 2.97317201472909, "grad_norm": 2.253239631652832, "learning_rate": 2.5701528407088395e-06, "loss": 0.8721, "step": 5652 }, { "epoch": 2.9736980536559705, "grad_norm": 2.068449020385742, "learning_rate": 2.5694584377967586e-06, "loss": 0.8611, "step": 5653 }, { "epoch": 2.974224092582851, "grad_norm": 2.1228764057159424, "learning_rate": 2.5687640295216894e-06, "loss": 0.8756, "step": 5654 }, { "epoch": 2.9747501315097318, "grad_norm": 2.2742867469787598, "learning_rate": 2.5680696159372496e-06, "loss": 0.8936, "step": 5655 }, { "epoch": 2.9752761704366124, "grad_norm": 2.1135270595550537, "learning_rate": 2.5673751970970546e-06, "loss": 0.8787, "step": 5656 }, { "epoch": 2.975802209363493, "grad_norm": 2.25329327583313, "learning_rate": 2.566680773054722e-06, "loss": 0.868, "step": 5657 }, { "epoch": 2.9763282482903737, "grad_norm": 2.0774147510528564, "learning_rate": 2.56598634386387e-06, "loss": 0.8845, "step": 5658 }, { "epoch": 2.976854287217254, "grad_norm": 2.114530324935913, "learning_rate": 2.565291909578116e-06, "loss": 0.8378, "step": 5659 }, { "epoch": 2.9773803261441345, "grad_norm": 2.07590913772583, "learning_rate": 2.5645974702510774e-06, "loss": 0.8564, "step": 5660 }, { "epoch": 2.977906365071015, "grad_norm": 2.303671360015869, "learning_rate": 2.563903025936374e-06, "loss": 0.9414, "step": 5661 }, { "epoch": 2.9784324039978958, "grad_norm": 2.07218074798584, "learning_rate": 2.563208576687625e-06, "loss": 0.8541, "step": 5662 }, { "epoch": 2.9789584429247764, "grad_norm": 2.16782808303833, "learning_rate": 2.562514122558448e-06, "loss": 0.8693, "step": 5663 }, { "epoch": 2.979484481851657, "grad_norm": 2.1112074851989746, "learning_rate": 2.5618196636024645e-06, "loss": 0.8713, "step": 5664 }, { "epoch": 2.9800105207785377, "grad_norm": 2.254086971282959, "learning_rate": 2.561125199873295e-06, "loss": 0.8639, "step": 5665 }, { "epoch": 2.9805365597054183, "grad_norm": 2.177830696105957, "learning_rate": 2.56043073142456e-06, "loss": 0.8762, "step": 5666 }, { "epoch": 2.981062598632299, "grad_norm": 2.207988739013672, "learning_rate": 2.5597362583098784e-06, "loss": 0.8586, "step": 5667 }, { "epoch": 2.981588637559179, "grad_norm": 2.1314306259155273, "learning_rate": 2.5590417805828734e-06, "loss": 0.8723, "step": 5668 }, { "epoch": 2.98211467648606, "grad_norm": 2.149394989013672, "learning_rate": 2.558347298297166e-06, "loss": 0.8694, "step": 5669 }, { "epoch": 2.9826407154129404, "grad_norm": 2.2965266704559326, "learning_rate": 2.5576528115063793e-06, "loss": 0.8393, "step": 5670 }, { "epoch": 2.983166754339821, "grad_norm": 2.1296353340148926, "learning_rate": 2.556958320264134e-06, "loss": 0.8488, "step": 5671 }, { "epoch": 2.9836927932667017, "grad_norm": 2.01285457611084, "learning_rate": 2.5562638246240533e-06, "loss": 0.8021, "step": 5672 }, { "epoch": 2.9842188321935823, "grad_norm": 2.1777496337890625, "learning_rate": 2.5555693246397604e-06, "loss": 0.8591, "step": 5673 }, { "epoch": 2.984744871120463, "grad_norm": 2.1652703285217285, "learning_rate": 2.554874820364879e-06, "loss": 0.8294, "step": 5674 }, { "epoch": 2.9852709100473436, "grad_norm": 2.170325517654419, "learning_rate": 2.5541803118530324e-06, "loss": 0.8867, "step": 5675 }, { "epoch": 2.985796948974224, "grad_norm": 2.2347910404205322, "learning_rate": 2.5534857991578438e-06, "loss": 0.8383, "step": 5676 }, { "epoch": 2.986322987901105, "grad_norm": 2.264760732650757, "learning_rate": 2.5527912823329397e-06, "loss": 0.8479, "step": 5677 }, { "epoch": 2.9868490268279855, "grad_norm": 2.1393513679504395, "learning_rate": 2.5520967614319433e-06, "loss": 0.8354, "step": 5678 }, { "epoch": 2.9873750657548657, "grad_norm": 2.1253223419189453, "learning_rate": 2.551402236508479e-06, "loss": 0.8609, "step": 5679 }, { "epoch": 2.9879011046817463, "grad_norm": 2.0231034755706787, "learning_rate": 2.5507077076161734e-06, "loss": 0.8791, "step": 5680 }, { "epoch": 2.988427143608627, "grad_norm": 2.1569154262542725, "learning_rate": 2.5500131748086516e-06, "loss": 0.8325, "step": 5681 }, { "epoch": 2.9889531825355076, "grad_norm": 2.0435471534729004, "learning_rate": 2.5493186381395385e-06, "loss": 0.8408, "step": 5682 }, { "epoch": 2.989479221462388, "grad_norm": 2.061220169067383, "learning_rate": 2.548624097662462e-06, "loss": 0.8342, "step": 5683 }, { "epoch": 2.990005260389269, "grad_norm": 2.1771514415740967, "learning_rate": 2.5479295534310484e-06, "loss": 0.8839, "step": 5684 }, { "epoch": 2.9905312993161495, "grad_norm": 2.093118190765381, "learning_rate": 2.5472350054989225e-06, "loss": 0.804, "step": 5685 }, { "epoch": 2.99105733824303, "grad_norm": 2.1935534477233887, "learning_rate": 2.5465404539197132e-06, "loss": 0.847, "step": 5686 }, { "epoch": 2.9915833771699107, "grad_norm": 1.9981772899627686, "learning_rate": 2.545845898747048e-06, "loss": 0.8717, "step": 5687 }, { "epoch": 2.992109416096791, "grad_norm": 2.070040464401245, "learning_rate": 2.545151340034553e-06, "loss": 0.839, "step": 5688 }, { "epoch": 2.992635455023672, "grad_norm": 2.1990272998809814, "learning_rate": 2.5444567778358574e-06, "loss": 0.8895, "step": 5689 }, { "epoch": 2.993161493950552, "grad_norm": 2.15480899810791, "learning_rate": 2.543762212204589e-06, "loss": 0.8744, "step": 5690 }, { "epoch": 2.993687532877433, "grad_norm": 2.3581175804138184, "learning_rate": 2.543067643194377e-06, "loss": 0.8612, "step": 5691 }, { "epoch": 2.9942135718043135, "grad_norm": 2.3500540256500244, "learning_rate": 2.5423730708588477e-06, "loss": 0.8457, "step": 5692 }, { "epoch": 2.994739610731194, "grad_norm": 2.1401665210723877, "learning_rate": 2.541678495251632e-06, "loss": 0.8492, "step": 5693 }, { "epoch": 2.9952656496580747, "grad_norm": 2.225451707839966, "learning_rate": 2.5409839164263587e-06, "loss": 0.8827, "step": 5694 }, { "epoch": 2.9957916885849554, "grad_norm": 2.2112812995910645, "learning_rate": 2.540289334436658e-06, "loss": 0.8589, "step": 5695 }, { "epoch": 2.996317727511836, "grad_norm": 2.278918504714966, "learning_rate": 2.5395947493361587e-06, "loss": 0.8553, "step": 5696 }, { "epoch": 2.996843766438716, "grad_norm": 2.193023443222046, "learning_rate": 2.5389001611784906e-06, "loss": 0.8179, "step": 5697 }, { "epoch": 2.9973698053655973, "grad_norm": 3.0404014587402344, "learning_rate": 2.5382055700172848e-06, "loss": 0.8485, "step": 5698 }, { "epoch": 2.9978958442924775, "grad_norm": 2.1552975177764893, "learning_rate": 2.53751097590617e-06, "loss": 0.8498, "step": 5699 }, { "epoch": 2.998421883219358, "grad_norm": 2.08805775642395, "learning_rate": 2.536816378898778e-06, "loss": 0.8691, "step": 5700 }, { "epoch": 2.9989479221462387, "grad_norm": 2.143832206726074, "learning_rate": 2.53612177904874e-06, "loss": 0.8658, "step": 5701 }, { "epoch": 2.9994739610731194, "grad_norm": 2.043550729751587, "learning_rate": 2.535427176409687e-06, "loss": 0.8247, "step": 5702 }, { "epoch": 3.0, "grad_norm": 2.128239631652832, "learning_rate": 2.534732571035249e-06, "loss": 0.8914, "step": 5703 }, { "epoch": 3.0005260389268806, "grad_norm": 1.942564606666565, "learning_rate": 2.5340379629790583e-06, "loss": 0.8615, "step": 5704 }, { "epoch": 3.0010520778537613, "grad_norm": 2.106269598007202, "learning_rate": 2.5333433522947476e-06, "loss": 0.7921, "step": 5705 }, { "epoch": 3.001578116780642, "grad_norm": 2.0176422595977783, "learning_rate": 2.5326487390359472e-06, "loss": 0.8198, "step": 5706 }, { "epoch": 3.0021041557075225, "grad_norm": 2.380239963531494, "learning_rate": 2.53195412325629e-06, "loss": 0.773, "step": 5707 }, { "epoch": 3.0026301946344027, "grad_norm": 1.9554225206375122, "learning_rate": 2.5312595050094086e-06, "loss": 0.7674, "step": 5708 }, { "epoch": 3.0031562335612834, "grad_norm": 2.1862752437591553, "learning_rate": 2.530564884348935e-06, "loss": 0.8447, "step": 5709 }, { "epoch": 3.003682272488164, "grad_norm": 2.111069440841675, "learning_rate": 2.5298702613285015e-06, "loss": 0.8154, "step": 5710 }, { "epoch": 3.0042083114150446, "grad_norm": 2.053826332092285, "learning_rate": 2.5291756360017415e-06, "loss": 0.773, "step": 5711 }, { "epoch": 3.0047343503419253, "grad_norm": 2.220202922821045, "learning_rate": 2.5284810084222884e-06, "loss": 0.8356, "step": 5712 }, { "epoch": 3.005260389268806, "grad_norm": 2.069816827774048, "learning_rate": 2.5277863786437744e-06, "loss": 0.8645, "step": 5713 }, { "epoch": 3.0057864281956865, "grad_norm": 2.0797791481018066, "learning_rate": 2.5270917467198343e-06, "loss": 0.7809, "step": 5714 }, { "epoch": 3.006312467122567, "grad_norm": 2.147186756134033, "learning_rate": 2.5263971127040997e-06, "loss": 0.7633, "step": 5715 }, { "epoch": 3.006838506049448, "grad_norm": 2.0590076446533203, "learning_rate": 2.5257024766502065e-06, "loss": 0.8007, "step": 5716 }, { "epoch": 3.0073645449763284, "grad_norm": 2.179072618484497, "learning_rate": 2.525007838611787e-06, "loss": 0.8283, "step": 5717 }, { "epoch": 3.0078905839032086, "grad_norm": 2.2251808643341064, "learning_rate": 2.5243131986424753e-06, "loss": 0.833, "step": 5718 }, { "epoch": 3.0084166228300893, "grad_norm": 2.123932361602783, "learning_rate": 2.5236185567959063e-06, "loss": 0.8207, "step": 5719 }, { "epoch": 3.00894266175697, "grad_norm": 2.1033709049224854, "learning_rate": 2.5229239131257148e-06, "loss": 0.7967, "step": 5720 }, { "epoch": 3.0094687006838505, "grad_norm": 1.9164155721664429, "learning_rate": 2.5222292676855336e-06, "loss": 0.7822, "step": 5721 }, { "epoch": 3.009994739610731, "grad_norm": 2.1854350566864014, "learning_rate": 2.5215346205289976e-06, "loss": 0.8271, "step": 5722 }, { "epoch": 3.010520778537612, "grad_norm": 2.1232964992523193, "learning_rate": 2.5208399717097432e-06, "loss": 0.8529, "step": 5723 }, { "epoch": 3.0110468174644924, "grad_norm": 2.104646921157837, "learning_rate": 2.520145321281403e-06, "loss": 0.8195, "step": 5724 }, { "epoch": 3.011572856391373, "grad_norm": 2.255728244781494, "learning_rate": 2.5194506692976138e-06, "loss": 0.8175, "step": 5725 }, { "epoch": 3.0120988953182537, "grad_norm": 2.080730676651001, "learning_rate": 2.51875601581201e-06, "loss": 0.8229, "step": 5726 }, { "epoch": 3.0126249342451343, "grad_norm": 2.1749649047851562, "learning_rate": 2.5180613608782267e-06, "loss": 0.7767, "step": 5727 }, { "epoch": 3.0131509731720145, "grad_norm": 2.147372245788574, "learning_rate": 2.517366704549899e-06, "loss": 0.794, "step": 5728 }, { "epoch": 3.013677012098895, "grad_norm": 2.139737129211426, "learning_rate": 2.5166720468806627e-06, "loss": 0.812, "step": 5729 }, { "epoch": 3.014203051025776, "grad_norm": 2.1527774333953857, "learning_rate": 2.515977387924154e-06, "loss": 0.7652, "step": 5730 }, { "epoch": 3.0147290899526564, "grad_norm": 2.192126750946045, "learning_rate": 2.515282727734007e-06, "loss": 0.8516, "step": 5731 }, { "epoch": 3.015255128879537, "grad_norm": 2.325930595397949, "learning_rate": 2.5145880663638584e-06, "loss": 0.7668, "step": 5732 }, { "epoch": 3.0157811678064177, "grad_norm": 2.186030149459839, "learning_rate": 2.513893403867344e-06, "loss": 0.7973, "step": 5733 }, { "epoch": 3.0163072067332983, "grad_norm": 2.663031578063965, "learning_rate": 2.5131987402980997e-06, "loss": 0.7954, "step": 5734 }, { "epoch": 3.016833245660179, "grad_norm": 2.2261505126953125, "learning_rate": 2.512504075709761e-06, "loss": 0.8407, "step": 5735 }, { "epoch": 3.0173592845870596, "grad_norm": 2.1542792320251465, "learning_rate": 2.5118094101559636e-06, "loss": 0.8134, "step": 5736 }, { "epoch": 3.0178853235139402, "grad_norm": 2.190626621246338, "learning_rate": 2.511114743690346e-06, "loss": 0.7816, "step": 5737 }, { "epoch": 3.0184113624408204, "grad_norm": 2.153644561767578, "learning_rate": 2.5104200763665413e-06, "loss": 0.7761, "step": 5738 }, { "epoch": 3.018937401367701, "grad_norm": 2.161682367324829, "learning_rate": 2.5097254082381883e-06, "loss": 0.8305, "step": 5739 }, { "epoch": 3.0194634402945817, "grad_norm": 2.07537579536438, "learning_rate": 2.509030739358922e-06, "loss": 0.8196, "step": 5740 }, { "epoch": 3.0199894792214623, "grad_norm": 2.156689405441284, "learning_rate": 2.5083360697823794e-06, "loss": 0.8049, "step": 5741 }, { "epoch": 3.020515518148343, "grad_norm": 2.132127046585083, "learning_rate": 2.5076413995621964e-06, "loss": 0.7833, "step": 5742 }, { "epoch": 3.0210415570752236, "grad_norm": 2.083756685256958, "learning_rate": 2.5069467287520096e-06, "loss": 0.7609, "step": 5743 }, { "epoch": 3.0215675960021042, "grad_norm": 2.208649158477783, "learning_rate": 2.5062520574054557e-06, "loss": 0.8409, "step": 5744 }, { "epoch": 3.022093634928985, "grad_norm": 2.2577526569366455, "learning_rate": 2.505557385576172e-06, "loss": 0.8505, "step": 5745 }, { "epoch": 3.0226196738558655, "grad_norm": 2.1935343742370605, "learning_rate": 2.5048627133177933e-06, "loss": 0.7602, "step": 5746 }, { "epoch": 3.023145712782746, "grad_norm": 2.2607522010803223, "learning_rate": 2.5041680406839573e-06, "loss": 0.8066, "step": 5747 }, { "epoch": 3.0236717517096263, "grad_norm": 2.0703189373016357, "learning_rate": 2.5034733677283017e-06, "loss": 0.8064, "step": 5748 }, { "epoch": 3.024197790636507, "grad_norm": 2.1265816688537598, "learning_rate": 2.502778694504462e-06, "loss": 0.7871, "step": 5749 }, { "epoch": 3.0247238295633876, "grad_norm": 2.119349718093872, "learning_rate": 2.502084021066076e-06, "loss": 0.814, "step": 5750 }, { "epoch": 3.0252498684902682, "grad_norm": 2.3099288940429688, "learning_rate": 2.501389347466778e-06, "loss": 0.8024, "step": 5751 }, { "epoch": 3.025775907417149, "grad_norm": 2.2068490982055664, "learning_rate": 2.500694673760208e-06, "loss": 0.8104, "step": 5752 }, { "epoch": 3.0263019463440295, "grad_norm": 2.1348161697387695, "learning_rate": 2.5e-06, "loss": 0.7745, "step": 5753 }, { "epoch": 3.02682798527091, "grad_norm": 2.191687822341919, "learning_rate": 2.4993053262397925e-06, "loss": 0.8355, "step": 5754 }, { "epoch": 3.0273540241977908, "grad_norm": 2.366182327270508, "learning_rate": 2.498610652533222e-06, "loss": 0.8168, "step": 5755 }, { "epoch": 3.0278800631246714, "grad_norm": 2.202754259109497, "learning_rate": 2.497915978933925e-06, "loss": 0.8232, "step": 5756 }, { "epoch": 3.028406102051552, "grad_norm": 2.2198166847229004, "learning_rate": 2.4972213054955385e-06, "loss": 0.8106, "step": 5757 }, { "epoch": 3.028932140978432, "grad_norm": 2.3269569873809814, "learning_rate": 2.496526632271699e-06, "loss": 0.7866, "step": 5758 }, { "epoch": 3.029458179905313, "grad_norm": 2.5298075675964355, "learning_rate": 2.495831959316043e-06, "loss": 0.8756, "step": 5759 }, { "epoch": 3.0299842188321935, "grad_norm": 2.1773805618286133, "learning_rate": 2.4951372866822075e-06, "loss": 0.8116, "step": 5760 }, { "epoch": 3.030510257759074, "grad_norm": 2.1818623542785645, "learning_rate": 2.494442614423829e-06, "loss": 0.7449, "step": 5761 }, { "epoch": 3.0310362966859548, "grad_norm": 2.286184310913086, "learning_rate": 2.493747942594545e-06, "loss": 0.833, "step": 5762 }, { "epoch": 3.0315623356128354, "grad_norm": 2.0997354984283447, "learning_rate": 2.493053271247991e-06, "loss": 0.8355, "step": 5763 }, { "epoch": 3.032088374539716, "grad_norm": 2.2153899669647217, "learning_rate": 2.4923586004378053e-06, "loss": 0.8251, "step": 5764 }, { "epoch": 3.0326144134665967, "grad_norm": 2.10802960395813, "learning_rate": 2.491663930217622e-06, "loss": 0.8127, "step": 5765 }, { "epoch": 3.0331404523934773, "grad_norm": 2.1321182250976562, "learning_rate": 2.490969260641079e-06, "loss": 0.8466, "step": 5766 }, { "epoch": 3.033666491320358, "grad_norm": 2.261615753173828, "learning_rate": 2.490274591761812e-06, "loss": 0.7851, "step": 5767 }, { "epoch": 3.034192530247238, "grad_norm": 2.2502803802490234, "learning_rate": 2.489579923633459e-06, "loss": 0.8143, "step": 5768 }, { "epoch": 3.0347185691741188, "grad_norm": 2.1797590255737305, "learning_rate": 2.488885256309655e-06, "loss": 0.7921, "step": 5769 }, { "epoch": 3.0352446081009994, "grad_norm": 2.068845510482788, "learning_rate": 2.4881905898440364e-06, "loss": 0.8089, "step": 5770 }, { "epoch": 3.03577064702788, "grad_norm": 2.2446391582489014, "learning_rate": 2.4874959242902404e-06, "loss": 0.8324, "step": 5771 }, { "epoch": 3.0362966859547607, "grad_norm": 2.264199733734131, "learning_rate": 2.4868012597019015e-06, "loss": 0.8126, "step": 5772 }, { "epoch": 3.0368227248816413, "grad_norm": 2.201575994491577, "learning_rate": 2.486106596132657e-06, "loss": 0.7611, "step": 5773 }, { "epoch": 3.037348763808522, "grad_norm": 2.2911672592163086, "learning_rate": 2.485411933636142e-06, "loss": 0.8404, "step": 5774 }, { "epoch": 3.0378748027354026, "grad_norm": 2.212791681289673, "learning_rate": 2.484717272265994e-06, "loss": 0.8273, "step": 5775 }, { "epoch": 3.038400841662283, "grad_norm": 2.3167195320129395, "learning_rate": 2.484022612075847e-06, "loss": 0.8688, "step": 5776 }, { "epoch": 3.0389268805891634, "grad_norm": 2.0987792015075684, "learning_rate": 2.4833279531193373e-06, "loss": 0.8132, "step": 5777 }, { "epoch": 3.039452919516044, "grad_norm": 2.163752317428589, "learning_rate": 2.482633295450102e-06, "loss": 0.813, "step": 5778 }, { "epoch": 3.0399789584429247, "grad_norm": 2.1730055809020996, "learning_rate": 2.481938639121774e-06, "loss": 0.7889, "step": 5779 }, { "epoch": 3.0405049973698053, "grad_norm": 2.18927001953125, "learning_rate": 2.481243984187991e-06, "loss": 0.8085, "step": 5780 }, { "epoch": 3.041031036296686, "grad_norm": 2.1857187747955322, "learning_rate": 2.480549330702387e-06, "loss": 0.7767, "step": 5781 }, { "epoch": 3.0415570752235666, "grad_norm": 2.1195292472839355, "learning_rate": 2.4798546787185972e-06, "loss": 0.7886, "step": 5782 }, { "epoch": 3.042083114150447, "grad_norm": 2.825514316558838, "learning_rate": 2.4791600282902576e-06, "loss": 0.8076, "step": 5783 }, { "epoch": 3.042609153077328, "grad_norm": 2.1252899169921875, "learning_rate": 2.4784653794710024e-06, "loss": 0.7769, "step": 5784 }, { "epoch": 3.0431351920042085, "grad_norm": 2.1425621509552, "learning_rate": 2.477770732314467e-06, "loss": 0.834, "step": 5785 }, { "epoch": 3.043661230931089, "grad_norm": 2.419534921646118, "learning_rate": 2.4770760868742865e-06, "loss": 0.8415, "step": 5786 }, { "epoch": 3.0441872698579693, "grad_norm": 2.279653310775757, "learning_rate": 2.476381443204094e-06, "loss": 0.7668, "step": 5787 }, { "epoch": 3.04471330878485, "grad_norm": 2.125697612762451, "learning_rate": 2.475686801357525e-06, "loss": 0.8246, "step": 5788 }, { "epoch": 3.0452393477117305, "grad_norm": 2.147805690765381, "learning_rate": 2.4749921613882137e-06, "loss": 0.8627, "step": 5789 }, { "epoch": 3.045765386638611, "grad_norm": 2.264183759689331, "learning_rate": 2.474297523349794e-06, "loss": 0.8434, "step": 5790 }, { "epoch": 3.046291425565492, "grad_norm": 2.1705267429351807, "learning_rate": 2.4736028872959e-06, "loss": 0.8386, "step": 5791 }, { "epoch": 3.0468174644923725, "grad_norm": 2.1672418117523193, "learning_rate": 2.4729082532801666e-06, "loss": 0.8398, "step": 5792 }, { "epoch": 3.047343503419253, "grad_norm": 2.2219722270965576, "learning_rate": 2.472213621356226e-06, "loss": 0.782, "step": 5793 }, { "epoch": 3.0478695423461337, "grad_norm": 1.9884467124938965, "learning_rate": 2.471518991577713e-06, "loss": 0.8168, "step": 5794 }, { "epoch": 3.0483955812730144, "grad_norm": 2.1909615993499756, "learning_rate": 2.4708243639982593e-06, "loss": 0.851, "step": 5795 }, { "epoch": 3.048921620199895, "grad_norm": 2.0357463359832764, "learning_rate": 2.4701297386714993e-06, "loss": 0.8023, "step": 5796 }, { "epoch": 3.049447659126775, "grad_norm": 2.148435354232788, "learning_rate": 2.4694351156510656e-06, "loss": 0.8037, "step": 5797 }, { "epoch": 3.049973698053656, "grad_norm": 2.2189764976501465, "learning_rate": 2.4687404949905922e-06, "loss": 0.8456, "step": 5798 }, { "epoch": 3.0504997369805364, "grad_norm": 2.35292387008667, "learning_rate": 2.4680458767437105e-06, "loss": 0.7852, "step": 5799 }, { "epoch": 3.051025775907417, "grad_norm": 2.2902071475982666, "learning_rate": 2.467351260964054e-06, "loss": 0.8435, "step": 5800 }, { "epoch": 3.0515518148342977, "grad_norm": 2.2479214668273926, "learning_rate": 2.4666566477052536e-06, "loss": 0.7894, "step": 5801 }, { "epoch": 3.0520778537611783, "grad_norm": 2.27602219581604, "learning_rate": 2.465962037020942e-06, "loss": 0.8214, "step": 5802 }, { "epoch": 3.052603892688059, "grad_norm": 2.304323673248291, "learning_rate": 2.465267428964752e-06, "loss": 0.8558, "step": 5803 }, { "epoch": 3.0531299316149396, "grad_norm": 2.1953742504119873, "learning_rate": 2.4645728235903136e-06, "loss": 0.7993, "step": 5804 }, { "epoch": 3.0536559705418203, "grad_norm": 2.2193920612335205, "learning_rate": 2.4638782209512608e-06, "loss": 0.8315, "step": 5805 }, { "epoch": 3.054182009468701, "grad_norm": 2.1515684127807617, "learning_rate": 2.4631836211012223e-06, "loss": 0.8195, "step": 5806 }, { "epoch": 3.054708048395581, "grad_norm": 2.0692148208618164, "learning_rate": 2.4624890240938315e-06, "loss": 0.8339, "step": 5807 }, { "epoch": 3.0552340873224617, "grad_norm": 2.2709131240844727, "learning_rate": 2.461794429982717e-06, "loss": 0.8784, "step": 5808 }, { "epoch": 3.0557601262493423, "grad_norm": 2.1480321884155273, "learning_rate": 2.4610998388215102e-06, "loss": 0.807, "step": 5809 }, { "epoch": 3.056286165176223, "grad_norm": 2.2839674949645996, "learning_rate": 2.4604052506638417e-06, "loss": 0.8173, "step": 5810 }, { "epoch": 3.0568122041031036, "grad_norm": 2.063239097595215, "learning_rate": 2.4597106655633425e-06, "loss": 0.8161, "step": 5811 }, { "epoch": 3.0573382430299842, "grad_norm": 2.1640236377716064, "learning_rate": 2.4590160835736413e-06, "loss": 0.8347, "step": 5812 }, { "epoch": 3.057864281956865, "grad_norm": 2.0111660957336426, "learning_rate": 2.458321504748368e-06, "loss": 0.8013, "step": 5813 }, { "epoch": 3.0583903208837455, "grad_norm": 2.281169891357422, "learning_rate": 2.4576269291411535e-06, "loss": 0.7875, "step": 5814 }, { "epoch": 3.058916359810626, "grad_norm": 2.0682382583618164, "learning_rate": 2.4569323568056247e-06, "loss": 0.7972, "step": 5815 }, { "epoch": 3.059442398737507, "grad_norm": 2.0746471881866455, "learning_rate": 2.4562377877954116e-06, "loss": 0.8349, "step": 5816 }, { "epoch": 3.059968437664387, "grad_norm": 2.221238136291504, "learning_rate": 2.455543222164143e-06, "loss": 0.8097, "step": 5817 }, { "epoch": 3.0604944765912676, "grad_norm": 2.4484550952911377, "learning_rate": 2.4548486599654475e-06, "loss": 0.8306, "step": 5818 }, { "epoch": 3.0610205155181482, "grad_norm": 2.2193384170532227, "learning_rate": 2.454154101252953e-06, "loss": 0.8297, "step": 5819 }, { "epoch": 3.061546554445029, "grad_norm": 2.3092098236083984, "learning_rate": 2.4534595460802868e-06, "loss": 0.8785, "step": 5820 }, { "epoch": 3.0620725933719095, "grad_norm": 2.3176801204681396, "learning_rate": 2.4527649945010788e-06, "loss": 0.8116, "step": 5821 }, { "epoch": 3.06259863229879, "grad_norm": 2.3127601146698, "learning_rate": 2.452070446568953e-06, "loss": 0.8308, "step": 5822 }, { "epoch": 3.063124671225671, "grad_norm": 2.0995121002197266, "learning_rate": 2.4513759023375384e-06, "loss": 0.7851, "step": 5823 }, { "epoch": 3.0636507101525514, "grad_norm": 2.184575319290161, "learning_rate": 2.4506813618604624e-06, "loss": 0.8299, "step": 5824 }, { "epoch": 3.064176749079432, "grad_norm": 2.185290813446045, "learning_rate": 2.4499868251913496e-06, "loss": 0.832, "step": 5825 }, { "epoch": 3.0647027880063122, "grad_norm": 2.1704347133636475, "learning_rate": 2.449292292383827e-06, "loss": 0.79, "step": 5826 }, { "epoch": 3.065228826933193, "grad_norm": 2.265007734298706, "learning_rate": 2.448597763491521e-06, "loss": 0.8314, "step": 5827 }, { "epoch": 3.0657548658600735, "grad_norm": 2.126673936843872, "learning_rate": 2.447903238568058e-06, "loss": 0.769, "step": 5828 }, { "epoch": 3.066280904786954, "grad_norm": 2.1944780349731445, "learning_rate": 2.447208717667061e-06, "loss": 0.8467, "step": 5829 }, { "epoch": 3.0668069437138348, "grad_norm": 2.109316349029541, "learning_rate": 2.4465142008421566e-06, "loss": 0.7824, "step": 5830 }, { "epoch": 3.0673329826407154, "grad_norm": 2.196589231491089, "learning_rate": 2.4458196881469685e-06, "loss": 0.7789, "step": 5831 }, { "epoch": 3.067859021567596, "grad_norm": 2.203833818435669, "learning_rate": 2.4451251796351215e-06, "loss": 0.7848, "step": 5832 }, { "epoch": 3.0683850604944767, "grad_norm": 2.2787320613861084, "learning_rate": 2.44443067536024e-06, "loss": 0.8356, "step": 5833 }, { "epoch": 3.0689110994213573, "grad_norm": 2.2983996868133545, "learning_rate": 2.443736175375947e-06, "loss": 0.8271, "step": 5834 }, { "epoch": 3.069437138348238, "grad_norm": 2.3213791847229004, "learning_rate": 2.4430416797358665e-06, "loss": 0.8097, "step": 5835 }, { "epoch": 3.0699631772751186, "grad_norm": 2.2485578060150146, "learning_rate": 2.4423471884936215e-06, "loss": 0.8097, "step": 5836 }, { "epoch": 3.0704892162019988, "grad_norm": 2.0908069610595703, "learning_rate": 2.4416527017028346e-06, "loss": 0.7767, "step": 5837 }, { "epoch": 3.0710152551288794, "grad_norm": 2.185401439666748, "learning_rate": 2.440958219417127e-06, "loss": 0.8458, "step": 5838 }, { "epoch": 3.07154129405576, "grad_norm": 2.124192714691162, "learning_rate": 2.4402637416901225e-06, "loss": 0.7804, "step": 5839 }, { "epoch": 3.0720673329826407, "grad_norm": 2.263355016708374, "learning_rate": 2.439569268575441e-06, "loss": 0.8106, "step": 5840 }, { "epoch": 3.0725933719095213, "grad_norm": 2.21267032623291, "learning_rate": 2.4388748001267052e-06, "loss": 0.8209, "step": 5841 }, { "epoch": 3.073119410836402, "grad_norm": 2.364515542984009, "learning_rate": 2.4381803363975355e-06, "loss": 0.7883, "step": 5842 }, { "epoch": 3.0736454497632826, "grad_norm": 2.4426820278167725, "learning_rate": 2.4374858774415535e-06, "loss": 0.847, "step": 5843 }, { "epoch": 3.074171488690163, "grad_norm": 2.354217767715454, "learning_rate": 2.436791423312377e-06, "loss": 0.8392, "step": 5844 }, { "epoch": 3.074697527617044, "grad_norm": 2.1064417362213135, "learning_rate": 2.436096974063627e-06, "loss": 0.8017, "step": 5845 }, { "epoch": 3.075223566543924, "grad_norm": 2.259860038757324, "learning_rate": 2.4354025297489234e-06, "loss": 0.8299, "step": 5846 }, { "epoch": 3.0757496054708047, "grad_norm": 2.091794013977051, "learning_rate": 2.4347080904218847e-06, "loss": 0.8245, "step": 5847 }, { "epoch": 3.0762756443976853, "grad_norm": 2.2322943210601807, "learning_rate": 2.434013656136131e-06, "loss": 0.7495, "step": 5848 }, { "epoch": 3.076801683324566, "grad_norm": 2.3636839389801025, "learning_rate": 2.4333192269452783e-06, "loss": 0.7846, "step": 5849 }, { "epoch": 3.0773277222514466, "grad_norm": 2.142199993133545, "learning_rate": 2.4326248029029466e-06, "loss": 0.8152, "step": 5850 }, { "epoch": 3.077853761178327, "grad_norm": 2.237945318222046, "learning_rate": 2.4319303840627517e-06, "loss": 0.8545, "step": 5851 }, { "epoch": 3.078379800105208, "grad_norm": 2.2256410121917725, "learning_rate": 2.4312359704783114e-06, "loss": 0.8323, "step": 5852 }, { "epoch": 3.0789058390320885, "grad_norm": 2.1710093021392822, "learning_rate": 2.4305415622032427e-06, "loss": 0.7962, "step": 5853 }, { "epoch": 3.079431877958969, "grad_norm": 2.1980249881744385, "learning_rate": 2.4298471592911613e-06, "loss": 0.8148, "step": 5854 }, { "epoch": 3.0799579168858497, "grad_norm": 2.1553313732147217, "learning_rate": 2.4291527617956833e-06, "loss": 0.7902, "step": 5855 }, { "epoch": 3.08048395581273, "grad_norm": 2.344592809677124, "learning_rate": 2.428458369770424e-06, "loss": 0.8473, "step": 5856 }, { "epoch": 3.0810099947396106, "grad_norm": 2.412095069885254, "learning_rate": 2.427763983269e-06, "loss": 0.8269, "step": 5857 }, { "epoch": 3.081536033666491, "grad_norm": 2.3208255767822266, "learning_rate": 2.4270696023450237e-06, "loss": 0.7842, "step": 5858 }, { "epoch": 3.082062072593372, "grad_norm": 2.296003580093384, "learning_rate": 2.4263752270521093e-06, "loss": 0.7827, "step": 5859 }, { "epoch": 3.0825881115202525, "grad_norm": 2.20358943939209, "learning_rate": 2.425680857443872e-06, "loss": 0.8144, "step": 5860 }, { "epoch": 3.083114150447133, "grad_norm": 2.169062614440918, "learning_rate": 2.4249864935739246e-06, "loss": 0.8192, "step": 5861 }, { "epoch": 3.0836401893740137, "grad_norm": 2.3042850494384766, "learning_rate": 2.424292135495879e-06, "loss": 0.8135, "step": 5862 }, { "epoch": 3.0841662283008944, "grad_norm": 2.347749948501587, "learning_rate": 2.423597783263349e-06, "loss": 0.8407, "step": 5863 }, { "epoch": 3.084692267227775, "grad_norm": 2.266805410385132, "learning_rate": 2.4229034369299463e-06, "loss": 0.8228, "step": 5864 }, { "epoch": 3.0852183061546556, "grad_norm": 2.2679619789123535, "learning_rate": 2.4222090965492816e-06, "loss": 0.7709, "step": 5865 }, { "epoch": 3.085744345081536, "grad_norm": 2.1939709186553955, "learning_rate": 2.421514762174966e-06, "loss": 0.7732, "step": 5866 }, { "epoch": 3.0862703840084165, "grad_norm": 2.226367950439453, "learning_rate": 2.420820433860611e-06, "loss": 0.808, "step": 5867 }, { "epoch": 3.086796422935297, "grad_norm": 2.2870755195617676, "learning_rate": 2.4201261116598254e-06, "loss": 0.8286, "step": 5868 }, { "epoch": 3.0873224618621777, "grad_norm": 2.337294101715088, "learning_rate": 2.41943179562622e-06, "loss": 0.8523, "step": 5869 }, { "epoch": 3.0878485007890584, "grad_norm": 2.2871158123016357, "learning_rate": 2.418737485813403e-06, "loss": 0.7861, "step": 5870 }, { "epoch": 3.088374539715939, "grad_norm": 2.280758857727051, "learning_rate": 2.418043182274985e-06, "loss": 0.8591, "step": 5871 }, { "epoch": 3.0889005786428196, "grad_norm": 2.1454367637634277, "learning_rate": 2.4173488850645713e-06, "loss": 0.776, "step": 5872 }, { "epoch": 3.0894266175697003, "grad_norm": 2.1456785202026367, "learning_rate": 2.4166545942357724e-06, "loss": 0.7609, "step": 5873 }, { "epoch": 3.089952656496581, "grad_norm": 2.2060141563415527, "learning_rate": 2.415960309842193e-06, "loss": 0.8103, "step": 5874 }, { "epoch": 3.0904786954234615, "grad_norm": 2.2313082218170166, "learning_rate": 2.415266031937441e-06, "loss": 0.7718, "step": 5875 }, { "epoch": 3.0910047343503417, "grad_norm": 2.248889684677124, "learning_rate": 2.414571760575123e-06, "loss": 0.7485, "step": 5876 }, { "epoch": 3.0915307732772224, "grad_norm": 2.3810949325561523, "learning_rate": 2.413877495808844e-06, "loss": 0.8153, "step": 5877 }, { "epoch": 3.092056812204103, "grad_norm": 2.185899257659912, "learning_rate": 2.413183237692211e-06, "loss": 0.8, "step": 5878 }, { "epoch": 3.0925828511309836, "grad_norm": 2.139752149581909, "learning_rate": 2.4124889862788257e-06, "loss": 0.8008, "step": 5879 }, { "epoch": 3.0931088900578643, "grad_norm": 2.270521402359009, "learning_rate": 2.4117947416222946e-06, "loss": 0.7993, "step": 5880 }, { "epoch": 3.093634928984745, "grad_norm": 2.175020456314087, "learning_rate": 2.4111005037762196e-06, "loss": 0.8277, "step": 5881 }, { "epoch": 3.0941609679116255, "grad_norm": 2.148803234100342, "learning_rate": 2.4104062727942053e-06, "loss": 0.7886, "step": 5882 }, { "epoch": 3.094687006838506, "grad_norm": 2.086379051208496, "learning_rate": 2.409712048729853e-06, "loss": 0.8073, "step": 5883 }, { "epoch": 3.095213045765387, "grad_norm": 2.136272668838501, "learning_rate": 2.4090178316367673e-06, "loss": 0.7769, "step": 5884 }, { "epoch": 3.0957390846922674, "grad_norm": 2.220871686935425, "learning_rate": 2.408323621568546e-06, "loss": 0.8568, "step": 5885 }, { "epoch": 3.0962651236191476, "grad_norm": 2.2598464488983154, "learning_rate": 2.407629418578794e-06, "loss": 0.8213, "step": 5886 }, { "epoch": 3.0967911625460283, "grad_norm": 2.1845171451568604, "learning_rate": 2.4069352227211082e-06, "loss": 0.7864, "step": 5887 }, { "epoch": 3.097317201472909, "grad_norm": 2.14575457572937, "learning_rate": 2.40624103404909e-06, "loss": 0.8052, "step": 5888 }, { "epoch": 3.0978432403997895, "grad_norm": 2.307826519012451, "learning_rate": 2.405546852616339e-06, "loss": 0.8237, "step": 5889 }, { "epoch": 3.09836927932667, "grad_norm": 2.29620361328125, "learning_rate": 2.4048526784764536e-06, "loss": 0.7598, "step": 5890 }, { "epoch": 3.098895318253551, "grad_norm": 2.149925947189331, "learning_rate": 2.4041585116830323e-06, "loss": 0.8121, "step": 5891 }, { "epoch": 3.0994213571804314, "grad_norm": 2.028564214706421, "learning_rate": 2.403464352289672e-06, "loss": 0.8198, "step": 5892 }, { "epoch": 3.099947396107312, "grad_norm": 2.3110191822052, "learning_rate": 2.4027702003499716e-06, "loss": 0.867, "step": 5893 }, { "epoch": 3.1004734350341927, "grad_norm": 2.2690868377685547, "learning_rate": 2.402076055917525e-06, "loss": 0.8023, "step": 5894 }, { "epoch": 3.100999473961073, "grad_norm": 2.3075859546661377, "learning_rate": 2.4013819190459292e-06, "loss": 0.9115, "step": 5895 }, { "epoch": 3.1015255128879535, "grad_norm": 2.1851799488067627, "learning_rate": 2.4006877897887798e-06, "loss": 0.8443, "step": 5896 }, { "epoch": 3.102051551814834, "grad_norm": 2.1979665756225586, "learning_rate": 2.399993668199672e-06, "loss": 0.83, "step": 5897 }, { "epoch": 3.102577590741715, "grad_norm": 2.2022345066070557, "learning_rate": 2.399299554332199e-06, "loss": 0.7908, "step": 5898 }, { "epoch": 3.1031036296685954, "grad_norm": 2.280691385269165, "learning_rate": 2.3986054482399548e-06, "loss": 0.8733, "step": 5899 }, { "epoch": 3.103629668595476, "grad_norm": 2.112553834915161, "learning_rate": 2.397911349976533e-06, "loss": 0.792, "step": 5900 }, { "epoch": 3.1041557075223567, "grad_norm": 2.178797721862793, "learning_rate": 2.3972172595955244e-06, "loss": 0.7987, "step": 5901 }, { "epoch": 3.1046817464492373, "grad_norm": 2.2948946952819824, "learning_rate": 2.396523177150521e-06, "loss": 0.7998, "step": 5902 }, { "epoch": 3.105207785376118, "grad_norm": 2.2072057723999023, "learning_rate": 2.395829102695116e-06, "loss": 0.7657, "step": 5903 }, { "epoch": 3.1057338243029986, "grad_norm": 2.224895715713501, "learning_rate": 2.395135036282897e-06, "loss": 0.7994, "step": 5904 }, { "epoch": 3.1062598632298792, "grad_norm": 2.157320976257324, "learning_rate": 2.3944409779674554e-06, "loss": 0.7891, "step": 5905 }, { "epoch": 3.1067859021567594, "grad_norm": 2.3503975868225098, "learning_rate": 2.3937469278023805e-06, "loss": 0.7953, "step": 5906 }, { "epoch": 3.10731194108364, "grad_norm": 2.223917007446289, "learning_rate": 2.3930528858412618e-06, "loss": 0.7978, "step": 5907 }, { "epoch": 3.1078379800105207, "grad_norm": 2.371192455291748, "learning_rate": 2.3923588521376848e-06, "loss": 0.8739, "step": 5908 }, { "epoch": 3.1083640189374013, "grad_norm": 2.2608249187469482, "learning_rate": 2.3916648267452386e-06, "loss": 0.8402, "step": 5909 }, { "epoch": 3.108890057864282, "grad_norm": 2.3531196117401123, "learning_rate": 2.39097080971751e-06, "loss": 0.8154, "step": 5910 }, { "epoch": 3.1094160967911626, "grad_norm": 2.210343599319458, "learning_rate": 2.390276801108084e-06, "loss": 0.7769, "step": 5911 }, { "epoch": 3.1099421357180432, "grad_norm": 2.2178821563720703, "learning_rate": 2.389582800970547e-06, "loss": 0.7822, "step": 5912 }, { "epoch": 3.110468174644924, "grad_norm": 2.3087480068206787, "learning_rate": 2.3888888093584835e-06, "loss": 0.8253, "step": 5913 }, { "epoch": 3.1109942135718045, "grad_norm": 2.1072237491607666, "learning_rate": 2.3881948263254783e-06, "loss": 0.8268, "step": 5914 }, { "epoch": 3.1115202524986847, "grad_norm": 2.342146396636963, "learning_rate": 2.387500851925113e-06, "loss": 0.824, "step": 5915 }, { "epoch": 3.1120462914255653, "grad_norm": 2.3220486640930176, "learning_rate": 2.3868068862109726e-06, "loss": 0.8038, "step": 5916 }, { "epoch": 3.112572330352446, "grad_norm": 2.1427743434906006, "learning_rate": 2.386112929236637e-06, "loss": 0.745, "step": 5917 }, { "epoch": 3.1130983692793266, "grad_norm": 2.3201498985290527, "learning_rate": 2.385418981055689e-06, "loss": 0.8114, "step": 5918 }, { "epoch": 3.1136244082062072, "grad_norm": 2.1929383277893066, "learning_rate": 2.384725041721709e-06, "loss": 0.8574, "step": 5919 }, { "epoch": 3.114150447133088, "grad_norm": 2.349946975708008, "learning_rate": 2.3840311112882773e-06, "loss": 0.7747, "step": 5920 }, { "epoch": 3.1146764860599685, "grad_norm": 2.224533796310425, "learning_rate": 2.3833371898089735e-06, "loss": 0.8031, "step": 5921 }, { "epoch": 3.115202524986849, "grad_norm": 2.2654600143432617, "learning_rate": 2.3826432773373762e-06, "loss": 0.8008, "step": 5922 }, { "epoch": 3.1157285639137298, "grad_norm": 2.2378251552581787, "learning_rate": 2.381949373927062e-06, "loss": 0.7879, "step": 5923 }, { "epoch": 3.1162546028406104, "grad_norm": 2.282282829284668, "learning_rate": 2.38125547963161e-06, "loss": 0.8161, "step": 5924 }, { "epoch": 3.1167806417674906, "grad_norm": 2.2358434200286865, "learning_rate": 2.3805615945045957e-06, "loss": 0.8012, "step": 5925 }, { "epoch": 3.1173066806943712, "grad_norm": 3.4927937984466553, "learning_rate": 2.3798677185995954e-06, "loss": 0.8296, "step": 5926 }, { "epoch": 3.117832719621252, "grad_norm": 2.3229146003723145, "learning_rate": 2.379173851970185e-06, "loss": 0.8149, "step": 5927 }, { "epoch": 3.1183587585481325, "grad_norm": 2.207639694213867, "learning_rate": 2.378479994669938e-06, "loss": 0.7838, "step": 5928 }, { "epoch": 3.118884797475013, "grad_norm": 2.2155826091766357, "learning_rate": 2.3777861467524285e-06, "loss": 0.786, "step": 5929 }, { "epoch": 3.1194108364018938, "grad_norm": 2.201019048690796, "learning_rate": 2.377092308271229e-06, "loss": 0.7838, "step": 5930 }, { "epoch": 3.1199368753287744, "grad_norm": 2.1281487941741943, "learning_rate": 2.3763984792799117e-06, "loss": 0.7689, "step": 5931 }, { "epoch": 3.120462914255655, "grad_norm": 2.224830150604248, "learning_rate": 2.375704659832049e-06, "loss": 0.7982, "step": 5932 }, { "epoch": 3.1209889531825357, "grad_norm": 2.2306160926818848, "learning_rate": 2.3750108499812114e-06, "loss": 0.8383, "step": 5933 }, { "epoch": 3.1215149921094163, "grad_norm": 2.2206039428710938, "learning_rate": 2.374317049780969e-06, "loss": 0.8228, "step": 5934 }, { "epoch": 3.1220410310362965, "grad_norm": 2.2433090209960938, "learning_rate": 2.373623259284891e-06, "loss": 0.7452, "step": 5935 }, { "epoch": 3.122567069963177, "grad_norm": 2.2349460124969482, "learning_rate": 2.372929478546547e-06, "loss": 0.8046, "step": 5936 }, { "epoch": 3.1230931088900578, "grad_norm": 2.2080681324005127, "learning_rate": 2.3722357076195025e-06, "loss": 0.8085, "step": 5937 }, { "epoch": 3.1236191478169384, "grad_norm": 2.8189799785614014, "learning_rate": 2.3715419465573256e-06, "loss": 0.8572, "step": 5938 }, { "epoch": 3.124145186743819, "grad_norm": 2.2069625854492188, "learning_rate": 2.3708481954135836e-06, "loss": 0.7888, "step": 5939 }, { "epoch": 3.1246712256706997, "grad_norm": 2.230613946914673, "learning_rate": 2.3701544542418414e-06, "loss": 0.7894, "step": 5940 }, { "epoch": 3.1251972645975803, "grad_norm": 2.3029322624206543, "learning_rate": 2.3694607230956633e-06, "loss": 0.8001, "step": 5941 }, { "epoch": 3.125723303524461, "grad_norm": 2.355154514312744, "learning_rate": 2.368767002028614e-06, "loss": 0.832, "step": 5942 }, { "epoch": 3.1262493424513416, "grad_norm": 2.190351724624634, "learning_rate": 2.3680732910942574e-06, "loss": 0.8393, "step": 5943 }, { "epoch": 3.126775381378222, "grad_norm": 2.1369519233703613, "learning_rate": 2.367379590346154e-06, "loss": 0.8154, "step": 5944 }, { "epoch": 3.1273014203051024, "grad_norm": 2.2685189247131348, "learning_rate": 2.3666858998378663e-06, "loss": 0.859, "step": 5945 }, { "epoch": 3.127827459231983, "grad_norm": 2.1182706356048584, "learning_rate": 2.3659922196229556e-06, "loss": 0.8038, "step": 5946 }, { "epoch": 3.1283534981588637, "grad_norm": 2.1755259037017822, "learning_rate": 2.3652985497549814e-06, "loss": 0.8161, "step": 5947 }, { "epoch": 3.1288795370857443, "grad_norm": 2.1373345851898193, "learning_rate": 2.3646048902875032e-06, "loss": 0.7861, "step": 5948 }, { "epoch": 3.129405576012625, "grad_norm": 2.370253324508667, "learning_rate": 2.3639112412740795e-06, "loss": 0.7858, "step": 5949 }, { "epoch": 3.1299316149395056, "grad_norm": 2.233105421066284, "learning_rate": 2.363217602768269e-06, "loss": 0.8165, "step": 5950 }, { "epoch": 3.130457653866386, "grad_norm": 2.3604917526245117, "learning_rate": 2.3625239748236262e-06, "loss": 0.8042, "step": 5951 }, { "epoch": 3.130983692793267, "grad_norm": 2.2326595783233643, "learning_rate": 2.361830357493709e-06, "loss": 0.7669, "step": 5952 }, { "epoch": 3.1315097317201475, "grad_norm": 2.230588436126709, "learning_rate": 2.3611367508320716e-06, "loss": 0.8008, "step": 5953 }, { "epoch": 3.132035770647028, "grad_norm": 2.1719863414764404, "learning_rate": 2.3604431548922687e-06, "loss": 0.7932, "step": 5954 }, { "epoch": 3.1325618095739083, "grad_norm": 2.2667624950408936, "learning_rate": 2.359749569727854e-06, "loss": 0.803, "step": 5955 }, { "epoch": 3.133087848500789, "grad_norm": 2.32043194770813, "learning_rate": 2.35905599539238e-06, "loss": 0.8213, "step": 5956 }, { "epoch": 3.1336138874276696, "grad_norm": 2.1442761421203613, "learning_rate": 2.3583624319394e-06, "loss": 0.8268, "step": 5957 }, { "epoch": 3.13413992635455, "grad_norm": 2.1477997303009033, "learning_rate": 2.357668879422463e-06, "loss": 0.7687, "step": 5958 }, { "epoch": 3.134665965281431, "grad_norm": 2.0788047313690186, "learning_rate": 2.3569753378951197e-06, "loss": 0.775, "step": 5959 }, { "epoch": 3.1351920042083115, "grad_norm": 2.6235759258270264, "learning_rate": 2.35628180741092e-06, "loss": 0.8277, "step": 5960 }, { "epoch": 3.135718043135192, "grad_norm": 2.1856398582458496, "learning_rate": 2.3555882880234122e-06, "loss": 0.7673, "step": 5961 }, { "epoch": 3.1362440820620727, "grad_norm": 2.3565890789031982, "learning_rate": 2.354894779786144e-06, "loss": 0.8306, "step": 5962 }, { "epoch": 3.1367701209889534, "grad_norm": 2.462799310684204, "learning_rate": 2.354201282752662e-06, "loss": 0.8285, "step": 5963 }, { "epoch": 3.1372961599158335, "grad_norm": 2.423048257827759, "learning_rate": 2.353507796976513e-06, "loss": 0.8462, "step": 5964 }, { "epoch": 3.137822198842714, "grad_norm": 2.1878185272216797, "learning_rate": 2.3528143225112405e-06, "loss": 0.8008, "step": 5965 }, { "epoch": 3.138348237769595, "grad_norm": 2.42857027053833, "learning_rate": 2.3521208594103894e-06, "loss": 0.8182, "step": 5966 }, { "epoch": 3.1388742766964755, "grad_norm": 2.228600025177002, "learning_rate": 2.351427407727503e-06, "loss": 0.8206, "step": 5967 }, { "epoch": 3.139400315623356, "grad_norm": 2.2668826580047607, "learning_rate": 2.3507339675161237e-06, "loss": 0.8422, "step": 5968 }, { "epoch": 3.1399263545502367, "grad_norm": 2.095421314239502, "learning_rate": 2.350040538829793e-06, "loss": 0.7851, "step": 5969 }, { "epoch": 3.1404523934771174, "grad_norm": 2.178001880645752, "learning_rate": 2.3493471217220526e-06, "loss": 0.8022, "step": 5970 }, { "epoch": 3.140978432403998, "grad_norm": 2.2133891582489014, "learning_rate": 2.3486537162464416e-06, "loss": 0.7983, "step": 5971 }, { "epoch": 3.1415044713308786, "grad_norm": 2.1747090816497803, "learning_rate": 2.3479603224564975e-06, "loss": 0.8266, "step": 5972 }, { "epoch": 3.1420305102577593, "grad_norm": 2.1512601375579834, "learning_rate": 2.347266940405759e-06, "loss": 0.8109, "step": 5973 }, { "epoch": 3.14255654918464, "grad_norm": 2.161353826522827, "learning_rate": 2.3465735701477642e-06, "loss": 0.8074, "step": 5974 }, { "epoch": 3.14308258811152, "grad_norm": 2.2712578773498535, "learning_rate": 2.345880211736048e-06, "loss": 0.8332, "step": 5975 }, { "epoch": 3.1436086270384007, "grad_norm": 2.168243885040283, "learning_rate": 2.3451868652241466e-06, "loss": 0.7916, "step": 5976 }, { "epoch": 3.1441346659652813, "grad_norm": 2.224085807800293, "learning_rate": 2.344493530665593e-06, "loss": 0.8471, "step": 5977 }, { "epoch": 3.144660704892162, "grad_norm": 2.2444825172424316, "learning_rate": 2.343800208113923e-06, "loss": 0.8163, "step": 5978 }, { "epoch": 3.1451867438190426, "grad_norm": 2.500720739364624, "learning_rate": 2.343106897622666e-06, "loss": 0.8414, "step": 5979 }, { "epoch": 3.1457127827459233, "grad_norm": 2.3448877334594727, "learning_rate": 2.342413599245355e-06, "loss": 0.7865, "step": 5980 }, { "epoch": 3.146238821672804, "grad_norm": 2.1659135818481445, "learning_rate": 2.3417203130355206e-06, "loss": 0.8444, "step": 5981 }, { "epoch": 3.1467648605996845, "grad_norm": 2.1767585277557373, "learning_rate": 2.341027039046693e-06, "loss": 0.8102, "step": 5982 }, { "epoch": 3.147290899526565, "grad_norm": 2.2483646869659424, "learning_rate": 2.3403337773323993e-06, "loss": 0.827, "step": 5983 }, { "epoch": 3.1478169384534453, "grad_norm": 2.253873586654663, "learning_rate": 2.3396405279461686e-06, "loss": 0.8177, "step": 5984 }, { "epoch": 3.148342977380326, "grad_norm": 2.2629575729370117, "learning_rate": 2.3389472909415283e-06, "loss": 0.8266, "step": 5985 }, { "epoch": 3.1488690163072066, "grad_norm": 2.3376593589782715, "learning_rate": 2.338254066372002e-06, "loss": 0.837, "step": 5986 }, { "epoch": 3.1493950552340872, "grad_norm": 2.224231243133545, "learning_rate": 2.3375608542911163e-06, "loss": 0.7855, "step": 5987 }, { "epoch": 3.149921094160968, "grad_norm": 2.3139429092407227, "learning_rate": 2.3368676547523946e-06, "loss": 0.7806, "step": 5988 }, { "epoch": 3.1504471330878485, "grad_norm": 2.225606679916382, "learning_rate": 2.33617446780936e-06, "loss": 0.792, "step": 5989 }, { "epoch": 3.150973172014729, "grad_norm": 2.1874096393585205, "learning_rate": 2.3354812935155336e-06, "loss": 0.7817, "step": 5990 }, { "epoch": 3.15149921094161, "grad_norm": 2.5088999271392822, "learning_rate": 2.3347881319244377e-06, "loss": 0.8115, "step": 5991 }, { "epoch": 3.1520252498684904, "grad_norm": 2.3347179889678955, "learning_rate": 2.334094983089592e-06, "loss": 0.7668, "step": 5992 }, { "epoch": 3.152551288795371, "grad_norm": 2.296952486038208, "learning_rate": 2.333401847064516e-06, "loss": 0.8221, "step": 5993 }, { "epoch": 3.1530773277222512, "grad_norm": 2.2131714820861816, "learning_rate": 2.3327087239027263e-06, "loss": 0.7817, "step": 5994 }, { "epoch": 3.153603366649132, "grad_norm": 2.3841135501861572, "learning_rate": 2.332015613657741e-06, "loss": 0.8275, "step": 5995 }, { "epoch": 3.1541294055760125, "grad_norm": 2.30497145652771, "learning_rate": 2.3313225163830757e-06, "loss": 0.8243, "step": 5996 }, { "epoch": 3.154655444502893, "grad_norm": 2.2384250164031982, "learning_rate": 2.3306294321322454e-06, "loss": 0.805, "step": 5997 }, { "epoch": 3.155181483429774, "grad_norm": 2.376011610031128, "learning_rate": 2.3299363609587643e-06, "loss": 0.8132, "step": 5998 }, { "epoch": 3.1557075223566544, "grad_norm": 2.333233118057251, "learning_rate": 2.3292433029161456e-06, "loss": 0.8375, "step": 5999 }, { "epoch": 3.156233561283535, "grad_norm": 2.18045973777771, "learning_rate": 2.3285502580579025e-06, "loss": 0.7808, "step": 6000 }, { "epoch": 3.1567596002104157, "grad_norm": 2.3686559200286865, "learning_rate": 2.327857226437544e-06, "loss": 0.8499, "step": 6001 }, { "epoch": 3.1572856391372963, "grad_norm": 2.3335983753204346, "learning_rate": 2.3271642081085804e-06, "loss": 0.833, "step": 6002 }, { "epoch": 3.157811678064177, "grad_norm": 2.2628917694091797, "learning_rate": 2.3264712031245214e-06, "loss": 0.7864, "step": 6003 }, { "epoch": 3.158337716991057, "grad_norm": 2.168856382369995, "learning_rate": 2.3257782115388744e-06, "loss": 0.7831, "step": 6004 }, { "epoch": 3.1588637559179378, "grad_norm": 2.0589842796325684, "learning_rate": 2.325085233405146e-06, "loss": 0.8097, "step": 6005 }, { "epoch": 3.1593897948448184, "grad_norm": 2.3899471759796143, "learning_rate": 2.324392268776844e-06, "loss": 0.8312, "step": 6006 }, { "epoch": 3.159915833771699, "grad_norm": 2.2925362586975098, "learning_rate": 2.3236993177074714e-06, "loss": 0.755, "step": 6007 }, { "epoch": 3.1604418726985797, "grad_norm": 2.1601576805114746, "learning_rate": 2.3230063802505327e-06, "loss": 0.8086, "step": 6008 }, { "epoch": 3.1609679116254603, "grad_norm": 2.3079140186309814, "learning_rate": 2.3223134564595297e-06, "loss": 0.8269, "step": 6009 }, { "epoch": 3.161493950552341, "grad_norm": 2.156002998352051, "learning_rate": 2.321620546387964e-06, "loss": 0.8218, "step": 6010 }, { "epoch": 3.1620199894792216, "grad_norm": 2.176419973373413, "learning_rate": 2.3209276500893372e-06, "loss": 0.7962, "step": 6011 }, { "epoch": 3.162546028406102, "grad_norm": 2.1706833839416504, "learning_rate": 2.3202347676171493e-06, "loss": 0.8264, "step": 6012 }, { "epoch": 3.1630720673329824, "grad_norm": 2.2585530281066895, "learning_rate": 2.3195418990248973e-06, "loss": 0.8073, "step": 6013 }, { "epoch": 3.163598106259863, "grad_norm": 2.050306558609009, "learning_rate": 2.31884904436608e-06, "loss": 0.7705, "step": 6014 }, { "epoch": 3.1641241451867437, "grad_norm": 2.2980399131774902, "learning_rate": 2.3181562036941924e-06, "loss": 0.799, "step": 6015 }, { "epoch": 3.1646501841136243, "grad_norm": 2.158836841583252, "learning_rate": 2.31746337706273e-06, "loss": 0.7894, "step": 6016 }, { "epoch": 3.165176223040505, "grad_norm": 2.4046742916107178, "learning_rate": 2.3167705645251875e-06, "loss": 0.8743, "step": 6017 }, { "epoch": 3.1657022619673856, "grad_norm": 2.366253137588501, "learning_rate": 2.3160777661350575e-06, "loss": 0.7553, "step": 6018 }, { "epoch": 3.166228300894266, "grad_norm": 2.214048147201538, "learning_rate": 2.3153849819458325e-06, "loss": 0.7589, "step": 6019 }, { "epoch": 3.166754339821147, "grad_norm": 2.428605318069458, "learning_rate": 2.314692212011003e-06, "loss": 0.7948, "step": 6020 }, { "epoch": 3.1672803787480275, "grad_norm": 2.2491085529327393, "learning_rate": 2.31399945638406e-06, "loss": 0.7872, "step": 6021 }, { "epoch": 3.167806417674908, "grad_norm": 2.35034441947937, "learning_rate": 2.31330671511849e-06, "loss": 0.8201, "step": 6022 }, { "epoch": 3.1683324566017887, "grad_norm": 2.2569408416748047, "learning_rate": 2.312613988267781e-06, "loss": 0.8313, "step": 6023 }, { "epoch": 3.168858495528669, "grad_norm": 2.4875125885009766, "learning_rate": 2.3119212758854205e-06, "loss": 0.8104, "step": 6024 }, { "epoch": 3.1693845344555496, "grad_norm": 2.1511805057525635, "learning_rate": 2.311228578024894e-06, "loss": 0.7857, "step": 6025 }, { "epoch": 3.16991057338243, "grad_norm": 2.2785885334014893, "learning_rate": 2.3105358947396848e-06, "loss": 0.8118, "step": 6026 }, { "epoch": 3.170436612309311, "grad_norm": 2.3820993900299072, "learning_rate": 2.309843226083276e-06, "loss": 0.8418, "step": 6027 }, { "epoch": 3.1709626512361915, "grad_norm": 2.4566638469696045, "learning_rate": 2.309150572109151e-06, "loss": 0.8097, "step": 6028 }, { "epoch": 3.171488690163072, "grad_norm": 2.28102970123291, "learning_rate": 2.3084579328707883e-06, "loss": 0.8136, "step": 6029 }, { "epoch": 3.1720147290899527, "grad_norm": 2.3739588260650635, "learning_rate": 2.3077653084216688e-06, "loss": 0.7948, "step": 6030 }, { "epoch": 3.1725407680168334, "grad_norm": 2.32839298248291, "learning_rate": 2.3070726988152715e-06, "loss": 0.8509, "step": 6031 }, { "epoch": 3.173066806943714, "grad_norm": 2.2080790996551514, "learning_rate": 2.306380104105073e-06, "loss": 0.7774, "step": 6032 }, { "epoch": 3.173592845870594, "grad_norm": 2.17336368560791, "learning_rate": 2.30568752434455e-06, "loss": 0.805, "step": 6033 }, { "epoch": 3.174118884797475, "grad_norm": 2.204817533493042, "learning_rate": 2.304994959587177e-06, "loss": 0.7932, "step": 6034 }, { "epoch": 3.1746449237243555, "grad_norm": 2.258769989013672, "learning_rate": 2.3043024098864304e-06, "loss": 0.8224, "step": 6035 }, { "epoch": 3.175170962651236, "grad_norm": 2.056243419647217, "learning_rate": 2.303609875295779e-06, "loss": 0.7796, "step": 6036 }, { "epoch": 3.1756970015781167, "grad_norm": 2.1709413528442383, "learning_rate": 2.302917355868697e-06, "loss": 0.8128, "step": 6037 }, { "epoch": 3.1762230405049974, "grad_norm": 2.258206844329834, "learning_rate": 2.3022248516586547e-06, "loss": 0.8102, "step": 6038 }, { "epoch": 3.176749079431878, "grad_norm": 2.1269590854644775, "learning_rate": 2.30153236271912e-06, "loss": 0.7826, "step": 6039 }, { "epoch": 3.1772751183587586, "grad_norm": 2.2445785999298096, "learning_rate": 2.3008398891035623e-06, "loss": 0.8059, "step": 6040 }, { "epoch": 3.1778011572856393, "grad_norm": 2.1295299530029297, "learning_rate": 2.300147430865448e-06, "loss": 0.7545, "step": 6041 }, { "epoch": 3.17832719621252, "grad_norm": 2.3082053661346436, "learning_rate": 2.299454988058243e-06, "loss": 0.7939, "step": 6042 }, { "epoch": 3.1788532351394005, "grad_norm": 2.23637056350708, "learning_rate": 2.2987625607354123e-06, "loss": 0.7899, "step": 6043 }, { "epoch": 3.1793792740662807, "grad_norm": 2.3386917114257812, "learning_rate": 2.2980701489504186e-06, "loss": 0.8477, "step": 6044 }, { "epoch": 3.1799053129931614, "grad_norm": 2.18715763092041, "learning_rate": 2.297377752756723e-06, "loss": 0.7971, "step": 6045 }, { "epoch": 3.180431351920042, "grad_norm": 2.4694724082946777, "learning_rate": 2.296685372207788e-06, "loss": 0.8653, "step": 6046 }, { "epoch": 3.1809573908469226, "grad_norm": 2.2609758377075195, "learning_rate": 2.2959930073570725e-06, "loss": 0.77, "step": 6047 }, { "epoch": 3.1814834297738033, "grad_norm": 2.234550952911377, "learning_rate": 2.295300658258036e-06, "loss": 0.7697, "step": 6048 }, { "epoch": 3.182009468700684, "grad_norm": 2.4793407917022705, "learning_rate": 2.294608324964135e-06, "loss": 0.7967, "step": 6049 }, { "epoch": 3.1825355076275645, "grad_norm": 2.4317243099212646, "learning_rate": 2.293916007528826e-06, "loss": 0.8566, "step": 6050 }, { "epoch": 3.183061546554445, "grad_norm": 2.3394713401794434, "learning_rate": 2.293223706005563e-06, "loss": 0.8514, "step": 6051 }, { "epoch": 3.183587585481326, "grad_norm": 2.4308204650878906, "learning_rate": 2.2925314204477998e-06, "loss": 0.8609, "step": 6052 }, { "epoch": 3.184113624408206, "grad_norm": 2.340419292449951, "learning_rate": 2.2918391509089898e-06, "loss": 0.8253, "step": 6053 }, { "epoch": 3.1846396633350866, "grad_norm": 2.2109594345092773, "learning_rate": 2.2911468974425825e-06, "loss": 0.8299, "step": 6054 }, { "epoch": 3.1851657022619673, "grad_norm": 2.5388364791870117, "learning_rate": 2.2904546601020296e-06, "loss": 0.8127, "step": 6055 }, { "epoch": 3.185691741188848, "grad_norm": 2.331636428833008, "learning_rate": 2.2897624389407787e-06, "loss": 0.7643, "step": 6056 }, { "epoch": 3.1862177801157285, "grad_norm": 2.3822133541107178, "learning_rate": 2.2890702340122782e-06, "loss": 0.7804, "step": 6057 }, { "epoch": 3.186743819042609, "grad_norm": 2.1815073490142822, "learning_rate": 2.2883780453699724e-06, "loss": 0.7713, "step": 6058 }, { "epoch": 3.18726985796949, "grad_norm": 2.135180711746216, "learning_rate": 2.2876858730673074e-06, "loss": 0.8449, "step": 6059 }, { "epoch": 3.1877958968963704, "grad_norm": 2.914774179458618, "learning_rate": 2.2869937171577267e-06, "loss": 0.7897, "step": 6060 }, { "epoch": 3.188321935823251, "grad_norm": 2.452510356903076, "learning_rate": 2.286301577694673e-06, "loss": 0.8339, "step": 6061 }, { "epoch": 3.1888479747501317, "grad_norm": 2.348801612854004, "learning_rate": 2.285609454731587e-06, "loss": 0.8437, "step": 6062 }, { "epoch": 3.189374013677012, "grad_norm": 2.200092077255249, "learning_rate": 2.2849173483219083e-06, "loss": 0.8307, "step": 6063 }, { "epoch": 3.1899000526038925, "grad_norm": 2.3064656257629395, "learning_rate": 2.284225258519077e-06, "loss": 0.8202, "step": 6064 }, { "epoch": 3.190426091530773, "grad_norm": 2.2403504848480225, "learning_rate": 2.2835331853765276e-06, "loss": 0.8127, "step": 6065 }, { "epoch": 3.190952130457654, "grad_norm": 2.25875186920166, "learning_rate": 2.2828411289476982e-06, "loss": 0.8113, "step": 6066 }, { "epoch": 3.1914781693845344, "grad_norm": 2.1718413829803467, "learning_rate": 2.2821490892860225e-06, "loss": 0.8126, "step": 6067 }, { "epoch": 3.192004208311415, "grad_norm": 2.3517005443573, "learning_rate": 2.281457066444935e-06, "loss": 0.8168, "step": 6068 }, { "epoch": 3.1925302472382957, "grad_norm": 2.406013011932373, "learning_rate": 2.2807650604778665e-06, "loss": 0.8347, "step": 6069 }, { "epoch": 3.1930562861651763, "grad_norm": 2.119276762008667, "learning_rate": 2.2800730714382483e-06, "loss": 0.8443, "step": 6070 }, { "epoch": 3.193582325092057, "grad_norm": 2.3178839683532715, "learning_rate": 2.2793810993795113e-06, "loss": 0.8454, "step": 6071 }, { "epoch": 3.1941083640189376, "grad_norm": 2.299882650375366, "learning_rate": 2.2786891443550808e-06, "loss": 0.8316, "step": 6072 }, { "epoch": 3.194634402945818, "grad_norm": 2.2713327407836914, "learning_rate": 2.2779972064183854e-06, "loss": 0.8033, "step": 6073 }, { "epoch": 3.1951604418726984, "grad_norm": 2.518601894378662, "learning_rate": 2.277305285622851e-06, "loss": 0.839, "step": 6074 }, { "epoch": 3.195686480799579, "grad_norm": 2.3022005558013916, "learning_rate": 2.276613382021901e-06, "loss": 0.8583, "step": 6075 }, { "epoch": 3.1962125197264597, "grad_norm": 2.342512845993042, "learning_rate": 2.275921495668958e-06, "loss": 0.826, "step": 6076 }, { "epoch": 3.1967385586533403, "grad_norm": 2.4162514209747314, "learning_rate": 2.2752296266174446e-06, "loss": 0.8636, "step": 6077 }, { "epoch": 3.197264597580221, "grad_norm": 2.31231689453125, "learning_rate": 2.2745377749207816e-06, "loss": 0.8394, "step": 6078 }, { "epoch": 3.1977906365071016, "grad_norm": 2.1293232440948486, "learning_rate": 2.273845940632385e-06, "loss": 0.7997, "step": 6079 }, { "epoch": 3.1983166754339822, "grad_norm": 2.2475624084472656, "learning_rate": 2.2731541238056747e-06, "loss": 0.8286, "step": 6080 }, { "epoch": 3.198842714360863, "grad_norm": 2.1608691215515137, "learning_rate": 2.272462324494067e-06, "loss": 0.7847, "step": 6081 }, { "epoch": 3.199368753287743, "grad_norm": 2.249823570251465, "learning_rate": 2.2717705427509753e-06, "loss": 0.7891, "step": 6082 }, { "epoch": 3.1998947922146237, "grad_norm": 2.3211214542388916, "learning_rate": 2.271078778629814e-06, "loss": 0.7913, "step": 6083 }, { "epoch": 3.2004208311415043, "grad_norm": 2.307316541671753, "learning_rate": 2.2703870321839945e-06, "loss": 0.8439, "step": 6084 }, { "epoch": 3.200946870068385, "grad_norm": 2.276026487350464, "learning_rate": 2.26969530346693e-06, "loss": 0.8507, "step": 6085 }, { "epoch": 3.2014729089952656, "grad_norm": 2.2320709228515625, "learning_rate": 2.2690035925320266e-06, "loss": 0.8049, "step": 6086 }, { "epoch": 3.2019989479221462, "grad_norm": 2.2150044441223145, "learning_rate": 2.268311899432694e-06, "loss": 0.7838, "step": 6087 }, { "epoch": 3.202524986849027, "grad_norm": 2.207164764404297, "learning_rate": 2.2676202242223383e-06, "loss": 0.7851, "step": 6088 }, { "epoch": 3.2030510257759075, "grad_norm": 2.2447359561920166, "learning_rate": 2.266928566954365e-06, "loss": 0.817, "step": 6089 }, { "epoch": 3.203577064702788, "grad_norm": 2.4689102172851562, "learning_rate": 2.266236927682178e-06, "loss": 0.8447, "step": 6090 }, { "epoch": 3.2041031036296688, "grad_norm": 2.3369386196136475, "learning_rate": 2.26554530645918e-06, "loss": 0.8212, "step": 6091 }, { "epoch": 3.2046291425565494, "grad_norm": 2.2678511142730713, "learning_rate": 2.2648537033387717e-06, "loss": 0.8159, "step": 6092 }, { "epoch": 3.2051551814834296, "grad_norm": 2.048069715499878, "learning_rate": 2.264162118374354e-06, "loss": 0.7675, "step": 6093 }, { "epoch": 3.2056812204103102, "grad_norm": 2.3286426067352295, "learning_rate": 2.263470551619323e-06, "loss": 0.8632, "step": 6094 }, { "epoch": 3.206207259337191, "grad_norm": 2.4888241291046143, "learning_rate": 2.2627790031270765e-06, "loss": 0.7861, "step": 6095 }, { "epoch": 3.2067332982640715, "grad_norm": 2.3061134815216064, "learning_rate": 2.2620874729510103e-06, "loss": 0.7643, "step": 6096 }, { "epoch": 3.207259337190952, "grad_norm": 2.18060040473938, "learning_rate": 2.2613959611445184e-06, "loss": 0.8834, "step": 6097 }, { "epoch": 3.2077853761178328, "grad_norm": 2.05888295173645, "learning_rate": 2.2607044677609935e-06, "loss": 0.804, "step": 6098 }, { "epoch": 3.2083114150447134, "grad_norm": 2.151953935623169, "learning_rate": 2.2600129928538262e-06, "loss": 0.813, "step": 6099 }, { "epoch": 3.208837453971594, "grad_norm": 2.296372890472412, "learning_rate": 2.2593215364764085e-06, "loss": 0.779, "step": 6100 }, { "epoch": 3.2093634928984747, "grad_norm": 2.1727242469787598, "learning_rate": 2.2586300986821254e-06, "loss": 0.7747, "step": 6101 }, { "epoch": 3.209889531825355, "grad_norm": 2.186924457550049, "learning_rate": 2.2579386795243657e-06, "loss": 0.8133, "step": 6102 }, { "epoch": 3.2104155707522355, "grad_norm": 2.2148656845092773, "learning_rate": 2.2572472790565143e-06, "loss": 0.7833, "step": 6103 }, { "epoch": 3.210941609679116, "grad_norm": 1.9704002141952515, "learning_rate": 2.2565558973319566e-06, "loss": 0.7315, "step": 6104 }, { "epoch": 3.2114676486059968, "grad_norm": 2.246446132659912, "learning_rate": 2.2558645344040734e-06, "loss": 0.7941, "step": 6105 }, { "epoch": 3.2119936875328774, "grad_norm": 2.222992181777954, "learning_rate": 2.2551731903262466e-06, "loss": 0.758, "step": 6106 }, { "epoch": 3.212519726459758, "grad_norm": 2.31709623336792, "learning_rate": 2.2544818651518573e-06, "loss": 0.8207, "step": 6107 }, { "epoch": 3.2130457653866387, "grad_norm": 2.1547329425811768, "learning_rate": 2.2537905589342805e-06, "loss": 0.7922, "step": 6108 }, { "epoch": 3.2135718043135193, "grad_norm": 2.2561070919036865, "learning_rate": 2.2530992717268954e-06, "loss": 0.8407, "step": 6109 }, { "epoch": 3.2140978432404, "grad_norm": 2.286689281463623, "learning_rate": 2.252408003583077e-06, "loss": 0.7914, "step": 6110 }, { "epoch": 3.2146238821672806, "grad_norm": 2.172001600265503, "learning_rate": 2.2517167545561987e-06, "loss": 0.7856, "step": 6111 }, { "epoch": 3.215149921094161, "grad_norm": 2.2408738136291504, "learning_rate": 2.2510255246996322e-06, "loss": 0.8132, "step": 6112 }, { "epoch": 3.2156759600210414, "grad_norm": 2.2133548259735107, "learning_rate": 2.25033431406675e-06, "loss": 0.7692, "step": 6113 }, { "epoch": 3.216201998947922, "grad_norm": 2.243502140045166, "learning_rate": 2.249643122710921e-06, "loss": 0.7692, "step": 6114 }, { "epoch": 3.2167280378748027, "grad_norm": 2.3045690059661865, "learning_rate": 2.248951950685512e-06, "loss": 0.8207, "step": 6115 }, { "epoch": 3.2172540768016833, "grad_norm": 2.122654914855957, "learning_rate": 2.24826079804389e-06, "loss": 0.8333, "step": 6116 }, { "epoch": 3.217780115728564, "grad_norm": 2.366987943649292, "learning_rate": 2.2475696648394206e-06, "loss": 0.8116, "step": 6117 }, { "epoch": 3.2183061546554446, "grad_norm": 2.2646853923797607, "learning_rate": 2.246878551125466e-06, "loss": 0.7887, "step": 6118 }, { "epoch": 3.218832193582325, "grad_norm": 2.3328051567077637, "learning_rate": 2.2461874569553892e-06, "loss": 0.8289, "step": 6119 }, { "epoch": 3.219358232509206, "grad_norm": 2.2180182933807373, "learning_rate": 2.2454963823825494e-06, "loss": 0.8589, "step": 6120 }, { "epoch": 3.2198842714360865, "grad_norm": 2.3645551204681396, "learning_rate": 2.244805327460308e-06, "loss": 0.8631, "step": 6121 }, { "epoch": 3.2204103103629667, "grad_norm": 2.2426116466522217, "learning_rate": 2.244114292242019e-06, "loss": 0.7835, "step": 6122 }, { "epoch": 3.2209363492898473, "grad_norm": 2.2730624675750732, "learning_rate": 2.2434232767810403e-06, "loss": 0.8396, "step": 6123 }, { "epoch": 3.221462388216728, "grad_norm": 2.2587926387786865, "learning_rate": 2.2427322811307254e-06, "loss": 0.8236, "step": 6124 }, { "epoch": 3.2219884271436086, "grad_norm": 2.2664053440093994, "learning_rate": 2.242041305344427e-06, "loss": 0.8452, "step": 6125 }, { "epoch": 3.222514466070489, "grad_norm": 2.1259875297546387, "learning_rate": 2.2413503494754972e-06, "loss": 0.7831, "step": 6126 }, { "epoch": 3.22304050499737, "grad_norm": 2.455373525619507, "learning_rate": 2.240659413577285e-06, "loss": 0.9203, "step": 6127 }, { "epoch": 3.2235665439242505, "grad_norm": 2.2612266540527344, "learning_rate": 2.2399684977031404e-06, "loss": 0.8239, "step": 6128 }, { "epoch": 3.224092582851131, "grad_norm": 2.241485834121704, "learning_rate": 2.239277601906407e-06, "loss": 0.7812, "step": 6129 }, { "epoch": 3.2246186217780117, "grad_norm": 2.2084572315216064, "learning_rate": 2.238586726240432e-06, "loss": 0.8624, "step": 6130 }, { "epoch": 3.2251446607048924, "grad_norm": 2.2185635566711426, "learning_rate": 2.2378958707585572e-06, "loss": 0.8152, "step": 6131 }, { "epoch": 3.2256706996317726, "grad_norm": 2.279062032699585, "learning_rate": 2.2372050355141262e-06, "loss": 0.7919, "step": 6132 }, { "epoch": 3.226196738558653, "grad_norm": 2.229311466217041, "learning_rate": 2.2365142205604788e-06, "loss": 0.805, "step": 6133 }, { "epoch": 3.226722777485534, "grad_norm": 2.112623929977417, "learning_rate": 2.235823425950954e-06, "loss": 0.8278, "step": 6134 }, { "epoch": 3.2272488164124145, "grad_norm": 2.1026556491851807, "learning_rate": 2.2351326517388892e-06, "loss": 0.7781, "step": 6135 }, { "epoch": 3.227774855339295, "grad_norm": 2.3125205039978027, "learning_rate": 2.23444189797762e-06, "loss": 0.7942, "step": 6136 }, { "epoch": 3.2283008942661757, "grad_norm": 2.0641918182373047, "learning_rate": 2.2337511647204794e-06, "loss": 0.7807, "step": 6137 }, { "epoch": 3.2288269331930564, "grad_norm": 2.2547922134399414, "learning_rate": 2.2330604520208014e-06, "loss": 0.8368, "step": 6138 }, { "epoch": 3.229352972119937, "grad_norm": 2.153384208679199, "learning_rate": 2.232369759931916e-06, "loss": 0.8177, "step": 6139 }, { "epoch": 3.2298790110468176, "grad_norm": 2.307365894317627, "learning_rate": 2.2316790885071526e-06, "loss": 0.8224, "step": 6140 }, { "epoch": 3.2304050499736983, "grad_norm": 2.335777997970581, "learning_rate": 2.2309884377998403e-06, "loss": 0.8093, "step": 6141 }, { "epoch": 3.2309310889005785, "grad_norm": 2.536456346511841, "learning_rate": 2.2302978078633034e-06, "loss": 0.8195, "step": 6142 }, { "epoch": 3.231457127827459, "grad_norm": 2.2669427394866943, "learning_rate": 2.2296071987508687e-06, "loss": 0.8333, "step": 6143 }, { "epoch": 3.2319831667543397, "grad_norm": 2.567152500152588, "learning_rate": 2.2289166105158565e-06, "loss": 0.7819, "step": 6144 }, { "epoch": 3.2325092056812204, "grad_norm": 2.301119804382324, "learning_rate": 2.228226043211589e-06, "loss": 0.8423, "step": 6145 }, { "epoch": 3.233035244608101, "grad_norm": 2.121082305908203, "learning_rate": 2.2275354968913864e-06, "loss": 0.8025, "step": 6146 }, { "epoch": 3.2335612835349816, "grad_norm": 2.294090509414673, "learning_rate": 2.2268449716085675e-06, "loss": 0.8884, "step": 6147 }, { "epoch": 3.2340873224618623, "grad_norm": 2.1279749870300293, "learning_rate": 2.226154467416447e-06, "loss": 0.8328, "step": 6148 }, { "epoch": 3.234613361388743, "grad_norm": 2.232896089553833, "learning_rate": 2.2254639843683407e-06, "loss": 0.8185, "step": 6149 }, { "epoch": 3.2351394003156235, "grad_norm": 2.3665270805358887, "learning_rate": 2.2247735225175627e-06, "loss": 0.8143, "step": 6150 }, { "epoch": 3.2356654392425037, "grad_norm": 2.383704662322998, "learning_rate": 2.224083081917423e-06, "loss": 0.796, "step": 6151 }, { "epoch": 3.2361914781693844, "grad_norm": 2.3657021522521973, "learning_rate": 2.223392662621232e-06, "loss": 0.811, "step": 6152 }, { "epoch": 3.236717517096265, "grad_norm": 2.079397439956665, "learning_rate": 2.2227022646822987e-06, "loss": 0.8111, "step": 6153 }, { "epoch": 3.2372435560231456, "grad_norm": 2.210900068283081, "learning_rate": 2.2220118881539283e-06, "loss": 0.8144, "step": 6154 }, { "epoch": 3.2377695949500263, "grad_norm": 2.189394235610962, "learning_rate": 2.221321533089427e-06, "loss": 0.7923, "step": 6155 }, { "epoch": 3.238295633876907, "grad_norm": 2.2866573333740234, "learning_rate": 2.2206311995420974e-06, "loss": 0.8648, "step": 6156 }, { "epoch": 3.2388216728037875, "grad_norm": 2.431684732437134, "learning_rate": 2.219940887565243e-06, "loss": 0.8375, "step": 6157 }, { "epoch": 3.239347711730668, "grad_norm": 2.319190263748169, "learning_rate": 2.2192505972121613e-06, "loss": 0.7799, "step": 6158 }, { "epoch": 3.239873750657549, "grad_norm": 2.180447578430176, "learning_rate": 2.2185603285361514e-06, "loss": 0.7547, "step": 6159 }, { "epoch": 3.2403997895844294, "grad_norm": 2.2909953594207764, "learning_rate": 2.217870081590511e-06, "loss": 0.8107, "step": 6160 }, { "epoch": 3.24092582851131, "grad_norm": 2.5565037727355957, "learning_rate": 2.2171798564285334e-06, "loss": 0.825, "step": 6161 }, { "epoch": 3.2414518674381902, "grad_norm": 2.4483015537261963, "learning_rate": 2.2164896531035133e-06, "loss": 0.8311, "step": 6162 }, { "epoch": 3.241977906365071, "grad_norm": 2.285419464111328, "learning_rate": 2.2157994716687413e-06, "loss": 0.8424, "step": 6163 }, { "epoch": 3.2425039452919515, "grad_norm": 2.3865301609039307, "learning_rate": 2.2151093121775094e-06, "loss": 0.8104, "step": 6164 }, { "epoch": 3.243029984218832, "grad_norm": 2.1530189514160156, "learning_rate": 2.2144191746831027e-06, "loss": 0.7967, "step": 6165 }, { "epoch": 3.243556023145713, "grad_norm": 2.537479877471924, "learning_rate": 2.21372905923881e-06, "loss": 0.8075, "step": 6166 }, { "epoch": 3.2440820620725934, "grad_norm": 2.5144875049591064, "learning_rate": 2.2130389658979147e-06, "loss": 0.8176, "step": 6167 }, { "epoch": 3.244608100999474, "grad_norm": 2.4435784816741943, "learning_rate": 2.2123488947137007e-06, "loss": 0.84, "step": 6168 }, { "epoch": 3.2451341399263547, "grad_norm": 2.45426344871521, "learning_rate": 2.2116588457394496e-06, "loss": 0.8204, "step": 6169 }, { "epoch": 3.2456601788532353, "grad_norm": 2.2683398723602295, "learning_rate": 2.2109688190284406e-06, "loss": 0.793, "step": 6170 }, { "epoch": 3.2461862177801155, "grad_norm": 2.1169612407684326, "learning_rate": 2.2102788146339528e-06, "loss": 0.8055, "step": 6171 }, { "epoch": 3.246712256706996, "grad_norm": 2.1228394508361816, "learning_rate": 2.209588832609261e-06, "loss": 0.8325, "step": 6172 }, { "epoch": 3.247238295633877, "grad_norm": 2.173682689666748, "learning_rate": 2.20889887300764e-06, "loss": 0.8383, "step": 6173 }, { "epoch": 3.2477643345607574, "grad_norm": 2.1556813716888428, "learning_rate": 2.208208935882363e-06, "loss": 0.8157, "step": 6174 }, { "epoch": 3.248290373487638, "grad_norm": 2.7806262969970703, "learning_rate": 2.2075190212867002e-06, "loss": 0.862, "step": 6175 }, { "epoch": 3.2488164124145187, "grad_norm": 2.2271218299865723, "learning_rate": 2.206829129273922e-06, "loss": 0.8169, "step": 6176 }, { "epoch": 3.2493424513413993, "grad_norm": 2.2402923107147217, "learning_rate": 2.206139259897296e-06, "loss": 0.8575, "step": 6177 }, { "epoch": 3.24986849026828, "grad_norm": 2.275212049484253, "learning_rate": 2.2054494132100876e-06, "loss": 0.8376, "step": 6178 }, { "epoch": 3.2503945291951606, "grad_norm": 2.3506696224212646, "learning_rate": 2.204759589265561e-06, "loss": 0.8495, "step": 6179 }, { "epoch": 3.250920568122041, "grad_norm": 2.198591947555542, "learning_rate": 2.2040697881169777e-06, "loss": 0.7973, "step": 6180 }, { "epoch": 3.251446607048922, "grad_norm": 2.256786823272705, "learning_rate": 2.2033800098175987e-06, "loss": 0.7988, "step": 6181 }, { "epoch": 3.251972645975802, "grad_norm": 2.453037738800049, "learning_rate": 2.2026902544206837e-06, "loss": 0.8216, "step": 6182 }, { "epoch": 3.2524986849026827, "grad_norm": 2.759758949279785, "learning_rate": 2.202000521979489e-06, "loss": 0.852, "step": 6183 }, { "epoch": 3.2530247238295633, "grad_norm": 2.4459846019744873, "learning_rate": 2.20131081254727e-06, "loss": 0.8055, "step": 6184 }, { "epoch": 3.253550762756444, "grad_norm": 2.238816022872925, "learning_rate": 2.2006211261772805e-06, "loss": 0.8348, "step": 6185 }, { "epoch": 3.2540768016833246, "grad_norm": 2.1947407722473145, "learning_rate": 2.199931462922771e-06, "loss": 0.7441, "step": 6186 }, { "epoch": 3.254602840610205, "grad_norm": 2.5200819969177246, "learning_rate": 2.199241822836992e-06, "loss": 0.7934, "step": 6187 }, { "epoch": 3.255128879537086, "grad_norm": 2.184965133666992, "learning_rate": 2.198552205973192e-06, "loss": 0.8518, "step": 6188 }, { "epoch": 3.2556549184639665, "grad_norm": 2.2569355964660645, "learning_rate": 2.1978626123846174e-06, "loss": 0.8386, "step": 6189 }, { "epoch": 3.256180957390847, "grad_norm": 2.254096508026123, "learning_rate": 2.1971730421245123e-06, "loss": 0.8041, "step": 6190 }, { "epoch": 3.2567069963177273, "grad_norm": 2.2392029762268066, "learning_rate": 2.1964834952461194e-06, "loss": 0.8038, "step": 6191 }, { "epoch": 3.257233035244608, "grad_norm": 2.0971591472625732, "learning_rate": 2.195793971802681e-06, "loss": 0.7842, "step": 6192 }, { "epoch": 3.2577590741714886, "grad_norm": 2.249518632888794, "learning_rate": 2.195104471847434e-06, "loss": 0.8101, "step": 6193 }, { "epoch": 3.258285113098369, "grad_norm": 2.2274231910705566, "learning_rate": 2.1944149954336165e-06, "loss": 0.8435, "step": 6194 }, { "epoch": 3.25881115202525, "grad_norm": 2.295944929122925, "learning_rate": 2.1937255426144643e-06, "loss": 0.8482, "step": 6195 }, { "epoch": 3.2593371909521305, "grad_norm": 2.234205722808838, "learning_rate": 2.1930361134432117e-06, "loss": 0.793, "step": 6196 }, { "epoch": 3.259863229879011, "grad_norm": 2.2516908645629883, "learning_rate": 2.192346707973089e-06, "loss": 0.8244, "step": 6197 }, { "epoch": 3.2603892688058917, "grad_norm": 2.2679436206817627, "learning_rate": 2.191657326257327e-06, "loss": 0.8368, "step": 6198 }, { "epoch": 3.2609153077327724, "grad_norm": 2.2095043659210205, "learning_rate": 2.1909679683491543e-06, "loss": 0.7717, "step": 6199 }, { "epoch": 3.2614413466596526, "grad_norm": 2.3701984882354736, "learning_rate": 2.1902786343017972e-06, "loss": 0.8237, "step": 6200 }, { "epoch": 3.2619673855865337, "grad_norm": 2.2871575355529785, "learning_rate": 2.1895893241684795e-06, "loss": 0.796, "step": 6201 }, { "epoch": 3.262493424513414, "grad_norm": 2.3171565532684326, "learning_rate": 2.188900038002424e-06, "loss": 0.8142, "step": 6202 }, { "epoch": 3.2630194634402945, "grad_norm": 2.366520881652832, "learning_rate": 2.1882107758568516e-06, "loss": 0.8454, "step": 6203 }, { "epoch": 3.263545502367175, "grad_norm": 2.222208023071289, "learning_rate": 2.187521537784981e-06, "loss": 0.8164, "step": 6204 }, { "epoch": 3.2640715412940557, "grad_norm": 2.1705076694488525, "learning_rate": 2.1868323238400297e-06, "loss": 0.7842, "step": 6205 }, { "epoch": 3.2645975802209364, "grad_norm": 2.4394781589508057, "learning_rate": 2.186143134075213e-06, "loss": 0.8083, "step": 6206 }, { "epoch": 3.265123619147817, "grad_norm": 2.095608949661255, "learning_rate": 2.1854539685437443e-06, "loss": 0.8131, "step": 6207 }, { "epoch": 3.2656496580746976, "grad_norm": 2.3638017177581787, "learning_rate": 2.1847648272988343e-06, "loss": 0.8227, "step": 6208 }, { "epoch": 3.2661756970015783, "grad_norm": 2.3429744243621826, "learning_rate": 2.1840757103936935e-06, "loss": 0.8482, "step": 6209 }, { "epoch": 3.266701735928459, "grad_norm": 2.277132272720337, "learning_rate": 2.183386617881529e-06, "loss": 0.8156, "step": 6210 }, { "epoch": 3.267227774855339, "grad_norm": 2.33634352684021, "learning_rate": 2.1826975498155464e-06, "loss": 0.8211, "step": 6211 }, { "epoch": 3.2677538137822197, "grad_norm": 2.3315560817718506, "learning_rate": 2.1820085062489504e-06, "loss": 0.8165, "step": 6212 }, { "epoch": 3.2682798527091004, "grad_norm": 2.3371999263763428, "learning_rate": 2.1813194872349435e-06, "loss": 0.8341, "step": 6213 }, { "epoch": 3.268805891635981, "grad_norm": 2.3069522380828857, "learning_rate": 2.1806304928267245e-06, "loss": 0.7656, "step": 6214 }, { "epoch": 3.2693319305628616, "grad_norm": 2.291529655456543, "learning_rate": 2.179941523077493e-06, "loss": 0.7513, "step": 6215 }, { "epoch": 3.2698579694897423, "grad_norm": 2.2500674724578857, "learning_rate": 2.179252578040444e-06, "loss": 0.7927, "step": 6216 }, { "epoch": 3.270384008416623, "grad_norm": 2.294891119003296, "learning_rate": 2.1785636577687726e-06, "loss": 0.8817, "step": 6217 }, { "epoch": 3.2709100473435035, "grad_norm": 2.299969434738159, "learning_rate": 2.177874762315671e-06, "loss": 0.7957, "step": 6218 }, { "epoch": 3.271436086270384, "grad_norm": 2.146284341812134, "learning_rate": 2.177185891734331e-06, "loss": 0.7955, "step": 6219 }, { "epoch": 3.2719621251972644, "grad_norm": 2.263119697570801, "learning_rate": 2.17649704607794e-06, "loss": 0.8555, "step": 6220 }, { "epoch": 3.272488164124145, "grad_norm": 2.396578550338745, "learning_rate": 2.175808225399686e-06, "loss": 0.817, "step": 6221 }, { "epoch": 3.2730142030510256, "grad_norm": 2.5320966243743896, "learning_rate": 2.175119429752753e-06, "loss": 0.7935, "step": 6222 }, { "epoch": 3.2735402419779063, "grad_norm": 2.1775219440460205, "learning_rate": 2.1744306591903235e-06, "loss": 0.8475, "step": 6223 }, { "epoch": 3.274066280904787, "grad_norm": 2.2635724544525146, "learning_rate": 2.1737419137655792e-06, "loss": 0.7579, "step": 6224 }, { "epoch": 3.2745923198316675, "grad_norm": 2.280806303024292, "learning_rate": 2.1730531935316986e-06, "loss": 0.8073, "step": 6225 }, { "epoch": 3.275118358758548, "grad_norm": 2.2537949085235596, "learning_rate": 2.17236449854186e-06, "loss": 0.8341, "step": 6226 }, { "epoch": 3.275644397685429, "grad_norm": 2.274935007095337, "learning_rate": 2.1716758288492372e-06, "loss": 0.8489, "step": 6227 }, { "epoch": 3.2761704366123094, "grad_norm": 2.326554775238037, "learning_rate": 2.1709871845070053e-06, "loss": 0.8319, "step": 6228 }, { "epoch": 3.27669647553919, "grad_norm": 2.205768346786499, "learning_rate": 2.1702985655683324e-06, "loss": 0.8287, "step": 6229 }, { "epoch": 3.2772225144660707, "grad_norm": 2.135068893432617, "learning_rate": 2.16960997208639e-06, "loss": 0.8197, "step": 6230 }, { "epoch": 3.277748553392951, "grad_norm": 2.342479705810547, "learning_rate": 2.1689214041143446e-06, "loss": 0.7958, "step": 6231 }, { "epoch": 3.2782745923198315, "grad_norm": 2.458371877670288, "learning_rate": 2.168232861705363e-06, "loss": 0.7971, "step": 6232 }, { "epoch": 3.278800631246712, "grad_norm": 2.2194416522979736, "learning_rate": 2.1675443449126068e-06, "loss": 0.8143, "step": 6233 }, { "epoch": 3.279326670173593, "grad_norm": 2.3036177158355713, "learning_rate": 2.1668558537892376e-06, "loss": 0.8118, "step": 6234 }, { "epoch": 3.2798527091004734, "grad_norm": 2.128720998764038, "learning_rate": 2.1661673883884163e-06, "loss": 0.8261, "step": 6235 }, { "epoch": 3.280378748027354, "grad_norm": 2.201002359390259, "learning_rate": 2.1654789487632984e-06, "loss": 0.7722, "step": 6236 }, { "epoch": 3.2809047869542347, "grad_norm": 2.034285545349121, "learning_rate": 2.1647905349670403e-06, "loss": 0.7967, "step": 6237 }, { "epoch": 3.2814308258811153, "grad_norm": 2.316981077194214, "learning_rate": 2.164102147052795e-06, "loss": 0.8152, "step": 6238 }, { "epoch": 3.281956864807996, "grad_norm": 2.298227548599243, "learning_rate": 2.1634137850737154e-06, "loss": 0.8321, "step": 6239 }, { "epoch": 3.282482903734876, "grad_norm": 2.1370208263397217, "learning_rate": 2.162725449082949e-06, "loss": 0.8343, "step": 6240 }, { "epoch": 3.283008942661757, "grad_norm": 2.299605369567871, "learning_rate": 2.162037139133644e-06, "loss": 0.8536, "step": 6241 }, { "epoch": 3.2835349815886374, "grad_norm": 2.254683494567871, "learning_rate": 2.161348855278947e-06, "loss": 0.7695, "step": 6242 }, { "epoch": 3.284061020515518, "grad_norm": 8.383825302124023, "learning_rate": 2.160660597571999e-06, "loss": 0.824, "step": 6243 }, { "epoch": 3.2845870594423987, "grad_norm": 2.4771134853363037, "learning_rate": 2.1599723660659428e-06, "loss": 0.7888, "step": 6244 }, { "epoch": 3.2851130983692793, "grad_norm": 2.281982898712158, "learning_rate": 2.1592841608139177e-06, "loss": 0.8304, "step": 6245 }, { "epoch": 3.28563913729616, "grad_norm": 3.3973286151885986, "learning_rate": 2.158595981869061e-06, "loss": 0.8381, "step": 6246 }, { "epoch": 3.2861651762230406, "grad_norm": 2.33058500289917, "learning_rate": 2.157907829284508e-06, "loss": 0.7874, "step": 6247 }, { "epoch": 3.2866912151499212, "grad_norm": 2.4058189392089844, "learning_rate": 2.1572197031133913e-06, "loss": 0.7767, "step": 6248 }, { "epoch": 3.287217254076802, "grad_norm": 2.493863582611084, "learning_rate": 2.156531603408843e-06, "loss": 0.794, "step": 6249 }, { "epoch": 3.2877432930036825, "grad_norm": 2.093679428100586, "learning_rate": 2.155843530223993e-06, "loss": 0.8435, "step": 6250 }, { "epoch": 3.2882693319305627, "grad_norm": 2.3444573879241943, "learning_rate": 2.1551554836119666e-06, "loss": 0.8292, "step": 6251 }, { "epoch": 3.2887953708574433, "grad_norm": 2.5248725414276123, "learning_rate": 2.1544674636258905e-06, "loss": 0.8416, "step": 6252 }, { "epoch": 3.289321409784324, "grad_norm": 2.4742376804351807, "learning_rate": 2.153779470318886e-06, "loss": 0.8374, "step": 6253 }, { "epoch": 3.2898474487112046, "grad_norm": 2.272552728652954, "learning_rate": 2.153091503744075e-06, "loss": 0.8376, "step": 6254 }, { "epoch": 3.2903734876380852, "grad_norm": 2.301584482192993, "learning_rate": 2.152403563954577e-06, "loss": 0.7963, "step": 6255 }, { "epoch": 3.290899526564966, "grad_norm": 2.365736722946167, "learning_rate": 2.1517156510035086e-06, "loss": 0.8292, "step": 6256 }, { "epoch": 3.2914255654918465, "grad_norm": 2.249288558959961, "learning_rate": 2.151027764943984e-06, "loss": 0.8235, "step": 6257 }, { "epoch": 3.291951604418727, "grad_norm": 2.1216254234313965, "learning_rate": 2.1503399058291163e-06, "loss": 0.8208, "step": 6258 }, { "epoch": 3.2924776433456078, "grad_norm": 2.231233596801758, "learning_rate": 2.1496520737120155e-06, "loss": 0.8001, "step": 6259 }, { "epoch": 3.293003682272488, "grad_norm": 2.3085718154907227, "learning_rate": 2.1489642686457903e-06, "loss": 0.7808, "step": 6260 }, { "epoch": 3.2935297211993686, "grad_norm": 2.2404396533966064, "learning_rate": 2.148276490683548e-06, "loss": 0.8131, "step": 6261 }, { "epoch": 3.2940557601262492, "grad_norm": 2.339998960494995, "learning_rate": 2.147588739878392e-06, "loss": 0.8409, "step": 6262 }, { "epoch": 3.29458179905313, "grad_norm": 2.1286232471466064, "learning_rate": 2.146901016283425e-06, "loss": 0.7782, "step": 6263 }, { "epoch": 3.2951078379800105, "grad_norm": 2.316520929336548, "learning_rate": 2.146213319951748e-06, "loss": 0.8176, "step": 6264 }, { "epoch": 3.295633876906891, "grad_norm": 2.5105648040771484, "learning_rate": 2.145525650936457e-06, "loss": 0.7914, "step": 6265 }, { "epoch": 3.2961599158337718, "grad_norm": 2.168827772140503, "learning_rate": 2.144838009290649e-06, "loss": 0.8012, "step": 6266 }, { "epoch": 3.2966859547606524, "grad_norm": 3.0709500312805176, "learning_rate": 2.1441503950674178e-06, "loss": 0.8191, "step": 6267 }, { "epoch": 3.297211993687533, "grad_norm": 2.3398211002349854, "learning_rate": 2.143462808319855e-06, "loss": 0.7842, "step": 6268 }, { "epoch": 3.2977380326144132, "grad_norm": 2.343740224838257, "learning_rate": 2.142775249101051e-06, "loss": 0.8143, "step": 6269 }, { "epoch": 3.298264071541294, "grad_norm": 2.333597421646118, "learning_rate": 2.142087717464092e-06, "loss": 0.8463, "step": 6270 }, { "epoch": 3.2987901104681745, "grad_norm": 2.2359061241149902, "learning_rate": 2.141400213462065e-06, "loss": 0.8123, "step": 6271 }, { "epoch": 3.299316149395055, "grad_norm": 2.4329731464385986, "learning_rate": 2.140712737148051e-06, "loss": 0.7893, "step": 6272 }, { "epoch": 3.2998421883219358, "grad_norm": 2.3451294898986816, "learning_rate": 2.140025288575132e-06, "loss": 0.8535, "step": 6273 }, { "epoch": 3.3003682272488164, "grad_norm": 2.443896532058716, "learning_rate": 2.1393378677963874e-06, "loss": 0.8717, "step": 6274 }, { "epoch": 3.300894266175697, "grad_norm": 2.270538330078125, "learning_rate": 2.1386504748648936e-06, "loss": 0.8072, "step": 6275 }, { "epoch": 3.3014203051025777, "grad_norm": 2.2628374099731445, "learning_rate": 2.137963109833725e-06, "loss": 0.8397, "step": 6276 }, { "epoch": 3.3019463440294583, "grad_norm": 2.271310567855835, "learning_rate": 2.137275772755954e-06, "loss": 0.7904, "step": 6277 }, { "epoch": 3.302472382956339, "grad_norm": 2.270611524581909, "learning_rate": 2.1365884636846527e-06, "loss": 0.8023, "step": 6278 }, { "epoch": 3.3029984218832196, "grad_norm": 2.236560583114624, "learning_rate": 2.1359011826728864e-06, "loss": 0.804, "step": 6279 }, { "epoch": 3.3035244608100998, "grad_norm": 2.194335699081421, "learning_rate": 2.135213929773722e-06, "loss": 0.8143, "step": 6280 }, { "epoch": 3.3040504997369804, "grad_norm": 2.4336044788360596, "learning_rate": 2.134526705040224e-06, "loss": 0.7959, "step": 6281 }, { "epoch": 3.304576538663861, "grad_norm": 2.2862436771392822, "learning_rate": 2.1338395085254547e-06, "loss": 0.8542, "step": 6282 }, { "epoch": 3.3051025775907417, "grad_norm": 2.292912721633911, "learning_rate": 2.1331523402824716e-06, "loss": 0.7841, "step": 6283 }, { "epoch": 3.3056286165176223, "grad_norm": 2.4753363132476807, "learning_rate": 2.1324652003643333e-06, "loss": 0.7996, "step": 6284 }, { "epoch": 3.306154655444503, "grad_norm": 2.2245290279388428, "learning_rate": 2.131778088824095e-06, "loss": 0.8091, "step": 6285 }, { "epoch": 3.3066806943713836, "grad_norm": 2.1538522243499756, "learning_rate": 2.1310910057148082e-06, "loss": 0.8401, "step": 6286 }, { "epoch": 3.307206733298264, "grad_norm": 2.4358444213867188, "learning_rate": 2.1304039510895246e-06, "loss": 0.7805, "step": 6287 }, { "epoch": 3.307732772225145, "grad_norm": 2.2992982864379883, "learning_rate": 2.1297169250012934e-06, "loss": 0.782, "step": 6288 }, { "epoch": 3.308258811152025, "grad_norm": 2.2970948219299316, "learning_rate": 2.129029927503159e-06, "loss": 0.8174, "step": 6289 }, { "epoch": 3.3087848500789057, "grad_norm": 2.241891860961914, "learning_rate": 2.1283429586481668e-06, "loss": 0.8039, "step": 6290 }, { "epoch": 3.3093108890057863, "grad_norm": 2.1736819744110107, "learning_rate": 2.1276560184893583e-06, "loss": 0.8214, "step": 6291 }, { "epoch": 3.309836927932667, "grad_norm": 2.330185651779175, "learning_rate": 2.1269691070797744e-06, "loss": 0.7985, "step": 6292 }, { "epoch": 3.3103629668595476, "grad_norm": 2.0103440284729004, "learning_rate": 2.1262822244724506e-06, "loss": 0.7637, "step": 6293 }, { "epoch": 3.310889005786428, "grad_norm": 2.604645252227783, "learning_rate": 2.125595370720423e-06, "loss": 0.8655, "step": 6294 }, { "epoch": 3.311415044713309, "grad_norm": 2.1363556385040283, "learning_rate": 2.124908545876724e-06, "loss": 0.8213, "step": 6295 }, { "epoch": 3.3119410836401895, "grad_norm": 2.2393460273742676, "learning_rate": 2.1242217499943853e-06, "loss": 0.7902, "step": 6296 }, { "epoch": 3.31246712256707, "grad_norm": 2.318011522293091, "learning_rate": 2.1235349831264345e-06, "loss": 0.8703, "step": 6297 }, { "epoch": 3.3129931614939507, "grad_norm": 2.3377926349639893, "learning_rate": 2.1228482453258987e-06, "loss": 0.841, "step": 6298 }, { "epoch": 3.3135192004208314, "grad_norm": 2.301607608795166, "learning_rate": 2.122161536645802e-06, "loss": 0.8132, "step": 6299 }, { "epoch": 3.3140452393477116, "grad_norm": 2.236333131790161, "learning_rate": 2.121474857139166e-06, "loss": 0.7753, "step": 6300 }, { "epoch": 3.314571278274592, "grad_norm": 2.315326452255249, "learning_rate": 2.1207882068590094e-06, "loss": 0.8753, "step": 6301 }, { "epoch": 3.315097317201473, "grad_norm": 2.1954762935638428, "learning_rate": 2.1201015858583497e-06, "loss": 0.8006, "step": 6302 }, { "epoch": 3.3156233561283535, "grad_norm": 2.135469675064087, "learning_rate": 2.1194149941902025e-06, "loss": 0.8026, "step": 6303 }, { "epoch": 3.316149395055234, "grad_norm": 2.203676700592041, "learning_rate": 2.11872843190758e-06, "loss": 0.7096, "step": 6304 }, { "epoch": 3.3166754339821147, "grad_norm": 2.85310697555542, "learning_rate": 2.118041899063494e-06, "loss": 0.7992, "step": 6305 }, { "epoch": 3.3172014729089954, "grad_norm": 2.329979181289673, "learning_rate": 2.1173553957109507e-06, "loss": 0.8085, "step": 6306 }, { "epoch": 3.317727511835876, "grad_norm": 2.17875337600708, "learning_rate": 2.116668921902958e-06, "loss": 0.7867, "step": 6307 }, { "epoch": 3.3182535507627566, "grad_norm": 2.3925771713256836, "learning_rate": 2.115982477692518e-06, "loss": 0.7871, "step": 6308 }, { "epoch": 3.318779589689637, "grad_norm": 2.1597740650177, "learning_rate": 2.1152960631326323e-06, "loss": 0.837, "step": 6309 }, { "epoch": 3.3193056286165175, "grad_norm": 2.462367057800293, "learning_rate": 2.1146096782763007e-06, "loss": 0.8579, "step": 6310 }, { "epoch": 3.319831667543398, "grad_norm": 2.5057084560394287, "learning_rate": 2.113923323176519e-06, "loss": 0.7767, "step": 6311 }, { "epoch": 3.3203577064702787, "grad_norm": 2.1960361003875732, "learning_rate": 2.1132369978862834e-06, "loss": 0.7966, "step": 6312 }, { "epoch": 3.3208837453971594, "grad_norm": 2.3138387203216553, "learning_rate": 2.112550702458584e-06, "loss": 0.7897, "step": 6313 }, { "epoch": 3.32140978432404, "grad_norm": 2.1180922985076904, "learning_rate": 2.111864436946413e-06, "loss": 0.7724, "step": 6314 }, { "epoch": 3.3219358232509206, "grad_norm": 2.2481892108917236, "learning_rate": 2.111178201402755e-06, "loss": 0.8301, "step": 6315 }, { "epoch": 3.3224618621778013, "grad_norm": 2.320733070373535, "learning_rate": 2.1104919958805973e-06, "loss": 0.8561, "step": 6316 }, { "epoch": 3.322987901104682, "grad_norm": 2.2366886138916016, "learning_rate": 2.1098058204329224e-06, "loss": 0.8164, "step": 6317 }, { "epoch": 3.323513940031562, "grad_norm": 2.2313618659973145, "learning_rate": 2.109119675112711e-06, "loss": 0.8364, "step": 6318 }, { "epoch": 3.324039978958443, "grad_norm": 2.3270790576934814, "learning_rate": 2.108433559972941e-06, "loss": 0.7727, "step": 6319 }, { "epoch": 3.3245660178853234, "grad_norm": 2.556732177734375, "learning_rate": 2.1077474750665888e-06, "loss": 0.7905, "step": 6320 }, { "epoch": 3.325092056812204, "grad_norm": 2.3780252933502197, "learning_rate": 2.107061420446628e-06, "loss": 0.8496, "step": 6321 }, { "epoch": 3.3256180957390846, "grad_norm": 2.1489899158477783, "learning_rate": 2.10637539616603e-06, "loss": 0.8146, "step": 6322 }, { "epoch": 3.3261441346659653, "grad_norm": 2.2606422901153564, "learning_rate": 2.1056894022777623e-06, "loss": 0.7895, "step": 6323 }, { "epoch": 3.326670173592846, "grad_norm": 2.3026626110076904, "learning_rate": 2.105003438834794e-06, "loss": 0.8015, "step": 6324 }, { "epoch": 3.3271962125197265, "grad_norm": 2.3084723949432373, "learning_rate": 2.104317505890087e-06, "loss": 0.7901, "step": 6325 }, { "epoch": 3.327722251446607, "grad_norm": 2.3706300258636475, "learning_rate": 2.103631603496604e-06, "loss": 0.8176, "step": 6326 }, { "epoch": 3.328248290373488, "grad_norm": 2.277372360229492, "learning_rate": 2.1029457317073053e-06, "loss": 0.8317, "step": 6327 }, { "epoch": 3.3287743293003684, "grad_norm": 2.2769646644592285, "learning_rate": 2.1022598905751482e-06, "loss": 0.7964, "step": 6328 }, { "epoch": 3.3293003682272486, "grad_norm": 2.5776541233062744, "learning_rate": 2.1015740801530855e-06, "loss": 0.7794, "step": 6329 }, { "epoch": 3.3298264071541293, "grad_norm": 2.2661807537078857, "learning_rate": 2.100888300494071e-06, "loss": 0.8188, "step": 6330 }, { "epoch": 3.33035244608101, "grad_norm": 2.196479082107544, "learning_rate": 2.1002025516510556e-06, "loss": 0.8139, "step": 6331 }, { "epoch": 3.3308784850078905, "grad_norm": 2.3438193798065186, "learning_rate": 2.099516833676985e-06, "loss": 0.8217, "step": 6332 }, { "epoch": 3.331404523934771, "grad_norm": 2.2277588844299316, "learning_rate": 2.098831146624805e-06, "loss": 0.8376, "step": 6333 }, { "epoch": 3.331930562861652, "grad_norm": 2.5766854286193848, "learning_rate": 2.09814549054746e-06, "loss": 0.8569, "step": 6334 }, { "epoch": 3.3324566017885324, "grad_norm": 2.3740732669830322, "learning_rate": 2.0974598654978904e-06, "loss": 0.8358, "step": 6335 }, { "epoch": 3.332982640715413, "grad_norm": 2.460059404373169, "learning_rate": 2.096774271529032e-06, "loss": 0.8313, "step": 6336 }, { "epoch": 3.3335086796422937, "grad_norm": 2.1376266479492188, "learning_rate": 2.0960887086938227e-06, "loss": 0.8181, "step": 6337 }, { "epoch": 3.334034718569174, "grad_norm": 2.2363905906677246, "learning_rate": 2.095403177045194e-06, "loss": 0.7594, "step": 6338 }, { "epoch": 3.3345607574960545, "grad_norm": 2.278709888458252, "learning_rate": 2.0947176766360787e-06, "loss": 0.8527, "step": 6339 }, { "epoch": 3.335086796422935, "grad_norm": 2.4269299507141113, "learning_rate": 2.094032207519404e-06, "loss": 0.7568, "step": 6340 }, { "epoch": 3.335612835349816, "grad_norm": 2.3026301860809326, "learning_rate": 2.093346769748096e-06, "loss": 0.8503, "step": 6341 }, { "epoch": 3.3361388742766964, "grad_norm": 2.244490385055542, "learning_rate": 2.09266136337508e-06, "loss": 0.7958, "step": 6342 }, { "epoch": 3.336664913203577, "grad_norm": 2.2510366439819336, "learning_rate": 2.091975988453276e-06, "loss": 0.797, "step": 6343 }, { "epoch": 3.3371909521304577, "grad_norm": 2.1903867721557617, "learning_rate": 2.0912906450356017e-06, "loss": 0.7629, "step": 6344 }, { "epoch": 3.3377169910573383, "grad_norm": 2.2663655281066895, "learning_rate": 2.0906053331749747e-06, "loss": 0.7944, "step": 6345 }, { "epoch": 3.338243029984219, "grad_norm": 2.2248151302337646, "learning_rate": 2.089920052924309e-06, "loss": 0.8362, "step": 6346 }, { "epoch": 3.3387690689110996, "grad_norm": 2.221076488494873, "learning_rate": 2.089234804336516e-06, "loss": 0.7447, "step": 6347 }, { "epoch": 3.3392951078379802, "grad_norm": 2.3318583965301514, "learning_rate": 2.0885495874645044e-06, "loss": 0.834, "step": 6348 }, { "epoch": 3.3398211467648604, "grad_norm": 2.2303690910339355, "learning_rate": 2.0878644023611814e-06, "loss": 0.821, "step": 6349 }, { "epoch": 3.340347185691741, "grad_norm": 2.160675048828125, "learning_rate": 2.0871792490794514e-06, "loss": 0.7739, "step": 6350 }, { "epoch": 3.3408732246186217, "grad_norm": 2.2408552169799805, "learning_rate": 2.086494127672214e-06, "loss": 0.8301, "step": 6351 }, { "epoch": 3.3413992635455023, "grad_norm": 2.4030227661132812, "learning_rate": 2.08580903819237e-06, "loss": 0.8233, "step": 6352 }, { "epoch": 3.341925302472383, "grad_norm": 2.416980743408203, "learning_rate": 2.0851239806928165e-06, "loss": 0.8389, "step": 6353 }, { "epoch": 3.3424513413992636, "grad_norm": 2.3330459594726562, "learning_rate": 2.0844389552264478e-06, "loss": 0.8199, "step": 6354 }, { "epoch": 3.342977380326144, "grad_norm": 2.256317138671875, "learning_rate": 2.0837539618461546e-06, "loss": 0.7974, "step": 6355 }, { "epoch": 3.343503419253025, "grad_norm": 2.3644986152648926, "learning_rate": 2.0830690006048266e-06, "loss": 0.8018, "step": 6356 }, { "epoch": 3.3440294581799055, "grad_norm": 2.3581297397613525, "learning_rate": 2.082384071555352e-06, "loss": 0.7772, "step": 6357 }, { "epoch": 3.3445554971067857, "grad_norm": 2.43939208984375, "learning_rate": 2.0816991747506136e-06, "loss": 0.9039, "step": 6358 }, { "epoch": 3.3450815360336663, "grad_norm": 2.225891590118408, "learning_rate": 2.0810143102434933e-06, "loss": 0.7755, "step": 6359 }, { "epoch": 3.345607574960547, "grad_norm": 2.3241422176361084, "learning_rate": 2.0803294780868715e-06, "loss": 0.8163, "step": 6360 }, { "epoch": 3.3461336138874276, "grad_norm": 2.1844382286071777, "learning_rate": 2.0796446783336247e-06, "loss": 0.7527, "step": 6361 }, { "epoch": 3.346659652814308, "grad_norm": 2.162379741668701, "learning_rate": 2.0789599110366264e-06, "loss": 0.769, "step": 6362 }, { "epoch": 3.347185691741189, "grad_norm": 2.3199539184570312, "learning_rate": 2.0782751762487497e-06, "loss": 0.7545, "step": 6363 }, { "epoch": 3.3477117306680695, "grad_norm": 2.389275074005127, "learning_rate": 2.0775904740228646e-06, "loss": 0.8253, "step": 6364 }, { "epoch": 3.34823776959495, "grad_norm": 2.1737680435180664, "learning_rate": 2.0769058044118357e-06, "loss": 0.7837, "step": 6365 }, { "epoch": 3.3487638085218308, "grad_norm": 2.2066879272460938, "learning_rate": 2.0762211674685286e-06, "loss": 0.8303, "step": 6366 }, { "epoch": 3.3492898474487114, "grad_norm": 2.33268404006958, "learning_rate": 2.0755365632458056e-06, "loss": 0.7964, "step": 6367 }, { "epoch": 3.349815886375592, "grad_norm": 2.2197842597961426, "learning_rate": 2.074851991796525e-06, "loss": 0.7673, "step": 6368 }, { "epoch": 3.350341925302472, "grad_norm": 2.253516912460327, "learning_rate": 2.074167453173544e-06, "loss": 0.7975, "step": 6369 }, { "epoch": 3.350867964229353, "grad_norm": 2.383758544921875, "learning_rate": 2.0734829474297165e-06, "loss": 0.8314, "step": 6370 }, { "epoch": 3.3513940031562335, "grad_norm": 2.14544415473938, "learning_rate": 2.0727984746178958e-06, "loss": 0.8149, "step": 6371 }, { "epoch": 3.351920042083114, "grad_norm": 2.1731855869293213, "learning_rate": 2.0721140347909288e-06, "loss": 0.7769, "step": 6372 }, { "epoch": 3.3524460810099947, "grad_norm": 2.2989954948425293, "learning_rate": 2.0714296280016637e-06, "loss": 0.7617, "step": 6373 }, { "epoch": 3.3529721199368754, "grad_norm": 2.1964502334594727, "learning_rate": 2.0707452543029434e-06, "loss": 0.8407, "step": 6374 }, { "epoch": 3.353498158863756, "grad_norm": 2.2730393409729004, "learning_rate": 2.0700609137476098e-06, "loss": 0.8117, "step": 6375 }, { "epoch": 3.3540241977906367, "grad_norm": 2.3630895614624023, "learning_rate": 2.069376606388502e-06, "loss": 0.7911, "step": 6376 }, { "epoch": 3.3545502367175173, "grad_norm": 2.2695682048797607, "learning_rate": 2.068692332278456e-06, "loss": 0.7996, "step": 6377 }, { "epoch": 3.3550762756443975, "grad_norm": 2.2595832347869873, "learning_rate": 2.0680080914703076e-06, "loss": 0.827, "step": 6378 }, { "epoch": 3.355602314571278, "grad_norm": 2.3348021507263184, "learning_rate": 2.0673238840168847e-06, "loss": 0.7953, "step": 6379 }, { "epoch": 3.3561283534981587, "grad_norm": 2.2752106189727783, "learning_rate": 2.0666397099710184e-06, "loss": 0.812, "step": 6380 }, { "epoch": 3.3566543924250394, "grad_norm": 2.2162373065948486, "learning_rate": 2.0659555693855337e-06, "loss": 0.7772, "step": 6381 }, { "epoch": 3.35718043135192, "grad_norm": 2.3209683895111084, "learning_rate": 2.0652714623132544e-06, "loss": 0.8669, "step": 6382 }, { "epoch": 3.3577064702788006, "grad_norm": 2.1782987117767334, "learning_rate": 2.064587388807001e-06, "loss": 0.749, "step": 6383 }, { "epoch": 3.3582325092056813, "grad_norm": 2.5908501148223877, "learning_rate": 2.0639033489195933e-06, "loss": 0.8888, "step": 6384 }, { "epoch": 3.358758548132562, "grad_norm": 2.495401620864868, "learning_rate": 2.0632193427038455e-06, "loss": 0.8639, "step": 6385 }, { "epoch": 3.3592845870594426, "grad_norm": 2.199164867401123, "learning_rate": 2.0625353702125713e-06, "loss": 0.8326, "step": 6386 }, { "epoch": 3.3598106259863227, "grad_norm": 2.290201425552368, "learning_rate": 2.0618514314985803e-06, "loss": 0.7816, "step": 6387 }, { "epoch": 3.360336664913204, "grad_norm": 2.2912163734436035, "learning_rate": 2.061167526614682e-06, "loss": 0.8195, "step": 6388 }, { "epoch": 3.360862703840084, "grad_norm": 2.169285774230957, "learning_rate": 2.06048365561368e-06, "loss": 0.764, "step": 6389 }, { "epoch": 3.3613887427669646, "grad_norm": 2.3123276233673096, "learning_rate": 2.059799818548378e-06, "loss": 0.8144, "step": 6390 }, { "epoch": 3.3619147816938453, "grad_norm": 2.2501633167266846, "learning_rate": 2.0591160154715768e-06, "loss": 0.7984, "step": 6391 }, { "epoch": 3.362440820620726, "grad_norm": 2.1380226612091064, "learning_rate": 2.0584322464360732e-06, "loss": 0.7926, "step": 6392 }, { "epoch": 3.3629668595476065, "grad_norm": 2.3120687007904053, "learning_rate": 2.0577485114946614e-06, "loss": 0.771, "step": 6393 }, { "epoch": 3.363492898474487, "grad_norm": 2.2856180667877197, "learning_rate": 2.0570648107001335e-06, "loss": 0.833, "step": 6394 }, { "epoch": 3.364018937401368, "grad_norm": 2.2418248653411865, "learning_rate": 2.0563811441052795e-06, "loss": 0.824, "step": 6395 }, { "epoch": 3.3645449763282484, "grad_norm": 2.2381842136383057, "learning_rate": 2.055697511762886e-06, "loss": 0.8392, "step": 6396 }, { "epoch": 3.365071015255129, "grad_norm": 2.371006965637207, "learning_rate": 2.0550139137257384e-06, "loss": 0.8404, "step": 6397 }, { "epoch": 3.3655970541820093, "grad_norm": 2.2990987300872803, "learning_rate": 2.0543303500466166e-06, "loss": 0.8239, "step": 6398 }, { "epoch": 3.36612309310889, "grad_norm": 2.362243175506592, "learning_rate": 2.053646820778302e-06, "loss": 0.8391, "step": 6399 }, { "epoch": 3.3666491320357705, "grad_norm": 2.1550891399383545, "learning_rate": 2.0529633259735677e-06, "loss": 0.8299, "step": 6400 }, { "epoch": 3.367175170962651, "grad_norm": 2.2423365116119385, "learning_rate": 2.052279865685189e-06, "loss": 0.775, "step": 6401 }, { "epoch": 3.367701209889532, "grad_norm": 2.231645107269287, "learning_rate": 2.051596439965937e-06, "loss": 0.79, "step": 6402 }, { "epoch": 3.3682272488164124, "grad_norm": 2.1957266330718994, "learning_rate": 2.0509130488685802e-06, "loss": 0.8389, "step": 6403 }, { "epoch": 3.368753287743293, "grad_norm": 2.427811622619629, "learning_rate": 2.0502296924458832e-06, "loss": 0.8213, "step": 6404 }, { "epoch": 3.3692793266701737, "grad_norm": 2.2832250595092773, "learning_rate": 2.0495463707506096e-06, "loss": 0.7993, "step": 6405 }, { "epoch": 3.3698053655970543, "grad_norm": 2.1285743713378906, "learning_rate": 2.04886308383552e-06, "loss": 0.7977, "step": 6406 }, { "epoch": 3.3703314045239345, "grad_norm": 2.3167593479156494, "learning_rate": 2.0481798317533726e-06, "loss": 0.812, "step": 6407 }, { "epoch": 3.370857443450815, "grad_norm": 2.5443642139434814, "learning_rate": 2.0474966145569196e-06, "loss": 0.8258, "step": 6408 }, { "epoch": 3.371383482377696, "grad_norm": 2.2065062522888184, "learning_rate": 2.0468134322989154e-06, "loss": 0.8169, "step": 6409 }, { "epoch": 3.3719095213045764, "grad_norm": 2.471256732940674, "learning_rate": 2.0461302850321096e-06, "loss": 0.8695, "step": 6410 }, { "epoch": 3.372435560231457, "grad_norm": 2.1926820278167725, "learning_rate": 2.0454471728092476e-06, "loss": 0.8213, "step": 6411 }, { "epoch": 3.3729615991583377, "grad_norm": 2.306569814682007, "learning_rate": 2.0447640956830742e-06, "loss": 0.806, "step": 6412 }, { "epoch": 3.3734876380852183, "grad_norm": 2.2849538326263428, "learning_rate": 2.0440810537063317e-06, "loss": 0.8269, "step": 6413 }, { "epoch": 3.374013677012099, "grad_norm": 2.2662298679351807, "learning_rate": 2.0433980469317584e-06, "loss": 0.8336, "step": 6414 }, { "epoch": 3.3745397159389796, "grad_norm": 2.42171311378479, "learning_rate": 2.042715075412089e-06, "loss": 0.8138, "step": 6415 }, { "epoch": 3.3750657548658602, "grad_norm": 2.248429298400879, "learning_rate": 2.0420321392000576e-06, "loss": 0.8697, "step": 6416 }, { "epoch": 3.375591793792741, "grad_norm": 2.154552698135376, "learning_rate": 2.0413492383483943e-06, "loss": 0.8271, "step": 6417 }, { "epoch": 3.376117832719621, "grad_norm": 2.152827024459839, "learning_rate": 2.0406663729098275e-06, "loss": 0.7559, "step": 6418 }, { "epoch": 3.3766438716465017, "grad_norm": 2.275256872177124, "learning_rate": 2.0399835429370816e-06, "loss": 0.8622, "step": 6419 }, { "epoch": 3.3771699105733823, "grad_norm": 2.4199063777923584, "learning_rate": 2.0393007484828794e-06, "loss": 0.8166, "step": 6420 }, { "epoch": 3.377695949500263, "grad_norm": 2.2143471240997314, "learning_rate": 2.0386179895999414e-06, "loss": 0.8075, "step": 6421 }, { "epoch": 3.3782219884271436, "grad_norm": 2.287526845932007, "learning_rate": 2.0379352663409822e-06, "loss": 0.8477, "step": 6422 }, { "epoch": 3.3787480273540242, "grad_norm": 2.363960027694702, "learning_rate": 2.0372525787587176e-06, "loss": 0.7565, "step": 6423 }, { "epoch": 3.379274066280905, "grad_norm": 2.268360137939453, "learning_rate": 2.0365699269058578e-06, "loss": 0.8326, "step": 6424 }, { "epoch": 3.3798001052077855, "grad_norm": 2.093205690383911, "learning_rate": 2.035887310835112e-06, "loss": 0.7972, "step": 6425 }, { "epoch": 3.380326144134666, "grad_norm": 2.7152342796325684, "learning_rate": 2.035204730599185e-06, "loss": 0.8002, "step": 6426 }, { "epoch": 3.3808521830615463, "grad_norm": 2.556551933288574, "learning_rate": 2.0345221862507823e-06, "loss": 0.8321, "step": 6427 }, { "epoch": 3.381378221988427, "grad_norm": 2.192464828491211, "learning_rate": 2.033839677842602e-06, "loss": 0.835, "step": 6428 }, { "epoch": 3.3819042609153076, "grad_norm": 2.4415009021759033, "learning_rate": 2.033157205427342e-06, "loss": 0.8228, "step": 6429 }, { "epoch": 3.3824302998421882, "grad_norm": 2.14394211769104, "learning_rate": 2.0324747690576964e-06, "loss": 0.813, "step": 6430 }, { "epoch": 3.382956338769069, "grad_norm": 2.395721197128296, "learning_rate": 2.0317923687863583e-06, "loss": 0.8133, "step": 6431 }, { "epoch": 3.3834823776959495, "grad_norm": 2.2648942470550537, "learning_rate": 2.031110004666016e-06, "loss": 0.814, "step": 6432 }, { "epoch": 3.38400841662283, "grad_norm": 2.407973289489746, "learning_rate": 2.030427676749356e-06, "loss": 0.8373, "step": 6433 }, { "epoch": 3.3845344555497108, "grad_norm": 2.537548542022705, "learning_rate": 2.0297453850890626e-06, "loss": 0.7948, "step": 6434 }, { "epoch": 3.3850604944765914, "grad_norm": 2.479095935821533, "learning_rate": 2.0290631297378163e-06, "loss": 0.8612, "step": 6435 }, { "epoch": 3.385586533403472, "grad_norm": 2.2724921703338623, "learning_rate": 2.028380910748294e-06, "loss": 0.7888, "step": 6436 }, { "epoch": 3.3861125723303527, "grad_norm": 2.279555559158325, "learning_rate": 2.027698728173171e-06, "loss": 0.8069, "step": 6437 }, { "epoch": 3.386638611257233, "grad_norm": 2.8019144535064697, "learning_rate": 2.02701658206512e-06, "loss": 0.7721, "step": 6438 }, { "epoch": 3.3871646501841135, "grad_norm": 2.3212332725524902, "learning_rate": 2.0263344724768106e-06, "loss": 0.8071, "step": 6439 }, { "epoch": 3.387690689110994, "grad_norm": 2.1216108798980713, "learning_rate": 2.0256523994609106e-06, "loss": 0.7727, "step": 6440 }, { "epoch": 3.3882167280378748, "grad_norm": 2.278390407562256, "learning_rate": 2.0249703630700817e-06, "loss": 0.8037, "step": 6441 }, { "epoch": 3.3887427669647554, "grad_norm": 2.4180705547332764, "learning_rate": 2.024288363356987e-06, "loss": 0.8107, "step": 6442 }, { "epoch": 3.389268805891636, "grad_norm": 2.358057975769043, "learning_rate": 2.0236064003742826e-06, "loss": 0.8068, "step": 6443 }, { "epoch": 3.3897948448185167, "grad_norm": 2.2856853008270264, "learning_rate": 2.022924474174625e-06, "loss": 0.8269, "step": 6444 }, { "epoch": 3.3903208837453973, "grad_norm": 2.455933094024658, "learning_rate": 2.022242584810666e-06, "loss": 0.7758, "step": 6445 }, { "epoch": 3.390846922672278, "grad_norm": 2.224581718444824, "learning_rate": 2.0215607323350576e-06, "loss": 0.8158, "step": 6446 }, { "epoch": 3.391372961599158, "grad_norm": 2.2808079719543457, "learning_rate": 2.0208789168004438e-06, "loss": 0.8053, "step": 6447 }, { "epoch": 3.3918990005260388, "grad_norm": 2.3294339179992676, "learning_rate": 2.0201971382594692e-06, "loss": 0.8728, "step": 6448 }, { "epoch": 3.3924250394529194, "grad_norm": 2.254721164703369, "learning_rate": 2.019515396764777e-06, "loss": 0.7834, "step": 6449 }, { "epoch": 3.3929510783798, "grad_norm": 2.242178440093994, "learning_rate": 2.0188336923690026e-06, "loss": 0.8274, "step": 6450 }, { "epoch": 3.3934771173066807, "grad_norm": 2.238377571105957, "learning_rate": 2.018152025124783e-06, "loss": 0.8245, "step": 6451 }, { "epoch": 3.3940031562335613, "grad_norm": 2.3304250240325928, "learning_rate": 2.0174703950847503e-06, "loss": 0.7763, "step": 6452 }, { "epoch": 3.394529195160442, "grad_norm": 2.1841650009155273, "learning_rate": 2.0167888023015346e-06, "loss": 0.8211, "step": 6453 }, { "epoch": 3.3950552340873226, "grad_norm": 2.387086868286133, "learning_rate": 2.0161072468277616e-06, "loss": 0.8255, "step": 6454 }, { "epoch": 3.395581273014203, "grad_norm": 2.125110149383545, "learning_rate": 2.015425728716056e-06, "loss": 0.7892, "step": 6455 }, { "epoch": 3.3961073119410834, "grad_norm": 2.2436165809631348, "learning_rate": 2.0147442480190387e-06, "loss": 0.7764, "step": 6456 }, { "epoch": 3.3966333508679645, "grad_norm": 2.3379268646240234, "learning_rate": 2.014062804789329e-06, "loss": 0.8489, "step": 6457 }, { "epoch": 3.3971593897948447, "grad_norm": 2.392245292663574, "learning_rate": 2.0133813990795402e-06, "loss": 0.8591, "step": 6458 }, { "epoch": 3.3976854287217253, "grad_norm": 2.4135019779205322, "learning_rate": 2.0127000309422857e-06, "loss": 0.8281, "step": 6459 }, { "epoch": 3.398211467648606, "grad_norm": 2.2069091796875, "learning_rate": 2.0120187004301738e-06, "loss": 0.7954, "step": 6460 }, { "epoch": 3.3987375065754866, "grad_norm": 2.2058703899383545, "learning_rate": 2.011337407595812e-06, "loss": 0.8176, "step": 6461 }, { "epoch": 3.399263545502367, "grad_norm": 2.4429948329925537, "learning_rate": 2.010656152491804e-06, "loss": 0.8628, "step": 6462 }, { "epoch": 3.399789584429248, "grad_norm": 2.264537811279297, "learning_rate": 2.0099749351707505e-06, "loss": 0.8012, "step": 6463 }, { "epoch": 3.4003156233561285, "grad_norm": 2.222460985183716, "learning_rate": 2.0092937556852495e-06, "loss": 0.8275, "step": 6464 }, { "epoch": 3.400841662283009, "grad_norm": 2.2528269290924072, "learning_rate": 2.008612614087895e-06, "loss": 0.8091, "step": 6465 }, { "epoch": 3.4013677012098897, "grad_norm": 2.2657852172851562, "learning_rate": 2.0079315104312787e-06, "loss": 0.7568, "step": 6466 }, { "epoch": 3.40189374013677, "grad_norm": 2.48653507232666, "learning_rate": 2.007250444767991e-06, "loss": 0.8103, "step": 6467 }, { "epoch": 3.4024197790636506, "grad_norm": 2.1223082542419434, "learning_rate": 2.0065694171506163e-06, "loss": 0.8065, "step": 6468 }, { "epoch": 3.402945817990531, "grad_norm": 2.236783027648926, "learning_rate": 2.0058884276317394e-06, "loss": 0.863, "step": 6469 }, { "epoch": 3.403471856917412, "grad_norm": 2.3743467330932617, "learning_rate": 2.00520747626394e-06, "loss": 0.8125, "step": 6470 }, { "epoch": 3.4039978958442925, "grad_norm": 2.180320978164673, "learning_rate": 2.0045265630997952e-06, "loss": 0.7622, "step": 6471 }, { "epoch": 3.404523934771173, "grad_norm": 2.286041021347046, "learning_rate": 2.0038456881918796e-06, "loss": 0.792, "step": 6472 }, { "epoch": 3.4050499736980537, "grad_norm": 2.5944221019744873, "learning_rate": 2.003164851592763e-06, "loss": 0.7952, "step": 6473 }, { "epoch": 3.4055760126249344, "grad_norm": 2.111680269241333, "learning_rate": 2.002484053355015e-06, "loss": 0.7569, "step": 6474 }, { "epoch": 3.406102051551815, "grad_norm": 2.8191428184509277, "learning_rate": 2.0018032935312015e-06, "loss": 0.8202, "step": 6475 }, { "epoch": 3.406628090478695, "grad_norm": 2.2276060581207275, "learning_rate": 2.0011225721738848e-06, "loss": 0.7537, "step": 6476 }, { "epoch": 3.407154129405576, "grad_norm": 2.3071539402008057, "learning_rate": 2.000441889335624e-06, "loss": 0.8087, "step": 6477 }, { "epoch": 3.4076801683324565, "grad_norm": 7.256581783294678, "learning_rate": 1.999761245068976e-06, "loss": 0.8131, "step": 6478 }, { "epoch": 3.408206207259337, "grad_norm": 2.311037302017212, "learning_rate": 1.999080639426493e-06, "loss": 0.8217, "step": 6479 }, { "epoch": 3.4087322461862177, "grad_norm": 2.3770508766174316, "learning_rate": 1.9984000724607267e-06, "loss": 0.8068, "step": 6480 }, { "epoch": 3.4092582851130984, "grad_norm": 2.24881649017334, "learning_rate": 1.9977195442242246e-06, "loss": 0.7865, "step": 6481 }, { "epoch": 3.409784324039979, "grad_norm": 2.1521146297454834, "learning_rate": 1.9970390547695312e-06, "loss": 0.7987, "step": 6482 }, { "epoch": 3.4103103629668596, "grad_norm": 2.262075185775757, "learning_rate": 1.996358604149189e-06, "loss": 0.8356, "step": 6483 }, { "epoch": 3.4108364018937403, "grad_norm": 2.2274606227874756, "learning_rate": 1.9956781924157347e-06, "loss": 0.8441, "step": 6484 }, { "epoch": 3.411362440820621, "grad_norm": 2.219874620437622, "learning_rate": 1.994997819621706e-06, "loss": 0.8346, "step": 6485 }, { "epoch": 3.4118884797475015, "grad_norm": 2.3462862968444824, "learning_rate": 1.9943174858196335e-06, "loss": 0.8175, "step": 6486 }, { "epoch": 3.4124145186743817, "grad_norm": 2.134580373764038, "learning_rate": 1.993637191062047e-06, "loss": 0.7781, "step": 6487 }, { "epoch": 3.4129405576012624, "grad_norm": 2.3165037631988525, "learning_rate": 1.9929569354014745e-06, "loss": 0.8191, "step": 6488 }, { "epoch": 3.413466596528143, "grad_norm": 2.260446071624756, "learning_rate": 1.992276718890439e-06, "loss": 0.7629, "step": 6489 }, { "epoch": 3.4139926354550236, "grad_norm": 2.3396387100219727, "learning_rate": 1.99159654158146e-06, "loss": 0.7844, "step": 6490 }, { "epoch": 3.4145186743819043, "grad_norm": 2.121762275695801, "learning_rate": 1.990916403527056e-06, "loss": 0.7428, "step": 6491 }, { "epoch": 3.415044713308785, "grad_norm": 2.344358205795288, "learning_rate": 1.9902363047797417e-06, "loss": 0.8352, "step": 6492 }, { "epoch": 3.4155707522356655, "grad_norm": 2.265092134475708, "learning_rate": 1.9895562453920276e-06, "loss": 0.8102, "step": 6493 }, { "epoch": 3.416096791162546, "grad_norm": 2.2928128242492676, "learning_rate": 1.988876225416422e-06, "loss": 0.8545, "step": 6494 }, { "epoch": 3.416622830089427, "grad_norm": 2.2708046436309814, "learning_rate": 1.9881962449054314e-06, "loss": 0.7791, "step": 6495 }, { "epoch": 3.417148869016307, "grad_norm": 2.132075786590576, "learning_rate": 1.987516303911557e-06, "loss": 0.8202, "step": 6496 }, { "epoch": 3.4176749079431876, "grad_norm": 2.2380948066711426, "learning_rate": 1.9868364024872984e-06, "loss": 0.7972, "step": 6497 }, { "epoch": 3.4182009468700683, "grad_norm": 2.217698335647583, "learning_rate": 1.9861565406851514e-06, "loss": 0.792, "step": 6498 }, { "epoch": 3.418726985796949, "grad_norm": 2.24422287940979, "learning_rate": 1.985476718557611e-06, "loss": 0.7835, "step": 6499 }, { "epoch": 3.4192530247238295, "grad_norm": 2.472730875015259, "learning_rate": 1.9847969361571644e-06, "loss": 0.8343, "step": 6500 }, { "epoch": 3.41977906365071, "grad_norm": 2.2234609127044678, "learning_rate": 1.9841171935363004e-06, "loss": 0.7809, "step": 6501 }, { "epoch": 3.420305102577591, "grad_norm": 2.347553253173828, "learning_rate": 1.983437490747503e-06, "loss": 0.8082, "step": 6502 }, { "epoch": 3.4208311415044714, "grad_norm": 2.130603313446045, "learning_rate": 1.9827578278432517e-06, "loss": 0.7679, "step": 6503 }, { "epoch": 3.421357180431352, "grad_norm": 2.3368728160858154, "learning_rate": 1.9820782048760254e-06, "loss": 0.8481, "step": 6504 }, { "epoch": 3.4218832193582327, "grad_norm": 2.4503326416015625, "learning_rate": 1.9813986218982985e-06, "loss": 0.8338, "step": 6505 }, { "epoch": 3.4224092582851133, "grad_norm": 2.300377130508423, "learning_rate": 1.980719078962543e-06, "loss": 0.7914, "step": 6506 }, { "epoch": 3.4229352972119935, "grad_norm": 2.0982489585876465, "learning_rate": 1.980039576121227e-06, "loss": 0.7866, "step": 6507 }, { "epoch": 3.423461336138874, "grad_norm": 2.2098777294158936, "learning_rate": 1.9793601134268158e-06, "loss": 0.7834, "step": 6508 }, { "epoch": 3.423987375065755, "grad_norm": 2.365705728530884, "learning_rate": 1.9786806909317717e-06, "loss": 0.7938, "step": 6509 }, { "epoch": 3.4245134139926354, "grad_norm": 2.3401362895965576, "learning_rate": 1.978001308688554e-06, "loss": 0.8091, "step": 6510 }, { "epoch": 3.425039452919516, "grad_norm": 2.262272834777832, "learning_rate": 1.9773219667496186e-06, "loss": 0.8029, "step": 6511 }, { "epoch": 3.4255654918463967, "grad_norm": 2.351625919342041, "learning_rate": 1.9766426651674185e-06, "loss": 0.8071, "step": 6512 }, { "epoch": 3.4260915307732773, "grad_norm": 2.312856912612915, "learning_rate": 1.975963403994405e-06, "loss": 0.8442, "step": 6513 }, { "epoch": 3.426617569700158, "grad_norm": 2.335318088531494, "learning_rate": 1.9752841832830232e-06, "loss": 0.7971, "step": 6514 }, { "epoch": 3.4271436086270386, "grad_norm": 2.3754379749298096, "learning_rate": 1.974605003085717e-06, "loss": 0.8156, "step": 6515 }, { "epoch": 3.427669647553919, "grad_norm": 2.1076579093933105, "learning_rate": 1.9739258634549268e-06, "loss": 0.8226, "step": 6516 }, { "epoch": 3.4281956864807994, "grad_norm": 2.2030704021453857, "learning_rate": 1.97324676444309e-06, "loss": 0.781, "step": 6517 }, { "epoch": 3.42872172540768, "grad_norm": 2.3965203762054443, "learning_rate": 1.9725677061026413e-06, "loss": 0.7717, "step": 6518 }, { "epoch": 3.4292477643345607, "grad_norm": 2.2953174114227295, "learning_rate": 1.971888688486012e-06, "loss": 0.8271, "step": 6519 }, { "epoch": 3.4297738032614413, "grad_norm": 2.4659578800201416, "learning_rate": 1.971209711645629e-06, "loss": 0.8428, "step": 6520 }, { "epoch": 3.430299842188322, "grad_norm": 2.1934003829956055, "learning_rate": 1.9705307756339186e-06, "loss": 0.8056, "step": 6521 }, { "epoch": 3.4308258811152026, "grad_norm": 2.207042694091797, "learning_rate": 1.9698518805033005e-06, "loss": 0.7748, "step": 6522 }, { "epoch": 3.4313519200420832, "grad_norm": 2.3221113681793213, "learning_rate": 1.9691730263061943e-06, "loss": 0.8044, "step": 6523 }, { "epoch": 3.431877958968964, "grad_norm": 2.3619771003723145, "learning_rate": 1.968494213095015e-06, "loss": 0.7831, "step": 6524 }, { "epoch": 3.432403997895844, "grad_norm": 2.496462345123291, "learning_rate": 1.967815440922176e-06, "loss": 0.8172, "step": 6525 }, { "epoch": 3.432930036822725, "grad_norm": 2.437614917755127, "learning_rate": 1.9671367098400846e-06, "loss": 0.7753, "step": 6526 }, { "epoch": 3.4334560757496053, "grad_norm": 2.157548427581787, "learning_rate": 1.966458019901147e-06, "loss": 0.809, "step": 6527 }, { "epoch": 3.433982114676486, "grad_norm": 2.1701014041900635, "learning_rate": 1.9657793711577676e-06, "loss": 0.7667, "step": 6528 }, { "epoch": 3.4345081536033666, "grad_norm": 2.285735845565796, "learning_rate": 1.965100763662343e-06, "loss": 0.8299, "step": 6529 }, { "epoch": 3.435034192530247, "grad_norm": 2.2217369079589844, "learning_rate": 1.964422197467271e-06, "loss": 0.8214, "step": 6530 }, { "epoch": 3.435560231457128, "grad_norm": 2.3583333492279053, "learning_rate": 1.9637436726249447e-06, "loss": 0.8111, "step": 6531 }, { "epoch": 3.4360862703840085, "grad_norm": 2.252291679382324, "learning_rate": 1.963065189187754e-06, "loss": 0.7716, "step": 6532 }, { "epoch": 3.436612309310889, "grad_norm": 2.2199649810791016, "learning_rate": 1.9623867472080854e-06, "loss": 0.811, "step": 6533 }, { "epoch": 3.4371383482377698, "grad_norm": 2.3258204460144043, "learning_rate": 1.961708346738322e-06, "loss": 0.8155, "step": 6534 }, { "epoch": 3.4376643871646504, "grad_norm": 2.2464725971221924, "learning_rate": 1.9610299878308457e-06, "loss": 0.8074, "step": 6535 }, { "epoch": 3.4381904260915306, "grad_norm": 2.3778717517852783, "learning_rate": 1.9603516705380316e-06, "loss": 0.8344, "step": 6536 }, { "epoch": 3.438716465018411, "grad_norm": 2.4447457790374756, "learning_rate": 1.9596733949122544e-06, "loss": 0.7992, "step": 6537 }, { "epoch": 3.439242503945292, "grad_norm": 2.3130991458892822, "learning_rate": 1.9589951610058855e-06, "loss": 0.828, "step": 6538 }, { "epoch": 3.4397685428721725, "grad_norm": 2.283212423324585, "learning_rate": 1.958316968871291e-06, "loss": 0.8351, "step": 6539 }, { "epoch": 3.440294581799053, "grad_norm": 2.6486706733703613, "learning_rate": 1.9576388185608354e-06, "loss": 0.8354, "step": 6540 }, { "epoch": 3.4408206207259338, "grad_norm": 2.2254135608673096, "learning_rate": 1.95696071012688e-06, "loss": 0.8233, "step": 6541 }, { "epoch": 3.4413466596528144, "grad_norm": 2.2251880168914795, "learning_rate": 1.9562826436217836e-06, "loss": 0.7907, "step": 6542 }, { "epoch": 3.441872698579695, "grad_norm": 2.3391683101654053, "learning_rate": 1.955604619097899e-06, "loss": 0.8348, "step": 6543 }, { "epoch": 3.4423987375065757, "grad_norm": 2.5027542114257812, "learning_rate": 1.9549266366075773e-06, "loss": 0.8239, "step": 6544 }, { "epoch": 3.442924776433456, "grad_norm": 2.2715587615966797, "learning_rate": 1.954248696203169e-06, "loss": 0.8708, "step": 6545 }, { "epoch": 3.4434508153603365, "grad_norm": 2.248077392578125, "learning_rate": 1.9535707979370154e-06, "loss": 0.8168, "step": 6546 }, { "epoch": 3.443976854287217, "grad_norm": 2.2976725101470947, "learning_rate": 1.9528929418614606e-06, "loss": 0.7863, "step": 6547 }, { "epoch": 3.4445028932140977, "grad_norm": 2.2900381088256836, "learning_rate": 1.952215128028842e-06, "loss": 0.8257, "step": 6548 }, { "epoch": 3.4450289321409784, "grad_norm": 2.18304181098938, "learning_rate": 1.9515373564914954e-06, "loss": 0.7709, "step": 6549 }, { "epoch": 3.445554971067859, "grad_norm": 2.131476402282715, "learning_rate": 1.950859627301751e-06, "loss": 0.7936, "step": 6550 }, { "epoch": 3.4460810099947397, "grad_norm": 2.220564126968384, "learning_rate": 1.9501819405119384e-06, "loss": 0.8354, "step": 6551 }, { "epoch": 3.4466070489216203, "grad_norm": 2.4655537605285645, "learning_rate": 1.9495042961743817e-06, "loss": 0.8397, "step": 6552 }, { "epoch": 3.447133087848501, "grad_norm": 2.1829605102539062, "learning_rate": 1.948826694341404e-06, "loss": 0.7501, "step": 6553 }, { "epoch": 3.4476591267753816, "grad_norm": 2.1720995903015137, "learning_rate": 1.9481491350653233e-06, "loss": 0.7961, "step": 6554 }, { "epoch": 3.448185165702262, "grad_norm": 2.4797163009643555, "learning_rate": 1.947471618398456e-06, "loss": 0.8625, "step": 6555 }, { "epoch": 3.4487112046291424, "grad_norm": 2.164896249771118, "learning_rate": 1.9467941443931127e-06, "loss": 0.8279, "step": 6556 }, { "epoch": 3.449237243556023, "grad_norm": 2.191861629486084, "learning_rate": 1.9461167131016027e-06, "loss": 0.854, "step": 6557 }, { "epoch": 3.4497632824829036, "grad_norm": 2.3284945487976074, "learning_rate": 1.9454393245762314e-06, "loss": 0.7901, "step": 6558 }, { "epoch": 3.4502893214097843, "grad_norm": 2.3699309825897217, "learning_rate": 1.944761978869301e-06, "loss": 0.8416, "step": 6559 }, { "epoch": 3.450815360336665, "grad_norm": 2.2185962200164795, "learning_rate": 1.94408467603311e-06, "loss": 0.8597, "step": 6560 }, { "epoch": 3.4513413992635456, "grad_norm": 2.294293165206909, "learning_rate": 1.943407416119955e-06, "loss": 0.8535, "step": 6561 }, { "epoch": 3.451867438190426, "grad_norm": 2.181443214416504, "learning_rate": 1.9427301991821278e-06, "loss": 0.8124, "step": 6562 }, { "epoch": 3.452393477117307, "grad_norm": 2.2629430294036865, "learning_rate": 1.9420530252719164e-06, "loss": 0.7948, "step": 6563 }, { "epoch": 3.4529195160441875, "grad_norm": 2.2426114082336426, "learning_rate": 1.9413758944416084e-06, "loss": 0.8371, "step": 6564 }, { "epoch": 3.4534455549710676, "grad_norm": 2.2255988121032715, "learning_rate": 1.9406988067434836e-06, "loss": 0.8157, "step": 6565 }, { "epoch": 3.4539715938979483, "grad_norm": 2.2396912574768066, "learning_rate": 1.9400217622298217e-06, "loss": 0.8184, "step": 6566 }, { "epoch": 3.454497632824829, "grad_norm": 2.294344663619995, "learning_rate": 1.939344760952899e-06, "loss": 0.8332, "step": 6567 }, { "epoch": 3.4550236717517095, "grad_norm": 2.2411303520202637, "learning_rate": 1.938667802964988e-06, "loss": 0.8257, "step": 6568 }, { "epoch": 3.45554971067859, "grad_norm": 2.27811598777771, "learning_rate": 1.9379908883183564e-06, "loss": 0.8471, "step": 6569 }, { "epoch": 3.456075749605471, "grad_norm": 2.3223226070404053, "learning_rate": 1.9373140170652707e-06, "loss": 0.8147, "step": 6570 }, { "epoch": 3.4566017885323514, "grad_norm": 2.1883745193481445, "learning_rate": 1.9366371892579935e-06, "loss": 0.79, "step": 6571 }, { "epoch": 3.457127827459232, "grad_norm": 2.865927219390869, "learning_rate": 1.935960404948782e-06, "loss": 0.8206, "step": 6572 }, { "epoch": 3.4576538663861127, "grad_norm": 2.3171393871307373, "learning_rate": 1.935283664189893e-06, "loss": 0.8278, "step": 6573 }, { "epoch": 3.4581799053129934, "grad_norm": 2.421855926513672, "learning_rate": 1.934606967033578e-06, "loss": 0.7742, "step": 6574 }, { "epoch": 3.458705944239874, "grad_norm": 2.254246711730957, "learning_rate": 1.9339303135320866e-06, "loss": 0.8347, "step": 6575 }, { "epoch": 3.459231983166754, "grad_norm": 2.437772512435913, "learning_rate": 1.9332537037376637e-06, "loss": 0.8146, "step": 6576 }, { "epoch": 3.459758022093635, "grad_norm": 2.286348342895508, "learning_rate": 1.932577137702551e-06, "loss": 0.8433, "step": 6577 }, { "epoch": 3.4602840610205154, "grad_norm": 2.213710069656372, "learning_rate": 1.9319006154789885e-06, "loss": 0.8133, "step": 6578 }, { "epoch": 3.460810099947396, "grad_norm": 2.1273367404937744, "learning_rate": 1.9312241371192098e-06, "loss": 0.8019, "step": 6579 }, { "epoch": 3.4613361388742767, "grad_norm": 2.2079734802246094, "learning_rate": 1.930547702675447e-06, "loss": 0.8377, "step": 6580 }, { "epoch": 3.4618621778011573, "grad_norm": 2.3404979705810547, "learning_rate": 1.92987131219993e-06, "loss": 0.7648, "step": 6581 }, { "epoch": 3.462388216728038, "grad_norm": 2.0956735610961914, "learning_rate": 1.929194965744882e-06, "loss": 0.7714, "step": 6582 }, { "epoch": 3.4629142556549186, "grad_norm": 2.2110490798950195, "learning_rate": 1.928518663362526e-06, "loss": 0.7844, "step": 6583 }, { "epoch": 3.4634402945817993, "grad_norm": 2.2458603382110596, "learning_rate": 1.92784240510508e-06, "loss": 0.7978, "step": 6584 }, { "epoch": 3.4639663335086794, "grad_norm": 2.2848830223083496, "learning_rate": 1.92716619102476e-06, "loss": 0.8198, "step": 6585 }, { "epoch": 3.46449237243556, "grad_norm": 2.1164450645446777, "learning_rate": 1.926490021173775e-06, "loss": 0.7881, "step": 6586 }, { "epoch": 3.4650184113624407, "grad_norm": 2.293581247329712, "learning_rate": 1.9258138956043347e-06, "loss": 0.7946, "step": 6587 }, { "epoch": 3.4655444502893213, "grad_norm": 2.199118137359619, "learning_rate": 1.9251378143686433e-06, "loss": 0.8103, "step": 6588 }, { "epoch": 3.466070489216202, "grad_norm": 2.346874952316284, "learning_rate": 1.9244617775189022e-06, "loss": 0.8289, "step": 6589 }, { "epoch": 3.4665965281430826, "grad_norm": 2.140927791595459, "learning_rate": 1.923785785107309e-06, "loss": 0.8147, "step": 6590 }, { "epoch": 3.4671225670699632, "grad_norm": 2.294790744781494, "learning_rate": 1.9231098371860583e-06, "loss": 0.8442, "step": 6591 }, { "epoch": 3.467648605996844, "grad_norm": 2.2153103351593018, "learning_rate": 1.922433933807342e-06, "loss": 0.8136, "step": 6592 }, { "epoch": 3.4681746449237245, "grad_norm": 2.3889265060424805, "learning_rate": 1.921758075023345e-06, "loss": 0.8202, "step": 6593 }, { "epoch": 3.4687006838506047, "grad_norm": 2.3229095935821533, "learning_rate": 1.921082260886254e-06, "loss": 0.8555, "step": 6594 }, { "epoch": 3.469226722777486, "grad_norm": 2.364630699157715, "learning_rate": 1.920406491448248e-06, "loss": 0.7877, "step": 6595 }, { "epoch": 3.469752761704366, "grad_norm": 2.40863037109375, "learning_rate": 1.9197307667615042e-06, "loss": 0.7568, "step": 6596 }, { "epoch": 3.4702788006312466, "grad_norm": 2.4338436126708984, "learning_rate": 1.919055086878197e-06, "loss": 0.8254, "step": 6597 }, { "epoch": 3.4708048395581272, "grad_norm": 2.2949843406677246, "learning_rate": 1.9183794518504967e-06, "loss": 0.7483, "step": 6598 }, { "epoch": 3.471330878485008, "grad_norm": 2.53493070602417, "learning_rate": 1.9177038617305695e-06, "loss": 0.8393, "step": 6599 }, { "epoch": 3.4718569174118885, "grad_norm": 2.2608468532562256, "learning_rate": 1.9170283165705794e-06, "loss": 0.7725, "step": 6600 }, { "epoch": 3.472382956338769, "grad_norm": 2.174605369567871, "learning_rate": 1.9163528164226846e-06, "loss": 0.7599, "step": 6601 }, { "epoch": 3.47290899526565, "grad_norm": 2.361252784729004, "learning_rate": 1.9156773613390432e-06, "loss": 0.8661, "step": 6602 }, { "epoch": 3.4734350341925304, "grad_norm": 2.249295234680176, "learning_rate": 1.915001951371807e-06, "loss": 0.8085, "step": 6603 }, { "epoch": 3.473961073119411, "grad_norm": 2.282573938369751, "learning_rate": 1.9143265865731263e-06, "loss": 0.8101, "step": 6604 }, { "epoch": 3.4744871120462912, "grad_norm": 2.254833936691284, "learning_rate": 1.913651266995147e-06, "loss": 0.8153, "step": 6605 }, { "epoch": 3.475013150973172, "grad_norm": 2.2969093322753906, "learning_rate": 1.9129759926900116e-06, "loss": 0.8004, "step": 6606 }, { "epoch": 3.4755391899000525, "grad_norm": 2.343932628631592, "learning_rate": 1.9123007637098574e-06, "loss": 0.8171, "step": 6607 }, { "epoch": 3.476065228826933, "grad_norm": 2.5252552032470703, "learning_rate": 1.9116255801068207e-06, "loss": 0.8345, "step": 6608 }, { "epoch": 3.4765912677538138, "grad_norm": 2.2380332946777344, "learning_rate": 1.910950441933034e-06, "loss": 0.8509, "step": 6609 }, { "epoch": 3.4771173066806944, "grad_norm": 2.3573033809661865, "learning_rate": 1.9102753492406247e-06, "loss": 0.8522, "step": 6610 }, { "epoch": 3.477643345607575, "grad_norm": 2.240683078765869, "learning_rate": 1.9096003020817196e-06, "loss": 0.865, "step": 6611 }, { "epoch": 3.4781693845344557, "grad_norm": 2.307048797607422, "learning_rate": 1.908925300508438e-06, "loss": 0.8596, "step": 6612 }, { "epoch": 3.4786954234613363, "grad_norm": 2.4641621112823486, "learning_rate": 1.9082503445728982e-06, "loss": 0.8082, "step": 6613 }, { "epoch": 3.4792214623882165, "grad_norm": 2.085509777069092, "learning_rate": 1.9075754343272162e-06, "loss": 0.8044, "step": 6614 }, { "epoch": 3.479747501315097, "grad_norm": 2.2761287689208984, "learning_rate": 1.9069005698235008e-06, "loss": 0.835, "step": 6615 }, { "epoch": 3.4802735402419778, "grad_norm": 2.2613542079925537, "learning_rate": 1.9062257511138596e-06, "loss": 0.8305, "step": 6616 }, { "epoch": 3.4807995791688584, "grad_norm": 2.390489101409912, "learning_rate": 1.9055509782503976e-06, "loss": 0.8342, "step": 6617 }, { "epoch": 3.481325618095739, "grad_norm": 2.313136577606201, "learning_rate": 1.904876251285213e-06, "loss": 0.8007, "step": 6618 }, { "epoch": 3.4818516570226197, "grad_norm": 2.3588967323303223, "learning_rate": 1.9042015702704041e-06, "loss": 0.7997, "step": 6619 }, { "epoch": 3.4823776959495003, "grad_norm": 2.179058074951172, "learning_rate": 1.9035269352580632e-06, "loss": 0.7703, "step": 6620 }, { "epoch": 3.482903734876381, "grad_norm": 2.326569080352783, "learning_rate": 1.9028523463002816e-06, "loss": 0.7955, "step": 6621 }, { "epoch": 3.4834297738032616, "grad_norm": 2.3120603561401367, "learning_rate": 1.9021778034491422e-06, "loss": 0.839, "step": 6622 }, { "epoch": 3.483955812730142, "grad_norm": 2.4250543117523193, "learning_rate": 1.9015033067567295e-06, "loss": 0.7995, "step": 6623 }, { "epoch": 3.484481851657023, "grad_norm": 2.246262550354004, "learning_rate": 1.9008288562751226e-06, "loss": 0.7618, "step": 6624 }, { "epoch": 3.485007890583903, "grad_norm": 2.576256275177002, "learning_rate": 1.9001544520563952e-06, "loss": 0.7681, "step": 6625 }, { "epoch": 3.4855339295107837, "grad_norm": 2.136183977127075, "learning_rate": 1.8994800941526203e-06, "loss": 0.7886, "step": 6626 }, { "epoch": 3.4860599684376643, "grad_norm": 2.324462652206421, "learning_rate": 1.8988057826158657e-06, "loss": 0.8386, "step": 6627 }, { "epoch": 3.486586007364545, "grad_norm": 2.300801992416382, "learning_rate": 1.8981315174981974e-06, "loss": 0.7788, "step": 6628 }, { "epoch": 3.4871120462914256, "grad_norm": 2.3218164443969727, "learning_rate": 1.8974572988516734e-06, "loss": 0.7951, "step": 6629 }, { "epoch": 3.487638085218306, "grad_norm": 2.128422498703003, "learning_rate": 1.8967831267283537e-06, "loss": 0.7985, "step": 6630 }, { "epoch": 3.488164124145187, "grad_norm": 2.4229044914245605, "learning_rate": 1.8961090011802904e-06, "loss": 0.799, "step": 6631 }, { "epoch": 3.4886901630720675, "grad_norm": 2.2866435050964355, "learning_rate": 1.8954349222595346e-06, "loss": 0.8005, "step": 6632 }, { "epoch": 3.489216201998948, "grad_norm": 2.216510772705078, "learning_rate": 1.8947608900181327e-06, "loss": 0.827, "step": 6633 }, { "epoch": 3.4897422409258283, "grad_norm": 2.3705337047576904, "learning_rate": 1.8940869045081278e-06, "loss": 0.8129, "step": 6634 }, { "epoch": 3.490268279852709, "grad_norm": 2.380568504333496, "learning_rate": 1.8934129657815608e-06, "loss": 0.8327, "step": 6635 }, { "epoch": 3.4907943187795896, "grad_norm": 2.2942044734954834, "learning_rate": 1.892739073890465e-06, "loss": 0.7747, "step": 6636 }, { "epoch": 3.49132035770647, "grad_norm": 2.21256685256958, "learning_rate": 1.8920652288868735e-06, "loss": 0.786, "step": 6637 }, { "epoch": 3.491846396633351, "grad_norm": 2.3268837928771973, "learning_rate": 1.8913914308228146e-06, "loss": 0.7995, "step": 6638 }, { "epoch": 3.4923724355602315, "grad_norm": 2.248608350753784, "learning_rate": 1.8907176797503135e-06, "loss": 0.8114, "step": 6639 }, { "epoch": 3.492898474487112, "grad_norm": 2.401310920715332, "learning_rate": 1.890043975721392e-06, "loss": 0.8362, "step": 6640 }, { "epoch": 3.4934245134139927, "grad_norm": 2.24186635017395, "learning_rate": 1.8893703187880678e-06, "loss": 0.8069, "step": 6641 }, { "epoch": 3.4939505523408734, "grad_norm": 2.3232998847961426, "learning_rate": 1.8886967090023542e-06, "loss": 0.8356, "step": 6642 }, { "epoch": 3.494476591267754, "grad_norm": 2.3356106281280518, "learning_rate": 1.8880231464162621e-06, "loss": 0.8086, "step": 6643 }, { "epoch": 3.4950026301946346, "grad_norm": 2.1682939529418945, "learning_rate": 1.8873496310817976e-06, "loss": 0.8109, "step": 6644 }, { "epoch": 3.495528669121515, "grad_norm": 3.318293333053589, "learning_rate": 1.8866761630509642e-06, "loss": 0.8521, "step": 6645 }, { "epoch": 3.4960547080483955, "grad_norm": 2.2676146030426025, "learning_rate": 1.8860027423757616e-06, "loss": 0.8076, "step": 6646 }, { "epoch": 3.496580746975276, "grad_norm": 2.6167826652526855, "learning_rate": 1.8853293691081857e-06, "loss": 0.8455, "step": 6647 }, { "epoch": 3.4971067859021567, "grad_norm": 2.1573596000671387, "learning_rate": 1.884656043300228e-06, "loss": 0.7899, "step": 6648 }, { "epoch": 3.4976328248290374, "grad_norm": 2.4185729026794434, "learning_rate": 1.883982765003878e-06, "loss": 0.8243, "step": 6649 }, { "epoch": 3.498158863755918, "grad_norm": 2.279662609100342, "learning_rate": 1.883309534271119e-06, "loss": 0.8298, "step": 6650 }, { "epoch": 3.4986849026827986, "grad_norm": 2.1928305625915527, "learning_rate": 1.882636351153933e-06, "loss": 0.7968, "step": 6651 }, { "epoch": 3.4992109416096793, "grad_norm": 2.3617258071899414, "learning_rate": 1.8819632157042975e-06, "loss": 0.824, "step": 6652 }, { "epoch": 3.49973698053656, "grad_norm": 2.302788019180298, "learning_rate": 1.8812901279741861e-06, "loss": 0.8051, "step": 6653 }, { "epoch": 3.50026301946344, "grad_norm": 2.2453837394714355, "learning_rate": 1.8806170880155695e-06, "loss": 0.7937, "step": 6654 }, { "epoch": 3.5007890583903207, "grad_norm": 2.372342824935913, "learning_rate": 1.8799440958804128e-06, "loss": 0.792, "step": 6655 }, { "epoch": 3.5013150973172014, "grad_norm": 2.2764060497283936, "learning_rate": 1.8792711516206808e-06, "loss": 0.8608, "step": 6656 }, { "epoch": 3.501841136244082, "grad_norm": 2.2453901767730713, "learning_rate": 1.8785982552883298e-06, "loss": 0.7681, "step": 6657 }, { "epoch": 3.5023671751709626, "grad_norm": 2.390697717666626, "learning_rate": 1.8779254069353167e-06, "loss": 0.8543, "step": 6658 }, { "epoch": 3.5028932140978433, "grad_norm": 2.2349445819854736, "learning_rate": 1.877252606613593e-06, "loss": 0.8068, "step": 6659 }, { "epoch": 3.503419253024724, "grad_norm": 2.2020223140716553, "learning_rate": 1.8765798543751068e-06, "loss": 0.7668, "step": 6660 }, { "epoch": 3.5039452919516045, "grad_norm": 2.2033607959747314, "learning_rate": 1.8759071502718014e-06, "loss": 0.8411, "step": 6661 }, { "epoch": 3.504471330878485, "grad_norm": 2.259394645690918, "learning_rate": 1.8752344943556184e-06, "loss": 0.7452, "step": 6662 }, { "epoch": 3.5049973698053654, "grad_norm": 2.365748405456543, "learning_rate": 1.8745618866784933e-06, "loss": 0.8083, "step": 6663 }, { "epoch": 3.5055234087322464, "grad_norm": 2.327728509902954, "learning_rate": 1.8738893272923614e-06, "loss": 0.8121, "step": 6664 }, { "epoch": 3.5060494476591266, "grad_norm": 2.162748098373413, "learning_rate": 1.873216816249149e-06, "loss": 0.8251, "step": 6665 }, { "epoch": 3.5065754865860073, "grad_norm": 2.3306548595428467, "learning_rate": 1.8725443536007837e-06, "loss": 0.8291, "step": 6666 }, { "epoch": 3.507101525512888, "grad_norm": 2.310300350189209, "learning_rate": 1.8718719393991863e-06, "loss": 0.7881, "step": 6667 }, { "epoch": 3.5076275644397685, "grad_norm": 2.26061749458313, "learning_rate": 1.871199573696275e-06, "loss": 0.7984, "step": 6668 }, { "epoch": 3.508153603366649, "grad_norm": 2.225943088531494, "learning_rate": 1.8705272565439647e-06, "loss": 0.8034, "step": 6669 }, { "epoch": 3.50867964229353, "grad_norm": 2.417344808578491, "learning_rate": 1.8698549879941655e-06, "loss": 0.8707, "step": 6670 }, { "epoch": 3.5092056812204104, "grad_norm": 2.2725613117218018, "learning_rate": 1.8691827680987856e-06, "loss": 0.8409, "step": 6671 }, { "epoch": 3.5097317201472906, "grad_norm": 2.3533949851989746, "learning_rate": 1.8685105969097258e-06, "loss": 0.7988, "step": 6672 }, { "epoch": 3.5102577590741717, "grad_norm": 2.24576735496521, "learning_rate": 1.8678384744788872e-06, "loss": 0.8004, "step": 6673 }, { "epoch": 3.510783798001052, "grad_norm": 2.959660291671753, "learning_rate": 1.8671664008581642e-06, "loss": 0.8445, "step": 6674 }, { "epoch": 3.5113098369279325, "grad_norm": 2.3667027950286865, "learning_rate": 1.8664943760994488e-06, "loss": 0.7745, "step": 6675 }, { "epoch": 3.511835875854813, "grad_norm": 2.2159769535064697, "learning_rate": 1.8658224002546297e-06, "loss": 0.7882, "step": 6676 }, { "epoch": 3.512361914781694, "grad_norm": 2.373455047607422, "learning_rate": 1.8651504733755909e-06, "loss": 0.7792, "step": 6677 }, { "epoch": 3.5128879537085744, "grad_norm": 2.2113096714019775, "learning_rate": 1.864478595514213e-06, "loss": 0.7908, "step": 6678 }, { "epoch": 3.513413992635455, "grad_norm": 2.279045581817627, "learning_rate": 1.8638067667223722e-06, "loss": 0.8425, "step": 6679 }, { "epoch": 3.5139400315623357, "grad_norm": 2.279644250869751, "learning_rate": 1.863134987051941e-06, "loss": 0.7859, "step": 6680 }, { "epoch": 3.5144660704892163, "grad_norm": 2.3207342624664307, "learning_rate": 1.8624632565547893e-06, "loss": 0.7784, "step": 6681 }, { "epoch": 3.514992109416097, "grad_norm": 2.2921924591064453, "learning_rate": 1.8617915752827819e-06, "loss": 0.7962, "step": 6682 }, { "epoch": 3.515518148342977, "grad_norm": 2.2723922729492188, "learning_rate": 1.8611199432877808e-06, "loss": 0.8273, "step": 6683 }, { "epoch": 3.5160441872698582, "grad_norm": 2.1494338512420654, "learning_rate": 1.8604483606216437e-06, "loss": 0.8493, "step": 6684 }, { "epoch": 3.5165702261967384, "grad_norm": 2.036625385284424, "learning_rate": 1.8597768273362243e-06, "loss": 0.762, "step": 6685 }, { "epoch": 3.517096265123619, "grad_norm": 2.1422746181488037, "learning_rate": 1.8591053434833726e-06, "loss": 0.763, "step": 6686 }, { "epoch": 3.5176223040504997, "grad_norm": 2.1377038955688477, "learning_rate": 1.8584339091149343e-06, "loss": 0.7729, "step": 6687 }, { "epoch": 3.5181483429773803, "grad_norm": 2.309107542037964, "learning_rate": 1.8577625242827521e-06, "loss": 0.8075, "step": 6688 }, { "epoch": 3.518674381904261, "grad_norm": 2.456493616104126, "learning_rate": 1.857091189038665e-06, "loss": 0.8302, "step": 6689 }, { "epoch": 3.5192004208311416, "grad_norm": 2.4388349056243896, "learning_rate": 1.8564199034345082e-06, "loss": 0.8265, "step": 6690 }, { "epoch": 3.5197264597580222, "grad_norm": 2.4777419567108154, "learning_rate": 1.8557486675221117e-06, "loss": 0.8591, "step": 6691 }, { "epoch": 3.5202524986849024, "grad_norm": 2.278217315673828, "learning_rate": 1.855077481353304e-06, "loss": 0.8531, "step": 6692 }, { "epoch": 3.5207785376117835, "grad_norm": 2.332242965698242, "learning_rate": 1.854406344979906e-06, "loss": 0.8283, "step": 6693 }, { "epoch": 3.5213045765386637, "grad_norm": 2.3943710327148438, "learning_rate": 1.8537352584537387e-06, "loss": 0.781, "step": 6694 }, { "epoch": 3.5218306154655443, "grad_norm": 2.2751803398132324, "learning_rate": 1.8530642218266176e-06, "loss": 0.8091, "step": 6695 }, { "epoch": 3.522356654392425, "grad_norm": 2.27423357963562, "learning_rate": 1.8523932351503547e-06, "loss": 0.8326, "step": 6696 }, { "epoch": 3.5228826933193056, "grad_norm": 2.2399988174438477, "learning_rate": 1.851722298476757e-06, "loss": 0.7851, "step": 6697 }, { "epoch": 3.5234087322461862, "grad_norm": 2.2271976470947266, "learning_rate": 1.8510514118576285e-06, "loss": 0.7785, "step": 6698 }, { "epoch": 3.523934771173067, "grad_norm": 2.45845627784729, "learning_rate": 1.8503805753447713e-06, "loss": 0.7882, "step": 6699 }, { "epoch": 3.5244608100999475, "grad_norm": 2.18111515045166, "learning_rate": 1.8497097889899784e-06, "loss": 0.7504, "step": 6700 }, { "epoch": 3.524986849026828, "grad_norm": 2.166583299636841, "learning_rate": 1.8490390528450447e-06, "loss": 0.7884, "step": 6701 }, { "epoch": 3.5255128879537088, "grad_norm": 2.390023946762085, "learning_rate": 1.8483683669617574e-06, "loss": 0.851, "step": 6702 }, { "epoch": 3.526038926880589, "grad_norm": 2.3170416355133057, "learning_rate": 1.8476977313919025e-06, "loss": 0.8351, "step": 6703 }, { "epoch": 3.5265649658074696, "grad_norm": 2.089154005050659, "learning_rate": 1.8470271461872594e-06, "loss": 0.7871, "step": 6704 }, { "epoch": 3.52709100473435, "grad_norm": 2.156177520751953, "learning_rate": 1.8463566113996054e-06, "loss": 0.7912, "step": 6705 }, { "epoch": 3.527617043661231, "grad_norm": 2.301234245300293, "learning_rate": 1.8456861270807148e-06, "loss": 0.8326, "step": 6706 }, { "epoch": 3.5281430825881115, "grad_norm": 2.234891414642334, "learning_rate": 1.8450156932823544e-06, "loss": 0.8152, "step": 6707 }, { "epoch": 3.528669121514992, "grad_norm": 2.312657117843628, "learning_rate": 1.8443453100562903e-06, "loss": 0.8419, "step": 6708 }, { "epoch": 3.5291951604418728, "grad_norm": 2.4563639163970947, "learning_rate": 1.8436749774542843e-06, "loss": 0.8324, "step": 6709 }, { "epoch": 3.5297211993687534, "grad_norm": 2.1312005519866943, "learning_rate": 1.8430046955280928e-06, "loss": 0.7849, "step": 6710 }, { "epoch": 3.530247238295634, "grad_norm": 2.3417510986328125, "learning_rate": 1.84233446432947e-06, "loss": 0.8238, "step": 6711 }, { "epoch": 3.530773277222514, "grad_norm": 2.340988874435425, "learning_rate": 1.8416642839101653e-06, "loss": 0.8096, "step": 6712 }, { "epoch": 3.5312993161493953, "grad_norm": 2.1512503623962402, "learning_rate": 1.840994154321924e-06, "loss": 0.8097, "step": 6713 }, { "epoch": 3.5318253550762755, "grad_norm": 2.3156962394714355, "learning_rate": 1.8403240756164892e-06, "loss": 0.8071, "step": 6714 }, { "epoch": 3.532351394003156, "grad_norm": 2.2650651931762695, "learning_rate": 1.8396540478455966e-06, "loss": 0.8327, "step": 6715 }, { "epoch": 3.5328774329300368, "grad_norm": 2.2772655487060547, "learning_rate": 1.8389840710609813e-06, "loss": 0.8286, "step": 6716 }, { "epoch": 3.5334034718569174, "grad_norm": 2.0915255546569824, "learning_rate": 1.838314145314372e-06, "loss": 0.8136, "step": 6717 }, { "epoch": 3.533929510783798, "grad_norm": 2.8129584789276123, "learning_rate": 1.837644270657496e-06, "loss": 0.7849, "step": 6718 }, { "epoch": 3.5344555497106787, "grad_norm": 2.2399885654449463, "learning_rate": 1.8369744471420746e-06, "loss": 0.8079, "step": 6719 }, { "epoch": 3.5349815886375593, "grad_norm": 2.415666341781616, "learning_rate": 1.8363046748198266e-06, "loss": 0.8311, "step": 6720 }, { "epoch": 3.53550762756444, "grad_norm": 2.1350815296173096, "learning_rate": 1.835634953742465e-06, "loss": 0.7483, "step": 6721 }, { "epoch": 3.5360336664913206, "grad_norm": 2.3341641426086426, "learning_rate": 1.834965283961701e-06, "loss": 0.7841, "step": 6722 }, { "epoch": 3.5365597054182007, "grad_norm": 2.1186366081237793, "learning_rate": 1.8342956655292393e-06, "loss": 0.8075, "step": 6723 }, { "epoch": 3.5370857443450814, "grad_norm": 2.322054386138916, "learning_rate": 1.8336260984967833e-06, "loss": 0.8016, "step": 6724 }, { "epoch": 3.537611783271962, "grad_norm": 2.3788182735443115, "learning_rate": 1.8329565829160309e-06, "loss": 0.8433, "step": 6725 }, { "epoch": 3.5381378221988427, "grad_norm": 2.2667806148529053, "learning_rate": 1.832287118838677e-06, "loss": 0.8395, "step": 6726 }, { "epoch": 3.5386638611257233, "grad_norm": 2.2441153526306152, "learning_rate": 1.8316177063164109e-06, "loss": 0.7887, "step": 6727 }, { "epoch": 3.539189900052604, "grad_norm": 2.426431894302368, "learning_rate": 1.8309483454009202e-06, "loss": 0.8633, "step": 6728 }, { "epoch": 3.5397159389794846, "grad_norm": 2.236891508102417, "learning_rate": 1.8302790361438858e-06, "loss": 0.7992, "step": 6729 }, { "epoch": 3.540241977906365, "grad_norm": 2.3096694946289062, "learning_rate": 1.8296097785969862e-06, "loss": 0.8196, "step": 6730 }, { "epoch": 3.540768016833246, "grad_norm": 2.303596258163452, "learning_rate": 1.828940572811897e-06, "loss": 0.8107, "step": 6731 }, { "epoch": 3.541294055760126, "grad_norm": 2.29158353805542, "learning_rate": 1.8282714188402872e-06, "loss": 0.8424, "step": 6732 }, { "epoch": 3.541820094687007, "grad_norm": 2.1342856884002686, "learning_rate": 1.8276023167338243e-06, "loss": 0.792, "step": 6733 }, { "epoch": 3.5423461336138873, "grad_norm": 2.270124912261963, "learning_rate": 1.8269332665441698e-06, "loss": 0.8022, "step": 6734 }, { "epoch": 3.542872172540768, "grad_norm": 2.267951011657715, "learning_rate": 1.8262642683229832e-06, "loss": 0.8103, "step": 6735 }, { "epoch": 3.5433982114676486, "grad_norm": 2.505004644393921, "learning_rate": 1.825595322121917e-06, "loss": 0.8105, "step": 6736 }, { "epoch": 3.543924250394529, "grad_norm": 2.2042183876037598, "learning_rate": 1.8249264279926224e-06, "loss": 0.7992, "step": 6737 }, { "epoch": 3.54445028932141, "grad_norm": 2.2982208728790283, "learning_rate": 1.8242575859867456e-06, "loss": 0.8002, "step": 6738 }, { "epoch": 3.5449763282482905, "grad_norm": 2.26724910736084, "learning_rate": 1.82358879615593e-06, "loss": 0.8086, "step": 6739 }, { "epoch": 3.545502367175171, "grad_norm": 2.5629169940948486, "learning_rate": 1.8229200585518122e-06, "loss": 0.8704, "step": 6740 }, { "epoch": 3.5460284061020513, "grad_norm": 2.2647995948791504, "learning_rate": 1.8222513732260271e-06, "loss": 0.8346, "step": 6741 }, { "epoch": 3.5465544450289324, "grad_norm": 2.2027747631073, "learning_rate": 1.8215827402302061e-06, "loss": 0.8001, "step": 6742 }, { "epoch": 3.5470804839558125, "grad_norm": 2.275857448577881, "learning_rate": 1.820914159615973e-06, "loss": 0.8269, "step": 6743 }, { "epoch": 3.547606522882693, "grad_norm": 2.210892677307129, "learning_rate": 1.820245631434951e-06, "loss": 0.717, "step": 6744 }, { "epoch": 3.548132561809574, "grad_norm": 2.3934237957000732, "learning_rate": 1.8195771557387575e-06, "loss": 0.8219, "step": 6745 }, { "epoch": 3.5486586007364544, "grad_norm": 2.3141746520996094, "learning_rate": 1.8189087325790084e-06, "loss": 0.7764, "step": 6746 }, { "epoch": 3.549184639663335, "grad_norm": 2.4066617488861084, "learning_rate": 1.8182403620073117e-06, "loss": 0.8557, "step": 6747 }, { "epoch": 3.5497106785902157, "grad_norm": 2.249624013900757, "learning_rate": 1.8175720440752737e-06, "loss": 0.7786, "step": 6748 }, { "epoch": 3.5502367175170964, "grad_norm": 2.327151298522949, "learning_rate": 1.8169037788344978e-06, "loss": 0.8208, "step": 6749 }, { "epoch": 3.550762756443977, "grad_norm": 2.2880728244781494, "learning_rate": 1.8162355663365788e-06, "loss": 0.7991, "step": 6750 }, { "epoch": 3.5512887953708576, "grad_norm": 2.284526824951172, "learning_rate": 1.8155674066331124e-06, "loss": 0.8108, "step": 6751 }, { "epoch": 3.551814834297738, "grad_norm": 2.348698377609253, "learning_rate": 1.814899299775688e-06, "loss": 0.8326, "step": 6752 }, { "epoch": 3.552340873224619, "grad_norm": 2.404324769973755, "learning_rate": 1.8142312458158906e-06, "loss": 0.8404, "step": 6753 }, { "epoch": 3.552866912151499, "grad_norm": 2.337867259979248, "learning_rate": 1.8135632448053015e-06, "loss": 0.8498, "step": 6754 }, { "epoch": 3.5533929510783797, "grad_norm": 2.526219367980957, "learning_rate": 1.8128952967954985e-06, "loss": 0.8625, "step": 6755 }, { "epoch": 3.5539189900052603, "grad_norm": 2.2594165802001953, "learning_rate": 1.8122274018380558e-06, "loss": 0.8571, "step": 6756 }, { "epoch": 3.554445028932141, "grad_norm": 2.3154165744781494, "learning_rate": 1.8115595599845404e-06, "loss": 0.8057, "step": 6757 }, { "epoch": 3.5549710678590216, "grad_norm": 2.4079506397247314, "learning_rate": 1.8108917712865186e-06, "loss": 0.7952, "step": 6758 }, { "epoch": 3.5554971067859023, "grad_norm": 2.4709677696228027, "learning_rate": 1.8102240357955514e-06, "loss": 0.8427, "step": 6759 }, { "epoch": 3.556023145712783, "grad_norm": 2.2507524490356445, "learning_rate": 1.809556353563195e-06, "loss": 0.8239, "step": 6760 }, { "epoch": 3.556549184639663, "grad_norm": 2.3236429691314697, "learning_rate": 1.8088887246410025e-06, "loss": 0.8051, "step": 6761 }, { "epoch": 3.557075223566544, "grad_norm": 2.377046823501587, "learning_rate": 1.8082211490805224e-06, "loss": 0.8428, "step": 6762 }, { "epoch": 3.5576012624934243, "grad_norm": 2.18680739402771, "learning_rate": 1.8075536269333005e-06, "loss": 0.7649, "step": 6763 }, { "epoch": 3.558127301420305, "grad_norm": 2.169534683227539, "learning_rate": 1.8068861582508748e-06, "loss": 0.7992, "step": 6764 }, { "epoch": 3.5586533403471856, "grad_norm": 2.2329933643341064, "learning_rate": 1.8062187430847832e-06, "loss": 0.7993, "step": 6765 }, { "epoch": 3.5591793792740662, "grad_norm": 2.429689884185791, "learning_rate": 1.8055513814865572e-06, "loss": 0.8744, "step": 6766 }, { "epoch": 3.559705418200947, "grad_norm": 2.324457883834839, "learning_rate": 1.8048840735077245e-06, "loss": 0.7654, "step": 6767 }, { "epoch": 3.5602314571278275, "grad_norm": 2.3623266220092773, "learning_rate": 1.804216819199809e-06, "loss": 0.7962, "step": 6768 }, { "epoch": 3.560757496054708, "grad_norm": 2.2487075328826904, "learning_rate": 1.8035496186143318e-06, "loss": 0.8135, "step": 6769 }, { "epoch": 3.561283534981589, "grad_norm": 2.3871843814849854, "learning_rate": 1.8028824718028066e-06, "loss": 0.8204, "step": 6770 }, { "epoch": 3.5618095739084694, "grad_norm": 2.3868062496185303, "learning_rate": 1.8022153788167462e-06, "loss": 0.809, "step": 6771 }, { "epoch": 3.5623356128353496, "grad_norm": 2.2512893676757812, "learning_rate": 1.8015483397076566e-06, "loss": 0.8039, "step": 6772 }, { "epoch": 3.5628616517622302, "grad_norm": 2.6955406665802, "learning_rate": 1.800881354527041e-06, "loss": 0.8162, "step": 6773 }, { "epoch": 3.563387690689111, "grad_norm": 2.6110663414001465, "learning_rate": 1.8002144233263987e-06, "loss": 0.8467, "step": 6774 }, { "epoch": 3.5639137296159915, "grad_norm": 2.408461809158325, "learning_rate": 1.7995475461572245e-06, "loss": 0.7852, "step": 6775 }, { "epoch": 3.564439768542872, "grad_norm": 2.2122838497161865, "learning_rate": 1.7988807230710092e-06, "loss": 0.7637, "step": 6776 }, { "epoch": 3.564965807469753, "grad_norm": 2.501992702484131, "learning_rate": 1.798213954119239e-06, "loss": 0.8402, "step": 6777 }, { "epoch": 3.5654918463966334, "grad_norm": 2.300705671310425, "learning_rate": 1.7975472393533962e-06, "loss": 0.7396, "step": 6778 }, { "epoch": 3.566017885323514, "grad_norm": 2.204712152481079, "learning_rate": 1.796880578824958e-06, "loss": 0.8266, "step": 6779 }, { "epoch": 3.5665439242503947, "grad_norm": 2.1564574241638184, "learning_rate": 1.7962139725853988e-06, "loss": 0.7524, "step": 6780 }, { "epoch": 3.567069963177275, "grad_norm": 2.26505708694458, "learning_rate": 1.795547420686188e-06, "loss": 0.8615, "step": 6781 }, { "epoch": 3.567596002104156, "grad_norm": 2.161832332611084, "learning_rate": 1.7948809231787922e-06, "loss": 0.7982, "step": 6782 }, { "epoch": 3.568122041031036, "grad_norm": 2.342695474624634, "learning_rate": 1.794214480114671e-06, "loss": 0.7696, "step": 6783 }, { "epoch": 3.5686480799579168, "grad_norm": 2.2464303970336914, "learning_rate": 1.7935480915452825e-06, "loss": 0.7706, "step": 6784 }, { "epoch": 3.5691741188847974, "grad_norm": 2.470348596572876, "learning_rate": 1.7928817575220798e-06, "loss": 0.8393, "step": 6785 }, { "epoch": 3.569700157811678, "grad_norm": 2.2346954345703125, "learning_rate": 1.79221547809651e-06, "loss": 0.8196, "step": 6786 }, { "epoch": 3.5702261967385587, "grad_norm": 2.298243999481201, "learning_rate": 1.791549253320019e-06, "loss": 0.7964, "step": 6787 }, { "epoch": 3.5707522356654393, "grad_norm": 2.270040988922119, "learning_rate": 1.7908830832440465e-06, "loss": 0.7912, "step": 6788 }, { "epoch": 3.57127827459232, "grad_norm": 2.3706812858581543, "learning_rate": 1.7902169679200282e-06, "loss": 0.8569, "step": 6789 }, { "epoch": 3.5718043135192006, "grad_norm": 2.2994346618652344, "learning_rate": 1.7895509073993958e-06, "loss": 0.8199, "step": 6790 }, { "epoch": 3.572330352446081, "grad_norm": 2.253584146499634, "learning_rate": 1.7888849017335774e-06, "loss": 0.8068, "step": 6791 }, { "epoch": 3.5728563913729614, "grad_norm": 2.4052882194519043, "learning_rate": 1.7882189509739966e-06, "loss": 0.8092, "step": 6792 }, { "epoch": 3.573382430299842, "grad_norm": 2.036642074584961, "learning_rate": 1.787553055172071e-06, "loss": 0.7727, "step": 6793 }, { "epoch": 3.5739084692267227, "grad_norm": 2.375518560409546, "learning_rate": 1.786887214379216e-06, "loss": 0.7997, "step": 6794 }, { "epoch": 3.5744345081536033, "grad_norm": 2.411695718765259, "learning_rate": 1.786221428646843e-06, "loss": 0.8864, "step": 6795 }, { "epoch": 3.574960547080484, "grad_norm": 2.1718907356262207, "learning_rate": 1.7855556980263573e-06, "loss": 0.7949, "step": 6796 }, { "epoch": 3.5754865860073646, "grad_norm": 2.8250157833099365, "learning_rate": 1.7848900225691607e-06, "loss": 0.8021, "step": 6797 }, { "epoch": 3.576012624934245, "grad_norm": 2.2177746295928955, "learning_rate": 1.784224402326652e-06, "loss": 0.7985, "step": 6798 }, { "epoch": 3.576538663861126, "grad_norm": 2.4110212326049805, "learning_rate": 1.7835588373502249e-06, "loss": 0.8124, "step": 6799 }, { "epoch": 3.5770647027880065, "grad_norm": 2.316929578781128, "learning_rate": 1.782893327691267e-06, "loss": 0.767, "step": 6800 }, { "epoch": 3.5775907417148867, "grad_norm": 2.4593799114227295, "learning_rate": 1.782227873401165e-06, "loss": 0.8179, "step": 6801 }, { "epoch": 3.5781167806417677, "grad_norm": 2.2084827423095703, "learning_rate": 1.7815624745312979e-06, "loss": 0.817, "step": 6802 }, { "epoch": 3.578642819568648, "grad_norm": 2.234684705734253, "learning_rate": 1.7808971311330433e-06, "loss": 0.7981, "step": 6803 }, { "epoch": 3.5791688584955286, "grad_norm": 2.2849349975585938, "learning_rate": 1.780231843257773e-06, "loss": 0.8461, "step": 6804 }, { "epoch": 3.579694897422409, "grad_norm": 2.234757661819458, "learning_rate": 1.779566610956855e-06, "loss": 0.8595, "step": 6805 }, { "epoch": 3.58022093634929, "grad_norm": 2.192944049835205, "learning_rate": 1.778901434281654e-06, "loss": 0.7428, "step": 6806 }, { "epoch": 3.5807469752761705, "grad_norm": 2.4062681198120117, "learning_rate": 1.7782363132835279e-06, "loss": 0.8306, "step": 6807 }, { "epoch": 3.581273014203051, "grad_norm": 2.4726765155792236, "learning_rate": 1.7775712480138308e-06, "loss": 0.8386, "step": 6808 }, { "epoch": 3.5817990531299317, "grad_norm": 2.285722494125366, "learning_rate": 1.7769062385239145e-06, "loss": 0.8071, "step": 6809 }, { "epoch": 3.582325092056812, "grad_norm": 2.42941951751709, "learning_rate": 1.776241284865125e-06, "loss": 0.8741, "step": 6810 }, { "epoch": 3.582851130983693, "grad_norm": 2.4711058139801025, "learning_rate": 1.7755763870888048e-06, "loss": 0.8306, "step": 6811 }, { "epoch": 3.583377169910573, "grad_norm": 2.200153350830078, "learning_rate": 1.7749115452462922e-06, "loss": 0.8206, "step": 6812 }, { "epoch": 3.583903208837454, "grad_norm": 2.407073497772217, "learning_rate": 1.7742467593889196e-06, "loss": 0.8059, "step": 6813 }, { "epoch": 3.5844292477643345, "grad_norm": 2.224564552307129, "learning_rate": 1.7735820295680165e-06, "loss": 0.7711, "step": 6814 }, { "epoch": 3.584955286691215, "grad_norm": 2.2370688915252686, "learning_rate": 1.7729173558349066e-06, "loss": 0.7866, "step": 6815 }, { "epoch": 3.5854813256180957, "grad_norm": 2.2582833766937256, "learning_rate": 1.7722527382409117e-06, "loss": 0.7678, "step": 6816 }, { "epoch": 3.5860073645449764, "grad_norm": 2.201625108718872, "learning_rate": 1.7715881768373471e-06, "loss": 0.8522, "step": 6817 }, { "epoch": 3.586533403471857, "grad_norm": 2.3071095943450928, "learning_rate": 1.7709236716755258e-06, "loss": 0.7505, "step": 6818 }, { "epoch": 3.5870594423987376, "grad_norm": 2.289677619934082, "learning_rate": 1.7702592228067533e-06, "loss": 0.8263, "step": 6819 }, { "epoch": 3.5875854813256183, "grad_norm": 2.3556301593780518, "learning_rate": 1.769594830282334e-06, "loss": 0.7944, "step": 6820 }, { "epoch": 3.5881115202524985, "grad_norm": 2.3627138137817383, "learning_rate": 1.768930494153567e-06, "loss": 0.816, "step": 6821 }, { "epoch": 3.5886375591793795, "grad_norm": 2.578767776489258, "learning_rate": 1.768266214471745e-06, "loss": 0.7692, "step": 6822 }, { "epoch": 3.5891635981062597, "grad_norm": 2.3199217319488525, "learning_rate": 1.7676019912881593e-06, "loss": 0.791, "step": 6823 }, { "epoch": 3.5896896370331404, "grad_norm": 2.3710718154907227, "learning_rate": 1.7669378246540947e-06, "loss": 0.8087, "step": 6824 }, { "epoch": 3.590215675960021, "grad_norm": 2.3410680294036865, "learning_rate": 1.7662737146208337e-06, "loss": 0.8147, "step": 6825 }, { "epoch": 3.5907417148869016, "grad_norm": 2.200587511062622, "learning_rate": 1.765609661239652e-06, "loss": 0.797, "step": 6826 }, { "epoch": 3.5912677538137823, "grad_norm": 2.4921727180480957, "learning_rate": 1.7649456645618226e-06, "loss": 0.9028, "step": 6827 }, { "epoch": 3.591793792740663, "grad_norm": 2.3356645107269287, "learning_rate": 1.7642817246386145e-06, "loss": 0.8039, "step": 6828 }, { "epoch": 3.5923198316675435, "grad_norm": 2.4265289306640625, "learning_rate": 1.76361784152129e-06, "loss": 0.8259, "step": 6829 }, { "epoch": 3.5928458705944237, "grad_norm": 2.3344829082489014, "learning_rate": 1.7629540152611092e-06, "loss": 0.8373, "step": 6830 }, { "epoch": 3.593371909521305, "grad_norm": 2.388416051864624, "learning_rate": 1.7622902459093273e-06, "loss": 0.7392, "step": 6831 }, { "epoch": 3.593897948448185, "grad_norm": 2.6373744010925293, "learning_rate": 1.7616265335171939e-06, "loss": 0.8119, "step": 6832 }, { "epoch": 3.5944239873750656, "grad_norm": 2.426213264465332, "learning_rate": 1.7609628781359562e-06, "loss": 0.8107, "step": 6833 }, { "epoch": 3.5949500263019463, "grad_norm": 2.2867064476013184, "learning_rate": 1.7602992798168556e-06, "loss": 0.8204, "step": 6834 }, { "epoch": 3.595476065228827, "grad_norm": 2.2147181034088135, "learning_rate": 1.7596357386111308e-06, "loss": 0.8078, "step": 6835 }, { "epoch": 3.5960021041557075, "grad_norm": 2.3051929473876953, "learning_rate": 1.7589722545700123e-06, "loss": 0.8039, "step": 6836 }, { "epoch": 3.596528143082588, "grad_norm": 2.321636438369751, "learning_rate": 1.7583088277447307e-06, "loss": 0.7655, "step": 6837 }, { "epoch": 3.597054182009469, "grad_norm": 2.505702257156372, "learning_rate": 1.757645458186509e-06, "loss": 0.816, "step": 6838 }, { "epoch": 3.5975802209363494, "grad_norm": 2.355534315109253, "learning_rate": 1.7569821459465674e-06, "loss": 0.8098, "step": 6839 }, { "epoch": 3.59810625986323, "grad_norm": 2.3430917263031006, "learning_rate": 1.7563188910761212e-06, "loss": 0.8408, "step": 6840 }, { "epoch": 3.5986322987901103, "grad_norm": 2.3508381843566895, "learning_rate": 1.755655693626381e-06, "loss": 0.8288, "step": 6841 }, { "epoch": 3.599158337716991, "grad_norm": 2.15136456489563, "learning_rate": 1.7549925536485548e-06, "loss": 0.8219, "step": 6842 }, { "epoch": 3.5996843766438715, "grad_norm": 2.1987226009368896, "learning_rate": 1.7543294711938424e-06, "loss": 0.8185, "step": 6843 }, { "epoch": 3.600210415570752, "grad_norm": 2.6277804374694824, "learning_rate": 1.7536664463134425e-06, "loss": 0.851, "step": 6844 }, { "epoch": 3.600736454497633, "grad_norm": 2.215081214904785, "learning_rate": 1.7530034790585476e-06, "loss": 0.8118, "step": 6845 }, { "epoch": 3.6012624934245134, "grad_norm": 2.2353098392486572, "learning_rate": 1.752340569480347e-06, "loss": 0.7798, "step": 6846 }, { "epoch": 3.601788532351394, "grad_norm": 2.772069215774536, "learning_rate": 1.7516777176300242e-06, "loss": 0.7784, "step": 6847 }, { "epoch": 3.6023145712782747, "grad_norm": 2.2545039653778076, "learning_rate": 1.7510149235587604e-06, "loss": 0.8062, "step": 6848 }, { "epoch": 3.6028406102051553, "grad_norm": 2.208139657974243, "learning_rate": 1.7503521873177298e-06, "loss": 0.8015, "step": 6849 }, { "epoch": 3.6033666491320355, "grad_norm": 2.4332070350646973, "learning_rate": 1.7496895089581033e-06, "loss": 0.8524, "step": 6850 }, { "epoch": 3.6038926880589166, "grad_norm": 2.4568393230438232, "learning_rate": 1.7490268885310471e-06, "loss": 0.8138, "step": 6851 }, { "epoch": 3.604418726985797, "grad_norm": 2.194866895675659, "learning_rate": 1.7483643260877234e-06, "loss": 0.8053, "step": 6852 }, { "epoch": 3.6049447659126774, "grad_norm": 2.28562068939209, "learning_rate": 1.7477018216792892e-06, "loss": 0.8313, "step": 6853 }, { "epoch": 3.605470804839558, "grad_norm": 2.4551830291748047, "learning_rate": 1.7470393753568981e-06, "loss": 0.8959, "step": 6854 }, { "epoch": 3.6059968437664387, "grad_norm": 2.4843056201934814, "learning_rate": 1.7463769871716986e-06, "loss": 0.7715, "step": 6855 }, { "epoch": 3.6065228826933193, "grad_norm": 2.212655782699585, "learning_rate": 1.7457146571748346e-06, "loss": 0.8222, "step": 6856 }, { "epoch": 3.6070489216202, "grad_norm": 2.3608438968658447, "learning_rate": 1.745052385417445e-06, "loss": 0.7969, "step": 6857 }, { "epoch": 3.6075749605470806, "grad_norm": 2.167614221572876, "learning_rate": 1.7443901719506645e-06, "loss": 0.7757, "step": 6858 }, { "epoch": 3.6081009994739612, "grad_norm": 2.1144602298736572, "learning_rate": 1.7437280168256243e-06, "loss": 0.8164, "step": 6859 }, { "epoch": 3.608627038400842, "grad_norm": 2.245079755783081, "learning_rate": 1.7430659200934503e-06, "loss": 0.8126, "step": 6860 }, { "epoch": 3.609153077327722, "grad_norm": 2.31333327293396, "learning_rate": 1.742403881805264e-06, "loss": 0.8422, "step": 6861 }, { "epoch": 3.6096791162546027, "grad_norm": 2.109562397003174, "learning_rate": 1.7417419020121818e-06, "loss": 0.8044, "step": 6862 }, { "epoch": 3.6102051551814833, "grad_norm": 2.3565375804901123, "learning_rate": 1.7410799807653175e-06, "loss": 0.8091, "step": 6863 }, { "epoch": 3.610731194108364, "grad_norm": 2.2996842861175537, "learning_rate": 1.740418118115777e-06, "loss": 0.8198, "step": 6864 }, { "epoch": 3.6112572330352446, "grad_norm": 2.187039375305176, "learning_rate": 1.7397563141146652e-06, "loss": 0.8466, "step": 6865 }, { "epoch": 3.6117832719621252, "grad_norm": 2.2208070755004883, "learning_rate": 1.7390945688130798e-06, "loss": 0.7911, "step": 6866 }, { "epoch": 3.612309310889006, "grad_norm": 2.307356357574463, "learning_rate": 1.7384328822621167e-06, "loss": 0.7989, "step": 6867 }, { "epoch": 3.6128353498158865, "grad_norm": 2.3996243476867676, "learning_rate": 1.7377712545128639e-06, "loss": 0.8273, "step": 6868 }, { "epoch": 3.613361388742767, "grad_norm": 2.309147357940674, "learning_rate": 1.7371096856164076e-06, "loss": 0.8266, "step": 6869 }, { "epoch": 3.6138874276696473, "grad_norm": 2.216115713119507, "learning_rate": 1.736448175623829e-06, "loss": 0.8397, "step": 6870 }, { "epoch": 3.6144134665965284, "grad_norm": 2.222391128540039, "learning_rate": 1.7357867245862043e-06, "loss": 0.8066, "step": 6871 }, { "epoch": 3.6149395055234086, "grad_norm": 2.5504403114318848, "learning_rate": 1.735125332554603e-06, "loss": 0.8205, "step": 6872 }, { "epoch": 3.6154655444502892, "grad_norm": 2.3368020057678223, "learning_rate": 1.7344639995800944e-06, "loss": 0.8319, "step": 6873 }, { "epoch": 3.61599158337717, "grad_norm": 2.2172892093658447, "learning_rate": 1.73380272571374e-06, "loss": 0.8007, "step": 6874 }, { "epoch": 3.6165176223040505, "grad_norm": 2.372816801071167, "learning_rate": 1.733141511006598e-06, "loss": 0.8506, "step": 6875 }, { "epoch": 3.617043661230931, "grad_norm": 2.2260475158691406, "learning_rate": 1.7324803555097215e-06, "loss": 0.8095, "step": 6876 }, { "epoch": 3.6175697001578118, "grad_norm": 2.3740713596343994, "learning_rate": 1.7318192592741595e-06, "loss": 0.8113, "step": 6877 }, { "epoch": 3.6180957390846924, "grad_norm": 2.035574197769165, "learning_rate": 1.731158222350957e-06, "loss": 0.7979, "step": 6878 }, { "epoch": 3.6186217780115726, "grad_norm": 2.3535306453704834, "learning_rate": 1.730497244791152e-06, "loss": 0.8569, "step": 6879 }, { "epoch": 3.6191478169384537, "grad_norm": 2.1080195903778076, "learning_rate": 1.7298363266457807e-06, "loss": 0.807, "step": 6880 }, { "epoch": 3.619673855865334, "grad_norm": 2.3547210693359375, "learning_rate": 1.7291754679658729e-06, "loss": 0.8515, "step": 6881 }, { "epoch": 3.6201998947922145, "grad_norm": 2.2739179134368896, "learning_rate": 1.7285146688024546e-06, "loss": 0.8252, "step": 6882 }, { "epoch": 3.620725933719095, "grad_norm": 2.1560871601104736, "learning_rate": 1.7278539292065471e-06, "loss": 0.7723, "step": 6883 }, { "epoch": 3.6212519726459758, "grad_norm": 2.2977917194366455, "learning_rate": 1.7271932492291677e-06, "loss": 0.8201, "step": 6884 }, { "epoch": 3.6217780115728564, "grad_norm": 2.2049975395202637, "learning_rate": 1.7265326289213286e-06, "loss": 0.8012, "step": 6885 }, { "epoch": 3.622304050499737, "grad_norm": 2.536348819732666, "learning_rate": 1.7258720683340357e-06, "loss": 0.8746, "step": 6886 }, { "epoch": 3.6228300894266177, "grad_norm": 2.184391736984253, "learning_rate": 1.7252115675182935e-06, "loss": 0.8284, "step": 6887 }, { "epoch": 3.6233561283534983, "grad_norm": 2.2189979553222656, "learning_rate": 1.7245511265250985e-06, "loss": 0.8347, "step": 6888 }, { "epoch": 3.623882167280379, "grad_norm": 2.2621612548828125, "learning_rate": 1.723890745405446e-06, "loss": 0.8348, "step": 6889 }, { "epoch": 3.624408206207259, "grad_norm": 2.439124584197998, "learning_rate": 1.7232304242103237e-06, "loss": 0.7971, "step": 6890 }, { "epoch": 3.62493424513414, "grad_norm": 2.3922512531280518, "learning_rate": 1.7225701629907177e-06, "loss": 0.7995, "step": 6891 }, { "epoch": 3.6254602840610204, "grad_norm": 2.3707680702209473, "learning_rate": 1.7219099617976065e-06, "loss": 0.8262, "step": 6892 }, { "epoch": 3.625986322987901, "grad_norm": 2.1922428607940674, "learning_rate": 1.721249820681965e-06, "loss": 0.7993, "step": 6893 }, { "epoch": 3.6265123619147817, "grad_norm": 2.3728537559509277, "learning_rate": 1.7205897396947636e-06, "loss": 0.8635, "step": 6894 }, { "epoch": 3.6270384008416623, "grad_norm": 2.26584529876709, "learning_rate": 1.7199297188869685e-06, "loss": 0.8369, "step": 6895 }, { "epoch": 3.627564439768543, "grad_norm": 2.3225393295288086, "learning_rate": 1.719269758309541e-06, "loss": 0.8235, "step": 6896 }, { "epoch": 3.6280904786954236, "grad_norm": 2.151104211807251, "learning_rate": 1.718609858013438e-06, "loss": 0.8227, "step": 6897 }, { "epoch": 3.628616517622304, "grad_norm": 2.2634084224700928, "learning_rate": 1.7179500180496106e-06, "loss": 0.76, "step": 6898 }, { "epoch": 3.6291425565491844, "grad_norm": 2.3300254344940186, "learning_rate": 1.717290238469007e-06, "loss": 0.8197, "step": 6899 }, { "epoch": 3.6296685954760655, "grad_norm": 2.3277599811553955, "learning_rate": 1.716630519322568e-06, "loss": 0.8336, "step": 6900 }, { "epoch": 3.6301946344029457, "grad_norm": 2.216050863265991, "learning_rate": 1.7159708606612324e-06, "loss": 0.8563, "step": 6901 }, { "epoch": 3.6307206733298263, "grad_norm": 2.385023355484009, "learning_rate": 1.7153112625359335e-06, "loss": 0.8398, "step": 6902 }, { "epoch": 3.631246712256707, "grad_norm": 2.6560583114624023, "learning_rate": 1.7146517249976003e-06, "loss": 0.8316, "step": 6903 }, { "epoch": 3.6317727511835876, "grad_norm": 2.320793628692627, "learning_rate": 1.7139922480971567e-06, "loss": 0.8545, "step": 6904 }, { "epoch": 3.632298790110468, "grad_norm": 2.291451930999756, "learning_rate": 1.7133328318855202e-06, "loss": 0.7877, "step": 6905 }, { "epoch": 3.632824829037349, "grad_norm": 2.377387046813965, "learning_rate": 1.7126734764136084e-06, "loss": 0.8475, "step": 6906 }, { "epoch": 3.6333508679642295, "grad_norm": 2.2511978149414062, "learning_rate": 1.7120141817323275e-06, "loss": 0.8156, "step": 6907 }, { "epoch": 3.63387690689111, "grad_norm": 2.4362118244171143, "learning_rate": 1.7113549478925845e-06, "loss": 0.809, "step": 6908 }, { "epoch": 3.6344029458179907, "grad_norm": 2.1650023460388184, "learning_rate": 1.7106957749452796e-06, "loss": 0.8402, "step": 6909 }, { "epoch": 3.634928984744871, "grad_norm": 2.2873685359954834, "learning_rate": 1.710036662941309e-06, "loss": 0.7725, "step": 6910 }, { "epoch": 3.6354550236717516, "grad_norm": 2.5179617404937744, "learning_rate": 1.7093776119315625e-06, "loss": 0.8047, "step": 6911 }, { "epoch": 3.635981062598632, "grad_norm": 2.8693246841430664, "learning_rate": 1.7087186219669272e-06, "loss": 0.8438, "step": 6912 }, { "epoch": 3.636507101525513, "grad_norm": 2.252930164337158, "learning_rate": 1.7080596930982857e-06, "loss": 0.8182, "step": 6913 }, { "epoch": 3.6370331404523935, "grad_norm": 2.092999219894409, "learning_rate": 1.7074008253765128e-06, "loss": 0.7903, "step": 6914 }, { "epoch": 3.637559179379274, "grad_norm": 2.3600423336029053, "learning_rate": 1.7067420188524813e-06, "loss": 0.7924, "step": 6915 }, { "epoch": 3.6380852183061547, "grad_norm": 2.327265739440918, "learning_rate": 1.7060832735770595e-06, "loss": 0.8953, "step": 6916 }, { "epoch": 3.6386112572330354, "grad_norm": 2.2003695964813232, "learning_rate": 1.7054245896011094e-06, "loss": 0.8284, "step": 6917 }, { "epoch": 3.639137296159916, "grad_norm": 2.2890045642852783, "learning_rate": 1.7047659669754884e-06, "loss": 0.8751, "step": 6918 }, { "epoch": 3.639663335086796, "grad_norm": 2.3281712532043457, "learning_rate": 1.704107405751051e-06, "loss": 0.7633, "step": 6919 }, { "epoch": 3.6401893740136773, "grad_norm": 2.3019280433654785, "learning_rate": 1.7034489059786446e-06, "loss": 0.8031, "step": 6920 }, { "epoch": 3.6407154129405574, "grad_norm": 2.247279405593872, "learning_rate": 1.7027904677091145e-06, "loss": 0.816, "step": 6921 }, { "epoch": 3.641241451867438, "grad_norm": 2.517862319946289, "learning_rate": 1.7021320909932972e-06, "loss": 0.8443, "step": 6922 }, { "epoch": 3.6417674907943187, "grad_norm": 2.2517030239105225, "learning_rate": 1.7014737758820292e-06, "loss": 0.7887, "step": 6923 }, { "epoch": 3.6422935297211994, "grad_norm": 2.173236608505249, "learning_rate": 1.7008155224261383e-06, "loss": 0.7919, "step": 6924 }, { "epoch": 3.64281956864808, "grad_norm": 2.2483484745025635, "learning_rate": 1.70015733067645e-06, "loss": 0.8342, "step": 6925 }, { "epoch": 3.6433456075749606, "grad_norm": 2.209137439727783, "learning_rate": 1.6994992006837841e-06, "loss": 0.833, "step": 6926 }, { "epoch": 3.6438716465018413, "grad_norm": 2.615115165710449, "learning_rate": 1.6988411324989567e-06, "loss": 0.786, "step": 6927 }, { "epoch": 3.644397685428722, "grad_norm": 3.5198230743408203, "learning_rate": 1.6981831261727772e-06, "loss": 0.7901, "step": 6928 }, { "epoch": 3.6449237243556025, "grad_norm": 2.857571840286255, "learning_rate": 1.6975251817560513e-06, "loss": 0.7836, "step": 6929 }, { "epoch": 3.6454497632824827, "grad_norm": 2.3560686111450195, "learning_rate": 1.6968672992995793e-06, "loss": 0.8006, "step": 6930 }, { "epoch": 3.6459758022093633, "grad_norm": 2.2692761421203613, "learning_rate": 1.6962094788541582e-06, "loss": 0.8218, "step": 6931 }, { "epoch": 3.646501841136244, "grad_norm": 2.21148943901062, "learning_rate": 1.695551720470579e-06, "loss": 0.8409, "step": 6932 }, { "epoch": 3.6470278800631246, "grad_norm": 2.428622245788574, "learning_rate": 1.6948940241996283e-06, "loss": 0.8517, "step": 6933 }, { "epoch": 3.6475539189900053, "grad_norm": 2.1937928199768066, "learning_rate": 1.6942363900920882e-06, "loss": 0.7749, "step": 6934 }, { "epoch": 3.648079957916886, "grad_norm": 2.3496575355529785, "learning_rate": 1.6935788181987351e-06, "loss": 0.8117, "step": 6935 }, { "epoch": 3.6486059968437665, "grad_norm": 2.5004899501800537, "learning_rate": 1.692921308570341e-06, "loss": 0.8283, "step": 6936 }, { "epoch": 3.649132035770647, "grad_norm": 2.1244773864746094, "learning_rate": 1.6922638612576725e-06, "loss": 0.7816, "step": 6937 }, { "epoch": 3.649658074697528, "grad_norm": 2.285604476928711, "learning_rate": 1.6916064763114932e-06, "loss": 0.8011, "step": 6938 }, { "epoch": 3.650184113624408, "grad_norm": 2.296208143234253, "learning_rate": 1.69094915378256e-06, "loss": 0.8544, "step": 6939 }, { "epoch": 3.650710152551289, "grad_norm": 2.3372299671173096, "learning_rate": 1.690291893721627e-06, "loss": 0.8146, "step": 6940 }, { "epoch": 3.6512361914781692, "grad_norm": 2.1429426670074463, "learning_rate": 1.6896346961794407e-06, "loss": 0.8017, "step": 6941 }, { "epoch": 3.65176223040505, "grad_norm": 2.232889175415039, "learning_rate": 1.6889775612067458e-06, "loss": 0.7725, "step": 6942 }, { "epoch": 3.6522882693319305, "grad_norm": 2.230398416519165, "learning_rate": 1.6883204888542787e-06, "loss": 0.7876, "step": 6943 }, { "epoch": 3.652814308258811, "grad_norm": 2.3218576908111572, "learning_rate": 1.6876634791727741e-06, "loss": 0.7825, "step": 6944 }, { "epoch": 3.653340347185692, "grad_norm": 2.3935937881469727, "learning_rate": 1.6870065322129605e-06, "loss": 0.8101, "step": 6945 }, { "epoch": 3.6538663861125724, "grad_norm": 2.35552978515625, "learning_rate": 1.686349648025562e-06, "loss": 0.8378, "step": 6946 }, { "epoch": 3.654392425039453, "grad_norm": 2.275754928588867, "learning_rate": 1.6856928266612977e-06, "loss": 0.8149, "step": 6947 }, { "epoch": 3.6549184639663332, "grad_norm": 2.3717589378356934, "learning_rate": 1.6850360681708808e-06, "loss": 0.7887, "step": 6948 }, { "epoch": 3.6554445028932143, "grad_norm": 2.379460096359253, "learning_rate": 1.6843793726050223e-06, "loss": 0.8429, "step": 6949 }, { "epoch": 3.6559705418200945, "grad_norm": 2.1982004642486572, "learning_rate": 1.6837227400144245e-06, "loss": 0.8112, "step": 6950 }, { "epoch": 3.656496580746975, "grad_norm": 2.2463607788085938, "learning_rate": 1.683066170449788e-06, "loss": 0.8118, "step": 6951 }, { "epoch": 3.657022619673856, "grad_norm": 2.365570306777954, "learning_rate": 1.6824096639618076e-06, "loss": 0.8461, "step": 6952 }, { "epoch": 3.6575486586007364, "grad_norm": 2.2633049488067627, "learning_rate": 1.6817532206011732e-06, "loss": 0.7815, "step": 6953 }, { "epoch": 3.658074697527617, "grad_norm": 2.426823139190674, "learning_rate": 1.6810968404185692e-06, "loss": 0.834, "step": 6954 }, { "epoch": 3.6586007364544977, "grad_norm": 2.572584390640259, "learning_rate": 1.680440523464676e-06, "loss": 0.8462, "step": 6955 }, { "epoch": 3.6591267753813783, "grad_norm": 2.204834461212158, "learning_rate": 1.6797842697901701e-06, "loss": 0.7933, "step": 6956 }, { "epoch": 3.659652814308259, "grad_norm": 2.163661003112793, "learning_rate": 1.6791280794457194e-06, "loss": 0.767, "step": 6957 }, { "epoch": 3.6601788532351396, "grad_norm": 2.1145317554473877, "learning_rate": 1.6784719524819903e-06, "loss": 0.7857, "step": 6958 }, { "epoch": 3.6607048921620198, "grad_norm": 2.2437944412231445, "learning_rate": 1.677815888949644e-06, "loss": 0.8309, "step": 6959 }, { "epoch": 3.661230931088901, "grad_norm": 2.2490508556365967, "learning_rate": 1.677159888899335e-06, "loss": 0.8109, "step": 6960 }, { "epoch": 3.661756970015781, "grad_norm": 2.183896064758301, "learning_rate": 1.676503952381715e-06, "loss": 0.8191, "step": 6961 }, { "epoch": 3.6622830089426617, "grad_norm": 2.267484664916992, "learning_rate": 1.6758480794474292e-06, "loss": 0.8198, "step": 6962 }, { "epoch": 3.6628090478695423, "grad_norm": 2.2580184936523438, "learning_rate": 1.6751922701471197e-06, "loss": 0.773, "step": 6963 }, { "epoch": 3.663335086796423, "grad_norm": 2.3399250507354736, "learning_rate": 1.6745365245314204e-06, "loss": 0.7666, "step": 6964 }, { "epoch": 3.6638611257233036, "grad_norm": 2.2503223419189453, "learning_rate": 1.673880842650964e-06, "loss": 0.8378, "step": 6965 }, { "epoch": 3.664387164650184, "grad_norm": 2.2589287757873535, "learning_rate": 1.6732252245563763e-06, "loss": 0.7962, "step": 6966 }, { "epoch": 3.664913203577065, "grad_norm": 2.542750358581543, "learning_rate": 1.672569670298278e-06, "loss": 0.8434, "step": 6967 }, { "epoch": 3.665439242503945, "grad_norm": 2.257844924926758, "learning_rate": 1.6719141799272858e-06, "loss": 0.8253, "step": 6968 }, { "epoch": 3.665965281430826, "grad_norm": 2.1892380714416504, "learning_rate": 1.6712587534940112e-06, "loss": 0.8489, "step": 6969 }, { "epoch": 3.6664913203577063, "grad_norm": 2.3537535667419434, "learning_rate": 1.6706033910490615e-06, "loss": 0.8346, "step": 6970 }, { "epoch": 3.667017359284587, "grad_norm": 2.2484781742095947, "learning_rate": 1.6699480926430361e-06, "loss": 0.8162, "step": 6971 }, { "epoch": 3.6675433982114676, "grad_norm": 2.4307942390441895, "learning_rate": 1.6692928583265333e-06, "loss": 0.8059, "step": 6972 }, { "epoch": 3.668069437138348, "grad_norm": 2.4430689811706543, "learning_rate": 1.668637688150143e-06, "loss": 0.8617, "step": 6973 }, { "epoch": 3.668595476065229, "grad_norm": 2.325995922088623, "learning_rate": 1.6679825821644534e-06, "loss": 0.8333, "step": 6974 }, { "epoch": 3.6691215149921095, "grad_norm": 2.31103777885437, "learning_rate": 1.6673275404200458e-06, "loss": 0.8193, "step": 6975 }, { "epoch": 3.66964755391899, "grad_norm": 2.1631338596343994, "learning_rate": 1.6666725629674968e-06, "loss": 0.7838, "step": 6976 }, { "epoch": 3.6701735928458707, "grad_norm": 2.452697992324829, "learning_rate": 1.6660176498573788e-06, "loss": 0.8371, "step": 6977 }, { "epoch": 3.6706996317727514, "grad_norm": 2.2527623176574707, "learning_rate": 1.665362801140258e-06, "loss": 0.8125, "step": 6978 }, { "epoch": 3.6712256706996316, "grad_norm": 2.51094651222229, "learning_rate": 1.6647080168666957e-06, "loss": 0.8002, "step": 6979 }, { "epoch": 3.671751709626512, "grad_norm": 2.2043919563293457, "learning_rate": 1.6640532970872492e-06, "loss": 0.8255, "step": 6980 }, { "epoch": 3.672277748553393, "grad_norm": 2.222822666168213, "learning_rate": 1.66339864185247e-06, "loss": 0.7824, "step": 6981 }, { "epoch": 3.6728037874802735, "grad_norm": 2.7331693172454834, "learning_rate": 1.662744051212906e-06, "loss": 0.8409, "step": 6982 }, { "epoch": 3.673329826407154, "grad_norm": 2.300334930419922, "learning_rate": 1.6620895252190988e-06, "loss": 0.8393, "step": 6983 }, { "epoch": 3.6738558653340347, "grad_norm": 2.334707498550415, "learning_rate": 1.6614350639215843e-06, "loss": 0.78, "step": 6984 }, { "epoch": 3.6743819042609154, "grad_norm": 2.544874668121338, "learning_rate": 1.6607806673708966e-06, "loss": 0.7952, "step": 6985 }, { "epoch": 3.674907943187796, "grad_norm": 2.213653802871704, "learning_rate": 1.6601263356175598e-06, "loss": 0.8418, "step": 6986 }, { "epoch": 3.6754339821146766, "grad_norm": 2.26179838180542, "learning_rate": 1.6594720687120975e-06, "loss": 0.7791, "step": 6987 }, { "epoch": 3.675960021041557, "grad_norm": 2.4369518756866455, "learning_rate": 1.6588178667050259e-06, "loss": 0.8534, "step": 6988 }, { "epoch": 3.676486059968438, "grad_norm": 2.4017703533172607, "learning_rate": 1.6581637296468584e-06, "loss": 0.8575, "step": 6989 }, { "epoch": 3.677012098895318, "grad_norm": 2.2348556518554688, "learning_rate": 1.6575096575880994e-06, "loss": 0.7929, "step": 6990 }, { "epoch": 3.6775381378221987, "grad_norm": 2.240523338317871, "learning_rate": 1.6568556505792526e-06, "loss": 0.7997, "step": 6991 }, { "epoch": 3.6780641767490794, "grad_norm": 2.3043570518493652, "learning_rate": 1.6562017086708158e-06, "loss": 0.866, "step": 6992 }, { "epoch": 3.67859021567596, "grad_norm": 2.241903305053711, "learning_rate": 1.6555478319132777e-06, "loss": 0.8557, "step": 6993 }, { "epoch": 3.6791162546028406, "grad_norm": 2.2289059162139893, "learning_rate": 1.654894020357127e-06, "loss": 0.7852, "step": 6994 }, { "epoch": 3.6796422935297213, "grad_norm": 2.307501792907715, "learning_rate": 1.654240274052845e-06, "loss": 0.7859, "step": 6995 }, { "epoch": 3.680168332456602, "grad_norm": 2.298226833343506, "learning_rate": 1.6535865930509095e-06, "loss": 0.8271, "step": 6996 }, { "epoch": 3.6806943713834825, "grad_norm": 2.388019561767578, "learning_rate": 1.6529329774017907e-06, "loss": 0.7912, "step": 6997 }, { "epoch": 3.681220410310363, "grad_norm": 2.299114465713501, "learning_rate": 1.6522794271559556e-06, "loss": 0.8392, "step": 6998 }, { "epoch": 3.6817464492372434, "grad_norm": 2.324215888977051, "learning_rate": 1.651625942363867e-06, "loss": 0.8233, "step": 6999 }, { "epoch": 3.682272488164124, "grad_norm": 2.2609665393829346, "learning_rate": 1.6509725230759796e-06, "loss": 0.8181, "step": 7000 }, { "epoch": 3.6827985270910046, "grad_norm": 2.303619384765625, "learning_rate": 1.6503191693427456e-06, "loss": 0.8177, "step": 7001 }, { "epoch": 3.6833245660178853, "grad_norm": 2.2272801399230957, "learning_rate": 1.6496658812146115e-06, "loss": 0.8559, "step": 7002 }, { "epoch": 3.683850604944766, "grad_norm": 2.09889817237854, "learning_rate": 1.6490126587420186e-06, "loss": 0.7893, "step": 7003 }, { "epoch": 3.6843766438716465, "grad_norm": 2.1842613220214844, "learning_rate": 1.6483595019754028e-06, "loss": 0.8305, "step": 7004 }, { "epoch": 3.684902682798527, "grad_norm": 2.3296988010406494, "learning_rate": 1.6477064109651958e-06, "loss": 0.7907, "step": 7005 }, { "epoch": 3.685428721725408, "grad_norm": 2.2922451496124268, "learning_rate": 1.6470533857618245e-06, "loss": 0.7987, "step": 7006 }, { "epoch": 3.6859547606522884, "grad_norm": 2.211989641189575, "learning_rate": 1.646400426415708e-06, "loss": 0.8655, "step": 7007 }, { "epoch": 3.6864807995791686, "grad_norm": 2.1685590744018555, "learning_rate": 1.6457475329772637e-06, "loss": 0.8228, "step": 7008 }, { "epoch": 3.6870068385060497, "grad_norm": 2.2172255516052246, "learning_rate": 1.6450947054969014e-06, "loss": 0.789, "step": 7009 }, { "epoch": 3.68753287743293, "grad_norm": 2.2706682682037354, "learning_rate": 1.6444419440250275e-06, "loss": 0.8026, "step": 7010 }, { "epoch": 3.6880589163598105, "grad_norm": 2.331688404083252, "learning_rate": 1.6437892486120425e-06, "loss": 0.7976, "step": 7011 }, { "epoch": 3.688584955286691, "grad_norm": 2.355132818222046, "learning_rate": 1.643136619308342e-06, "loss": 0.7404, "step": 7012 }, { "epoch": 3.689110994213572, "grad_norm": 2.2378556728363037, "learning_rate": 1.642484056164318e-06, "loss": 0.8031, "step": 7013 }, { "epoch": 3.6896370331404524, "grad_norm": 2.3837685585021973, "learning_rate": 1.641831559230353e-06, "loss": 0.8389, "step": 7014 }, { "epoch": 3.690163072067333, "grad_norm": 2.4918689727783203, "learning_rate": 1.641179128556829e-06, "loss": 0.8061, "step": 7015 }, { "epoch": 3.6906891109942137, "grad_norm": 2.182401657104492, "learning_rate": 1.6405267641941204e-06, "loss": 0.7912, "step": 7016 }, { "epoch": 3.691215149921094, "grad_norm": 2.405829906463623, "learning_rate": 1.6398744661925976e-06, "loss": 0.8111, "step": 7017 }, { "epoch": 3.691741188847975, "grad_norm": 2.3027360439300537, "learning_rate": 1.6392222346026255e-06, "loss": 0.7849, "step": 7018 }, { "epoch": 3.692267227774855, "grad_norm": 2.276221990585327, "learning_rate": 1.6385700694745644e-06, "loss": 0.7818, "step": 7019 }, { "epoch": 3.692793266701736, "grad_norm": 2.3135547637939453, "learning_rate": 1.6379179708587682e-06, "loss": 0.7968, "step": 7020 }, { "epoch": 3.6933193056286164, "grad_norm": 2.2383460998535156, "learning_rate": 1.6372659388055864e-06, "loss": 0.7999, "step": 7021 }, { "epoch": 3.693845344555497, "grad_norm": 2.3852732181549072, "learning_rate": 1.636613973365363e-06, "loss": 0.8221, "step": 7022 }, { "epoch": 3.6943713834823777, "grad_norm": 2.1759836673736572, "learning_rate": 1.635962074588438e-06, "loss": 0.7971, "step": 7023 }, { "epoch": 3.6948974224092583, "grad_norm": 2.2843868732452393, "learning_rate": 1.6353102425251447e-06, "loss": 0.8135, "step": 7024 }, { "epoch": 3.695423461336139, "grad_norm": 2.364798069000244, "learning_rate": 1.6346584772258125e-06, "loss": 0.8351, "step": 7025 }, { "epoch": 3.6959495002630196, "grad_norm": 2.382655620574951, "learning_rate": 1.634006778740766e-06, "loss": 0.8539, "step": 7026 }, { "epoch": 3.6964755391899002, "grad_norm": 2.4426157474517822, "learning_rate": 1.6333551471203218e-06, "loss": 0.8384, "step": 7027 }, { "epoch": 3.6970015781167804, "grad_norm": 2.341196298599243, "learning_rate": 1.6327035824147958e-06, "loss": 0.8187, "step": 7028 }, { "epoch": 3.6975276170436615, "grad_norm": 2.255521535873413, "learning_rate": 1.6320520846744937e-06, "loss": 0.8, "step": 7029 }, { "epoch": 3.6980536559705417, "grad_norm": 2.1881227493286133, "learning_rate": 1.63140065394972e-06, "loss": 0.8508, "step": 7030 }, { "epoch": 3.6985796948974223, "grad_norm": 2.6799798011779785, "learning_rate": 1.630749290290772e-06, "loss": 0.8654, "step": 7031 }, { "epoch": 3.699105733824303, "grad_norm": 2.2486703395843506, "learning_rate": 1.6300979937479435e-06, "loss": 0.7929, "step": 7032 }, { "epoch": 3.6996317727511836, "grad_norm": 2.2389700412750244, "learning_rate": 1.6294467643715212e-06, "loss": 0.8273, "step": 7033 }, { "epoch": 3.7001578116780642, "grad_norm": 2.2403247356414795, "learning_rate": 1.6287956022117874e-06, "loss": 0.8953, "step": 7034 }, { "epoch": 3.700683850604945, "grad_norm": 2.360795259475708, "learning_rate": 1.6281445073190203e-06, "loss": 0.816, "step": 7035 }, { "epoch": 3.7012098895318255, "grad_norm": 2.219724416732788, "learning_rate": 1.6274934797434904e-06, "loss": 0.8191, "step": 7036 }, { "epoch": 3.7017359284587057, "grad_norm": 2.2568459510803223, "learning_rate": 1.626842519535465e-06, "loss": 0.801, "step": 7037 }, { "epoch": 3.7022619673855868, "grad_norm": 2.0947561264038086, "learning_rate": 1.6261916267452066e-06, "loss": 0.7827, "step": 7038 }, { "epoch": 3.702788006312467, "grad_norm": 2.2189993858337402, "learning_rate": 1.6255408014229706e-06, "loss": 0.7519, "step": 7039 }, { "epoch": 3.7033140452393476, "grad_norm": 2.21203875541687, "learning_rate": 1.624890043619008e-06, "loss": 0.7831, "step": 7040 }, { "epoch": 3.7038400841662282, "grad_norm": 2.2139408588409424, "learning_rate": 1.6242393533835655e-06, "loss": 0.8092, "step": 7041 }, { "epoch": 3.704366123093109, "grad_norm": 2.5888473987579346, "learning_rate": 1.6235887307668846e-06, "loss": 0.8365, "step": 7042 }, { "epoch": 3.7048921620199895, "grad_norm": 2.6493701934814453, "learning_rate": 1.6229381758191989e-06, "loss": 0.8466, "step": 7043 }, { "epoch": 3.70541820094687, "grad_norm": 2.638813018798828, "learning_rate": 1.6222876885907396e-06, "loss": 0.8685, "step": 7044 }, { "epoch": 3.7059442398737508, "grad_norm": 2.4570608139038086, "learning_rate": 1.6216372691317319e-06, "loss": 0.8425, "step": 7045 }, { "epoch": 3.7064702788006314, "grad_norm": 2.109828233718872, "learning_rate": 1.6209869174923954e-06, "loss": 0.7701, "step": 7046 }, { "epoch": 3.706996317727512, "grad_norm": 2.2838053703308105, "learning_rate": 1.6203366337229447e-06, "loss": 0.8024, "step": 7047 }, { "epoch": 3.7075223566543922, "grad_norm": 2.360410451889038, "learning_rate": 1.619686417873589e-06, "loss": 0.7873, "step": 7048 }, { "epoch": 3.708048395581273, "grad_norm": 2.2853779792785645, "learning_rate": 1.6190362699945342e-06, "loss": 0.8522, "step": 7049 }, { "epoch": 3.7085744345081535, "grad_norm": 2.3960957527160645, "learning_rate": 1.6183861901359762e-06, "loss": 0.8381, "step": 7050 }, { "epoch": 3.709100473435034, "grad_norm": 2.373978853225708, "learning_rate": 1.617736178348111e-06, "loss": 0.8164, "step": 7051 }, { "epoch": 3.7096265123619148, "grad_norm": 2.341648578643799, "learning_rate": 1.617086234681125e-06, "loss": 0.7584, "step": 7052 }, { "epoch": 3.7101525512887954, "grad_norm": 2.317289113998413, "learning_rate": 1.6164363591852029e-06, "loss": 0.8184, "step": 7053 }, { "epoch": 3.710678590215676, "grad_norm": 2.50099515914917, "learning_rate": 1.6157865519105215e-06, "loss": 0.8146, "step": 7054 }, { "epoch": 3.7112046291425567, "grad_norm": 2.2481937408447266, "learning_rate": 1.6151368129072542e-06, "loss": 0.7997, "step": 7055 }, { "epoch": 3.7117306680694373, "grad_norm": 2.1284520626068115, "learning_rate": 1.6144871422255686e-06, "loss": 0.8141, "step": 7056 }, { "epoch": 3.7122567069963175, "grad_norm": 2.3022537231445312, "learning_rate": 1.6138375399156253e-06, "loss": 0.8189, "step": 7057 }, { "epoch": 3.7127827459231986, "grad_norm": 2.277066707611084, "learning_rate": 1.6131880060275823e-06, "loss": 0.7993, "step": 7058 }, { "epoch": 3.7133087848500788, "grad_norm": 2.455076217651367, "learning_rate": 1.61253854061159e-06, "loss": 0.8253, "step": 7059 }, { "epoch": 3.7138348237769594, "grad_norm": 2.1139585971832275, "learning_rate": 1.6118891437177953e-06, "loss": 0.7551, "step": 7060 }, { "epoch": 3.71436086270384, "grad_norm": 2.4700443744659424, "learning_rate": 1.6112398153963388e-06, "loss": 0.8024, "step": 7061 }, { "epoch": 3.7148869016307207, "grad_norm": 2.2418131828308105, "learning_rate": 1.610590555697357e-06, "loss": 0.812, "step": 7062 }, { "epoch": 3.7154129405576013, "grad_norm": 2.2013378143310547, "learning_rate": 1.6099413646709792e-06, "loss": 0.8216, "step": 7063 }, { "epoch": 3.715938979484482, "grad_norm": 2.362567901611328, "learning_rate": 1.6092922423673305e-06, "loss": 0.8095, "step": 7064 }, { "epoch": 3.7164650184113626, "grad_norm": 2.2237424850463867, "learning_rate": 1.60864318883653e-06, "loss": 0.8251, "step": 7065 }, { "epoch": 3.7169910573382428, "grad_norm": 2.4005613327026367, "learning_rate": 1.6079942041286934e-06, "loss": 0.7873, "step": 7066 }, { "epoch": 3.717517096265124, "grad_norm": 2.205462694168091, "learning_rate": 1.6073452882939287e-06, "loss": 0.8655, "step": 7067 }, { "epoch": 3.718043135192004, "grad_norm": 2.363034963607788, "learning_rate": 1.6066964413823404e-06, "loss": 0.8915, "step": 7068 }, { "epoch": 3.7185691741188847, "grad_norm": 2.386077404022217, "learning_rate": 1.6060476634440265e-06, "loss": 0.7704, "step": 7069 }, { "epoch": 3.7190952130457653, "grad_norm": 2.4508540630340576, "learning_rate": 1.6053989545290806e-06, "loss": 0.852, "step": 7070 }, { "epoch": 3.719621251972646, "grad_norm": 2.354124069213867, "learning_rate": 1.6047503146875894e-06, "loss": 0.8489, "step": 7071 }, { "epoch": 3.7201472908995266, "grad_norm": 2.3748836517333984, "learning_rate": 1.604101743969636e-06, "loss": 0.8061, "step": 7072 }, { "epoch": 3.720673329826407, "grad_norm": 2.3134424686431885, "learning_rate": 1.6034532424252973e-06, "loss": 0.8052, "step": 7073 }, { "epoch": 3.721199368753288, "grad_norm": 2.973984718322754, "learning_rate": 1.6028048101046456e-06, "loss": 0.8288, "step": 7074 }, { "epoch": 3.7217254076801685, "grad_norm": 2.351097822189331, "learning_rate": 1.6021564470577473e-06, "loss": 0.8173, "step": 7075 }, { "epoch": 3.722251446607049, "grad_norm": 2.244666337966919, "learning_rate": 1.6015081533346622e-06, "loss": 0.8347, "step": 7076 }, { "epoch": 3.7227774855339293, "grad_norm": 2.232339382171631, "learning_rate": 1.6008599289854471e-06, "loss": 0.825, "step": 7077 }, { "epoch": 3.7233035244608104, "grad_norm": 2.415418863296509, "learning_rate": 1.600211774060153e-06, "loss": 0.8088, "step": 7078 }, { "epoch": 3.7238295633876906, "grad_norm": 2.5032260417938232, "learning_rate": 1.5995636886088235e-06, "loss": 0.8358, "step": 7079 }, { "epoch": 3.724355602314571, "grad_norm": 2.276979923248291, "learning_rate": 1.598915672681498e-06, "loss": 0.7874, "step": 7080 }, { "epoch": 3.724881641241452, "grad_norm": 2.178647994995117, "learning_rate": 1.5982677263282126e-06, "loss": 0.77, "step": 7081 }, { "epoch": 3.7254076801683325, "grad_norm": 2.2081170082092285, "learning_rate": 1.5976198495989942e-06, "loss": 0.7993, "step": 7082 }, { "epoch": 3.725933719095213, "grad_norm": 2.3214967250823975, "learning_rate": 1.5969720425438675e-06, "loss": 0.8302, "step": 7083 }, { "epoch": 3.7264597580220937, "grad_norm": 2.4113118648529053, "learning_rate": 1.59632430521285e-06, "loss": 0.8725, "step": 7084 }, { "epoch": 3.7269857969489744, "grad_norm": 2.2074246406555176, "learning_rate": 1.5956766376559562e-06, "loss": 0.7678, "step": 7085 }, { "epoch": 3.7275118358758546, "grad_norm": 2.5783793926239014, "learning_rate": 1.5950290399231911e-06, "loss": 0.8593, "step": 7086 }, { "epoch": 3.7280378748027356, "grad_norm": 2.582400321960449, "learning_rate": 1.5943815120645572e-06, "loss": 0.8472, "step": 7087 }, { "epoch": 3.728563913729616, "grad_norm": 2.3328380584716797, "learning_rate": 1.5937340541300521e-06, "loss": 0.8104, "step": 7088 }, { "epoch": 3.7290899526564965, "grad_norm": 2.3772644996643066, "learning_rate": 1.593086666169666e-06, "loss": 0.809, "step": 7089 }, { "epoch": 3.729615991583377, "grad_norm": 2.2832882404327393, "learning_rate": 1.592439348233385e-06, "loss": 0.8376, "step": 7090 }, { "epoch": 3.7301420305102577, "grad_norm": 2.9178905487060547, "learning_rate": 1.5917921003711894e-06, "loss": 0.7928, "step": 7091 }, { "epoch": 3.7306680694371384, "grad_norm": 2.4030609130859375, "learning_rate": 1.5911449226330555e-06, "loss": 0.8398, "step": 7092 }, { "epoch": 3.731194108364019, "grad_norm": 2.3904635906219482, "learning_rate": 1.5904978150689505e-06, "loss": 0.8529, "step": 7093 }, { "epoch": 3.7317201472908996, "grad_norm": 2.075915575027466, "learning_rate": 1.5898507777288397e-06, "loss": 0.8121, "step": 7094 }, { "epoch": 3.7322461862177803, "grad_norm": 2.5465011596679688, "learning_rate": 1.5892038106626818e-06, "loss": 0.8085, "step": 7095 }, { "epoch": 3.732772225144661, "grad_norm": 2.288278818130493, "learning_rate": 1.5885569139204294e-06, "loss": 0.8155, "step": 7096 }, { "epoch": 3.733298264071541, "grad_norm": 2.2560222148895264, "learning_rate": 1.5879100875520312e-06, "loss": 0.7786, "step": 7097 }, { "epoch": 3.7338243029984217, "grad_norm": 2.4792826175689697, "learning_rate": 1.5872633316074299e-06, "loss": 0.8095, "step": 7098 }, { "epoch": 3.7343503419253024, "grad_norm": 2.264688014984131, "learning_rate": 1.5866166461365618e-06, "loss": 0.8052, "step": 7099 }, { "epoch": 3.734876380852183, "grad_norm": 2.293508529663086, "learning_rate": 1.5859700311893586e-06, "loss": 0.8217, "step": 7100 }, { "epoch": 3.7354024197790636, "grad_norm": 2.168264389038086, "learning_rate": 1.5853234868157458e-06, "loss": 0.7925, "step": 7101 }, { "epoch": 3.7359284587059443, "grad_norm": 2.2398223876953125, "learning_rate": 1.5846770130656446e-06, "loss": 0.8191, "step": 7102 }, { "epoch": 3.736454497632825, "grad_norm": 2.453096389770508, "learning_rate": 1.5840306099889702e-06, "loss": 0.8631, "step": 7103 }, { "epoch": 3.7369805365597055, "grad_norm": 2.2849557399749756, "learning_rate": 1.5833842776356324e-06, "loss": 0.8291, "step": 7104 }, { "epoch": 3.737506575486586, "grad_norm": 2.2851014137268066, "learning_rate": 1.5827380160555357e-06, "loss": 0.818, "step": 7105 }, { "epoch": 3.7380326144134663, "grad_norm": 2.3713529109954834, "learning_rate": 1.5820918252985782e-06, "loss": 0.8037, "step": 7106 }, { "epoch": 3.7385586533403474, "grad_norm": 2.4787821769714355, "learning_rate": 1.5814457054146537e-06, "loss": 0.8217, "step": 7107 }, { "epoch": 3.7390846922672276, "grad_norm": 2.297499895095825, "learning_rate": 1.5807996564536499e-06, "loss": 0.794, "step": 7108 }, { "epoch": 3.7396107311941083, "grad_norm": 2.4758541584014893, "learning_rate": 1.5801536784654487e-06, "loss": 0.8134, "step": 7109 }, { "epoch": 3.740136770120989, "grad_norm": 2.2583096027374268, "learning_rate": 1.579507771499928e-06, "loss": 0.8345, "step": 7110 }, { "epoch": 3.7406628090478695, "grad_norm": 2.2677412033081055, "learning_rate": 1.5788619356069587e-06, "loss": 0.7818, "step": 7111 }, { "epoch": 3.74118884797475, "grad_norm": 2.2205939292907715, "learning_rate": 1.5782161708364067e-06, "loss": 0.8033, "step": 7112 }, { "epoch": 3.741714886901631, "grad_norm": 2.392273187637329, "learning_rate": 1.5775704772381335e-06, "loss": 0.8231, "step": 7113 }, { "epoch": 3.7422409258285114, "grad_norm": 2.222046375274658, "learning_rate": 1.576924854861992e-06, "loss": 0.792, "step": 7114 }, { "epoch": 3.742766964755392, "grad_norm": 2.21530818939209, "learning_rate": 1.5762793037578326e-06, "loss": 0.8047, "step": 7115 }, { "epoch": 3.7432930036822727, "grad_norm": 2.3680975437164307, "learning_rate": 1.5756338239754992e-06, "loss": 0.7869, "step": 7116 }, { "epoch": 3.743819042609153, "grad_norm": 2.3526418209075928, "learning_rate": 1.574988415564831e-06, "loss": 0.8059, "step": 7117 }, { "epoch": 3.7443450815360335, "grad_norm": 2.4551892280578613, "learning_rate": 1.5743430785756602e-06, "loss": 0.817, "step": 7118 }, { "epoch": 3.744871120462914, "grad_norm": 2.31890606880188, "learning_rate": 1.5736978130578143e-06, "loss": 0.7459, "step": 7119 }, { "epoch": 3.745397159389795, "grad_norm": 2.19866943359375, "learning_rate": 1.5730526190611162e-06, "loss": 0.8175, "step": 7120 }, { "epoch": 3.7459231983166754, "grad_norm": 2.3596107959747314, "learning_rate": 1.57240749663538e-06, "loss": 0.8116, "step": 7121 }, { "epoch": 3.746449237243556, "grad_norm": 2.2772953510284424, "learning_rate": 1.5717624458304181e-06, "loss": 0.8395, "step": 7122 }, { "epoch": 3.7469752761704367, "grad_norm": 2.1661384105682373, "learning_rate": 1.5711174666960355e-06, "loss": 0.8219, "step": 7123 }, { "epoch": 3.7475013150973173, "grad_norm": 2.132195234298706, "learning_rate": 1.5704725592820325e-06, "loss": 0.7987, "step": 7124 }, { "epoch": 3.748027354024198, "grad_norm": 2.3278214931488037, "learning_rate": 1.5698277236382026e-06, "loss": 0.8146, "step": 7125 }, { "epoch": 3.748553392951078, "grad_norm": 2.1417176723480225, "learning_rate": 1.5691829598143346e-06, "loss": 0.76, "step": 7126 }, { "epoch": 3.7490794318779592, "grad_norm": 2.480254650115967, "learning_rate": 1.5685382678602129e-06, "loss": 0.8278, "step": 7127 }, { "epoch": 3.7496054708048394, "grad_norm": 2.328141927719116, "learning_rate": 1.5678936478256132e-06, "loss": 0.802, "step": 7128 }, { "epoch": 3.75013150973172, "grad_norm": 2.0571000576019287, "learning_rate": 1.5672490997603085e-06, "loss": 0.7567, "step": 7129 }, { "epoch": 3.7506575486586007, "grad_norm": 2.2836623191833496, "learning_rate": 1.5666046237140654e-06, "loss": 0.8576, "step": 7130 }, { "epoch": 3.7511835875854813, "grad_norm": 2.389979124069214, "learning_rate": 1.5659602197366441e-06, "loss": 0.841, "step": 7131 }, { "epoch": 3.751709626512362, "grad_norm": 2.3109076023101807, "learning_rate": 1.565315887877801e-06, "loss": 0.8096, "step": 7132 }, { "epoch": 3.7522356654392426, "grad_norm": 2.2509803771972656, "learning_rate": 1.5646716281872854e-06, "loss": 0.8182, "step": 7133 }, { "epoch": 3.752761704366123, "grad_norm": 2.2437424659729004, "learning_rate": 1.5640274407148412e-06, "loss": 0.8042, "step": 7134 }, { "epoch": 3.7532877432930034, "grad_norm": 2.3656864166259766, "learning_rate": 1.5633833255102087e-06, "loss": 0.8461, "step": 7135 }, { "epoch": 3.7538137822198845, "grad_norm": 2.258357286453247, "learning_rate": 1.5627392826231186e-06, "loss": 0.8276, "step": 7136 }, { "epoch": 3.7543398211467647, "grad_norm": 2.459717273712158, "learning_rate": 1.5620953121033e-06, "loss": 0.8172, "step": 7137 }, { "epoch": 3.7548658600736453, "grad_norm": 2.2758052349090576, "learning_rate": 1.5614514140004739e-06, "loss": 0.7635, "step": 7138 }, { "epoch": 3.755391899000526, "grad_norm": 2.334693193435669, "learning_rate": 1.5608075883643569e-06, "loss": 0.7964, "step": 7139 }, { "epoch": 3.7559179379274066, "grad_norm": 2.2873952388763428, "learning_rate": 1.5601638352446599e-06, "loss": 0.8435, "step": 7140 }, { "epoch": 3.756443976854287, "grad_norm": 2.275642156600952, "learning_rate": 1.5595201546910886e-06, "loss": 0.7626, "step": 7141 }, { "epoch": 3.756970015781168, "grad_norm": 2.2168779373168945, "learning_rate": 1.5588765467533418e-06, "loss": 0.7719, "step": 7142 }, { "epoch": 3.7574960547080485, "grad_norm": 2.340444326400757, "learning_rate": 1.5582330114811134e-06, "loss": 0.7938, "step": 7143 }, { "epoch": 3.758022093634929, "grad_norm": 2.2221713066101074, "learning_rate": 1.557589548924091e-06, "loss": 0.8237, "step": 7144 }, { "epoch": 3.7585481325618098, "grad_norm": 2.3145833015441895, "learning_rate": 1.5569461591319585e-06, "loss": 0.8043, "step": 7145 }, { "epoch": 3.75907417148869, "grad_norm": 2.229503631591797, "learning_rate": 1.5563028421543925e-06, "loss": 0.8241, "step": 7146 }, { "epoch": 3.759600210415571, "grad_norm": 2.2971677780151367, "learning_rate": 1.5556595980410643e-06, "loss": 0.7768, "step": 7147 }, { "epoch": 3.760126249342451, "grad_norm": 2.3100712299346924, "learning_rate": 1.5550164268416408e-06, "loss": 0.8393, "step": 7148 }, { "epoch": 3.760652288269332, "grad_norm": 2.4342761039733887, "learning_rate": 1.5543733286057811e-06, "loss": 0.8222, "step": 7149 }, { "epoch": 3.7611783271962125, "grad_norm": 2.1671619415283203, "learning_rate": 1.5537303033831396e-06, "loss": 0.8078, "step": 7150 }, { "epoch": 3.761704366123093, "grad_norm": 2.4443376064300537, "learning_rate": 1.5530873512233657e-06, "loss": 0.8559, "step": 7151 }, { "epoch": 3.7622304050499737, "grad_norm": 2.367776870727539, "learning_rate": 1.5524444721761023e-06, "loss": 0.7985, "step": 7152 }, { "epoch": 3.7627564439768544, "grad_norm": 2.286033868789673, "learning_rate": 1.5518016662909872e-06, "loss": 0.8338, "step": 7153 }, { "epoch": 3.763282482903735, "grad_norm": 2.3241701126098633, "learning_rate": 1.5511589336176527e-06, "loss": 0.8266, "step": 7154 }, { "epoch": 3.763808521830615, "grad_norm": 2.3212971687316895, "learning_rate": 1.5505162742057248e-06, "loss": 0.86, "step": 7155 }, { "epoch": 3.7643345607574963, "grad_norm": 2.3676939010620117, "learning_rate": 1.5498736881048252e-06, "loss": 0.8132, "step": 7156 }, { "epoch": 3.7648605996843765, "grad_norm": 2.382857084274292, "learning_rate": 1.5492311753645666e-06, "loss": 0.7679, "step": 7157 }, { "epoch": 3.765386638611257, "grad_norm": 2.342841625213623, "learning_rate": 1.5485887360345603e-06, "loss": 0.8123, "step": 7158 }, { "epoch": 3.7659126775381377, "grad_norm": 2.446645736694336, "learning_rate": 1.547946370164409e-06, "loss": 0.8491, "step": 7159 }, { "epoch": 3.7664387164650184, "grad_norm": 2.2193849086761475, "learning_rate": 1.547304077803712e-06, "loss": 0.7912, "step": 7160 }, { "epoch": 3.766964755391899, "grad_norm": 2.401888608932495, "learning_rate": 1.5466618590020595e-06, "loss": 0.7816, "step": 7161 }, { "epoch": 3.7674907943187796, "grad_norm": 2.4825117588043213, "learning_rate": 1.5460197138090399e-06, "loss": 0.7384, "step": 7162 }, { "epoch": 3.7680168332456603, "grad_norm": 2.0633177757263184, "learning_rate": 1.5453776422742348e-06, "loss": 0.779, "step": 7163 }, { "epoch": 3.768542872172541, "grad_norm": 2.2376105785369873, "learning_rate": 1.5447356444472172e-06, "loss": 0.8047, "step": 7164 }, { "epoch": 3.7690689110994215, "grad_norm": 2.2802419662475586, "learning_rate": 1.544093720377558e-06, "loss": 0.8482, "step": 7165 }, { "epoch": 3.7695949500263017, "grad_norm": 2.0599265098571777, "learning_rate": 1.5434518701148204e-06, "loss": 0.7904, "step": 7166 }, { "epoch": 3.7701209889531824, "grad_norm": 2.110909938812256, "learning_rate": 1.5428100937085644e-06, "loss": 0.8294, "step": 7167 }, { "epoch": 3.770647027880063, "grad_norm": 2.2988204956054688, "learning_rate": 1.54216839120834e-06, "loss": 0.776, "step": 7168 }, { "epoch": 3.7711730668069436, "grad_norm": 2.39579701423645, "learning_rate": 1.541526762663696e-06, "loss": 0.7846, "step": 7169 }, { "epoch": 3.7716991057338243, "grad_norm": 2.2142772674560547, "learning_rate": 1.5408852081241732e-06, "loss": 0.7982, "step": 7170 }, { "epoch": 3.772225144660705, "grad_norm": 2.428553342819214, "learning_rate": 1.5402437276393053e-06, "loss": 0.8393, "step": 7171 }, { "epoch": 3.7727511835875855, "grad_norm": 2.388845920562744, "learning_rate": 1.5396023212586234e-06, "loss": 0.7948, "step": 7172 }, { "epoch": 3.773277222514466, "grad_norm": 2.2917144298553467, "learning_rate": 1.5389609890316516e-06, "loss": 0.8076, "step": 7173 }, { "epoch": 3.773803261441347, "grad_norm": 2.3557591438293457, "learning_rate": 1.5383197310079071e-06, "loss": 0.9193, "step": 7174 }, { "epoch": 3.774329300368227, "grad_norm": 2.3871994018554688, "learning_rate": 1.537678547236903e-06, "loss": 0.8599, "step": 7175 }, { "epoch": 3.774855339295108, "grad_norm": 2.220977783203125, "learning_rate": 1.5370374377681458e-06, "loss": 0.8162, "step": 7176 }, { "epoch": 3.7753813782219883, "grad_norm": 2.2715020179748535, "learning_rate": 1.5363964026511376e-06, "loss": 0.8551, "step": 7177 }, { "epoch": 3.775907417148869, "grad_norm": 2.118222951889038, "learning_rate": 1.5357554419353721e-06, "loss": 0.7922, "step": 7178 }, { "epoch": 3.7764334560757495, "grad_norm": 2.2631428241729736, "learning_rate": 1.535114555670339e-06, "loss": 0.8192, "step": 7179 }, { "epoch": 3.77695949500263, "grad_norm": 2.4039599895477295, "learning_rate": 1.534473743905523e-06, "loss": 0.8421, "step": 7180 }, { "epoch": 3.777485533929511, "grad_norm": 2.1779980659484863, "learning_rate": 1.5338330066904012e-06, "loss": 0.7854, "step": 7181 }, { "epoch": 3.7780115728563914, "grad_norm": 2.3674659729003906, "learning_rate": 1.5331923440744461e-06, "loss": 0.8841, "step": 7182 }, { "epoch": 3.778537611783272, "grad_norm": 2.3071441650390625, "learning_rate": 1.5325517561071245e-06, "loss": 0.8316, "step": 7183 }, { "epoch": 3.7790636507101527, "grad_norm": 2.3602943420410156, "learning_rate": 1.5319112428378977e-06, "loss": 0.8014, "step": 7184 }, { "epoch": 3.7795896896370333, "grad_norm": 2.4149651527404785, "learning_rate": 1.5312708043162197e-06, "loss": 0.8237, "step": 7185 }, { "epoch": 3.7801157285639135, "grad_norm": 2.221574544906616, "learning_rate": 1.5306304405915402e-06, "loss": 0.8156, "step": 7186 }, { "epoch": 3.780641767490794, "grad_norm": 2.2651078701019287, "learning_rate": 1.5299901517133014e-06, "loss": 0.8617, "step": 7187 }, { "epoch": 3.781167806417675, "grad_norm": 2.668149471282959, "learning_rate": 1.5293499377309423e-06, "loss": 0.8034, "step": 7188 }, { "epoch": 3.7816938453445554, "grad_norm": 2.3504722118377686, "learning_rate": 1.5287097986938943e-06, "loss": 0.8405, "step": 7189 }, { "epoch": 3.782219884271436, "grad_norm": 2.2106876373291016, "learning_rate": 1.5280697346515838e-06, "loss": 0.7568, "step": 7190 }, { "epoch": 3.7827459231983167, "grad_norm": 2.228835105895996, "learning_rate": 1.5274297456534304e-06, "loss": 0.8051, "step": 7191 }, { "epoch": 3.7832719621251973, "grad_norm": 2.3863766193389893, "learning_rate": 1.52678983174885e-06, "loss": 0.7801, "step": 7192 }, { "epoch": 3.783798001052078, "grad_norm": 2.248229742050171, "learning_rate": 1.5261499929872495e-06, "loss": 0.83, "step": 7193 }, { "epoch": 3.7843240399789586, "grad_norm": 2.3153254985809326, "learning_rate": 1.5255102294180323e-06, "loss": 0.8242, "step": 7194 }, { "epoch": 3.784850078905839, "grad_norm": 2.307124614715576, "learning_rate": 1.5248705410905959e-06, "loss": 0.8097, "step": 7195 }, { "epoch": 3.78537611783272, "grad_norm": 2.2530810832977295, "learning_rate": 1.5242309280543316e-06, "loss": 0.7836, "step": 7196 }, { "epoch": 3.7859021567596, "grad_norm": 2.4607431888580322, "learning_rate": 1.5235913903586249e-06, "loss": 0.759, "step": 7197 }, { "epoch": 3.7864281956864807, "grad_norm": 2.273750066757202, "learning_rate": 1.5229519280528548e-06, "loss": 0.7942, "step": 7198 }, { "epoch": 3.7869542346133613, "grad_norm": 2.3756189346313477, "learning_rate": 1.5223125411863967e-06, "loss": 0.8522, "step": 7199 }, { "epoch": 3.787480273540242, "grad_norm": 2.4541141986846924, "learning_rate": 1.5216732298086161e-06, "loss": 0.8673, "step": 7200 }, { "epoch": 3.7880063124671226, "grad_norm": 2.3283493518829346, "learning_rate": 1.521033993968877e-06, "loss": 0.8435, "step": 7201 }, { "epoch": 3.7885323513940032, "grad_norm": 3.8421003818511963, "learning_rate": 1.5203948337165351e-06, "loss": 0.7925, "step": 7202 }, { "epoch": 3.789058390320884, "grad_norm": 2.2144675254821777, "learning_rate": 1.5197557491009413e-06, "loss": 0.8573, "step": 7203 }, { "epoch": 3.789584429247764, "grad_norm": 2.4043216705322266, "learning_rate": 1.5191167401714396e-06, "loss": 0.7824, "step": 7204 }, { "epoch": 3.790110468174645, "grad_norm": 2.2770462036132812, "learning_rate": 1.5184778069773692e-06, "loss": 0.8033, "step": 7205 }, { "epoch": 3.7906365071015253, "grad_norm": 2.178597927093506, "learning_rate": 1.5178389495680646e-06, "loss": 0.8092, "step": 7206 }, { "epoch": 3.791162546028406, "grad_norm": 2.2249081134796143, "learning_rate": 1.5172001679928496e-06, "loss": 0.7973, "step": 7207 }, { "epoch": 3.7916885849552866, "grad_norm": 2.269867420196533, "learning_rate": 1.5165614623010478e-06, "loss": 0.8335, "step": 7208 }, { "epoch": 3.7922146238821672, "grad_norm": 2.255053758621216, "learning_rate": 1.515922832541974e-06, "loss": 0.801, "step": 7209 }, { "epoch": 3.792740662809048, "grad_norm": 2.3307230472564697, "learning_rate": 1.515284278764938e-06, "loss": 0.8449, "step": 7210 }, { "epoch": 3.7932667017359285, "grad_norm": 2.332603693008423, "learning_rate": 1.514645801019243e-06, "loss": 0.8256, "step": 7211 }, { "epoch": 3.793792740662809, "grad_norm": 2.269724130630493, "learning_rate": 1.5140073993541871e-06, "loss": 0.7645, "step": 7212 }, { "epoch": 3.7943187795896898, "grad_norm": 2.213453531265259, "learning_rate": 1.5133690738190632e-06, "loss": 0.7902, "step": 7213 }, { "epoch": 3.7948448185165704, "grad_norm": 2.1715221405029297, "learning_rate": 1.512730824463155e-06, "loss": 0.8047, "step": 7214 }, { "epoch": 3.7953708574434506, "grad_norm": 2.2491455078125, "learning_rate": 1.5120926513357442e-06, "loss": 0.8238, "step": 7215 }, { "epoch": 3.7958968963703317, "grad_norm": 2.2513978481292725, "learning_rate": 1.5114545544861056e-06, "loss": 0.8356, "step": 7216 }, { "epoch": 3.796422935297212, "grad_norm": 2.6017208099365234, "learning_rate": 1.510816533963506e-06, "loss": 0.8039, "step": 7217 }, { "epoch": 3.7969489742240925, "grad_norm": 2.4665415287017822, "learning_rate": 1.5101785898172095e-06, "loss": 0.8655, "step": 7218 }, { "epoch": 3.797475013150973, "grad_norm": 2.258209705352783, "learning_rate": 1.5095407220964714e-06, "loss": 0.8204, "step": 7219 }, { "epoch": 3.7980010520778538, "grad_norm": 2.124349355697632, "learning_rate": 1.5089029308505438e-06, "loss": 0.8071, "step": 7220 }, { "epoch": 3.7985270910047344, "grad_norm": 2.1860556602478027, "learning_rate": 1.50826521612867e-06, "loss": 0.8243, "step": 7221 }, { "epoch": 3.799053129931615, "grad_norm": 2.353228807449341, "learning_rate": 1.5076275779800903e-06, "loss": 0.8479, "step": 7222 }, { "epoch": 3.7995791688584957, "grad_norm": 2.224217414855957, "learning_rate": 1.5069900164540358e-06, "loss": 0.8068, "step": 7223 }, { "epoch": 3.800105207785376, "grad_norm": 2.2397003173828125, "learning_rate": 1.506352531599735e-06, "loss": 0.7816, "step": 7224 }, { "epoch": 3.800631246712257, "grad_norm": 2.2951974868774414, "learning_rate": 1.5057151234664091e-06, "loss": 0.8558, "step": 7225 }, { "epoch": 3.801157285639137, "grad_norm": 2.4536476135253906, "learning_rate": 1.5050777921032725e-06, "loss": 0.8435, "step": 7226 }, { "epoch": 3.8016833245660178, "grad_norm": 2.2881951332092285, "learning_rate": 1.5044405375595361e-06, "loss": 0.8396, "step": 7227 }, { "epoch": 3.8022093634928984, "grad_norm": 2.4043400287628174, "learning_rate": 1.503803359884401e-06, "loss": 0.8396, "step": 7228 }, { "epoch": 3.802735402419779, "grad_norm": 2.3008878231048584, "learning_rate": 1.5031662591270663e-06, "loss": 0.7532, "step": 7229 }, { "epoch": 3.8032614413466597, "grad_norm": 2.2630724906921387, "learning_rate": 1.5025292353367224e-06, "loss": 0.8106, "step": 7230 }, { "epoch": 3.8037874802735403, "grad_norm": 2.181114435195923, "learning_rate": 1.5018922885625554e-06, "loss": 0.8617, "step": 7231 }, { "epoch": 3.804313519200421, "grad_norm": 2.29514217376709, "learning_rate": 1.5012554188537446e-06, "loss": 0.827, "step": 7232 }, { "epoch": 3.8048395581273016, "grad_norm": 2.088675022125244, "learning_rate": 1.5006186262594643e-06, "loss": 0.7725, "step": 7233 }, { "epoch": 3.805365597054182, "grad_norm": 2.334902763366699, "learning_rate": 1.4999819108288815e-06, "loss": 0.8214, "step": 7234 }, { "epoch": 3.8058916359810624, "grad_norm": 2.3876166343688965, "learning_rate": 1.499345272611159e-06, "loss": 0.8289, "step": 7235 }, { "epoch": 3.806417674907943, "grad_norm": 2.1705398559570312, "learning_rate": 1.4987087116554511e-06, "loss": 0.7713, "step": 7236 }, { "epoch": 3.8069437138348237, "grad_norm": 2.301060199737549, "learning_rate": 1.4980722280109078e-06, "loss": 0.7863, "step": 7237 }, { "epoch": 3.8074697527617043, "grad_norm": 2.2468984127044678, "learning_rate": 1.4974358217266739e-06, "loss": 0.8062, "step": 7238 }, { "epoch": 3.807995791688585, "grad_norm": 2.204146146774292, "learning_rate": 1.496799492851886e-06, "loss": 0.8071, "step": 7239 }, { "epoch": 3.8085218306154656, "grad_norm": 2.4140751361846924, "learning_rate": 1.496163241435678e-06, "loss": 0.792, "step": 7240 }, { "epoch": 3.809047869542346, "grad_norm": 2.4941487312316895, "learning_rate": 1.4955270675271736e-06, "loss": 0.7509, "step": 7241 }, { "epoch": 3.809573908469227, "grad_norm": 2.376397132873535, "learning_rate": 1.4948909711754947e-06, "loss": 0.8672, "step": 7242 }, { "epoch": 3.8100999473961075, "grad_norm": 2.2718944549560547, "learning_rate": 1.4942549524297533e-06, "loss": 0.828, "step": 7243 }, { "epoch": 3.8106259863229877, "grad_norm": 2.3334269523620605, "learning_rate": 1.493619011339058e-06, "loss": 0.8283, "step": 7244 }, { "epoch": 3.8111520252498687, "grad_norm": 2.3541111946105957, "learning_rate": 1.492983147952511e-06, "loss": 0.8684, "step": 7245 }, { "epoch": 3.811678064176749, "grad_norm": 2.2605104446411133, "learning_rate": 1.492347362319209e-06, "loss": 0.8349, "step": 7246 }, { "epoch": 3.8122041031036296, "grad_norm": 2.117382764816284, "learning_rate": 1.4917116544882404e-06, "loss": 0.8125, "step": 7247 }, { "epoch": 3.81273014203051, "grad_norm": 2.38047456741333, "learning_rate": 1.4910760245086897e-06, "loss": 0.8009, "step": 7248 }, { "epoch": 3.813256180957391, "grad_norm": 2.4268484115600586, "learning_rate": 1.490440472429636e-06, "loss": 0.7551, "step": 7249 }, { "epoch": 3.8137822198842715, "grad_norm": 2.3315367698669434, "learning_rate": 1.4898049983001498e-06, "loss": 0.8181, "step": 7250 }, { "epoch": 3.814308258811152, "grad_norm": 2.448945999145508, "learning_rate": 1.489169602169297e-06, "loss": 0.7838, "step": 7251 }, { "epoch": 3.8148342977380327, "grad_norm": 2.117349147796631, "learning_rate": 1.4885342840861383e-06, "loss": 0.764, "step": 7252 }, { "epoch": 3.8153603366649134, "grad_norm": 2.263031482696533, "learning_rate": 1.4878990440997266e-06, "loss": 0.8549, "step": 7253 }, { "epoch": 3.815886375591794, "grad_norm": 2.245312213897705, "learning_rate": 1.4872638822591101e-06, "loss": 0.7988, "step": 7254 }, { "epoch": 3.816412414518674, "grad_norm": 2.385014295578003, "learning_rate": 1.486628798613331e-06, "loss": 0.7911, "step": 7255 }, { "epoch": 3.816938453445555, "grad_norm": 2.420539379119873, "learning_rate": 1.4859937932114254e-06, "loss": 0.8487, "step": 7256 }, { "epoch": 3.8174644923724355, "grad_norm": 2.3943228721618652, "learning_rate": 1.4853588661024212e-06, "loss": 0.7839, "step": 7257 }, { "epoch": 3.817990531299316, "grad_norm": 2.230426549911499, "learning_rate": 1.4847240173353432e-06, "loss": 0.8301, "step": 7258 }, { "epoch": 3.8185165702261967, "grad_norm": 2.4389641284942627, "learning_rate": 1.48408924695921e-06, "loss": 0.7993, "step": 7259 }, { "epoch": 3.8190426091530774, "grad_norm": 2.6065304279327393, "learning_rate": 1.4834545550230311e-06, "loss": 0.7746, "step": 7260 }, { "epoch": 3.819568648079958, "grad_norm": 2.3416812419891357, "learning_rate": 1.4828199415758132e-06, "loss": 0.8339, "step": 7261 }, { "epoch": 3.8200946870068386, "grad_norm": 2.246164083480835, "learning_rate": 1.4821854066665554e-06, "loss": 0.7913, "step": 7262 }, { "epoch": 3.8206207259337193, "grad_norm": 2.1578316688537598, "learning_rate": 1.481550950344252e-06, "loss": 0.7717, "step": 7263 }, { "epoch": 3.8211467648605995, "grad_norm": 2.1790950298309326, "learning_rate": 1.4809165726578889e-06, "loss": 0.787, "step": 7264 }, { "epoch": 3.8216728037874805, "grad_norm": 2.2764670848846436, "learning_rate": 1.4802822736564482e-06, "loss": 0.8318, "step": 7265 }, { "epoch": 3.8221988427143607, "grad_norm": 2.3710319995880127, "learning_rate": 1.4796480533889043e-06, "loss": 0.826, "step": 7266 }, { "epoch": 3.8227248816412414, "grad_norm": 2.233842611312866, "learning_rate": 1.4790139119042268e-06, "loss": 0.7819, "step": 7267 }, { "epoch": 3.823250920568122, "grad_norm": 2.193054437637329, "learning_rate": 1.4783798492513788e-06, "loss": 0.766, "step": 7268 }, { "epoch": 3.8237769594950026, "grad_norm": 2.407064199447632, "learning_rate": 1.4777458654793176e-06, "loss": 0.8366, "step": 7269 }, { "epoch": 3.8243029984218833, "grad_norm": 2.4244778156280518, "learning_rate": 1.4771119606369933e-06, "loss": 0.8942, "step": 7270 }, { "epoch": 3.824829037348764, "grad_norm": 2.158571481704712, "learning_rate": 1.4764781347733502e-06, "loss": 0.7793, "step": 7271 }, { "epoch": 3.8253550762756445, "grad_norm": 2.342299699783325, "learning_rate": 1.475844387937328e-06, "loss": 0.8165, "step": 7272 }, { "epoch": 3.8258811152025247, "grad_norm": 2.3740506172180176, "learning_rate": 1.475210720177858e-06, "loss": 0.8147, "step": 7273 }, { "epoch": 3.826407154129406, "grad_norm": 2.267319679260254, "learning_rate": 1.4745771315438678e-06, "loss": 0.7756, "step": 7274 }, { "epoch": 3.826933193056286, "grad_norm": 2.17486310005188, "learning_rate": 1.4739436220842773e-06, "loss": 0.8355, "step": 7275 }, { "epoch": 3.8274592319831666, "grad_norm": 2.240771770477295, "learning_rate": 1.4733101918480008e-06, "loss": 0.8295, "step": 7276 }, { "epoch": 3.8279852709100473, "grad_norm": 2.376521587371826, "learning_rate": 1.4726768408839464e-06, "loss": 0.8275, "step": 7277 }, { "epoch": 3.828511309836928, "grad_norm": 2.2857706546783447, "learning_rate": 1.4720435692410158e-06, "loss": 0.8298, "step": 7278 }, { "epoch": 3.8290373487638085, "grad_norm": 2.1675431728363037, "learning_rate": 1.4714103769681046e-06, "loss": 0.836, "step": 7279 }, { "epoch": 3.829563387690689, "grad_norm": 2.272911310195923, "learning_rate": 1.4707772641141026e-06, "loss": 0.816, "step": 7280 }, { "epoch": 3.83008942661757, "grad_norm": 2.355377674102783, "learning_rate": 1.4701442307278934e-06, "loss": 0.8063, "step": 7281 }, { "epoch": 3.8306154655444504, "grad_norm": 2.141019105911255, "learning_rate": 1.4695112768583556e-06, "loss": 0.7732, "step": 7282 }, { "epoch": 3.831141504471331, "grad_norm": 2.3625259399414062, "learning_rate": 1.4688784025543587e-06, "loss": 0.849, "step": 7283 }, { "epoch": 3.8316675433982113, "grad_norm": 2.49155855178833, "learning_rate": 1.468245607864769e-06, "loss": 0.8594, "step": 7284 }, { "epoch": 3.8321935823250923, "grad_norm": 2.251298666000366, "learning_rate": 1.4676128928384456e-06, "loss": 0.771, "step": 7285 }, { "epoch": 3.8327196212519725, "grad_norm": 2.273979425430298, "learning_rate": 1.46698025752424e-06, "loss": 0.8106, "step": 7286 }, { "epoch": 3.833245660178853, "grad_norm": 2.272948741912842, "learning_rate": 1.4663477019710002e-06, "loss": 0.8371, "step": 7287 }, { "epoch": 3.833771699105734, "grad_norm": 2.1306393146514893, "learning_rate": 1.465715226227566e-06, "loss": 0.7494, "step": 7288 }, { "epoch": 3.8342977380326144, "grad_norm": 2.7707598209381104, "learning_rate": 1.4650828303427728e-06, "loss": 0.7936, "step": 7289 }, { "epoch": 3.834823776959495, "grad_norm": 2.3278465270996094, "learning_rate": 1.4644505143654475e-06, "loss": 0.824, "step": 7290 }, { "epoch": 3.8353498158863757, "grad_norm": 2.344688653945923, "learning_rate": 1.4638182783444128e-06, "loss": 0.8187, "step": 7291 }, { "epoch": 3.8358758548132563, "grad_norm": 2.128354549407959, "learning_rate": 1.4631861223284854e-06, "loss": 0.8003, "step": 7292 }, { "epoch": 3.8364018937401365, "grad_norm": 2.506960391998291, "learning_rate": 1.4625540463664736e-06, "loss": 0.8021, "step": 7293 }, { "epoch": 3.8369279326670176, "grad_norm": 2.3932011127471924, "learning_rate": 1.4619220505071812e-06, "loss": 0.8257, "step": 7294 }, { "epoch": 3.837453971593898, "grad_norm": 2.309955596923828, "learning_rate": 1.4612901347994063e-06, "loss": 0.7968, "step": 7295 }, { "epoch": 3.8379800105207784, "grad_norm": 2.2575180530548096, "learning_rate": 1.4606582992919389e-06, "loss": 0.8097, "step": 7296 }, { "epoch": 3.838506049447659, "grad_norm": 2.4367024898529053, "learning_rate": 1.4600265440335645e-06, "loss": 0.818, "step": 7297 }, { "epoch": 3.8390320883745397, "grad_norm": 2.201862096786499, "learning_rate": 1.4593948690730624e-06, "loss": 0.8106, "step": 7298 }, { "epoch": 3.8395581273014203, "grad_norm": 2.3160877227783203, "learning_rate": 1.4587632744592055e-06, "loss": 0.791, "step": 7299 }, { "epoch": 3.840084166228301, "grad_norm": 2.4139671325683594, "learning_rate": 1.458131760240758e-06, "loss": 0.8032, "step": 7300 }, { "epoch": 3.8406102051551816, "grad_norm": 2.4379312992095947, "learning_rate": 1.457500326466481e-06, "loss": 0.8134, "step": 7301 }, { "epoch": 3.8411362440820622, "grad_norm": 2.639087200164795, "learning_rate": 1.4568689731851294e-06, "loss": 0.8353, "step": 7302 }, { "epoch": 3.841662283008943, "grad_norm": 2.4225735664367676, "learning_rate": 1.45623770044545e-06, "loss": 0.8306, "step": 7303 }, { "epoch": 3.842188321935823, "grad_norm": 2.372694492340088, "learning_rate": 1.4556065082961845e-06, "loss": 0.7904, "step": 7304 }, { "epoch": 3.8427143608627037, "grad_norm": 2.2891738414764404, "learning_rate": 1.4549753967860683e-06, "loss": 0.8199, "step": 7305 }, { "epoch": 3.8432403997895843, "grad_norm": 2.237074136734009, "learning_rate": 1.4543443659638313e-06, "loss": 0.8417, "step": 7306 }, { "epoch": 3.843766438716465, "grad_norm": 2.337193489074707, "learning_rate": 1.4537134158781942e-06, "loss": 0.7859, "step": 7307 }, { "epoch": 3.8442924776433456, "grad_norm": 2.187221050262451, "learning_rate": 1.4530825465778752e-06, "loss": 0.8027, "step": 7308 }, { "epoch": 3.844818516570226, "grad_norm": 2.329667091369629, "learning_rate": 1.4524517581115839e-06, "loss": 0.8496, "step": 7309 }, { "epoch": 3.845344555497107, "grad_norm": 2.5235939025878906, "learning_rate": 1.4518210505280257e-06, "loss": 0.8028, "step": 7310 }, { "epoch": 3.8458705944239875, "grad_norm": 2.2320220470428467, "learning_rate": 1.4511904238758967e-06, "loss": 0.7993, "step": 7311 }, { "epoch": 3.846396633350868, "grad_norm": 2.1216917037963867, "learning_rate": 1.4505598782038887e-06, "loss": 0.8147, "step": 7312 }, { "epoch": 3.8469226722777483, "grad_norm": 2.3533928394317627, "learning_rate": 1.449929413560689e-06, "loss": 0.7844, "step": 7313 }, { "epoch": 3.8474487112046294, "grad_norm": 2.339154005050659, "learning_rate": 1.4492990299949745e-06, "loss": 0.8242, "step": 7314 }, { "epoch": 3.8479747501315096, "grad_norm": 2.2452900409698486, "learning_rate": 1.4486687275554185e-06, "loss": 0.8513, "step": 7315 }, { "epoch": 3.84850078905839, "grad_norm": 2.272759199142456, "learning_rate": 1.4480385062906882e-06, "loss": 0.7881, "step": 7316 }, { "epoch": 3.849026827985271, "grad_norm": 2.5343337059020996, "learning_rate": 1.4474083662494433e-06, "loss": 0.8528, "step": 7317 }, { "epoch": 3.8495528669121515, "grad_norm": 2.312157154083252, "learning_rate": 1.4467783074803384e-06, "loss": 0.7909, "step": 7318 }, { "epoch": 3.850078905839032, "grad_norm": 2.3957741260528564, "learning_rate": 1.4461483300320212e-06, "loss": 0.8354, "step": 7319 }, { "epoch": 3.8506049447659128, "grad_norm": 2.2548301219940186, "learning_rate": 1.4455184339531336e-06, "loss": 0.8012, "step": 7320 }, { "epoch": 3.8511309836927934, "grad_norm": 2.2165286540985107, "learning_rate": 1.4448886192923094e-06, "loss": 0.7948, "step": 7321 }, { "epoch": 3.851657022619674, "grad_norm": 2.270775079727173, "learning_rate": 1.4442588860981781e-06, "loss": 0.7852, "step": 7322 }, { "epoch": 3.8521830615465547, "grad_norm": 2.366351842880249, "learning_rate": 1.4436292344193639e-06, "loss": 0.7816, "step": 7323 }, { "epoch": 3.852709100473435, "grad_norm": 2.484973907470703, "learning_rate": 1.4429996643044804e-06, "loss": 0.8167, "step": 7324 }, { "epoch": 3.8532351394003155, "grad_norm": 2.237626552581787, "learning_rate": 1.4423701758021392e-06, "loss": 0.7778, "step": 7325 }, { "epoch": 3.853761178327196, "grad_norm": 2.2060656547546387, "learning_rate": 1.4417407689609438e-06, "loss": 0.7938, "step": 7326 }, { "epoch": 3.8542872172540767, "grad_norm": 2.214149236679077, "learning_rate": 1.4411114438294925e-06, "loss": 0.8171, "step": 7327 }, { "epoch": 3.8548132561809574, "grad_norm": 2.5559778213500977, "learning_rate": 1.4404822004563748e-06, "loss": 0.8291, "step": 7328 }, { "epoch": 3.855339295107838, "grad_norm": 2.207153081893921, "learning_rate": 1.439853038890176e-06, "loss": 0.8393, "step": 7329 }, { "epoch": 3.8558653340347187, "grad_norm": 2.205317974090576, "learning_rate": 1.4392239591794752e-06, "loss": 0.7609, "step": 7330 }, { "epoch": 3.8563913729615993, "grad_norm": 2.2951741218566895, "learning_rate": 1.438594961372844e-06, "loss": 0.7888, "step": 7331 }, { "epoch": 3.85691741188848, "grad_norm": 2.1830027103424072, "learning_rate": 1.4379660455188481e-06, "loss": 0.8137, "step": 7332 }, { "epoch": 3.85744345081536, "grad_norm": 2.421032190322876, "learning_rate": 1.4373372116660478e-06, "loss": 0.7906, "step": 7333 }, { "epoch": 3.857969489742241, "grad_norm": 2.2136197090148926, "learning_rate": 1.4367084598629968e-06, "loss": 0.7773, "step": 7334 }, { "epoch": 3.8584955286691214, "grad_norm": 2.3039369583129883, "learning_rate": 1.4360797901582398e-06, "loss": 0.8353, "step": 7335 }, { "epoch": 3.859021567596002, "grad_norm": 2.2991204261779785, "learning_rate": 1.4354512026003199e-06, "loss": 0.8391, "step": 7336 }, { "epoch": 3.8595476065228826, "grad_norm": 2.182220697402954, "learning_rate": 1.4348226972377682e-06, "loss": 0.8153, "step": 7337 }, { "epoch": 3.8600736454497633, "grad_norm": 2.1960716247558594, "learning_rate": 1.4341942741191146e-06, "loss": 0.79, "step": 7338 }, { "epoch": 3.860599684376644, "grad_norm": 2.4186294078826904, "learning_rate": 1.4335659332928804e-06, "loss": 0.7606, "step": 7339 }, { "epoch": 3.8611257233035245, "grad_norm": 2.3253774642944336, "learning_rate": 1.4329376748075798e-06, "loss": 0.8029, "step": 7340 }, { "epoch": 3.861651762230405, "grad_norm": 2.28842830657959, "learning_rate": 1.4323094987117226e-06, "loss": 0.8141, "step": 7341 }, { "epoch": 3.8621778011572854, "grad_norm": 2.2675838470458984, "learning_rate": 1.431681405053812e-06, "loss": 0.798, "step": 7342 }, { "epoch": 3.8627038400841665, "grad_norm": 2.2093403339385986, "learning_rate": 1.4310533938823413e-06, "loss": 0.8167, "step": 7343 }, { "epoch": 3.8632298790110466, "grad_norm": 2.2191379070281982, "learning_rate": 1.4304254652458022e-06, "loss": 0.7983, "step": 7344 }, { "epoch": 3.8637559179379273, "grad_norm": 2.3093531131744385, "learning_rate": 1.4297976191926771e-06, "loss": 0.8681, "step": 7345 }, { "epoch": 3.864281956864808, "grad_norm": 2.352522134780884, "learning_rate": 1.4291698557714446e-06, "loss": 0.8356, "step": 7346 }, { "epoch": 3.8648079957916885, "grad_norm": 2.2035436630249023, "learning_rate": 1.4285421750305728e-06, "loss": 0.7856, "step": 7347 }, { "epoch": 3.865334034718569, "grad_norm": 2.2115516662597656, "learning_rate": 1.4279145770185271e-06, "loss": 0.7612, "step": 7348 }, { "epoch": 3.86586007364545, "grad_norm": 2.1679577827453613, "learning_rate": 1.4272870617837658e-06, "loss": 0.7342, "step": 7349 }, { "epoch": 3.8663861125723304, "grad_norm": 2.2878966331481934, "learning_rate": 1.426659629374739e-06, "loss": 0.7826, "step": 7350 }, { "epoch": 3.866912151499211, "grad_norm": 2.22659969329834, "learning_rate": 1.4260322798398923e-06, "loss": 0.7958, "step": 7351 }, { "epoch": 3.8674381904260917, "grad_norm": 2.27065372467041, "learning_rate": 1.425405013227664e-06, "loss": 0.8259, "step": 7352 }, { "epoch": 3.867964229352972, "grad_norm": 2.2313082218170166, "learning_rate": 1.4247778295864867e-06, "loss": 0.8577, "step": 7353 }, { "epoch": 3.868490268279853, "grad_norm": 2.176978826522827, "learning_rate": 1.424150728964786e-06, "loss": 0.7971, "step": 7354 }, { "epoch": 3.869016307206733, "grad_norm": 2.3149678707122803, "learning_rate": 1.4235237114109812e-06, "loss": 0.774, "step": 7355 }, { "epoch": 3.869542346133614, "grad_norm": 2.3144588470458984, "learning_rate": 1.4228967769734864e-06, "loss": 0.7977, "step": 7356 }, { "epoch": 3.8700683850604944, "grad_norm": 2.468808650970459, "learning_rate": 1.4222699257007054e-06, "loss": 0.8195, "step": 7357 }, { "epoch": 3.870594423987375, "grad_norm": 2.288264274597168, "learning_rate": 1.421643157641041e-06, "loss": 0.7908, "step": 7358 }, { "epoch": 3.8711204629142557, "grad_norm": 2.286550998687744, "learning_rate": 1.421016472842886e-06, "loss": 0.7839, "step": 7359 }, { "epoch": 3.8716465018411363, "grad_norm": 2.18296217918396, "learning_rate": 1.4203898713546268e-06, "loss": 0.804, "step": 7360 }, { "epoch": 3.872172540768017, "grad_norm": 2.4428389072418213, "learning_rate": 1.4197633532246447e-06, "loss": 0.8141, "step": 7361 }, { "epoch": 3.872698579694897, "grad_norm": 2.190387010574341, "learning_rate": 1.4191369185013143e-06, "loss": 0.7759, "step": 7362 }, { "epoch": 3.8732246186217782, "grad_norm": 2.242394208908081, "learning_rate": 1.4185105672330045e-06, "loss": 0.7457, "step": 7363 }, { "epoch": 3.8737506575486584, "grad_norm": 2.375965118408203, "learning_rate": 1.4178842994680752e-06, "loss": 0.784, "step": 7364 }, { "epoch": 3.874276696475539, "grad_norm": 2.2892980575561523, "learning_rate": 1.4172581152548815e-06, "loss": 0.802, "step": 7365 }, { "epoch": 3.8748027354024197, "grad_norm": 2.1997883319854736, "learning_rate": 1.4166320146417727e-06, "loss": 0.7938, "step": 7366 }, { "epoch": 3.8753287743293003, "grad_norm": 2.4669241905212402, "learning_rate": 1.416005997677091e-06, "loss": 0.8343, "step": 7367 }, { "epoch": 3.875854813256181, "grad_norm": 2.185896158218384, "learning_rate": 1.4153800644091717e-06, "loss": 0.8222, "step": 7368 }, { "epoch": 3.8763808521830616, "grad_norm": 2.1989643573760986, "learning_rate": 1.414754214886344e-06, "loss": 0.7943, "step": 7369 }, { "epoch": 3.8769068911099422, "grad_norm": 2.3999273777008057, "learning_rate": 1.4141284491569325e-06, "loss": 0.8333, "step": 7370 }, { "epoch": 3.877432930036823, "grad_norm": 2.5716757774353027, "learning_rate": 1.4135027672692503e-06, "loss": 0.8193, "step": 7371 }, { "epoch": 3.8779589689637035, "grad_norm": 2.3291068077087402, "learning_rate": 1.4128771692716097e-06, "loss": 0.8142, "step": 7372 }, { "epoch": 3.8784850078905837, "grad_norm": 2.295574188232422, "learning_rate": 1.4122516552123123e-06, "loss": 0.818, "step": 7373 }, { "epoch": 3.8790110468174643, "grad_norm": 2.228746175765991, "learning_rate": 1.4116262251396557e-06, "loss": 0.7622, "step": 7374 }, { "epoch": 3.879537085744345, "grad_norm": 2.2215981483459473, "learning_rate": 1.4110008791019303e-06, "loss": 0.8163, "step": 7375 }, { "epoch": 3.8800631246712256, "grad_norm": 2.5078539848327637, "learning_rate": 1.41037561714742e-06, "loss": 0.8131, "step": 7376 }, { "epoch": 3.8805891635981062, "grad_norm": 2.291006326675415, "learning_rate": 1.4097504393244032e-06, "loss": 0.8281, "step": 7377 }, { "epoch": 3.881115202524987, "grad_norm": 2.2885375022888184, "learning_rate": 1.4091253456811489e-06, "loss": 0.8373, "step": 7378 }, { "epoch": 3.8816412414518675, "grad_norm": 2.2722554206848145, "learning_rate": 1.4085003362659223e-06, "loss": 0.8238, "step": 7379 }, { "epoch": 3.882167280378748, "grad_norm": 2.28544282913208, "learning_rate": 1.4078754111269812e-06, "loss": 0.773, "step": 7380 }, { "epoch": 3.8826933193056288, "grad_norm": 2.2488434314727783, "learning_rate": 1.4072505703125775e-06, "loss": 0.7863, "step": 7381 }, { "epoch": 3.883219358232509, "grad_norm": 2.301612615585327, "learning_rate": 1.4066258138709554e-06, "loss": 0.7793, "step": 7382 }, { "epoch": 3.88374539715939, "grad_norm": 2.376739978790283, "learning_rate": 1.406001141850355e-06, "loss": 0.8242, "step": 7383 }, { "epoch": 3.8842714360862702, "grad_norm": 2.457965612411499, "learning_rate": 1.4053765542990052e-06, "loss": 0.8376, "step": 7384 }, { "epoch": 3.884797475013151, "grad_norm": 2.33650541305542, "learning_rate": 1.4047520512651341e-06, "loss": 0.7984, "step": 7385 }, { "epoch": 3.8853235139400315, "grad_norm": 2.330559015274048, "learning_rate": 1.4041276327969584e-06, "loss": 0.8169, "step": 7386 }, { "epoch": 3.885849552866912, "grad_norm": 2.2555248737335205, "learning_rate": 1.403503298942691e-06, "loss": 0.778, "step": 7387 }, { "epoch": 3.8863755917937928, "grad_norm": 2.269360303878784, "learning_rate": 1.402879049750538e-06, "loss": 0.7943, "step": 7388 }, { "epoch": 3.8869016307206734, "grad_norm": 2.2434628009796143, "learning_rate": 1.402254885268698e-06, "loss": 0.8454, "step": 7389 }, { "epoch": 3.887427669647554, "grad_norm": 2.419295072555542, "learning_rate": 1.4016308055453646e-06, "loss": 0.8358, "step": 7390 }, { "epoch": 3.8879537085744342, "grad_norm": 2.309476137161255, "learning_rate": 1.401006810628723e-06, "loss": 0.7733, "step": 7391 }, { "epoch": 3.8884797475013153, "grad_norm": 2.454864740371704, "learning_rate": 1.4003829005669545e-06, "loss": 0.8631, "step": 7392 }, { "epoch": 3.8890057864281955, "grad_norm": 2.276500701904297, "learning_rate": 1.3997590754082294e-06, "loss": 0.8199, "step": 7393 }, { "epoch": 3.889531825355076, "grad_norm": 2.3371996879577637, "learning_rate": 1.399135335200716e-06, "loss": 0.8618, "step": 7394 }, { "epoch": 3.8900578642819568, "grad_norm": 2.2064414024353027, "learning_rate": 1.398511679992573e-06, "loss": 0.8144, "step": 7395 }, { "epoch": 3.8905839032088374, "grad_norm": 2.3014965057373047, "learning_rate": 1.3978881098319557e-06, "loss": 0.799, "step": 7396 }, { "epoch": 3.891109942135718, "grad_norm": 2.1686742305755615, "learning_rate": 1.397264624767009e-06, "loss": 0.7746, "step": 7397 }, { "epoch": 3.8916359810625987, "grad_norm": 2.1854000091552734, "learning_rate": 1.3966412248458733e-06, "loss": 0.7977, "step": 7398 }, { "epoch": 3.8921620199894793, "grad_norm": 2.132330894470215, "learning_rate": 1.3960179101166837e-06, "loss": 0.7839, "step": 7399 }, { "epoch": 3.89268805891636, "grad_norm": 2.1699962615966797, "learning_rate": 1.3953946806275654e-06, "loss": 0.8403, "step": 7400 }, { "epoch": 3.8932140978432406, "grad_norm": 2.1578621864318848, "learning_rate": 1.394771536426639e-06, "loss": 0.7688, "step": 7401 }, { "epoch": 3.8937401367701208, "grad_norm": 2.230057716369629, "learning_rate": 1.3941484775620196e-06, "loss": 0.7904, "step": 7402 }, { "epoch": 3.894266175697002, "grad_norm": 2.4557015895843506, "learning_rate": 1.3935255040818135e-06, "loss": 0.8544, "step": 7403 }, { "epoch": 3.894792214623882, "grad_norm": 2.3645551204681396, "learning_rate": 1.3929026160341219e-06, "loss": 0.7948, "step": 7404 }, { "epoch": 3.8953182535507627, "grad_norm": 2.495687961578369, "learning_rate": 1.392279813467039e-06, "loss": 0.8262, "step": 7405 }, { "epoch": 3.8958442924776433, "grad_norm": 2.405334711074829, "learning_rate": 1.3916570964286526e-06, "loss": 0.8248, "step": 7406 }, { "epoch": 3.896370331404524, "grad_norm": 2.332982063293457, "learning_rate": 1.3910344649670427e-06, "loss": 0.8472, "step": 7407 }, { "epoch": 3.8968963703314046, "grad_norm": 2.1486475467681885, "learning_rate": 1.3904119191302845e-06, "loss": 0.8147, "step": 7408 }, { "epoch": 3.897422409258285, "grad_norm": 2.299659013748169, "learning_rate": 1.389789458966444e-06, "loss": 0.7777, "step": 7409 }, { "epoch": 3.897948448185166, "grad_norm": 2.3652305603027344, "learning_rate": 1.3891670845235835e-06, "loss": 0.8186, "step": 7410 }, { "epoch": 3.898474487112046, "grad_norm": 2.4149599075317383, "learning_rate": 1.388544795849757e-06, "loss": 0.7961, "step": 7411 }, { "epoch": 3.899000526038927, "grad_norm": 2.356353521347046, "learning_rate": 1.3879225929930134e-06, "loss": 0.7686, "step": 7412 }, { "epoch": 3.8995265649658073, "grad_norm": 2.2968997955322266, "learning_rate": 1.3873004760013937e-06, "loss": 0.842, "step": 7413 }, { "epoch": 3.900052603892688, "grad_norm": 2.4684691429138184, "learning_rate": 1.3866784449229309e-06, "loss": 0.8351, "step": 7414 }, { "epoch": 3.9005786428195686, "grad_norm": 2.4568560123443604, "learning_rate": 1.3860564998056541e-06, "loss": 0.8289, "step": 7415 }, { "epoch": 3.901104681746449, "grad_norm": 2.3151392936706543, "learning_rate": 1.3854346406975841e-06, "loss": 0.7886, "step": 7416 }, { "epoch": 3.90163072067333, "grad_norm": 2.377079486846924, "learning_rate": 1.384812867646736e-06, "loss": 0.8166, "step": 7417 }, { "epoch": 3.9021567596002105, "grad_norm": 2.2490956783294678, "learning_rate": 1.3841911807011178e-06, "loss": 0.7915, "step": 7418 }, { "epoch": 3.902682798527091, "grad_norm": 2.334979295730591, "learning_rate": 1.3835695799087317e-06, "loss": 0.8119, "step": 7419 }, { "epoch": 3.9032088374539717, "grad_norm": 2.2888858318328857, "learning_rate": 1.3829480653175703e-06, "loss": 0.8184, "step": 7420 }, { "epoch": 3.9037348763808524, "grad_norm": 2.1689531803131104, "learning_rate": 1.382326636975624e-06, "loss": 0.7932, "step": 7421 }, { "epoch": 3.9042609153077326, "grad_norm": 2.2536540031433105, "learning_rate": 1.3817052949308718e-06, "loss": 0.8202, "step": 7422 }, { "epoch": 3.9047869542346136, "grad_norm": 2.3777239322662354, "learning_rate": 1.3810840392312896e-06, "loss": 0.8049, "step": 7423 }, { "epoch": 3.905312993161494, "grad_norm": 2.2867441177368164, "learning_rate": 1.3804628699248455e-06, "loss": 0.8198, "step": 7424 }, { "epoch": 3.9058390320883745, "grad_norm": 2.2178359031677246, "learning_rate": 1.379841787059501e-06, "loss": 0.8007, "step": 7425 }, { "epoch": 3.906365071015255, "grad_norm": 2.1477153301239014, "learning_rate": 1.3792207906832105e-06, "loss": 0.7593, "step": 7426 }, { "epoch": 3.9068911099421357, "grad_norm": 2.36037015914917, "learning_rate": 1.3785998808439233e-06, "loss": 0.8067, "step": 7427 }, { "epoch": 3.9074171488690164, "grad_norm": 2.2699413299560547, "learning_rate": 1.3779790575895785e-06, "loss": 0.8044, "step": 7428 }, { "epoch": 3.907943187795897, "grad_norm": 2.2346813678741455, "learning_rate": 1.3773583209681122e-06, "loss": 0.8386, "step": 7429 }, { "epoch": 3.9084692267227776, "grad_norm": 2.382087230682373, "learning_rate": 1.376737671027452e-06, "loss": 0.8611, "step": 7430 }, { "epoch": 3.908995265649658, "grad_norm": 2.2500689029693604, "learning_rate": 1.3761171078155195e-06, "loss": 0.7961, "step": 7431 }, { "epoch": 3.909521304576539, "grad_norm": 2.3196098804473877, "learning_rate": 1.3754966313802299e-06, "loss": 0.7848, "step": 7432 }, { "epoch": 3.910047343503419, "grad_norm": 2.369201898574829, "learning_rate": 1.374876241769489e-06, "loss": 0.8643, "step": 7433 }, { "epoch": 3.9105733824302997, "grad_norm": 2.246711254119873, "learning_rate": 1.3742559390312007e-06, "loss": 0.8063, "step": 7434 }, { "epoch": 3.9110994213571804, "grad_norm": 2.1966941356658936, "learning_rate": 1.3736357232132568e-06, "loss": 0.8359, "step": 7435 }, { "epoch": 3.911625460284061, "grad_norm": 2.274506092071533, "learning_rate": 1.373015594363547e-06, "loss": 0.8162, "step": 7436 }, { "epoch": 3.9121514992109416, "grad_norm": 2.330582618713379, "learning_rate": 1.372395552529951e-06, "loss": 0.8243, "step": 7437 }, { "epoch": 3.9126775381378223, "grad_norm": 2.2942960262298584, "learning_rate": 1.3717755977603442e-06, "loss": 0.7745, "step": 7438 }, { "epoch": 3.913203577064703, "grad_norm": 2.2978339195251465, "learning_rate": 1.3711557301025937e-06, "loss": 0.7628, "step": 7439 }, { "epoch": 3.9137296159915835, "grad_norm": 2.430060625076294, "learning_rate": 1.3705359496045606e-06, "loss": 0.8463, "step": 7440 }, { "epoch": 3.914255654918464, "grad_norm": 2.438140630722046, "learning_rate": 1.3699162563140988e-06, "loss": 0.8144, "step": 7441 }, { "epoch": 3.9147816938453444, "grad_norm": 2.369831085205078, "learning_rate": 1.3692966502790572e-06, "loss": 0.8099, "step": 7442 }, { "epoch": 3.915307732772225, "grad_norm": 2.279552936553955, "learning_rate": 1.3686771315472742e-06, "loss": 0.7673, "step": 7443 }, { "epoch": 3.9158337716991056, "grad_norm": 2.378749132156372, "learning_rate": 1.3680577001665847e-06, "loss": 0.8354, "step": 7444 }, { "epoch": 3.9163598106259863, "grad_norm": 2.265110731124878, "learning_rate": 1.3674383561848171e-06, "loss": 0.7988, "step": 7445 }, { "epoch": 3.916885849552867, "grad_norm": 2.252253532409668, "learning_rate": 1.3668190996497894e-06, "loss": 0.8063, "step": 7446 }, { "epoch": 3.9174118884797475, "grad_norm": 2.084804058074951, "learning_rate": 1.3661999306093169e-06, "loss": 0.7622, "step": 7447 }, { "epoch": 3.917937927406628, "grad_norm": 2.3764076232910156, "learning_rate": 1.365580849111206e-06, "loss": 0.817, "step": 7448 }, { "epoch": 3.918463966333509, "grad_norm": 2.2581260204315186, "learning_rate": 1.3649618552032583e-06, "loss": 0.8158, "step": 7449 }, { "epoch": 3.9189900052603894, "grad_norm": 2.3787825107574463, "learning_rate": 1.364342948933265e-06, "loss": 0.7998, "step": 7450 }, { "epoch": 3.9195160441872696, "grad_norm": 2.1646766662597656, "learning_rate": 1.363724130349014e-06, "loss": 0.7696, "step": 7451 }, { "epoch": 3.9200420831141507, "grad_norm": 2.5613794326782227, "learning_rate": 1.363105399498285e-06, "loss": 0.8393, "step": 7452 }, { "epoch": 3.920568122041031, "grad_norm": 3.190979480743408, "learning_rate": 1.3624867564288514e-06, "loss": 0.7925, "step": 7453 }, { "epoch": 3.9210941609679115, "grad_norm": 2.440704107284546, "learning_rate": 1.3618682011884788e-06, "loss": 0.8453, "step": 7454 }, { "epoch": 3.921620199894792, "grad_norm": 2.336864471435547, "learning_rate": 1.3612497338249276e-06, "loss": 0.8595, "step": 7455 }, { "epoch": 3.922146238821673, "grad_norm": 2.2283718585968018, "learning_rate": 1.360631354385951e-06, "loss": 0.7989, "step": 7456 }, { "epoch": 3.9226722777485534, "grad_norm": 2.1687395572662354, "learning_rate": 1.360013062919294e-06, "loss": 0.8009, "step": 7457 }, { "epoch": 3.923198316675434, "grad_norm": 2.416637897491455, "learning_rate": 1.3593948594726951e-06, "loss": 0.8331, "step": 7458 }, { "epoch": 3.9237243556023147, "grad_norm": 2.4255170822143555, "learning_rate": 1.3587767440938876e-06, "loss": 0.7735, "step": 7459 }, { "epoch": 3.924250394529195, "grad_norm": 2.298861503601074, "learning_rate": 1.358158716830597e-06, "loss": 0.8295, "step": 7460 }, { "epoch": 3.924776433456076, "grad_norm": 2.4886183738708496, "learning_rate": 1.3575407777305428e-06, "loss": 0.827, "step": 7461 }, { "epoch": 3.925302472382956, "grad_norm": 2.3413350582122803, "learning_rate": 1.3569229268414356e-06, "loss": 0.8167, "step": 7462 }, { "epoch": 3.925828511309837, "grad_norm": 2.377901077270508, "learning_rate": 1.3563051642109825e-06, "loss": 0.8325, "step": 7463 }, { "epoch": 3.9263545502367174, "grad_norm": 2.2678334712982178, "learning_rate": 1.3556874898868793e-06, "loss": 0.7915, "step": 7464 }, { "epoch": 3.926880589163598, "grad_norm": 2.380868673324585, "learning_rate": 1.3550699039168195e-06, "loss": 0.8469, "step": 7465 }, { "epoch": 3.9274066280904787, "grad_norm": 2.3750123977661133, "learning_rate": 1.354452406348487e-06, "loss": 0.8248, "step": 7466 }, { "epoch": 3.9279326670173593, "grad_norm": 2.4313488006591797, "learning_rate": 1.3538349972295598e-06, "loss": 0.8103, "step": 7467 }, { "epoch": 3.92845870594424, "grad_norm": 2.2609307765960693, "learning_rate": 1.35321767660771e-06, "loss": 0.8326, "step": 7468 }, { "epoch": 3.9289847448711206, "grad_norm": 2.362720012664795, "learning_rate": 1.3526004445306002e-06, "loss": 0.7731, "step": 7469 }, { "epoch": 3.9295107837980012, "grad_norm": 2.3389158248901367, "learning_rate": 1.351983301045889e-06, "loss": 0.7697, "step": 7470 }, { "epoch": 3.9300368227248814, "grad_norm": 2.386995553970337, "learning_rate": 1.3513662462012259e-06, "loss": 0.8371, "step": 7471 }, { "epoch": 3.9305628616517625, "grad_norm": 2.298391819000244, "learning_rate": 1.3507492800442548e-06, "loss": 0.8083, "step": 7472 }, { "epoch": 3.9310889005786427, "grad_norm": 2.192303419113159, "learning_rate": 1.350132402622613e-06, "loss": 0.8675, "step": 7473 }, { "epoch": 3.9316149395055233, "grad_norm": 2.4449541568756104, "learning_rate": 1.3495156139839306e-06, "loss": 0.7933, "step": 7474 }, { "epoch": 3.932140978432404, "grad_norm": 2.4141581058502197, "learning_rate": 1.3488989141758305e-06, "loss": 0.7887, "step": 7475 }, { "epoch": 3.9326670173592846, "grad_norm": 2.518216848373413, "learning_rate": 1.3482823032459288e-06, "loss": 0.8361, "step": 7476 }, { "epoch": 3.9331930562861652, "grad_norm": 2.2116048336029053, "learning_rate": 1.3476657812418365e-06, "loss": 0.8322, "step": 7477 }, { "epoch": 3.933719095213046, "grad_norm": 2.194746255874634, "learning_rate": 1.3470493482111535e-06, "loss": 0.8328, "step": 7478 }, { "epoch": 3.9342451341399265, "grad_norm": 2.1984407901763916, "learning_rate": 1.3464330042014772e-06, "loss": 0.7723, "step": 7479 }, { "epoch": 3.9347711730668067, "grad_norm": 2.3233425617218018, "learning_rate": 1.3458167492603961e-06, "loss": 0.804, "step": 7480 }, { "epoch": 3.9352972119936878, "grad_norm": 2.3440134525299072, "learning_rate": 1.3452005834354928e-06, "loss": 0.7778, "step": 7481 }, { "epoch": 3.935823250920568, "grad_norm": 2.3358850479125977, "learning_rate": 1.3445845067743407e-06, "loss": 0.7905, "step": 7482 }, { "epoch": 3.9363492898474486, "grad_norm": 2.3838300704956055, "learning_rate": 1.3439685193245093e-06, "loss": 0.867, "step": 7483 }, { "epoch": 3.936875328774329, "grad_norm": 2.5057764053344727, "learning_rate": 1.34335262113356e-06, "loss": 0.7906, "step": 7484 }, { "epoch": 3.93740136770121, "grad_norm": 2.186361074447632, "learning_rate": 1.3427368122490463e-06, "loss": 0.8124, "step": 7485 }, { "epoch": 3.9379274066280905, "grad_norm": 2.5774807929992676, "learning_rate": 1.3421210927185161e-06, "loss": 0.8053, "step": 7486 }, { "epoch": 3.938453445554971, "grad_norm": 2.3966641426086426, "learning_rate": 1.3415054625895098e-06, "loss": 0.8104, "step": 7487 }, { "epoch": 3.9389794844818518, "grad_norm": 2.285818576812744, "learning_rate": 1.3408899219095617e-06, "loss": 0.7628, "step": 7488 }, { "epoch": 3.9395055234087324, "grad_norm": 2.449223518371582, "learning_rate": 1.340274470726198e-06, "loss": 0.826, "step": 7489 }, { "epoch": 3.940031562335613, "grad_norm": 2.364875555038452, "learning_rate": 1.3396591090869387e-06, "loss": 0.8005, "step": 7490 }, { "epoch": 3.940557601262493, "grad_norm": 2.175480365753174, "learning_rate": 1.3390438370392971e-06, "loss": 0.7911, "step": 7491 }, { "epoch": 3.941083640189374, "grad_norm": 2.231459856033325, "learning_rate": 1.3384286546307803e-06, "loss": 0.7823, "step": 7492 }, { "epoch": 3.9416096791162545, "grad_norm": 2.3525173664093018, "learning_rate": 1.337813561908885e-06, "loss": 0.8221, "step": 7493 }, { "epoch": 3.942135718043135, "grad_norm": 2.9007747173309326, "learning_rate": 1.3371985589211057e-06, "loss": 0.8101, "step": 7494 }, { "epoch": 3.9426617569700158, "grad_norm": 2.4404003620147705, "learning_rate": 1.3365836457149256e-06, "loss": 0.853, "step": 7495 }, { "epoch": 3.9431877958968964, "grad_norm": 2.288161039352417, "learning_rate": 1.335968822337824e-06, "loss": 0.8258, "step": 7496 }, { "epoch": 3.943713834823777, "grad_norm": 2.41501522064209, "learning_rate": 1.3353540888372724e-06, "loss": 0.826, "step": 7497 }, { "epoch": 3.9442398737506577, "grad_norm": 2.303570508956909, "learning_rate": 1.334739445260735e-06, "loss": 0.7963, "step": 7498 }, { "epoch": 3.9447659126775383, "grad_norm": 2.2177574634552, "learning_rate": 1.3341248916556709e-06, "loss": 0.7923, "step": 7499 }, { "epoch": 3.9452919516044185, "grad_norm": 2.2253448963165283, "learning_rate": 1.3335104280695282e-06, "loss": 0.8245, "step": 7500 }, { "epoch": 3.9458179905312996, "grad_norm": 2.4281342029571533, "learning_rate": 1.3328960545497516e-06, "loss": 0.7907, "step": 7501 }, { "epoch": 3.9463440294581797, "grad_norm": 2.2524611949920654, "learning_rate": 1.332281771143778e-06, "loss": 0.7752, "step": 7502 }, { "epoch": 3.9468700683850604, "grad_norm": 2.2951810359954834, "learning_rate": 1.3316675778990369e-06, "loss": 0.7845, "step": 7503 }, { "epoch": 3.947396107311941, "grad_norm": 2.3567652702331543, "learning_rate": 1.331053474862951e-06, "loss": 0.8005, "step": 7504 }, { "epoch": 3.9479221462388217, "grad_norm": 2.3922977447509766, "learning_rate": 1.3304394620829374e-06, "loss": 0.8273, "step": 7505 }, { "epoch": 3.9484481851657023, "grad_norm": 2.2643258571624756, "learning_rate": 1.3298255396064025e-06, "loss": 0.7965, "step": 7506 }, { "epoch": 3.948974224092583, "grad_norm": 2.43869948387146, "learning_rate": 1.3292117074807508e-06, "loss": 0.8321, "step": 7507 }, { "epoch": 3.9495002630194636, "grad_norm": 2.45373272895813, "learning_rate": 1.3285979657533746e-06, "loss": 0.8345, "step": 7508 }, { "epoch": 3.950026301946344, "grad_norm": 2.711026668548584, "learning_rate": 1.3279843144716632e-06, "loss": 0.8299, "step": 7509 }, { "epoch": 3.950552340873225, "grad_norm": 2.257028102874756, "learning_rate": 1.3273707536829968e-06, "loss": 0.7992, "step": 7510 }, { "epoch": 3.951078379800105, "grad_norm": 2.5500001907348633, "learning_rate": 1.3267572834347503e-06, "loss": 0.7905, "step": 7511 }, { "epoch": 3.9516044187269856, "grad_norm": 2.359071731567383, "learning_rate": 1.3261439037742896e-06, "loss": 0.8214, "step": 7512 }, { "epoch": 3.9521304576538663, "grad_norm": 2.457963228225708, "learning_rate": 1.3255306147489765e-06, "loss": 0.8198, "step": 7513 }, { "epoch": 3.952656496580747, "grad_norm": 2.415721893310547, "learning_rate": 1.3249174164061617e-06, "loss": 0.9135, "step": 7514 }, { "epoch": 3.9531825355076275, "grad_norm": 2.384469747543335, "learning_rate": 1.3243043087931923e-06, "loss": 0.811, "step": 7515 }, { "epoch": 3.953708574434508, "grad_norm": 2.2752151489257812, "learning_rate": 1.3236912919574068e-06, "loss": 0.7967, "step": 7516 }, { "epoch": 3.954234613361389, "grad_norm": 2.3624699115753174, "learning_rate": 1.3230783659461382e-06, "loss": 0.7844, "step": 7517 }, { "epoch": 3.9547606522882695, "grad_norm": 2.2462308406829834, "learning_rate": 1.3224655308067097e-06, "loss": 0.8078, "step": 7518 }, { "epoch": 3.95528669121515, "grad_norm": 2.6105940341949463, "learning_rate": 1.32185278658644e-06, "loss": 0.8111, "step": 7519 }, { "epoch": 3.9558127301420303, "grad_norm": 2.46366286277771, "learning_rate": 1.321240133332641e-06, "loss": 0.8095, "step": 7520 }, { "epoch": 3.9563387690689114, "grad_norm": 2.3300907611846924, "learning_rate": 1.3206275710926147e-06, "loss": 0.8003, "step": 7521 }, { "epoch": 3.9568648079957915, "grad_norm": 2.269954204559326, "learning_rate": 1.3200150999136585e-06, "loss": 0.8381, "step": 7522 }, { "epoch": 3.957390846922672, "grad_norm": 2.1637346744537354, "learning_rate": 1.3194027198430632e-06, "loss": 0.7655, "step": 7523 }, { "epoch": 3.957916885849553, "grad_norm": 2.4031550884246826, "learning_rate": 1.3187904309281103e-06, "loss": 0.8008, "step": 7524 }, { "epoch": 3.9584429247764334, "grad_norm": 2.426804780960083, "learning_rate": 1.318178233216076e-06, "loss": 0.8352, "step": 7525 }, { "epoch": 3.958968963703314, "grad_norm": 2.2764170169830322, "learning_rate": 1.3175661267542295e-06, "loss": 0.8238, "step": 7526 }, { "epoch": 3.9594950026301947, "grad_norm": 2.3457396030426025, "learning_rate": 1.3169541115898326e-06, "loss": 0.7925, "step": 7527 }, { "epoch": 3.9600210415570753, "grad_norm": 2.1151304244995117, "learning_rate": 1.3163421877701385e-06, "loss": 0.7705, "step": 7528 }, { "epoch": 3.9605470804839555, "grad_norm": 2.560269594192505, "learning_rate": 1.3157303553423956e-06, "loss": 0.7963, "step": 7529 }, { "epoch": 3.9610731194108366, "grad_norm": 2.2410812377929688, "learning_rate": 1.3151186143538451e-06, "loss": 0.8106, "step": 7530 }, { "epoch": 3.961599158337717, "grad_norm": 2.203108310699463, "learning_rate": 1.3145069648517187e-06, "loss": 0.7966, "step": 7531 }, { "epoch": 3.9621251972645974, "grad_norm": 2.2751550674438477, "learning_rate": 1.3138954068832437e-06, "loss": 0.7942, "step": 7532 }, { "epoch": 3.962651236191478, "grad_norm": 2.153026819229126, "learning_rate": 1.3132839404956394e-06, "loss": 0.7961, "step": 7533 }, { "epoch": 3.9631772751183587, "grad_norm": 2.2894535064697266, "learning_rate": 1.312672565736119e-06, "loss": 0.8312, "step": 7534 }, { "epoch": 3.9637033140452393, "grad_norm": 2.1891682147979736, "learning_rate": 1.3120612826518853e-06, "loss": 0.8226, "step": 7535 }, { "epoch": 3.96422935297212, "grad_norm": 2.225503921508789, "learning_rate": 1.311450091290138e-06, "loss": 0.7781, "step": 7536 }, { "epoch": 3.9647553918990006, "grad_norm": 2.262430191040039, "learning_rate": 1.3108389916980675e-06, "loss": 0.811, "step": 7537 }, { "epoch": 3.9652814308258812, "grad_norm": 2.3321921825408936, "learning_rate": 1.3102279839228582e-06, "loss": 0.8486, "step": 7538 }, { "epoch": 3.965807469752762, "grad_norm": 2.339423656463623, "learning_rate": 1.3096170680116865e-06, "loss": 0.7836, "step": 7539 }, { "epoch": 3.966333508679642, "grad_norm": 2.268150806427002, "learning_rate": 1.3090062440117224e-06, "loss": 0.8006, "step": 7540 }, { "epoch": 3.966859547606523, "grad_norm": 2.3235385417938232, "learning_rate": 1.3083955119701292e-06, "loss": 0.8127, "step": 7541 }, { "epoch": 3.9673855865334033, "grad_norm": 2.241469621658325, "learning_rate": 1.3077848719340606e-06, "loss": 0.849, "step": 7542 }, { "epoch": 3.967911625460284, "grad_norm": 2.1989028453826904, "learning_rate": 1.3071743239506669e-06, "loss": 0.8283, "step": 7543 }, { "epoch": 3.9684376643871646, "grad_norm": 2.1967873573303223, "learning_rate": 1.3065638680670879e-06, "loss": 0.8083, "step": 7544 }, { "epoch": 3.9689637033140452, "grad_norm": 2.2629261016845703, "learning_rate": 1.305953504330458e-06, "loss": 0.7892, "step": 7545 }, { "epoch": 3.969489742240926, "grad_norm": 2.353896379470825, "learning_rate": 1.305343232787905e-06, "loss": 0.814, "step": 7546 }, { "epoch": 3.9700157811678065, "grad_norm": 2.105722188949585, "learning_rate": 1.3047330534865481e-06, "loss": 0.7814, "step": 7547 }, { "epoch": 3.970541820094687, "grad_norm": 2.3548481464385986, "learning_rate": 1.3041229664735008e-06, "loss": 0.8157, "step": 7548 }, { "epoch": 3.9710678590215673, "grad_norm": 2.5495996475219727, "learning_rate": 1.3035129717958694e-06, "loss": 0.8185, "step": 7549 }, { "epoch": 3.9715938979484484, "grad_norm": 2.3307886123657227, "learning_rate": 1.3029030695007511e-06, "loss": 0.8516, "step": 7550 }, { "epoch": 3.9721199368753286, "grad_norm": 2.394908905029297, "learning_rate": 1.3022932596352375e-06, "loss": 0.8604, "step": 7551 }, { "epoch": 3.9726459758022092, "grad_norm": 2.085165500640869, "learning_rate": 1.3016835422464135e-06, "loss": 0.7628, "step": 7552 }, { "epoch": 3.97317201472909, "grad_norm": 2.223165512084961, "learning_rate": 1.3010739173813561e-06, "loss": 0.7381, "step": 7553 }, { "epoch": 3.9736980536559705, "grad_norm": 2.2732934951782227, "learning_rate": 1.3004643850871361e-06, "loss": 0.8335, "step": 7554 }, { "epoch": 3.974224092582851, "grad_norm": 2.2556822299957275, "learning_rate": 1.299854945410815e-06, "loss": 0.802, "step": 7555 }, { "epoch": 3.9747501315097318, "grad_norm": 2.248419761657715, "learning_rate": 1.2992455983994497e-06, "loss": 0.8109, "step": 7556 }, { "epoch": 3.9752761704366124, "grad_norm": 2.3674440383911133, "learning_rate": 1.2986363441000874e-06, "loss": 0.8238, "step": 7557 }, { "epoch": 3.975802209363493, "grad_norm": 2.2814927101135254, "learning_rate": 1.2980271825597704e-06, "loss": 0.8069, "step": 7558 }, { "epoch": 3.9763282482903737, "grad_norm": 2.3204357624053955, "learning_rate": 1.2974181138255325e-06, "loss": 0.7985, "step": 7559 }, { "epoch": 3.976854287217254, "grad_norm": 2.4643232822418213, "learning_rate": 1.2968091379444015e-06, "loss": 0.8544, "step": 7560 }, { "epoch": 3.9773803261441345, "grad_norm": 2.1553163528442383, "learning_rate": 1.2962002549633971e-06, "loss": 0.83, "step": 7561 }, { "epoch": 3.977906365071015, "grad_norm": 2.1237564086914062, "learning_rate": 1.2955914649295315e-06, "loss": 0.7595, "step": 7562 }, { "epoch": 3.9784324039978958, "grad_norm": 2.0746617317199707, "learning_rate": 1.2949827678898119e-06, "loss": 0.7709, "step": 7563 }, { "epoch": 3.9789584429247764, "grad_norm": 2.126110792160034, "learning_rate": 1.2943741638912348e-06, "loss": 0.7826, "step": 7564 }, { "epoch": 3.979484481851657, "grad_norm": 2.214700222015381, "learning_rate": 1.2937656529807914e-06, "loss": 0.8352, "step": 7565 }, { "epoch": 3.9800105207785377, "grad_norm": 2.242338180541992, "learning_rate": 1.2931572352054668e-06, "loss": 0.7876, "step": 7566 }, { "epoch": 3.9805365597054183, "grad_norm": 2.2146694660186768, "learning_rate": 1.2925489106122385e-06, "loss": 0.8035, "step": 7567 }, { "epoch": 3.981062598632299, "grad_norm": 2.635045289993286, "learning_rate": 1.2919406792480739e-06, "loss": 0.8476, "step": 7568 }, { "epoch": 3.981588637559179, "grad_norm": 2.1624529361724854, "learning_rate": 1.2913325411599368e-06, "loss": 0.8361, "step": 7569 }, { "epoch": 3.98211467648606, "grad_norm": 2.2142581939697266, "learning_rate": 1.290724496394783e-06, "loss": 0.8407, "step": 7570 }, { "epoch": 3.9826407154129404, "grad_norm": 2.2578125, "learning_rate": 1.290116544999559e-06, "loss": 0.8334, "step": 7571 }, { "epoch": 3.983166754339821, "grad_norm": 2.3576712608337402, "learning_rate": 1.2895086870212059e-06, "loss": 0.8807, "step": 7572 }, { "epoch": 3.9836927932667017, "grad_norm": 2.361445665359497, "learning_rate": 1.288900922506658e-06, "loss": 0.7925, "step": 7573 }, { "epoch": 3.9842188321935823, "grad_norm": 2.1792893409729004, "learning_rate": 1.2882932515028417e-06, "loss": 0.8171, "step": 7574 }, { "epoch": 3.984744871120463, "grad_norm": 2.2261033058166504, "learning_rate": 1.2876856740566757e-06, "loss": 0.8115, "step": 7575 }, { "epoch": 3.9852709100473436, "grad_norm": 2.2835943698883057, "learning_rate": 1.287078190215072e-06, "loss": 0.8256, "step": 7576 }, { "epoch": 3.985796948974224, "grad_norm": 2.21751070022583, "learning_rate": 1.2864708000249367e-06, "loss": 0.8238, "step": 7577 }, { "epoch": 3.986322987901105, "grad_norm": 2.3227896690368652, "learning_rate": 1.2858635035331652e-06, "loss": 0.8536, "step": 7578 }, { "epoch": 3.9868490268279855, "grad_norm": 2.245072603225708, "learning_rate": 1.2852563007866491e-06, "loss": 0.763, "step": 7579 }, { "epoch": 3.9873750657548657, "grad_norm": 2.3611061573028564, "learning_rate": 1.2846491918322706e-06, "loss": 0.8489, "step": 7580 }, { "epoch": 3.9879011046817463, "grad_norm": 2.423137664794922, "learning_rate": 1.2840421767169054e-06, "loss": 0.8897, "step": 7581 }, { "epoch": 3.988427143608627, "grad_norm": 2.2812747955322266, "learning_rate": 1.2834352554874225e-06, "loss": 0.7813, "step": 7582 }, { "epoch": 3.9889531825355076, "grad_norm": 2.2141458988189697, "learning_rate": 1.2828284281906832e-06, "loss": 0.817, "step": 7583 }, { "epoch": 3.989479221462388, "grad_norm": 2.225717544555664, "learning_rate": 1.2822216948735422e-06, "loss": 0.778, "step": 7584 }, { "epoch": 3.990005260389269, "grad_norm": 2.2766008377075195, "learning_rate": 1.2816150555828449e-06, "loss": 0.8149, "step": 7585 }, { "epoch": 3.9905312993161495, "grad_norm": 2.187774419784546, "learning_rate": 1.2810085103654312e-06, "loss": 0.7801, "step": 7586 }, { "epoch": 3.99105733824303, "grad_norm": 2.1737887859344482, "learning_rate": 1.2804020592681332e-06, "loss": 0.8275, "step": 7587 }, { "epoch": 3.9915833771699107, "grad_norm": 2.229017496109009, "learning_rate": 1.2797957023377767e-06, "loss": 0.7792, "step": 7588 }, { "epoch": 3.992109416096791, "grad_norm": 2.2880280017852783, "learning_rate": 1.2791894396211787e-06, "loss": 0.8256, "step": 7589 }, { "epoch": 3.992635455023672, "grad_norm": 2.3405184745788574, "learning_rate": 1.2785832711651508e-06, "loss": 0.8315, "step": 7590 }, { "epoch": 3.993161493950552, "grad_norm": 2.3358678817749023, "learning_rate": 1.2779771970164945e-06, "loss": 0.7649, "step": 7591 }, { "epoch": 3.993687532877433, "grad_norm": 2.3021304607391357, "learning_rate": 1.277371217222007e-06, "loss": 0.8192, "step": 7592 }, { "epoch": 3.9942135718043135, "grad_norm": 2.3820672035217285, "learning_rate": 1.2767653318284756e-06, "loss": 0.7809, "step": 7593 }, { "epoch": 3.994739610731194, "grad_norm": 2.2951278686523438, "learning_rate": 1.2761595408826822e-06, "loss": 0.8306, "step": 7594 }, { "epoch": 3.9952656496580747, "grad_norm": 2.2946736812591553, "learning_rate": 1.2755538444314006e-06, "loss": 0.8004, "step": 7595 }, { "epoch": 3.9957916885849554, "grad_norm": 2.165283679962158, "learning_rate": 1.274948242521398e-06, "loss": 0.7541, "step": 7596 }, { "epoch": 3.996317727511836, "grad_norm": 2.3532254695892334, "learning_rate": 1.2743427351994336e-06, "loss": 0.7933, "step": 7597 }, { "epoch": 3.996843766438716, "grad_norm": 2.1826677322387695, "learning_rate": 1.2737373225122593e-06, "loss": 0.7969, "step": 7598 }, { "epoch": 3.9973698053655973, "grad_norm": 2.2633938789367676, "learning_rate": 1.2731320045066214e-06, "loss": 0.8017, "step": 7599 }, { "epoch": 3.9978958442924775, "grad_norm": 2.306880474090576, "learning_rate": 1.2725267812292547e-06, "loss": 0.8495, "step": 7600 }, { "epoch": 3.998421883219358, "grad_norm": 2.4323019981384277, "learning_rate": 1.271921652726891e-06, "loss": 0.8258, "step": 7601 }, { "epoch": 3.9989479221462387, "grad_norm": 2.2382774353027344, "learning_rate": 1.2713166190462527e-06, "loss": 0.7996, "step": 7602 }, { "epoch": 3.9994739610731194, "grad_norm": 2.2370831966400146, "learning_rate": 1.2707116802340568e-06, "loss": 0.8016, "step": 7603 }, { "epoch": 4.0, "grad_norm": 2.1709256172180176, "learning_rate": 1.2701068363370096e-06, "loss": 0.7971, "step": 7604 }, { "epoch": 4.00052603892688, "grad_norm": 2.2677061557769775, "learning_rate": 1.269502087401812e-06, "loss": 0.7354, "step": 7605 }, { "epoch": 4.001052077853761, "grad_norm": 2.164029121398926, "learning_rate": 1.2688974334751594e-06, "loss": 0.7354, "step": 7606 }, { "epoch": 4.0015781167806415, "grad_norm": 2.2173197269439697, "learning_rate": 1.268292874603736e-06, "loss": 0.7523, "step": 7607 }, { "epoch": 4.0021041557075225, "grad_norm": 2.361772060394287, "learning_rate": 1.2676884108342214e-06, "loss": 0.8284, "step": 7608 }, { "epoch": 4.002630194634403, "grad_norm": 2.095203161239624, "learning_rate": 1.2670840422132867e-06, "loss": 0.7549, "step": 7609 }, { "epoch": 4.003156233561284, "grad_norm": 2.2586822509765625, "learning_rate": 1.2664797687875974e-06, "loss": 0.7454, "step": 7610 }, { "epoch": 4.003682272488164, "grad_norm": 2.2884745597839355, "learning_rate": 1.265875590603809e-06, "loss": 0.7613, "step": 7611 }, { "epoch": 4.004208311415045, "grad_norm": 2.209852457046509, "learning_rate": 1.2652715077085715e-06, "loss": 0.772, "step": 7612 }, { "epoch": 4.004734350341925, "grad_norm": 2.286306142807007, "learning_rate": 1.2646675201485281e-06, "loss": 0.7567, "step": 7613 }, { "epoch": 4.0052603892688055, "grad_norm": 2.468754768371582, "learning_rate": 1.2640636279703116e-06, "loss": 0.7607, "step": 7614 }, { "epoch": 4.0057864281956865, "grad_norm": 2.3818540573120117, "learning_rate": 1.2634598312205504e-06, "loss": 0.7714, "step": 7615 }, { "epoch": 4.006312467122567, "grad_norm": 2.1307313442230225, "learning_rate": 1.2628561299458652e-06, "loss": 0.7527, "step": 7616 }, { "epoch": 4.006838506049448, "grad_norm": 2.265869140625, "learning_rate": 1.2622525241928674e-06, "loss": 0.7003, "step": 7617 }, { "epoch": 4.007364544976328, "grad_norm": 2.196467399597168, "learning_rate": 1.2616490140081623e-06, "loss": 0.7357, "step": 7618 }, { "epoch": 4.007890583903209, "grad_norm": 2.2145307064056396, "learning_rate": 1.2610455994383482e-06, "loss": 0.8, "step": 7619 }, { "epoch": 4.008416622830089, "grad_norm": 2.458064079284668, "learning_rate": 1.260442280530017e-06, "loss": 0.7813, "step": 7620 }, { "epoch": 4.00894266175697, "grad_norm": 2.2400100231170654, "learning_rate": 1.2598390573297492e-06, "loss": 0.7725, "step": 7621 }, { "epoch": 4.0094687006838505, "grad_norm": 2.2360427379608154, "learning_rate": 1.2592359298841223e-06, "loss": 0.7285, "step": 7622 }, { "epoch": 4.009994739610732, "grad_norm": 2.270285129547119, "learning_rate": 1.258632898239704e-06, "loss": 0.7703, "step": 7623 }, { "epoch": 4.010520778537612, "grad_norm": 2.541200637817383, "learning_rate": 1.2580299624430553e-06, "loss": 0.8211, "step": 7624 }, { "epoch": 4.011046817464492, "grad_norm": 2.3808650970458984, "learning_rate": 1.2574271225407303e-06, "loss": 0.7986, "step": 7625 }, { "epoch": 4.011572856391373, "grad_norm": 2.5502123832702637, "learning_rate": 1.2568243785792747e-06, "loss": 0.8013, "step": 7626 }, { "epoch": 4.012098895318253, "grad_norm": 2.2940990924835205, "learning_rate": 1.2562217306052283e-06, "loss": 0.7792, "step": 7627 }, { "epoch": 4.012624934245134, "grad_norm": 2.7758824825286865, "learning_rate": 1.2556191786651212e-06, "loss": 0.7784, "step": 7628 }, { "epoch": 4.0131509731720145, "grad_norm": 2.2750155925750732, "learning_rate": 1.2550167228054768e-06, "loss": 0.7826, "step": 7629 }, { "epoch": 4.013677012098896, "grad_norm": 2.3705849647521973, "learning_rate": 1.2544143630728126e-06, "loss": 0.8375, "step": 7630 }, { "epoch": 4.014203051025776, "grad_norm": 2.4545371532440186, "learning_rate": 1.253812099513637e-06, "loss": 0.7808, "step": 7631 }, { "epoch": 4.014729089952657, "grad_norm": 2.5609824657440186, "learning_rate": 1.2532099321744525e-06, "loss": 0.8108, "step": 7632 }, { "epoch": 4.015255128879537, "grad_norm": 2.3402321338653564, "learning_rate": 1.2526078611017527e-06, "loss": 0.7732, "step": 7633 }, { "epoch": 4.015781167806417, "grad_norm": 2.287108898162842, "learning_rate": 1.252005886342026e-06, "loss": 0.7717, "step": 7634 }, { "epoch": 4.016307206733298, "grad_norm": 2.175177812576294, "learning_rate": 1.2514040079417488e-06, "loss": 0.7591, "step": 7635 }, { "epoch": 4.0168332456601785, "grad_norm": 2.1942832469940186, "learning_rate": 1.250802225947395e-06, "loss": 0.7792, "step": 7636 }, { "epoch": 4.01735928458706, "grad_norm": 2.252453327178955, "learning_rate": 1.2502005404054285e-06, "loss": 0.8258, "step": 7637 }, { "epoch": 4.01788532351394, "grad_norm": 2.524944543838501, "learning_rate": 1.2495989513623065e-06, "loss": 0.7702, "step": 7638 }, { "epoch": 4.018411362440821, "grad_norm": 2.4854495525360107, "learning_rate": 1.2489974588644794e-06, "loss": 0.8109, "step": 7639 }, { "epoch": 4.018937401367701, "grad_norm": 2.1810684204101562, "learning_rate": 1.2483960629583874e-06, "loss": 0.7921, "step": 7640 }, { "epoch": 4.019463440294582, "grad_norm": 2.224194049835205, "learning_rate": 1.2477947636904675e-06, "loss": 0.7958, "step": 7641 }, { "epoch": 4.019989479221462, "grad_norm": 2.252716541290283, "learning_rate": 1.2471935611071445e-06, "loss": 0.7575, "step": 7642 }, { "epoch": 4.020515518148343, "grad_norm": 2.3206005096435547, "learning_rate": 1.246592455254839e-06, "loss": 0.8006, "step": 7643 }, { "epoch": 4.021041557075224, "grad_norm": 2.4978678226470947, "learning_rate": 1.245991446179964e-06, "loss": 0.7952, "step": 7644 }, { "epoch": 4.021567596002104, "grad_norm": 2.294207811355591, "learning_rate": 1.2453905339289235e-06, "loss": 0.8056, "step": 7645 }, { "epoch": 4.022093634928985, "grad_norm": 2.4726476669311523, "learning_rate": 1.2447897185481151e-06, "loss": 0.7383, "step": 7646 }, { "epoch": 4.022619673855865, "grad_norm": 2.293447494506836, "learning_rate": 1.2441890000839285e-06, "loss": 0.7341, "step": 7647 }, { "epoch": 4.023145712782746, "grad_norm": 2.43438982963562, "learning_rate": 1.2435883785827466e-06, "loss": 0.7896, "step": 7648 }, { "epoch": 4.023671751709626, "grad_norm": 2.1829097270965576, "learning_rate": 1.2429878540909446e-06, "loss": 0.7557, "step": 7649 }, { "epoch": 4.024197790636507, "grad_norm": 2.2283999919891357, "learning_rate": 1.242387426654888e-06, "loss": 0.7808, "step": 7650 }, { "epoch": 4.024723829563388, "grad_norm": 2.312063455581665, "learning_rate": 1.241787096320938e-06, "loss": 0.7888, "step": 7651 }, { "epoch": 4.025249868490269, "grad_norm": 2.2763218879699707, "learning_rate": 1.2411868631354475e-06, "loss": 0.7828, "step": 7652 }, { "epoch": 4.025775907417149, "grad_norm": 2.5866873264312744, "learning_rate": 1.24058672714476e-06, "loss": 0.8192, "step": 7653 }, { "epoch": 4.026301946344029, "grad_norm": 2.4546616077423096, "learning_rate": 1.2399866883952135e-06, "loss": 0.8114, "step": 7654 }, { "epoch": 4.02682798527091, "grad_norm": 2.33561110496521, "learning_rate": 1.239386746933138e-06, "loss": 0.8175, "step": 7655 }, { "epoch": 4.02735402419779, "grad_norm": 2.2202131748199463, "learning_rate": 1.2387869028048567e-06, "loss": 0.7821, "step": 7656 }, { "epoch": 4.027880063124671, "grad_norm": 2.2882046699523926, "learning_rate": 1.2381871560566824e-06, "loss": 0.7773, "step": 7657 }, { "epoch": 4.028406102051552, "grad_norm": 2.326249361038208, "learning_rate": 1.2375875067349235e-06, "loss": 0.7548, "step": 7658 }, { "epoch": 4.028932140978433, "grad_norm": 2.229983329772949, "learning_rate": 1.23698795488588e-06, "loss": 0.6997, "step": 7659 }, { "epoch": 4.029458179905313, "grad_norm": 2.2813639640808105, "learning_rate": 1.2363885005558443e-06, "loss": 0.7982, "step": 7660 }, { "epoch": 4.029984218832194, "grad_norm": 2.1359565258026123, "learning_rate": 1.2357891437911004e-06, "loss": 0.7238, "step": 7661 }, { "epoch": 4.030510257759074, "grad_norm": 2.267674207687378, "learning_rate": 1.2351898846379261e-06, "loss": 0.754, "step": 7662 }, { "epoch": 4.031036296685955, "grad_norm": 2.4352166652679443, "learning_rate": 1.2345907231425922e-06, "loss": 0.7732, "step": 7663 }, { "epoch": 4.031562335612835, "grad_norm": 2.1464176177978516, "learning_rate": 1.2339916593513585e-06, "loss": 0.7838, "step": 7664 }, { "epoch": 4.032088374539716, "grad_norm": 2.3910226821899414, "learning_rate": 1.2333926933104814e-06, "loss": 0.7827, "step": 7665 }, { "epoch": 4.032614413466597, "grad_norm": 2.2163467407226562, "learning_rate": 1.2327938250662067e-06, "loss": 0.7415, "step": 7666 }, { "epoch": 4.033140452393477, "grad_norm": 2.319227695465088, "learning_rate": 1.232195054664774e-06, "loss": 0.7796, "step": 7667 }, { "epoch": 4.033666491320358, "grad_norm": 2.3570802211761475, "learning_rate": 1.2315963821524157e-06, "loss": 0.8037, "step": 7668 }, { "epoch": 4.034192530247238, "grad_norm": 2.3532707691192627, "learning_rate": 1.2309978075753564e-06, "loss": 0.8109, "step": 7669 }, { "epoch": 4.034718569174119, "grad_norm": 2.3893723487854004, "learning_rate": 1.2303993309798134e-06, "loss": 0.7942, "step": 7670 }, { "epoch": 4.035244608100999, "grad_norm": 2.5417118072509766, "learning_rate": 1.2298009524119944e-06, "loss": 0.7673, "step": 7671 }, { "epoch": 4.0357706470278805, "grad_norm": 2.3070602416992188, "learning_rate": 1.2292026719181018e-06, "loss": 0.7363, "step": 7672 }, { "epoch": 4.036296685954761, "grad_norm": 2.37534236907959, "learning_rate": 1.22860448954433e-06, "loss": 0.7287, "step": 7673 }, { "epoch": 4.036822724881641, "grad_norm": 2.0827274322509766, "learning_rate": 1.2280064053368652e-06, "loss": 0.6857, "step": 7674 }, { "epoch": 4.037348763808522, "grad_norm": 2.2758076190948486, "learning_rate": 1.2274084193418865e-06, "loss": 0.7696, "step": 7675 }, { "epoch": 4.037874802735402, "grad_norm": 2.472902536392212, "learning_rate": 1.2268105316055661e-06, "loss": 0.7632, "step": 7676 }, { "epoch": 4.038400841662283, "grad_norm": 2.4888157844543457, "learning_rate": 1.2262127421740664e-06, "loss": 0.801, "step": 7677 }, { "epoch": 4.038926880589163, "grad_norm": 2.272630214691162, "learning_rate": 1.2256150510935447e-06, "loss": 0.7807, "step": 7678 }, { "epoch": 4.0394529195160445, "grad_norm": 2.1745412349700928, "learning_rate": 1.2250174584101481e-06, "loss": 0.7285, "step": 7679 }, { "epoch": 4.039978958442925, "grad_norm": 2.2674310207366943, "learning_rate": 1.2244199641700189e-06, "loss": 0.7801, "step": 7680 }, { "epoch": 4.040504997369806, "grad_norm": 2.494136095046997, "learning_rate": 1.22382256841929e-06, "loss": 0.8184, "step": 7681 }, { "epoch": 4.041031036296686, "grad_norm": 2.2131636142730713, "learning_rate": 1.2232252712040874e-06, "loss": 0.7732, "step": 7682 }, { "epoch": 4.041557075223566, "grad_norm": 2.4278650283813477, "learning_rate": 1.2226280725705291e-06, "loss": 0.7584, "step": 7683 }, { "epoch": 4.042083114150447, "grad_norm": 2.298429489135742, "learning_rate": 1.222030972564727e-06, "loss": 0.8278, "step": 7684 }, { "epoch": 4.042609153077327, "grad_norm": 2.516505241394043, "learning_rate": 1.221433971232782e-06, "loss": 0.8023, "step": 7685 }, { "epoch": 4.0431351920042085, "grad_norm": 2.374025344848633, "learning_rate": 1.2208370686207903e-06, "loss": 0.7397, "step": 7686 }, { "epoch": 4.043661230931089, "grad_norm": 2.4452414512634277, "learning_rate": 1.2202402647748396e-06, "loss": 0.7616, "step": 7687 }, { "epoch": 4.04418726985797, "grad_norm": 2.338942289352417, "learning_rate": 1.2196435597410112e-06, "loss": 0.8357, "step": 7688 }, { "epoch": 4.04471330878485, "grad_norm": 2.2871742248535156, "learning_rate": 1.219046953565375e-06, "loss": 0.7945, "step": 7689 }, { "epoch": 4.045239347711731, "grad_norm": 2.5458860397338867, "learning_rate": 1.2184504462939978e-06, "loss": 0.7495, "step": 7690 }, { "epoch": 4.045765386638611, "grad_norm": 2.5625433921813965, "learning_rate": 1.217854037972937e-06, "loss": 0.7942, "step": 7691 }, { "epoch": 4.046291425565492, "grad_norm": 2.6981937885284424, "learning_rate": 1.2172577286482406e-06, "loss": 0.7677, "step": 7692 }, { "epoch": 4.0468174644923725, "grad_norm": 2.3438384532928467, "learning_rate": 1.2166615183659514e-06, "loss": 0.743, "step": 7693 }, { "epoch": 4.047343503419253, "grad_norm": 2.5296034812927246, "learning_rate": 1.2160654071721034e-06, "loss": 0.8039, "step": 7694 }, { "epoch": 4.047869542346134, "grad_norm": 2.486940383911133, "learning_rate": 1.2154693951127237e-06, "loss": 0.7596, "step": 7695 }, { "epoch": 4.048395581273014, "grad_norm": 2.4168357849121094, "learning_rate": 1.2148734822338306e-06, "loss": 0.7545, "step": 7696 }, { "epoch": 4.048921620199895, "grad_norm": 2.291905641555786, "learning_rate": 1.214277668581436e-06, "loss": 0.7972, "step": 7697 }, { "epoch": 4.049447659126775, "grad_norm": 2.211524248123169, "learning_rate": 1.2136819542015435e-06, "loss": 0.7758, "step": 7698 }, { "epoch": 4.049973698053656, "grad_norm": 2.2766170501708984, "learning_rate": 1.2130863391401498e-06, "loss": 0.7456, "step": 7699 }, { "epoch": 4.0504997369805364, "grad_norm": 2.4070966243743896, "learning_rate": 1.2124908234432414e-06, "loss": 0.7581, "step": 7700 }, { "epoch": 4.0510257759074175, "grad_norm": 2.383394241333008, "learning_rate": 1.211895407156801e-06, "loss": 0.7419, "step": 7701 }, { "epoch": 4.051551814834298, "grad_norm": 2.2513961791992188, "learning_rate": 1.2113000903267991e-06, "loss": 0.7322, "step": 7702 }, { "epoch": 4.052077853761178, "grad_norm": 2.2502660751342773, "learning_rate": 1.2107048729992026e-06, "loss": 0.8014, "step": 7703 }, { "epoch": 4.052603892688059, "grad_norm": 2.332552194595337, "learning_rate": 1.2101097552199687e-06, "loss": 0.7736, "step": 7704 }, { "epoch": 4.053129931614939, "grad_norm": 2.487950563430786, "learning_rate": 1.2095147370350477e-06, "loss": 0.7634, "step": 7705 }, { "epoch": 4.05365597054182, "grad_norm": 2.260653495788574, "learning_rate": 1.2089198184903822e-06, "loss": 0.7614, "step": 7706 }, { "epoch": 4.0541820094687, "grad_norm": 2.232595205307007, "learning_rate": 1.208324999631905e-06, "loss": 0.8075, "step": 7707 }, { "epoch": 4.0547080483955815, "grad_norm": 2.4441184997558594, "learning_rate": 1.2077302805055445e-06, "loss": 0.7715, "step": 7708 }, { "epoch": 4.055234087322462, "grad_norm": 2.4055707454681396, "learning_rate": 1.207135661157219e-06, "loss": 0.737, "step": 7709 }, { "epoch": 4.055760126249343, "grad_norm": 2.4069275856018066, "learning_rate": 1.2065411416328404e-06, "loss": 0.765, "step": 7710 }, { "epoch": 4.056286165176223, "grad_norm": 2.3041064739227295, "learning_rate": 1.2059467219783124e-06, "loss": 0.7806, "step": 7711 }, { "epoch": 4.056812204103104, "grad_norm": 2.369340419769287, "learning_rate": 1.2053524022395319e-06, "loss": 0.7813, "step": 7712 }, { "epoch": 4.057338243029984, "grad_norm": 2.487205743789673, "learning_rate": 1.2047581824623851e-06, "loss": 0.768, "step": 7713 }, { "epoch": 4.057864281956864, "grad_norm": 2.470015048980713, "learning_rate": 1.2041640626927547e-06, "loss": 0.7901, "step": 7714 }, { "epoch": 4.0583903208837455, "grad_norm": 2.521562099456787, "learning_rate": 1.2035700429765118e-06, "loss": 0.7861, "step": 7715 }, { "epoch": 4.058916359810626, "grad_norm": 2.480879068374634, "learning_rate": 1.202976123359522e-06, "loss": 0.8034, "step": 7716 }, { "epoch": 4.059442398737507, "grad_norm": 2.2808971405029297, "learning_rate": 1.202382303887643e-06, "loss": 0.7371, "step": 7717 }, { "epoch": 4.059968437664387, "grad_norm": 2.4732885360717773, "learning_rate": 1.2017885846067246e-06, "loss": 0.8269, "step": 7718 }, { "epoch": 4.060494476591268, "grad_norm": 2.2668793201446533, "learning_rate": 1.2011949655626087e-06, "loss": 0.8009, "step": 7719 }, { "epoch": 4.061020515518148, "grad_norm": 2.426351547241211, "learning_rate": 1.20060144680113e-06, "loss": 0.7772, "step": 7720 }, { "epoch": 4.061546554445029, "grad_norm": 2.5291004180908203, "learning_rate": 1.2000080283681138e-06, "loss": 0.8024, "step": 7721 }, { "epoch": 4.0620725933719095, "grad_norm": 2.284125804901123, "learning_rate": 1.1994147103093789e-06, "loss": 0.7959, "step": 7722 }, { "epoch": 4.06259863229879, "grad_norm": 2.5429487228393555, "learning_rate": 1.198821492670737e-06, "loss": 0.7731, "step": 7723 }, { "epoch": 4.063124671225671, "grad_norm": 2.3107640743255615, "learning_rate": 1.198228375497991e-06, "loss": 0.8126, "step": 7724 }, { "epoch": 4.063650710152551, "grad_norm": 2.2157325744628906, "learning_rate": 1.197635358836937e-06, "loss": 0.7424, "step": 7725 }, { "epoch": 4.064176749079432, "grad_norm": 2.404575824737549, "learning_rate": 1.1970424427333615e-06, "loss": 0.7314, "step": 7726 }, { "epoch": 4.064702788006312, "grad_norm": 2.5743021965026855, "learning_rate": 1.1964496272330457e-06, "loss": 0.7539, "step": 7727 }, { "epoch": 4.065228826933193, "grad_norm": 2.383334159851074, "learning_rate": 1.1958569123817601e-06, "loss": 0.7289, "step": 7728 }, { "epoch": 4.0657548658600735, "grad_norm": 2.289358377456665, "learning_rate": 1.19526429822527e-06, "loss": 0.7958, "step": 7729 }, { "epoch": 4.066280904786955, "grad_norm": 2.3746402263641357, "learning_rate": 1.1946717848093316e-06, "loss": 0.7907, "step": 7730 }, { "epoch": 4.066806943713835, "grad_norm": 2.483088493347168, "learning_rate": 1.1940793721796947e-06, "loss": 0.7602, "step": 7731 }, { "epoch": 4.067332982640716, "grad_norm": 2.383753776550293, "learning_rate": 1.1934870603820995e-06, "loss": 0.822, "step": 7732 }, { "epoch": 4.067859021567596, "grad_norm": 2.3820712566375732, "learning_rate": 1.1928948494622793e-06, "loss": 0.7986, "step": 7733 }, { "epoch": 4.068385060494476, "grad_norm": 2.3925375938415527, "learning_rate": 1.1923027394659612e-06, "loss": 0.7875, "step": 7734 }, { "epoch": 4.068911099421357, "grad_norm": 2.750485420227051, "learning_rate": 1.1917107304388604e-06, "loss": 0.7651, "step": 7735 }, { "epoch": 4.0694371383482375, "grad_norm": 2.275725841522217, "learning_rate": 1.1911188224266876e-06, "loss": 0.7952, "step": 7736 }, { "epoch": 4.069963177275119, "grad_norm": 2.174393653869629, "learning_rate": 1.1905270154751452e-06, "loss": 0.754, "step": 7737 }, { "epoch": 4.070489216201999, "grad_norm": 2.3273699283599854, "learning_rate": 1.1899353096299286e-06, "loss": 0.778, "step": 7738 }, { "epoch": 4.07101525512888, "grad_norm": 2.447376251220703, "learning_rate": 1.1893437049367218e-06, "loss": 0.7327, "step": 7739 }, { "epoch": 4.07154129405576, "grad_norm": 2.3246965408325195, "learning_rate": 1.1887522014412052e-06, "loss": 0.7708, "step": 7740 }, { "epoch": 4.072067332982641, "grad_norm": 2.37435245513916, "learning_rate": 1.1881607991890498e-06, "loss": 0.7654, "step": 7741 }, { "epoch": 4.072593371909521, "grad_norm": 2.2915642261505127, "learning_rate": 1.1875694982259172e-06, "loss": 0.7127, "step": 7742 }, { "epoch": 4.0731194108364015, "grad_norm": 2.5350959300994873, "learning_rate": 1.1869782985974634e-06, "loss": 0.7783, "step": 7743 }, { "epoch": 4.073645449763283, "grad_norm": 2.5525081157684326, "learning_rate": 1.186387200349336e-06, "loss": 0.7845, "step": 7744 }, { "epoch": 4.074171488690163, "grad_norm": 2.3718795776367188, "learning_rate": 1.185796203527174e-06, "loss": 0.7883, "step": 7745 }, { "epoch": 4.074697527617044, "grad_norm": 2.3415958881378174, "learning_rate": 1.1852053081766098e-06, "loss": 0.7963, "step": 7746 }, { "epoch": 4.075223566543924, "grad_norm": 2.524775743484497, "learning_rate": 1.1846145143432673e-06, "loss": 0.7904, "step": 7747 }, { "epoch": 4.075749605470805, "grad_norm": 2.3359901905059814, "learning_rate": 1.184023822072763e-06, "loss": 0.7241, "step": 7748 }, { "epoch": 4.076275644397685, "grad_norm": 2.8185975551605225, "learning_rate": 1.1834332314107033e-06, "loss": 0.7303, "step": 7749 }, { "epoch": 4.076801683324566, "grad_norm": 2.3891637325286865, "learning_rate": 1.1828427424026909e-06, "loss": 0.8098, "step": 7750 }, { "epoch": 4.077327722251447, "grad_norm": 2.2731916904449463, "learning_rate": 1.1822523550943158e-06, "loss": 0.7666, "step": 7751 }, { "epoch": 4.077853761178327, "grad_norm": 2.383380174636841, "learning_rate": 1.1816620695311642e-06, "loss": 0.7997, "step": 7752 }, { "epoch": 4.078379800105208, "grad_norm": 2.363997220993042, "learning_rate": 1.1810718857588123e-06, "loss": 0.773, "step": 7753 }, { "epoch": 4.078905839032088, "grad_norm": 2.30194354057312, "learning_rate": 1.18048180382283e-06, "loss": 0.8112, "step": 7754 }, { "epoch": 4.079431877958969, "grad_norm": 2.411508560180664, "learning_rate": 1.1798918237687773e-06, "loss": 0.7772, "step": 7755 }, { "epoch": 4.079957916885849, "grad_norm": 2.4400744438171387, "learning_rate": 1.1793019456422092e-06, "loss": 0.8212, "step": 7756 }, { "epoch": 4.08048395581273, "grad_norm": 2.3287501335144043, "learning_rate": 1.1787121694886686e-06, "loss": 0.7715, "step": 7757 }, { "epoch": 4.081009994739611, "grad_norm": 2.329085111618042, "learning_rate": 1.1781224953536942e-06, "loss": 0.7447, "step": 7758 }, { "epoch": 4.081536033666492, "grad_norm": 2.426537036895752, "learning_rate": 1.1775329232828159e-06, "loss": 0.8114, "step": 7759 }, { "epoch": 4.082062072593372, "grad_norm": 2.6044347286224365, "learning_rate": 1.1769434533215545e-06, "loss": 0.7692, "step": 7760 }, { "epoch": 4.082588111520253, "grad_norm": 2.2448205947875977, "learning_rate": 1.176354085515426e-06, "loss": 0.7704, "step": 7761 }, { "epoch": 4.083114150447133, "grad_norm": 2.430553913116455, "learning_rate": 1.1757648199099334e-06, "loss": 0.7635, "step": 7762 }, { "epoch": 4.083640189374013, "grad_norm": 2.4197356700897217, "learning_rate": 1.1751756565505773e-06, "loss": 0.7774, "step": 7763 }, { "epoch": 4.084166228300894, "grad_norm": 2.310459852218628, "learning_rate": 1.1745865954828455e-06, "loss": 0.8293, "step": 7764 }, { "epoch": 4.084692267227775, "grad_norm": 2.244209051132202, "learning_rate": 1.1739976367522216e-06, "loss": 0.7823, "step": 7765 }, { "epoch": 4.085218306154656, "grad_norm": 2.3155574798583984, "learning_rate": 1.17340878040418e-06, "loss": 0.7783, "step": 7766 }, { "epoch": 4.085744345081536, "grad_norm": 2.362194061279297, "learning_rate": 1.1728200264841865e-06, "loss": 0.7499, "step": 7767 }, { "epoch": 4.086270384008417, "grad_norm": 2.3379502296447754, "learning_rate": 1.1722313750377008e-06, "loss": 0.7446, "step": 7768 }, { "epoch": 4.086796422935297, "grad_norm": 2.399310350418091, "learning_rate": 1.1716428261101724e-06, "loss": 0.7825, "step": 7769 }, { "epoch": 4.087322461862178, "grad_norm": 2.4122979640960693, "learning_rate": 1.1710543797470455e-06, "loss": 0.7839, "step": 7770 }, { "epoch": 4.087848500789058, "grad_norm": 2.354429006576538, "learning_rate": 1.170466035993753e-06, "loss": 0.7737, "step": 7771 }, { "epoch": 4.088374539715939, "grad_norm": 2.311234474182129, "learning_rate": 1.1698777948957229e-06, "loss": 0.8049, "step": 7772 }, { "epoch": 4.08890057864282, "grad_norm": 2.706399917602539, "learning_rate": 1.1692896564983738e-06, "loss": 0.7508, "step": 7773 }, { "epoch": 4.0894266175697, "grad_norm": 2.4095563888549805, "learning_rate": 1.1687016208471177e-06, "loss": 0.7995, "step": 7774 }, { "epoch": 4.089952656496581, "grad_norm": 2.3296358585357666, "learning_rate": 1.1681136879873561e-06, "loss": 0.7541, "step": 7775 }, { "epoch": 4.090478695423461, "grad_norm": 2.4263782501220703, "learning_rate": 1.1675258579644846e-06, "loss": 0.7482, "step": 7776 }, { "epoch": 4.091004734350342, "grad_norm": 2.306771993637085, "learning_rate": 1.166938130823892e-06, "loss": 0.8002, "step": 7777 }, { "epoch": 4.091530773277222, "grad_norm": 2.297332763671875, "learning_rate": 1.1663505066109553e-06, "loss": 0.7339, "step": 7778 }, { "epoch": 4.0920568122041034, "grad_norm": 2.3062474727630615, "learning_rate": 1.165762985371047e-06, "loss": 0.7974, "step": 7779 }, { "epoch": 4.092582851130984, "grad_norm": 2.3618736267089844, "learning_rate": 1.16517556714953e-06, "loss": 0.7773, "step": 7780 }, { "epoch": 4.093108890057865, "grad_norm": 2.439612627029419, "learning_rate": 1.16458825199176e-06, "loss": 0.7639, "step": 7781 }, { "epoch": 4.093634928984745, "grad_norm": 2.4381349086761475, "learning_rate": 1.1640010399430848e-06, "loss": 0.8299, "step": 7782 }, { "epoch": 4.094160967911625, "grad_norm": 2.3514621257781982, "learning_rate": 1.1634139310488435e-06, "loss": 0.7615, "step": 7783 }, { "epoch": 4.094687006838506, "grad_norm": 2.6106362342834473, "learning_rate": 1.1628269253543686e-06, "loss": 0.7822, "step": 7784 }, { "epoch": 4.095213045765386, "grad_norm": 2.499948263168335, "learning_rate": 1.1622400229049818e-06, "loss": 0.7796, "step": 7785 }, { "epoch": 4.095739084692267, "grad_norm": 2.460099697113037, "learning_rate": 1.1616532237459998e-06, "loss": 0.8061, "step": 7786 }, { "epoch": 4.096265123619148, "grad_norm": 2.6206612586975098, "learning_rate": 1.161066527922731e-06, "loss": 0.8141, "step": 7787 }, { "epoch": 4.096791162546029, "grad_norm": 2.399508476257324, "learning_rate": 1.1604799354804732e-06, "loss": 0.7328, "step": 7788 }, { "epoch": 4.097317201472909, "grad_norm": 2.327446460723877, "learning_rate": 1.159893446464519e-06, "loss": 0.7909, "step": 7789 }, { "epoch": 4.09784324039979, "grad_norm": 2.298809289932251, "learning_rate": 1.159307060920152e-06, "loss": 0.8012, "step": 7790 }, { "epoch": 4.09836927932667, "grad_norm": 2.6483848094940186, "learning_rate": 1.158720778892649e-06, "loss": 0.7701, "step": 7791 }, { "epoch": 4.09889531825355, "grad_norm": 2.2934787273406982, "learning_rate": 1.158134600427276e-06, "loss": 0.7322, "step": 7792 }, { "epoch": 4.099421357180431, "grad_norm": 2.320493459701538, "learning_rate": 1.157548525569293e-06, "loss": 0.7261, "step": 7793 }, { "epoch": 4.099947396107312, "grad_norm": 4.660049915313721, "learning_rate": 1.1569625543639523e-06, "loss": 0.7763, "step": 7794 }, { "epoch": 4.100473435034193, "grad_norm": 2.407609224319458, "learning_rate": 1.1563766868564975e-06, "loss": 0.7651, "step": 7795 }, { "epoch": 4.100999473961073, "grad_norm": 2.374664545059204, "learning_rate": 1.1557909230921638e-06, "loss": 0.7993, "step": 7796 }, { "epoch": 4.101525512887954, "grad_norm": 2.365990400314331, "learning_rate": 1.1552052631161791e-06, "loss": 0.7823, "step": 7797 }, { "epoch": 4.102051551814834, "grad_norm": 2.660829544067383, "learning_rate": 1.1546197069737648e-06, "loss": 0.7743, "step": 7798 }, { "epoch": 4.102577590741715, "grad_norm": 2.6114392280578613, "learning_rate": 1.1540342547101294e-06, "loss": 0.8279, "step": 7799 }, { "epoch": 4.103103629668595, "grad_norm": 2.3368947505950928, "learning_rate": 1.153448906370479e-06, "loss": 0.7623, "step": 7800 }, { "epoch": 4.103629668595476, "grad_norm": 2.4617700576782227, "learning_rate": 1.1528636620000075e-06, "loss": 0.7709, "step": 7801 }, { "epoch": 4.104155707522357, "grad_norm": 2.2839865684509277, "learning_rate": 1.1522785216439033e-06, "loss": 0.7552, "step": 7802 }, { "epoch": 4.104681746449237, "grad_norm": 2.2530434131622314, "learning_rate": 1.1516934853473457e-06, "loss": 0.8288, "step": 7803 }, { "epoch": 4.105207785376118, "grad_norm": 2.7049949169158936, "learning_rate": 1.1511085531555067e-06, "loss": 0.7841, "step": 7804 }, { "epoch": 4.105733824302998, "grad_norm": 2.334717273712158, "learning_rate": 1.1505237251135492e-06, "loss": 0.7472, "step": 7805 }, { "epoch": 4.106259863229879, "grad_norm": 2.3946545124053955, "learning_rate": 1.1499390012666298e-06, "loss": 0.7838, "step": 7806 }, { "epoch": 4.106785902156759, "grad_norm": 2.527585506439209, "learning_rate": 1.149354381659894e-06, "loss": 0.8067, "step": 7807 }, { "epoch": 4.1073119410836405, "grad_norm": 2.3792295455932617, "learning_rate": 1.1487698663384823e-06, "loss": 0.7719, "step": 7808 }, { "epoch": 4.107837980010521, "grad_norm": 2.401487112045288, "learning_rate": 1.1481854553475255e-06, "loss": 0.7861, "step": 7809 }, { "epoch": 4.108364018937402, "grad_norm": 4.616043567657471, "learning_rate": 1.1476011487321483e-06, "loss": 0.7999, "step": 7810 }, { "epoch": 4.108890057864282, "grad_norm": 2.420893907546997, "learning_rate": 1.1470169465374636e-06, "loss": 0.7844, "step": 7811 }, { "epoch": 4.109416096791162, "grad_norm": 2.3621766567230225, "learning_rate": 1.1464328488085799e-06, "loss": 0.7626, "step": 7812 }, { "epoch": 4.109942135718043, "grad_norm": 2.370089530944824, "learning_rate": 1.1458488555905965e-06, "loss": 0.7712, "step": 7813 }, { "epoch": 4.110468174644923, "grad_norm": 2.3359615802764893, "learning_rate": 1.1452649669286031e-06, "loss": 0.7684, "step": 7814 }, { "epoch": 4.1109942135718045, "grad_norm": 2.312797784805298, "learning_rate": 1.1446811828676831e-06, "loss": 0.7363, "step": 7815 }, { "epoch": 4.111520252498685, "grad_norm": 2.5029871463775635, "learning_rate": 1.1440975034529116e-06, "loss": 0.7871, "step": 7816 }, { "epoch": 4.112046291425566, "grad_norm": 2.3244025707244873, "learning_rate": 1.1435139287293554e-06, "loss": 0.7578, "step": 7817 }, { "epoch": 4.112572330352446, "grad_norm": 2.322390079498291, "learning_rate": 1.142930458742073e-06, "loss": 0.7464, "step": 7818 }, { "epoch": 4.113098369279327, "grad_norm": 2.3366541862487793, "learning_rate": 1.1423470935361144e-06, "loss": 0.7242, "step": 7819 }, { "epoch": 4.113624408206207, "grad_norm": 2.3734798431396484, "learning_rate": 1.1417638331565243e-06, "loss": 0.7637, "step": 7820 }, { "epoch": 4.114150447133087, "grad_norm": 2.4886093139648438, "learning_rate": 1.1411806776483341e-06, "loss": 0.7899, "step": 7821 }, { "epoch": 4.1146764860599685, "grad_norm": 2.3302865028381348, "learning_rate": 1.1405976270565715e-06, "loss": 0.7668, "step": 7822 }, { "epoch": 4.115202524986849, "grad_norm": 2.523188829421997, "learning_rate": 1.1400146814262559e-06, "loss": 0.7895, "step": 7823 }, { "epoch": 4.11572856391373, "grad_norm": 2.2746338844299316, "learning_rate": 1.1394318408023949e-06, "loss": 0.715, "step": 7824 }, { "epoch": 4.11625460284061, "grad_norm": 2.451643228530884, "learning_rate": 1.1388491052299918e-06, "loss": 0.7605, "step": 7825 }, { "epoch": 4.116780641767491, "grad_norm": 2.5565998554229736, "learning_rate": 1.1382664747540403e-06, "loss": 0.8328, "step": 7826 }, { "epoch": 4.117306680694371, "grad_norm": 2.3871402740478516, "learning_rate": 1.1376839494195274e-06, "loss": 0.7604, "step": 7827 }, { "epoch": 4.117832719621252, "grad_norm": 2.228292942047119, "learning_rate": 1.1371015292714283e-06, "loss": 0.7949, "step": 7828 }, { "epoch": 4.1183587585481325, "grad_norm": 2.357185125350952, "learning_rate": 1.1365192143547138e-06, "loss": 0.7746, "step": 7829 }, { "epoch": 4.118884797475014, "grad_norm": 2.3699934482574463, "learning_rate": 1.1359370047143452e-06, "loss": 0.7592, "step": 7830 }, { "epoch": 4.119410836401894, "grad_norm": 2.3717217445373535, "learning_rate": 1.135354900395276e-06, "loss": 0.7699, "step": 7831 }, { "epoch": 4.119936875328774, "grad_norm": 2.2494242191314697, "learning_rate": 1.134772901442451e-06, "loss": 0.7685, "step": 7832 }, { "epoch": 4.120462914255655, "grad_norm": 2.207186460494995, "learning_rate": 1.1341910079008076e-06, "loss": 0.7913, "step": 7833 }, { "epoch": 4.120988953182535, "grad_norm": 2.65362548828125, "learning_rate": 1.133609219815275e-06, "loss": 0.7711, "step": 7834 }, { "epoch": 4.121514992109416, "grad_norm": 2.525789499282837, "learning_rate": 1.1330275372307725e-06, "loss": 0.8181, "step": 7835 }, { "epoch": 4.1220410310362965, "grad_norm": 2.372035026550293, "learning_rate": 1.1324459601922142e-06, "loss": 0.7846, "step": 7836 }, { "epoch": 4.122567069963178, "grad_norm": 2.4437873363494873, "learning_rate": 1.1318644887445028e-06, "loss": 0.7584, "step": 7837 }, { "epoch": 4.123093108890058, "grad_norm": 2.370492696762085, "learning_rate": 1.1312831229325357e-06, "loss": 0.8236, "step": 7838 }, { "epoch": 4.123619147816939, "grad_norm": 2.4467103481292725, "learning_rate": 1.1307018628012009e-06, "loss": 0.7321, "step": 7839 }, { "epoch": 4.124145186743819, "grad_norm": 2.547960042953491, "learning_rate": 1.130120708395378e-06, "loss": 0.7682, "step": 7840 }, { "epoch": 4.124671225670699, "grad_norm": 2.596667766571045, "learning_rate": 1.12953965975994e-06, "loss": 0.7775, "step": 7841 }, { "epoch": 4.12519726459758, "grad_norm": 2.547262191772461, "learning_rate": 1.128958716939749e-06, "loss": 0.7373, "step": 7842 }, { "epoch": 4.1257233035244605, "grad_norm": 2.3187062740325928, "learning_rate": 1.1283778799796607e-06, "loss": 0.7692, "step": 7843 }, { "epoch": 4.126249342451342, "grad_norm": 2.3911385536193848, "learning_rate": 1.1277971489245225e-06, "loss": 0.7638, "step": 7844 }, { "epoch": 4.126775381378222, "grad_norm": 2.4308583736419678, "learning_rate": 1.127216523819174e-06, "loss": 0.7895, "step": 7845 }, { "epoch": 4.127301420305103, "grad_norm": 2.3666539192199707, "learning_rate": 1.1266360047084457e-06, "loss": 0.7683, "step": 7846 }, { "epoch": 4.127827459231983, "grad_norm": 2.378697633743286, "learning_rate": 1.1260555916371615e-06, "loss": 0.775, "step": 7847 }, { "epoch": 4.128353498158864, "grad_norm": 2.344855785369873, "learning_rate": 1.1254752846501338e-06, "loss": 0.7025, "step": 7848 }, { "epoch": 4.128879537085744, "grad_norm": 2.4904074668884277, "learning_rate": 1.124895083792171e-06, "loss": 0.7821, "step": 7849 }, { "epoch": 4.1294055760126245, "grad_norm": 2.2896533012390137, "learning_rate": 1.124314989108069e-06, "loss": 0.8011, "step": 7850 }, { "epoch": 4.129931614939506, "grad_norm": 2.284029245376587, "learning_rate": 1.1237350006426196e-06, "loss": 0.7775, "step": 7851 }, { "epoch": 4.130457653866386, "grad_norm": 2.236741542816162, "learning_rate": 1.1231551184406034e-06, "loss": 0.7454, "step": 7852 }, { "epoch": 4.130983692793267, "grad_norm": 2.1616973876953125, "learning_rate": 1.1225753425467951e-06, "loss": 0.6832, "step": 7853 }, { "epoch": 4.131509731720147, "grad_norm": 2.390833616256714, "learning_rate": 1.121995673005959e-06, "loss": 0.7554, "step": 7854 }, { "epoch": 4.132035770647028, "grad_norm": 2.5568339824676514, "learning_rate": 1.1214161098628528e-06, "loss": 0.8129, "step": 7855 }, { "epoch": 4.132561809573908, "grad_norm": 2.4395909309387207, "learning_rate": 1.1208366531622263e-06, "loss": 0.7695, "step": 7856 }, { "epoch": 4.133087848500789, "grad_norm": 2.439779281616211, "learning_rate": 1.1202573029488188e-06, "loss": 0.8002, "step": 7857 }, { "epoch": 4.1336138874276696, "grad_norm": 2.5206124782562256, "learning_rate": 1.1196780592673625e-06, "loss": 0.7882, "step": 7858 }, { "epoch": 4.134139926354551, "grad_norm": 2.379295587539673, "learning_rate": 1.1190989221625836e-06, "loss": 0.7885, "step": 7859 }, { "epoch": 4.134665965281431, "grad_norm": 2.415818691253662, "learning_rate": 1.1185198916791961e-06, "loss": 0.7737, "step": 7860 }, { "epoch": 4.135192004208311, "grad_norm": 2.4857852458953857, "learning_rate": 1.1179409678619085e-06, "loss": 0.784, "step": 7861 }, { "epoch": 4.135718043135192, "grad_norm": 2.301772356033325, "learning_rate": 1.1173621507554202e-06, "loss": 0.7705, "step": 7862 }, { "epoch": 4.136244082062072, "grad_norm": 2.5830729007720947, "learning_rate": 1.1167834404044238e-06, "loss": 0.7895, "step": 7863 }, { "epoch": 4.136770120988953, "grad_norm": 2.3910720348358154, "learning_rate": 1.1162048368536004e-06, "loss": 0.7474, "step": 7864 }, { "epoch": 4.1372961599158335, "grad_norm": 2.245880365371704, "learning_rate": 1.1156263401476258e-06, "loss": 0.7888, "step": 7865 }, { "epoch": 4.137822198842715, "grad_norm": 2.3316280841827393, "learning_rate": 1.1150479503311668e-06, "loss": 0.8011, "step": 7866 }, { "epoch": 4.138348237769595, "grad_norm": 2.4312551021575928, "learning_rate": 1.1144696674488811e-06, "loss": 0.7623, "step": 7867 }, { "epoch": 4.138874276696476, "grad_norm": 2.4251980781555176, "learning_rate": 1.1138914915454191e-06, "loss": 0.8089, "step": 7868 }, { "epoch": 4.139400315623356, "grad_norm": 2.2556493282318115, "learning_rate": 1.1133134226654227e-06, "loss": 0.7827, "step": 7869 }, { "epoch": 4.139926354550237, "grad_norm": 2.381201982498169, "learning_rate": 1.1127354608535261e-06, "loss": 0.7766, "step": 7870 }, { "epoch": 4.140452393477117, "grad_norm": 2.4276113510131836, "learning_rate": 1.1121576061543532e-06, "loss": 0.8002, "step": 7871 }, { "epoch": 4.1409784324039975, "grad_norm": 2.4709784984588623, "learning_rate": 1.1115798586125226e-06, "loss": 0.7749, "step": 7872 }, { "epoch": 4.141504471330879, "grad_norm": 2.306170701980591, "learning_rate": 1.1110022182726409e-06, "loss": 0.7709, "step": 7873 }, { "epoch": 4.142030510257759, "grad_norm": 2.57257080078125, "learning_rate": 1.11042468517931e-06, "loss": 0.7682, "step": 7874 }, { "epoch": 4.14255654918464, "grad_norm": 2.347698450088501, "learning_rate": 1.1098472593771217e-06, "loss": 0.7591, "step": 7875 }, { "epoch": 4.14308258811152, "grad_norm": 2.506532907485962, "learning_rate": 1.10926994091066e-06, "loss": 0.7371, "step": 7876 }, { "epoch": 4.143608627038401, "grad_norm": 2.384580612182617, "learning_rate": 1.1086927298245015e-06, "loss": 0.778, "step": 7877 }, { "epoch": 4.144134665965281, "grad_norm": 2.3241424560546875, "learning_rate": 1.1081156261632114e-06, "loss": 0.7307, "step": 7878 }, { "epoch": 4.144660704892162, "grad_norm": 2.4754321575164795, "learning_rate": 1.10753862997135e-06, "loss": 0.7558, "step": 7879 }, { "epoch": 4.145186743819043, "grad_norm": 2.362301826477051, "learning_rate": 1.1069617412934677e-06, "loss": 0.7382, "step": 7880 }, { "epoch": 4.145712782745923, "grad_norm": 2.3099799156188965, "learning_rate": 1.1063849601741072e-06, "loss": 0.7967, "step": 7881 }, { "epoch": 4.146238821672804, "grad_norm": 2.604663848876953, "learning_rate": 1.1058082866578027e-06, "loss": 0.8312, "step": 7882 }, { "epoch": 4.146764860599684, "grad_norm": 2.350358486175537, "learning_rate": 1.1052317207890803e-06, "loss": 0.7844, "step": 7883 }, { "epoch": 4.147290899526565, "grad_norm": 2.424131393432617, "learning_rate": 1.1046552626124566e-06, "loss": 0.7818, "step": 7884 }, { "epoch": 4.147816938453445, "grad_norm": 2.420217514038086, "learning_rate": 1.1040789121724416e-06, "loss": 0.7898, "step": 7885 }, { "epoch": 4.148342977380326, "grad_norm": 2.4611406326293945, "learning_rate": 1.103502669513535e-06, "loss": 0.7721, "step": 7886 }, { "epoch": 4.148869016307207, "grad_norm": 2.424072504043579, "learning_rate": 1.10292653468023e-06, "loss": 0.7533, "step": 7887 }, { "epoch": 4.149395055234088, "grad_norm": 2.274604320526123, "learning_rate": 1.102350507717011e-06, "loss": 0.7617, "step": 7888 }, { "epoch": 4.149921094160968, "grad_norm": 2.3314974308013916, "learning_rate": 1.1017745886683536e-06, "loss": 0.8066, "step": 7889 }, { "epoch": 4.150447133087848, "grad_norm": 2.3243188858032227, "learning_rate": 1.1011987775787256e-06, "loss": 0.7583, "step": 7890 }, { "epoch": 4.150973172014729, "grad_norm": 2.4795353412628174, "learning_rate": 1.1006230744925873e-06, "loss": 0.7985, "step": 7891 }, { "epoch": 4.151499210941609, "grad_norm": 2.4273805618286133, "learning_rate": 1.1000474794543872e-06, "loss": 0.7497, "step": 7892 }, { "epoch": 4.15202524986849, "grad_norm": 2.360260009765625, "learning_rate": 1.099471992508569e-06, "loss": 0.7682, "step": 7893 }, { "epoch": 4.152551288795371, "grad_norm": 2.533961296081543, "learning_rate": 1.0988966136995671e-06, "loss": 0.7655, "step": 7894 }, { "epoch": 4.153077327722252, "grad_norm": 2.443802833557129, "learning_rate": 1.098321343071807e-06, "loss": 0.7426, "step": 7895 }, { "epoch": 4.153603366649132, "grad_norm": 2.281114339828491, "learning_rate": 1.0977461806697076e-06, "loss": 0.6986, "step": 7896 }, { "epoch": 4.154129405576013, "grad_norm": 2.374666213989258, "learning_rate": 1.0971711265376755e-06, "loss": 0.7227, "step": 7897 }, { "epoch": 4.154655444502893, "grad_norm": 2.452606439590454, "learning_rate": 1.0965961807201139e-06, "loss": 0.8167, "step": 7898 }, { "epoch": 4.155181483429774, "grad_norm": 2.872177839279175, "learning_rate": 1.096021343261413e-06, "loss": 0.8082, "step": 7899 }, { "epoch": 4.155707522356654, "grad_norm": 2.452517509460449, "learning_rate": 1.095446614205958e-06, "loss": 0.8212, "step": 7900 }, { "epoch": 4.156233561283535, "grad_norm": 2.414577007293701, "learning_rate": 1.0948719935981247e-06, "loss": 0.7953, "step": 7901 }, { "epoch": 4.156759600210416, "grad_norm": 2.5075767040252686, "learning_rate": 1.09429748148228e-06, "loss": 0.771, "step": 7902 }, { "epoch": 4.157285639137296, "grad_norm": 2.2697105407714844, "learning_rate": 1.093723077902783e-06, "loss": 0.7517, "step": 7903 }, { "epoch": 4.157811678064177, "grad_norm": 2.398197650909424, "learning_rate": 1.0931487829039842e-06, "loss": 0.7648, "step": 7904 }, { "epoch": 4.158337716991057, "grad_norm": 2.419999122619629, "learning_rate": 1.0925745965302268e-06, "loss": 0.8138, "step": 7905 }, { "epoch": 4.158863755917938, "grad_norm": 2.4904654026031494, "learning_rate": 1.0920005188258426e-06, "loss": 0.7886, "step": 7906 }, { "epoch": 4.159389794844818, "grad_norm": 2.4547338485717773, "learning_rate": 1.0914265498351579e-06, "loss": 0.8398, "step": 7907 }, { "epoch": 4.1599158337716995, "grad_norm": 2.350738286972046, "learning_rate": 1.0908526896024899e-06, "loss": 0.8125, "step": 7908 }, { "epoch": 4.16044187269858, "grad_norm": 2.332642078399658, "learning_rate": 1.0902789381721476e-06, "loss": 0.7862, "step": 7909 }, { "epoch": 4.16096791162546, "grad_norm": 2.2313072681427, "learning_rate": 1.08970529558843e-06, "loss": 0.7607, "step": 7910 }, { "epoch": 4.161493950552341, "grad_norm": 2.4067602157592773, "learning_rate": 1.0891317618956296e-06, "loss": 0.771, "step": 7911 }, { "epoch": 4.162019989479221, "grad_norm": 2.260493040084839, "learning_rate": 1.0885583371380293e-06, "loss": 0.7482, "step": 7912 }, { "epoch": 4.162546028406102, "grad_norm": 2.312547445297241, "learning_rate": 1.0879850213599056e-06, "loss": 0.7386, "step": 7913 }, { "epoch": 4.163072067332982, "grad_norm": 2.2713053226470947, "learning_rate": 1.087411814605523e-06, "loss": 0.7564, "step": 7914 }, { "epoch": 4.1635981062598635, "grad_norm": 2.294029712677002, "learning_rate": 1.0868387169191402e-06, "loss": 0.7603, "step": 7915 }, { "epoch": 4.164124145186744, "grad_norm": 2.2648332118988037, "learning_rate": 1.0862657283450076e-06, "loss": 0.7827, "step": 7916 }, { "epoch": 4.164650184113625, "grad_norm": 2.401078939437866, "learning_rate": 1.0856928489273657e-06, "loss": 0.7813, "step": 7917 }, { "epoch": 4.165176223040505, "grad_norm": 2.2467682361602783, "learning_rate": 1.0851200787104482e-06, "loss": 0.8034, "step": 7918 }, { "epoch": 4.165702261967386, "grad_norm": 2.5552680492401123, "learning_rate": 1.0845474177384799e-06, "loss": 0.7931, "step": 7919 }, { "epoch": 4.166228300894266, "grad_norm": 2.5184807777404785, "learning_rate": 1.0839748660556748e-06, "loss": 0.8015, "step": 7920 }, { "epoch": 4.166754339821146, "grad_norm": 2.388664722442627, "learning_rate": 1.0834024237062426e-06, "loss": 0.7559, "step": 7921 }, { "epoch": 4.1672803787480275, "grad_norm": 2.360314130783081, "learning_rate": 1.0828300907343809e-06, "loss": 0.7719, "step": 7922 }, { "epoch": 4.167806417674908, "grad_norm": 2.491074562072754, "learning_rate": 1.0822578671842806e-06, "loss": 0.8234, "step": 7923 }, { "epoch": 4.168332456601789, "grad_norm": 2.279381275177002, "learning_rate": 1.0816857531001242e-06, "loss": 0.7229, "step": 7924 }, { "epoch": 4.168858495528669, "grad_norm": 2.55958890914917, "learning_rate": 1.0811137485260858e-06, "loss": 0.8091, "step": 7925 }, { "epoch": 4.16938453445555, "grad_norm": 2.462554693222046, "learning_rate": 1.0805418535063302e-06, "loss": 0.7192, "step": 7926 }, { "epoch": 4.16991057338243, "grad_norm": 2.649787664413452, "learning_rate": 1.0799700680850155e-06, "loss": 0.7685, "step": 7927 }, { "epoch": 4.170436612309311, "grad_norm": 2.303407669067383, "learning_rate": 1.079398392306288e-06, "loss": 0.7677, "step": 7928 }, { "epoch": 4.1709626512361915, "grad_norm": 2.347344398498535, "learning_rate": 1.0788268262142886e-06, "loss": 0.7768, "step": 7929 }, { "epoch": 4.171488690163072, "grad_norm": 2.3688859939575195, "learning_rate": 1.0782553698531488e-06, "loss": 0.7442, "step": 7930 }, { "epoch": 4.172014729089953, "grad_norm": 2.2453911304473877, "learning_rate": 1.0776840232669918e-06, "loss": 0.7456, "step": 7931 }, { "epoch": 4.172540768016833, "grad_norm": 2.403312921524048, "learning_rate": 1.077112786499933e-06, "loss": 0.7922, "step": 7932 }, { "epoch": 4.173066806943714, "grad_norm": 2.383988380432129, "learning_rate": 1.0765416595960762e-06, "loss": 0.7593, "step": 7933 }, { "epoch": 4.173592845870594, "grad_norm": 2.637141466140747, "learning_rate": 1.075970642599521e-06, "loss": 0.7957, "step": 7934 }, { "epoch": 4.174118884797475, "grad_norm": 3.780214309692383, "learning_rate": 1.0753997355543546e-06, "loss": 0.7611, "step": 7935 }, { "epoch": 4.1746449237243555, "grad_norm": 2.4713363647460938, "learning_rate": 1.0748289385046584e-06, "loss": 0.7434, "step": 7936 }, { "epoch": 4.1751709626512366, "grad_norm": 2.498781442642212, "learning_rate": 1.074258251494505e-06, "loss": 0.7775, "step": 7937 }, { "epoch": 4.175697001578117, "grad_norm": 2.463411569595337, "learning_rate": 1.0736876745679571e-06, "loss": 0.783, "step": 7938 }, { "epoch": 4.176223040504997, "grad_norm": 2.3426997661590576, "learning_rate": 1.0731172077690705e-06, "loss": 0.7625, "step": 7939 }, { "epoch": 4.176749079431878, "grad_norm": 2.374189615249634, "learning_rate": 1.0725468511418913e-06, "loss": 0.8117, "step": 7940 }, { "epoch": 4.177275118358758, "grad_norm": 2.583139419555664, "learning_rate": 1.0719766047304589e-06, "loss": 0.8155, "step": 7941 }, { "epoch": 4.177801157285639, "grad_norm": 2.43023943901062, "learning_rate": 1.0714064685788009e-06, "loss": 0.7717, "step": 7942 }, { "epoch": 4.1783271962125195, "grad_norm": 2.352152109146118, "learning_rate": 1.0708364427309389e-06, "loss": 0.7424, "step": 7943 }, { "epoch": 4.1788532351394005, "grad_norm": 2.413094997406006, "learning_rate": 1.070266527230886e-06, "loss": 0.8008, "step": 7944 }, { "epoch": 4.179379274066281, "grad_norm": 2.340256929397583, "learning_rate": 1.0696967221226468e-06, "loss": 0.7939, "step": 7945 }, { "epoch": 4.179905312993162, "grad_norm": 2.409353256225586, "learning_rate": 1.0691270274502147e-06, "loss": 0.7262, "step": 7946 }, { "epoch": 4.180431351920042, "grad_norm": 2.3177199363708496, "learning_rate": 1.0685574432575782e-06, "loss": 0.7322, "step": 7947 }, { "epoch": 4.180957390846923, "grad_norm": 2.4170241355895996, "learning_rate": 1.0679879695887163e-06, "loss": 0.7952, "step": 7948 }, { "epoch": 4.181483429773803, "grad_norm": 2.556642532348633, "learning_rate": 1.067418606487597e-06, "loss": 0.8697, "step": 7949 }, { "epoch": 4.1820094687006835, "grad_norm": 2.2710747718811035, "learning_rate": 1.0668493539981825e-06, "loss": 0.7786, "step": 7950 }, { "epoch": 4.1825355076275645, "grad_norm": 2.4677250385284424, "learning_rate": 1.0662802121644261e-06, "loss": 0.7573, "step": 7951 }, { "epoch": 4.183061546554445, "grad_norm": 2.568702459335327, "learning_rate": 1.0657111810302717e-06, "loss": 0.7722, "step": 7952 }, { "epoch": 4.183587585481326, "grad_norm": 2.448092460632324, "learning_rate": 1.0651422606396549e-06, "loss": 0.8232, "step": 7953 }, { "epoch": 4.184113624408206, "grad_norm": 2.6507174968719482, "learning_rate": 1.064573451036503e-06, "loss": 0.834, "step": 7954 }, { "epoch": 4.184639663335087, "grad_norm": 2.2621328830718994, "learning_rate": 1.0640047522647359e-06, "loss": 0.7861, "step": 7955 }, { "epoch": 4.185165702261967, "grad_norm": 2.207615375518799, "learning_rate": 1.063436164368261e-06, "loss": 0.7666, "step": 7956 }, { "epoch": 4.185691741188848, "grad_norm": 2.4889283180236816, "learning_rate": 1.0628676873909817e-06, "loss": 0.7464, "step": 7957 }, { "epoch": 4.1862177801157285, "grad_norm": 2.435574769973755, "learning_rate": 1.0622993213767913e-06, "loss": 0.7693, "step": 7958 }, { "epoch": 4.186743819042609, "grad_norm": 2.460252046585083, "learning_rate": 1.0617310663695727e-06, "loss": 0.7987, "step": 7959 }, { "epoch": 4.18726985796949, "grad_norm": 2.3807854652404785, "learning_rate": 1.061162922413202e-06, "loss": 0.8032, "step": 7960 }, { "epoch": 4.18779589689637, "grad_norm": 2.3596653938293457, "learning_rate": 1.0605948895515466e-06, "loss": 0.8245, "step": 7961 }, { "epoch": 4.188321935823251, "grad_norm": 2.4039359092712402, "learning_rate": 1.0600269678284655e-06, "loss": 0.7857, "step": 7962 }, { "epoch": 4.188847974750131, "grad_norm": 2.512507915496826, "learning_rate": 1.0594591572878097e-06, "loss": 0.8058, "step": 7963 }, { "epoch": 4.189374013677012, "grad_norm": 2.335369348526001, "learning_rate": 1.0588914579734185e-06, "loss": 0.7755, "step": 7964 }, { "epoch": 4.1899000526038925, "grad_norm": 2.4964404106140137, "learning_rate": 1.0583238699291259e-06, "loss": 0.7917, "step": 7965 }, { "epoch": 4.190426091530774, "grad_norm": 2.344651222229004, "learning_rate": 1.057756393198756e-06, "loss": 0.7757, "step": 7966 }, { "epoch": 4.190952130457654, "grad_norm": 2.308790445327759, "learning_rate": 1.0571890278261251e-06, "loss": 0.769, "step": 7967 }, { "epoch": 4.191478169384535, "grad_norm": 2.5501091480255127, "learning_rate": 1.0566217738550396e-06, "loss": 0.7866, "step": 7968 }, { "epoch": 4.192004208311415, "grad_norm": 2.419311761856079, "learning_rate": 1.056054631329299e-06, "loss": 0.8205, "step": 7969 }, { "epoch": 4.192530247238295, "grad_norm": 2.45460844039917, "learning_rate": 1.055487600292692e-06, "loss": 0.7469, "step": 7970 }, { "epoch": 4.193056286165176, "grad_norm": 2.462567090988159, "learning_rate": 1.0549206807890011e-06, "loss": 0.763, "step": 7971 }, { "epoch": 4.1935823250920565, "grad_norm": 2.517831325531006, "learning_rate": 1.0543538728619977e-06, "loss": 0.7662, "step": 7972 }, { "epoch": 4.194108364018938, "grad_norm": 2.333758592605591, "learning_rate": 1.0537871765554461e-06, "loss": 0.7568, "step": 7973 }, { "epoch": 4.194634402945818, "grad_norm": 2.551729917526245, "learning_rate": 1.0532205919131021e-06, "loss": 0.7583, "step": 7974 }, { "epoch": 4.195160441872699, "grad_norm": 2.219179153442383, "learning_rate": 1.0526541189787126e-06, "loss": 0.7384, "step": 7975 }, { "epoch": 4.195686480799579, "grad_norm": 2.4835171699523926, "learning_rate": 1.0520877577960155e-06, "loss": 0.7911, "step": 7976 }, { "epoch": 4.19621251972646, "grad_norm": 2.505699396133423, "learning_rate": 1.0515215084087422e-06, "loss": 0.7818, "step": 7977 }, { "epoch": 4.19673855865334, "grad_norm": 2.3090872764587402, "learning_rate": 1.0509553708606105e-06, "loss": 0.7398, "step": 7978 }, { "epoch": 4.1972645975802205, "grad_norm": 2.2966597080230713, "learning_rate": 1.0503893451953346e-06, "loss": 0.765, "step": 7979 }, { "epoch": 4.197790636507102, "grad_norm": 2.422903060913086, "learning_rate": 1.0498234314566177e-06, "loss": 0.7349, "step": 7980 }, { "epoch": 4.198316675433982, "grad_norm": 2.3739092350006104, "learning_rate": 1.0492576296881556e-06, "loss": 0.7598, "step": 7981 }, { "epoch": 4.198842714360863, "grad_norm": 2.3989906311035156, "learning_rate": 1.0486919399336337e-06, "loss": 0.7423, "step": 7982 }, { "epoch": 4.199368753287743, "grad_norm": 2.5307960510253906, "learning_rate": 1.0481263622367297e-06, "loss": 0.7894, "step": 7983 }, { "epoch": 4.199894792214624, "grad_norm": 2.455165386199951, "learning_rate": 1.0475608966411141e-06, "loss": 0.8049, "step": 7984 }, { "epoch": 4.200420831141504, "grad_norm": 2.29496169090271, "learning_rate": 1.0469955431904453e-06, "loss": 0.7517, "step": 7985 }, { "epoch": 4.200946870068385, "grad_norm": 2.382363796234131, "learning_rate": 1.0464303019283762e-06, "loss": 0.7534, "step": 7986 }, { "epoch": 4.201472908995266, "grad_norm": 2.312115430831909, "learning_rate": 1.0458651728985494e-06, "loss": 0.7852, "step": 7987 }, { "epoch": 4.201998947922146, "grad_norm": 2.556230068206787, "learning_rate": 1.0453001561445999e-06, "loss": 0.7151, "step": 7988 }, { "epoch": 4.202524986849027, "grad_norm": 2.3652844429016113, "learning_rate": 1.0447352517101533e-06, "loss": 0.7486, "step": 7989 }, { "epoch": 4.203051025775907, "grad_norm": 2.406468152999878, "learning_rate": 1.044170459638827e-06, "loss": 0.7607, "step": 7990 }, { "epoch": 4.203577064702788, "grad_norm": 2.2614219188690186, "learning_rate": 1.0436057799742295e-06, "loss": 0.7872, "step": 7991 }, { "epoch": 4.204103103629668, "grad_norm": 2.454523801803589, "learning_rate": 1.0430412127599592e-06, "loss": 0.7876, "step": 7992 }, { "epoch": 4.204629142556549, "grad_norm": 2.5302183628082275, "learning_rate": 1.0424767580396083e-06, "loss": 0.8046, "step": 7993 }, { "epoch": 4.20515518148343, "grad_norm": 2.363980770111084, "learning_rate": 1.0419124158567598e-06, "loss": 0.7782, "step": 7994 }, { "epoch": 4.205681220410311, "grad_norm": 2.4459216594696045, "learning_rate": 1.0413481862549857e-06, "loss": 0.7555, "step": 7995 }, { "epoch": 4.206207259337191, "grad_norm": 2.50775408744812, "learning_rate": 1.0407840692778518e-06, "loss": 0.7298, "step": 7996 }, { "epoch": 4.206733298264072, "grad_norm": 2.483492374420166, "learning_rate": 1.0402200649689146e-06, "loss": 0.7702, "step": 7997 }, { "epoch": 4.207259337190952, "grad_norm": 2.4366455078125, "learning_rate": 1.0396561733717222e-06, "loss": 0.7744, "step": 7998 }, { "epoch": 4.207785376117832, "grad_norm": 2.4289448261260986, "learning_rate": 1.039092394529812e-06, "loss": 0.7954, "step": 7999 }, { "epoch": 4.208311415044713, "grad_norm": 2.3361754417419434, "learning_rate": 1.0385287284867148e-06, "loss": 0.7371, "step": 8000 }, { "epoch": 4.208837453971594, "grad_norm": 2.3423662185668945, "learning_rate": 1.0379651752859526e-06, "loss": 0.819, "step": 8001 }, { "epoch": 4.209363492898475, "grad_norm": 2.41709566116333, "learning_rate": 1.037401734971038e-06, "loss": 0.7848, "step": 8002 }, { "epoch": 4.209889531825355, "grad_norm": 2.5096569061279297, "learning_rate": 1.0368384075854745e-06, "loss": 0.7992, "step": 8003 }, { "epoch": 4.210415570752236, "grad_norm": 2.463763952255249, "learning_rate": 1.0362751931727583e-06, "loss": 0.857, "step": 8004 }, { "epoch": 4.210941609679116, "grad_norm": 2.381608486175537, "learning_rate": 1.035712091776376e-06, "loss": 0.7735, "step": 8005 }, { "epoch": 4.211467648605997, "grad_norm": 2.281677722930908, "learning_rate": 1.0351491034398045e-06, "loss": 0.7867, "step": 8006 }, { "epoch": 4.211993687532877, "grad_norm": 2.7974140644073486, "learning_rate": 1.034586228206514e-06, "loss": 0.795, "step": 8007 }, { "epoch": 4.2125197264597585, "grad_norm": 2.3419928550720215, "learning_rate": 1.0340234661199638e-06, "loss": 0.7786, "step": 8008 }, { "epoch": 4.213045765386639, "grad_norm": 2.258654832839966, "learning_rate": 1.0334608172236063e-06, "loss": 0.8084, "step": 8009 }, { "epoch": 4.213571804313519, "grad_norm": 2.4569954872131348, "learning_rate": 1.0328982815608844e-06, "loss": 0.8254, "step": 8010 }, { "epoch": 4.2140978432404, "grad_norm": 2.4785144329071045, "learning_rate": 1.0323358591752319e-06, "loss": 0.7732, "step": 8011 }, { "epoch": 4.21462388216728, "grad_norm": 2.2694003582000732, "learning_rate": 1.0317735501100745e-06, "loss": 0.7904, "step": 8012 }, { "epoch": 4.215149921094161, "grad_norm": 2.2950475215911865, "learning_rate": 1.0312113544088303e-06, "loss": 0.7832, "step": 8013 }, { "epoch": 4.215675960021041, "grad_norm": 2.3790993690490723, "learning_rate": 1.0306492721149048e-06, "loss": 0.7941, "step": 8014 }, { "epoch": 4.2162019989479225, "grad_norm": 2.3126938343048096, "learning_rate": 1.0300873032716984e-06, "loss": 0.6969, "step": 8015 }, { "epoch": 4.216728037874803, "grad_norm": 2.365175485610962, "learning_rate": 1.0295254479226016e-06, "loss": 0.7745, "step": 8016 }, { "epoch": 4.217254076801684, "grad_norm": 2.1377389430999756, "learning_rate": 1.028963706110996e-06, "loss": 0.742, "step": 8017 }, { "epoch": 4.217780115728564, "grad_norm": 2.3844034671783447, "learning_rate": 1.028402077880255e-06, "loss": 0.7786, "step": 8018 }, { "epoch": 4.218306154655444, "grad_norm": 2.3177740573883057, "learning_rate": 1.0278405632737418e-06, "loss": 0.7289, "step": 8019 }, { "epoch": 4.218832193582325, "grad_norm": 2.519719362258911, "learning_rate": 1.0272791623348128e-06, "loss": 0.8023, "step": 8020 }, { "epoch": 4.219358232509205, "grad_norm": 2.3475072383880615, "learning_rate": 1.026717875106813e-06, "loss": 0.7747, "step": 8021 }, { "epoch": 4.2198842714360865, "grad_norm": 2.4405202865600586, "learning_rate": 1.026156701633081e-06, "loss": 0.788, "step": 8022 }, { "epoch": 4.220410310362967, "grad_norm": 2.4471981525421143, "learning_rate": 1.0255956419569466e-06, "loss": 0.7979, "step": 8023 }, { "epoch": 4.220936349289848, "grad_norm": 2.3583567142486572, "learning_rate": 1.0250346961217288e-06, "loss": 0.7743, "step": 8024 }, { "epoch": 4.221462388216728, "grad_norm": 2.392238140106201, "learning_rate": 1.0244738641707402e-06, "loss": 0.805, "step": 8025 }, { "epoch": 4.221988427143609, "grad_norm": 2.4220595359802246, "learning_rate": 1.0239131461472826e-06, "loss": 0.7562, "step": 8026 }, { "epoch": 4.222514466070489, "grad_norm": 2.4962520599365234, "learning_rate": 1.023352542094651e-06, "loss": 0.7657, "step": 8027 }, { "epoch": 4.223040504997369, "grad_norm": 2.4382710456848145, "learning_rate": 1.022792052056129e-06, "loss": 0.7969, "step": 8028 }, { "epoch": 4.2235665439242505, "grad_norm": 2.341886281967163, "learning_rate": 1.0222316760749934e-06, "loss": 0.7423, "step": 8029 }, { "epoch": 4.224092582851131, "grad_norm": 2.4726717472076416, "learning_rate": 1.021671414194512e-06, "loss": 0.7529, "step": 8030 }, { "epoch": 4.224618621778012, "grad_norm": 2.5094258785247803, "learning_rate": 1.0211112664579438e-06, "loss": 0.7226, "step": 8031 }, { "epoch": 4.225144660704892, "grad_norm": 2.3939218521118164, "learning_rate": 1.020551232908537e-06, "loss": 0.8404, "step": 8032 }, { "epoch": 4.225670699631773, "grad_norm": 2.2936739921569824, "learning_rate": 1.019991313589534e-06, "loss": 0.7284, "step": 8033 }, { "epoch": 4.226196738558653, "grad_norm": 2.4538636207580566, "learning_rate": 1.0194315085441673e-06, "loss": 0.7864, "step": 8034 }, { "epoch": 4.226722777485534, "grad_norm": 2.395907402038574, "learning_rate": 1.0188718178156587e-06, "loss": 0.7738, "step": 8035 }, { "epoch": 4.2272488164124145, "grad_norm": 2.298395872116089, "learning_rate": 1.0183122414472238e-06, "loss": 0.7768, "step": 8036 }, { "epoch": 4.2277748553392955, "grad_norm": 2.4488062858581543, "learning_rate": 1.017752779482068e-06, "loss": 0.7733, "step": 8037 }, { "epoch": 4.228300894266176, "grad_norm": 2.1917617321014404, "learning_rate": 1.0171934319633884e-06, "loss": 0.7568, "step": 8038 }, { "epoch": 4.228826933193056, "grad_norm": 2.31813907623291, "learning_rate": 1.0166341989343725e-06, "loss": 0.7465, "step": 8039 }, { "epoch": 4.229352972119937, "grad_norm": 2.177497386932373, "learning_rate": 1.0160750804382006e-06, "loss": 0.7525, "step": 8040 }, { "epoch": 4.229879011046817, "grad_norm": 2.5460317134857178, "learning_rate": 1.015516076518043e-06, "loss": 0.8223, "step": 8041 }, { "epoch": 4.230405049973698, "grad_norm": 2.421895742416382, "learning_rate": 1.0149571872170596e-06, "loss": 0.7453, "step": 8042 }, { "epoch": 4.2309310889005785, "grad_norm": 2.3038978576660156, "learning_rate": 1.014398412578405e-06, "loss": 0.744, "step": 8043 }, { "epoch": 4.2314571278274595, "grad_norm": 2.4547617435455322, "learning_rate": 1.0138397526452216e-06, "loss": 0.7426, "step": 8044 }, { "epoch": 4.23198316675434, "grad_norm": 2.3435144424438477, "learning_rate": 1.0132812074606443e-06, "loss": 0.7853, "step": 8045 }, { "epoch": 4.232509205681221, "grad_norm": 2.2333176136016846, "learning_rate": 1.0127227770678e-06, "loss": 0.7805, "step": 8046 }, { "epoch": 4.233035244608101, "grad_norm": 2.3187828063964844, "learning_rate": 1.0121644615098056e-06, "loss": 0.78, "step": 8047 }, { "epoch": 4.233561283534981, "grad_norm": 2.6836767196655273, "learning_rate": 1.0116062608297704e-06, "loss": 0.7363, "step": 8048 }, { "epoch": 4.234087322461862, "grad_norm": 2.7763373851776123, "learning_rate": 1.0110481750707924e-06, "loss": 0.7906, "step": 8049 }, { "epoch": 4.2346133613887424, "grad_norm": 2.5196187496185303, "learning_rate": 1.0104902042759624e-06, "loss": 0.8145, "step": 8050 }, { "epoch": 4.2351394003156235, "grad_norm": 2.3469111919403076, "learning_rate": 1.0099323484883627e-06, "loss": 0.7614, "step": 8051 }, { "epoch": 4.235665439242504, "grad_norm": 2.4065778255462646, "learning_rate": 1.0093746077510662e-06, "loss": 0.7867, "step": 8052 }, { "epoch": 4.236191478169385, "grad_norm": 2.2696657180786133, "learning_rate": 1.0088169821071367e-06, "loss": 0.8087, "step": 8053 }, { "epoch": 4.236717517096265, "grad_norm": 2.2716050148010254, "learning_rate": 1.0082594715996299e-06, "loss": 0.745, "step": 8054 }, { "epoch": 4.237243556023146, "grad_norm": 2.5266993045806885, "learning_rate": 1.0077020762715908e-06, "loss": 0.8287, "step": 8055 }, { "epoch": 4.237769594950026, "grad_norm": 2.42179536819458, "learning_rate": 1.007144796166058e-06, "loss": 0.7865, "step": 8056 }, { "epoch": 4.238295633876907, "grad_norm": 2.479194164276123, "learning_rate": 1.0065876313260583e-06, "loss": 0.7763, "step": 8057 }, { "epoch": 4.2388216728037875, "grad_norm": 2.2644407749176025, "learning_rate": 1.0060305817946123e-06, "loss": 0.785, "step": 8058 }, { "epoch": 4.239347711730668, "grad_norm": 2.5875720977783203, "learning_rate": 1.0054736476147303e-06, "loss": 0.8114, "step": 8059 }, { "epoch": 4.239873750657549, "grad_norm": 2.241182327270508, "learning_rate": 1.0049168288294144e-06, "loss": 0.7764, "step": 8060 }, { "epoch": 4.240399789584429, "grad_norm": 2.286288261413574, "learning_rate": 1.004360125481657e-06, "loss": 0.7735, "step": 8061 }, { "epoch": 4.24092582851131, "grad_norm": 2.4451956748962402, "learning_rate": 1.003803537614442e-06, "loss": 0.8121, "step": 8062 }, { "epoch": 4.24145186743819, "grad_norm": 2.5014984607696533, "learning_rate": 1.0032470652707455e-06, "loss": 0.7624, "step": 8063 }, { "epoch": 4.241977906365071, "grad_norm": 2.43483567237854, "learning_rate": 1.0026907084935317e-06, "loss": 0.801, "step": 8064 }, { "epoch": 4.2425039452919515, "grad_norm": 2.3422460556030273, "learning_rate": 1.0021344673257582e-06, "loss": 0.7557, "step": 8065 }, { "epoch": 4.243029984218833, "grad_norm": 2.1837587356567383, "learning_rate": 1.001578341810374e-06, "loss": 0.7813, "step": 8066 }, { "epoch": 4.243556023145713, "grad_norm": 2.587301015853882, "learning_rate": 1.0010223319903183e-06, "loss": 0.8035, "step": 8067 }, { "epoch": 4.244082062072593, "grad_norm": 2.382218837738037, "learning_rate": 1.0004664379085204e-06, "loss": 0.7752, "step": 8068 }, { "epoch": 4.244608100999474, "grad_norm": 2.415156364440918, "learning_rate": 9.999106596079026e-07, "loss": 0.8119, "step": 8069 }, { "epoch": 4.245134139926354, "grad_norm": 2.2804689407348633, "learning_rate": 9.993549971313776e-07, "loss": 0.7927, "step": 8070 }, { "epoch": 4.245660178853235, "grad_norm": 2.4784955978393555, "learning_rate": 9.987994505218477e-07, "loss": 0.7836, "step": 8071 }, { "epoch": 4.2461862177801155, "grad_norm": 2.292492628097534, "learning_rate": 9.982440198222083e-07, "loss": 0.7795, "step": 8072 }, { "epoch": 4.246712256706997, "grad_norm": 2.401416301727295, "learning_rate": 9.976887050753444e-07, "loss": 0.8077, "step": 8073 }, { "epoch": 4.247238295633877, "grad_norm": 2.3032033443450928, "learning_rate": 9.971335063241336e-07, "loss": 0.7932, "step": 8074 }, { "epoch": 4.247764334560758, "grad_norm": 2.647953748703003, "learning_rate": 9.96578423611443e-07, "loss": 0.7779, "step": 8075 }, { "epoch": 4.248290373487638, "grad_norm": 2.4170920848846436, "learning_rate": 9.960234569801313e-07, "loss": 0.7533, "step": 8076 }, { "epoch": 4.248816412414518, "grad_norm": 2.7615225315093994, "learning_rate": 9.954686064730498e-07, "loss": 0.8099, "step": 8077 }, { "epoch": 4.249342451341399, "grad_norm": 2.3568289279937744, "learning_rate": 9.949138721330368e-07, "loss": 0.78, "step": 8078 }, { "epoch": 4.2498684902682795, "grad_norm": 2.266554355621338, "learning_rate": 9.943592540029254e-07, "loss": 0.7867, "step": 8079 }, { "epoch": 4.250394529195161, "grad_norm": 2.39963436126709, "learning_rate": 9.938047521255396e-07, "loss": 0.8219, "step": 8080 }, { "epoch": 4.250920568122041, "grad_norm": 2.3614373207092285, "learning_rate": 9.93250366543691e-07, "loss": 0.7906, "step": 8081 }, { "epoch": 4.251446607048922, "grad_norm": 2.4056386947631836, "learning_rate": 9.926960973001857e-07, "loss": 0.7702, "step": 8082 }, { "epoch": 4.251972645975802, "grad_norm": 2.361729383468628, "learning_rate": 9.921419444378194e-07, "loss": 0.7498, "step": 8083 }, { "epoch": 4.252498684902683, "grad_norm": 2.413128614425659, "learning_rate": 9.915879079993803e-07, "loss": 0.7566, "step": 8084 }, { "epoch": 4.253024723829563, "grad_norm": 2.3054893016815186, "learning_rate": 9.910339880276446e-07, "loss": 0.7191, "step": 8085 }, { "epoch": 4.253550762756444, "grad_norm": 2.3933470249176025, "learning_rate": 9.904801845653816e-07, "loss": 0.7766, "step": 8086 }, { "epoch": 4.254076801683325, "grad_norm": 2.395019292831421, "learning_rate": 9.899264976553518e-07, "loss": 0.7905, "step": 8087 }, { "epoch": 4.254602840610205, "grad_norm": 2.374593496322632, "learning_rate": 9.893729273403062e-07, "loss": 0.7282, "step": 8088 }, { "epoch": 4.255128879537086, "grad_norm": 2.2381794452667236, "learning_rate": 9.888194736629866e-07, "loss": 0.8261, "step": 8089 }, { "epoch": 4.255654918463966, "grad_norm": 2.251431703567505, "learning_rate": 9.882661366661259e-07, "loss": 0.7764, "step": 8090 }, { "epoch": 4.256180957390847, "grad_norm": 2.3738322257995605, "learning_rate": 9.87712916392449e-07, "loss": 0.7678, "step": 8091 }, { "epoch": 4.256706996317727, "grad_norm": 2.383078098297119, "learning_rate": 9.8715981288467e-07, "loss": 0.8097, "step": 8092 }, { "epoch": 4.257233035244608, "grad_norm": 2.2764699459075928, "learning_rate": 9.866068261854939e-07, "loss": 0.7322, "step": 8093 }, { "epoch": 4.257759074171489, "grad_norm": 2.591967821121216, "learning_rate": 9.860539563376187e-07, "loss": 0.8019, "step": 8094 }, { "epoch": 4.25828511309837, "grad_norm": 2.30446457862854, "learning_rate": 9.85501203383732e-07, "loss": 0.7461, "step": 8095 }, { "epoch": 4.25881115202525, "grad_norm": 2.460045337677002, "learning_rate": 9.849485673665126e-07, "loss": 0.7542, "step": 8096 }, { "epoch": 4.259337190952131, "grad_norm": 2.2758710384368896, "learning_rate": 9.843960483286307e-07, "loss": 0.7694, "step": 8097 }, { "epoch": 4.259863229879011, "grad_norm": 2.6567254066467285, "learning_rate": 9.83843646312748e-07, "loss": 0.7829, "step": 8098 }, { "epoch": 4.260389268805891, "grad_norm": 2.446244478225708, "learning_rate": 9.83291361361514e-07, "loss": 0.7581, "step": 8099 }, { "epoch": 4.260915307732772, "grad_norm": 2.4221906661987305, "learning_rate": 9.827391935175726e-07, "loss": 0.7577, "step": 8100 }, { "epoch": 4.261441346659653, "grad_norm": 2.3743629455566406, "learning_rate": 9.821871428235575e-07, "loss": 0.781, "step": 8101 }, { "epoch": 4.261967385586534, "grad_norm": 2.2788898944854736, "learning_rate": 9.816352093220936e-07, "loss": 0.793, "step": 8102 }, { "epoch": 4.262493424513414, "grad_norm": 2.32100510597229, "learning_rate": 9.81083393055797e-07, "loss": 0.7815, "step": 8103 }, { "epoch": 4.263019463440295, "grad_norm": 2.470402717590332, "learning_rate": 9.805316940672724e-07, "loss": 0.7857, "step": 8104 }, { "epoch": 4.263545502367175, "grad_norm": 2.40494704246521, "learning_rate": 9.799801123991192e-07, "loss": 0.7739, "step": 8105 }, { "epoch": 4.264071541294056, "grad_norm": 2.3707151412963867, "learning_rate": 9.794286480939244e-07, "loss": 0.8064, "step": 8106 }, { "epoch": 4.264597580220936, "grad_norm": 2.397425651550293, "learning_rate": 9.788773011942678e-07, "loss": 0.7374, "step": 8107 }, { "epoch": 4.265123619147817, "grad_norm": 2.529218912124634, "learning_rate": 9.7832607174272e-07, "loss": 0.7647, "step": 8108 }, { "epoch": 4.265649658074698, "grad_norm": 2.38920259475708, "learning_rate": 9.77774959781842e-07, "loss": 0.779, "step": 8109 }, { "epoch": 4.266175697001578, "grad_norm": 2.521488904953003, "learning_rate": 9.772239653541861e-07, "loss": 0.7541, "step": 8110 }, { "epoch": 4.266701735928459, "grad_norm": 2.297619104385376, "learning_rate": 9.766730885022955e-07, "loss": 0.7611, "step": 8111 }, { "epoch": 4.267227774855339, "grad_norm": 2.459557056427002, "learning_rate": 9.761223292687048e-07, "loss": 0.7646, "step": 8112 }, { "epoch": 4.26775381378222, "grad_norm": 2.4299750328063965, "learning_rate": 9.75571687695937e-07, "loss": 0.7622, "step": 8113 }, { "epoch": 4.2682798527091, "grad_norm": 2.279601573944092, "learning_rate": 9.750211638265095e-07, "loss": 0.7985, "step": 8114 }, { "epoch": 4.2688058916359815, "grad_norm": 2.461407423019409, "learning_rate": 9.744707577029284e-07, "loss": 0.7319, "step": 8115 }, { "epoch": 4.269331930562862, "grad_norm": 2.2464804649353027, "learning_rate": 9.739204693676927e-07, "loss": 0.7333, "step": 8116 }, { "epoch": 4.269857969489742, "grad_norm": 2.631521224975586, "learning_rate": 9.733702988632887e-07, "loss": 0.8408, "step": 8117 }, { "epoch": 4.270384008416623, "grad_norm": 2.4832184314727783, "learning_rate": 9.728202462321973e-07, "loss": 0.8056, "step": 8118 }, { "epoch": 4.270910047343503, "grad_norm": 3.1079347133636475, "learning_rate": 9.722703115168885e-07, "loss": 0.7951, "step": 8119 }, { "epoch": 4.271436086270384, "grad_norm": 2.3454151153564453, "learning_rate": 9.717204947598247e-07, "loss": 0.7614, "step": 8120 }, { "epoch": 4.271962125197264, "grad_norm": 2.5714542865753174, "learning_rate": 9.711707960034562e-07, "loss": 0.7625, "step": 8121 }, { "epoch": 4.2724881641241454, "grad_norm": 2.2861106395721436, "learning_rate": 9.70621215290227e-07, "loss": 0.7535, "step": 8122 }, { "epoch": 4.273014203051026, "grad_norm": 2.660611629486084, "learning_rate": 9.700717526625706e-07, "loss": 0.7935, "step": 8123 }, { "epoch": 4.273540241977907, "grad_norm": 2.4607439041137695, "learning_rate": 9.695224081629124e-07, "loss": 0.778, "step": 8124 }, { "epoch": 4.274066280904787, "grad_norm": 2.2891204357147217, "learning_rate": 9.689731818336676e-07, "loss": 0.7648, "step": 8125 }, { "epoch": 4.274592319831667, "grad_norm": 2.309997797012329, "learning_rate": 9.684240737172432e-07, "loss": 0.7568, "step": 8126 }, { "epoch": 4.275118358758548, "grad_norm": 2.1023130416870117, "learning_rate": 9.678750838560371e-07, "loss": 0.767, "step": 8127 }, { "epoch": 4.275644397685428, "grad_norm": 2.30771803855896, "learning_rate": 9.673262122924365e-07, "loss": 0.7574, "step": 8128 }, { "epoch": 4.2761704366123094, "grad_norm": 2.3392488956451416, "learning_rate": 9.667774590688217e-07, "loss": 0.746, "step": 8129 }, { "epoch": 4.27669647553919, "grad_norm": 2.410217523574829, "learning_rate": 9.66228824227561e-07, "loss": 0.8253, "step": 8130 }, { "epoch": 4.277222514466071, "grad_norm": 2.5102319717407227, "learning_rate": 9.656803078110166e-07, "loss": 0.7981, "step": 8131 }, { "epoch": 4.277748553392951, "grad_norm": 2.241826057434082, "learning_rate": 9.651319098615398e-07, "loss": 0.7828, "step": 8132 }, { "epoch": 4.278274592319832, "grad_norm": 2.3299407958984375, "learning_rate": 9.645836304214736e-07, "loss": 0.7587, "step": 8133 }, { "epoch": 4.278800631246712, "grad_norm": 2.433032989501953, "learning_rate": 9.640354695331522e-07, "loss": 0.7505, "step": 8134 }, { "epoch": 4.279326670173593, "grad_norm": 2.272974967956543, "learning_rate": 9.63487427238898e-07, "loss": 0.7398, "step": 8135 }, { "epoch": 4.279852709100473, "grad_norm": 2.297887086868286, "learning_rate": 9.62939503581027e-07, "loss": 0.7902, "step": 8136 }, { "epoch": 4.280378748027354, "grad_norm": 2.4648585319519043, "learning_rate": 9.623916986018455e-07, "loss": 0.8351, "step": 8137 }, { "epoch": 4.280904786954235, "grad_norm": 2.317366600036621, "learning_rate": 9.618440123436502e-07, "loss": 0.7816, "step": 8138 }, { "epoch": 4.281430825881115, "grad_norm": 2.5018842220306396, "learning_rate": 9.612964448487285e-07, "loss": 0.7649, "step": 8139 }, { "epoch": 4.281956864807996, "grad_norm": 2.4122228622436523, "learning_rate": 9.607489961593602e-07, "loss": 0.8108, "step": 8140 }, { "epoch": 4.282482903734876, "grad_norm": 2.3960208892822266, "learning_rate": 9.602016663178127e-07, "loss": 0.7632, "step": 8141 }, { "epoch": 4.283008942661757, "grad_norm": 2.3335673809051514, "learning_rate": 9.596544553663476e-07, "loss": 0.809, "step": 8142 }, { "epoch": 4.283534981588637, "grad_norm": 2.563936233520508, "learning_rate": 9.591073633472145e-07, "loss": 0.7906, "step": 8143 }, { "epoch": 4.2840610205155185, "grad_norm": 2.4666600227355957, "learning_rate": 9.585603903026556e-07, "loss": 0.7608, "step": 8144 }, { "epoch": 4.284587059442399, "grad_norm": 3.203920602798462, "learning_rate": 9.58013536274904e-07, "loss": 0.7828, "step": 8145 }, { "epoch": 4.28511309836928, "grad_norm": 2.3925483226776123, "learning_rate": 9.574668013061828e-07, "loss": 0.7725, "step": 8146 }, { "epoch": 4.28563913729616, "grad_norm": 2.4095118045806885, "learning_rate": 9.56920185438706e-07, "loss": 0.7572, "step": 8147 }, { "epoch": 4.28616517622304, "grad_norm": 2.4807276725769043, "learning_rate": 9.563736887146801e-07, "loss": 0.759, "step": 8148 }, { "epoch": 4.286691215149921, "grad_norm": 2.1591782569885254, "learning_rate": 9.55827311176299e-07, "loss": 0.7251, "step": 8149 }, { "epoch": 4.287217254076801, "grad_norm": 4.415726661682129, "learning_rate": 9.552810528657497e-07, "loss": 0.7214, "step": 8150 }, { "epoch": 4.2877432930036825, "grad_norm": 2.366939067840576, "learning_rate": 9.547349138252098e-07, "loss": 0.8113, "step": 8151 }, { "epoch": 4.288269331930563, "grad_norm": 2.3527777194976807, "learning_rate": 9.541888940968487e-07, "loss": 0.749, "step": 8152 }, { "epoch": 4.288795370857444, "grad_norm": 2.31217098236084, "learning_rate": 9.536429937228234e-07, "loss": 0.7989, "step": 8153 }, { "epoch": 4.289321409784324, "grad_norm": 2.556335926055908, "learning_rate": 9.530972127452845e-07, "loss": 0.7906, "step": 8154 }, { "epoch": 4.289847448711205, "grad_norm": 2.3912854194641113, "learning_rate": 9.525515512063734e-07, "loss": 0.7246, "step": 8155 }, { "epoch": 4.290373487638085, "grad_norm": 2.5004332065582275, "learning_rate": 9.5200600914822e-07, "loss": 0.7534, "step": 8156 }, { "epoch": 4.290899526564965, "grad_norm": 2.5102827548980713, "learning_rate": 9.514605866129467e-07, "loss": 0.782, "step": 8157 }, { "epoch": 4.2914255654918465, "grad_norm": 2.4188337326049805, "learning_rate": 9.509152836426669e-07, "loss": 0.755, "step": 8158 }, { "epoch": 4.291951604418727, "grad_norm": 2.2239229679107666, "learning_rate": 9.50370100279484e-07, "loss": 0.7395, "step": 8159 }, { "epoch": 4.292477643345608, "grad_norm": 2.3157131671905518, "learning_rate": 9.498250365654924e-07, "loss": 0.7645, "step": 8160 }, { "epoch": 4.293003682272488, "grad_norm": 2.687593936920166, "learning_rate": 9.492800925427773e-07, "loss": 0.7943, "step": 8161 }, { "epoch": 4.293529721199369, "grad_norm": 2.3092877864837646, "learning_rate": 9.487352682534157e-07, "loss": 0.7866, "step": 8162 }, { "epoch": 4.294055760126249, "grad_norm": 2.4883267879486084, "learning_rate": 9.48190563739472e-07, "loss": 0.7865, "step": 8163 }, { "epoch": 4.29458179905313, "grad_norm": 2.6186671257019043, "learning_rate": 9.476459790430051e-07, "loss": 0.783, "step": 8164 }, { "epoch": 4.2951078379800105, "grad_norm": 2.4224090576171875, "learning_rate": 9.471015142060635e-07, "loss": 0.8299, "step": 8165 }, { "epoch": 4.295633876906891, "grad_norm": 2.4331178665161133, "learning_rate": 9.465571692706852e-07, "loss": 0.8143, "step": 8166 }, { "epoch": 4.296159915833772, "grad_norm": 2.278550863265991, "learning_rate": 9.460129442788999e-07, "loss": 0.7906, "step": 8167 }, { "epoch": 4.296685954760652, "grad_norm": 2.4482979774475098, "learning_rate": 9.454688392727282e-07, "loss": 0.7905, "step": 8168 }, { "epoch": 4.297211993687533, "grad_norm": 2.372551918029785, "learning_rate": 9.449248542941816e-07, "loss": 0.7376, "step": 8169 }, { "epoch": 4.297738032614413, "grad_norm": 2.3251852989196777, "learning_rate": 9.443809893852623e-07, "loss": 0.7423, "step": 8170 }, { "epoch": 4.298264071541294, "grad_norm": 2.492675542831421, "learning_rate": 9.438372445879617e-07, "loss": 0.7735, "step": 8171 }, { "epoch": 4.2987901104681745, "grad_norm": 2.3902831077575684, "learning_rate": 9.432936199442638e-07, "loss": 0.7727, "step": 8172 }, { "epoch": 4.299316149395056, "grad_norm": 2.5389552116394043, "learning_rate": 9.427501154961427e-07, "loss": 0.7883, "step": 8173 }, { "epoch": 4.299842188321936, "grad_norm": 2.246748685836792, "learning_rate": 9.422067312855632e-07, "loss": 0.7814, "step": 8174 }, { "epoch": 4.300368227248816, "grad_norm": 2.3575665950775146, "learning_rate": 9.416634673544806e-07, "loss": 0.7565, "step": 8175 }, { "epoch": 4.300894266175697, "grad_norm": 2.436378002166748, "learning_rate": 9.411203237448418e-07, "loss": 0.8002, "step": 8176 }, { "epoch": 4.301420305102577, "grad_norm": 2.4124162197113037, "learning_rate": 9.405773004985824e-07, "loss": 0.8168, "step": 8177 }, { "epoch": 4.301946344029458, "grad_norm": 2.5020899772644043, "learning_rate": 9.400343976576318e-07, "loss": 0.7928, "step": 8178 }, { "epoch": 4.3024723829563385, "grad_norm": 2.3097643852233887, "learning_rate": 9.394916152639064e-07, "loss": 0.7356, "step": 8179 }, { "epoch": 4.30299842188322, "grad_norm": 2.5838191509246826, "learning_rate": 9.389489533593158e-07, "loss": 0.7849, "step": 8180 }, { "epoch": 4.3035244608101, "grad_norm": 2.373910665512085, "learning_rate": 9.384064119857603e-07, "loss": 0.8123, "step": 8181 }, { "epoch": 4.304050499736981, "grad_norm": 2.4283015727996826, "learning_rate": 9.378639911851297e-07, "loss": 0.8018, "step": 8182 }, { "epoch": 4.304576538663861, "grad_norm": 2.3099677562713623, "learning_rate": 9.373216909993055e-07, "loss": 0.787, "step": 8183 }, { "epoch": 4.305102577590742, "grad_norm": 2.153803586959839, "learning_rate": 9.367795114701605e-07, "loss": 0.7557, "step": 8184 }, { "epoch": 4.305628616517622, "grad_norm": 2.3664135932922363, "learning_rate": 9.362374526395551e-07, "loss": 0.8009, "step": 8185 }, { "epoch": 4.3061546554445025, "grad_norm": 2.3927345275878906, "learning_rate": 9.356955145493432e-07, "loss": 0.781, "step": 8186 }, { "epoch": 4.306680694371384, "grad_norm": 2.288412570953369, "learning_rate": 9.351536972413692e-07, "loss": 0.7841, "step": 8187 }, { "epoch": 4.307206733298264, "grad_norm": 2.309278726577759, "learning_rate": 9.346120007574672e-07, "loss": 0.8006, "step": 8188 }, { "epoch": 4.307732772225145, "grad_norm": 2.2922723293304443, "learning_rate": 9.340704251394633e-07, "loss": 0.7736, "step": 8189 }, { "epoch": 4.308258811152025, "grad_norm": 2.304837942123413, "learning_rate": 9.335289704291714e-07, "loss": 0.7679, "step": 8190 }, { "epoch": 4.308784850078906, "grad_norm": 2.5926523208618164, "learning_rate": 9.329876366684004e-07, "loss": 0.7687, "step": 8191 }, { "epoch": 4.309310889005786, "grad_norm": 2.484771966934204, "learning_rate": 9.324464238989451e-07, "loss": 0.7873, "step": 8192 }, { "epoch": 4.309836927932667, "grad_norm": 2.557997941970825, "learning_rate": 9.319053321625943e-07, "loss": 0.7463, "step": 8193 }, { "epoch": 4.310362966859548, "grad_norm": 2.271350145339966, "learning_rate": 9.313643615011267e-07, "loss": 0.7617, "step": 8194 }, { "epoch": 4.310889005786429, "grad_norm": 2.4417126178741455, "learning_rate": 9.308235119563114e-07, "loss": 0.7899, "step": 8195 }, { "epoch": 4.311415044713309, "grad_norm": 2.4365978240966797, "learning_rate": 9.302827835699079e-07, "loss": 0.7722, "step": 8196 }, { "epoch": 4.311941083640189, "grad_norm": 2.546692132949829, "learning_rate": 9.297421763836672e-07, "loss": 0.8502, "step": 8197 }, { "epoch": 4.31246712256707, "grad_norm": 2.3279800415039062, "learning_rate": 9.292016904393306e-07, "loss": 0.7558, "step": 8198 }, { "epoch": 4.31299316149395, "grad_norm": 2.4029362201690674, "learning_rate": 9.286613257786284e-07, "loss": 0.76, "step": 8199 }, { "epoch": 4.313519200420831, "grad_norm": 2.4564545154571533, "learning_rate": 9.28121082443284e-07, "loss": 0.7454, "step": 8200 }, { "epoch": 4.314045239347712, "grad_norm": 2.3405215740203857, "learning_rate": 9.275809604750099e-07, "loss": 0.8067, "step": 8201 }, { "epoch": 4.314571278274593, "grad_norm": 2.2823147773742676, "learning_rate": 9.270409599155108e-07, "loss": 0.7515, "step": 8202 }, { "epoch": 4.315097317201473, "grad_norm": 2.3824760913848877, "learning_rate": 9.265010808064795e-07, "loss": 0.7664, "step": 8203 }, { "epoch": 4.315623356128354, "grad_norm": 2.3500545024871826, "learning_rate": 9.259613231896011e-07, "loss": 0.7668, "step": 8204 }, { "epoch": 4.316149395055234, "grad_norm": 2.472586154937744, "learning_rate": 9.254216871065525e-07, "loss": 0.7604, "step": 8205 }, { "epoch": 4.316675433982114, "grad_norm": 2.5538313388824463, "learning_rate": 9.248821725989978e-07, "loss": 0.8188, "step": 8206 }, { "epoch": 4.317201472908995, "grad_norm": 2.3774752616882324, "learning_rate": 9.243427797085947e-07, "loss": 0.7572, "step": 8207 }, { "epoch": 4.3177275118358756, "grad_norm": 2.340214729309082, "learning_rate": 9.238035084769903e-07, "loss": 0.7597, "step": 8208 }, { "epoch": 4.318253550762757, "grad_norm": 2.365476608276367, "learning_rate": 9.232643589458229e-07, "loss": 0.8249, "step": 8209 }, { "epoch": 4.318779589689637, "grad_norm": 2.5612056255340576, "learning_rate": 9.227253311567205e-07, "loss": 0.8327, "step": 8210 }, { "epoch": 4.319305628616518, "grad_norm": 2.242342233657837, "learning_rate": 9.221864251513027e-07, "loss": 0.742, "step": 8211 }, { "epoch": 4.319831667543398, "grad_norm": 2.6007885932922363, "learning_rate": 9.216476409711797e-07, "loss": 0.8298, "step": 8212 }, { "epoch": 4.320357706470279, "grad_norm": 2.369804620742798, "learning_rate": 9.211089786579505e-07, "loss": 0.7472, "step": 8213 }, { "epoch": 4.320883745397159, "grad_norm": 2.361845016479492, "learning_rate": 9.205704382532074e-07, "loss": 0.7602, "step": 8214 }, { "epoch": 4.3214097843240395, "grad_norm": 2.5786831378936768, "learning_rate": 9.200320197985302e-07, "loss": 0.7902, "step": 8215 }, { "epoch": 4.321935823250921, "grad_norm": 2.255446195602417, "learning_rate": 9.19493723335492e-07, "loss": 0.763, "step": 8216 }, { "epoch": 4.322461862177801, "grad_norm": 2.6043481826782227, "learning_rate": 9.189555489056554e-07, "loss": 0.7621, "step": 8217 }, { "epoch": 4.322987901104682, "grad_norm": 2.3130226135253906, "learning_rate": 9.184174965505735e-07, "loss": 0.7897, "step": 8218 }, { "epoch": 4.323513940031562, "grad_norm": 2.6263530254364014, "learning_rate": 9.178795663117901e-07, "loss": 0.7658, "step": 8219 }, { "epoch": 4.324039978958443, "grad_norm": 2.408893346786499, "learning_rate": 9.173417582308408e-07, "loss": 0.7639, "step": 8220 }, { "epoch": 4.324566017885323, "grad_norm": 2.3176801204681396, "learning_rate": 9.168040723492483e-07, "loss": 0.7741, "step": 8221 }, { "epoch": 4.325092056812204, "grad_norm": 2.3100130558013916, "learning_rate": 9.162665087085296e-07, "loss": 0.8211, "step": 8222 }, { "epoch": 4.325618095739085, "grad_norm": 2.2356486320495605, "learning_rate": 9.157290673501904e-07, "loss": 0.7719, "step": 8223 }, { "epoch": 4.326144134665965, "grad_norm": 2.4988648891448975, "learning_rate": 9.151917483157272e-07, "loss": 0.7439, "step": 8224 }, { "epoch": 4.326670173592846, "grad_norm": 2.268217086791992, "learning_rate": 9.146545516466282e-07, "loss": 0.7622, "step": 8225 }, { "epoch": 4.327196212519726, "grad_norm": 2.4885447025299072, "learning_rate": 9.141174773843694e-07, "loss": 0.7543, "step": 8226 }, { "epoch": 4.327722251446607, "grad_norm": 2.3892648220062256, "learning_rate": 9.135805255704211e-07, "loss": 0.7488, "step": 8227 }, { "epoch": 4.328248290373487, "grad_norm": 2.2913625240325928, "learning_rate": 9.1304369624624e-07, "loss": 0.7579, "step": 8228 }, { "epoch": 4.328774329300368, "grad_norm": 2.2932233810424805, "learning_rate": 9.125069894532764e-07, "loss": 0.7599, "step": 8229 }, { "epoch": 4.329300368227249, "grad_norm": 2.47275447845459, "learning_rate": 9.119704052329703e-07, "loss": 0.766, "step": 8230 }, { "epoch": 4.32982640715413, "grad_norm": 2.439953565597534, "learning_rate": 9.114339436267521e-07, "loss": 0.75, "step": 8231 }, { "epoch": 4.33035244608101, "grad_norm": 2.3847789764404297, "learning_rate": 9.108976046760432e-07, "loss": 0.7734, "step": 8232 }, { "epoch": 4.330878485007891, "grad_norm": 2.3578529357910156, "learning_rate": 9.103613884222545e-07, "loss": 0.7402, "step": 8233 }, { "epoch": 4.331404523934771, "grad_norm": 2.240596055984497, "learning_rate": 9.098252949067892e-07, "loss": 0.7506, "step": 8234 }, { "epoch": 4.331930562861651, "grad_norm": 2.8053760528564453, "learning_rate": 9.092893241710382e-07, "loss": 0.7408, "step": 8235 }, { "epoch": 4.332456601788532, "grad_norm": 2.3574788570404053, "learning_rate": 9.087534762563854e-07, "loss": 0.7694, "step": 8236 }, { "epoch": 4.332982640715413, "grad_norm": 3.210982322692871, "learning_rate": 9.082177512042042e-07, "loss": 0.7319, "step": 8237 }, { "epoch": 4.333508679642294, "grad_norm": 2.3396527767181396, "learning_rate": 9.076821490558599e-07, "loss": 0.7603, "step": 8238 }, { "epoch": 4.334034718569174, "grad_norm": 2.400285243988037, "learning_rate": 9.071466698527051e-07, "loss": 0.8016, "step": 8239 }, { "epoch": 4.334560757496055, "grad_norm": 2.4744210243225098, "learning_rate": 9.06611313636086e-07, "loss": 0.7859, "step": 8240 }, { "epoch": 4.335086796422935, "grad_norm": 2.4523026943206787, "learning_rate": 9.06076080447339e-07, "loss": 0.7497, "step": 8241 }, { "epoch": 4.335612835349816, "grad_norm": 2.427067995071411, "learning_rate": 9.055409703277887e-07, "loss": 0.8013, "step": 8242 }, { "epoch": 4.336138874276696, "grad_norm": 2.4637789726257324, "learning_rate": 9.050059833187522e-07, "loss": 0.8306, "step": 8243 }, { "epoch": 4.3366649132035775, "grad_norm": 2.4949941635131836, "learning_rate": 9.044711194615369e-07, "loss": 0.7962, "step": 8244 }, { "epoch": 4.337190952130458, "grad_norm": 2.271155834197998, "learning_rate": 9.039363787974406e-07, "loss": 0.7989, "step": 8245 }, { "epoch": 4.337716991057338, "grad_norm": 2.469163417816162, "learning_rate": 9.034017613677509e-07, "loss": 0.7945, "step": 8246 }, { "epoch": 4.338243029984219, "grad_norm": 2.5102767944335938, "learning_rate": 9.02867267213747e-07, "loss": 0.7825, "step": 8247 }, { "epoch": 4.338769068911099, "grad_norm": 2.597377300262451, "learning_rate": 9.023328963766983e-07, "loss": 0.7704, "step": 8248 }, { "epoch": 4.33929510783798, "grad_norm": 2.6036064624786377, "learning_rate": 9.017986488978628e-07, "loss": 0.7722, "step": 8249 }, { "epoch": 4.33982114676486, "grad_norm": 2.547224760055542, "learning_rate": 9.012645248184918e-07, "loss": 0.7613, "step": 8250 }, { "epoch": 4.3403471856917415, "grad_norm": 2.280644416809082, "learning_rate": 9.007305241798258e-07, "loss": 0.7431, "step": 8251 }, { "epoch": 4.340873224618622, "grad_norm": 2.2513363361358643, "learning_rate": 9.00196647023095e-07, "loss": 0.7364, "step": 8252 }, { "epoch": 4.341399263545503, "grad_norm": 2.4456870555877686, "learning_rate": 8.99662893389521e-07, "loss": 0.8156, "step": 8253 }, { "epoch": 4.341925302472383, "grad_norm": 2.4843616485595703, "learning_rate": 8.99129263320316e-07, "loss": 0.7779, "step": 8254 }, { "epoch": 4.342451341399263, "grad_norm": 2.5992014408111572, "learning_rate": 8.985957568566833e-07, "loss": 0.7693, "step": 8255 }, { "epoch": 4.342977380326144, "grad_norm": 2.477079391479492, "learning_rate": 8.98062374039814e-07, "loss": 0.7926, "step": 8256 }, { "epoch": 4.343503419253024, "grad_norm": 2.5769104957580566, "learning_rate": 8.97529114910892e-07, "loss": 0.7651, "step": 8257 }, { "epoch": 4.3440294581799055, "grad_norm": 2.3152942657470703, "learning_rate": 8.969959795110908e-07, "loss": 0.7817, "step": 8258 }, { "epoch": 4.344555497106786, "grad_norm": 2.5415971279144287, "learning_rate": 8.964629678815751e-07, "loss": 0.7744, "step": 8259 }, { "epoch": 4.345081536033667, "grad_norm": 2.7219138145446777, "learning_rate": 8.959300800634993e-07, "loss": 0.7805, "step": 8260 }, { "epoch": 4.345607574960547, "grad_norm": 2.2474355697631836, "learning_rate": 8.953973160980084e-07, "loss": 0.756, "step": 8261 }, { "epoch": 4.346133613887428, "grad_norm": 2.4150211811065674, "learning_rate": 8.948646760262389e-07, "loss": 0.7677, "step": 8262 }, { "epoch": 4.346659652814308, "grad_norm": 2.4739131927490234, "learning_rate": 8.943321598893157e-07, "loss": 0.8397, "step": 8263 }, { "epoch": 4.347185691741188, "grad_norm": 2.3792333602905273, "learning_rate": 8.937997677283541e-07, "loss": 0.7965, "step": 8264 }, { "epoch": 4.3477117306680695, "grad_norm": 2.403362989425659, "learning_rate": 8.932674995844623e-07, "loss": 0.7367, "step": 8265 }, { "epoch": 4.34823776959495, "grad_norm": 2.4289636611938477, "learning_rate": 8.92735355498737e-07, "loss": 0.785, "step": 8266 }, { "epoch": 4.348763808521831, "grad_norm": 2.446092367172241, "learning_rate": 8.922033355122661e-07, "loss": 0.7692, "step": 8267 }, { "epoch": 4.349289847448711, "grad_norm": 2.633793354034424, "learning_rate": 8.916714396661275e-07, "loss": 0.7835, "step": 8268 }, { "epoch": 4.349815886375592, "grad_norm": 2.4487130641937256, "learning_rate": 8.911396680013895e-07, "loss": 0.8649, "step": 8269 }, { "epoch": 4.350341925302472, "grad_norm": 2.3783857822418213, "learning_rate": 8.906080205591122e-07, "loss": 0.8058, "step": 8270 }, { "epoch": 4.350867964229353, "grad_norm": 2.3517448902130127, "learning_rate": 8.900764973803427e-07, "loss": 0.7378, "step": 8271 }, { "epoch": 4.3513940031562335, "grad_norm": 2.5490105152130127, "learning_rate": 8.895450985061221e-07, "loss": 0.7853, "step": 8272 }, { "epoch": 4.351920042083115, "grad_norm": 2.3511784076690674, "learning_rate": 8.890138239774804e-07, "loss": 0.7454, "step": 8273 }, { "epoch": 4.352446081009995, "grad_norm": 2.3965234756469727, "learning_rate": 8.884826738354382e-07, "loss": 0.7058, "step": 8274 }, { "epoch": 4.352972119936875, "grad_norm": 2.449772596359253, "learning_rate": 8.879516481210057e-07, "loss": 0.7776, "step": 8275 }, { "epoch": 4.353498158863756, "grad_norm": 2.2969818115234375, "learning_rate": 8.874207468751841e-07, "loss": 0.7907, "step": 8276 }, { "epoch": 4.354024197790636, "grad_norm": 2.381258010864258, "learning_rate": 8.868899701389666e-07, "loss": 0.7578, "step": 8277 }, { "epoch": 4.354550236717517, "grad_norm": 2.3247318267822266, "learning_rate": 8.863593179533334e-07, "loss": 0.7987, "step": 8278 }, { "epoch": 4.3550762756443975, "grad_norm": 2.4487695693969727, "learning_rate": 8.858287903592575e-07, "loss": 0.7835, "step": 8279 }, { "epoch": 4.3556023145712786, "grad_norm": 2.307539701461792, "learning_rate": 8.85298387397702e-07, "loss": 0.7548, "step": 8280 }, { "epoch": 4.356128353498159, "grad_norm": 2.406243324279785, "learning_rate": 8.8476810910962e-07, "loss": 0.8033, "step": 8281 }, { "epoch": 4.35665439242504, "grad_norm": 2.3368594646453857, "learning_rate": 8.842379555359548e-07, "loss": 0.8151, "step": 8282 }, { "epoch": 4.35718043135192, "grad_norm": 2.50144362449646, "learning_rate": 8.837079267176407e-07, "loss": 0.7458, "step": 8283 }, { "epoch": 4.357706470278801, "grad_norm": 2.2735109329223633, "learning_rate": 8.83178022695603e-07, "loss": 0.7712, "step": 8284 }, { "epoch": 4.358232509205681, "grad_norm": 2.4715747833251953, "learning_rate": 8.826482435107539e-07, "loss": 0.7736, "step": 8285 }, { "epoch": 4.3587585481325615, "grad_norm": 2.2846426963806152, "learning_rate": 8.821185892040001e-07, "loss": 0.7607, "step": 8286 }, { "epoch": 4.3592845870594426, "grad_norm": 2.4868738651275635, "learning_rate": 8.815890598162374e-07, "loss": 0.7819, "step": 8287 }, { "epoch": 4.359810625986323, "grad_norm": 2.364600658416748, "learning_rate": 8.810596553883499e-07, "loss": 0.7811, "step": 8288 }, { "epoch": 4.360336664913204, "grad_norm": 2.433934450149536, "learning_rate": 8.805303759612146e-07, "loss": 0.8379, "step": 8289 }, { "epoch": 4.360862703840084, "grad_norm": 2.3779988288879395, "learning_rate": 8.800012215756981e-07, "loss": 0.768, "step": 8290 }, { "epoch": 4.361388742766965, "grad_norm": 2.3712024688720703, "learning_rate": 8.794721922726576e-07, "loss": 0.8191, "step": 8291 }, { "epoch": 4.361914781693845, "grad_norm": 2.25065279006958, "learning_rate": 8.789432880929389e-07, "loss": 0.7947, "step": 8292 }, { "epoch": 4.362440820620726, "grad_norm": 3.1986124515533447, "learning_rate": 8.784145090773802e-07, "loss": 0.7312, "step": 8293 }, { "epoch": 4.3629668595476065, "grad_norm": 2.5009689331054688, "learning_rate": 8.778858552668093e-07, "loss": 0.813, "step": 8294 }, { "epoch": 4.363492898474487, "grad_norm": 2.459430456161499, "learning_rate": 8.773573267020442e-07, "loss": 0.7707, "step": 8295 }, { "epoch": 4.364018937401368, "grad_norm": 2.223590850830078, "learning_rate": 8.768289234238936e-07, "loss": 0.7289, "step": 8296 }, { "epoch": 4.364544976328248, "grad_norm": 2.389727830886841, "learning_rate": 8.76300645473156e-07, "loss": 0.8002, "step": 8297 }, { "epoch": 4.365071015255129, "grad_norm": 2.2199110984802246, "learning_rate": 8.757724928906217e-07, "loss": 0.7392, "step": 8298 }, { "epoch": 4.365597054182009, "grad_norm": 2.3662822246551514, "learning_rate": 8.752444657170684e-07, "loss": 0.7212, "step": 8299 }, { "epoch": 4.36612309310889, "grad_norm": 2.4073216915130615, "learning_rate": 8.747165639932673e-07, "loss": 0.7352, "step": 8300 }, { "epoch": 4.3666491320357705, "grad_norm": 2.3599822521209717, "learning_rate": 8.741887877599767e-07, "loss": 0.7606, "step": 8301 }, { "epoch": 4.367175170962652, "grad_norm": 2.4992589950561523, "learning_rate": 8.736611370579484e-07, "loss": 0.7364, "step": 8302 }, { "epoch": 4.367701209889532, "grad_norm": 2.3796274662017822, "learning_rate": 8.731336119279224e-07, "loss": 0.8035, "step": 8303 }, { "epoch": 4.368227248816412, "grad_norm": 2.29179048538208, "learning_rate": 8.726062124106302e-07, "loss": 0.7525, "step": 8304 }, { "epoch": 4.368753287743293, "grad_norm": 2.415982484817505, "learning_rate": 8.720789385467935e-07, "loss": 0.7927, "step": 8305 }, { "epoch": 4.369279326670173, "grad_norm": 2.430549144744873, "learning_rate": 8.715517903771225e-07, "loss": 0.7903, "step": 8306 }, { "epoch": 4.369805365597054, "grad_norm": 2.3840994834899902, "learning_rate": 8.710247679423198e-07, "loss": 0.8093, "step": 8307 }, { "epoch": 4.3703314045239345, "grad_norm": 2.3140456676483154, "learning_rate": 8.704978712830777e-07, "loss": 0.7664, "step": 8308 }, { "epoch": 4.370857443450816, "grad_norm": 2.474623680114746, "learning_rate": 8.699711004400785e-07, "loss": 0.7883, "step": 8309 }, { "epoch": 4.371383482377696, "grad_norm": 2.1675772666931152, "learning_rate": 8.694444554539952e-07, "loss": 0.7361, "step": 8310 }, { "epoch": 4.371909521304577, "grad_norm": 2.4054372310638428, "learning_rate": 8.689179363654914e-07, "loss": 0.7685, "step": 8311 }, { "epoch": 4.372435560231457, "grad_norm": 2.342686653137207, "learning_rate": 8.683915432152187e-07, "loss": 0.7535, "step": 8312 }, { "epoch": 4.372961599158337, "grad_norm": 2.3407323360443115, "learning_rate": 8.678652760438227e-07, "loss": 0.7555, "step": 8313 }, { "epoch": 4.373487638085218, "grad_norm": 2.3947575092315674, "learning_rate": 8.673391348919352e-07, "loss": 0.7977, "step": 8314 }, { "epoch": 4.3740136770120985, "grad_norm": 2.472151041030884, "learning_rate": 8.668131198001814e-07, "loss": 0.7809, "step": 8315 }, { "epoch": 4.37453971593898, "grad_norm": 2.357212781906128, "learning_rate": 8.662872308091758e-07, "loss": 0.7834, "step": 8316 }, { "epoch": 4.37506575486586, "grad_norm": 2.539846897125244, "learning_rate": 8.657614679595224e-07, "loss": 0.7813, "step": 8317 }, { "epoch": 4.375591793792741, "grad_norm": 2.459594249725342, "learning_rate": 8.652358312918172e-07, "loss": 0.7691, "step": 8318 }, { "epoch": 4.376117832719621, "grad_norm": 2.5109102725982666, "learning_rate": 8.647103208466451e-07, "loss": 0.786, "step": 8319 }, { "epoch": 4.376643871646502, "grad_norm": 2.456998109817505, "learning_rate": 8.641849366645808e-07, "loss": 0.7633, "step": 8320 }, { "epoch": 4.377169910573382, "grad_norm": 2.4395530223846436, "learning_rate": 8.636596787861901e-07, "loss": 0.8141, "step": 8321 }, { "epoch": 4.377695949500263, "grad_norm": 2.4064536094665527, "learning_rate": 8.631345472520295e-07, "loss": 0.7481, "step": 8322 }, { "epoch": 4.378221988427144, "grad_norm": 2.4242374897003174, "learning_rate": 8.626095421026454e-07, "loss": 0.7705, "step": 8323 }, { "epoch": 4.378748027354024, "grad_norm": 2.4345691204071045, "learning_rate": 8.620846633785731e-07, "loss": 0.7469, "step": 8324 }, { "epoch": 4.379274066280905, "grad_norm": 2.6029045581817627, "learning_rate": 8.615599111203395e-07, "loss": 0.7761, "step": 8325 }, { "epoch": 4.379800105207785, "grad_norm": 2.452298641204834, "learning_rate": 8.610352853684623e-07, "loss": 0.6978, "step": 8326 }, { "epoch": 4.380326144134666, "grad_norm": 2.544074773788452, "learning_rate": 8.60510786163449e-07, "loss": 0.7529, "step": 8327 }, { "epoch": 4.380852183061546, "grad_norm": 2.3090906143188477, "learning_rate": 8.59986413545795e-07, "loss": 0.7588, "step": 8328 }, { "epoch": 4.381378221988427, "grad_norm": 2.292987585067749, "learning_rate": 8.594621675559891e-07, "loss": 0.7902, "step": 8329 }, { "epoch": 4.381904260915308, "grad_norm": 2.305712938308716, "learning_rate": 8.589380482345091e-07, "loss": 0.7716, "step": 8330 }, { "epoch": 4.382430299842189, "grad_norm": 2.465571880340576, "learning_rate": 8.584140556218232e-07, "loss": 0.7706, "step": 8331 }, { "epoch": 4.382956338769069, "grad_norm": 2.4396636486053467, "learning_rate": 8.578901897583888e-07, "loss": 0.8116, "step": 8332 }, { "epoch": 4.38348237769595, "grad_norm": 2.4852864742279053, "learning_rate": 8.573664506846551e-07, "loss": 0.7333, "step": 8333 }, { "epoch": 4.38400841662283, "grad_norm": 2.5129261016845703, "learning_rate": 8.568428384410616e-07, "loss": 0.7831, "step": 8334 }, { "epoch": 4.38453445554971, "grad_norm": 2.420325756072998, "learning_rate": 8.563193530680352e-07, "loss": 0.804, "step": 8335 }, { "epoch": 4.385060494476591, "grad_norm": 2.598161458969116, "learning_rate": 8.557959946059968e-07, "loss": 0.7717, "step": 8336 }, { "epoch": 4.385586533403472, "grad_norm": 2.33305025100708, "learning_rate": 8.552727630953539e-07, "loss": 0.7776, "step": 8337 }, { "epoch": 4.386112572330353, "grad_norm": 2.3142528533935547, "learning_rate": 8.547496585765067e-07, "loss": 0.7956, "step": 8338 }, { "epoch": 4.386638611257233, "grad_norm": 2.3717217445373535, "learning_rate": 8.54226681089845e-07, "loss": 0.7655, "step": 8339 }, { "epoch": 4.387164650184114, "grad_norm": 2.5689072608947754, "learning_rate": 8.537038306757489e-07, "loss": 0.8091, "step": 8340 }, { "epoch": 4.387690689110994, "grad_norm": 2.539479970932007, "learning_rate": 8.531811073745891e-07, "loss": 0.7719, "step": 8341 }, { "epoch": 4.388216728037875, "grad_norm": 2.3576266765594482, "learning_rate": 8.526585112267238e-07, "loss": 0.7694, "step": 8342 }, { "epoch": 4.388742766964755, "grad_norm": 2.4337332248687744, "learning_rate": 8.521360422725047e-07, "loss": 0.7856, "step": 8343 }, { "epoch": 4.389268805891636, "grad_norm": 2.3568594455718994, "learning_rate": 8.516137005522723e-07, "loss": 0.7619, "step": 8344 }, { "epoch": 4.389794844818517, "grad_norm": 2.3867335319519043, "learning_rate": 8.510914861063574e-07, "loss": 0.7845, "step": 8345 }, { "epoch": 4.390320883745397, "grad_norm": 2.3410379886627197, "learning_rate": 8.505693989750807e-07, "loss": 0.7753, "step": 8346 }, { "epoch": 4.390846922672278, "grad_norm": 2.3416929244995117, "learning_rate": 8.500474391987545e-07, "loss": 0.7755, "step": 8347 }, { "epoch": 4.391372961599158, "grad_norm": 2.405708074569702, "learning_rate": 8.49525606817678e-07, "loss": 0.7339, "step": 8348 }, { "epoch": 4.391899000526039, "grad_norm": 2.415078639984131, "learning_rate": 8.490039018721446e-07, "loss": 0.738, "step": 8349 }, { "epoch": 4.392425039452919, "grad_norm": 2.46207857131958, "learning_rate": 8.484823244024343e-07, "loss": 0.806, "step": 8350 }, { "epoch": 4.3929510783798005, "grad_norm": 2.302720308303833, "learning_rate": 8.479608744488194e-07, "loss": 0.7943, "step": 8351 }, { "epoch": 4.393477117306681, "grad_norm": 2.4472496509552, "learning_rate": 8.474395520515621e-07, "loss": 0.7906, "step": 8352 }, { "epoch": 4.394003156233561, "grad_norm": 2.5134267807006836, "learning_rate": 8.469183572509143e-07, "loss": 0.7769, "step": 8353 }, { "epoch": 4.394529195160442, "grad_norm": 2.502992630004883, "learning_rate": 8.463972900871182e-07, "loss": 0.7695, "step": 8354 }, { "epoch": 4.395055234087322, "grad_norm": 2.6500566005706787, "learning_rate": 8.458763506004069e-07, "loss": 0.7824, "step": 8355 }, { "epoch": 4.395581273014203, "grad_norm": 2.5239574909210205, "learning_rate": 8.453555388310016e-07, "loss": 0.7985, "step": 8356 }, { "epoch": 4.396107311941083, "grad_norm": 2.177889108657837, "learning_rate": 8.448348548191154e-07, "loss": 0.7773, "step": 8357 }, { "epoch": 4.3966333508679645, "grad_norm": 2.4276840686798096, "learning_rate": 8.443142986049512e-07, "loss": 0.7757, "step": 8358 }, { "epoch": 4.397159389794845, "grad_norm": 2.3634872436523438, "learning_rate": 8.437938702287021e-07, "loss": 0.731, "step": 8359 }, { "epoch": 4.397685428721726, "grad_norm": 2.341019630432129, "learning_rate": 8.432735697305519e-07, "loss": 0.7496, "step": 8360 }, { "epoch": 4.398211467648606, "grad_norm": 2.389551877975464, "learning_rate": 8.427533971506715e-07, "loss": 0.7998, "step": 8361 }, { "epoch": 4.398737506575486, "grad_norm": 2.5781211853027344, "learning_rate": 8.422333525292267e-07, "loss": 0.7894, "step": 8362 }, { "epoch": 4.399263545502367, "grad_norm": 2.3790340423583984, "learning_rate": 8.417134359063689e-07, "loss": 0.8197, "step": 8363 }, { "epoch": 4.399789584429247, "grad_norm": 2.5205910205841064, "learning_rate": 8.411936473222424e-07, "loss": 0.7896, "step": 8364 }, { "epoch": 4.4003156233561285, "grad_norm": 2.380890130996704, "learning_rate": 8.406739868169808e-07, "loss": 0.7352, "step": 8365 }, { "epoch": 4.400841662283009, "grad_norm": 2.460771083831787, "learning_rate": 8.401544544307081e-07, "loss": 0.8127, "step": 8366 }, { "epoch": 4.40136770120989, "grad_norm": 2.5309970378875732, "learning_rate": 8.396350502035377e-07, "loss": 0.8119, "step": 8367 }, { "epoch": 4.40189374013677, "grad_norm": 2.6718714237213135, "learning_rate": 8.39115774175574e-07, "loss": 0.7927, "step": 8368 }, { "epoch": 4.402419779063651, "grad_norm": 2.600287437438965, "learning_rate": 8.385966263869117e-07, "loss": 0.7806, "step": 8369 }, { "epoch": 4.402945817990531, "grad_norm": 2.450496196746826, "learning_rate": 8.380776068776333e-07, "loss": 0.7564, "step": 8370 }, { "epoch": 4.403471856917412, "grad_norm": 2.3062126636505127, "learning_rate": 8.37558715687814e-07, "loss": 0.7502, "step": 8371 }, { "epoch": 4.4039978958442925, "grad_norm": 2.3181588649749756, "learning_rate": 8.370399528575179e-07, "loss": 0.7638, "step": 8372 }, { "epoch": 4.404523934771173, "grad_norm": 2.222208023071289, "learning_rate": 8.365213184268006e-07, "loss": 0.8, "step": 8373 }, { "epoch": 4.405049973698054, "grad_norm": 2.288255214691162, "learning_rate": 8.360028124357047e-07, "loss": 0.7343, "step": 8374 }, { "epoch": 4.405576012624934, "grad_norm": 2.4590864181518555, "learning_rate": 8.354844349242658e-07, "loss": 0.7771, "step": 8375 }, { "epoch": 4.406102051551815, "grad_norm": 2.5098981857299805, "learning_rate": 8.349661859325084e-07, "loss": 0.8078, "step": 8376 }, { "epoch": 4.406628090478695, "grad_norm": 2.2740654945373535, "learning_rate": 8.344480655004483e-07, "loss": 0.8064, "step": 8377 }, { "epoch": 4.407154129405576, "grad_norm": 2.855205535888672, "learning_rate": 8.339300736680886e-07, "loss": 0.791, "step": 8378 }, { "epoch": 4.4076801683324565, "grad_norm": 2.5346455574035645, "learning_rate": 8.33412210475425e-07, "loss": 0.7553, "step": 8379 }, { "epoch": 4.4082062072593375, "grad_norm": 2.333087682723999, "learning_rate": 8.328944759624424e-07, "loss": 0.6955, "step": 8380 }, { "epoch": 4.408732246186218, "grad_norm": 2.5277631282806396, "learning_rate": 8.32376870169116e-07, "loss": 0.7466, "step": 8381 }, { "epoch": 4.409258285113099, "grad_norm": 2.462573289871216, "learning_rate": 8.31859393135411e-07, "loss": 0.7636, "step": 8382 }, { "epoch": 4.409784324039979, "grad_norm": 2.401510715484619, "learning_rate": 8.313420449012829e-07, "loss": 0.8385, "step": 8383 }, { "epoch": 4.410310362966859, "grad_norm": 2.6439414024353027, "learning_rate": 8.308248255066758e-07, "loss": 0.7896, "step": 8384 }, { "epoch": 4.41083640189374, "grad_norm": 2.5286803245544434, "learning_rate": 8.303077349915264e-07, "loss": 0.7555, "step": 8385 }, { "epoch": 4.4113624408206205, "grad_norm": 3.2387888431549072, "learning_rate": 8.297907733957583e-07, "loss": 0.7913, "step": 8386 }, { "epoch": 4.4118884797475015, "grad_norm": 2.2982900142669678, "learning_rate": 8.292739407592878e-07, "loss": 0.7628, "step": 8387 }, { "epoch": 4.412414518674382, "grad_norm": 2.56265926361084, "learning_rate": 8.287572371220204e-07, "loss": 0.8015, "step": 8388 }, { "epoch": 4.412940557601263, "grad_norm": 2.3667795658111572, "learning_rate": 8.282406625238512e-07, "loss": 0.7541, "step": 8389 }, { "epoch": 4.413466596528143, "grad_norm": 2.279059410095215, "learning_rate": 8.277242170046659e-07, "loss": 0.7931, "step": 8390 }, { "epoch": 4.413992635455024, "grad_norm": 2.6173441410064697, "learning_rate": 8.272079006043407e-07, "loss": 0.8071, "step": 8391 }, { "epoch": 4.414518674381904, "grad_norm": 2.446571111679077, "learning_rate": 8.266917133627397e-07, "loss": 0.733, "step": 8392 }, { "epoch": 4.4150447133087845, "grad_norm": 2.5059781074523926, "learning_rate": 8.261756553197192e-07, "loss": 0.7803, "step": 8393 }, { "epoch": 4.4155707522356655, "grad_norm": 2.3744757175445557, "learning_rate": 8.25659726515125e-07, "loss": 0.7464, "step": 8394 }, { "epoch": 4.416096791162546, "grad_norm": 2.3756353855133057, "learning_rate": 8.251439269887923e-07, "loss": 0.7947, "step": 8395 }, { "epoch": 4.416622830089427, "grad_norm": 2.410831928253174, "learning_rate": 8.246282567805481e-07, "loss": 0.7595, "step": 8396 }, { "epoch": 4.417148869016307, "grad_norm": 2.379734754562378, "learning_rate": 8.241127159302057e-07, "loss": 0.7689, "step": 8397 }, { "epoch": 4.417674907943188, "grad_norm": 2.3048627376556396, "learning_rate": 8.235973044775731e-07, "loss": 0.7398, "step": 8398 }, { "epoch": 4.418200946870068, "grad_norm": 2.428063154220581, "learning_rate": 8.230820224624436e-07, "loss": 0.7211, "step": 8399 }, { "epoch": 4.418726985796949, "grad_norm": 2.43034291267395, "learning_rate": 8.225668699246045e-07, "loss": 0.7887, "step": 8400 }, { "epoch": 4.4192530247238295, "grad_norm": 2.427856922149658, "learning_rate": 8.22051846903831e-07, "loss": 0.7824, "step": 8401 }, { "epoch": 4.41977906365071, "grad_norm": 2.4546382427215576, "learning_rate": 8.21536953439889e-07, "loss": 0.7949, "step": 8402 }, { "epoch": 4.420305102577591, "grad_norm": 2.4922678470611572, "learning_rate": 8.210221895725337e-07, "loss": 0.7618, "step": 8403 }, { "epoch": 4.420831141504471, "grad_norm": 2.3240625858306885, "learning_rate": 8.205075553415115e-07, "loss": 0.7402, "step": 8404 }, { "epoch": 4.421357180431352, "grad_norm": 2.428910732269287, "learning_rate": 8.199930507865584e-07, "loss": 0.7805, "step": 8405 }, { "epoch": 4.421883219358232, "grad_norm": 2.3795695304870605, "learning_rate": 8.194786759473985e-07, "loss": 0.7995, "step": 8406 }, { "epoch": 4.422409258285113, "grad_norm": 2.420412540435791, "learning_rate": 8.189644308637485e-07, "loss": 0.8067, "step": 8407 }, { "epoch": 4.4229352972119935, "grad_norm": 2.4293529987335205, "learning_rate": 8.184503155753134e-07, "loss": 0.7675, "step": 8408 }, { "epoch": 4.423461336138875, "grad_norm": 2.4385569095611572, "learning_rate": 8.179363301217902e-07, "loss": 0.8328, "step": 8409 }, { "epoch": 4.423987375065755, "grad_norm": 2.3509764671325684, "learning_rate": 8.174224745428627e-07, "loss": 0.751, "step": 8410 }, { "epoch": 4.424513413992636, "grad_norm": 2.41316294670105, "learning_rate": 8.169087488782071e-07, "loss": 0.7927, "step": 8411 }, { "epoch": 4.425039452919516, "grad_norm": 2.493514060974121, "learning_rate": 8.163951531674899e-07, "loss": 0.7831, "step": 8412 }, { "epoch": 4.425565491846396, "grad_norm": 2.3674709796905518, "learning_rate": 8.15881687450365e-07, "loss": 0.7428, "step": 8413 }, { "epoch": 4.426091530773277, "grad_norm": 2.5373268127441406, "learning_rate": 8.153683517664781e-07, "loss": 0.8124, "step": 8414 }, { "epoch": 4.4266175697001575, "grad_norm": 2.1366541385650635, "learning_rate": 8.148551461554654e-07, "loss": 0.8128, "step": 8415 }, { "epoch": 4.427143608627039, "grad_norm": 2.275153636932373, "learning_rate": 8.143420706569519e-07, "loss": 0.7987, "step": 8416 }, { "epoch": 4.427669647553919, "grad_norm": 2.5562636852264404, "learning_rate": 8.138291253105526e-07, "loss": 0.7948, "step": 8417 }, { "epoch": 4.4281956864808, "grad_norm": 2.620753765106201, "learning_rate": 8.133163101558733e-07, "loss": 0.7743, "step": 8418 }, { "epoch": 4.42872172540768, "grad_norm": 2.4794702529907227, "learning_rate": 8.128036252325097e-07, "loss": 0.8023, "step": 8419 }, { "epoch": 4.429247764334561, "grad_norm": 2.573004961013794, "learning_rate": 8.122910705800452e-07, "loss": 0.8142, "step": 8420 }, { "epoch": 4.429773803261441, "grad_norm": 2.692836046218872, "learning_rate": 8.117786462380561e-07, "loss": 0.7437, "step": 8421 }, { "epoch": 4.430299842188322, "grad_norm": 2.4754979610443115, "learning_rate": 8.112663522461082e-07, "loss": 0.7769, "step": 8422 }, { "epoch": 4.430825881115203, "grad_norm": 2.2988030910491943, "learning_rate": 8.107541886437542e-07, "loss": 0.7422, "step": 8423 }, { "epoch": 4.431351920042083, "grad_norm": 2.676414728164673, "learning_rate": 8.102421554705409e-07, "loss": 0.7421, "step": 8424 }, { "epoch": 4.431877958968964, "grad_norm": 2.236236810684204, "learning_rate": 8.09730252766002e-07, "loss": 0.7646, "step": 8425 }, { "epoch": 4.432403997895844, "grad_norm": 2.397451162338257, "learning_rate": 8.092184805696631e-07, "loss": 0.7433, "step": 8426 }, { "epoch": 4.432930036822725, "grad_norm": 2.5054244995117188, "learning_rate": 8.087068389210393e-07, "loss": 0.8293, "step": 8427 }, { "epoch": 4.433456075749605, "grad_norm": 2.429736852645874, "learning_rate": 8.081953278596339e-07, "loss": 0.7539, "step": 8428 }, { "epoch": 4.433982114676486, "grad_norm": 2.4258389472961426, "learning_rate": 8.076839474249415e-07, "loss": 0.7607, "step": 8429 }, { "epoch": 4.434508153603367, "grad_norm": 2.3631515502929688, "learning_rate": 8.071726976564476e-07, "loss": 0.7342, "step": 8430 }, { "epoch": 4.435034192530248, "grad_norm": 2.3087663650512695, "learning_rate": 8.066615785936255e-07, "loss": 0.7659, "step": 8431 }, { "epoch": 4.435560231457128, "grad_norm": 2.473992109298706, "learning_rate": 8.061505902759401e-07, "loss": 0.7894, "step": 8432 }, { "epoch": 4.436086270384008, "grad_norm": 2.360266923904419, "learning_rate": 8.056397327428461e-07, "loss": 0.7928, "step": 8433 }, { "epoch": 4.436612309310889, "grad_norm": 2.527277708053589, "learning_rate": 8.051290060337869e-07, "loss": 0.7759, "step": 8434 }, { "epoch": 4.437138348237769, "grad_norm": 2.3555498123168945, "learning_rate": 8.046184101881957e-07, "loss": 0.8395, "step": 8435 }, { "epoch": 4.43766438716465, "grad_norm": 2.616858720779419, "learning_rate": 8.041079452454967e-07, "loss": 0.7778, "step": 8436 }, { "epoch": 4.438190426091531, "grad_norm": 2.3088395595550537, "learning_rate": 8.035976112451044e-07, "loss": 0.7713, "step": 8437 }, { "epoch": 4.438716465018412, "grad_norm": 2.349453926086426, "learning_rate": 8.030874082264217e-07, "loss": 0.7984, "step": 8438 }, { "epoch": 4.439242503945292, "grad_norm": 2.3588361740112305, "learning_rate": 8.025773362288425e-07, "loss": 0.7268, "step": 8439 }, { "epoch": 4.439768542872173, "grad_norm": 2.3122191429138184, "learning_rate": 8.020673952917501e-07, "loss": 0.716, "step": 8440 }, { "epoch": 4.440294581799053, "grad_norm": 2.5526273250579834, "learning_rate": 8.015575854545188e-07, "loss": 0.7475, "step": 8441 }, { "epoch": 4.440820620725933, "grad_norm": 2.3130557537078857, "learning_rate": 8.0104790675651e-07, "loss": 0.7671, "step": 8442 }, { "epoch": 4.441346659652814, "grad_norm": 2.403554677963257, "learning_rate": 8.005383592370771e-07, "loss": 0.7339, "step": 8443 }, { "epoch": 4.441872698579695, "grad_norm": 2.382357597351074, "learning_rate": 8.00028942935564e-07, "loss": 0.7865, "step": 8444 }, { "epoch": 4.442398737506576, "grad_norm": 2.58563232421875, "learning_rate": 7.995196578913034e-07, "loss": 0.7752, "step": 8445 }, { "epoch": 4.442924776433456, "grad_norm": 2.3232104778289795, "learning_rate": 7.99010504143617e-07, "loss": 0.7697, "step": 8446 }, { "epoch": 4.443450815360337, "grad_norm": 2.355912685394287, "learning_rate": 7.985014817318173e-07, "loss": 0.7871, "step": 8447 }, { "epoch": 4.443976854287217, "grad_norm": 2.517832040786743, "learning_rate": 7.979925906952083e-07, "loss": 0.8038, "step": 8448 }, { "epoch": 4.444502893214098, "grad_norm": 2.297689914703369, "learning_rate": 7.974838310730798e-07, "loss": 0.7614, "step": 8449 }, { "epoch": 4.445028932140978, "grad_norm": 2.5216143131256104, "learning_rate": 7.969752029047157e-07, "loss": 0.797, "step": 8450 }, { "epoch": 4.445554971067859, "grad_norm": 2.4923458099365234, "learning_rate": 7.964667062293868e-07, "loss": 0.7882, "step": 8451 }, { "epoch": 4.44608100999474, "grad_norm": 2.4061124324798584, "learning_rate": 7.959583410863558e-07, "loss": 0.8029, "step": 8452 }, { "epoch": 4.44660704892162, "grad_norm": 2.243330478668213, "learning_rate": 7.954501075148738e-07, "loss": 0.7748, "step": 8453 }, { "epoch": 4.447133087848501, "grad_norm": 2.3388850688934326, "learning_rate": 7.949420055541823e-07, "loss": 0.8257, "step": 8454 }, { "epoch": 4.447659126775381, "grad_norm": 2.4567978382110596, "learning_rate": 7.944340352435137e-07, "loss": 0.7395, "step": 8455 }, { "epoch": 4.448185165702262, "grad_norm": 2.3489465713500977, "learning_rate": 7.939261966220873e-07, "loss": 0.7929, "step": 8456 }, { "epoch": 4.448711204629142, "grad_norm": 2.2369866371154785, "learning_rate": 7.934184897291147e-07, "loss": 0.7503, "step": 8457 }, { "epoch": 4.4492372435560235, "grad_norm": 2.4863247871398926, "learning_rate": 7.929109146037978e-07, "loss": 0.7962, "step": 8458 }, { "epoch": 4.449763282482904, "grad_norm": 2.4205501079559326, "learning_rate": 7.924034712853257e-07, "loss": 0.7655, "step": 8459 }, { "epoch": 4.450289321409785, "grad_norm": 2.3936150074005127, "learning_rate": 7.918961598128791e-07, "loss": 0.7441, "step": 8460 }, { "epoch": 4.450815360336665, "grad_norm": 2.4642488956451416, "learning_rate": 7.913889802256288e-07, "loss": 0.7771, "step": 8461 }, { "epoch": 4.451341399263545, "grad_norm": 2.5882582664489746, "learning_rate": 7.908819325627354e-07, "loss": 0.7781, "step": 8462 }, { "epoch": 4.451867438190426, "grad_norm": 2.5373730659484863, "learning_rate": 7.903750168633473e-07, "loss": 0.7425, "step": 8463 }, { "epoch": 4.452393477117306, "grad_norm": 2.5893592834472656, "learning_rate": 7.898682331666049e-07, "loss": 0.8073, "step": 8464 }, { "epoch": 4.4529195160441875, "grad_norm": 2.4263217449188232, "learning_rate": 7.893615815116379e-07, "loss": 0.753, "step": 8465 }, { "epoch": 4.453445554971068, "grad_norm": 2.6976163387298584, "learning_rate": 7.888550619375654e-07, "loss": 0.7712, "step": 8466 }, { "epoch": 4.453971593897949, "grad_norm": 2.378200054168701, "learning_rate": 7.883486744834965e-07, "loss": 0.7748, "step": 8467 }, { "epoch": 4.454497632824829, "grad_norm": 2.5719518661499023, "learning_rate": 7.878424191885303e-07, "loss": 0.787, "step": 8468 }, { "epoch": 4.45502367175171, "grad_norm": 2.571544647216797, "learning_rate": 7.873362960917563e-07, "loss": 0.771, "step": 8469 }, { "epoch": 4.45554971067859, "grad_norm": 2.266282320022583, "learning_rate": 7.868303052322515e-07, "loss": 0.7878, "step": 8470 }, { "epoch": 4.456075749605471, "grad_norm": 2.3318567276000977, "learning_rate": 7.863244466490854e-07, "loss": 0.7524, "step": 8471 }, { "epoch": 4.4566017885323514, "grad_norm": 2.4438860416412354, "learning_rate": 7.858187203813151e-07, "loss": 0.7508, "step": 8472 }, { "epoch": 4.457127827459232, "grad_norm": 2.4212872982025146, "learning_rate": 7.853131264679883e-07, "loss": 0.7959, "step": 8473 }, { "epoch": 4.457653866386113, "grad_norm": 2.4765875339508057, "learning_rate": 7.848076649481437e-07, "loss": 0.7896, "step": 8474 }, { "epoch": 4.458179905312993, "grad_norm": 2.2553257942199707, "learning_rate": 7.843023358608079e-07, "loss": 0.7595, "step": 8475 }, { "epoch": 4.458705944239874, "grad_norm": 2.6041972637176514, "learning_rate": 7.837971392449986e-07, "loss": 0.7653, "step": 8476 }, { "epoch": 4.459231983166754, "grad_norm": 2.205690860748291, "learning_rate": 7.832920751397235e-07, "loss": 0.7607, "step": 8477 }, { "epoch": 4.459758022093635, "grad_norm": 2.534125328063965, "learning_rate": 7.827871435839777e-07, "loss": 0.7826, "step": 8478 }, { "epoch": 4.4602840610205154, "grad_norm": 2.3961703777313232, "learning_rate": 7.822823446167485e-07, "loss": 0.7594, "step": 8479 }, { "epoch": 4.4608100999473965, "grad_norm": 2.6331467628479004, "learning_rate": 7.81777678277012e-07, "loss": 0.768, "step": 8480 }, { "epoch": 4.461336138874277, "grad_norm": 2.2320566177368164, "learning_rate": 7.812731446037342e-07, "loss": 0.766, "step": 8481 }, { "epoch": 4.461862177801157, "grad_norm": 2.401158571243286, "learning_rate": 7.807687436358721e-07, "loss": 0.7671, "step": 8482 }, { "epoch": 4.462388216728038, "grad_norm": 2.397277355194092, "learning_rate": 7.802644754123693e-07, "loss": 0.7177, "step": 8483 }, { "epoch": 4.462914255654918, "grad_norm": 2.401618480682373, "learning_rate": 7.797603399721626e-07, "loss": 0.8046, "step": 8484 }, { "epoch": 4.463440294581799, "grad_norm": 2.67268967628479, "learning_rate": 7.792563373541759e-07, "loss": 0.7701, "step": 8485 }, { "epoch": 4.463966333508679, "grad_norm": 2.545565605163574, "learning_rate": 7.787524675973244e-07, "loss": 0.7433, "step": 8486 }, { "epoch": 4.4644923724355605, "grad_norm": 2.3011245727539062, "learning_rate": 7.782487307405126e-07, "loss": 0.7857, "step": 8487 }, { "epoch": 4.465018411362441, "grad_norm": 2.3063979148864746, "learning_rate": 7.777451268226349e-07, "loss": 0.7582, "step": 8488 }, { "epoch": 4.465544450289322, "grad_norm": 2.510178565979004, "learning_rate": 7.772416558825751e-07, "loss": 0.7557, "step": 8489 }, { "epoch": 4.466070489216202, "grad_norm": 2.3307723999023438, "learning_rate": 7.767383179592072e-07, "loss": 0.7745, "step": 8490 }, { "epoch": 4.466596528143082, "grad_norm": 2.3145599365234375, "learning_rate": 7.762351130913956e-07, "loss": 0.792, "step": 8491 }, { "epoch": 4.467122567069963, "grad_norm": 2.459284543991089, "learning_rate": 7.757320413179914e-07, "loss": 0.7532, "step": 8492 }, { "epoch": 4.467648605996843, "grad_norm": 2.545323610305786, "learning_rate": 7.752291026778386e-07, "loss": 0.7715, "step": 8493 }, { "epoch": 4.4681746449237245, "grad_norm": 2.2895984649658203, "learning_rate": 7.747262972097702e-07, "loss": 0.798, "step": 8494 }, { "epoch": 4.468700683850605, "grad_norm": 2.1966278553009033, "learning_rate": 7.742236249526077e-07, "loss": 0.8085, "step": 8495 }, { "epoch": 4.469226722777486, "grad_norm": 2.339207649230957, "learning_rate": 7.737210859451636e-07, "loss": 0.7717, "step": 8496 }, { "epoch": 4.469752761704366, "grad_norm": 2.478672504425049, "learning_rate": 7.732186802262393e-07, "loss": 0.7947, "step": 8497 }, { "epoch": 4.470278800631247, "grad_norm": 2.6015913486480713, "learning_rate": 7.727164078346278e-07, "loss": 0.8433, "step": 8498 }, { "epoch": 4.470804839558127, "grad_norm": 2.2686665058135986, "learning_rate": 7.722142688091081e-07, "loss": 0.776, "step": 8499 }, { "epoch": 4.471330878485007, "grad_norm": 2.3564484119415283, "learning_rate": 7.717122631884522e-07, "loss": 0.7593, "step": 8500 }, { "epoch": 4.4718569174118885, "grad_norm": 2.454174518585205, "learning_rate": 7.712103910114203e-07, "loss": 0.7528, "step": 8501 }, { "epoch": 4.472382956338769, "grad_norm": 2.343982219696045, "learning_rate": 7.707086523167631e-07, "loss": 0.7554, "step": 8502 }, { "epoch": 4.47290899526565, "grad_norm": 3.7665395736694336, "learning_rate": 7.702070471432205e-07, "loss": 0.7583, "step": 8503 }, { "epoch": 4.47343503419253, "grad_norm": 2.692082166671753, "learning_rate": 7.697055755295221e-07, "loss": 0.7933, "step": 8504 }, { "epoch": 4.473961073119411, "grad_norm": 2.326594114303589, "learning_rate": 7.692042375143882e-07, "loss": 0.7968, "step": 8505 }, { "epoch": 4.474487112046291, "grad_norm": 2.5664684772491455, "learning_rate": 7.687030331365259e-07, "loss": 0.7446, "step": 8506 }, { "epoch": 4.475013150973172, "grad_norm": 2.4301488399505615, "learning_rate": 7.682019624346359e-07, "loss": 0.7855, "step": 8507 }, { "epoch": 4.4755391899000525, "grad_norm": 2.3681881427764893, "learning_rate": 7.677010254474049e-07, "loss": 0.8262, "step": 8508 }, { "epoch": 4.476065228826934, "grad_norm": 2.448824167251587, "learning_rate": 7.672002222135114e-07, "loss": 0.7863, "step": 8509 }, { "epoch": 4.476591267753814, "grad_norm": 2.3974449634552, "learning_rate": 7.666995527716237e-07, "loss": 0.7641, "step": 8510 }, { "epoch": 4.477117306680694, "grad_norm": 2.547948122024536, "learning_rate": 7.661990171603989e-07, "loss": 0.7909, "step": 8511 }, { "epoch": 4.477643345607575, "grad_norm": 2.3657867908477783, "learning_rate": 7.656986154184851e-07, "loss": 0.7736, "step": 8512 }, { "epoch": 4.478169384534455, "grad_norm": 2.420616626739502, "learning_rate": 7.65198347584517e-07, "loss": 0.7904, "step": 8513 }, { "epoch": 4.478695423461336, "grad_norm": 2.531310796737671, "learning_rate": 7.646982136971226e-07, "loss": 0.8114, "step": 8514 }, { "epoch": 4.4792214623882165, "grad_norm": 2.3340201377868652, "learning_rate": 7.641982137949172e-07, "loss": 0.786, "step": 8515 }, { "epoch": 4.479747501315098, "grad_norm": 2.3740508556365967, "learning_rate": 7.636983479165069e-07, "loss": 0.7586, "step": 8516 }, { "epoch": 4.480273540241978, "grad_norm": 2.344174861907959, "learning_rate": 7.63198616100487e-07, "loss": 0.802, "step": 8517 }, { "epoch": 4.480799579168859, "grad_norm": 2.4462900161743164, "learning_rate": 7.626990183854432e-07, "loss": 0.8016, "step": 8518 }, { "epoch": 4.481325618095739, "grad_norm": 2.382189989089966, "learning_rate": 7.62199554809949e-07, "loss": 0.7482, "step": 8519 }, { "epoch": 4.48185165702262, "grad_norm": 2.4363040924072266, "learning_rate": 7.617002254125697e-07, "loss": 0.7914, "step": 8520 }, { "epoch": 4.4823776959495, "grad_norm": 2.367854356765747, "learning_rate": 7.612010302318582e-07, "loss": 0.7728, "step": 8521 }, { "epoch": 4.4829037348763805, "grad_norm": 2.530144453048706, "learning_rate": 7.607019693063583e-07, "loss": 0.8057, "step": 8522 }, { "epoch": 4.483429773803262, "grad_norm": 2.3546833992004395, "learning_rate": 7.602030426746038e-07, "loss": 0.7385, "step": 8523 }, { "epoch": 4.483955812730142, "grad_norm": 2.350921869277954, "learning_rate": 7.597042503751176e-07, "loss": 0.7842, "step": 8524 }, { "epoch": 4.484481851657023, "grad_norm": 2.4936697483062744, "learning_rate": 7.592055924464115e-07, "loss": 0.7375, "step": 8525 }, { "epoch": 4.485007890583903, "grad_norm": 2.3664679527282715, "learning_rate": 7.58707068926989e-07, "loss": 0.8036, "step": 8526 }, { "epoch": 4.485533929510784, "grad_norm": 2.319458246231079, "learning_rate": 7.5820867985534e-07, "loss": 0.7966, "step": 8527 }, { "epoch": 4.486059968437664, "grad_norm": 2.2757842540740967, "learning_rate": 7.577104252699468e-07, "loss": 0.7456, "step": 8528 }, { "epoch": 4.486586007364545, "grad_norm": 2.399097204208374, "learning_rate": 7.572123052092803e-07, "loss": 0.7444, "step": 8529 }, { "epoch": 4.487112046291426, "grad_norm": 2.4299046993255615, "learning_rate": 7.567143197118007e-07, "loss": 0.7868, "step": 8530 }, { "epoch": 4.487638085218306, "grad_norm": 2.492642879486084, "learning_rate": 7.562164688159598e-07, "loss": 0.7944, "step": 8531 }, { "epoch": 4.488164124145187, "grad_norm": 2.453767776489258, "learning_rate": 7.55718752560195e-07, "loss": 0.8137, "step": 8532 }, { "epoch": 4.488690163072067, "grad_norm": 2.6281778812408447, "learning_rate": 7.552211709829371e-07, "loss": 0.8407, "step": 8533 }, { "epoch": 4.489216201998948, "grad_norm": 2.487858295440674, "learning_rate": 7.547237241226055e-07, "loss": 0.7627, "step": 8534 }, { "epoch": 4.489742240925828, "grad_norm": 2.422574996948242, "learning_rate": 7.542264120176071e-07, "loss": 0.7594, "step": 8535 }, { "epoch": 4.490268279852709, "grad_norm": 2.3697311878204346, "learning_rate": 7.537292347063413e-07, "loss": 0.7901, "step": 8536 }, { "epoch": 4.49079431877959, "grad_norm": 2.537184715270996, "learning_rate": 7.53232192227196e-07, "loss": 0.7704, "step": 8537 }, { "epoch": 4.491320357706471, "grad_norm": 2.516770362854004, "learning_rate": 7.527352846185479e-07, "loss": 0.7986, "step": 8538 }, { "epoch": 4.491846396633351, "grad_norm": 2.490718364715576, "learning_rate": 7.522385119187645e-07, "loss": 0.8215, "step": 8539 }, { "epoch": 4.492372435560231, "grad_norm": 2.390807628631592, "learning_rate": 7.517418741662022e-07, "loss": 0.7989, "step": 8540 }, { "epoch": 4.492898474487112, "grad_norm": 2.324885368347168, "learning_rate": 7.51245371399208e-07, "loss": 0.7311, "step": 8541 }, { "epoch": 4.493424513413992, "grad_norm": 2.517463207244873, "learning_rate": 7.507490036561157e-07, "loss": 0.7447, "step": 8542 }, { "epoch": 4.493950552340873, "grad_norm": 2.513991594314575, "learning_rate": 7.502527709752518e-07, "loss": 0.8059, "step": 8543 }, { "epoch": 4.494476591267754, "grad_norm": 2.513127326965332, "learning_rate": 7.497566733949318e-07, "loss": 0.8338, "step": 8544 }, { "epoch": 4.495002630194635, "grad_norm": 2.387063980102539, "learning_rate": 7.492607109534586e-07, "loss": 0.7503, "step": 8545 }, { "epoch": 4.495528669121515, "grad_norm": 2.4754624366760254, "learning_rate": 7.487648836891268e-07, "loss": 0.7895, "step": 8546 }, { "epoch": 4.496054708048396, "grad_norm": 2.582383155822754, "learning_rate": 7.482691916402198e-07, "loss": 0.8101, "step": 8547 }, { "epoch": 4.496580746975276, "grad_norm": 2.391432285308838, "learning_rate": 7.477736348450119e-07, "loss": 0.7859, "step": 8548 }, { "epoch": 4.497106785902156, "grad_norm": 2.4432260990142822, "learning_rate": 7.472782133417639e-07, "loss": 0.7689, "step": 8549 }, { "epoch": 4.497632824829037, "grad_norm": 2.3713948726654053, "learning_rate": 7.467829271687291e-07, "loss": 0.7808, "step": 8550 }, { "epoch": 4.498158863755918, "grad_norm": 2.542748212814331, "learning_rate": 7.462877763641488e-07, "loss": 0.7207, "step": 8551 }, { "epoch": 4.498684902682799, "grad_norm": 2.4236514568328857, "learning_rate": 7.457927609662547e-07, "loss": 0.7669, "step": 8552 }, { "epoch": 4.499210941609679, "grad_norm": 2.312593460083008, "learning_rate": 7.452978810132677e-07, "loss": 0.7724, "step": 8553 }, { "epoch": 4.49973698053656, "grad_norm": 2.324626922607422, "learning_rate": 7.448031365433986e-07, "loss": 0.7559, "step": 8554 }, { "epoch": 4.50026301946344, "grad_norm": 2.6642448902130127, "learning_rate": 7.443085275948464e-07, "loss": 0.807, "step": 8555 }, { "epoch": 4.500789058390321, "grad_norm": 2.499500036239624, "learning_rate": 7.438140542058014e-07, "loss": 0.7515, "step": 8556 }, { "epoch": 4.501315097317201, "grad_norm": 2.568876266479492, "learning_rate": 7.433197164144415e-07, "loss": 0.7903, "step": 8557 }, { "epoch": 4.501841136244082, "grad_norm": 2.3730709552764893, "learning_rate": 7.428255142589361e-07, "loss": 0.7739, "step": 8558 }, { "epoch": 4.502367175170963, "grad_norm": 2.468108892440796, "learning_rate": 7.423314477774432e-07, "loss": 0.7521, "step": 8559 }, { "epoch": 4.502893214097844, "grad_norm": 2.5371437072753906, "learning_rate": 7.418375170081102e-07, "loss": 0.7516, "step": 8560 }, { "epoch": 4.503419253024724, "grad_norm": 2.391871213912964, "learning_rate": 7.413437219890743e-07, "loss": 0.7492, "step": 8561 }, { "epoch": 4.503945291951604, "grad_norm": 2.389869451522827, "learning_rate": 7.408500627584628e-07, "loss": 0.7564, "step": 8562 }, { "epoch": 4.504471330878485, "grad_norm": 2.913843870162964, "learning_rate": 7.403565393543907e-07, "loss": 0.8215, "step": 8563 }, { "epoch": 4.504997369805365, "grad_norm": 2.475311040878296, "learning_rate": 7.39863151814964e-07, "loss": 0.7833, "step": 8564 }, { "epoch": 4.505523408732246, "grad_norm": 2.3944201469421387, "learning_rate": 7.393699001782781e-07, "loss": 0.7367, "step": 8565 }, { "epoch": 4.506049447659127, "grad_norm": 2.4370815753936768, "learning_rate": 7.388767844824177e-07, "loss": 0.7496, "step": 8566 }, { "epoch": 4.506575486586008, "grad_norm": 2.4445574283599854, "learning_rate": 7.383838047654576e-07, "loss": 0.775, "step": 8567 }, { "epoch": 4.507101525512888, "grad_norm": 2.3735005855560303, "learning_rate": 7.378909610654603e-07, "loss": 0.7439, "step": 8568 }, { "epoch": 4.507627564439769, "grad_norm": 2.569633722305298, "learning_rate": 7.373982534204799e-07, "loss": 0.7345, "step": 8569 }, { "epoch": 4.508153603366649, "grad_norm": 2.397230625152588, "learning_rate": 7.369056818685582e-07, "loss": 0.7491, "step": 8570 }, { "epoch": 4.508679642293529, "grad_norm": 2.396317481994629, "learning_rate": 7.364132464477278e-07, "loss": 0.8091, "step": 8571 }, { "epoch": 4.50920568122041, "grad_norm": 2.3805654048919678, "learning_rate": 7.359209471960102e-07, "loss": 0.7595, "step": 8572 }, { "epoch": 4.509731720147291, "grad_norm": 2.202779531478882, "learning_rate": 7.354287841514169e-07, "loss": 0.7596, "step": 8573 }, { "epoch": 4.510257759074172, "grad_norm": 2.462190628051758, "learning_rate": 7.349367573519483e-07, "loss": 0.8022, "step": 8574 }, { "epoch": 4.510783798001052, "grad_norm": 2.468679189682007, "learning_rate": 7.344448668355944e-07, "loss": 0.7689, "step": 8575 }, { "epoch": 4.511309836927933, "grad_norm": 3.928131580352783, "learning_rate": 7.33953112640336e-07, "loss": 0.7821, "step": 8576 }, { "epoch": 4.511835875854813, "grad_norm": 2.358807325363159, "learning_rate": 7.334614948041405e-07, "loss": 0.7797, "step": 8577 }, { "epoch": 4.512361914781694, "grad_norm": 2.393948554992676, "learning_rate": 7.329700133649667e-07, "loss": 0.7836, "step": 8578 }, { "epoch": 4.512887953708574, "grad_norm": 2.342907667160034, "learning_rate": 7.324786683607632e-07, "loss": 0.7728, "step": 8579 }, { "epoch": 4.513413992635455, "grad_norm": 2.4625515937805176, "learning_rate": 7.31987459829468e-07, "loss": 0.7783, "step": 8580 }, { "epoch": 4.513940031562336, "grad_norm": 2.371654748916626, "learning_rate": 7.314963878090062e-07, "loss": 0.7875, "step": 8581 }, { "epoch": 4.514466070489216, "grad_norm": 2.374716281890869, "learning_rate": 7.310054523372956e-07, "loss": 0.769, "step": 8582 }, { "epoch": 4.514992109416097, "grad_norm": 2.3157947063446045, "learning_rate": 7.305146534522414e-07, "loss": 0.7755, "step": 8583 }, { "epoch": 4.515518148342977, "grad_norm": 2.453199625015259, "learning_rate": 7.300239911917403e-07, "loss": 0.8074, "step": 8584 }, { "epoch": 4.516044187269858, "grad_norm": 2.443173885345459, "learning_rate": 7.295334655936748e-07, "loss": 0.7844, "step": 8585 }, { "epoch": 4.516570226196738, "grad_norm": 2.417264938354492, "learning_rate": 7.290430766959206e-07, "loss": 0.7449, "step": 8586 }, { "epoch": 4.5170962651236195, "grad_norm": 2.388425350189209, "learning_rate": 7.285528245363407e-07, "loss": 0.7466, "step": 8587 }, { "epoch": 4.5176223040505, "grad_norm": 2.3412327766418457, "learning_rate": 7.280627091527887e-07, "loss": 0.7085, "step": 8588 }, { "epoch": 4.51814834297738, "grad_norm": 2.4061696529388428, "learning_rate": 7.275727305831068e-07, "loss": 0.7955, "step": 8589 }, { "epoch": 4.518674381904261, "grad_norm": 2.6668288707733154, "learning_rate": 7.270828888651268e-07, "loss": 0.7563, "step": 8590 }, { "epoch": 4.519200420831141, "grad_norm": 2.374195098876953, "learning_rate": 7.265931840366713e-07, "loss": 0.7585, "step": 8591 }, { "epoch": 4.519726459758022, "grad_norm": 2.4299745559692383, "learning_rate": 7.261036161355497e-07, "loss": 0.7414, "step": 8592 }, { "epoch": 4.520252498684902, "grad_norm": 2.3150992393493652, "learning_rate": 7.256141851995633e-07, "loss": 0.769, "step": 8593 }, { "epoch": 4.5207785376117835, "grad_norm": 2.336333751678467, "learning_rate": 7.251248912665004e-07, "loss": 0.7586, "step": 8594 }, { "epoch": 4.521304576538664, "grad_norm": 2.327484130859375, "learning_rate": 7.246357343741409e-07, "loss": 0.7368, "step": 8595 }, { "epoch": 4.521830615465545, "grad_norm": 2.348048448562622, "learning_rate": 7.241467145602535e-07, "loss": 0.769, "step": 8596 }, { "epoch": 4.522356654392425, "grad_norm": 2.4073588848114014, "learning_rate": 7.236578318625956e-07, "loss": 0.7694, "step": 8597 }, { "epoch": 4.522882693319305, "grad_norm": 2.4297101497650146, "learning_rate": 7.231690863189162e-07, "loss": 0.7752, "step": 8598 }, { "epoch": 4.523408732246186, "grad_norm": 2.371044397354126, "learning_rate": 7.226804779669497e-07, "loss": 0.7725, "step": 8599 }, { "epoch": 4.523934771173067, "grad_norm": 2.295240879058838, "learning_rate": 7.221920068444233e-07, "loss": 0.7703, "step": 8600 }, { "epoch": 4.5244608100999475, "grad_norm": 2.502183437347412, "learning_rate": 7.217036729890525e-07, "loss": 0.7909, "step": 8601 }, { "epoch": 4.524986849026828, "grad_norm": 2.6421236991882324, "learning_rate": 7.212154764385426e-07, "loss": 0.8014, "step": 8602 }, { "epoch": 4.525512887953709, "grad_norm": 2.4223201274871826, "learning_rate": 7.207274172305879e-07, "loss": 0.8049, "step": 8603 }, { "epoch": 4.526038926880589, "grad_norm": 2.3753812313079834, "learning_rate": 7.202394954028724e-07, "loss": 0.7774, "step": 8604 }, { "epoch": 4.52656496580747, "grad_norm": 2.509178400039673, "learning_rate": 7.197517109930686e-07, "loss": 0.8371, "step": 8605 }, { "epoch": 4.52709100473435, "grad_norm": 2.294024705886841, "learning_rate": 7.192640640388396e-07, "loss": 0.7533, "step": 8606 }, { "epoch": 4.527617043661231, "grad_norm": 2.3881561756134033, "learning_rate": 7.187765545778366e-07, "loss": 0.7843, "step": 8607 }, { "epoch": 4.5281430825881115, "grad_norm": 2.655238151550293, "learning_rate": 7.182891826477012e-07, "loss": 0.7833, "step": 8608 }, { "epoch": 4.528669121514993, "grad_norm": 2.3011088371276855, "learning_rate": 7.178019482860643e-07, "loss": 0.7688, "step": 8609 }, { "epoch": 4.529195160441873, "grad_norm": 2.2885167598724365, "learning_rate": 7.173148515305462e-07, "loss": 0.7852, "step": 8610 }, { "epoch": 4.529721199368753, "grad_norm": 2.3931100368499756, "learning_rate": 7.168278924187557e-07, "loss": 0.7963, "step": 8611 }, { "epoch": 4.530247238295634, "grad_norm": 2.453887939453125, "learning_rate": 7.163410709882931e-07, "loss": 0.7557, "step": 8612 }, { "epoch": 4.530773277222514, "grad_norm": 2.39274263381958, "learning_rate": 7.158543872767445e-07, "loss": 0.7726, "step": 8613 }, { "epoch": 4.531299316149395, "grad_norm": 2.4126060009002686, "learning_rate": 7.153678413216888e-07, "loss": 0.7837, "step": 8614 }, { "epoch": 4.5318253550762755, "grad_norm": 2.4622690677642822, "learning_rate": 7.148814331606923e-07, "loss": 0.7662, "step": 8615 }, { "epoch": 4.532351394003157, "grad_norm": 2.367560625076294, "learning_rate": 7.143951628313126e-07, "loss": 0.761, "step": 8616 }, { "epoch": 4.532877432930037, "grad_norm": 2.3616883754730225, "learning_rate": 7.139090303710936e-07, "loss": 0.8238, "step": 8617 }, { "epoch": 4.533403471856918, "grad_norm": 2.316927671432495, "learning_rate": 7.134230358175709e-07, "loss": 0.7731, "step": 8618 }, { "epoch": 4.533929510783798, "grad_norm": 2.506267547607422, "learning_rate": 7.129371792082698e-07, "loss": 0.7831, "step": 8619 }, { "epoch": 4.534455549710678, "grad_norm": 2.3867814540863037, "learning_rate": 7.124514605807026e-07, "loss": 0.8001, "step": 8620 }, { "epoch": 4.534981588637559, "grad_norm": 2.3049447536468506, "learning_rate": 7.119658799723725e-07, "loss": 0.7781, "step": 8621 }, { "epoch": 4.5355076275644395, "grad_norm": 2.5051393508911133, "learning_rate": 7.114804374207726e-07, "loss": 0.7575, "step": 8622 }, { "epoch": 4.536033666491321, "grad_norm": 2.7193150520324707, "learning_rate": 7.109951329633843e-07, "loss": 0.7607, "step": 8623 }, { "epoch": 4.536559705418201, "grad_norm": 2.5874674320220947, "learning_rate": 7.105099666376789e-07, "loss": 0.7381, "step": 8624 }, { "epoch": 4.537085744345082, "grad_norm": 2.318556308746338, "learning_rate": 7.100249384811164e-07, "loss": 0.7787, "step": 8625 }, { "epoch": 4.537611783271962, "grad_norm": 2.603573799133301, "learning_rate": 7.095400485311476e-07, "loss": 0.7154, "step": 8626 }, { "epoch": 4.538137822198843, "grad_norm": 2.3907535076141357, "learning_rate": 7.090552968252102e-07, "loss": 0.7359, "step": 8627 }, { "epoch": 4.538663861125723, "grad_norm": 2.3038787841796875, "learning_rate": 7.085706834007328e-07, "loss": 0.7801, "step": 8628 }, { "epoch": 4.5391899000526035, "grad_norm": 2.395252227783203, "learning_rate": 7.080862082951342e-07, "loss": 0.7156, "step": 8629 }, { "epoch": 4.5397159389794846, "grad_norm": 2.372432231903076, "learning_rate": 7.076018715458199e-07, "loss": 0.7824, "step": 8630 }, { "epoch": 4.540241977906365, "grad_norm": 2.4038825035095215, "learning_rate": 7.071176731901872e-07, "loss": 0.7465, "step": 8631 }, { "epoch": 4.540768016833246, "grad_norm": 2.3435847759246826, "learning_rate": 7.066336132656213e-07, "loss": 0.7365, "step": 8632 }, { "epoch": 4.541294055760126, "grad_norm": 2.585966110229492, "learning_rate": 7.061496918094976e-07, "loss": 0.7653, "step": 8633 }, { "epoch": 4.541820094687007, "grad_norm": 2.353532552719116, "learning_rate": 7.056659088591807e-07, "loss": 0.7869, "step": 8634 }, { "epoch": 4.542346133613887, "grad_norm": 2.5097947120666504, "learning_rate": 7.051822644520234e-07, "loss": 0.7898, "step": 8635 }, { "epoch": 4.542872172540768, "grad_norm": 2.45700740814209, "learning_rate": 7.046987586253687e-07, "loss": 0.7752, "step": 8636 }, { "epoch": 4.5433982114676486, "grad_norm": 2.5616374015808105, "learning_rate": 7.042153914165489e-07, "loss": 0.7413, "step": 8637 }, { "epoch": 4.543924250394529, "grad_norm": 2.634974956512451, "learning_rate": 7.037321628628857e-07, "loss": 0.7467, "step": 8638 }, { "epoch": 4.54445028932141, "grad_norm": 3.1471107006073, "learning_rate": 7.032490730016898e-07, "loss": 0.7792, "step": 8639 }, { "epoch": 4.54497632824829, "grad_norm": 2.364485263824463, "learning_rate": 7.02766121870262e-07, "loss": 0.7232, "step": 8640 }, { "epoch": 4.545502367175171, "grad_norm": 2.4992127418518066, "learning_rate": 7.022833095058898e-07, "loss": 0.7807, "step": 8641 }, { "epoch": 4.546028406102051, "grad_norm": 2.4304091930389404, "learning_rate": 7.01800635945854e-07, "loss": 0.7805, "step": 8642 }, { "epoch": 4.546554445028932, "grad_norm": 2.409531593322754, "learning_rate": 7.013181012274209e-07, "loss": 0.7766, "step": 8643 }, { "epoch": 4.5470804839558125, "grad_norm": 2.4331626892089844, "learning_rate": 7.008357053878481e-07, "loss": 0.8028, "step": 8644 }, { "epoch": 4.547606522882694, "grad_norm": 2.584266185760498, "learning_rate": 7.003534484643823e-07, "loss": 0.7826, "step": 8645 }, { "epoch": 4.548132561809574, "grad_norm": 2.465193510055542, "learning_rate": 6.998713304942592e-07, "loss": 0.7922, "step": 8646 }, { "epoch": 4.548658600736454, "grad_norm": 2.4383318424224854, "learning_rate": 6.993893515147037e-07, "loss": 0.7869, "step": 8647 }, { "epoch": 4.549184639663335, "grad_norm": 2.2516443729400635, "learning_rate": 6.989075115629316e-07, "loss": 0.717, "step": 8648 }, { "epoch": 4.549710678590216, "grad_norm": 2.4583303928375244, "learning_rate": 6.98425810676144e-07, "loss": 0.7784, "step": 8649 }, { "epoch": 4.550236717517096, "grad_norm": 2.2929608821868896, "learning_rate": 6.97944248891535e-07, "loss": 0.7946, "step": 8650 }, { "epoch": 4.5507627564439765, "grad_norm": 2.410184621810913, "learning_rate": 6.974628262462865e-07, "loss": 0.7612, "step": 8651 }, { "epoch": 4.551288795370858, "grad_norm": 2.567108154296875, "learning_rate": 6.969815427775697e-07, "loss": 0.7722, "step": 8652 }, { "epoch": 4.551814834297738, "grad_norm": 2.528733968734741, "learning_rate": 6.965003985225466e-07, "loss": 0.745, "step": 8653 }, { "epoch": 4.552340873224619, "grad_norm": 2.424027919769287, "learning_rate": 6.960193935183648e-07, "loss": 0.8057, "step": 8654 }, { "epoch": 4.552866912151499, "grad_norm": 2.3696846961975098, "learning_rate": 6.955385278021653e-07, "loss": 0.8219, "step": 8655 }, { "epoch": 4.55339295107838, "grad_norm": 2.3667409420013428, "learning_rate": 6.950578014110751e-07, "loss": 0.7783, "step": 8656 }, { "epoch": 4.55391899000526, "grad_norm": 2.292508840560913, "learning_rate": 6.945772143822121e-07, "loss": 0.762, "step": 8657 }, { "epoch": 4.554445028932141, "grad_norm": 2.424623489379883, "learning_rate": 6.940967667526832e-07, "loss": 0.7979, "step": 8658 }, { "epoch": 4.554971067859022, "grad_norm": 2.304807424545288, "learning_rate": 6.936164585595848e-07, "loss": 0.7994, "step": 8659 }, { "epoch": 4.555497106785902, "grad_norm": 2.304176092147827, "learning_rate": 6.931362898400021e-07, "loss": 0.8095, "step": 8660 }, { "epoch": 4.556023145712783, "grad_norm": 2.317871570587158, "learning_rate": 6.926562606310092e-07, "loss": 0.7901, "step": 8661 }, { "epoch": 4.556549184639663, "grad_norm": 2.291839361190796, "learning_rate": 6.921763709696711e-07, "loss": 0.7656, "step": 8662 }, { "epoch": 4.557075223566544, "grad_norm": 2.293214797973633, "learning_rate": 6.916966208930392e-07, "loss": 0.7455, "step": 8663 }, { "epoch": 4.557601262493424, "grad_norm": 2.4763870239257812, "learning_rate": 6.912170104381563e-07, "loss": 0.7948, "step": 8664 }, { "epoch": 4.558127301420305, "grad_norm": 2.2563560009002686, "learning_rate": 6.907375396420537e-07, "loss": 0.7458, "step": 8665 }, { "epoch": 4.558653340347186, "grad_norm": 2.327197313308716, "learning_rate": 6.902582085417534e-07, "loss": 0.7709, "step": 8666 }, { "epoch": 4.559179379274067, "grad_norm": 2.5717575550079346, "learning_rate": 6.89779017174263e-07, "loss": 0.8029, "step": 8667 }, { "epoch": 4.559705418200947, "grad_norm": 2.384303331375122, "learning_rate": 6.892999655765828e-07, "loss": 0.7914, "step": 8668 }, { "epoch": 4.560231457127827, "grad_norm": 2.4297525882720947, "learning_rate": 6.888210537857015e-07, "loss": 0.7889, "step": 8669 }, { "epoch": 4.560757496054708, "grad_norm": 2.433431625366211, "learning_rate": 6.883422818385954e-07, "loss": 0.8015, "step": 8670 }, { "epoch": 4.561283534981588, "grad_norm": 2.3815135955810547, "learning_rate": 6.878636497722319e-07, "loss": 0.7547, "step": 8671 }, { "epoch": 4.561809573908469, "grad_norm": 2.287050724029541, "learning_rate": 6.873851576235665e-07, "loss": 0.7641, "step": 8672 }, { "epoch": 4.56233561283535, "grad_norm": 2.380012273788452, "learning_rate": 6.86906805429545e-07, "loss": 0.7517, "step": 8673 }, { "epoch": 4.562861651762231, "grad_norm": 2.3945987224578857, "learning_rate": 6.864285932271008e-07, "loss": 0.8037, "step": 8674 }, { "epoch": 4.563387690689111, "grad_norm": 2.3405630588531494, "learning_rate": 6.859505210531578e-07, "loss": 0.7652, "step": 8675 }, { "epoch": 4.563913729615992, "grad_norm": 2.333310604095459, "learning_rate": 6.854725889446295e-07, "loss": 0.7581, "step": 8676 }, { "epoch": 4.564439768542872, "grad_norm": 2.389495849609375, "learning_rate": 6.849947969384163e-07, "loss": 0.7772, "step": 8677 }, { "epoch": 4.564965807469752, "grad_norm": 2.441429853439331, "learning_rate": 6.845171450714105e-07, "loss": 0.7923, "step": 8678 }, { "epoch": 4.565491846396633, "grad_norm": 2.600302219390869, "learning_rate": 6.840396333804908e-07, "loss": 0.8181, "step": 8679 }, { "epoch": 4.566017885323514, "grad_norm": 2.581071615219116, "learning_rate": 6.835622619025273e-07, "loss": 0.7651, "step": 8680 }, { "epoch": 4.566543924250395, "grad_norm": 2.2534122467041016, "learning_rate": 6.830850306743786e-07, "loss": 0.7226, "step": 8681 }, { "epoch": 4.567069963177275, "grad_norm": 2.76198148727417, "learning_rate": 6.826079397328924e-07, "loss": 0.8002, "step": 8682 }, { "epoch": 4.567596002104156, "grad_norm": 2.5461013317108154, "learning_rate": 6.821309891149067e-07, "loss": 0.7689, "step": 8683 }, { "epoch": 4.568122041031036, "grad_norm": 2.5005953311920166, "learning_rate": 6.816541788572453e-07, "loss": 0.7961, "step": 8684 }, { "epoch": 4.568648079957917, "grad_norm": 2.4961671829223633, "learning_rate": 6.811775089967248e-07, "loss": 0.7173, "step": 8685 }, { "epoch": 4.569174118884797, "grad_norm": 2.2612526416778564, "learning_rate": 6.807009795701494e-07, "loss": 0.7969, "step": 8686 }, { "epoch": 4.569700157811678, "grad_norm": 2.476712226867676, "learning_rate": 6.802245906143127e-07, "loss": 0.8, "step": 8687 }, { "epoch": 4.570226196738559, "grad_norm": 2.4788904190063477, "learning_rate": 6.797483421659972e-07, "loss": 0.799, "step": 8688 }, { "epoch": 4.570752235665439, "grad_norm": 2.5979485511779785, "learning_rate": 6.792722342619756e-07, "loss": 0.7934, "step": 8689 }, { "epoch": 4.57127827459232, "grad_norm": 2.4992308616638184, "learning_rate": 6.787962669390075e-07, "loss": 0.819, "step": 8690 }, { "epoch": 4.5718043135192, "grad_norm": 2.369959592819214, "learning_rate": 6.783204402338445e-07, "loss": 0.7652, "step": 8691 }, { "epoch": 4.572330352446081, "grad_norm": 2.7279441356658936, "learning_rate": 6.778447541832239e-07, "loss": 0.7749, "step": 8692 }, { "epoch": 4.572856391372961, "grad_norm": 2.525794506072998, "learning_rate": 6.773692088238757e-07, "loss": 0.8391, "step": 8693 }, { "epoch": 4.5733824302998425, "grad_norm": 2.4560720920562744, "learning_rate": 6.768938041925168e-07, "loss": 0.7806, "step": 8694 }, { "epoch": 4.573908469226723, "grad_norm": 2.9361560344696045, "learning_rate": 6.764185403258541e-07, "loss": 0.7879, "step": 8695 }, { "epoch": 4.574434508153604, "grad_norm": 2.614457607269287, "learning_rate": 6.759434172605833e-07, "loss": 0.7808, "step": 8696 }, { "epoch": 4.574960547080484, "grad_norm": 2.400282144546509, "learning_rate": 6.754684350333895e-07, "loss": 0.8016, "step": 8697 }, { "epoch": 4.575486586007365, "grad_norm": 2.3881890773773193, "learning_rate": 6.749935936809476e-07, "loss": 0.7696, "step": 8698 }, { "epoch": 4.576012624934245, "grad_norm": 2.340338706970215, "learning_rate": 6.745188932399193e-07, "loss": 0.7487, "step": 8699 }, { "epoch": 4.576538663861125, "grad_norm": 2.167144775390625, "learning_rate": 6.740443337469574e-07, "loss": 0.7181, "step": 8700 }, { "epoch": 4.5770647027880065, "grad_norm": 2.5368239879608154, "learning_rate": 6.735699152387034e-07, "loss": 0.8161, "step": 8701 }, { "epoch": 4.577590741714887, "grad_norm": 2.657496690750122, "learning_rate": 6.730956377517889e-07, "loss": 0.8294, "step": 8702 }, { "epoch": 4.578116780641768, "grad_norm": 2.6306920051574707, "learning_rate": 6.726215013228318e-07, "loss": 0.7827, "step": 8703 }, { "epoch": 4.578642819568648, "grad_norm": 2.3117547035217285, "learning_rate": 6.721475059884413e-07, "loss": 0.7849, "step": 8704 }, { "epoch": 4.579168858495529, "grad_norm": 2.556194305419922, "learning_rate": 6.716736517852168e-07, "loss": 0.7355, "step": 8705 }, { "epoch": 4.579694897422409, "grad_norm": 2.3919174671173096, "learning_rate": 6.711999387497431e-07, "loss": 0.7766, "step": 8706 }, { "epoch": 4.58022093634929, "grad_norm": 2.3734145164489746, "learning_rate": 6.707263669185973e-07, "loss": 0.7714, "step": 8707 }, { "epoch": 4.5807469752761705, "grad_norm": 2.348479986190796, "learning_rate": 6.702529363283447e-07, "loss": 0.7243, "step": 8708 }, { "epoch": 4.581273014203051, "grad_norm": 2.5992491245269775, "learning_rate": 6.697796470155393e-07, "loss": 0.8203, "step": 8709 }, { "epoch": 4.581799053129932, "grad_norm": 2.433411121368408, "learning_rate": 6.693064990167247e-07, "loss": 0.8171, "step": 8710 }, { "epoch": 4.582325092056812, "grad_norm": 2.3721096515655518, "learning_rate": 6.688334923684331e-07, "loss": 0.7751, "step": 8711 }, { "epoch": 4.582851130983693, "grad_norm": 2.336761236190796, "learning_rate": 6.683606271071872e-07, "loss": 0.803, "step": 8712 }, { "epoch": 4.583377169910573, "grad_norm": 2.364105701446533, "learning_rate": 6.678879032694954e-07, "loss": 0.7872, "step": 8713 }, { "epoch": 4.583903208837454, "grad_norm": 2.4279818534851074, "learning_rate": 6.674153208918588e-07, "loss": 0.8124, "step": 8714 }, { "epoch": 4.5844292477643345, "grad_norm": 2.382506847381592, "learning_rate": 6.669428800107667e-07, "loss": 0.7862, "step": 8715 }, { "epoch": 4.5849552866912155, "grad_norm": 2.323568820953369, "learning_rate": 6.664705806626956e-07, "loss": 0.7312, "step": 8716 }, { "epoch": 4.585481325618096, "grad_norm": 2.4844603538513184, "learning_rate": 6.659984228841126e-07, "loss": 0.7988, "step": 8717 }, { "epoch": 4.586007364544976, "grad_norm": 2.2899088859558105, "learning_rate": 6.655264067114744e-07, "loss": 0.7678, "step": 8718 }, { "epoch": 4.586533403471857, "grad_norm": 2.369069814682007, "learning_rate": 6.650545321812268e-07, "loss": 0.7944, "step": 8719 }, { "epoch": 4.587059442398737, "grad_norm": 2.4201908111572266, "learning_rate": 6.645827993298018e-07, "loss": 0.8356, "step": 8720 }, { "epoch": 4.587585481325618, "grad_norm": 2.382195472717285, "learning_rate": 6.641112081936238e-07, "loss": 0.7623, "step": 8721 }, { "epoch": 4.5881115202524985, "grad_norm": 2.407207727432251, "learning_rate": 6.63639758809105e-07, "loss": 0.7331, "step": 8722 }, { "epoch": 4.5886375591793795, "grad_norm": 2.487672805786133, "learning_rate": 6.631684512126465e-07, "loss": 0.7609, "step": 8723 }, { "epoch": 4.58916359810626, "grad_norm": 2.321713924407959, "learning_rate": 6.626972854406388e-07, "loss": 0.7396, "step": 8724 }, { "epoch": 4.589689637033141, "grad_norm": 2.4888041019439697, "learning_rate": 6.622262615294614e-07, "loss": 0.7721, "step": 8725 }, { "epoch": 4.590215675960021, "grad_norm": 2.469524383544922, "learning_rate": 6.617553795154833e-07, "loss": 0.8258, "step": 8726 }, { "epoch": 4.590741714886901, "grad_norm": 2.313016176223755, "learning_rate": 6.612846394350614e-07, "loss": 0.7365, "step": 8727 }, { "epoch": 4.591267753813782, "grad_norm": 2.403724431991577, "learning_rate": 6.608140413245412e-07, "loss": 0.7681, "step": 8728 }, { "epoch": 4.5917937927406625, "grad_norm": 2.34006667137146, "learning_rate": 6.603435852202595e-07, "loss": 0.7695, "step": 8729 }, { "epoch": 4.5923198316675435, "grad_norm": 2.4242968559265137, "learning_rate": 6.598732711585404e-07, "loss": 0.7559, "step": 8730 }, { "epoch": 4.592845870594424, "grad_norm": 2.5051090717315674, "learning_rate": 6.594030991756978e-07, "loss": 0.7811, "step": 8731 }, { "epoch": 4.593371909521305, "grad_norm": 2.3459572792053223, "learning_rate": 6.589330693080343e-07, "loss": 0.7813, "step": 8732 }, { "epoch": 4.593897948448185, "grad_norm": 2.3224494457244873, "learning_rate": 6.584631815918424e-07, "loss": 0.8031, "step": 8733 }, { "epoch": 4.594423987375066, "grad_norm": 2.4222865104675293, "learning_rate": 6.579934360634014e-07, "loss": 0.7364, "step": 8734 }, { "epoch": 4.594950026301946, "grad_norm": 2.4201743602752686, "learning_rate": 6.575238327589814e-07, "loss": 0.8168, "step": 8735 }, { "epoch": 4.5954760652288265, "grad_norm": 2.4559848308563232, "learning_rate": 6.570543717148414e-07, "loss": 0.7491, "step": 8736 }, { "epoch": 4.5960021041557075, "grad_norm": 2.5481057167053223, "learning_rate": 6.565850529672293e-07, "loss": 0.8025, "step": 8737 }, { "epoch": 4.596528143082588, "grad_norm": 2.285106658935547, "learning_rate": 6.561158765523828e-07, "loss": 0.7527, "step": 8738 }, { "epoch": 4.597054182009469, "grad_norm": 2.57143235206604, "learning_rate": 6.556468425065257e-07, "loss": 0.7988, "step": 8739 }, { "epoch": 4.597580220936349, "grad_norm": 2.4854586124420166, "learning_rate": 6.551779508658738e-07, "loss": 0.7756, "step": 8740 }, { "epoch": 4.59810625986323, "grad_norm": 2.327723979949951, "learning_rate": 6.547092016666317e-07, "loss": 0.77, "step": 8741 }, { "epoch": 4.59863229879011, "grad_norm": 2.407625675201416, "learning_rate": 6.542405949449906e-07, "loss": 0.7504, "step": 8742 }, { "epoch": 4.599158337716991, "grad_norm": 2.6225481033325195, "learning_rate": 6.537721307371331e-07, "loss": 0.8093, "step": 8743 }, { "epoch": 4.5996843766438715, "grad_norm": 2.4468836784362793, "learning_rate": 6.533038090792301e-07, "loss": 0.8283, "step": 8744 }, { "epoch": 4.600210415570753, "grad_norm": 2.4593911170959473, "learning_rate": 6.528356300074412e-07, "loss": 0.7607, "step": 8745 }, { "epoch": 4.600736454497633, "grad_norm": 2.3439412117004395, "learning_rate": 6.523675935579155e-07, "loss": 0.7521, "step": 8746 }, { "epoch": 4.601262493424514, "grad_norm": 2.2875523567199707, "learning_rate": 6.518996997667906e-07, "loss": 0.7747, "step": 8747 }, { "epoch": 4.601788532351394, "grad_norm": 2.3689732551574707, "learning_rate": 6.514319486701939e-07, "loss": 0.7817, "step": 8748 }, { "epoch": 4.602314571278274, "grad_norm": 2.458815336227417, "learning_rate": 6.509643403042399e-07, "loss": 0.7611, "step": 8749 }, { "epoch": 4.602840610205155, "grad_norm": 2.3726868629455566, "learning_rate": 6.504968747050339e-07, "loss": 0.7771, "step": 8750 }, { "epoch": 4.6033666491320355, "grad_norm": 2.3886821269989014, "learning_rate": 6.500295519086702e-07, "loss": 0.7601, "step": 8751 }, { "epoch": 4.603892688058917, "grad_norm": 2.517976999282837, "learning_rate": 6.495623719512304e-07, "loss": 0.7366, "step": 8752 }, { "epoch": 4.604418726985797, "grad_norm": 2.5501677989959717, "learning_rate": 6.490953348687864e-07, "loss": 0.7792, "step": 8753 }, { "epoch": 4.604944765912678, "grad_norm": 2.2321486473083496, "learning_rate": 6.486284406973991e-07, "loss": 0.7542, "step": 8754 }, { "epoch": 4.605470804839558, "grad_norm": 2.3556957244873047, "learning_rate": 6.481616894731191e-07, "loss": 0.7536, "step": 8755 }, { "epoch": 4.605996843766439, "grad_norm": 2.4481894969940186, "learning_rate": 6.47695081231983e-07, "loss": 0.7931, "step": 8756 }, { "epoch": 4.606522882693319, "grad_norm": 2.533371925354004, "learning_rate": 6.472286160100191e-07, "loss": 0.7926, "step": 8757 }, { "epoch": 4.6070489216201995, "grad_norm": 2.5626060962677, "learning_rate": 6.467622938432442e-07, "loss": 0.7676, "step": 8758 }, { "epoch": 4.607574960547081, "grad_norm": 2.4561634063720703, "learning_rate": 6.462961147676633e-07, "loss": 0.8079, "step": 8759 }, { "epoch": 4.608100999473961, "grad_norm": 2.276794195175171, "learning_rate": 6.458300788192712e-07, "loss": 0.7628, "step": 8760 }, { "epoch": 4.608627038400842, "grad_norm": 2.6545140743255615, "learning_rate": 6.453641860340506e-07, "loss": 0.8121, "step": 8761 }, { "epoch": 4.609153077327722, "grad_norm": 2.376054048538208, "learning_rate": 6.448984364479752e-07, "loss": 0.7717, "step": 8762 }, { "epoch": 4.609679116254603, "grad_norm": 2.1978695392608643, "learning_rate": 6.444328300970043e-07, "loss": 0.7821, "step": 8763 }, { "epoch": 4.610205155181483, "grad_norm": 2.3933041095733643, "learning_rate": 6.439673670170895e-07, "loss": 0.7792, "step": 8764 }, { "epoch": 4.610731194108364, "grad_norm": 2.4240293502807617, "learning_rate": 6.435020472441689e-07, "loss": 0.794, "step": 8765 }, { "epoch": 4.611257233035245, "grad_norm": 2.2621867656707764, "learning_rate": 6.430368708141708e-07, "loss": 0.7748, "step": 8766 }, { "epoch": 4.611783271962125, "grad_norm": 2.530802011489868, "learning_rate": 6.425718377630119e-07, "loss": 0.7432, "step": 8767 }, { "epoch": 4.612309310889006, "grad_norm": 2.439785957336426, "learning_rate": 6.421069481265988e-07, "loss": 0.739, "step": 8768 }, { "epoch": 4.612835349815886, "grad_norm": 2.524880886077881, "learning_rate": 6.416422019408264e-07, "loss": 0.7969, "step": 8769 }, { "epoch": 4.613361388742767, "grad_norm": 2.435901165008545, "learning_rate": 6.411775992415772e-07, "loss": 0.805, "step": 8770 }, { "epoch": 4.613887427669647, "grad_norm": 2.4255480766296387, "learning_rate": 6.407131400647248e-07, "loss": 0.711, "step": 8771 }, { "epoch": 4.614413466596528, "grad_norm": 2.475843667984009, "learning_rate": 6.402488244461308e-07, "loss": 0.8152, "step": 8772 }, { "epoch": 4.614939505523409, "grad_norm": 2.509237289428711, "learning_rate": 6.397846524216453e-07, "loss": 0.7859, "step": 8773 }, { "epoch": 4.61546554445029, "grad_norm": 2.2775607109069824, "learning_rate": 6.393206240271077e-07, "loss": 0.7862, "step": 8774 }, { "epoch": 4.61599158337717, "grad_norm": 2.30538010597229, "learning_rate": 6.388567392983475e-07, "loss": 0.8171, "step": 8775 }, { "epoch": 4.61651762230405, "grad_norm": 2.337613821029663, "learning_rate": 6.3839299827118e-07, "loss": 0.7527, "step": 8776 }, { "epoch": 4.617043661230931, "grad_norm": 2.7444067001342773, "learning_rate": 6.379294009814132e-07, "loss": 0.8295, "step": 8777 }, { "epoch": 4.617569700157811, "grad_norm": 2.554661750793457, "learning_rate": 6.374659474648403e-07, "loss": 0.7658, "step": 8778 }, { "epoch": 4.618095739084692, "grad_norm": 2.3251659870147705, "learning_rate": 6.370026377572461e-07, "loss": 0.7127, "step": 8779 }, { "epoch": 4.618621778011573, "grad_norm": 2.4100253582000732, "learning_rate": 6.365394718944035e-07, "loss": 0.6648, "step": 8780 }, { "epoch": 4.619147816938454, "grad_norm": 2.2332258224487305, "learning_rate": 6.360764499120739e-07, "loss": 0.7566, "step": 8781 }, { "epoch": 4.619673855865334, "grad_norm": 2.3981752395629883, "learning_rate": 6.356135718460082e-07, "loss": 0.773, "step": 8782 }, { "epoch": 4.620199894792215, "grad_norm": 2.2396979331970215, "learning_rate": 6.351508377319468e-07, "loss": 0.7703, "step": 8783 }, { "epoch": 4.620725933719095, "grad_norm": 2.996161460876465, "learning_rate": 6.34688247605616e-07, "loss": 0.7724, "step": 8784 }, { "epoch": 4.621251972645975, "grad_norm": 2.479576826095581, "learning_rate": 6.342258015027345e-07, "loss": 0.7428, "step": 8785 }, { "epoch": 4.621778011572856, "grad_norm": 2.368489980697632, "learning_rate": 6.337634994590078e-07, "loss": 0.7721, "step": 8786 }, { "epoch": 4.6223040504997375, "grad_norm": 2.3931570053100586, "learning_rate": 6.333013415101322e-07, "loss": 0.7897, "step": 8787 }, { "epoch": 4.622830089426618, "grad_norm": 2.366560459136963, "learning_rate": 6.3283932769179e-07, "loss": 0.8422, "step": 8788 }, { "epoch": 4.623356128353498, "grad_norm": 2.588444232940674, "learning_rate": 6.323774580396541e-07, "loss": 0.803, "step": 8789 }, { "epoch": 4.623882167280379, "grad_norm": 2.4773969650268555, "learning_rate": 6.319157325893868e-07, "loss": 0.8006, "step": 8790 }, { "epoch": 4.624408206207259, "grad_norm": 2.3748583793640137, "learning_rate": 6.314541513766393e-07, "loss": 0.8177, "step": 8791 }, { "epoch": 4.62493424513414, "grad_norm": 2.6425795555114746, "learning_rate": 6.309927144370492e-07, "loss": 0.7966, "step": 8792 }, { "epoch": 4.62546028406102, "grad_norm": 2.4057843685150146, "learning_rate": 6.305314218062455e-07, "loss": 0.8245, "step": 8793 }, { "epoch": 4.6259863229879015, "grad_norm": 2.5088109970092773, "learning_rate": 6.300702735198455e-07, "loss": 0.7509, "step": 8794 }, { "epoch": 4.626512361914782, "grad_norm": 2.2872095108032227, "learning_rate": 6.296092696134551e-07, "loss": 0.7933, "step": 8795 }, { "epoch": 4.627038400841663, "grad_norm": 2.5723464488983154, "learning_rate": 6.291484101226689e-07, "loss": 0.7778, "step": 8796 }, { "epoch": 4.627564439768543, "grad_norm": 2.537269353866577, "learning_rate": 6.286876950830703e-07, "loss": 0.8045, "step": 8797 }, { "epoch": 4.628090478695423, "grad_norm": 2.4907281398773193, "learning_rate": 6.282271245302332e-07, "loss": 0.7664, "step": 8798 }, { "epoch": 4.628616517622304, "grad_norm": 2.395399808883667, "learning_rate": 6.277666984997169e-07, "loss": 0.7414, "step": 8799 }, { "epoch": 4.629142556549184, "grad_norm": 2.3473474979400635, "learning_rate": 6.273064170270735e-07, "loss": 0.7753, "step": 8800 }, { "epoch": 4.6296685954760655, "grad_norm": 2.4545416831970215, "learning_rate": 6.2684628014784e-07, "loss": 0.8366, "step": 8801 }, { "epoch": 4.630194634402946, "grad_norm": 2.9393117427825928, "learning_rate": 6.263862878975452e-07, "loss": 0.8039, "step": 8802 }, { "epoch": 4.630720673329827, "grad_norm": 2.3255577087402344, "learning_rate": 6.25926440311706e-07, "loss": 0.7996, "step": 8803 }, { "epoch": 4.631246712256707, "grad_norm": 2.4601569175720215, "learning_rate": 6.254667374258277e-07, "loss": 0.8261, "step": 8804 }, { "epoch": 4.631772751183588, "grad_norm": 2.487532615661621, "learning_rate": 6.250071792754053e-07, "loss": 0.7761, "step": 8805 }, { "epoch": 4.632298790110468, "grad_norm": 2.4347281455993652, "learning_rate": 6.245477658959207e-07, "loss": 0.7735, "step": 8806 }, { "epoch": 4.632824829037348, "grad_norm": 2.273512363433838, "learning_rate": 6.240884973228465e-07, "loss": 0.7865, "step": 8807 }, { "epoch": 4.6333508679642295, "grad_norm": 2.456537961959839, "learning_rate": 6.236293735916432e-07, "loss": 0.8411, "step": 8808 }, { "epoch": 4.63387690689111, "grad_norm": 2.5871315002441406, "learning_rate": 6.23170394737761e-07, "loss": 0.8178, "step": 8809 }, { "epoch": 4.634402945817991, "grad_norm": 2.3097331523895264, "learning_rate": 6.227115607966378e-07, "loss": 0.753, "step": 8810 }, { "epoch": 4.634928984744871, "grad_norm": 2.3663787841796875, "learning_rate": 6.222528718037019e-07, "loss": 0.8058, "step": 8811 }, { "epoch": 4.635455023671752, "grad_norm": 2.7307016849517822, "learning_rate": 6.217943277943678e-07, "loss": 0.7615, "step": 8812 }, { "epoch": 4.635981062598632, "grad_norm": 2.9137661457061768, "learning_rate": 6.213359288040419e-07, "loss": 0.7736, "step": 8813 }, { "epoch": 4.636507101525513, "grad_norm": 2.4421451091766357, "learning_rate": 6.208776748681161e-07, "loss": 0.8201, "step": 8814 }, { "epoch": 4.6370331404523935, "grad_norm": 2.4645023345947266, "learning_rate": 6.204195660219739e-07, "loss": 0.7268, "step": 8815 }, { "epoch": 4.637559179379274, "grad_norm": 2.412090539932251, "learning_rate": 6.199616023009863e-07, "loss": 0.7995, "step": 8816 }, { "epoch": 4.638085218306155, "grad_norm": 2.323514938354492, "learning_rate": 6.195037837405138e-07, "loss": 0.7497, "step": 8817 }, { "epoch": 4.638611257233035, "grad_norm": 2.339538097381592, "learning_rate": 6.190461103759046e-07, "loss": 0.7838, "step": 8818 }, { "epoch": 4.639137296159916, "grad_norm": 2.3684370517730713, "learning_rate": 6.185885822424976e-07, "loss": 0.7544, "step": 8819 }, { "epoch": 4.639663335086796, "grad_norm": 2.355742931365967, "learning_rate": 6.181311993756176e-07, "loss": 0.7944, "step": 8820 }, { "epoch": 4.640189374013677, "grad_norm": 2.5365512371063232, "learning_rate": 6.176739618105801e-07, "loss": 0.765, "step": 8821 }, { "epoch": 4.6407154129405574, "grad_norm": 2.519153594970703, "learning_rate": 6.172168695826896e-07, "loss": 0.7575, "step": 8822 }, { "epoch": 4.6412414518674385, "grad_norm": 2.5129952430725098, "learning_rate": 6.167599227272389e-07, "loss": 0.8003, "step": 8823 }, { "epoch": 4.641767490794319, "grad_norm": 2.516206979751587, "learning_rate": 6.163031212795101e-07, "loss": 0.7343, "step": 8824 }, { "epoch": 4.642293529721199, "grad_norm": 2.4109890460968018, "learning_rate": 6.15846465274772e-07, "loss": 0.7433, "step": 8825 }, { "epoch": 4.64281956864808, "grad_norm": 2.532378673553467, "learning_rate": 6.15389954748285e-07, "loss": 0.8155, "step": 8826 }, { "epoch": 4.64334560757496, "grad_norm": 2.6566991806030273, "learning_rate": 6.149335897352957e-07, "loss": 0.8163, "step": 8827 }, { "epoch": 4.643871646501841, "grad_norm": 2.5548949241638184, "learning_rate": 6.144773702710413e-07, "loss": 0.7796, "step": 8828 }, { "epoch": 4.6443976854287214, "grad_norm": 2.3219220638275146, "learning_rate": 6.140212963907474e-07, "loss": 0.7788, "step": 8829 }, { "epoch": 4.6449237243556025, "grad_norm": 2.4092092514038086, "learning_rate": 6.13565368129628e-07, "loss": 0.7825, "step": 8830 }, { "epoch": 4.645449763282483, "grad_norm": 2.5776851177215576, "learning_rate": 6.131095855228861e-07, "loss": 0.8374, "step": 8831 }, { "epoch": 4.645975802209364, "grad_norm": 2.414196729660034, "learning_rate": 6.126539486057132e-07, "loss": 0.7777, "step": 8832 }, { "epoch": 4.646501841136244, "grad_norm": 2.6225385665893555, "learning_rate": 6.121984574132903e-07, "loss": 0.7842, "step": 8833 }, { "epoch": 4.647027880063124, "grad_norm": 2.478374481201172, "learning_rate": 6.117431119807851e-07, "loss": 0.777, "step": 8834 }, { "epoch": 4.647553918990005, "grad_norm": 2.362490177154541, "learning_rate": 6.112879123433565e-07, "loss": 0.756, "step": 8835 }, { "epoch": 4.648079957916886, "grad_norm": 2.609894275665283, "learning_rate": 6.108328585361511e-07, "loss": 0.816, "step": 8836 }, { "epoch": 4.6486059968437665, "grad_norm": 2.342231512069702, "learning_rate": 6.103779505943048e-07, "loss": 0.7481, "step": 8837 }, { "epoch": 4.649132035770647, "grad_norm": 2.253077268600464, "learning_rate": 6.099231885529403e-07, "loss": 0.7442, "step": 8838 }, { "epoch": 4.649658074697528, "grad_norm": 2.350665807723999, "learning_rate": 6.094685724471713e-07, "loss": 0.7224, "step": 8839 }, { "epoch": 4.650184113624408, "grad_norm": 2.4373555183410645, "learning_rate": 6.090141023120991e-07, "loss": 0.7931, "step": 8840 }, { "epoch": 4.650710152551289, "grad_norm": 2.5031843185424805, "learning_rate": 6.085597781828151e-07, "loss": 0.7798, "step": 8841 }, { "epoch": 4.651236191478169, "grad_norm": 2.306077718734741, "learning_rate": 6.081056000943964e-07, "loss": 0.7903, "step": 8842 }, { "epoch": 4.65176223040505, "grad_norm": 2.3824191093444824, "learning_rate": 6.07651568081912e-07, "loss": 0.7207, "step": 8843 }, { "epoch": 4.6522882693319305, "grad_norm": 2.4878695011138916, "learning_rate": 6.07197682180418e-07, "loss": 0.7554, "step": 8844 }, { "epoch": 4.652814308258812, "grad_norm": 2.3144333362579346, "learning_rate": 6.067439424249596e-07, "loss": 0.7363, "step": 8845 }, { "epoch": 4.653340347185692, "grad_norm": 2.418778896331787, "learning_rate": 6.062903488505712e-07, "loss": 0.7421, "step": 8846 }, { "epoch": 4.653866386112572, "grad_norm": 2.304666042327881, "learning_rate": 6.058369014922758e-07, "loss": 0.7634, "step": 8847 }, { "epoch": 4.654392425039453, "grad_norm": 2.420586109161377, "learning_rate": 6.05383600385083e-07, "loss": 0.7709, "step": 8848 }, { "epoch": 4.654918463966333, "grad_norm": 2.4977619647979736, "learning_rate": 6.049304455639946e-07, "loss": 0.8068, "step": 8849 }, { "epoch": 4.655444502893214, "grad_norm": 2.4414148330688477, "learning_rate": 6.044774370639978e-07, "loss": 0.8082, "step": 8850 }, { "epoch": 4.6559705418200945, "grad_norm": 2.394644260406494, "learning_rate": 6.04024574920071e-07, "loss": 0.7484, "step": 8851 }, { "epoch": 4.656496580746976, "grad_norm": 2.446826696395874, "learning_rate": 6.035718591671802e-07, "loss": 0.789, "step": 8852 }, { "epoch": 4.657022619673856, "grad_norm": 2.408750057220459, "learning_rate": 6.031192898402802e-07, "loss": 0.8022, "step": 8853 }, { "epoch": 4.657548658600737, "grad_norm": 2.382441282272339, "learning_rate": 6.026668669743144e-07, "loss": 0.7832, "step": 8854 }, { "epoch": 4.658074697527617, "grad_norm": 2.415757179260254, "learning_rate": 6.022145906042159e-07, "loss": 0.7792, "step": 8855 }, { "epoch": 4.658600736454497, "grad_norm": 2.485945701599121, "learning_rate": 6.017624607649045e-07, "loss": 0.7794, "step": 8856 }, { "epoch": 4.659126775381378, "grad_norm": 2.287383556365967, "learning_rate": 6.013104774912901e-07, "loss": 0.7341, "step": 8857 }, { "epoch": 4.6596528143082585, "grad_norm": 2.4851973056793213, "learning_rate": 6.008586408182712e-07, "loss": 0.7957, "step": 8858 }, { "epoch": 4.66017885323514, "grad_norm": 2.376516103744507, "learning_rate": 6.004069507807344e-07, "loss": 0.7626, "step": 8859 }, { "epoch": 4.66070489216202, "grad_norm": 2.482280731201172, "learning_rate": 5.999554074135566e-07, "loss": 0.7747, "step": 8860 }, { "epoch": 4.661230931088901, "grad_norm": 2.5731120109558105, "learning_rate": 5.995040107516004e-07, "loss": 0.8124, "step": 8861 }, { "epoch": 4.661756970015781, "grad_norm": 2.264411449432373, "learning_rate": 5.990527608297203e-07, "loss": 0.7878, "step": 8862 }, { "epoch": 4.662283008942662, "grad_norm": 2.3320934772491455, "learning_rate": 5.986016576827564e-07, "loss": 0.7825, "step": 8863 }, { "epoch": 4.662809047869542, "grad_norm": 2.4416563510894775, "learning_rate": 5.981507013455398e-07, "loss": 0.7837, "step": 8864 }, { "epoch": 4.6633350867964225, "grad_norm": 2.384885787963867, "learning_rate": 5.976998918528898e-07, "loss": 0.7806, "step": 8865 }, { "epoch": 4.663861125723304, "grad_norm": 2.3647680282592773, "learning_rate": 5.972492292396137e-07, "loss": 0.7821, "step": 8866 }, { "epoch": 4.664387164650184, "grad_norm": 2.5099477767944336, "learning_rate": 5.967987135405079e-07, "loss": 0.7688, "step": 8867 }, { "epoch": 4.664913203577065, "grad_norm": 2.7503249645233154, "learning_rate": 5.963483447903573e-07, "loss": 0.7696, "step": 8868 }, { "epoch": 4.665439242503945, "grad_norm": 2.2173678874969482, "learning_rate": 5.958981230239367e-07, "loss": 0.7421, "step": 8869 }, { "epoch": 4.665965281430826, "grad_norm": 2.348292112350464, "learning_rate": 5.954480482760061e-07, "loss": 0.8344, "step": 8870 }, { "epoch": 4.666491320357706, "grad_norm": 2.372459888458252, "learning_rate": 5.94998120581318e-07, "loss": 0.7296, "step": 8871 }, { "epoch": 4.667017359284587, "grad_norm": 2.451267957687378, "learning_rate": 5.945483399746116e-07, "loss": 0.7549, "step": 8872 }, { "epoch": 4.667543398211468, "grad_norm": 2.5156400203704834, "learning_rate": 5.940987064906159e-07, "loss": 0.7311, "step": 8873 }, { "epoch": 4.668069437138348, "grad_norm": 2.342646360397339, "learning_rate": 5.936492201640462e-07, "loss": 0.7564, "step": 8874 }, { "epoch": 4.668595476065229, "grad_norm": 2.286766290664673, "learning_rate": 5.931998810296089e-07, "loss": 0.7373, "step": 8875 }, { "epoch": 4.669121514992109, "grad_norm": 2.4430220127105713, "learning_rate": 5.927506891219986e-07, "loss": 0.7546, "step": 8876 }, { "epoch": 4.66964755391899, "grad_norm": 2.3479604721069336, "learning_rate": 5.92301644475897e-07, "loss": 0.7755, "step": 8877 }, { "epoch": 4.67017359284587, "grad_norm": 3.3653383255004883, "learning_rate": 5.918527471259758e-07, "loss": 0.7799, "step": 8878 }, { "epoch": 4.670699631772751, "grad_norm": 2.8174712657928467, "learning_rate": 5.914039971068952e-07, "loss": 0.7678, "step": 8879 }, { "epoch": 4.671225670699632, "grad_norm": 2.371349334716797, "learning_rate": 5.909553944533039e-07, "loss": 0.7642, "step": 8880 }, { "epoch": 4.671751709626513, "grad_norm": 2.3104395866394043, "learning_rate": 5.905069391998392e-07, "loss": 0.7601, "step": 8881 }, { "epoch": 4.672277748553393, "grad_norm": 2.3968024253845215, "learning_rate": 5.900586313811269e-07, "loss": 0.7873, "step": 8882 }, { "epoch": 4.672803787480274, "grad_norm": 2.4375228881835938, "learning_rate": 5.896104710317821e-07, "loss": 0.772, "step": 8883 }, { "epoch": 4.673329826407154, "grad_norm": 2.5995571613311768, "learning_rate": 5.891624581864066e-07, "loss": 0.7804, "step": 8884 }, { "epoch": 4.673855865334035, "grad_norm": 2.6446313858032227, "learning_rate": 5.887145928795929e-07, "loss": 0.801, "step": 8885 }, { "epoch": 4.674381904260915, "grad_norm": 2.365895986557007, "learning_rate": 5.882668751459219e-07, "loss": 0.7444, "step": 8886 }, { "epoch": 4.674907943187796, "grad_norm": 2.376763105392456, "learning_rate": 5.878193050199615e-07, "loss": 0.7638, "step": 8887 }, { "epoch": 4.675433982114677, "grad_norm": 2.6804537773132324, "learning_rate": 5.873718825362695e-07, "loss": 0.8246, "step": 8888 }, { "epoch": 4.675960021041557, "grad_norm": 2.4396657943725586, "learning_rate": 5.869246077293925e-07, "loss": 0.7429, "step": 8889 }, { "epoch": 4.676486059968438, "grad_norm": 2.2962279319763184, "learning_rate": 5.864774806338652e-07, "loss": 0.7904, "step": 8890 }, { "epoch": 4.677012098895318, "grad_norm": 2.5809829235076904, "learning_rate": 5.860305012842104e-07, "loss": 0.7958, "step": 8891 }, { "epoch": 4.677538137822199, "grad_norm": 2.5763118267059326, "learning_rate": 5.855836697149401e-07, "loss": 0.7963, "step": 8892 }, { "epoch": 4.678064176749079, "grad_norm": 2.3892228603363037, "learning_rate": 5.851369859605552e-07, "loss": 0.7953, "step": 8893 }, { "epoch": 4.6785902156759605, "grad_norm": 2.3664567470550537, "learning_rate": 5.846904500555445e-07, "loss": 0.7844, "step": 8894 }, { "epoch": 4.679116254602841, "grad_norm": 2.3988423347473145, "learning_rate": 5.842440620343858e-07, "loss": 0.7529, "step": 8895 }, { "epoch": 4.679642293529721, "grad_norm": 2.2643160820007324, "learning_rate": 5.837978219315457e-07, "loss": 0.7377, "step": 8896 }, { "epoch": 4.680168332456602, "grad_norm": 2.300199031829834, "learning_rate": 5.833517297814792e-07, "loss": 0.7233, "step": 8897 }, { "epoch": 4.680694371383482, "grad_norm": 2.327751874923706, "learning_rate": 5.829057856186296e-07, "loss": 0.7692, "step": 8898 }, { "epoch": 4.681220410310363, "grad_norm": 2.43599271774292, "learning_rate": 5.824599894774277e-07, "loss": 0.7704, "step": 8899 }, { "epoch": 4.681746449237243, "grad_norm": 2.3503031730651855, "learning_rate": 5.820143413922949e-07, "loss": 0.8308, "step": 8900 }, { "epoch": 4.6822724881641244, "grad_norm": 2.4178149700164795, "learning_rate": 5.815688413976406e-07, "loss": 0.7698, "step": 8901 }, { "epoch": 4.682798527091005, "grad_norm": 2.4154651165008545, "learning_rate": 5.811234895278623e-07, "loss": 0.8095, "step": 8902 }, { "epoch": 4.683324566017886, "grad_norm": 2.654283046722412, "learning_rate": 5.806782858173459e-07, "loss": 0.7634, "step": 8903 }, { "epoch": 4.683850604944766, "grad_norm": 2.4717907905578613, "learning_rate": 5.802332303004671e-07, "loss": 0.7392, "step": 8904 }, { "epoch": 4.684376643871646, "grad_norm": 2.2097344398498535, "learning_rate": 5.797883230115891e-07, "loss": 0.7644, "step": 8905 }, { "epoch": 4.684902682798527, "grad_norm": 2.7380118370056152, "learning_rate": 5.793435639850631e-07, "loss": 0.7741, "step": 8906 }, { "epoch": 4.685428721725407, "grad_norm": 2.805870532989502, "learning_rate": 5.7889895325523e-07, "loss": 0.7292, "step": 8907 }, { "epoch": 4.685954760652288, "grad_norm": 2.467465400695801, "learning_rate": 5.784544908564191e-07, "loss": 0.8227, "step": 8908 }, { "epoch": 4.686480799579169, "grad_norm": 2.330148935317993, "learning_rate": 5.780101768229482e-07, "loss": 0.8129, "step": 8909 }, { "epoch": 4.68700683850605, "grad_norm": 2.8538782596588135, "learning_rate": 5.775660111891224e-07, "loss": 0.8148, "step": 8910 }, { "epoch": 4.68753287743293, "grad_norm": 2.495075225830078, "learning_rate": 5.77121993989237e-07, "loss": 0.7896, "step": 8911 }, { "epoch": 4.688058916359811, "grad_norm": 2.689262628555298, "learning_rate": 5.766781252575757e-07, "loss": 0.8233, "step": 8912 }, { "epoch": 4.688584955286691, "grad_norm": 2.3703155517578125, "learning_rate": 5.762344050284093e-07, "loss": 0.7526, "step": 8913 }, { "epoch": 4.689110994213571, "grad_norm": 2.3565948009490967, "learning_rate": 5.757908333359985e-07, "loss": 0.7982, "step": 8914 }, { "epoch": 4.689637033140452, "grad_norm": 2.380347728729248, "learning_rate": 5.753474102145923e-07, "loss": 0.7952, "step": 8915 }, { "epoch": 4.690163072067333, "grad_norm": 2.4809582233428955, "learning_rate": 5.749041356984278e-07, "loss": 0.7777, "step": 8916 }, { "epoch": 4.690689110994214, "grad_norm": 2.655095100402832, "learning_rate": 5.744610098217307e-07, "loss": 0.8189, "step": 8917 }, { "epoch": 4.691215149921094, "grad_norm": 2.461071491241455, "learning_rate": 5.740180326187162e-07, "loss": 0.7955, "step": 8918 }, { "epoch": 4.691741188847975, "grad_norm": 2.4957680702209473, "learning_rate": 5.73575204123587e-07, "loss": 0.7983, "step": 8919 }, { "epoch": 4.692267227774855, "grad_norm": 2.464352607727051, "learning_rate": 5.731325243705338e-07, "loss": 0.7652, "step": 8920 }, { "epoch": 4.692793266701736, "grad_norm": 2.815164089202881, "learning_rate": 5.726899933937369e-07, "loss": 0.7856, "step": 8921 }, { "epoch": 4.693319305628616, "grad_norm": 2.4070117473602295, "learning_rate": 5.722476112273656e-07, "loss": 0.7505, "step": 8922 }, { "epoch": 4.693845344555497, "grad_norm": 2.529219627380371, "learning_rate": 5.718053779055754e-07, "loss": 0.7264, "step": 8923 }, { "epoch": 4.694371383482378, "grad_norm": 3.786221981048584, "learning_rate": 5.713632934625127e-07, "loss": 0.7555, "step": 8924 }, { "epoch": 4.694897422409259, "grad_norm": 2.4862537384033203, "learning_rate": 5.709213579323108e-07, "loss": 0.7962, "step": 8925 }, { "epoch": 4.695423461336139, "grad_norm": 2.56864595413208, "learning_rate": 5.704795713490937e-07, "loss": 0.7561, "step": 8926 }, { "epoch": 4.695949500263019, "grad_norm": 2.4767754077911377, "learning_rate": 5.700379337469708e-07, "loss": 0.7602, "step": 8927 }, { "epoch": 4.6964755391899, "grad_norm": 2.570997714996338, "learning_rate": 5.695964451600422e-07, "loss": 0.7543, "step": 8928 }, { "epoch": 4.69700157811678, "grad_norm": 2.3905484676361084, "learning_rate": 5.691551056223957e-07, "loss": 0.8008, "step": 8929 }, { "epoch": 4.6975276170436615, "grad_norm": 2.728247880935669, "learning_rate": 5.687139151681079e-07, "loss": 0.7279, "step": 8930 }, { "epoch": 4.698053655970542, "grad_norm": 2.622790575027466, "learning_rate": 5.682728738312437e-07, "loss": 0.8168, "step": 8931 }, { "epoch": 4.698579694897423, "grad_norm": 2.7650840282440186, "learning_rate": 5.678319816458569e-07, "loss": 0.7454, "step": 8932 }, { "epoch": 4.699105733824303, "grad_norm": 2.3717708587646484, "learning_rate": 5.673912386459895e-07, "loss": 0.7456, "step": 8933 }, { "epoch": 4.699631772751184, "grad_norm": 2.4091598987579346, "learning_rate": 5.669506448656711e-07, "loss": 0.7533, "step": 8934 }, { "epoch": 4.700157811678064, "grad_norm": 2.4075400829315186, "learning_rate": 5.665102003389216e-07, "loss": 0.7334, "step": 8935 }, { "epoch": 4.700683850604944, "grad_norm": 2.283949613571167, "learning_rate": 5.660699050997473e-07, "loss": 0.7313, "step": 8936 }, { "epoch": 4.7012098895318255, "grad_norm": 2.383634567260742, "learning_rate": 5.656297591821444e-07, "loss": 0.7862, "step": 8937 }, { "epoch": 4.701735928458706, "grad_norm": 2.2650158405303955, "learning_rate": 5.651897626200975e-07, "loss": 0.7474, "step": 8938 }, { "epoch": 4.702261967385587, "grad_norm": 2.5478498935699463, "learning_rate": 5.647499154475791e-07, "loss": 0.791, "step": 8939 }, { "epoch": 4.702788006312467, "grad_norm": 2.4755916595458984, "learning_rate": 5.643102176985515e-07, "loss": 0.7715, "step": 8940 }, { "epoch": 4.703314045239348, "grad_norm": 2.435530424118042, "learning_rate": 5.638706694069626e-07, "loss": 0.7851, "step": 8941 }, { "epoch": 4.703840084166228, "grad_norm": 2.3357291221618652, "learning_rate": 5.634312706067515e-07, "loss": 0.7519, "step": 8942 }, { "epoch": 4.704366123093109, "grad_norm": 2.3117635250091553, "learning_rate": 5.629920213318449e-07, "loss": 0.8076, "step": 8943 }, { "epoch": 4.7048921620199895, "grad_norm": 2.5477993488311768, "learning_rate": 5.625529216161576e-07, "loss": 0.8577, "step": 8944 }, { "epoch": 4.70541820094687, "grad_norm": 2.3423023223876953, "learning_rate": 5.621139714935936e-07, "loss": 0.774, "step": 8945 }, { "epoch": 4.705944239873751, "grad_norm": 2.4052889347076416, "learning_rate": 5.61675170998045e-07, "loss": 0.7594, "step": 8946 }, { "epoch": 4.706470278800631, "grad_norm": 2.379390239715576, "learning_rate": 5.612365201633912e-07, "loss": 0.8351, "step": 8947 }, { "epoch": 4.706996317727512, "grad_norm": 2.3874783515930176, "learning_rate": 5.607980190235024e-07, "loss": 0.7589, "step": 8948 }, { "epoch": 4.707522356654392, "grad_norm": 2.49642276763916, "learning_rate": 5.603596676122344e-07, "loss": 0.7605, "step": 8949 }, { "epoch": 4.708048395581273, "grad_norm": 2.36794114112854, "learning_rate": 5.599214659634339e-07, "loss": 0.7556, "step": 8950 }, { "epoch": 4.7085744345081535, "grad_norm": 2.277553081512451, "learning_rate": 5.594834141109348e-07, "loss": 0.7618, "step": 8951 }, { "epoch": 4.709100473435035, "grad_norm": 2.426344633102417, "learning_rate": 5.590455120885599e-07, "loss": 0.7698, "step": 8952 }, { "epoch": 4.709626512361915, "grad_norm": 2.368129253387451, "learning_rate": 5.586077599301204e-07, "loss": 0.7572, "step": 8953 }, { "epoch": 4.710152551288795, "grad_norm": 2.3761661052703857, "learning_rate": 5.581701576694154e-07, "loss": 0.7796, "step": 8954 }, { "epoch": 4.710678590215676, "grad_norm": 2.380526065826416, "learning_rate": 5.577327053402337e-07, "loss": 0.765, "step": 8955 }, { "epoch": 4.711204629142556, "grad_norm": 2.381603479385376, "learning_rate": 5.572954029763503e-07, "loss": 0.7307, "step": 8956 }, { "epoch": 4.711730668069437, "grad_norm": 2.3443517684936523, "learning_rate": 5.568582506115306e-07, "loss": 0.7419, "step": 8957 }, { "epoch": 4.7122567069963175, "grad_norm": 2.7294187545776367, "learning_rate": 5.564212482795287e-07, "loss": 0.8009, "step": 8958 }, { "epoch": 4.712782745923199, "grad_norm": 2.46101975440979, "learning_rate": 5.559843960140842e-07, "loss": 0.7481, "step": 8959 }, { "epoch": 4.713308784850079, "grad_norm": 2.490787982940674, "learning_rate": 5.555476938489285e-07, "loss": 0.7683, "step": 8960 }, { "epoch": 4.71383482377696, "grad_norm": 2.413050889968872, "learning_rate": 5.551111418177793e-07, "loss": 0.7614, "step": 8961 }, { "epoch": 4.71436086270384, "grad_norm": 2.319284200668335, "learning_rate": 5.546747399543448e-07, "loss": 0.8007, "step": 8962 }, { "epoch": 4.71488690163072, "grad_norm": 2.362011194229126, "learning_rate": 5.542384882923186e-07, "loss": 0.7768, "step": 8963 }, { "epoch": 4.715412940557601, "grad_norm": 2.3781235218048096, "learning_rate": 5.538023868653846e-07, "loss": 0.7304, "step": 8964 }, { "epoch": 4.7159389794844815, "grad_norm": 2.5265302658081055, "learning_rate": 5.533664357072155e-07, "loss": 0.7802, "step": 8965 }, { "epoch": 4.716465018411363, "grad_norm": 2.4433302879333496, "learning_rate": 5.529306348514713e-07, "loss": 0.7717, "step": 8966 }, { "epoch": 4.716991057338243, "grad_norm": 2.3717451095581055, "learning_rate": 5.524949843318008e-07, "loss": 0.7652, "step": 8967 }, { "epoch": 4.717517096265124, "grad_norm": 2.44887638092041, "learning_rate": 5.520594841818416e-07, "loss": 0.7863, "step": 8968 }, { "epoch": 4.718043135192004, "grad_norm": 2.4891481399536133, "learning_rate": 5.516241344352196e-07, "loss": 0.7874, "step": 8969 }, { "epoch": 4.718569174118885, "grad_norm": 2.303292751312256, "learning_rate": 5.511889351255475e-07, "loss": 0.8108, "step": 8970 }, { "epoch": 4.719095213045765, "grad_norm": 2.510984420776367, "learning_rate": 5.507538862864293e-07, "loss": 0.7973, "step": 8971 }, { "epoch": 4.7196212519726455, "grad_norm": 2.478739023208618, "learning_rate": 5.503189879514542e-07, "loss": 0.8332, "step": 8972 }, { "epoch": 4.720147290899527, "grad_norm": 2.6370742321014404, "learning_rate": 5.498842401542017e-07, "loss": 0.7793, "step": 8973 }, { "epoch": 4.720673329826408, "grad_norm": 2.5012784004211426, "learning_rate": 5.494496429282401e-07, "loss": 0.7585, "step": 8974 }, { "epoch": 4.721199368753288, "grad_norm": 2.664734125137329, "learning_rate": 5.490151963071244e-07, "loss": 0.7611, "step": 8975 }, { "epoch": 4.721725407680168, "grad_norm": 2.3607699871063232, "learning_rate": 5.485809003244003e-07, "loss": 0.8404, "step": 8976 }, { "epoch": 4.722251446607049, "grad_norm": 2.2493174076080322, "learning_rate": 5.481467550135982e-07, "loss": 0.7829, "step": 8977 }, { "epoch": 4.722777485533929, "grad_norm": 2.4686458110809326, "learning_rate": 5.477127604082407e-07, "loss": 0.7274, "step": 8978 }, { "epoch": 4.72330352446081, "grad_norm": 2.4034881591796875, "learning_rate": 5.472789165418368e-07, "loss": 0.748, "step": 8979 }, { "epoch": 4.7238295633876906, "grad_norm": 2.647833824157715, "learning_rate": 5.468452234478841e-07, "loss": 0.7935, "step": 8980 }, { "epoch": 4.724355602314572, "grad_norm": 3.0118916034698486, "learning_rate": 5.464116811598686e-07, "loss": 0.7933, "step": 8981 }, { "epoch": 4.724881641241452, "grad_norm": 2.302861213684082, "learning_rate": 5.459782897112656e-07, "loss": 0.7672, "step": 8982 }, { "epoch": 4.725407680168333, "grad_norm": 2.4912946224212646, "learning_rate": 5.455450491355365e-07, "loss": 0.7535, "step": 8983 }, { "epoch": 4.725933719095213, "grad_norm": 2.288665533065796, "learning_rate": 5.451119594661338e-07, "loss": 0.7814, "step": 8984 }, { "epoch": 4.726459758022093, "grad_norm": 2.4992141723632812, "learning_rate": 5.446790207364958e-07, "loss": 0.7501, "step": 8985 }, { "epoch": 4.726985796948974, "grad_norm": 2.2010655403137207, "learning_rate": 5.442462329800508e-07, "loss": 0.7449, "step": 8986 }, { "epoch": 4.7275118358758546, "grad_norm": 2.3963072299957275, "learning_rate": 5.438135962302149e-07, "loss": 0.7993, "step": 8987 }, { "epoch": 4.728037874802736, "grad_norm": 2.5063445568084717, "learning_rate": 5.433811105203926e-07, "loss": 0.8036, "step": 8988 }, { "epoch": 4.728563913729616, "grad_norm": 2.389068365097046, "learning_rate": 5.429487758839772e-07, "loss": 0.8009, "step": 8989 }, { "epoch": 4.729089952656497, "grad_norm": 2.413666009902954, "learning_rate": 5.425165923543499e-07, "loss": 0.7604, "step": 8990 }, { "epoch": 4.729615991583377, "grad_norm": 2.4378607273101807, "learning_rate": 5.420845599648794e-07, "loss": 0.7749, "step": 8991 }, { "epoch": 4.730142030510258, "grad_norm": 2.330585479736328, "learning_rate": 5.41652678748924e-07, "loss": 0.7772, "step": 8992 }, { "epoch": 4.730668069437138, "grad_norm": 2.592555522918701, "learning_rate": 5.412209487398301e-07, "loss": 0.7518, "step": 8993 }, { "epoch": 4.7311941083640185, "grad_norm": 2.4289040565490723, "learning_rate": 5.407893699709318e-07, "loss": 0.7631, "step": 8994 }, { "epoch": 4.7317201472909, "grad_norm": 2.2833380699157715, "learning_rate": 5.403579424755528e-07, "loss": 0.7457, "step": 8995 }, { "epoch": 4.73224618621778, "grad_norm": 2.3631556034088135, "learning_rate": 5.399266662870031e-07, "loss": 0.7838, "step": 8996 }, { "epoch": 4.732772225144661, "grad_norm": 2.5335159301757812, "learning_rate": 5.394955414385828e-07, "loss": 0.7645, "step": 8997 }, { "epoch": 4.733298264071541, "grad_norm": 2.6111950874328613, "learning_rate": 5.390645679635801e-07, "loss": 0.7456, "step": 8998 }, { "epoch": 4.733824302998422, "grad_norm": 2.3625335693359375, "learning_rate": 5.386337458952698e-07, "loss": 0.7618, "step": 8999 }, { "epoch": 4.734350341925302, "grad_norm": 2.480126142501831, "learning_rate": 5.382030752669168e-07, "loss": 0.7844, "step": 9000 }, { "epoch": 4.734876380852183, "grad_norm": 2.431349277496338, "learning_rate": 5.377725561117744e-07, "loss": 0.8032, "step": 9001 }, { "epoch": 4.735402419779064, "grad_norm": 2.321800708770752, "learning_rate": 5.37342188463083e-07, "loss": 0.7719, "step": 9002 }, { "epoch": 4.735928458705944, "grad_norm": 2.3132412433624268, "learning_rate": 5.369119723540725e-07, "loss": 0.7586, "step": 9003 }, { "epoch": 4.736454497632825, "grad_norm": 2.4994406700134277, "learning_rate": 5.364819078179601e-07, "loss": 0.8085, "step": 9004 }, { "epoch": 4.736980536559705, "grad_norm": 2.591792583465576, "learning_rate": 5.360519948879525e-07, "loss": 0.7816, "step": 9005 }, { "epoch": 4.737506575486586, "grad_norm": 2.4216482639312744, "learning_rate": 5.356222335972425e-07, "loss": 0.7729, "step": 9006 }, { "epoch": 4.738032614413466, "grad_norm": 2.6022558212280273, "learning_rate": 5.351926239790134e-07, "loss": 0.8327, "step": 9007 }, { "epoch": 4.738558653340347, "grad_norm": 2.495760679244995, "learning_rate": 5.347631660664362e-07, "loss": 0.7266, "step": 9008 }, { "epoch": 4.739084692267228, "grad_norm": 2.372746229171753, "learning_rate": 5.343338598926695e-07, "loss": 0.7616, "step": 9009 }, { "epoch": 4.739610731194109, "grad_norm": 2.3422586917877197, "learning_rate": 5.339047054908606e-07, "loss": 0.7701, "step": 9010 }, { "epoch": 4.740136770120989, "grad_norm": 2.3540589809417725, "learning_rate": 5.334757028941454e-07, "loss": 0.7984, "step": 9011 }, { "epoch": 4.740662809047869, "grad_norm": 2.3844621181488037, "learning_rate": 5.330468521356482e-07, "loss": 0.7864, "step": 9012 }, { "epoch": 4.74118884797475, "grad_norm": 2.443161964416504, "learning_rate": 5.326181532484806e-07, "loss": 0.8166, "step": 9013 }, { "epoch": 4.74171488690163, "grad_norm": 2.4030117988586426, "learning_rate": 5.321896062657431e-07, "loss": 0.7488, "step": 9014 }, { "epoch": 4.742240925828511, "grad_norm": 2.451956033706665, "learning_rate": 5.317612112205244e-07, "loss": 0.8131, "step": 9015 }, { "epoch": 4.742766964755392, "grad_norm": 2.310049295425415, "learning_rate": 5.313329681459017e-07, "loss": 0.7077, "step": 9016 }, { "epoch": 4.743293003682273, "grad_norm": 2.2790098190307617, "learning_rate": 5.309048770749406e-07, "loss": 0.785, "step": 9017 }, { "epoch": 4.743819042609153, "grad_norm": 2.471299886703491, "learning_rate": 5.304769380406946e-07, "loss": 0.8059, "step": 9018 }, { "epoch": 4.744345081536034, "grad_norm": 2.4534692764282227, "learning_rate": 5.300491510762049e-07, "loss": 0.8038, "step": 9019 }, { "epoch": 4.744871120462914, "grad_norm": 2.3893964290618896, "learning_rate": 5.296215162145021e-07, "loss": 0.805, "step": 9020 }, { "epoch": 4.745397159389795, "grad_norm": 2.3459837436676025, "learning_rate": 5.29194033488604e-07, "loss": 0.7333, "step": 9021 }, { "epoch": 4.745923198316675, "grad_norm": 2.3876798152923584, "learning_rate": 5.287667029315174e-07, "loss": 0.7665, "step": 9022 }, { "epoch": 4.7464492372435565, "grad_norm": 2.249927282333374, "learning_rate": 5.283395245762371e-07, "loss": 0.7903, "step": 9023 }, { "epoch": 4.746975276170437, "grad_norm": 2.3770294189453125, "learning_rate": 5.279124984557463e-07, "loss": 0.7891, "step": 9024 }, { "epoch": 4.747501315097317, "grad_norm": 2.5572924613952637, "learning_rate": 5.274856246030161e-07, "loss": 0.7961, "step": 9025 }, { "epoch": 4.748027354024198, "grad_norm": 2.5552802085876465, "learning_rate": 5.270589030510067e-07, "loss": 0.7593, "step": 9026 }, { "epoch": 4.748553392951078, "grad_norm": 2.5888400077819824, "learning_rate": 5.266323338326651e-07, "loss": 0.7812, "step": 9027 }, { "epoch": 4.749079431877959, "grad_norm": 2.268892526626587, "learning_rate": 5.262059169809275e-07, "loss": 0.7312, "step": 9028 }, { "epoch": 4.749605470804839, "grad_norm": 2.5691561698913574, "learning_rate": 5.257796525287182e-07, "loss": 0.7517, "step": 9029 }, { "epoch": 4.7501315097317205, "grad_norm": 2.3335750102996826, "learning_rate": 5.253535405089499e-07, "loss": 0.757, "step": 9030 }, { "epoch": 4.750657548658601, "grad_norm": 2.441002607345581, "learning_rate": 5.24927580954524e-07, "loss": 0.7816, "step": 9031 }, { "epoch": 4.751183587585482, "grad_norm": 2.4526185989379883, "learning_rate": 5.245017738983277e-07, "loss": 0.7563, "step": 9032 }, { "epoch": 4.751709626512362, "grad_norm": 2.249312162399292, "learning_rate": 5.240761193732402e-07, "loss": 0.7443, "step": 9033 }, { "epoch": 4.752235665439242, "grad_norm": 2.549687147140503, "learning_rate": 5.23650617412125e-07, "loss": 0.7823, "step": 9034 }, { "epoch": 4.752761704366123, "grad_norm": 2.3420112133026123, "learning_rate": 5.232252680478367e-07, "loss": 0.7817, "step": 9035 }, { "epoch": 4.753287743293003, "grad_norm": 2.479092836380005, "learning_rate": 5.228000713132169e-07, "loss": 0.7685, "step": 9036 }, { "epoch": 4.7538137822198845, "grad_norm": 2.3693814277648926, "learning_rate": 5.22375027241096e-07, "loss": 0.7932, "step": 9037 }, { "epoch": 4.754339821146765, "grad_norm": 2.272975206375122, "learning_rate": 5.21950135864292e-07, "loss": 0.7791, "step": 9038 }, { "epoch": 4.754865860073646, "grad_norm": 2.3920512199401855, "learning_rate": 5.215253972156115e-07, "loss": 0.7865, "step": 9039 }, { "epoch": 4.755391899000526, "grad_norm": 2.5284855365753174, "learning_rate": 5.211008113278499e-07, "loss": 0.786, "step": 9040 }, { "epoch": 4.755917937927407, "grad_norm": 2.5763795375823975, "learning_rate": 5.206763782337887e-07, "loss": 0.7336, "step": 9041 }, { "epoch": 4.756443976854287, "grad_norm": 2.3403635025024414, "learning_rate": 5.202520979661996e-07, "loss": 0.761, "step": 9042 }, { "epoch": 4.756970015781167, "grad_norm": 2.4153048992156982, "learning_rate": 5.198279705578421e-07, "loss": 0.776, "step": 9043 }, { "epoch": 4.7574960547080485, "grad_norm": 2.315401315689087, "learning_rate": 5.19403996041464e-07, "loss": 0.7303, "step": 9044 }, { "epoch": 4.758022093634929, "grad_norm": 2.5400264263153076, "learning_rate": 5.189801744498002e-07, "loss": 0.7741, "step": 9045 }, { "epoch": 4.75854813256181, "grad_norm": 2.4966142177581787, "learning_rate": 5.185565058155748e-07, "loss": 0.745, "step": 9046 }, { "epoch": 4.75907417148869, "grad_norm": 2.4051637649536133, "learning_rate": 5.181329901714999e-07, "loss": 0.8201, "step": 9047 }, { "epoch": 4.759600210415571, "grad_norm": 2.3868119716644287, "learning_rate": 5.177096275502766e-07, "loss": 0.7819, "step": 9048 }, { "epoch": 4.760126249342451, "grad_norm": 2.6075820922851562, "learning_rate": 5.172864179845921e-07, "loss": 0.7264, "step": 9049 }, { "epoch": 4.760652288269332, "grad_norm": 2.466926097869873, "learning_rate": 5.168633615071233e-07, "loss": 0.7341, "step": 9050 }, { "epoch": 4.7611783271962125, "grad_norm": 2.3816115856170654, "learning_rate": 5.164404581505356e-07, "loss": 0.7801, "step": 9051 }, { "epoch": 4.761704366123093, "grad_norm": 2.3512110710144043, "learning_rate": 5.160177079474815e-07, "loss": 0.7987, "step": 9052 }, { "epoch": 4.762230405049974, "grad_norm": 2.4581685066223145, "learning_rate": 5.155951109306023e-07, "loss": 0.7728, "step": 9053 }, { "epoch": 4.762756443976854, "grad_norm": 2.3484432697296143, "learning_rate": 5.151726671325274e-07, "loss": 0.7983, "step": 9054 }, { "epoch": 4.763282482903735, "grad_norm": 2.451979637145996, "learning_rate": 5.147503765858749e-07, "loss": 0.7775, "step": 9055 }, { "epoch": 4.763808521830615, "grad_norm": 2.6195461750030518, "learning_rate": 5.14328239323249e-07, "loss": 0.754, "step": 9056 }, { "epoch": 4.764334560757496, "grad_norm": 2.4434609413146973, "learning_rate": 5.139062553772453e-07, "loss": 0.7553, "step": 9057 }, { "epoch": 4.7648605996843765, "grad_norm": 2.5011584758758545, "learning_rate": 5.134844247804441e-07, "loss": 0.8108, "step": 9058 }, { "epoch": 4.7653866386112576, "grad_norm": 2.4911460876464844, "learning_rate": 5.130627475654163e-07, "loss": 0.7669, "step": 9059 }, { "epoch": 4.765912677538138, "grad_norm": 2.3466527462005615, "learning_rate": 5.126412237647202e-07, "loss": 0.7453, "step": 9060 }, { "epoch": 4.766438716465018, "grad_norm": 2.4183645248413086, "learning_rate": 5.122198534109024e-07, "loss": 0.7674, "step": 9061 }, { "epoch": 4.766964755391899, "grad_norm": 2.3388845920562744, "learning_rate": 5.11798636536498e-07, "loss": 0.782, "step": 9062 }, { "epoch": 4.767490794318779, "grad_norm": 2.3488988876342773, "learning_rate": 5.113775731740287e-07, "loss": 0.7485, "step": 9063 }, { "epoch": 4.76801683324566, "grad_norm": 2.3661632537841797, "learning_rate": 5.109566633560056e-07, "loss": 0.7791, "step": 9064 }, { "epoch": 4.7685428721725405, "grad_norm": 2.420675754547119, "learning_rate": 5.105359071149282e-07, "loss": 0.7997, "step": 9065 }, { "epoch": 4.7690689110994215, "grad_norm": 2.4533286094665527, "learning_rate": 5.101153044832838e-07, "loss": 0.8061, "step": 9066 }, { "epoch": 4.769594950026302, "grad_norm": 2.421241044998169, "learning_rate": 5.096948554935472e-07, "loss": 0.7725, "step": 9067 }, { "epoch": 4.770120988953183, "grad_norm": 2.3592143058776855, "learning_rate": 5.092745601781832e-07, "loss": 0.7647, "step": 9068 }, { "epoch": 4.770647027880063, "grad_norm": 2.447930335998535, "learning_rate": 5.08854418569642e-07, "loss": 0.7737, "step": 9069 }, { "epoch": 4.771173066806944, "grad_norm": 2.418416976928711, "learning_rate": 5.084344307003633e-07, "loss": 0.7948, "step": 9070 }, { "epoch": 4.771699105733824, "grad_norm": 2.57692551612854, "learning_rate": 5.080145966027755e-07, "loss": 0.7648, "step": 9071 }, { "epoch": 4.772225144660705, "grad_norm": 2.330059051513672, "learning_rate": 5.075949163092947e-07, "loss": 0.7893, "step": 9072 }, { "epoch": 4.7727511835875855, "grad_norm": 2.4157345294952393, "learning_rate": 5.071753898523246e-07, "loss": 0.7864, "step": 9073 }, { "epoch": 4.773277222514466, "grad_norm": 2.4598004817962646, "learning_rate": 5.067560172642579e-07, "loss": 0.7562, "step": 9074 }, { "epoch": 4.773803261441347, "grad_norm": 2.5462634563446045, "learning_rate": 5.063367985774748e-07, "loss": 0.7661, "step": 9075 }, { "epoch": 4.774329300368227, "grad_norm": 2.6109893321990967, "learning_rate": 5.059177338243443e-07, "loss": 0.7585, "step": 9076 }, { "epoch": 4.774855339295108, "grad_norm": 2.350914239883423, "learning_rate": 5.054988230372221e-07, "loss": 0.7841, "step": 9077 }, { "epoch": 4.775381378221988, "grad_norm": 2.9237539768218994, "learning_rate": 5.050800662484531e-07, "loss": 0.7761, "step": 9078 }, { "epoch": 4.775907417148869, "grad_norm": 2.496375322341919, "learning_rate": 5.046614634903704e-07, "loss": 0.7704, "step": 9079 }, { "epoch": 4.7764334560757495, "grad_norm": 2.475736379623413, "learning_rate": 5.042430147952954e-07, "loss": 0.8044, "step": 9080 }, { "epoch": 4.776959495002631, "grad_norm": 2.3815035820007324, "learning_rate": 5.03824720195536e-07, "loss": 0.7471, "step": 9081 }, { "epoch": 4.777485533929511, "grad_norm": 2.467413902282715, "learning_rate": 5.0340657972339e-07, "loss": 0.7699, "step": 9082 }, { "epoch": 4.778011572856391, "grad_norm": 2.345513105392456, "learning_rate": 5.029885934111433e-07, "loss": 0.7476, "step": 9083 }, { "epoch": 4.778537611783272, "grad_norm": 2.43619704246521, "learning_rate": 5.025707612910677e-07, "loss": 0.7413, "step": 9084 }, { "epoch": 4.779063650710152, "grad_norm": 2.4016788005828857, "learning_rate": 5.021530833954255e-07, "loss": 0.793, "step": 9085 }, { "epoch": 4.779589689637033, "grad_norm": 2.735020637512207, "learning_rate": 5.017355597564663e-07, "loss": 0.7673, "step": 9086 }, { "epoch": 4.7801157285639135, "grad_norm": 2.3192338943481445, "learning_rate": 5.013181904064274e-07, "loss": 0.7293, "step": 9087 }, { "epoch": 4.780641767490795, "grad_norm": 2.2888853549957275, "learning_rate": 5.009009753775345e-07, "loss": 0.7763, "step": 9088 }, { "epoch": 4.781167806417675, "grad_norm": 2.5663809776306152, "learning_rate": 5.004839147020019e-07, "loss": 0.8176, "step": 9089 }, { "epoch": 4.781693845344556, "grad_norm": 2.3702266216278076, "learning_rate": 5.000670084120315e-07, "loss": 0.7975, "step": 9090 }, { "epoch": 4.782219884271436, "grad_norm": 2.4209561347961426, "learning_rate": 4.996502565398125e-07, "loss": 0.8047, "step": 9091 }, { "epoch": 4.782745923198316, "grad_norm": 2.2881996631622314, "learning_rate": 4.992336591175231e-07, "loss": 0.7473, "step": 9092 }, { "epoch": 4.783271962125197, "grad_norm": 2.5324788093566895, "learning_rate": 4.988172161773302e-07, "loss": 0.7816, "step": 9093 }, { "epoch": 4.7837980010520775, "grad_norm": 2.3747105598449707, "learning_rate": 4.984009277513868e-07, "loss": 0.7433, "step": 9094 }, { "epoch": 4.784324039978959, "grad_norm": 2.49125337600708, "learning_rate": 4.979847938718357e-07, "loss": 0.7831, "step": 9095 }, { "epoch": 4.784850078905839, "grad_norm": 2.5042505264282227, "learning_rate": 4.975688145708071e-07, "loss": 0.7395, "step": 9096 }, { "epoch": 4.78537611783272, "grad_norm": 2.4318039417266846, "learning_rate": 4.971529898804204e-07, "loss": 0.7558, "step": 9097 }, { "epoch": 4.7859021567596, "grad_norm": 2.4336962699890137, "learning_rate": 4.967373198327799e-07, "loss": 0.7858, "step": 9098 }, { "epoch": 4.786428195686481, "grad_norm": 2.4202816486358643, "learning_rate": 4.963218044599816e-07, "loss": 0.7406, "step": 9099 }, { "epoch": 4.786954234613361, "grad_norm": 2.569545269012451, "learning_rate": 4.959064437941077e-07, "loss": 0.8318, "step": 9100 }, { "epoch": 4.7874802735402415, "grad_norm": 2.351001024246216, "learning_rate": 4.954912378672289e-07, "loss": 0.7737, "step": 9101 }, { "epoch": 4.788006312467123, "grad_norm": 2.2663633823394775, "learning_rate": 4.950761867114037e-07, "loss": 0.7507, "step": 9102 }, { "epoch": 4.788532351394003, "grad_norm": 2.427870988845825, "learning_rate": 4.946612903586793e-07, "loss": 0.7833, "step": 9103 }, { "epoch": 4.789058390320884, "grad_norm": 2.6192092895507812, "learning_rate": 4.942465488410902e-07, "loss": 0.7701, "step": 9104 }, { "epoch": 4.789584429247764, "grad_norm": 2.4988791942596436, "learning_rate": 4.938319621906587e-07, "loss": 0.7927, "step": 9105 }, { "epoch": 4.790110468174645, "grad_norm": 2.311074733734131, "learning_rate": 4.934175304393965e-07, "loss": 0.7878, "step": 9106 }, { "epoch": 4.790636507101525, "grad_norm": 2.607074737548828, "learning_rate": 4.930032536193014e-07, "loss": 0.8361, "step": 9107 }, { "epoch": 4.791162546028406, "grad_norm": 2.301159620285034, "learning_rate": 4.925891317623607e-07, "loss": 0.7556, "step": 9108 }, { "epoch": 4.791688584955287, "grad_norm": 2.452260732650757, "learning_rate": 4.921751649005499e-07, "loss": 0.7583, "step": 9109 }, { "epoch": 4.792214623882167, "grad_norm": 2.3395650386810303, "learning_rate": 4.917613530658314e-07, "loss": 0.8049, "step": 9110 }, { "epoch": 4.792740662809048, "grad_norm": 2.377103090286255, "learning_rate": 4.913476962901564e-07, "loss": 0.768, "step": 9111 }, { "epoch": 4.793266701735929, "grad_norm": 2.490147590637207, "learning_rate": 4.90934194605465e-07, "loss": 0.8148, "step": 9112 }, { "epoch": 4.793792740662809, "grad_norm": 2.3916900157928467, "learning_rate": 4.905208480436824e-07, "loss": 0.728, "step": 9113 }, { "epoch": 4.794318779589689, "grad_norm": 2.4889438152313232, "learning_rate": 4.901076566367244e-07, "loss": 0.8008, "step": 9114 }, { "epoch": 4.79484481851657, "grad_norm": 2.5035603046417236, "learning_rate": 4.896946204164946e-07, "loss": 0.8015, "step": 9115 }, { "epoch": 4.795370857443451, "grad_norm": 2.482731819152832, "learning_rate": 4.892817394148836e-07, "loss": 0.8308, "step": 9116 }, { "epoch": 4.795896896370332, "grad_norm": 2.4224746227264404, "learning_rate": 4.888690136637717e-07, "loss": 0.8065, "step": 9117 }, { "epoch": 4.796422935297212, "grad_norm": 2.6329233646392822, "learning_rate": 4.88456443195024e-07, "loss": 0.7972, "step": 9118 }, { "epoch": 4.796948974224093, "grad_norm": 2.380758285522461, "learning_rate": 4.880440280404977e-07, "loss": 0.7632, "step": 9119 }, { "epoch": 4.797475013150973, "grad_norm": 2.4442639350891113, "learning_rate": 4.876317682320344e-07, "loss": 0.7398, "step": 9120 }, { "epoch": 4.798001052077854, "grad_norm": 2.2979209423065186, "learning_rate": 4.872196638014659e-07, "loss": 0.7738, "step": 9121 }, { "epoch": 4.798527091004734, "grad_norm": 2.5280535221099854, "learning_rate": 4.868077147806117e-07, "loss": 0.7664, "step": 9122 }, { "epoch": 4.799053129931615, "grad_norm": 2.5119893550872803, "learning_rate": 4.863959212012784e-07, "loss": 0.7868, "step": 9123 }, { "epoch": 4.799579168858496, "grad_norm": 2.318741798400879, "learning_rate": 4.859842830952616e-07, "loss": 0.7128, "step": 9124 }, { "epoch": 4.800105207785376, "grad_norm": 2.4127187728881836, "learning_rate": 4.855728004943444e-07, "loss": 0.798, "step": 9125 }, { "epoch": 4.800631246712257, "grad_norm": 2.391779661178589, "learning_rate": 4.851614734302987e-07, "loss": 0.7724, "step": 9126 }, { "epoch": 4.801157285639137, "grad_norm": 2.311973810195923, "learning_rate": 4.84750301934882e-07, "loss": 0.7766, "step": 9127 }, { "epoch": 4.801683324566018, "grad_norm": 2.5216002464294434, "learning_rate": 4.843392860398427e-07, "loss": 0.811, "step": 9128 }, { "epoch": 4.802209363492898, "grad_norm": 2.4147045612335205, "learning_rate": 4.839284257769158e-07, "loss": 0.7601, "step": 9129 }, { "epoch": 4.8027354024197795, "grad_norm": 2.4309191703796387, "learning_rate": 4.835177211778241e-07, "loss": 0.7395, "step": 9130 }, { "epoch": 4.80326144134666, "grad_norm": 2.516692638397217, "learning_rate": 4.831071722742786e-07, "loss": 0.7682, "step": 9131 }, { "epoch": 4.80378748027354, "grad_norm": 2.4268972873687744, "learning_rate": 4.826967790979786e-07, "loss": 0.7036, "step": 9132 }, { "epoch": 4.804313519200421, "grad_norm": 2.2925307750701904, "learning_rate": 4.82286541680612e-07, "loss": 0.7598, "step": 9133 }, { "epoch": 4.804839558127301, "grad_norm": 2.5060667991638184, "learning_rate": 4.818764600538523e-07, "loss": 0.7751, "step": 9134 }, { "epoch": 4.805365597054182, "grad_norm": 2.501265287399292, "learning_rate": 4.814665342493629e-07, "loss": 0.7861, "step": 9135 }, { "epoch": 4.805891635981062, "grad_norm": 2.6420555114746094, "learning_rate": 4.810567642987954e-07, "loss": 0.8081, "step": 9136 }, { "epoch": 4.8064176749079435, "grad_norm": 2.585592746734619, "learning_rate": 4.806471502337883e-07, "loss": 0.836, "step": 9137 }, { "epoch": 4.806943713834824, "grad_norm": 2.3444716930389404, "learning_rate": 4.802376920859686e-07, "loss": 0.8415, "step": 9138 }, { "epoch": 4.807469752761705, "grad_norm": 2.5145068168640137, "learning_rate": 4.798283898869513e-07, "loss": 0.7845, "step": 9139 }, { "epoch": 4.807995791688585, "grad_norm": 2.303823709487915, "learning_rate": 4.794192436683399e-07, "loss": 0.7471, "step": 9140 }, { "epoch": 4.808521830615465, "grad_norm": 2.4735357761383057, "learning_rate": 4.790102534617233e-07, "loss": 0.7197, "step": 9141 }, { "epoch": 4.809047869542346, "grad_norm": 2.5085606575012207, "learning_rate": 4.78601419298682e-07, "loss": 0.8567, "step": 9142 }, { "epoch": 4.809573908469226, "grad_norm": 2.3733408451080322, "learning_rate": 4.781927412107818e-07, "loss": 0.8199, "step": 9143 }, { "epoch": 4.8100999473961075, "grad_norm": 2.350113868713379, "learning_rate": 4.777842192295773e-07, "loss": 0.7663, "step": 9144 }, { "epoch": 4.810625986322988, "grad_norm": 2.6009793281555176, "learning_rate": 4.77375853386611e-07, "loss": 0.7688, "step": 9145 }, { "epoch": 4.811152025249869, "grad_norm": 2.393566370010376, "learning_rate": 4.769676437134141e-07, "loss": 0.7577, "step": 9146 }, { "epoch": 4.811678064176749, "grad_norm": 2.424201726913452, "learning_rate": 4.7655959024150516e-07, "loss": 0.7661, "step": 9147 }, { "epoch": 4.81220410310363, "grad_norm": 2.6920835971832275, "learning_rate": 4.761516930023896e-07, "loss": 0.7649, "step": 9148 }, { "epoch": 4.81273014203051, "grad_norm": 2.4214084148406982, "learning_rate": 4.7574395202756223e-07, "loss": 0.7466, "step": 9149 }, { "epoch": 4.81325618095739, "grad_norm": 2.8006601333618164, "learning_rate": 4.7533636734850564e-07, "loss": 0.8168, "step": 9150 }, { "epoch": 4.8137822198842715, "grad_norm": 2.583569049835205, "learning_rate": 4.7492893899668937e-07, "loss": 0.7911, "step": 9151 }, { "epoch": 4.814308258811152, "grad_norm": 2.4994688034057617, "learning_rate": 4.745216670035721e-07, "loss": 0.7651, "step": 9152 }, { "epoch": 4.814834297738033, "grad_norm": 2.6436803340911865, "learning_rate": 4.741145514006007e-07, "loss": 0.8183, "step": 9153 }, { "epoch": 4.815360336664913, "grad_norm": 2.2799580097198486, "learning_rate": 4.7370759221920723e-07, "loss": 0.7789, "step": 9154 }, { "epoch": 4.815886375591794, "grad_norm": 2.4304120540618896, "learning_rate": 4.7330078949081535e-07, "loss": 0.7763, "step": 9155 }, { "epoch": 4.816412414518674, "grad_norm": 2.514035940170288, "learning_rate": 4.728941432468337e-07, "loss": 0.7693, "step": 9156 }, { "epoch": 4.816938453445555, "grad_norm": 2.450211763381958, "learning_rate": 4.724876535186601e-07, "loss": 0.793, "step": 9157 }, { "epoch": 4.8174644923724355, "grad_norm": 2.58949613571167, "learning_rate": 4.720813203376809e-07, "loss": 0.7436, "step": 9158 }, { "epoch": 4.8179905312993165, "grad_norm": 2.440373420715332, "learning_rate": 4.716751437352693e-07, "loss": 0.7986, "step": 9159 }, { "epoch": 4.818516570226197, "grad_norm": 2.6193525791168213, "learning_rate": 4.7126912374278686e-07, "loss": 0.8485, "step": 9160 }, { "epoch": 4.819042609153078, "grad_norm": 2.440355062484741, "learning_rate": 4.7086326039158306e-07, "loss": 0.7787, "step": 9161 }, { "epoch": 4.819568648079958, "grad_norm": 2.5062994956970215, "learning_rate": 4.7045755371299545e-07, "loss": 0.7881, "step": 9162 }, { "epoch": 4.820094687006838, "grad_norm": 2.3882555961608887, "learning_rate": 4.700520037383485e-07, "loss": 0.7648, "step": 9163 }, { "epoch": 4.820620725933719, "grad_norm": 2.253563165664673, "learning_rate": 4.6964661049895583e-07, "loss": 0.7871, "step": 9164 }, { "epoch": 4.8211467648605995, "grad_norm": 2.5780258178710938, "learning_rate": 4.6924137402611815e-07, "loss": 0.7984, "step": 9165 }, { "epoch": 4.8216728037874805, "grad_norm": 2.4559638500213623, "learning_rate": 4.6883629435112545e-07, "loss": 0.7523, "step": 9166 }, { "epoch": 4.822198842714361, "grad_norm": 2.3612444400787354, "learning_rate": 4.684313715052527e-07, "loss": 0.8004, "step": 9167 }, { "epoch": 4.822724881641242, "grad_norm": 2.5486960411071777, "learning_rate": 4.6802660551976574e-07, "loss": 0.7575, "step": 9168 }, { "epoch": 4.823250920568122, "grad_norm": 2.5330560207366943, "learning_rate": 4.676219964259174e-07, "loss": 0.779, "step": 9169 }, { "epoch": 4.823776959495003, "grad_norm": 2.427905797958374, "learning_rate": 4.67217544254947e-07, "loss": 0.7593, "step": 9170 }, { "epoch": 4.824302998421883, "grad_norm": 2.4015212059020996, "learning_rate": 4.668132490380836e-07, "loss": 0.8406, "step": 9171 }, { "epoch": 4.8248290373487634, "grad_norm": 2.495419979095459, "learning_rate": 4.6640911080654356e-07, "loss": 0.7478, "step": 9172 }, { "epoch": 4.8253550762756445, "grad_norm": 2.494229555130005, "learning_rate": 4.660051295915305e-07, "loss": 0.7664, "step": 9173 }, { "epoch": 4.825881115202525, "grad_norm": 2.4759912490844727, "learning_rate": 4.656013054242367e-07, "loss": 0.7582, "step": 9174 }, { "epoch": 4.826407154129406, "grad_norm": 2.3977596759796143, "learning_rate": 4.6519763833584223e-07, "loss": 0.8236, "step": 9175 }, { "epoch": 4.826933193056286, "grad_norm": 2.497652769088745, "learning_rate": 4.64794128357515e-07, "loss": 0.7594, "step": 9176 }, { "epoch": 4.827459231983167, "grad_norm": 2.405832052230835, "learning_rate": 4.643907755204097e-07, "loss": 0.8108, "step": 9177 }, { "epoch": 4.827985270910047, "grad_norm": 2.3437016010284424, "learning_rate": 4.639875798556703e-07, "loss": 0.7623, "step": 9178 }, { "epoch": 4.828511309836928, "grad_norm": 2.7363779544830322, "learning_rate": 4.635845413944287e-07, "loss": 0.8132, "step": 9179 }, { "epoch": 4.8290373487638085, "grad_norm": 2.4804418087005615, "learning_rate": 4.63181660167803e-07, "loss": 0.8212, "step": 9180 }, { "epoch": 4.829563387690689, "grad_norm": 2.4118309020996094, "learning_rate": 4.6277893620690047e-07, "loss": 0.7242, "step": 9181 }, { "epoch": 4.83008942661757, "grad_norm": 2.4802968502044678, "learning_rate": 4.6237636954281656e-07, "loss": 0.8004, "step": 9182 }, { "epoch": 4.83061546554445, "grad_norm": 2.3784289360046387, "learning_rate": 4.6197396020663424e-07, "loss": 0.7648, "step": 9183 }, { "epoch": 4.831141504471331, "grad_norm": 2.3564107418060303, "learning_rate": 4.6157170822942324e-07, "loss": 0.7351, "step": 9184 }, { "epoch": 4.831667543398211, "grad_norm": 2.347116231918335, "learning_rate": 4.611696136422422e-07, "loss": 0.776, "step": 9185 }, { "epoch": 4.832193582325092, "grad_norm": 2.551668882369995, "learning_rate": 4.607676764761379e-07, "loss": 0.7638, "step": 9186 }, { "epoch": 4.8327196212519725, "grad_norm": 2.4285004138946533, "learning_rate": 4.6036589676214413e-07, "loss": 0.7602, "step": 9187 }, { "epoch": 4.833245660178854, "grad_norm": 2.432318925857544, "learning_rate": 4.5996427453128293e-07, "loss": 0.7863, "step": 9188 }, { "epoch": 4.833771699105734, "grad_norm": 2.472846269607544, "learning_rate": 4.595628098145649e-07, "loss": 0.7621, "step": 9189 }, { "epoch": 4.834297738032614, "grad_norm": 2.3628740310668945, "learning_rate": 4.591615026429866e-07, "loss": 0.7544, "step": 9190 }, { "epoch": 4.834823776959495, "grad_norm": 2.629133701324463, "learning_rate": 4.5876035304753456e-07, "loss": 0.7666, "step": 9191 }, { "epoch": 4.835349815886375, "grad_norm": 2.4353911876678467, "learning_rate": 4.5835936105918076e-07, "loss": 0.7974, "step": 9192 }, { "epoch": 4.835875854813256, "grad_norm": 2.3677780628204346, "learning_rate": 4.5795852670888733e-07, "loss": 0.7734, "step": 9193 }, { "epoch": 4.8364018937401365, "grad_norm": 2.4623961448669434, "learning_rate": 4.575578500276032e-07, "loss": 0.7987, "step": 9194 }, { "epoch": 4.836927932667018, "grad_norm": 2.3480515480041504, "learning_rate": 4.5715733104626516e-07, "loss": 0.7618, "step": 9195 }, { "epoch": 4.837453971593898, "grad_norm": 2.302177667617798, "learning_rate": 4.567569697957977e-07, "loss": 0.8, "step": 9196 }, { "epoch": 4.837980010520779, "grad_norm": 2.25274920463562, "learning_rate": 4.563567663071139e-07, "loss": 0.7644, "step": 9197 }, { "epoch": 4.838506049447659, "grad_norm": 2.3492579460144043, "learning_rate": 4.559567206111132e-07, "loss": 0.7707, "step": 9198 }, { "epoch": 4.839032088374539, "grad_norm": 2.2800369262695312, "learning_rate": 4.555568327386842e-07, "loss": 0.7685, "step": 9199 }, { "epoch": 4.83955812730142, "grad_norm": 2.5013973712921143, "learning_rate": 4.5515710272070246e-07, "loss": 0.7285, "step": 9200 }, { "epoch": 4.8400841662283005, "grad_norm": 2.787599563598633, "learning_rate": 4.5475753058803226e-07, "loss": 0.7771, "step": 9201 }, { "epoch": 4.840610205155182, "grad_norm": 2.6487085819244385, "learning_rate": 4.543581163715255e-07, "loss": 0.7891, "step": 9202 }, { "epoch": 4.841136244082062, "grad_norm": 2.281497001647949, "learning_rate": 4.5395886010202005e-07, "loss": 0.7405, "step": 9203 }, { "epoch": 4.841662283008943, "grad_norm": 2.4789090156555176, "learning_rate": 4.5355976181034406e-07, "loss": 0.7958, "step": 9204 }, { "epoch": 4.842188321935823, "grad_norm": 2.5035488605499268, "learning_rate": 4.531608215273131e-07, "loss": 0.818, "step": 9205 }, { "epoch": 4.842714360862704, "grad_norm": 2.545590877532959, "learning_rate": 4.5276203928372843e-07, "loss": 0.735, "step": 9206 }, { "epoch": 4.843240399789584, "grad_norm": 2.311375379562378, "learning_rate": 4.523634151103815e-07, "loss": 0.7574, "step": 9207 }, { "epoch": 4.843766438716465, "grad_norm": 2.427124500274658, "learning_rate": 4.519649490380504e-07, "loss": 0.7437, "step": 9208 }, { "epoch": 4.844292477643346, "grad_norm": 2.524705648422241, "learning_rate": 4.5156664109750144e-07, "loss": 0.7543, "step": 9209 }, { "epoch": 4.844818516570227, "grad_norm": 2.4259538650512695, "learning_rate": 4.511684913194889e-07, "loss": 0.7677, "step": 9210 }, { "epoch": 4.845344555497107, "grad_norm": 2.3872807025909424, "learning_rate": 4.5077049973475366e-07, "loss": 0.7799, "step": 9211 }, { "epoch": 4.845870594423987, "grad_norm": 2.528369426727295, "learning_rate": 4.503726663740265e-07, "loss": 0.8322, "step": 9212 }, { "epoch": 4.846396633350868, "grad_norm": 2.4475088119506836, "learning_rate": 4.4997499126802346e-07, "loss": 0.7522, "step": 9213 }, { "epoch": 4.846922672277748, "grad_norm": 2.3957321643829346, "learning_rate": 4.4957747444745016e-07, "loss": 0.7772, "step": 9214 }, { "epoch": 4.847448711204629, "grad_norm": 2.3642044067382812, "learning_rate": 4.491801159429998e-07, "loss": 0.764, "step": 9215 }, { "epoch": 4.84797475013151, "grad_norm": 2.4057910442352295, "learning_rate": 4.487829157853521e-07, "loss": 0.7871, "step": 9216 }, { "epoch": 4.848500789058391, "grad_norm": 2.394082546234131, "learning_rate": 4.483858740051761e-07, "loss": 0.8087, "step": 9217 }, { "epoch": 4.849026827985271, "grad_norm": 2.4454338550567627, "learning_rate": 4.479889906331275e-07, "loss": 0.7859, "step": 9218 }, { "epoch": 4.849552866912152, "grad_norm": 2.580508232116699, "learning_rate": 4.475922656998516e-07, "loss": 0.7717, "step": 9219 }, { "epoch": 4.850078905839032, "grad_norm": 3.8876004219055176, "learning_rate": 4.471956992359783e-07, "loss": 0.8077, "step": 9220 }, { "epoch": 4.850604944765912, "grad_norm": 2.3701181411743164, "learning_rate": 4.467992912721278e-07, "loss": 0.7672, "step": 9221 }, { "epoch": 4.851130983692793, "grad_norm": 2.3864476680755615, "learning_rate": 4.4640304183890757e-07, "loss": 0.777, "step": 9222 }, { "epoch": 4.851657022619674, "grad_norm": 2.5996651649475098, "learning_rate": 4.4600695096691236e-07, "loss": 0.7892, "step": 9223 }, { "epoch": 4.852183061546555, "grad_norm": 2.7791764736175537, "learning_rate": 4.456110186867249e-07, "loss": 0.7297, "step": 9224 }, { "epoch": 4.852709100473435, "grad_norm": 2.5364136695861816, "learning_rate": 4.45215245028916e-07, "loss": 0.7799, "step": 9225 }, { "epoch": 4.853235139400316, "grad_norm": 2.3849501609802246, "learning_rate": 4.4481963002404453e-07, "loss": 0.7585, "step": 9226 }, { "epoch": 4.853761178327196, "grad_norm": 2.4486541748046875, "learning_rate": 4.444241737026547e-07, "loss": 0.8119, "step": 9227 }, { "epoch": 4.854287217254077, "grad_norm": 2.341799020767212, "learning_rate": 4.440288760952824e-07, "loss": 0.7548, "step": 9228 }, { "epoch": 4.854813256180957, "grad_norm": 2.332106351852417, "learning_rate": 4.4363373723244703e-07, "loss": 0.777, "step": 9229 }, { "epoch": 4.855339295107838, "grad_norm": 2.7264246940612793, "learning_rate": 4.432387571446589e-07, "loss": 0.7852, "step": 9230 }, { "epoch": 4.855865334034719, "grad_norm": 2.4496166706085205, "learning_rate": 4.428439358624151e-07, "loss": 0.7836, "step": 9231 }, { "epoch": 4.856391372961599, "grad_norm": 2.348107099533081, "learning_rate": 4.424492734162e-07, "loss": 0.8104, "step": 9232 }, { "epoch": 4.85691741188848, "grad_norm": 2.5867178440093994, "learning_rate": 4.4205476983648713e-07, "loss": 0.8448, "step": 9233 }, { "epoch": 4.85744345081536, "grad_norm": 2.4545867443084717, "learning_rate": 4.4166042515373516e-07, "loss": 0.8117, "step": 9234 }, { "epoch": 4.857969489742241, "grad_norm": 2.648716926574707, "learning_rate": 4.412662393983924e-07, "loss": 0.7952, "step": 9235 }, { "epoch": 4.858495528669121, "grad_norm": 2.8375144004821777, "learning_rate": 4.408722126008949e-07, "loss": 0.7683, "step": 9236 }, { "epoch": 4.8590215675960025, "grad_norm": 2.333240270614624, "learning_rate": 4.4047834479166607e-07, "loss": 0.7363, "step": 9237 }, { "epoch": 4.859547606522883, "grad_norm": 2.3310129642486572, "learning_rate": 4.4008463600111686e-07, "loss": 0.8095, "step": 9238 }, { "epoch": 4.860073645449763, "grad_norm": 2.5818309783935547, "learning_rate": 4.396910862596468e-07, "loss": 0.8061, "step": 9239 }, { "epoch": 4.860599684376644, "grad_norm": 2.409816026687622, "learning_rate": 4.3929769559764106e-07, "loss": 0.7638, "step": 9240 }, { "epoch": 4.861125723303524, "grad_norm": 2.3435451984405518, "learning_rate": 4.389044640454751e-07, "loss": 0.7498, "step": 9241 }, { "epoch": 4.861651762230405, "grad_norm": 2.3700873851776123, "learning_rate": 4.3851139163350985e-07, "loss": 0.8194, "step": 9242 }, { "epoch": 4.862177801157285, "grad_norm": 2.5121078491210938, "learning_rate": 4.3811847839209583e-07, "loss": 0.8038, "step": 9243 }, { "epoch": 4.8627038400841665, "grad_norm": 2.306656837463379, "learning_rate": 4.3772572435157007e-07, "loss": 0.7631, "step": 9244 }, { "epoch": 4.863229879011047, "grad_norm": 2.4411587715148926, "learning_rate": 4.3733312954225756e-07, "loss": 0.754, "step": 9245 }, { "epoch": 4.863755917937928, "grad_norm": 2.438312292098999, "learning_rate": 4.3694069399447176e-07, "loss": 0.7701, "step": 9246 }, { "epoch": 4.864281956864808, "grad_norm": 2.416154384613037, "learning_rate": 4.365484177385132e-07, "loss": 0.7403, "step": 9247 }, { "epoch": 4.864807995791688, "grad_norm": 2.435748338699341, "learning_rate": 4.3615630080466905e-07, "loss": 0.8224, "step": 9248 }, { "epoch": 4.865334034718569, "grad_norm": 2.520920991897583, "learning_rate": 4.3576434322321585e-07, "loss": 0.8366, "step": 9249 }, { "epoch": 4.86586007364545, "grad_norm": 2.3485729694366455, "learning_rate": 4.3537254502441756e-07, "loss": 0.7706, "step": 9250 }, { "epoch": 4.8663861125723304, "grad_norm": 2.368957757949829, "learning_rate": 4.349809062385255e-07, "loss": 0.7847, "step": 9251 }, { "epoch": 4.866912151499211, "grad_norm": 2.6354823112487793, "learning_rate": 4.3458942689577785e-07, "loss": 0.7974, "step": 9252 }, { "epoch": 4.867438190426092, "grad_norm": 2.351818084716797, "learning_rate": 4.341981070264017e-07, "loss": 0.7817, "step": 9253 }, { "epoch": 4.867964229352972, "grad_norm": 2.348900079727173, "learning_rate": 4.338069466606115e-07, "loss": 0.7743, "step": 9254 }, { "epoch": 4.868490268279853, "grad_norm": 2.6266822814941406, "learning_rate": 4.334159458286102e-07, "loss": 0.7662, "step": 9255 }, { "epoch": 4.869016307206733, "grad_norm": 2.7509374618530273, "learning_rate": 4.33025104560586e-07, "loss": 0.7953, "step": 9256 }, { "epoch": 4.869542346133614, "grad_norm": 2.46771502494812, "learning_rate": 4.3263442288671716e-07, "loss": 0.7648, "step": 9257 }, { "epoch": 4.870068385060494, "grad_norm": 2.2352139949798584, "learning_rate": 4.322439008371684e-07, "loss": 0.7261, "step": 9258 }, { "epoch": 4.8705944239873755, "grad_norm": 2.80551815032959, "learning_rate": 4.318535384420927e-07, "loss": 0.7239, "step": 9259 }, { "epoch": 4.871120462914256, "grad_norm": 2.3969638347625732, "learning_rate": 4.3146333573163085e-07, "loss": 0.7634, "step": 9260 }, { "epoch": 4.871646501841136, "grad_norm": 2.4832613468170166, "learning_rate": 4.310732927359104e-07, "loss": 0.8075, "step": 9261 }, { "epoch": 4.872172540768017, "grad_norm": 2.478261947631836, "learning_rate": 4.3068340948504814e-07, "loss": 0.8192, "step": 9262 }, { "epoch": 4.872698579694897, "grad_norm": 2.3278605937957764, "learning_rate": 4.3029368600914637e-07, "loss": 0.7213, "step": 9263 }, { "epoch": 4.873224618621778, "grad_norm": 2.4674181938171387, "learning_rate": 4.2990412233829715e-07, "loss": 0.7293, "step": 9264 }, { "epoch": 4.873750657548658, "grad_norm": 2.376608371734619, "learning_rate": 4.2951471850257805e-07, "loss": 0.8062, "step": 9265 }, { "epoch": 4.8742766964755395, "grad_norm": 2.7449114322662354, "learning_rate": 4.2912547453205636e-07, "loss": 0.8156, "step": 9266 }, { "epoch": 4.87480273540242, "grad_norm": 2.351728677749634, "learning_rate": 4.2873639045678596e-07, "loss": 0.818, "step": 9267 }, { "epoch": 4.875328774329301, "grad_norm": 2.2530288696289062, "learning_rate": 4.283474663068088e-07, "loss": 0.7783, "step": 9268 }, { "epoch": 4.875854813256181, "grad_norm": 2.265854597091675, "learning_rate": 4.279587021121545e-07, "loss": 0.752, "step": 9269 }, { "epoch": 4.876380852183061, "grad_norm": 2.2614498138427734, "learning_rate": 4.275700979028394e-07, "loss": 0.7647, "step": 9270 }, { "epoch": 4.876906891109942, "grad_norm": 2.6675307750701904, "learning_rate": 4.2718165370886854e-07, "loss": 0.8078, "step": 9271 }, { "epoch": 4.877432930036822, "grad_norm": 2.560035228729248, "learning_rate": 4.2679336956023417e-07, "loss": 0.7537, "step": 9272 }, { "epoch": 4.8779589689637035, "grad_norm": 2.4416913986206055, "learning_rate": 4.264052454869164e-07, "loss": 0.8247, "step": 9273 }, { "epoch": 4.878485007890584, "grad_norm": 2.339390993118286, "learning_rate": 4.260172815188829e-07, "loss": 0.8101, "step": 9274 }, { "epoch": 4.879011046817465, "grad_norm": 2.749077081680298, "learning_rate": 4.256294776860895e-07, "loss": 0.8066, "step": 9275 }, { "epoch": 4.879537085744345, "grad_norm": 2.4118244647979736, "learning_rate": 4.25241834018478e-07, "loss": 0.7649, "step": 9276 }, { "epoch": 4.880063124671226, "grad_norm": 2.5626420974731445, "learning_rate": 4.2485435054597986e-07, "loss": 0.8027, "step": 9277 }, { "epoch": 4.880589163598106, "grad_norm": 2.52481746673584, "learning_rate": 4.244670272985124e-07, "loss": 0.817, "step": 9278 }, { "epoch": 4.881115202524986, "grad_norm": 2.536489248275757, "learning_rate": 4.240798643059818e-07, "loss": 0.7711, "step": 9279 }, { "epoch": 4.8816412414518675, "grad_norm": 2.4345009326934814, "learning_rate": 4.2369286159828135e-07, "loss": 0.7505, "step": 9280 }, { "epoch": 4.882167280378748, "grad_norm": 2.3597114086151123, "learning_rate": 4.2330601920529237e-07, "loss": 0.7562, "step": 9281 }, { "epoch": 4.882693319305629, "grad_norm": 2.480328321456909, "learning_rate": 4.229193371568835e-07, "loss": 0.7789, "step": 9282 }, { "epoch": 4.883219358232509, "grad_norm": 2.4433889389038086, "learning_rate": 4.2253281548291144e-07, "loss": 0.7702, "step": 9283 }, { "epoch": 4.88374539715939, "grad_norm": 2.382988452911377, "learning_rate": 4.2214645421321895e-07, "loss": 0.7713, "step": 9284 }, { "epoch": 4.88427143608627, "grad_norm": 2.3505055904388428, "learning_rate": 4.2176025337763826e-07, "loss": 0.7843, "step": 9285 }, { "epoch": 4.884797475013151, "grad_norm": 2.4525089263916016, "learning_rate": 4.2137421300598837e-07, "loss": 0.7588, "step": 9286 }, { "epoch": 4.8853235139400315, "grad_norm": 2.246673345565796, "learning_rate": 4.2098833312807594e-07, "loss": 0.7475, "step": 9287 }, { "epoch": 4.885849552866912, "grad_norm": 2.2699897289276123, "learning_rate": 4.2060261377369625e-07, "loss": 0.7613, "step": 9288 }, { "epoch": 4.886375591793793, "grad_norm": 2.414214611053467, "learning_rate": 4.2021705497263003e-07, "loss": 0.7776, "step": 9289 }, { "epoch": 4.886901630720673, "grad_norm": 2.4631502628326416, "learning_rate": 4.198316567546476e-07, "loss": 0.7936, "step": 9290 }, { "epoch": 4.887427669647554, "grad_norm": 2.665884494781494, "learning_rate": 4.194464191495054e-07, "loss": 0.7412, "step": 9291 }, { "epoch": 4.887953708574434, "grad_norm": 2.5149543285369873, "learning_rate": 4.1906134218694853e-07, "loss": 0.7733, "step": 9292 }, { "epoch": 4.888479747501315, "grad_norm": 2.51798415184021, "learning_rate": 4.1867642589670935e-07, "loss": 0.7658, "step": 9293 }, { "epoch": 4.8890057864281955, "grad_norm": 2.460693120956421, "learning_rate": 4.1829167030850797e-07, "loss": 0.8113, "step": 9294 }, { "epoch": 4.889531825355077, "grad_norm": 2.390411615371704, "learning_rate": 4.1790707545205174e-07, "loss": 0.8066, "step": 9295 }, { "epoch": 4.890057864281957, "grad_norm": 2.6434783935546875, "learning_rate": 4.1752264135703576e-07, "loss": 0.7888, "step": 9296 }, { "epoch": 4.890583903208837, "grad_norm": 2.3402822017669678, "learning_rate": 4.171383680531438e-07, "loss": 0.7563, "step": 9297 }, { "epoch": 4.891109942135718, "grad_norm": 2.2386045455932617, "learning_rate": 4.1675425557004433e-07, "loss": 0.7493, "step": 9298 }, { "epoch": 4.891635981062599, "grad_norm": 2.3617660999298096, "learning_rate": 4.1637030393739617e-07, "loss": 0.7519, "step": 9299 }, { "epoch": 4.892162019989479, "grad_norm": 2.5884978771209717, "learning_rate": 4.1598651318484505e-07, "loss": 0.8214, "step": 9300 }, { "epoch": 4.8926880589163595, "grad_norm": 2.5180344581604004, "learning_rate": 4.1560288334202393e-07, "loss": 0.7931, "step": 9301 }, { "epoch": 4.893214097843241, "grad_norm": 2.5661780834198, "learning_rate": 4.15219414438553e-07, "loss": 0.7734, "step": 9302 }, { "epoch": 4.893740136770121, "grad_norm": 2.4038705825805664, "learning_rate": 4.1483610650404044e-07, "loss": 0.7365, "step": 9303 }, { "epoch": 4.894266175697002, "grad_norm": 2.3846120834350586, "learning_rate": 4.144529595680827e-07, "loss": 0.7698, "step": 9304 }, { "epoch": 4.894792214623882, "grad_norm": 2.455430507659912, "learning_rate": 4.140699736602624e-07, "loss": 0.8431, "step": 9305 }, { "epoch": 4.895318253550763, "grad_norm": 2.29259991645813, "learning_rate": 4.136871488101504e-07, "loss": 0.7606, "step": 9306 }, { "epoch": 4.895844292477643, "grad_norm": 2.4578490257263184, "learning_rate": 4.133044850473059e-07, "loss": 0.7278, "step": 9307 }, { "epoch": 4.896370331404524, "grad_norm": 2.432403087615967, "learning_rate": 4.12921982401274e-07, "loss": 0.7574, "step": 9308 }, { "epoch": 4.896896370331405, "grad_norm": 2.4182636737823486, "learning_rate": 4.1253964090158915e-07, "loss": 0.7494, "step": 9309 }, { "epoch": 4.897422409258285, "grad_norm": 2.5722644329071045, "learning_rate": 4.1215746057777203e-07, "loss": 0.7495, "step": 9310 }, { "epoch": 4.897948448185166, "grad_norm": 2.4439220428466797, "learning_rate": 4.1177544145933197e-07, "loss": 0.7632, "step": 9311 }, { "epoch": 4.898474487112046, "grad_norm": 2.382387638092041, "learning_rate": 4.113935835757643e-07, "loss": 0.7564, "step": 9312 }, { "epoch": 4.899000526038927, "grad_norm": 2.5706825256347656, "learning_rate": 4.110118869565538e-07, "loss": 0.7865, "step": 9313 }, { "epoch": 4.899526564965807, "grad_norm": 2.6208064556121826, "learning_rate": 4.106303516311705e-07, "loss": 0.7869, "step": 9314 }, { "epoch": 4.900052603892688, "grad_norm": 5.448323726654053, "learning_rate": 4.102489776290741e-07, "loss": 0.7659, "step": 9315 }, { "epoch": 4.900578642819569, "grad_norm": 2.339062452316284, "learning_rate": 4.098677649797111e-07, "loss": 0.7497, "step": 9316 }, { "epoch": 4.90110468174645, "grad_norm": 2.462444305419922, "learning_rate": 4.0948671371251526e-07, "loss": 0.7255, "step": 9317 }, { "epoch": 4.90163072067333, "grad_norm": 2.5429837703704834, "learning_rate": 4.0910582385690843e-07, "loss": 0.7567, "step": 9318 }, { "epoch": 4.90215675960021, "grad_norm": 2.2956697940826416, "learning_rate": 4.087250954423e-07, "loss": 0.7536, "step": 9319 }, { "epoch": 4.902682798527091, "grad_norm": 2.538759469985962, "learning_rate": 4.0834452849808526e-07, "loss": 0.7703, "step": 9320 }, { "epoch": 4.903208837453971, "grad_norm": 2.4166481494903564, "learning_rate": 4.0796412305364936e-07, "loss": 0.7853, "step": 9321 }, { "epoch": 4.903734876380852, "grad_norm": 2.9078540802001953, "learning_rate": 4.075838791383635e-07, "loss": 0.7979, "step": 9322 }, { "epoch": 4.904260915307733, "grad_norm": 2.5208466053009033, "learning_rate": 4.0720379678158726e-07, "loss": 0.7978, "step": 9323 }, { "epoch": 4.904786954234614, "grad_norm": 2.378113269805908, "learning_rate": 4.0682387601266777e-07, "loss": 0.794, "step": 9324 }, { "epoch": 4.905312993161494, "grad_norm": 2.3400466442108154, "learning_rate": 4.0644411686093814e-07, "loss": 0.7654, "step": 9325 }, { "epoch": 4.905839032088375, "grad_norm": 2.520535707473755, "learning_rate": 4.060645193557214e-07, "loss": 0.8091, "step": 9326 }, { "epoch": 4.906365071015255, "grad_norm": 2.478520631790161, "learning_rate": 4.0568508352632513e-07, "loss": 0.7934, "step": 9327 }, { "epoch": 4.906891109942135, "grad_norm": 2.478001832962036, "learning_rate": 4.053058094020473e-07, "loss": 0.7311, "step": 9328 }, { "epoch": 4.907417148869016, "grad_norm": 2.5915958881378174, "learning_rate": 4.0492669701217207e-07, "loss": 0.7942, "step": 9329 }, { "epoch": 4.9079431877958966, "grad_norm": 2.4638400077819824, "learning_rate": 4.0454774638597124e-07, "loss": 0.7852, "step": 9330 }, { "epoch": 4.908469226722778, "grad_norm": 2.664144277572632, "learning_rate": 4.041689575527041e-07, "loss": 0.7552, "step": 9331 }, { "epoch": 4.908995265649658, "grad_norm": 2.7929770946502686, "learning_rate": 4.037903305416174e-07, "loss": 0.7782, "step": 9332 }, { "epoch": 4.909521304576539, "grad_norm": 2.5346126556396484, "learning_rate": 4.0341186538194635e-07, "loss": 0.7753, "step": 9333 }, { "epoch": 4.910047343503419, "grad_norm": 2.5054843425750732, "learning_rate": 4.0303356210291136e-07, "loss": 0.7771, "step": 9334 }, { "epoch": 4.9105733824303, "grad_norm": 2.4527764320373535, "learning_rate": 4.0265542073372243e-07, "loss": 0.7812, "step": 9335 }, { "epoch": 4.91109942135718, "grad_norm": 2.59757137298584, "learning_rate": 4.0227744130357625e-07, "loss": 0.7552, "step": 9336 }, { "epoch": 4.9116254602840606, "grad_norm": 2.4486398696899414, "learning_rate": 4.018996238416584e-07, "loss": 0.8125, "step": 9337 }, { "epoch": 4.912151499210942, "grad_norm": 2.271002769470215, "learning_rate": 4.015219683771387e-07, "loss": 0.7846, "step": 9338 }, { "epoch": 4.912677538137822, "grad_norm": 2.3605008125305176, "learning_rate": 4.0114447493917767e-07, "loss": 0.7267, "step": 9339 }, { "epoch": 4.913203577064703, "grad_norm": 2.3874058723449707, "learning_rate": 4.0076714355692244e-07, "loss": 0.7433, "step": 9340 }, { "epoch": 4.913729615991583, "grad_norm": 2.6003143787384033, "learning_rate": 4.00389974259506e-07, "loss": 0.7987, "step": 9341 }, { "epoch": 4.914255654918464, "grad_norm": 2.273441791534424, "learning_rate": 4.0001296707605134e-07, "loss": 0.7762, "step": 9342 }, { "epoch": 4.914781693845344, "grad_norm": 2.4579968452453613, "learning_rate": 3.9963612203566703e-07, "loss": 0.8022, "step": 9343 }, { "epoch": 4.915307732772225, "grad_norm": 2.3970906734466553, "learning_rate": 3.9925943916745007e-07, "loss": 0.8105, "step": 9344 }, { "epoch": 4.915833771699106, "grad_norm": 2.436716079711914, "learning_rate": 3.98882918500485e-07, "loss": 0.8181, "step": 9345 }, { "epoch": 4.916359810625987, "grad_norm": 2.4702186584472656, "learning_rate": 3.9850656006384326e-07, "loss": 0.7926, "step": 9346 }, { "epoch": 4.916885849552867, "grad_norm": 2.2959063053131104, "learning_rate": 3.9813036388658465e-07, "loss": 0.7613, "step": 9347 }, { "epoch": 4.917411888479748, "grad_norm": 2.3248696327209473, "learning_rate": 3.977543299977543e-07, "loss": 0.7691, "step": 9348 }, { "epoch": 4.917937927406628, "grad_norm": 2.3514392375946045, "learning_rate": 3.973784584263876e-07, "loss": 0.7692, "step": 9349 }, { "epoch": 4.918463966333508, "grad_norm": 2.3273279666900635, "learning_rate": 3.970027492015063e-07, "loss": 0.7603, "step": 9350 }, { "epoch": 4.918990005260389, "grad_norm": 2.388913631439209, "learning_rate": 3.966272023521187e-07, "loss": 0.7446, "step": 9351 }, { "epoch": 4.91951604418727, "grad_norm": 2.572000503540039, "learning_rate": 3.962518179072211e-07, "loss": 0.7918, "step": 9352 }, { "epoch": 4.920042083114151, "grad_norm": 2.2377891540527344, "learning_rate": 3.958765958957983e-07, "loss": 0.7611, "step": 9353 }, { "epoch": 4.920568122041031, "grad_norm": 2.4356701374053955, "learning_rate": 3.95501536346822e-07, "loss": 0.7777, "step": 9354 }, { "epoch": 4.921094160967912, "grad_norm": 2.360860824584961, "learning_rate": 3.9512663928924974e-07, "loss": 0.8114, "step": 9355 }, { "epoch": 4.921620199894792, "grad_norm": 2.3512167930603027, "learning_rate": 3.9475190475202876e-07, "loss": 0.807, "step": 9356 }, { "epoch": 4.922146238821673, "grad_norm": 2.4573378562927246, "learning_rate": 3.943773327640929e-07, "loss": 0.769, "step": 9357 }, { "epoch": 4.922672277748553, "grad_norm": 2.398411273956299, "learning_rate": 3.94002923354363e-07, "loss": 0.7901, "step": 9358 }, { "epoch": 4.923198316675434, "grad_norm": 2.5111634731292725, "learning_rate": 3.936286765517483e-07, "loss": 0.8365, "step": 9359 }, { "epoch": 4.923724355602315, "grad_norm": 2.3981733322143555, "learning_rate": 3.9325459238514497e-07, "loss": 0.7934, "step": 9360 }, { "epoch": 4.924250394529195, "grad_norm": 2.4074034690856934, "learning_rate": 3.9288067088343594e-07, "loss": 0.8041, "step": 9361 }, { "epoch": 4.924776433456076, "grad_norm": 2.3357996940612793, "learning_rate": 3.925069120754929e-07, "loss": 0.771, "step": 9362 }, { "epoch": 4.925302472382956, "grad_norm": 2.539121150970459, "learning_rate": 3.921333159901736e-07, "loss": 0.7478, "step": 9363 }, { "epoch": 4.925828511309837, "grad_norm": 2.4090969562530518, "learning_rate": 3.9175988265632433e-07, "loss": 0.7814, "step": 9364 }, { "epoch": 4.926354550236717, "grad_norm": 2.452394723892212, "learning_rate": 3.9138661210277845e-07, "loss": 0.7205, "step": 9365 }, { "epoch": 4.9268805891635985, "grad_norm": 2.388089179992676, "learning_rate": 3.9101350435835657e-07, "loss": 0.7701, "step": 9366 }, { "epoch": 4.927406628090479, "grad_norm": 2.4334757328033447, "learning_rate": 3.9064055945186696e-07, "loss": 0.8302, "step": 9367 }, { "epoch": 4.927932667017359, "grad_norm": 2.8784406185150146, "learning_rate": 3.902677774121055e-07, "loss": 0.7148, "step": 9368 }, { "epoch": 4.92845870594424, "grad_norm": 2.2391486167907715, "learning_rate": 3.898951582678556e-07, "loss": 0.784, "step": 9369 }, { "epoch": 4.92898474487112, "grad_norm": 2.3770525455474854, "learning_rate": 3.895227020478867e-07, "loss": 0.7251, "step": 9370 }, { "epoch": 4.929510783798001, "grad_norm": 2.5448226928710938, "learning_rate": 3.891504087809569e-07, "loss": 0.8276, "step": 9371 }, { "epoch": 4.930036822724881, "grad_norm": 2.484090805053711, "learning_rate": 3.887782784958119e-07, "loss": 0.7361, "step": 9372 }, { "epoch": 4.9305628616517625, "grad_norm": 2.631150960922241, "learning_rate": 3.88406311221185e-07, "loss": 0.7705, "step": 9373 }, { "epoch": 4.931088900578643, "grad_norm": 2.4263927936553955, "learning_rate": 3.8803450698579463e-07, "loss": 0.7615, "step": 9374 }, { "epoch": 4.931614939505524, "grad_norm": 2.510218620300293, "learning_rate": 3.8766286581834985e-07, "loss": 0.8153, "step": 9375 }, { "epoch": 4.932140978432404, "grad_norm": 2.4305148124694824, "learning_rate": 3.8729138774754535e-07, "loss": 0.8192, "step": 9376 }, { "epoch": 4.932667017359284, "grad_norm": 2.5441055297851562, "learning_rate": 3.8692007280206277e-07, "loss": 0.7903, "step": 9377 }, { "epoch": 4.933193056286165, "grad_norm": 2.4499871730804443, "learning_rate": 3.8654892101057243e-07, "loss": 0.8156, "step": 9378 }, { "epoch": 4.933719095213045, "grad_norm": 2.4541895389556885, "learning_rate": 3.861779324017312e-07, "loss": 0.8253, "step": 9379 }, { "epoch": 4.9342451341399265, "grad_norm": 2.337996482849121, "learning_rate": 3.858071070041841e-07, "loss": 0.6979, "step": 9380 }, { "epoch": 4.934771173066807, "grad_norm": 2.5104360580444336, "learning_rate": 3.854364448465628e-07, "loss": 0.7431, "step": 9381 }, { "epoch": 4.935297211993688, "grad_norm": 2.4720773696899414, "learning_rate": 3.850659459574868e-07, "loss": 0.8351, "step": 9382 }, { "epoch": 4.935823250920568, "grad_norm": 2.401127576828003, "learning_rate": 3.846956103655636e-07, "loss": 0.7581, "step": 9383 }, { "epoch": 4.936349289847449, "grad_norm": 2.4426040649414062, "learning_rate": 3.8432543809938547e-07, "loss": 0.8175, "step": 9384 }, { "epoch": 4.936875328774329, "grad_norm": 2.4676010608673096, "learning_rate": 3.839554291875355e-07, "loss": 0.7729, "step": 9385 }, { "epoch": 4.937401367701209, "grad_norm": 2.3991336822509766, "learning_rate": 3.835855836585825e-07, "loss": 0.7709, "step": 9386 }, { "epoch": 4.9379274066280905, "grad_norm": 2.4937000274658203, "learning_rate": 3.832159015410822e-07, "loss": 0.7695, "step": 9387 }, { "epoch": 4.938453445554972, "grad_norm": 2.4284138679504395, "learning_rate": 3.828463828635781e-07, "loss": 0.8024, "step": 9388 }, { "epoch": 4.938979484481852, "grad_norm": 2.3610103130340576, "learning_rate": 3.824770276546022e-07, "loss": 0.7851, "step": 9389 }, { "epoch": 4.939505523408732, "grad_norm": 2.3146140575408936, "learning_rate": 3.8210783594267277e-07, "loss": 0.808, "step": 9390 }, { "epoch": 4.940031562335613, "grad_norm": 2.223250389099121, "learning_rate": 3.8173880775629474e-07, "loss": 0.7429, "step": 9391 }, { "epoch": 4.940557601262493, "grad_norm": 2.3968570232391357, "learning_rate": 3.8136994312396204e-07, "loss": 0.7295, "step": 9392 }, { "epoch": 4.941083640189374, "grad_norm": 2.389993667602539, "learning_rate": 3.810012420741552e-07, "loss": 0.8163, "step": 9393 }, { "epoch": 4.9416096791162545, "grad_norm": 2.2780709266662598, "learning_rate": 3.8063270463534195e-07, "loss": 0.8022, "step": 9394 }, { "epoch": 4.942135718043136, "grad_norm": 2.3378043174743652, "learning_rate": 3.80264330835978e-07, "loss": 0.7555, "step": 9395 }, { "epoch": 4.942661756970016, "grad_norm": 2.5123565196990967, "learning_rate": 3.798961207045057e-07, "loss": 0.7689, "step": 9396 }, { "epoch": 4.943187795896897, "grad_norm": 2.3595917224884033, "learning_rate": 3.795280742693555e-07, "loss": 0.7737, "step": 9397 }, { "epoch": 4.943713834823777, "grad_norm": 2.496974468231201, "learning_rate": 3.7916019155894436e-07, "loss": 0.7506, "step": 9398 }, { "epoch": 4.944239873750657, "grad_norm": 2.466698408126831, "learning_rate": 3.787924726016773e-07, "loss": 0.7653, "step": 9399 }, { "epoch": 4.944765912677538, "grad_norm": 2.4985995292663574, "learning_rate": 3.7842491742594605e-07, "loss": 0.7719, "step": 9400 }, { "epoch": 4.9452919516044185, "grad_norm": 2.8591156005859375, "learning_rate": 3.780575260601302e-07, "loss": 0.8555, "step": 9401 }, { "epoch": 4.9458179905313, "grad_norm": 2.5626413822174072, "learning_rate": 3.776902985325967e-07, "loss": 0.7635, "step": 9402 }, { "epoch": 4.94634402945818, "grad_norm": 2.5069539546966553, "learning_rate": 3.7732323487169984e-07, "loss": 0.8373, "step": 9403 }, { "epoch": 4.946870068385061, "grad_norm": 2.454974889755249, "learning_rate": 3.769563351057817e-07, "loss": 0.777, "step": 9404 }, { "epoch": 4.947396107311941, "grad_norm": 2.361227035522461, "learning_rate": 3.765895992631699e-07, "loss": 0.7494, "step": 9405 }, { "epoch": 4.947922146238822, "grad_norm": 2.6270229816436768, "learning_rate": 3.762230273721809e-07, "loss": 0.821, "step": 9406 }, { "epoch": 4.948448185165702, "grad_norm": 2.3561112880706787, "learning_rate": 3.7585661946111907e-07, "loss": 0.7174, "step": 9407 }, { "epoch": 4.9489742240925825, "grad_norm": 2.2574148178100586, "learning_rate": 3.754903755582745e-07, "loss": 0.7408, "step": 9408 }, { "epoch": 4.9495002630194636, "grad_norm": 2.3554129600524902, "learning_rate": 3.751242956919257e-07, "loss": 0.7681, "step": 9409 }, { "epoch": 4.950026301946344, "grad_norm": 2.4434423446655273, "learning_rate": 3.7475837989033925e-07, "loss": 0.7523, "step": 9410 }, { "epoch": 4.950552340873225, "grad_norm": 2.313767671585083, "learning_rate": 3.7439262818176613e-07, "loss": 0.764, "step": 9411 }, { "epoch": 4.951078379800105, "grad_norm": 2.2370870113372803, "learning_rate": 3.7402704059444823e-07, "loss": 0.7637, "step": 9412 }, { "epoch": 4.951604418726986, "grad_norm": 2.4841485023498535, "learning_rate": 3.736616171566118e-07, "loss": 0.7751, "step": 9413 }, { "epoch": 4.952130457653866, "grad_norm": 2.4448978900909424, "learning_rate": 3.732963578964721e-07, "loss": 0.757, "step": 9414 }, { "epoch": 4.952656496580747, "grad_norm": 2.2700560092926025, "learning_rate": 3.729312628422319e-07, "loss": 0.7686, "step": 9415 }, { "epoch": 4.9531825355076275, "grad_norm": 2.4372005462646484, "learning_rate": 3.725663320220801e-07, "loss": 0.8202, "step": 9416 }, { "epoch": 4.953708574434508, "grad_norm": 2.3108925819396973, "learning_rate": 3.722015654641936e-07, "loss": 0.7381, "step": 9417 }, { "epoch": 4.954234613361389, "grad_norm": 2.435683250427246, "learning_rate": 3.7183696319673724e-07, "loss": 0.7598, "step": 9418 }, { "epoch": 4.954760652288269, "grad_norm": 2.429086446762085, "learning_rate": 3.714725252478621e-07, "loss": 0.7979, "step": 9419 }, { "epoch": 4.95528669121515, "grad_norm": 2.354238271713257, "learning_rate": 3.711082516457065e-07, "loss": 0.802, "step": 9420 }, { "epoch": 4.95581273014203, "grad_norm": 2.488157272338867, "learning_rate": 3.707441424183972e-07, "loss": 0.7709, "step": 9421 }, { "epoch": 4.956338769068911, "grad_norm": 2.4466710090637207, "learning_rate": 3.7038019759404743e-07, "loss": 0.7532, "step": 9422 }, { "epoch": 4.9568648079957915, "grad_norm": 2.32387113571167, "learning_rate": 3.7001641720075756e-07, "loss": 0.7757, "step": 9423 }, { "epoch": 4.957390846922673, "grad_norm": 2.5182251930236816, "learning_rate": 3.696528012666156e-07, "loss": 0.7446, "step": 9424 }, { "epoch": 4.957916885849553, "grad_norm": 2.3517367839813232, "learning_rate": 3.6928934981969725e-07, "loss": 0.7295, "step": 9425 }, { "epoch": 4.958442924776433, "grad_norm": 2.561237096786499, "learning_rate": 3.689260628880653e-07, "loss": 0.8011, "step": 9426 }, { "epoch": 4.958968963703314, "grad_norm": 2.5310091972351074, "learning_rate": 3.68562940499769e-07, "loss": 0.7814, "step": 9427 }, { "epoch": 4.959495002630194, "grad_norm": 2.2422006130218506, "learning_rate": 3.6819998268284584e-07, "loss": 0.7671, "step": 9428 }, { "epoch": 4.960021041557075, "grad_norm": 2.5260696411132812, "learning_rate": 3.678371894653204e-07, "loss": 0.8051, "step": 9429 }, { "epoch": 4.9605470804839555, "grad_norm": 2.5069432258605957, "learning_rate": 3.6747456087520475e-07, "loss": 0.7736, "step": 9430 }, { "epoch": 4.961073119410837, "grad_norm": 2.4721035957336426, "learning_rate": 3.671120969404973e-07, "loss": 0.787, "step": 9431 }, { "epoch": 4.961599158337717, "grad_norm": 2.380103588104248, "learning_rate": 3.667497976891851e-07, "loss": 0.764, "step": 9432 }, { "epoch": 4.962125197264598, "grad_norm": 2.3973026275634766, "learning_rate": 3.663876631492419e-07, "loss": 0.7696, "step": 9433 }, { "epoch": 4.962651236191478, "grad_norm": 2.4748077392578125, "learning_rate": 3.6602569334862753e-07, "loss": 0.7862, "step": 9434 }, { "epoch": 4.963177275118358, "grad_norm": 2.3291592597961426, "learning_rate": 3.656638883152919e-07, "loss": 0.7526, "step": 9435 }, { "epoch": 4.963703314045239, "grad_norm": 2.648793935775757, "learning_rate": 3.653022480771687e-07, "loss": 0.7877, "step": 9436 }, { "epoch": 4.96422935297212, "grad_norm": 2.391446113586426, "learning_rate": 3.649407726621815e-07, "loss": 0.7022, "step": 9437 }, { "epoch": 4.964755391899001, "grad_norm": 2.344348669052124, "learning_rate": 3.645794620982407e-07, "loss": 0.7799, "step": 9438 }, { "epoch": 4.965281430825881, "grad_norm": 3.321922540664673, "learning_rate": 3.642183164132429e-07, "loss": 0.7788, "step": 9439 }, { "epoch": 4.965807469752762, "grad_norm": 2.363175630569458, "learning_rate": 3.6385733563507386e-07, "loss": 0.7711, "step": 9440 }, { "epoch": 4.966333508679642, "grad_norm": 2.457504987716675, "learning_rate": 3.6349651979160403e-07, "loss": 0.8101, "step": 9441 }, { "epoch": 4.966859547606523, "grad_norm": 2.6258490085601807, "learning_rate": 3.631358689106934e-07, "loss": 0.7788, "step": 9442 }, { "epoch": 4.967385586533403, "grad_norm": 2.3089306354522705, "learning_rate": 3.62775383020188e-07, "loss": 0.8377, "step": 9443 }, { "epoch": 4.967911625460284, "grad_norm": 2.4204094409942627, "learning_rate": 3.624150621479214e-07, "loss": 0.7905, "step": 9444 }, { "epoch": 4.968437664387165, "grad_norm": 2.3787431716918945, "learning_rate": 3.620549063217149e-07, "loss": 0.7661, "step": 9445 }, { "epoch": 4.968963703314046, "grad_norm": 2.357114315032959, "learning_rate": 3.616949155693769e-07, "loss": 0.7506, "step": 9446 }, { "epoch": 4.969489742240926, "grad_norm": 2.441854476928711, "learning_rate": 3.613350899187018e-07, "loss": 0.7406, "step": 9447 }, { "epoch": 4.970015781167806, "grad_norm": 2.3805274963378906, "learning_rate": 3.609754293974732e-07, "loss": 0.7364, "step": 9448 }, { "epoch": 4.970541820094687, "grad_norm": 2.594456911087036, "learning_rate": 3.6061593403346027e-07, "loss": 0.8454, "step": 9449 }, { "epoch": 4.971067859021567, "grad_norm": 2.3527157306671143, "learning_rate": 3.602566038544206e-07, "loss": 0.7699, "step": 9450 }, { "epoch": 4.971593897948448, "grad_norm": 2.240264654159546, "learning_rate": 3.5989743888809854e-07, "loss": 0.7622, "step": 9451 }, { "epoch": 4.972119936875329, "grad_norm": 2.395141839981079, "learning_rate": 3.5953843916222587e-07, "loss": 0.7941, "step": 9452 }, { "epoch": 4.97264597580221, "grad_norm": 2.4191207885742188, "learning_rate": 3.5917960470452093e-07, "loss": 0.7911, "step": 9453 }, { "epoch": 4.97317201472909, "grad_norm": 2.349496603012085, "learning_rate": 3.588209355426911e-07, "loss": 0.7355, "step": 9454 }, { "epoch": 4.973698053655971, "grad_norm": 2.340040445327759, "learning_rate": 3.5846243170442857e-07, "loss": 0.7915, "step": 9455 }, { "epoch": 4.974224092582851, "grad_norm": 2.3741636276245117, "learning_rate": 3.58104093217414e-07, "loss": 0.7415, "step": 9456 }, { "epoch": 4.974750131509731, "grad_norm": 2.4093282222747803, "learning_rate": 3.577459201093156e-07, "loss": 0.7573, "step": 9457 }, { "epoch": 4.975276170436612, "grad_norm": 2.550605058670044, "learning_rate": 3.5738791240778834e-07, "loss": 0.7915, "step": 9458 }, { "epoch": 4.975802209363493, "grad_norm": 2.4412760734558105, "learning_rate": 3.5703007014047523e-07, "loss": 0.8143, "step": 9459 }, { "epoch": 4.976328248290374, "grad_norm": 2.562148332595825, "learning_rate": 3.566723933350047e-07, "loss": 0.789, "step": 9460 }, { "epoch": 4.976854287217254, "grad_norm": 2.4253089427948, "learning_rate": 3.5631488201899405e-07, "loss": 0.7878, "step": 9461 }, { "epoch": 4.977380326144135, "grad_norm": 2.4196841716766357, "learning_rate": 3.559575362200468e-07, "loss": 0.7929, "step": 9462 }, { "epoch": 4.977906365071015, "grad_norm": 2.4674015045166016, "learning_rate": 3.5560035596575446e-07, "loss": 0.795, "step": 9463 }, { "epoch": 4.978432403997896, "grad_norm": 2.450052261352539, "learning_rate": 3.5524334128369557e-07, "loss": 0.7531, "step": 9464 }, { "epoch": 4.978958442924776, "grad_norm": 2.289897918701172, "learning_rate": 3.5488649220143556e-07, "loss": 0.7544, "step": 9465 }, { "epoch": 4.979484481851657, "grad_norm": 2.4173295497894287, "learning_rate": 3.545298087465274e-07, "loss": 0.7449, "step": 9466 }, { "epoch": 4.980010520778538, "grad_norm": 2.8976476192474365, "learning_rate": 3.541732909465109e-07, "loss": 0.7914, "step": 9467 }, { "epoch": 4.980536559705418, "grad_norm": 2.6497788429260254, "learning_rate": 3.5381693882891336e-07, "loss": 0.7993, "step": 9468 }, { "epoch": 4.981062598632299, "grad_norm": 2.5016427040100098, "learning_rate": 3.534607524212502e-07, "loss": 0.8011, "step": 9469 }, { "epoch": 4.981588637559179, "grad_norm": 2.3287365436553955, "learning_rate": 3.5310473175102167e-07, "loss": 0.7658, "step": 9470 }, { "epoch": 4.98211467648606, "grad_norm": 2.474313259124756, "learning_rate": 3.527488768457174e-07, "loss": 0.7707, "step": 9471 }, { "epoch": 4.98264071541294, "grad_norm": 2.332996368408203, "learning_rate": 3.523931877328138e-07, "loss": 0.781, "step": 9472 }, { "epoch": 4.9831667543398215, "grad_norm": 2.3925702571868896, "learning_rate": 3.52037664439773e-07, "loss": 0.7674, "step": 9473 }, { "epoch": 4.983692793266702, "grad_norm": 2.1863832473754883, "learning_rate": 3.5168230699404614e-07, "loss": 0.7608, "step": 9474 }, { "epoch": 4.984218832193582, "grad_norm": 2.4635770320892334, "learning_rate": 3.51327115423071e-07, "loss": 0.7552, "step": 9475 }, { "epoch": 4.984744871120463, "grad_norm": 2.5472867488861084, "learning_rate": 3.5097208975427287e-07, "loss": 0.7855, "step": 9476 }, { "epoch": 4.985270910047343, "grad_norm": 2.312760829925537, "learning_rate": 3.506172300150626e-07, "loss": 0.7414, "step": 9477 }, { "epoch": 4.985796948974224, "grad_norm": 2.3059911727905273, "learning_rate": 3.502625362328402e-07, "loss": 0.7843, "step": 9478 }, { "epoch": 4.986322987901104, "grad_norm": 2.43567156791687, "learning_rate": 3.4990800843499177e-07, "loss": 0.7613, "step": 9479 }, { "epoch": 4.9868490268279855, "grad_norm": 3.286155939102173, "learning_rate": 3.495536466488916e-07, "loss": 0.7385, "step": 9480 }, { "epoch": 4.987375065754866, "grad_norm": 2.3563225269317627, "learning_rate": 3.4919945090189976e-07, "loss": 0.7902, "step": 9481 }, { "epoch": 4.987901104681747, "grad_norm": 2.420759439468384, "learning_rate": 3.488454212213649e-07, "loss": 0.7771, "step": 9482 }, { "epoch": 4.988427143608627, "grad_norm": 2.471403121948242, "learning_rate": 3.484915576346215e-07, "loss": 0.7593, "step": 9483 }, { "epoch": 4.988953182535508, "grad_norm": 2.367290496826172, "learning_rate": 3.4813786016899246e-07, "loss": 0.7939, "step": 9484 }, { "epoch": 4.989479221462388, "grad_norm": 2.536642551422119, "learning_rate": 3.4778432885178653e-07, "loss": 0.7575, "step": 9485 }, { "epoch": 4.990005260389269, "grad_norm": 2.7516207695007324, "learning_rate": 3.4743096371030074e-07, "loss": 0.816, "step": 9486 }, { "epoch": 4.9905312993161495, "grad_norm": 2.382490396499634, "learning_rate": 3.47077764771819e-07, "loss": 0.7647, "step": 9487 }, { "epoch": 4.99105733824303, "grad_norm": 2.5199124813079834, "learning_rate": 3.4672473206361216e-07, "loss": 0.7556, "step": 9488 }, { "epoch": 4.991583377169911, "grad_norm": 2.6370177268981934, "learning_rate": 3.463718656129389e-07, "loss": 0.7505, "step": 9489 }, { "epoch": 4.992109416096791, "grad_norm": 2.497655153274536, "learning_rate": 3.460191654470446e-07, "loss": 0.7493, "step": 9490 }, { "epoch": 4.992635455023672, "grad_norm": 2.510464668273926, "learning_rate": 3.456666315931606e-07, "loss": 0.7389, "step": 9491 }, { "epoch": 4.993161493950552, "grad_norm": 2.4530465602874756, "learning_rate": 3.453142640785076e-07, "loss": 0.7843, "step": 9492 }, { "epoch": 4.993687532877433, "grad_norm": 2.2798893451690674, "learning_rate": 3.449620629302919e-07, "loss": 0.7605, "step": 9493 }, { "epoch": 4.9942135718043135, "grad_norm": 2.2680747509002686, "learning_rate": 3.4461002817570784e-07, "loss": 0.7481, "step": 9494 }, { "epoch": 4.9947396107311945, "grad_norm": 2.5619122982025146, "learning_rate": 3.44258159841937e-07, "loss": 0.7384, "step": 9495 }, { "epoch": 4.995265649658075, "grad_norm": 2.409496784210205, "learning_rate": 3.439064579561463e-07, "loss": 0.7824, "step": 9496 }, { "epoch": 4.995791688584955, "grad_norm": 2.4711921215057373, "learning_rate": 3.435549225454926e-07, "loss": 0.7973, "step": 9497 }, { "epoch": 4.996317727511836, "grad_norm": 2.423917293548584, "learning_rate": 3.4320355363711725e-07, "loss": 0.7224, "step": 9498 }, { "epoch": 4.996843766438716, "grad_norm": 2.2706961631774902, "learning_rate": 3.4285235125815027e-07, "loss": 0.7587, "step": 9499 }, { "epoch": 4.997369805365597, "grad_norm": 2.434476137161255, "learning_rate": 3.4250131543570873e-07, "loss": 0.7099, "step": 9500 }, { "epoch": 4.9978958442924775, "grad_norm": 2.2927086353302, "learning_rate": 3.4215044619689663e-07, "loss": 0.7903, "step": 9501 }, { "epoch": 4.9984218832193585, "grad_norm": 2.2611589431762695, "learning_rate": 3.4179974356880504e-07, "loss": 0.7868, "step": 9502 }, { "epoch": 4.998947922146239, "grad_norm": 2.3409926891326904, "learning_rate": 3.4144920757851224e-07, "loss": 0.8007, "step": 9503 }, { "epoch": 4.99947396107312, "grad_norm": 2.3330955505371094, "learning_rate": 3.410988382530844e-07, "loss": 0.7656, "step": 9504 }, { "epoch": 5.0, "grad_norm": 2.5103468894958496, "learning_rate": 3.4074863561957247e-07, "loss": 0.7488, "step": 9505 } ], "logging_steps": 1, "max_steps": 11406, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 1901, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.400934729353265e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }