{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 3802, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005260389268805891, "grad_norm": 4.7434234619140625, "learning_rate": 5.0000000000000004e-08, "loss": 1.7896, "step": 1 }, { "epoch": 0.0010520778537611783, "grad_norm": 4.893940448760986, "learning_rate": 1.0000000000000001e-07, "loss": 1.8423, "step": 2 }, { "epoch": 0.0015781167806417674, "grad_norm": 5.008203029632568, "learning_rate": 1.5000000000000002e-07, "loss": 1.7775, "step": 3 }, { "epoch": 0.0021041557075223566, "grad_norm": 4.682094097137451, "learning_rate": 2.0000000000000002e-07, "loss": 1.7132, "step": 4 }, { "epoch": 0.0026301946344029457, "grad_norm": 5.076476097106934, "learning_rate": 2.5000000000000004e-07, "loss": 1.7946, "step": 5 }, { "epoch": 0.003156233561283535, "grad_norm": 5.164911270141602, "learning_rate": 3.0000000000000004e-07, "loss": 1.7562, "step": 6 }, { "epoch": 0.003682272488164124, "grad_norm": 5.532482624053955, "learning_rate": 3.5000000000000004e-07, "loss": 1.9173, "step": 7 }, { "epoch": 0.004208311415044713, "grad_norm": 4.994466304779053, "learning_rate": 4.0000000000000003e-07, "loss": 1.8048, "step": 8 }, { "epoch": 0.004734350341925302, "grad_norm": 4.728099822998047, "learning_rate": 4.5000000000000003e-07, "loss": 1.8313, "step": 9 }, { "epoch": 0.0052603892688058915, "grad_norm": 4.757445335388184, "learning_rate": 5.000000000000001e-07, "loss": 1.7745, "step": 10 }, { "epoch": 0.005786428195686481, "grad_norm": 4.926065444946289, "learning_rate": 5.5e-07, "loss": 1.8448, "step": 11 }, { "epoch": 0.00631246712256707, "grad_norm": 4.987133979797363, "learning_rate": 6.000000000000001e-07, "loss": 1.7755, "step": 12 }, { "epoch": 0.006838506049447659, "grad_norm": 4.783141613006592, "learning_rate": 6.5e-07, "loss": 1.7815, "step": 13 }, { "epoch": 0.007364544976328248, "grad_norm": 4.668217182159424, "learning_rate": 7.000000000000001e-07, "loss": 1.754, "step": 14 }, { "epoch": 0.007890583903208837, "grad_norm": 4.673665523529053, "learning_rate": 7.5e-07, "loss": 1.7608, "step": 15 }, { "epoch": 0.008416622830089426, "grad_norm": 4.452486991882324, "learning_rate": 8.000000000000001e-07, "loss": 1.7222, "step": 16 }, { "epoch": 0.008942661756970016, "grad_norm": 4.257665157318115, "learning_rate": 8.500000000000001e-07, "loss": 1.7556, "step": 17 }, { "epoch": 0.009468700683850605, "grad_norm": 4.1270432472229, "learning_rate": 9.000000000000001e-07, "loss": 1.7121, "step": 18 }, { "epoch": 0.009994739610731194, "grad_norm": 4.321215629577637, "learning_rate": 9.500000000000001e-07, "loss": 1.7584, "step": 19 }, { "epoch": 0.010520778537611783, "grad_norm": 3.8703970909118652, "learning_rate": 1.0000000000000002e-06, "loss": 1.6611, "step": 20 }, { "epoch": 0.011046817464492372, "grad_norm": 4.07947301864624, "learning_rate": 1.0500000000000001e-06, "loss": 1.7914, "step": 21 }, { "epoch": 0.011572856391372961, "grad_norm": 3.9068686962127686, "learning_rate": 1.1e-06, "loss": 1.7848, "step": 22 }, { "epoch": 0.01209889531825355, "grad_norm": 3.7697386741638184, "learning_rate": 1.1500000000000002e-06, "loss": 1.6694, "step": 23 }, { "epoch": 0.01262493424513414, "grad_norm": 3.795276641845703, "learning_rate": 1.2000000000000002e-06, "loss": 1.759, "step": 24 }, { "epoch": 0.013150973172014729, "grad_norm": 3.331472396850586, "learning_rate": 1.25e-06, "loss": 1.7053, "step": 25 }, { "epoch": 0.013677012098895318, "grad_norm": 3.381592035293579, "learning_rate": 1.3e-06, "loss": 1.683, "step": 26 }, { "epoch": 0.014203051025775907, "grad_norm": 3.2494184970855713, "learning_rate": 1.3500000000000002e-06, "loss": 1.5756, "step": 27 }, { "epoch": 0.014729089952656496, "grad_norm": 3.124213695526123, "learning_rate": 1.4000000000000001e-06, "loss": 1.7102, "step": 28 }, { "epoch": 0.015255128879537085, "grad_norm": 2.9148762226104736, "learning_rate": 1.45e-06, "loss": 1.6007, "step": 29 }, { "epoch": 0.015781167806417674, "grad_norm": 2.886734962463379, "learning_rate": 1.5e-06, "loss": 1.7086, "step": 30 }, { "epoch": 0.016307206733298264, "grad_norm": 2.6898605823516846, "learning_rate": 1.5500000000000002e-06, "loss": 1.5655, "step": 31 }, { "epoch": 0.016833245660178853, "grad_norm": 2.6458981037139893, "learning_rate": 1.6000000000000001e-06, "loss": 1.4881, "step": 32 }, { "epoch": 0.017359284587059442, "grad_norm": 2.481387138366699, "learning_rate": 1.6500000000000003e-06, "loss": 1.5608, "step": 33 }, { "epoch": 0.01788532351394003, "grad_norm": 2.743023633956909, "learning_rate": 1.7000000000000002e-06, "loss": 1.5705, "step": 34 }, { "epoch": 0.01841136244082062, "grad_norm": 2.7273406982421875, "learning_rate": 1.75e-06, "loss": 1.5819, "step": 35 }, { "epoch": 0.01893740136770121, "grad_norm": 2.7253308296203613, "learning_rate": 1.8000000000000001e-06, "loss": 1.5201, "step": 36 }, { "epoch": 0.0194634402945818, "grad_norm": 2.8794732093811035, "learning_rate": 1.85e-06, "loss": 1.4743, "step": 37 }, { "epoch": 0.019989479221462388, "grad_norm": 2.767172336578369, "learning_rate": 1.9000000000000002e-06, "loss": 1.5366, "step": 38 }, { "epoch": 0.020515518148342977, "grad_norm": 2.84169864654541, "learning_rate": 1.9500000000000004e-06, "loss": 1.5635, "step": 39 }, { "epoch": 0.021041557075223566, "grad_norm": 2.6982147693634033, "learning_rate": 2.0000000000000003e-06, "loss": 1.49, "step": 40 }, { "epoch": 0.021567596002104155, "grad_norm": 2.597731590270996, "learning_rate": 2.05e-06, "loss": 1.5189, "step": 41 }, { "epoch": 0.022093634928984744, "grad_norm": 2.4286556243896484, "learning_rate": 2.1000000000000002e-06, "loss": 1.4439, "step": 42 }, { "epoch": 0.022619673855865333, "grad_norm": 2.6267499923706055, "learning_rate": 2.15e-06, "loss": 1.3522, "step": 43 }, { "epoch": 0.023145712782745922, "grad_norm": 2.2576816082000732, "learning_rate": 2.2e-06, "loss": 1.4713, "step": 44 }, { "epoch": 0.02367175170962651, "grad_norm": 2.406381368637085, "learning_rate": 2.25e-06, "loss": 1.47, "step": 45 }, { "epoch": 0.0241977906365071, "grad_norm": 2.2341415882110596, "learning_rate": 2.3000000000000004e-06, "loss": 1.4041, "step": 46 }, { "epoch": 0.02472382956338769, "grad_norm": 2.5055644512176514, "learning_rate": 2.35e-06, "loss": 1.4321, "step": 47 }, { "epoch": 0.02524986849026828, "grad_norm": 2.2131927013397217, "learning_rate": 2.4000000000000003e-06, "loss": 1.3631, "step": 48 }, { "epoch": 0.025775907417148868, "grad_norm": 2.3399457931518555, "learning_rate": 2.4500000000000003e-06, "loss": 1.4055, "step": 49 }, { "epoch": 0.026301946344029457, "grad_norm": 2.2194554805755615, "learning_rate": 2.5e-06, "loss": 1.3722, "step": 50 }, { "epoch": 0.026827985270910047, "grad_norm": 2.196530342102051, "learning_rate": 2.55e-06, "loss": 1.4126, "step": 51 }, { "epoch": 0.027354024197790636, "grad_norm": 2.401376485824585, "learning_rate": 2.6e-06, "loss": 1.4174, "step": 52 }, { "epoch": 0.027880063124671225, "grad_norm": 2.2509777545928955, "learning_rate": 2.6500000000000005e-06, "loss": 1.3725, "step": 53 }, { "epoch": 0.028406102051551814, "grad_norm": 2.2538340091705322, "learning_rate": 2.7000000000000004e-06, "loss": 1.4274, "step": 54 }, { "epoch": 0.028932140978432403, "grad_norm": 2.218494176864624, "learning_rate": 2.7500000000000004e-06, "loss": 1.4518, "step": 55 }, { "epoch": 0.029458179905312992, "grad_norm": 2.06544828414917, "learning_rate": 2.8000000000000003e-06, "loss": 1.3547, "step": 56 }, { "epoch": 0.02998421883219358, "grad_norm": 2.014075994491577, "learning_rate": 2.85e-06, "loss": 1.2274, "step": 57 }, { "epoch": 0.03051025775907417, "grad_norm": 2.187418222427368, "learning_rate": 2.9e-06, "loss": 1.3663, "step": 58 }, { "epoch": 0.03103629668595476, "grad_norm": 1.993913173675537, "learning_rate": 2.95e-06, "loss": 1.3357, "step": 59 }, { "epoch": 0.03156233561283535, "grad_norm": 2.1067426204681396, "learning_rate": 3e-06, "loss": 1.3627, "step": 60 }, { "epoch": 0.03208837453971594, "grad_norm": 2.0144565105438232, "learning_rate": 3.05e-06, "loss": 1.394, "step": 61 }, { "epoch": 0.03261441346659653, "grad_norm": 2.2240288257598877, "learning_rate": 3.1000000000000004e-06, "loss": 1.3657, "step": 62 }, { "epoch": 0.03314045239347712, "grad_norm": 2.0080718994140625, "learning_rate": 3.1500000000000003e-06, "loss": 1.2954, "step": 63 }, { "epoch": 0.033666491320357705, "grad_norm": 2.1592211723327637, "learning_rate": 3.2000000000000003e-06, "loss": 1.363, "step": 64 }, { "epoch": 0.0341925302472383, "grad_norm": 2.1390435695648193, "learning_rate": 3.2500000000000002e-06, "loss": 1.3329, "step": 65 }, { "epoch": 0.034718569174118884, "grad_norm": 2.309795379638672, "learning_rate": 3.3000000000000006e-06, "loss": 1.3378, "step": 66 }, { "epoch": 0.035244608100999476, "grad_norm": 2.0283970832824707, "learning_rate": 3.3500000000000005e-06, "loss": 1.2707, "step": 67 }, { "epoch": 0.03577064702788006, "grad_norm": 2.3350703716278076, "learning_rate": 3.4000000000000005e-06, "loss": 1.3149, "step": 68 }, { "epoch": 0.036296685954760655, "grad_norm": 2.1374268531799316, "learning_rate": 3.45e-06, "loss": 1.3181, "step": 69 }, { "epoch": 0.03682272488164124, "grad_norm": 2.1340744495391846, "learning_rate": 3.5e-06, "loss": 1.2968, "step": 70 }, { "epoch": 0.03734876380852183, "grad_norm": 2.212939500808716, "learning_rate": 3.5500000000000003e-06, "loss": 1.3285, "step": 71 }, { "epoch": 0.03787480273540242, "grad_norm": 2.0891077518463135, "learning_rate": 3.6000000000000003e-06, "loss": 1.3142, "step": 72 }, { "epoch": 0.03840084166228301, "grad_norm": 2.0146496295928955, "learning_rate": 3.65e-06, "loss": 1.2932, "step": 73 }, { "epoch": 0.0389268805891636, "grad_norm": 2.2315266132354736, "learning_rate": 3.7e-06, "loss": 1.3515, "step": 74 }, { "epoch": 0.03945291951604419, "grad_norm": 2.0311717987060547, "learning_rate": 3.7500000000000005e-06, "loss": 1.2601, "step": 75 }, { "epoch": 0.039978958442924775, "grad_norm": 1.9522899389266968, "learning_rate": 3.8000000000000005e-06, "loss": 1.3521, "step": 76 }, { "epoch": 0.04050499736980537, "grad_norm": 2.0501742362976074, "learning_rate": 3.85e-06, "loss": 1.3243, "step": 77 }, { "epoch": 0.041031036296685953, "grad_norm": 2.136033535003662, "learning_rate": 3.900000000000001e-06, "loss": 1.3373, "step": 78 }, { "epoch": 0.041557075223566546, "grad_norm": 2.328866958618164, "learning_rate": 3.95e-06, "loss": 1.2864, "step": 79 }, { "epoch": 0.04208311415044713, "grad_norm": 2.0889344215393066, "learning_rate": 4.000000000000001e-06, "loss": 1.2692, "step": 80 }, { "epoch": 0.042609153077327724, "grad_norm": 2.088667631149292, "learning_rate": 4.05e-06, "loss": 1.2232, "step": 81 }, { "epoch": 0.04313519200420831, "grad_norm": 2.0293898582458496, "learning_rate": 4.1e-06, "loss": 1.2505, "step": 82 }, { "epoch": 0.0436612309310889, "grad_norm": 2.240025281906128, "learning_rate": 4.15e-06, "loss": 1.3107, "step": 83 }, { "epoch": 0.04418726985796949, "grad_norm": 2.123445987701416, "learning_rate": 4.2000000000000004e-06, "loss": 1.1674, "step": 84 }, { "epoch": 0.04471330878485008, "grad_norm": 2.1865620613098145, "learning_rate": 4.25e-06, "loss": 1.3257, "step": 85 }, { "epoch": 0.04523934771173067, "grad_norm": 2.1336405277252197, "learning_rate": 4.3e-06, "loss": 1.2968, "step": 86 }, { "epoch": 0.04576538663861126, "grad_norm": 2.117763042449951, "learning_rate": 4.350000000000001e-06, "loss": 1.2294, "step": 87 }, { "epoch": 0.046291425565491845, "grad_norm": 1.9969348907470703, "learning_rate": 4.4e-06, "loss": 1.2621, "step": 88 }, { "epoch": 0.04681746449237244, "grad_norm": 2.24861741065979, "learning_rate": 4.450000000000001e-06, "loss": 1.2909, "step": 89 }, { "epoch": 0.04734350341925302, "grad_norm": 2.08335542678833, "learning_rate": 4.5e-06, "loss": 1.2691, "step": 90 }, { "epoch": 0.047869542346133616, "grad_norm": 2.1306045055389404, "learning_rate": 4.5500000000000005e-06, "loss": 1.3248, "step": 91 }, { "epoch": 0.0483955812730142, "grad_norm": 2.2251298427581787, "learning_rate": 4.600000000000001e-06, "loss": 1.2391, "step": 92 }, { "epoch": 0.048921620199894794, "grad_norm": 2.1604959964752197, "learning_rate": 4.65e-06, "loss": 1.2169, "step": 93 }, { "epoch": 0.04944765912677538, "grad_norm": 2.0155038833618164, "learning_rate": 4.7e-06, "loss": 1.2533, "step": 94 }, { "epoch": 0.04997369805365597, "grad_norm": 1.9579726457595825, "learning_rate": 4.75e-06, "loss": 1.2228, "step": 95 }, { "epoch": 0.05049973698053656, "grad_norm": 2.129992961883545, "learning_rate": 4.800000000000001e-06, "loss": 1.2573, "step": 96 }, { "epoch": 0.05102577590741715, "grad_norm": 2.0832459926605225, "learning_rate": 4.85e-06, "loss": 1.241, "step": 97 }, { "epoch": 0.051551814834297736, "grad_norm": 2.278550148010254, "learning_rate": 4.9000000000000005e-06, "loss": 1.2565, "step": 98 }, { "epoch": 0.05207785376117833, "grad_norm": 2.0997259616851807, "learning_rate": 4.95e-06, "loss": 1.2445, "step": 99 }, { "epoch": 0.052603892688058915, "grad_norm": 2.127976417541504, "learning_rate": 5e-06, "loss": 1.2605, "step": 100 }, { "epoch": 0.05312993161493951, "grad_norm": 2.1200127601623535, "learning_rate": 4.9999999034856715e-06, "loss": 1.3057, "step": 101 }, { "epoch": 0.05365597054182009, "grad_norm": 2.456881046295166, "learning_rate": 4.999999613942694e-06, "loss": 1.2741, "step": 102 }, { "epoch": 0.054182009468700686, "grad_norm": 2.189507484436035, "learning_rate": 4.9999991313710884e-06, "loss": 1.2399, "step": 103 }, { "epoch": 0.05470804839558127, "grad_norm": 2.258619785308838, "learning_rate": 4.9999984557708936e-06, "loss": 1.2161, "step": 104 }, { "epoch": 0.055234087322461864, "grad_norm": 1.983225703239441, "learning_rate": 4.999997587142161e-06, "loss": 1.2027, "step": 105 }, { "epoch": 0.05576012624934245, "grad_norm": 2.1400973796844482, "learning_rate": 4.999996525484957e-06, "loss": 1.2685, "step": 106 }, { "epoch": 0.05628616517622304, "grad_norm": 1.9494950771331787, "learning_rate": 4.999995270799365e-06, "loss": 1.2604, "step": 107 }, { "epoch": 0.05681220410310363, "grad_norm": 2.1203386783599854, "learning_rate": 4.9999938230854814e-06, "loss": 1.2345, "step": 108 }, { "epoch": 0.05733824302998422, "grad_norm": 2.131884813308716, "learning_rate": 4.999992182343417e-06, "loss": 1.2097, "step": 109 }, { "epoch": 0.057864281956864806, "grad_norm": 2.136289119720459, "learning_rate": 4.9999903485732996e-06, "loss": 1.2617, "step": 110 }, { "epoch": 0.0583903208837454, "grad_norm": 2.025071144104004, "learning_rate": 4.9999883217752705e-06, "loss": 1.2004, "step": 111 }, { "epoch": 0.058916359810625984, "grad_norm": 2.513960838317871, "learning_rate": 4.999986101949486e-06, "loss": 1.2399, "step": 112 }, { "epoch": 0.05944239873750658, "grad_norm": 2.2483277320861816, "learning_rate": 4.999983689096117e-06, "loss": 1.2265, "step": 113 }, { "epoch": 0.05996843766438716, "grad_norm": 2.0863187313079834, "learning_rate": 4.999981083215352e-06, "loss": 1.1969, "step": 114 }, { "epoch": 0.060494476591267755, "grad_norm": 2.1240596771240234, "learning_rate": 4.99997828430739e-06, "loss": 1.275, "step": 115 }, { "epoch": 0.06102051551814834, "grad_norm": 2.3810060024261475, "learning_rate": 4.9999752923724465e-06, "loss": 1.3054, "step": 116 }, { "epoch": 0.061546554445028934, "grad_norm": 2.1266205310821533, "learning_rate": 4.999972107410754e-06, "loss": 1.1933, "step": 117 }, { "epoch": 0.06207259337190952, "grad_norm": 2.039619207382202, "learning_rate": 4.999968729422559e-06, "loss": 1.1886, "step": 118 }, { "epoch": 0.0625986322987901, "grad_norm": 2.024503707885742, "learning_rate": 4.999965158408122e-06, "loss": 1.2008, "step": 119 }, { "epoch": 0.0631246712256707, "grad_norm": 2.058926582336426, "learning_rate": 4.999961394367717e-06, "loss": 1.1772, "step": 120 }, { "epoch": 0.06365071015255129, "grad_norm": 1.989399790763855, "learning_rate": 4.999957437301637e-06, "loss": 1.1869, "step": 121 }, { "epoch": 0.06417674907943188, "grad_norm": 2.0462567806243896, "learning_rate": 4.999953287210185e-06, "loss": 1.1944, "step": 122 }, { "epoch": 0.06470278800631246, "grad_norm": 2.258549213409424, "learning_rate": 4.999948944093683e-06, "loss": 1.2304, "step": 123 }, { "epoch": 0.06522882693319305, "grad_norm": 2.115344285964966, "learning_rate": 4.999944407952467e-06, "loss": 1.1901, "step": 124 }, { "epoch": 0.06575486586007365, "grad_norm": 2.082406997680664, "learning_rate": 4.999939678786886e-06, "loss": 1.2481, "step": 125 }, { "epoch": 0.06628090478695424, "grad_norm": 2.5095906257629395, "learning_rate": 4.999934756597305e-06, "loss": 1.2526, "step": 126 }, { "epoch": 0.06680694371383482, "grad_norm": 1.989524483680725, "learning_rate": 4.999929641384105e-06, "loss": 1.2298, "step": 127 }, { "epoch": 0.06733298264071541, "grad_norm": 2.3429722785949707, "learning_rate": 4.999924333147681e-06, "loss": 1.2511, "step": 128 }, { "epoch": 0.067859021567596, "grad_norm": 2.064497232437134, "learning_rate": 4.999918831888441e-06, "loss": 1.2041, "step": 129 }, { "epoch": 0.0683850604944766, "grad_norm": 2.099992513656616, "learning_rate": 4.999913137606813e-06, "loss": 1.2256, "step": 130 }, { "epoch": 0.06891109942135717, "grad_norm": 2.188778877258301, "learning_rate": 4.999907250303234e-06, "loss": 1.2009, "step": 131 }, { "epoch": 0.06943713834823777, "grad_norm": 2.154895067214966, "learning_rate": 4.999901169978158e-06, "loss": 1.273, "step": 132 }, { "epoch": 0.06996317727511836, "grad_norm": 2.457084894180298, "learning_rate": 4.999894896632058e-06, "loss": 1.2003, "step": 133 }, { "epoch": 0.07048921620199895, "grad_norm": 2.0455472469329834, "learning_rate": 4.999888430265415e-06, "loss": 1.1909, "step": 134 }, { "epoch": 0.07101525512887953, "grad_norm": 2.3690097332000732, "learning_rate": 4.99988177087873e-06, "loss": 1.2414, "step": 135 }, { "epoch": 0.07154129405576012, "grad_norm": 2.0194432735443115, "learning_rate": 4.999874918472516e-06, "loss": 1.2072, "step": 136 }, { "epoch": 0.07206733298264072, "grad_norm": 2.0639989376068115, "learning_rate": 4.999867873047303e-06, "loss": 1.1853, "step": 137 }, { "epoch": 0.07259337190952131, "grad_norm": 2.1263129711151123, "learning_rate": 4.999860634603635e-06, "loss": 1.1915, "step": 138 }, { "epoch": 0.07311941083640189, "grad_norm": 1.9768770933151245, "learning_rate": 4.99985320314207e-06, "loss": 1.1623, "step": 139 }, { "epoch": 0.07364544976328248, "grad_norm": 2.4466986656188965, "learning_rate": 4.9998455786631835e-06, "loss": 1.2549, "step": 140 }, { "epoch": 0.07417148869016307, "grad_norm": 2.482954263687134, "learning_rate": 4.999837761167563e-06, "loss": 1.1503, "step": 141 }, { "epoch": 0.07469752761704367, "grad_norm": 2.1949164867401123, "learning_rate": 4.9998297506558116e-06, "loss": 1.2515, "step": 142 }, { "epoch": 0.07522356654392424, "grad_norm": 2.3435401916503906, "learning_rate": 4.9998215471285486e-06, "loss": 1.2231, "step": 143 }, { "epoch": 0.07574960547080484, "grad_norm": 2.2442994117736816, "learning_rate": 4.9998131505864064e-06, "loss": 1.2472, "step": 144 }, { "epoch": 0.07627564439768543, "grad_norm": 2.4117157459259033, "learning_rate": 4.999804561030036e-06, "loss": 1.2303, "step": 145 }, { "epoch": 0.07680168332456602, "grad_norm": 2.263303279876709, "learning_rate": 4.999795778460097e-06, "loss": 1.2435, "step": 146 }, { "epoch": 0.0773277222514466, "grad_norm": 2.174962282180786, "learning_rate": 4.99978680287727e-06, "loss": 1.2074, "step": 147 }, { "epoch": 0.0778537611783272, "grad_norm": 2.1498875617980957, "learning_rate": 4.999777634282248e-06, "loss": 1.1665, "step": 148 }, { "epoch": 0.07837980010520779, "grad_norm": 2.0245747566223145, "learning_rate": 4.999768272675737e-06, "loss": 1.169, "step": 149 }, { "epoch": 0.07890583903208838, "grad_norm": 2.03243350982666, "learning_rate": 4.999758718058462e-06, "loss": 1.2113, "step": 150 }, { "epoch": 0.07943187795896896, "grad_norm": 2.104052782058716, "learning_rate": 4.9997489704311586e-06, "loss": 1.1792, "step": 151 }, { "epoch": 0.07995791688584955, "grad_norm": 2.16056752204895, "learning_rate": 4.999739029794581e-06, "loss": 1.2183, "step": 152 }, { "epoch": 0.08048395581273014, "grad_norm": 2.1418581008911133, "learning_rate": 4.9997288961494975e-06, "loss": 1.2024, "step": 153 }, { "epoch": 0.08100999473961074, "grad_norm": 2.235917329788208, "learning_rate": 4.999718569496688e-06, "loss": 1.2234, "step": 154 }, { "epoch": 0.08153603366649131, "grad_norm": 2.0039474964141846, "learning_rate": 4.999708049836952e-06, "loss": 1.1164, "step": 155 }, { "epoch": 0.08206207259337191, "grad_norm": 2.0888242721557617, "learning_rate": 4.9996973371710995e-06, "loss": 1.1935, "step": 156 }, { "epoch": 0.0825881115202525, "grad_norm": 2.245558500289917, "learning_rate": 4.999686431499961e-06, "loss": 1.1438, "step": 157 }, { "epoch": 0.08311415044713309, "grad_norm": 2.351905345916748, "learning_rate": 4.999675332824376e-06, "loss": 1.2208, "step": 158 }, { "epoch": 0.08364018937401367, "grad_norm": 2.0418808460235596, "learning_rate": 4.999664041145201e-06, "loss": 1.1537, "step": 159 }, { "epoch": 0.08416622830089426, "grad_norm": 2.194399118423462, "learning_rate": 4.99965255646331e-06, "loss": 1.1602, "step": 160 }, { "epoch": 0.08469226722777486, "grad_norm": 2.4853098392486572, "learning_rate": 4.999640878779588e-06, "loss": 1.1981, "step": 161 }, { "epoch": 0.08521830615465545, "grad_norm": 2.1702558994293213, "learning_rate": 4.9996290080949386e-06, "loss": 1.1682, "step": 162 }, { "epoch": 0.08574434508153603, "grad_norm": 2.150707960128784, "learning_rate": 4.999616944410276e-06, "loss": 1.2123, "step": 163 }, { "epoch": 0.08627038400841662, "grad_norm": 2.166897773742676, "learning_rate": 4.9996046877265325e-06, "loss": 1.1855, "step": 164 }, { "epoch": 0.08679642293529721, "grad_norm": 2.1538188457489014, "learning_rate": 4.999592238044655e-06, "loss": 1.1797, "step": 165 }, { "epoch": 0.0873224618621778, "grad_norm": 2.222170114517212, "learning_rate": 4.999579595365604e-06, "loss": 1.1606, "step": 166 }, { "epoch": 0.08784850078905838, "grad_norm": 2.264437437057495, "learning_rate": 4.999566759690356e-06, "loss": 1.1662, "step": 167 }, { "epoch": 0.08837453971593898, "grad_norm": 2.2306337356567383, "learning_rate": 4.999553731019903e-06, "loss": 1.1933, "step": 168 }, { "epoch": 0.08890057864281957, "grad_norm": 2.2025609016418457, "learning_rate": 4.9995405093552495e-06, "loss": 1.2241, "step": 169 }, { "epoch": 0.08942661756970016, "grad_norm": 2.3908772468566895, "learning_rate": 4.999527094697418e-06, "loss": 1.1954, "step": 170 }, { "epoch": 0.08995265649658074, "grad_norm": 2.1161653995513916, "learning_rate": 4.999513487047442e-06, "loss": 1.2315, "step": 171 }, { "epoch": 0.09047869542346133, "grad_norm": 2.0984017848968506, "learning_rate": 4.9994996864063735e-06, "loss": 1.2413, "step": 172 }, { "epoch": 0.09100473435034193, "grad_norm": 2.205087900161743, "learning_rate": 4.999485692775279e-06, "loss": 1.2267, "step": 173 }, { "epoch": 0.09153077327722252, "grad_norm": 2.224553108215332, "learning_rate": 4.9994715061552365e-06, "loss": 1.1613, "step": 174 }, { "epoch": 0.0920568122041031, "grad_norm": 2.191676139831543, "learning_rate": 4.999457126547344e-06, "loss": 1.168, "step": 175 }, { "epoch": 0.09258285113098369, "grad_norm": 2.2432751655578613, "learning_rate": 4.99944255395271e-06, "loss": 1.218, "step": 176 }, { "epoch": 0.09310889005786428, "grad_norm": 2.1327083110809326, "learning_rate": 4.999427788372461e-06, "loss": 1.1994, "step": 177 }, { "epoch": 0.09363492898474488, "grad_norm": 2.146256923675537, "learning_rate": 4.999412829807735e-06, "loss": 1.1387, "step": 178 }, { "epoch": 0.09416096791162545, "grad_norm": 2.377356767654419, "learning_rate": 4.999397678259689e-06, "loss": 1.1901, "step": 179 }, { "epoch": 0.09468700683850605, "grad_norm": 2.192535638809204, "learning_rate": 4.999382333729492e-06, "loss": 1.2079, "step": 180 }, { "epoch": 0.09521304576538664, "grad_norm": 2.0958621501922607, "learning_rate": 4.999366796218329e-06, "loss": 1.1663, "step": 181 }, { "epoch": 0.09573908469226723, "grad_norm": 2.1492772102355957, "learning_rate": 4.9993510657274e-06, "loss": 1.1877, "step": 182 }, { "epoch": 0.09626512361914781, "grad_norm": 2.366111993789673, "learning_rate": 4.999335142257919e-06, "loss": 1.1849, "step": 183 }, { "epoch": 0.0967911625460284, "grad_norm": 2.144526243209839, "learning_rate": 4.999319025811116e-06, "loss": 1.1739, "step": 184 }, { "epoch": 0.097317201472909, "grad_norm": 2.3407647609710693, "learning_rate": 4.999302716388234e-06, "loss": 1.1987, "step": 185 }, { "epoch": 0.09784324039978959, "grad_norm": 2.3771328926086426, "learning_rate": 4.999286213990534e-06, "loss": 1.2024, "step": 186 }, { "epoch": 0.09836927932667017, "grad_norm": 2.2484753131866455, "learning_rate": 4.99926951861929e-06, "loss": 1.2087, "step": 187 }, { "epoch": 0.09889531825355076, "grad_norm": 2.276099681854248, "learning_rate": 4.99925263027579e-06, "loss": 1.1696, "step": 188 }, { "epoch": 0.09942135718043135, "grad_norm": 2.1576876640319824, "learning_rate": 4.999235548961338e-06, "loss": 1.1404, "step": 189 }, { "epoch": 0.09994739610731194, "grad_norm": 2.1412558555603027, "learning_rate": 4.999218274677254e-06, "loss": 1.1279, "step": 190 }, { "epoch": 0.10047343503419252, "grad_norm": 2.1507153511047363, "learning_rate": 4.999200807424871e-06, "loss": 1.1841, "step": 191 }, { "epoch": 0.10099947396107312, "grad_norm": 2.236116886138916, "learning_rate": 4.999183147205538e-06, "loss": 1.208, "step": 192 }, { "epoch": 0.10152551288795371, "grad_norm": 2.1643691062927246, "learning_rate": 4.9991652940206185e-06, "loss": 1.1325, "step": 193 }, { "epoch": 0.1020515518148343, "grad_norm": 2.11639142036438, "learning_rate": 4.999147247871491e-06, "loss": 1.2073, "step": 194 }, { "epoch": 0.10257759074171488, "grad_norm": 1.9682193994522095, "learning_rate": 4.9991290087595475e-06, "loss": 1.1447, "step": 195 }, { "epoch": 0.10310362966859547, "grad_norm": 1.9927830696105957, "learning_rate": 4.9991105766861996e-06, "loss": 1.1694, "step": 196 }, { "epoch": 0.10362966859547607, "grad_norm": 2.0124592781066895, "learning_rate": 4.999091951652867e-06, "loss": 1.152, "step": 197 }, { "epoch": 0.10415570752235666, "grad_norm": 2.1793248653411865, "learning_rate": 4.99907313366099e-06, "loss": 1.228, "step": 198 }, { "epoch": 0.10468174644923724, "grad_norm": 2.1615028381347656, "learning_rate": 4.99905412271202e-06, "loss": 1.2106, "step": 199 }, { "epoch": 0.10520778537611783, "grad_norm": 1.9827650785446167, "learning_rate": 4.999034918807425e-06, "loss": 1.1829, "step": 200 }, { "epoch": 0.10573382430299842, "grad_norm": 2.1772680282592773, "learning_rate": 4.999015521948689e-06, "loss": 1.13, "step": 201 }, { "epoch": 0.10625986322987901, "grad_norm": 2.257385492324829, "learning_rate": 4.99899593213731e-06, "loss": 1.2144, "step": 202 }, { "epoch": 0.1067859021567596, "grad_norm": 2.104809045791626, "learning_rate": 4.998976149374799e-06, "loss": 1.1715, "step": 203 }, { "epoch": 0.10731194108364019, "grad_norm": 2.116504430770874, "learning_rate": 4.998956173662683e-06, "loss": 1.1442, "step": 204 }, { "epoch": 0.10783798001052078, "grad_norm": 2.2018845081329346, "learning_rate": 4.998936005002507e-06, "loss": 1.1327, "step": 205 }, { "epoch": 0.10836401893740137, "grad_norm": 2.2733311653137207, "learning_rate": 4.998915643395826e-06, "loss": 1.1821, "step": 206 }, { "epoch": 0.10889005786428195, "grad_norm": 2.0005805492401123, "learning_rate": 4.998895088844212e-06, "loss": 1.0955, "step": 207 }, { "epoch": 0.10941609679116254, "grad_norm": 2.0851638317108154, "learning_rate": 4.998874341349253e-06, "loss": 1.1851, "step": 208 }, { "epoch": 0.10994213571804314, "grad_norm": 2.032989501953125, "learning_rate": 4.998853400912552e-06, "loss": 1.1069, "step": 209 }, { "epoch": 0.11046817464492373, "grad_norm": 2.295994520187378, "learning_rate": 4.9988322675357235e-06, "loss": 1.1511, "step": 210 }, { "epoch": 0.1109942135718043, "grad_norm": 1.9963881969451904, "learning_rate": 4.9988109412204015e-06, "loss": 1.1497, "step": 211 }, { "epoch": 0.1115202524986849, "grad_norm": 2.6223835945129395, "learning_rate": 4.998789421968231e-06, "loss": 1.1692, "step": 212 }, { "epoch": 0.11204629142556549, "grad_norm": 2.1924188137054443, "learning_rate": 4.998767709780873e-06, "loss": 1.1659, "step": 213 }, { "epoch": 0.11257233035244608, "grad_norm": 2.4124836921691895, "learning_rate": 4.998745804660005e-06, "loss": 1.1965, "step": 214 }, { "epoch": 0.11309836927932668, "grad_norm": 2.15348482131958, "learning_rate": 4.99872370660732e-06, "loss": 1.1337, "step": 215 }, { "epoch": 0.11362440820620726, "grad_norm": 2.3462562561035156, "learning_rate": 4.9987014156245215e-06, "loss": 1.1793, "step": 216 }, { "epoch": 0.11415044713308785, "grad_norm": 2.1864969730377197, "learning_rate": 4.998678931713331e-06, "loss": 1.1139, "step": 217 }, { "epoch": 0.11467648605996844, "grad_norm": 2.1411378383636475, "learning_rate": 4.998656254875486e-06, "loss": 1.1582, "step": 218 }, { "epoch": 0.11520252498684903, "grad_norm": 2.2826247215270996, "learning_rate": 4.998633385112737e-06, "loss": 1.1779, "step": 219 }, { "epoch": 0.11572856391372961, "grad_norm": 2.0697169303894043, "learning_rate": 4.998610322426848e-06, "loss": 1.1775, "step": 220 }, { "epoch": 0.1162546028406102, "grad_norm": 2.153381824493408, "learning_rate": 4.998587066819602e-06, "loss": 1.2244, "step": 221 }, { "epoch": 0.1167806417674908, "grad_norm": 2.151595115661621, "learning_rate": 4.998563618292793e-06, "loss": 1.1562, "step": 222 }, { "epoch": 0.11730668069437139, "grad_norm": 2.1102607250213623, "learning_rate": 4.998539976848233e-06, "loss": 1.1326, "step": 223 }, { "epoch": 0.11783271962125197, "grad_norm": 2.3099205493927, "learning_rate": 4.998516142487746e-06, "loss": 1.1934, "step": 224 }, { "epoch": 0.11835875854813256, "grad_norm": 2.0830485820770264, "learning_rate": 4.998492115213173e-06, "loss": 1.105, "step": 225 }, { "epoch": 0.11888479747501315, "grad_norm": 1.965256929397583, "learning_rate": 4.998467895026369e-06, "loss": 1.1496, "step": 226 }, { "epoch": 0.11941083640189375, "grad_norm": 2.060734272003174, "learning_rate": 4.9984434819292036e-06, "loss": 1.1256, "step": 227 }, { "epoch": 0.11993687532877433, "grad_norm": 2.278106927871704, "learning_rate": 4.998418875923563e-06, "loss": 1.1557, "step": 228 }, { "epoch": 0.12046291425565492, "grad_norm": 2.562490463256836, "learning_rate": 4.998394077011346e-06, "loss": 1.1579, "step": 229 }, { "epoch": 0.12098895318253551, "grad_norm": 2.20798921585083, "learning_rate": 4.998369085194468e-06, "loss": 1.181, "step": 230 }, { "epoch": 0.1215149921094161, "grad_norm": 2.3529961109161377, "learning_rate": 4.998343900474858e-06, "loss": 1.1514, "step": 231 }, { "epoch": 0.12204103103629668, "grad_norm": 2.2413651943206787, "learning_rate": 4.998318522854461e-06, "loss": 1.1317, "step": 232 }, { "epoch": 0.12256706996317727, "grad_norm": 2.2179031372070312, "learning_rate": 4.998292952335236e-06, "loss": 1.1784, "step": 233 }, { "epoch": 0.12309310889005787, "grad_norm": 2.2591211795806885, "learning_rate": 4.998267188919158e-06, "loss": 1.1587, "step": 234 }, { "epoch": 0.12361914781693846, "grad_norm": 2.4820573329925537, "learning_rate": 4.998241232608216e-06, "loss": 1.1448, "step": 235 }, { "epoch": 0.12414518674381904, "grad_norm": 2.202066659927368, "learning_rate": 4.998215083404414e-06, "loss": 1.1859, "step": 236 }, { "epoch": 0.12467122567069963, "grad_norm": 2.246918201446533, "learning_rate": 4.9981887413097705e-06, "loss": 1.1778, "step": 237 }, { "epoch": 0.1251972645975802, "grad_norm": 2.166926145553589, "learning_rate": 4.9981622063263205e-06, "loss": 1.16, "step": 238 }, { "epoch": 0.12572330352446082, "grad_norm": 2.2850661277770996, "learning_rate": 4.998135478456112e-06, "loss": 1.1522, "step": 239 }, { "epoch": 0.1262493424513414, "grad_norm": 2.1694653034210205, "learning_rate": 4.9981085577012095e-06, "loss": 1.1394, "step": 240 }, { "epoch": 0.126775381378222, "grad_norm": 2.061791181564331, "learning_rate": 4.998081444063691e-06, "loss": 1.1551, "step": 241 }, { "epoch": 0.12730142030510258, "grad_norm": 2.1517114639282227, "learning_rate": 4.998054137545649e-06, "loss": 1.1487, "step": 242 }, { "epoch": 0.12782745923198316, "grad_norm": 2.118903398513794, "learning_rate": 4.9980266381491935e-06, "loss": 1.1871, "step": 243 }, { "epoch": 0.12835349815886377, "grad_norm": 2.271512508392334, "learning_rate": 4.997998945876448e-06, "loss": 1.21, "step": 244 }, { "epoch": 0.12887953708574434, "grad_norm": 2.199542760848999, "learning_rate": 4.997971060729549e-06, "loss": 1.17, "step": 245 }, { "epoch": 0.12940557601262492, "grad_norm": 2.213566303253174, "learning_rate": 4.997942982710651e-06, "loss": 1.1521, "step": 246 }, { "epoch": 0.12993161493950553, "grad_norm": 2.291456699371338, "learning_rate": 4.997914711821921e-06, "loss": 1.1671, "step": 247 }, { "epoch": 0.1304576538663861, "grad_norm": 2.017871856689453, "learning_rate": 4.997886248065542e-06, "loss": 1.1522, "step": 248 }, { "epoch": 0.13098369279326671, "grad_norm": 2.1125521659851074, "learning_rate": 4.9978575914437115e-06, "loss": 1.1335, "step": 249 }, { "epoch": 0.1315097317201473, "grad_norm": 2.262874126434326, "learning_rate": 4.997828741958643e-06, "loss": 1.1697, "step": 250 }, { "epoch": 0.13203577064702787, "grad_norm": 2.450192451477051, "learning_rate": 4.997799699612563e-06, "loss": 1.1329, "step": 251 }, { "epoch": 0.13256180957390848, "grad_norm": 2.0831351280212402, "learning_rate": 4.997770464407715e-06, "loss": 1.1711, "step": 252 }, { "epoch": 0.13308784850078906, "grad_norm": 2.2078895568847656, "learning_rate": 4.997741036346357e-06, "loss": 1.1998, "step": 253 }, { "epoch": 0.13361388742766964, "grad_norm": 2.175858497619629, "learning_rate": 4.997711415430759e-06, "loss": 1.1083, "step": 254 }, { "epoch": 0.13413992635455024, "grad_norm": 2.203817129135132, "learning_rate": 4.997681601663207e-06, "loss": 1.088, "step": 255 }, { "epoch": 0.13466596528143082, "grad_norm": 2.0065557956695557, "learning_rate": 4.997651595046007e-06, "loss": 1.1584, "step": 256 }, { "epoch": 0.13519200420831143, "grad_norm": 2.299633264541626, "learning_rate": 4.997621395581474e-06, "loss": 1.2102, "step": 257 }, { "epoch": 0.135718043135192, "grad_norm": 2.2972707748413086, "learning_rate": 4.997591003271938e-06, "loss": 1.1821, "step": 258 }, { "epoch": 0.13624408206207259, "grad_norm": 2.399705171585083, "learning_rate": 4.997560418119749e-06, "loss": 1.1325, "step": 259 }, { "epoch": 0.1367701209889532, "grad_norm": 2.2461678981781006, "learning_rate": 4.997529640127266e-06, "loss": 1.2361, "step": 260 }, { "epoch": 0.13729615991583377, "grad_norm": 2.236917495727539, "learning_rate": 4.997498669296865e-06, "loss": 1.1159, "step": 261 }, { "epoch": 0.13782219884271435, "grad_norm": 2.2851338386535645, "learning_rate": 4.99746750563094e-06, "loss": 1.1688, "step": 262 }, { "epoch": 0.13834823776959496, "grad_norm": 2.1499626636505127, "learning_rate": 4.997436149131894e-06, "loss": 1.1478, "step": 263 }, { "epoch": 0.13887427669647553, "grad_norm": 2.0969858169555664, "learning_rate": 4.997404599802151e-06, "loss": 1.1102, "step": 264 }, { "epoch": 0.13940031562335614, "grad_norm": 2.5635933876037598, "learning_rate": 4.997372857644146e-06, "loss": 1.1173, "step": 265 }, { "epoch": 0.13992635455023672, "grad_norm": 2.1076197624206543, "learning_rate": 4.997340922660329e-06, "loss": 1.1321, "step": 266 }, { "epoch": 0.1404523934771173, "grad_norm": 2.179189443588257, "learning_rate": 4.997308794853165e-06, "loss": 1.1325, "step": 267 }, { "epoch": 0.1409784324039979, "grad_norm": 2.0838067531585693, "learning_rate": 4.9972764742251375e-06, "loss": 1.1243, "step": 268 }, { "epoch": 0.14150447133087848, "grad_norm": 2.1462979316711426, "learning_rate": 4.9972439607787405e-06, "loss": 1.1251, "step": 269 }, { "epoch": 0.14203051025775906, "grad_norm": 2.144658088684082, "learning_rate": 4.997211254516484e-06, "loss": 1.1879, "step": 270 }, { "epoch": 0.14255654918463967, "grad_norm": 2.118098020553589, "learning_rate": 4.997178355440892e-06, "loss": 1.1635, "step": 271 }, { "epoch": 0.14308258811152025, "grad_norm": 2.284640312194824, "learning_rate": 4.99714526355451e-06, "loss": 1.1181, "step": 272 }, { "epoch": 0.14360862703840085, "grad_norm": 2.2020652294158936, "learning_rate": 4.997111978859886e-06, "loss": 1.1234, "step": 273 }, { "epoch": 0.14413466596528143, "grad_norm": 2.164998769760132, "learning_rate": 4.997078501359595e-06, "loss": 1.1723, "step": 274 }, { "epoch": 0.144660704892162, "grad_norm": 2.1917877197265625, "learning_rate": 4.9970448310562196e-06, "loss": 1.1222, "step": 275 }, { "epoch": 0.14518674381904262, "grad_norm": 2.314770221710205, "learning_rate": 4.99701096795236e-06, "loss": 1.183, "step": 276 }, { "epoch": 0.1457127827459232, "grad_norm": 2.217176675796509, "learning_rate": 4.996976912050632e-06, "loss": 1.1509, "step": 277 }, { "epoch": 0.14623882167280378, "grad_norm": 2.253232002258301, "learning_rate": 4.996942663353663e-06, "loss": 1.1733, "step": 278 }, { "epoch": 0.14676486059968438, "grad_norm": 2.091414213180542, "learning_rate": 4.996908221864099e-06, "loss": 1.1479, "step": 279 }, { "epoch": 0.14729089952656496, "grad_norm": 2.391035556793213, "learning_rate": 4.996873587584599e-06, "loss": 1.1646, "step": 280 }, { "epoch": 0.14781693845344557, "grad_norm": 1.941179871559143, "learning_rate": 4.996838760517836e-06, "loss": 1.1362, "step": 281 }, { "epoch": 0.14834297738032615, "grad_norm": 2.3869614601135254, "learning_rate": 4.9968037406665e-06, "loss": 1.1455, "step": 282 }, { "epoch": 0.14886901630720673, "grad_norm": 2.2253477573394775, "learning_rate": 4.9967685280332955e-06, "loss": 1.1934, "step": 283 }, { "epoch": 0.14939505523408733, "grad_norm": 2.235481023788452, "learning_rate": 4.99673312262094e-06, "loss": 1.1457, "step": 284 }, { "epoch": 0.1499210941609679, "grad_norm": 2.1756770610809326, "learning_rate": 4.996697524432169e-06, "loss": 1.1874, "step": 285 }, { "epoch": 0.1504471330878485, "grad_norm": 1.9890838861465454, "learning_rate": 4.99666173346973e-06, "loss": 1.1381, "step": 286 }, { "epoch": 0.1509731720147291, "grad_norm": 2.032940149307251, "learning_rate": 4.996625749736386e-06, "loss": 1.1408, "step": 287 }, { "epoch": 0.15149921094160967, "grad_norm": 2.38653564453125, "learning_rate": 4.996589573234915e-06, "loss": 1.1137, "step": 288 }, { "epoch": 0.15202524986849028, "grad_norm": 2.5009000301361084, "learning_rate": 4.9965532039681116e-06, "loss": 1.1404, "step": 289 }, { "epoch": 0.15255128879537086, "grad_norm": 2.113600969314575, "learning_rate": 4.996516641938784e-06, "loss": 1.0764, "step": 290 }, { "epoch": 0.15307732772225144, "grad_norm": 2.2645368576049805, "learning_rate": 4.996479887149754e-06, "loss": 1.1499, "step": 291 }, { "epoch": 0.15360336664913204, "grad_norm": 2.015124559402466, "learning_rate": 4.99644293960386e-06, "loss": 1.0487, "step": 292 }, { "epoch": 0.15412940557601262, "grad_norm": 2.121588706970215, "learning_rate": 4.996405799303955e-06, "loss": 1.1119, "step": 293 }, { "epoch": 0.1546554445028932, "grad_norm": 2.3707003593444824, "learning_rate": 4.996368466252907e-06, "loss": 1.1797, "step": 294 }, { "epoch": 0.1551814834297738, "grad_norm": 2.3027000427246094, "learning_rate": 4.996330940453598e-06, "loss": 1.1228, "step": 295 }, { "epoch": 0.1557075223566544, "grad_norm": 2.0909178256988525, "learning_rate": 4.996293221908925e-06, "loss": 1.0932, "step": 296 }, { "epoch": 0.156233561283535, "grad_norm": 2.362823486328125, "learning_rate": 4.996255310621801e-06, "loss": 1.1507, "step": 297 }, { "epoch": 0.15675960021041557, "grad_norm": 2.080667495727539, "learning_rate": 4.996217206595153e-06, "loss": 1.1158, "step": 298 }, { "epoch": 0.15728563913729615, "grad_norm": 2.0508742332458496, "learning_rate": 4.996178909831922e-06, "loss": 1.1326, "step": 299 }, { "epoch": 0.15781167806417676, "grad_norm": 2.1632707118988037, "learning_rate": 4.996140420335068e-06, "loss": 1.0946, "step": 300 }, { "epoch": 0.15833771699105734, "grad_norm": 1.9084789752960205, "learning_rate": 4.996101738107559e-06, "loss": 1.0939, "step": 301 }, { "epoch": 0.15886375591793792, "grad_norm": 1.9817906618118286, "learning_rate": 4.996062863152385e-06, "loss": 1.1013, "step": 302 }, { "epoch": 0.15938979484481852, "grad_norm": 1.9947365522384644, "learning_rate": 4.9960237954725446e-06, "loss": 1.0635, "step": 303 }, { "epoch": 0.1599158337716991, "grad_norm": 2.0908870697021484, "learning_rate": 4.995984535071056e-06, "loss": 1.0914, "step": 304 }, { "epoch": 0.1604418726985797, "grad_norm": 2.1920530796051025, "learning_rate": 4.995945081950952e-06, "loss": 1.1816, "step": 305 }, { "epoch": 0.16096791162546029, "grad_norm": 2.250007152557373, "learning_rate": 4.995905436115276e-06, "loss": 1.1543, "step": 306 }, { "epoch": 0.16149395055234086, "grad_norm": 2.3157906532287598, "learning_rate": 4.995865597567091e-06, "loss": 1.1349, "step": 307 }, { "epoch": 0.16201998947922147, "grad_norm": 2.816443681716919, "learning_rate": 4.995825566309471e-06, "loss": 1.1154, "step": 308 }, { "epoch": 0.16254602840610205, "grad_norm": 2.3194282054901123, "learning_rate": 4.995785342345509e-06, "loss": 1.1547, "step": 309 }, { "epoch": 0.16307206733298263, "grad_norm": 2.1249098777770996, "learning_rate": 4.99574492567831e-06, "loss": 1.0995, "step": 310 }, { "epoch": 0.16359810625986324, "grad_norm": 2.100315809249878, "learning_rate": 4.995704316310994e-06, "loss": 1.1662, "step": 311 }, { "epoch": 0.16412414518674381, "grad_norm": 2.1664323806762695, "learning_rate": 4.995663514246697e-06, "loss": 1.1466, "step": 312 }, { "epoch": 0.16465018411362442, "grad_norm": 2.217438220977783, "learning_rate": 4.9956225194885704e-06, "loss": 1.1908, "step": 313 }, { "epoch": 0.165176223040505, "grad_norm": 2.3328514099121094, "learning_rate": 4.995581332039778e-06, "loss": 1.0809, "step": 314 }, { "epoch": 0.16570226196738558, "grad_norm": 2.088467836380005, "learning_rate": 4.9955399519035e-06, "loss": 1.0908, "step": 315 }, { "epoch": 0.16622830089426618, "grad_norm": 2.2554612159729004, "learning_rate": 4.995498379082932e-06, "loss": 1.1702, "step": 316 }, { "epoch": 0.16675433982114676, "grad_norm": 2.2798142433166504, "learning_rate": 4.995456613581284e-06, "loss": 1.107, "step": 317 }, { "epoch": 0.16728037874802734, "grad_norm": 2.4394755363464355, "learning_rate": 4.9954146554017816e-06, "loss": 1.0881, "step": 318 }, { "epoch": 0.16780641767490795, "grad_norm": 2.1176295280456543, "learning_rate": 4.995372504547662e-06, "loss": 1.1177, "step": 319 }, { "epoch": 0.16833245660178853, "grad_norm": 2.141923189163208, "learning_rate": 4.995330161022181e-06, "loss": 1.1321, "step": 320 }, { "epoch": 0.16885849552866913, "grad_norm": 2.273068428039551, "learning_rate": 4.9952876248286086e-06, "loss": 1.1832, "step": 321 }, { "epoch": 0.1693845344555497, "grad_norm": 2.267636299133301, "learning_rate": 4.995244895970228e-06, "loss": 1.1058, "step": 322 }, { "epoch": 0.1699105733824303, "grad_norm": 2.133772850036621, "learning_rate": 4.99520197445034e-06, "loss": 1.1478, "step": 323 }, { "epoch": 0.1704366123093109, "grad_norm": 2.2782862186431885, "learning_rate": 4.995158860272257e-06, "loss": 1.1074, "step": 324 }, { "epoch": 0.17096265123619148, "grad_norm": 2.544316053390503, "learning_rate": 4.995115553439308e-06, "loss": 1.0583, "step": 325 }, { "epoch": 0.17148869016307206, "grad_norm": 2.2900187969207764, "learning_rate": 4.995072053954838e-06, "loss": 1.1933, "step": 326 }, { "epoch": 0.17201472908995266, "grad_norm": 2.190380811691284, "learning_rate": 4.995028361822206e-06, "loss": 1.135, "step": 327 }, { "epoch": 0.17254076801683324, "grad_norm": 2.4495794773101807, "learning_rate": 4.9949844770447834e-06, "loss": 1.1214, "step": 328 }, { "epoch": 0.17306680694371385, "grad_norm": 2.332644462585449, "learning_rate": 4.994940399625959e-06, "loss": 1.1017, "step": 329 }, { "epoch": 0.17359284587059443, "grad_norm": 2.0709457397460938, "learning_rate": 4.994896129569138e-06, "loss": 1.1073, "step": 330 }, { "epoch": 0.174118884797475, "grad_norm": 2.8817923069000244, "learning_rate": 4.994851666877736e-06, "loss": 1.0758, "step": 331 }, { "epoch": 0.1746449237243556, "grad_norm": 2.2557790279388428, "learning_rate": 4.994807011555189e-06, "loss": 1.173, "step": 332 }, { "epoch": 0.1751709626512362, "grad_norm": 2.2412662506103516, "learning_rate": 4.994762163604942e-06, "loss": 1.1357, "step": 333 }, { "epoch": 0.17569700157811677, "grad_norm": 2.1749277114868164, "learning_rate": 4.9947171230304595e-06, "loss": 1.0988, "step": 334 }, { "epoch": 0.17622304050499737, "grad_norm": 2.4530062675476074, "learning_rate": 4.994671889835218e-06, "loss": 1.1377, "step": 335 }, { "epoch": 0.17674907943187795, "grad_norm": 2.2602410316467285, "learning_rate": 4.994626464022711e-06, "loss": 1.0799, "step": 336 }, { "epoch": 0.17727511835875856, "grad_norm": 2.0797061920166016, "learning_rate": 4.994580845596446e-06, "loss": 1.1214, "step": 337 }, { "epoch": 0.17780115728563914, "grad_norm": 2.1437630653381348, "learning_rate": 4.994535034559945e-06, "loss": 1.1794, "step": 338 }, { "epoch": 0.17832719621251972, "grad_norm": 2.0809285640716553, "learning_rate": 4.994489030916745e-06, "loss": 1.1331, "step": 339 }, { "epoch": 0.17885323513940032, "grad_norm": 2.31193208694458, "learning_rate": 4.994442834670397e-06, "loss": 1.1425, "step": 340 }, { "epoch": 0.1793792740662809, "grad_norm": 2.0348451137542725, "learning_rate": 4.99439644582447e-06, "loss": 1.1149, "step": 341 }, { "epoch": 0.17990531299316148, "grad_norm": 2.2816810607910156, "learning_rate": 4.994349864382544e-06, "loss": 1.1509, "step": 342 }, { "epoch": 0.1804313519200421, "grad_norm": 2.08492374420166, "learning_rate": 4.994303090348217e-06, "loss": 1.0854, "step": 343 }, { "epoch": 0.18095739084692267, "grad_norm": 2.0389866828918457, "learning_rate": 4.994256123725098e-06, "loss": 1.1195, "step": 344 }, { "epoch": 0.18148342977380327, "grad_norm": 2.2040510177612305, "learning_rate": 4.9942089645168175e-06, "loss": 1.1112, "step": 345 }, { "epoch": 0.18200946870068385, "grad_norm": 2.058849811553955, "learning_rate": 4.994161612727013e-06, "loss": 1.1462, "step": 346 }, { "epoch": 0.18253550762756443, "grad_norm": 2.2940948009490967, "learning_rate": 4.994114068359343e-06, "loss": 1.2183, "step": 347 }, { "epoch": 0.18306154655444504, "grad_norm": 2.0303874015808105, "learning_rate": 4.9940663314174756e-06, "loss": 1.1136, "step": 348 }, { "epoch": 0.18358758548132562, "grad_norm": 2.208289861679077, "learning_rate": 4.9940184019051e-06, "loss": 1.1507, "step": 349 }, { "epoch": 0.1841136244082062, "grad_norm": 2.438228130340576, "learning_rate": 4.993970279825915e-06, "loss": 1.1619, "step": 350 }, { "epoch": 0.1846396633350868, "grad_norm": 2.1701645851135254, "learning_rate": 4.993921965183636e-06, "loss": 1.1057, "step": 351 }, { "epoch": 0.18516570226196738, "grad_norm": 2.345054864883423, "learning_rate": 4.9938734579819944e-06, "loss": 1.1758, "step": 352 }, { "epoch": 0.185691741188848, "grad_norm": 2.3761768341064453, "learning_rate": 4.9938247582247345e-06, "loss": 1.1093, "step": 353 }, { "epoch": 0.18621778011572857, "grad_norm": 2.2209126949310303, "learning_rate": 4.993775865915618e-06, "loss": 1.0882, "step": 354 }, { "epoch": 0.18674381904260914, "grad_norm": 2.093406915664673, "learning_rate": 4.993726781058419e-06, "loss": 1.1621, "step": 355 }, { "epoch": 0.18726985796948975, "grad_norm": 2.509725332260132, "learning_rate": 4.993677503656927e-06, "loss": 1.1411, "step": 356 }, { "epoch": 0.18779589689637033, "grad_norm": 2.2245242595672607, "learning_rate": 4.993628033714947e-06, "loss": 1.1042, "step": 357 }, { "epoch": 0.1883219358232509, "grad_norm": 1.838408350944519, "learning_rate": 4.9935783712363e-06, "loss": 1.0204, "step": 358 }, { "epoch": 0.18884797475013151, "grad_norm": 2.0559537410736084, "learning_rate": 4.993528516224818e-06, "loss": 1.0681, "step": 359 }, { "epoch": 0.1893740136770121, "grad_norm": 2.084890604019165, "learning_rate": 4.993478468684352e-06, "loss": 1.1149, "step": 360 }, { "epoch": 0.1899000526038927, "grad_norm": 2.179478168487549, "learning_rate": 4.993428228618767e-06, "loss": 1.1342, "step": 361 }, { "epoch": 0.19042609153077328, "grad_norm": 2.082578182220459, "learning_rate": 4.99337779603194e-06, "loss": 1.1293, "step": 362 }, { "epoch": 0.19095213045765386, "grad_norm": 2.031831979751587, "learning_rate": 4.993327170927766e-06, "loss": 1.0728, "step": 363 }, { "epoch": 0.19147816938453446, "grad_norm": 2.1939597129821777, "learning_rate": 4.993276353310155e-06, "loss": 1.1252, "step": 364 }, { "epoch": 0.19200420831141504, "grad_norm": 2.031350612640381, "learning_rate": 4.9932253431830295e-06, "loss": 1.1039, "step": 365 }, { "epoch": 0.19253024723829562, "grad_norm": 2.3367671966552734, "learning_rate": 4.993174140550327e-06, "loss": 1.1211, "step": 366 }, { "epoch": 0.19305628616517623, "grad_norm": 2.2768945693969727, "learning_rate": 4.993122745416003e-06, "loss": 1.1119, "step": 367 }, { "epoch": 0.1935823250920568, "grad_norm": 2.220766544342041, "learning_rate": 4.993071157784025e-06, "loss": 1.1451, "step": 368 }, { "epoch": 0.1941083640189374, "grad_norm": 2.3694369792938232, "learning_rate": 4.993019377658376e-06, "loss": 1.1156, "step": 369 }, { "epoch": 0.194634402945818, "grad_norm": 2.245237350463867, "learning_rate": 4.9929674050430535e-06, "loss": 1.1316, "step": 370 }, { "epoch": 0.19516044187269857, "grad_norm": 2.720625400543213, "learning_rate": 4.992915239942071e-06, "loss": 1.1092, "step": 371 }, { "epoch": 0.19568648079957918, "grad_norm": 2.115727424621582, "learning_rate": 4.992862882359457e-06, "loss": 1.1769, "step": 372 }, { "epoch": 0.19621251972645976, "grad_norm": 2.235677480697632, "learning_rate": 4.992810332299253e-06, "loss": 1.1786, "step": 373 }, { "epoch": 0.19673855865334033, "grad_norm": 2.539433002471924, "learning_rate": 4.992757589765516e-06, "loss": 1.1251, "step": 374 }, { "epoch": 0.19726459758022094, "grad_norm": 5.042508602142334, "learning_rate": 4.99270465476232e-06, "loss": 1.0706, "step": 375 }, { "epoch": 0.19779063650710152, "grad_norm": 2.1171703338623047, "learning_rate": 4.9926515272937516e-06, "loss": 1.1287, "step": 376 }, { "epoch": 0.19831667543398213, "grad_norm": 2.4587223529815674, "learning_rate": 4.992598207363912e-06, "loss": 1.053, "step": 377 }, { "epoch": 0.1988427143608627, "grad_norm": 2.1502695083618164, "learning_rate": 4.9925446949769184e-06, "loss": 1.0837, "step": 378 }, { "epoch": 0.19936875328774328, "grad_norm": 2.139822483062744, "learning_rate": 4.992490990136903e-06, "loss": 1.1358, "step": 379 }, { "epoch": 0.1998947922146239, "grad_norm": 2.4914610385894775, "learning_rate": 4.992437092848012e-06, "loss": 1.1053, "step": 380 }, { "epoch": 0.20042083114150447, "grad_norm": 2.24576735496521, "learning_rate": 4.992383003114408e-06, "loss": 1.1034, "step": 381 }, { "epoch": 0.20094687006838505, "grad_norm": 2.1979477405548096, "learning_rate": 4.992328720940266e-06, "loss": 1.0839, "step": 382 }, { "epoch": 0.20147290899526565, "grad_norm": 2.1680850982666016, "learning_rate": 4.992274246329778e-06, "loss": 1.1011, "step": 383 }, { "epoch": 0.20199894792214623, "grad_norm": 2.3214027881622314, "learning_rate": 4.9922195792871495e-06, "loss": 1.03, "step": 384 }, { "epoch": 0.20252498684902684, "grad_norm": 2.162393808364868, "learning_rate": 4.9921647198166014e-06, "loss": 1.0466, "step": 385 }, { "epoch": 0.20305102577590742, "grad_norm": 2.184163808822632, "learning_rate": 4.99210966792237e-06, "loss": 1.1379, "step": 386 }, { "epoch": 0.203577064702788, "grad_norm": 2.3308913707733154, "learning_rate": 4.992054423608706e-06, "loss": 1.1751, "step": 387 }, { "epoch": 0.2041031036296686, "grad_norm": 2.123298168182373, "learning_rate": 4.991998986879874e-06, "loss": 1.1079, "step": 388 }, { "epoch": 0.20462914255654918, "grad_norm": 2.229844331741333, "learning_rate": 4.991943357740155e-06, "loss": 1.1242, "step": 389 }, { "epoch": 0.20515518148342976, "grad_norm": 2.1815683841705322, "learning_rate": 4.991887536193845e-06, "loss": 1.0949, "step": 390 }, { "epoch": 0.20568122041031037, "grad_norm": 2.4636261463165283, "learning_rate": 4.991831522245253e-06, "loss": 1.1118, "step": 391 }, { "epoch": 0.20620725933719095, "grad_norm": 2.0095014572143555, "learning_rate": 4.991775315898703e-06, "loss": 1.0197, "step": 392 }, { "epoch": 0.20673329826407155, "grad_norm": 2.1244406700134277, "learning_rate": 4.991718917158538e-06, "loss": 1.1081, "step": 393 }, { "epoch": 0.20725933719095213, "grad_norm": 1.9773920774459839, "learning_rate": 4.991662326029109e-06, "loss": 1.0657, "step": 394 }, { "epoch": 0.2077853761178327, "grad_norm": 2.204554796218872, "learning_rate": 4.9916055425147874e-06, "loss": 1.1434, "step": 395 }, { "epoch": 0.20831141504471332, "grad_norm": 2.068147659301758, "learning_rate": 4.991548566619957e-06, "loss": 1.1281, "step": 396 }, { "epoch": 0.2088374539715939, "grad_norm": 2.1518101692199707, "learning_rate": 4.991491398349017e-06, "loss": 1.0977, "step": 397 }, { "epoch": 0.20936349289847447, "grad_norm": 2.091654062271118, "learning_rate": 4.991434037706382e-06, "loss": 1.1033, "step": 398 }, { "epoch": 0.20988953182535508, "grad_norm": 2.8754067420959473, "learning_rate": 4.9913764846964805e-06, "loss": 1.1237, "step": 399 }, { "epoch": 0.21041557075223566, "grad_norm": 2.2165675163269043, "learning_rate": 4.991318739323757e-06, "loss": 1.1298, "step": 400 }, { "epoch": 0.21094160967911627, "grad_norm": 2.1219065189361572, "learning_rate": 4.991260801592668e-06, "loss": 1.0795, "step": 401 }, { "epoch": 0.21146764860599684, "grad_norm": 2.132737159729004, "learning_rate": 4.9912026715076885e-06, "loss": 1.0546, "step": 402 }, { "epoch": 0.21199368753287742, "grad_norm": 2.228076457977295, "learning_rate": 4.9911443490733075e-06, "loss": 1.1759, "step": 403 }, { "epoch": 0.21251972645975803, "grad_norm": 2.1305177211761475, "learning_rate": 4.991085834294027e-06, "loss": 1.0865, "step": 404 }, { "epoch": 0.2130457653866386, "grad_norm": 2.1550936698913574, "learning_rate": 4.991027127174365e-06, "loss": 1.1027, "step": 405 }, { "epoch": 0.2135718043135192, "grad_norm": 2.3489346504211426, "learning_rate": 4.990968227718854e-06, "loss": 1.184, "step": 406 }, { "epoch": 0.2140978432403998, "grad_norm": 2.2208189964294434, "learning_rate": 4.9909091359320434e-06, "loss": 1.1476, "step": 407 }, { "epoch": 0.21462388216728037, "grad_norm": 2.230978012084961, "learning_rate": 4.990849851818494e-06, "loss": 1.1125, "step": 408 }, { "epoch": 0.21514992109416098, "grad_norm": 2.294647216796875, "learning_rate": 4.990790375382784e-06, "loss": 1.1526, "step": 409 }, { "epoch": 0.21567596002104156, "grad_norm": 2.160446882247925, "learning_rate": 4.990730706629507e-06, "loss": 1.1569, "step": 410 }, { "epoch": 0.21620199894792214, "grad_norm": 2.1352434158325195, "learning_rate": 4.990670845563268e-06, "loss": 1.049, "step": 411 }, { "epoch": 0.21672803787480274, "grad_norm": 2.0740866661071777, "learning_rate": 4.99061079218869e-06, "loss": 1.104, "step": 412 }, { "epoch": 0.21725407680168332, "grad_norm": 2.302877426147461, "learning_rate": 4.990550546510408e-06, "loss": 1.0942, "step": 413 }, { "epoch": 0.2177801157285639, "grad_norm": 2.270836353302002, "learning_rate": 4.990490108533076e-06, "loss": 1.107, "step": 414 }, { "epoch": 0.2183061546554445, "grad_norm": 2.05703067779541, "learning_rate": 4.99042947826136e-06, "loss": 1.1284, "step": 415 }, { "epoch": 0.21883219358232509, "grad_norm": 2.3524155616760254, "learning_rate": 4.990368655699941e-06, "loss": 1.068, "step": 416 }, { "epoch": 0.2193582325092057, "grad_norm": 2.5300350189208984, "learning_rate": 4.9903076408535145e-06, "loss": 1.0993, "step": 417 }, { "epoch": 0.21988427143608627, "grad_norm": 2.1858162879943848, "learning_rate": 4.990246433726793e-06, "loss": 1.1398, "step": 418 }, { "epoch": 0.22041031036296685, "grad_norm": 1.9856489896774292, "learning_rate": 4.990185034324501e-06, "loss": 1.0671, "step": 419 }, { "epoch": 0.22093634928984746, "grad_norm": 2.177152156829834, "learning_rate": 4.99012344265138e-06, "loss": 1.1673, "step": 420 }, { "epoch": 0.22146238821672803, "grad_norm": 2.128787040710449, "learning_rate": 4.990061658712186e-06, "loss": 1.1629, "step": 421 }, { "epoch": 0.2219884271436086, "grad_norm": 2.1840457916259766, "learning_rate": 4.989999682511688e-06, "loss": 1.0739, "step": 422 }, { "epoch": 0.22251446607048922, "grad_norm": 2.37825608253479, "learning_rate": 4.989937514054673e-06, "loss": 1.1179, "step": 423 }, { "epoch": 0.2230405049973698, "grad_norm": 2.2746498584747314, "learning_rate": 4.98987515334594e-06, "loss": 1.1117, "step": 424 }, { "epoch": 0.2235665439242504, "grad_norm": 2.441087007522583, "learning_rate": 4.989812600390304e-06, "loss": 1.134, "step": 425 }, { "epoch": 0.22409258285113098, "grad_norm": 1.9548932313919067, "learning_rate": 4.989749855192596e-06, "loss": 1.0962, "step": 426 }, { "epoch": 0.22461862177801156, "grad_norm": 2.382025957107544, "learning_rate": 4.989686917757659e-06, "loss": 1.1233, "step": 427 }, { "epoch": 0.22514466070489217, "grad_norm": 2.1739771366119385, "learning_rate": 4.989623788090353e-06, "loss": 1.0665, "step": 428 }, { "epoch": 0.22567069963177275, "grad_norm": 2.3246262073516846, "learning_rate": 4.989560466195553e-06, "loss": 1.0834, "step": 429 }, { "epoch": 0.22619673855865335, "grad_norm": 2.1649882793426514, "learning_rate": 4.9894969520781475e-06, "loss": 1.1144, "step": 430 }, { "epoch": 0.22672277748553393, "grad_norm": 2.307199001312256, "learning_rate": 4.98943324574304e-06, "loss": 1.2195, "step": 431 }, { "epoch": 0.2272488164124145, "grad_norm": 2.2414958477020264, "learning_rate": 4.989369347195151e-06, "loss": 1.0549, "step": 432 }, { "epoch": 0.22777485533929512, "grad_norm": 2.12762713432312, "learning_rate": 4.989305256439413e-06, "loss": 1.1185, "step": 433 }, { "epoch": 0.2283008942661757, "grad_norm": 2.1503520011901855, "learning_rate": 4.989240973480774e-06, "loss": 1.1294, "step": 434 }, { "epoch": 0.22882693319305628, "grad_norm": 2.1283833980560303, "learning_rate": 4.9891764983242e-06, "loss": 1.1154, "step": 435 }, { "epoch": 0.22935297211993688, "grad_norm": 2.239828109741211, "learning_rate": 4.9891118309746666e-06, "loss": 1.073, "step": 436 }, { "epoch": 0.22987901104681746, "grad_norm": 2.396672248840332, "learning_rate": 4.989046971437167e-06, "loss": 1.0916, "step": 437 }, { "epoch": 0.23040504997369807, "grad_norm": 2.1172304153442383, "learning_rate": 4.98898191971671e-06, "loss": 1.1001, "step": 438 }, { "epoch": 0.23093108890057865, "grad_norm": 2.1714346408843994, "learning_rate": 4.98891667581832e-06, "loss": 1.1672, "step": 439 }, { "epoch": 0.23145712782745922, "grad_norm": 2.058523178100586, "learning_rate": 4.98885123974703e-06, "loss": 1.0842, "step": 440 }, { "epoch": 0.23198316675433983, "grad_norm": 2.4147160053253174, "learning_rate": 4.988785611507896e-06, "loss": 1.0755, "step": 441 }, { "epoch": 0.2325092056812204, "grad_norm": 2.274296283721924, "learning_rate": 4.988719791105985e-06, "loss": 1.1141, "step": 442 }, { "epoch": 0.233035244608101, "grad_norm": 2.178182363510132, "learning_rate": 4.988653778546379e-06, "loss": 1.212, "step": 443 }, { "epoch": 0.2335612835349816, "grad_norm": 2.200793743133545, "learning_rate": 4.988587573834173e-06, "loss": 1.0992, "step": 444 }, { "epoch": 0.23408732246186217, "grad_norm": 1.9726881980895996, "learning_rate": 4.98852117697448e-06, "loss": 1.1165, "step": 445 }, { "epoch": 0.23461336138874278, "grad_norm": 2.1173300743103027, "learning_rate": 4.988454587972428e-06, "loss": 1.1162, "step": 446 }, { "epoch": 0.23513940031562336, "grad_norm": 2.1428768634796143, "learning_rate": 4.9883878068331556e-06, "loss": 1.1343, "step": 447 }, { "epoch": 0.23566543924250394, "grad_norm": 2.00190806388855, "learning_rate": 4.988320833561822e-06, "loss": 1.0873, "step": 448 }, { "epoch": 0.23619147816938454, "grad_norm": 2.2472777366638184, "learning_rate": 4.988253668163596e-06, "loss": 1.1209, "step": 449 }, { "epoch": 0.23671751709626512, "grad_norm": 2.0522475242614746, "learning_rate": 4.988186310643666e-06, "loss": 1.0912, "step": 450 }, { "epoch": 0.2372435560231457, "grad_norm": 2.1521215438842773, "learning_rate": 4.98811876100723e-06, "loss": 1.0971, "step": 451 }, { "epoch": 0.2377695949500263, "grad_norm": 2.1117734909057617, "learning_rate": 4.988051019259505e-06, "loss": 1.1247, "step": 452 }, { "epoch": 0.2382956338769069, "grad_norm": 2.1884706020355225, "learning_rate": 4.987983085405722e-06, "loss": 1.1255, "step": 453 }, { "epoch": 0.2388216728037875, "grad_norm": 2.138962984085083, "learning_rate": 4.9879149594511245e-06, "loss": 1.0787, "step": 454 }, { "epoch": 0.23934771173066807, "grad_norm": 2.553452730178833, "learning_rate": 4.987846641400974e-06, "loss": 1.1178, "step": 455 }, { "epoch": 0.23987375065754865, "grad_norm": 2.5340464115142822, "learning_rate": 4.987778131260546e-06, "loss": 1.1577, "step": 456 }, { "epoch": 0.24039978958442926, "grad_norm": 2.2375919818878174, "learning_rate": 4.987709429035128e-06, "loss": 1.0711, "step": 457 }, { "epoch": 0.24092582851130984, "grad_norm": 2.35756254196167, "learning_rate": 4.987640534730027e-06, "loss": 1.1031, "step": 458 }, { "epoch": 0.24145186743819042, "grad_norm": 2.03385591506958, "learning_rate": 4.987571448350561e-06, "loss": 1.0869, "step": 459 }, { "epoch": 0.24197790636507102, "grad_norm": 2.662584066390991, "learning_rate": 4.987502169902065e-06, "loss": 1.0909, "step": 460 }, { "epoch": 0.2425039452919516, "grad_norm": 2.2569165229797363, "learning_rate": 4.987432699389888e-06, "loss": 1.1576, "step": 461 }, { "epoch": 0.2430299842188322, "grad_norm": 1.9718097448349, "learning_rate": 4.987363036819393e-06, "loss": 1.0577, "step": 462 }, { "epoch": 0.24355602314571279, "grad_norm": 2.2083537578582764, "learning_rate": 4.987293182195959e-06, "loss": 1.1328, "step": 463 }, { "epoch": 0.24408206207259336, "grad_norm": 2.2045726776123047, "learning_rate": 4.987223135524981e-06, "loss": 1.0908, "step": 464 }, { "epoch": 0.24460810099947397, "grad_norm": 2.213714122772217, "learning_rate": 4.987152896811866e-06, "loss": 1.124, "step": 465 }, { "epoch": 0.24513413992635455, "grad_norm": 4.030746936798096, "learning_rate": 4.987082466062038e-06, "loss": 1.0855, "step": 466 }, { "epoch": 0.24566017885323513, "grad_norm": 2.1142022609710693, "learning_rate": 4.987011843280934e-06, "loss": 1.1305, "step": 467 }, { "epoch": 0.24618621778011573, "grad_norm": 2.1746232509613037, "learning_rate": 4.986941028474009e-06, "loss": 1.0846, "step": 468 }, { "epoch": 0.2467122567069963, "grad_norm": 2.038947820663452, "learning_rate": 4.986870021646728e-06, "loss": 1.0907, "step": 469 }, { "epoch": 0.24723829563387692, "grad_norm": 12.261099815368652, "learning_rate": 4.986798822804576e-06, "loss": 1.1012, "step": 470 }, { "epoch": 0.2477643345607575, "grad_norm": 2.020077705383301, "learning_rate": 4.986727431953048e-06, "loss": 1.097, "step": 471 }, { "epoch": 0.24829037348763808, "grad_norm": 2.070114850997925, "learning_rate": 4.986655849097658e-06, "loss": 1.175, "step": 472 }, { "epoch": 0.24881641241451868, "grad_norm": 2.0364394187927246, "learning_rate": 4.986584074243932e-06, "loss": 1.0892, "step": 473 }, { "epoch": 0.24934245134139926, "grad_norm": 2.1961004734039307, "learning_rate": 4.986512107397413e-06, "loss": 1.0867, "step": 474 }, { "epoch": 0.24986849026827984, "grad_norm": 3.1488072872161865, "learning_rate": 4.986439948563656e-06, "loss": 1.0276, "step": 475 }, { "epoch": 0.2503945291951604, "grad_norm": 2.3070068359375, "learning_rate": 4.986367597748235e-06, "loss": 1.0897, "step": 476 }, { "epoch": 0.25092056812204105, "grad_norm": 2.0328757762908936, "learning_rate": 4.986295054956733e-06, "loss": 1.0573, "step": 477 }, { "epoch": 0.25144660704892163, "grad_norm": 2.4608747959136963, "learning_rate": 4.986222320194754e-06, "loss": 1.1343, "step": 478 }, { "epoch": 0.2519726459758022, "grad_norm": 2.249994993209839, "learning_rate": 4.986149393467913e-06, "loss": 1.0771, "step": 479 }, { "epoch": 0.2524986849026828, "grad_norm": 2.1573803424835205, "learning_rate": 4.98607627478184e-06, "loss": 1.0795, "step": 480 }, { "epoch": 0.25302472382956337, "grad_norm": 2.6239383220672607, "learning_rate": 4.986002964142182e-06, "loss": 1.0874, "step": 481 }, { "epoch": 0.253550762756444, "grad_norm": 2.0815794467926025, "learning_rate": 4.985929461554597e-06, "loss": 1.0729, "step": 482 }, { "epoch": 0.2540768016833246, "grad_norm": 2.156259059906006, "learning_rate": 4.985855767024763e-06, "loss": 1.0912, "step": 483 }, { "epoch": 0.25460284061020516, "grad_norm": 2.4136252403259277, "learning_rate": 4.985781880558369e-06, "loss": 1.1365, "step": 484 }, { "epoch": 0.25512887953708574, "grad_norm": 2.265622854232788, "learning_rate": 4.98570780216112e-06, "loss": 1.1218, "step": 485 }, { "epoch": 0.2556549184639663, "grad_norm": 2.1097841262817383, "learning_rate": 4.985633531838735e-06, "loss": 1.1238, "step": 486 }, { "epoch": 0.2561809573908469, "grad_norm": 2.205012083053589, "learning_rate": 4.985559069596949e-06, "loss": 1.0664, "step": 487 }, { "epoch": 0.25670699631772753, "grad_norm": 2.1896169185638428, "learning_rate": 4.9854844154415115e-06, "loss": 1.0374, "step": 488 }, { "epoch": 0.2572330352446081, "grad_norm": 2.0652949810028076, "learning_rate": 4.985409569378187e-06, "loss": 1.1016, "step": 489 }, { "epoch": 0.2577590741714887, "grad_norm": 2.1278676986694336, "learning_rate": 4.985334531412754e-06, "loss": 1.147, "step": 490 }, { "epoch": 0.25828511309836927, "grad_norm": 2.2769057750701904, "learning_rate": 4.985259301551005e-06, "loss": 1.1389, "step": 491 }, { "epoch": 0.25881115202524985, "grad_norm": 2.0440104007720947, "learning_rate": 4.985183879798751e-06, "loss": 1.0826, "step": 492 }, { "epoch": 0.2593371909521305, "grad_norm": 2.4153213500976562, "learning_rate": 4.985108266161815e-06, "loss": 1.105, "step": 493 }, { "epoch": 0.25986322987901106, "grad_norm": 2.3863043785095215, "learning_rate": 4.985032460646033e-06, "loss": 1.1023, "step": 494 }, { "epoch": 0.26038926880589164, "grad_norm": 2.2597336769104004, "learning_rate": 4.98495646325726e-06, "loss": 1.1046, "step": 495 }, { "epoch": 0.2609153077327722, "grad_norm": 2.541444778442383, "learning_rate": 4.984880274001364e-06, "loss": 1.1149, "step": 496 }, { "epoch": 0.2614413466596528, "grad_norm": 2.3011064529418945, "learning_rate": 4.984803892884227e-06, "loss": 1.0757, "step": 497 }, { "epoch": 0.26196738558653343, "grad_norm": 2.116774797439575, "learning_rate": 4.9847273199117475e-06, "loss": 1.1151, "step": 498 }, { "epoch": 0.262493424513414, "grad_norm": 2.2372357845306396, "learning_rate": 4.984650555089836e-06, "loss": 1.1107, "step": 499 }, { "epoch": 0.2630194634402946, "grad_norm": 2.0782155990600586, "learning_rate": 4.984573598424421e-06, "loss": 1.1174, "step": 500 }, { "epoch": 0.26354550236717517, "grad_norm": 2.0625476837158203, "learning_rate": 4.984496449921444e-06, "loss": 1.0965, "step": 501 }, { "epoch": 0.26407154129405574, "grad_norm": 2.142184019088745, "learning_rate": 4.9844191095868615e-06, "loss": 1.0678, "step": 502 }, { "epoch": 0.2645975802209363, "grad_norm": 2.1218082904815674, "learning_rate": 4.984341577426646e-06, "loss": 1.0661, "step": 503 }, { "epoch": 0.26512361914781696, "grad_norm": 2.2910757064819336, "learning_rate": 4.984263853446783e-06, "loss": 1.1111, "step": 504 }, { "epoch": 0.26564965807469754, "grad_norm": 2.0604546070098877, "learning_rate": 4.984185937653274e-06, "loss": 1.0614, "step": 505 }, { "epoch": 0.2661756970015781, "grad_norm": 2.1210556030273438, "learning_rate": 4.984107830052134e-06, "loss": 1.0925, "step": 506 }, { "epoch": 0.2667017359284587, "grad_norm": 2.535501003265381, "learning_rate": 4.984029530649396e-06, "loss": 1.1238, "step": 507 }, { "epoch": 0.2672277748553393, "grad_norm": 2.2978546619415283, "learning_rate": 4.9839510394511035e-06, "loss": 1.1615, "step": 508 }, { "epoch": 0.2677538137822199, "grad_norm": 2.0443382263183594, "learning_rate": 4.983872356463318e-06, "loss": 1.1087, "step": 509 }, { "epoch": 0.2682798527091005, "grad_norm": 2.216139316558838, "learning_rate": 4.983793481692114e-06, "loss": 1.1431, "step": 510 }, { "epoch": 0.26880589163598106, "grad_norm": 1.9255571365356445, "learning_rate": 4.983714415143583e-06, "loss": 1.0204, "step": 511 }, { "epoch": 0.26933193056286164, "grad_norm": 2.103969097137451, "learning_rate": 4.9836351568238286e-06, "loss": 1.0855, "step": 512 }, { "epoch": 0.2698579694897422, "grad_norm": 2.5458972454071045, "learning_rate": 4.98355570673897e-06, "loss": 1.0747, "step": 513 }, { "epoch": 0.27038400841662286, "grad_norm": 2.023601531982422, "learning_rate": 4.983476064895143e-06, "loss": 1.0471, "step": 514 }, { "epoch": 0.27091004734350344, "grad_norm": 2.0976908206939697, "learning_rate": 4.983396231298496e-06, "loss": 1.0658, "step": 515 }, { "epoch": 0.271436086270384, "grad_norm": 2.4051074981689453, "learning_rate": 4.9833162059551936e-06, "loss": 1.0624, "step": 516 }, { "epoch": 0.2719621251972646, "grad_norm": 2.0524230003356934, "learning_rate": 4.983235988871414e-06, "loss": 1.1261, "step": 517 }, { "epoch": 0.27248816412414517, "grad_norm": 2.1440162658691406, "learning_rate": 4.983155580053351e-06, "loss": 0.9893, "step": 518 }, { "epoch": 0.27301420305102575, "grad_norm": 2.1923670768737793, "learning_rate": 4.983074979507213e-06, "loss": 1.1066, "step": 519 }, { "epoch": 0.2735402419779064, "grad_norm": 2.2967565059661865, "learning_rate": 4.982994187239225e-06, "loss": 1.1256, "step": 520 }, { "epoch": 0.27406628090478696, "grad_norm": 2.0392587184906006, "learning_rate": 4.982913203255623e-06, "loss": 1.1026, "step": 521 }, { "epoch": 0.27459231983166754, "grad_norm": 2.371121644973755, "learning_rate": 4.9828320275626605e-06, "loss": 1.0607, "step": 522 }, { "epoch": 0.2751183587585481, "grad_norm": 2.082239866256714, "learning_rate": 4.982750660166606e-06, "loss": 1.0749, "step": 523 }, { "epoch": 0.2756443976854287, "grad_norm": 2.2039687633514404, "learning_rate": 4.98266910107374e-06, "loss": 1.0769, "step": 524 }, { "epoch": 0.27617043661230933, "grad_norm": 2.087859869003296, "learning_rate": 4.9825873502903625e-06, "loss": 1.1575, "step": 525 }, { "epoch": 0.2766964755391899, "grad_norm": 2.1991021633148193, "learning_rate": 4.982505407822783e-06, "loss": 1.1149, "step": 526 }, { "epoch": 0.2772225144660705, "grad_norm": 2.2656140327453613, "learning_rate": 4.98242327367733e-06, "loss": 1.0948, "step": 527 }, { "epoch": 0.27774855339295107, "grad_norm": 2.1107430458068848, "learning_rate": 4.982340947860344e-06, "loss": 1.0289, "step": 528 }, { "epoch": 0.27827459231983165, "grad_norm": 2.2510344982147217, "learning_rate": 4.982258430378184e-06, "loss": 1.0694, "step": 529 }, { "epoch": 0.2788006312467123, "grad_norm": 2.252258062362671, "learning_rate": 4.982175721237218e-06, "loss": 1.0435, "step": 530 }, { "epoch": 0.27932667017359286, "grad_norm": 2.12455677986145, "learning_rate": 4.982092820443834e-06, "loss": 1.0202, "step": 531 }, { "epoch": 0.27985270910047344, "grad_norm": 2.3654651641845703, "learning_rate": 4.982009728004433e-06, "loss": 1.1282, "step": 532 }, { "epoch": 0.280378748027354, "grad_norm": 2.3759138584136963, "learning_rate": 4.981926443925431e-06, "loss": 1.1557, "step": 533 }, { "epoch": 0.2809047869542346, "grad_norm": 1.9874821901321411, "learning_rate": 4.981842968213256e-06, "loss": 1.0723, "step": 534 }, { "epoch": 0.2814308258811152, "grad_norm": 2.154383897781372, "learning_rate": 4.981759300874356e-06, "loss": 1.0786, "step": 535 }, { "epoch": 0.2819568648079958, "grad_norm": 2.1774797439575195, "learning_rate": 4.9816754419151906e-06, "loss": 1.0457, "step": 536 }, { "epoch": 0.2824829037348764, "grad_norm": 2.206082820892334, "learning_rate": 4.981591391342233e-06, "loss": 1.0216, "step": 537 }, { "epoch": 0.28300894266175697, "grad_norm": 2.008676528930664, "learning_rate": 4.981507149161975e-06, "loss": 1.0297, "step": 538 }, { "epoch": 0.28353498158863755, "grad_norm": 2.0553462505340576, "learning_rate": 4.981422715380919e-06, "loss": 1.0967, "step": 539 }, { "epoch": 0.2840610205155181, "grad_norm": 2.047567844390869, "learning_rate": 4.981338090005586e-06, "loss": 1.0524, "step": 540 }, { "epoch": 0.28458705944239876, "grad_norm": 2.2144312858581543, "learning_rate": 4.981253273042509e-06, "loss": 1.1178, "step": 541 }, { "epoch": 0.28511309836927934, "grad_norm": 2.388124465942383, "learning_rate": 4.981168264498238e-06, "loss": 1.0728, "step": 542 }, { "epoch": 0.2856391372961599, "grad_norm": 2.152280807495117, "learning_rate": 4.981083064379335e-06, "loss": 1.1146, "step": 543 }, { "epoch": 0.2861651762230405, "grad_norm": 2.1481564044952393, "learning_rate": 4.98099767269238e-06, "loss": 1.1376, "step": 544 }, { "epoch": 0.2866912151499211, "grad_norm": 2.060664415359497, "learning_rate": 4.980912089443966e-06, "loss": 1.0961, "step": 545 }, { "epoch": 0.2872172540768017, "grad_norm": 2.032557964324951, "learning_rate": 4.9808263146406985e-06, "loss": 1.1055, "step": 546 }, { "epoch": 0.2877432930036823, "grad_norm": 2.0957093238830566, "learning_rate": 4.980740348289204e-06, "loss": 1.0444, "step": 547 }, { "epoch": 0.28826933193056287, "grad_norm": 2.0774853229522705, "learning_rate": 4.980654190396118e-06, "loss": 1.0963, "step": 548 }, { "epoch": 0.28879537085744345, "grad_norm": 2.0808207988739014, "learning_rate": 4.980567840968094e-06, "loss": 1.0634, "step": 549 }, { "epoch": 0.289321409784324, "grad_norm": 2.2924559116363525, "learning_rate": 4.980481300011797e-06, "loss": 1.0805, "step": 550 }, { "epoch": 0.2898474487112046, "grad_norm": 2.041088104248047, "learning_rate": 4.980394567533911e-06, "loss": 1.0983, "step": 551 }, { "epoch": 0.29037348763808524, "grad_norm": 2.030073881149292, "learning_rate": 4.980307643541132e-06, "loss": 1.1334, "step": 552 }, { "epoch": 0.2908995265649658, "grad_norm": 2.15849232673645, "learning_rate": 4.980220528040172e-06, "loss": 1.0906, "step": 553 }, { "epoch": 0.2914255654918464, "grad_norm": 2.094135284423828, "learning_rate": 4.9801332210377574e-06, "loss": 1.0644, "step": 554 }, { "epoch": 0.291951604418727, "grad_norm": 2.193941354751587, "learning_rate": 4.980045722540628e-06, "loss": 1.0819, "step": 555 }, { "epoch": 0.29247764334560755, "grad_norm": 2.2015504837036133, "learning_rate": 4.979958032555542e-06, "loss": 1.0759, "step": 556 }, { "epoch": 0.2930036822724882, "grad_norm": 2.1240222454071045, "learning_rate": 4.979870151089267e-06, "loss": 1.1268, "step": 557 }, { "epoch": 0.29352972119936876, "grad_norm": 2.0243959426879883, "learning_rate": 4.9797820781485905e-06, "loss": 1.0449, "step": 558 }, { "epoch": 0.29405576012624934, "grad_norm": 2.2300705909729004, "learning_rate": 4.979693813740313e-06, "loss": 1.0493, "step": 559 }, { "epoch": 0.2945817990531299, "grad_norm": 2.1185836791992188, "learning_rate": 4.979605357871249e-06, "loss": 1.0921, "step": 560 }, { "epoch": 0.2951078379800105, "grad_norm": 2.091691732406616, "learning_rate": 4.979516710548227e-06, "loss": 1.1025, "step": 561 }, { "epoch": 0.29563387690689114, "grad_norm": 2.1666178703308105, "learning_rate": 4.979427871778094e-06, "loss": 1.1245, "step": 562 }, { "epoch": 0.2961599158337717, "grad_norm": 2.6985056400299072, "learning_rate": 4.9793388415677066e-06, "loss": 1.1398, "step": 563 }, { "epoch": 0.2966859547606523, "grad_norm": 2.118074655532837, "learning_rate": 4.979249619923942e-06, "loss": 1.0897, "step": 564 }, { "epoch": 0.29721199368753287, "grad_norm": 2.246856927871704, "learning_rate": 4.979160206853687e-06, "loss": 1.0714, "step": 565 }, { "epoch": 0.29773803261441345, "grad_norm": 2.201953887939453, "learning_rate": 4.979070602363846e-06, "loss": 1.1466, "step": 566 }, { "epoch": 0.29826407154129403, "grad_norm": 2.048617362976074, "learning_rate": 4.9789808064613375e-06, "loss": 1.1368, "step": 567 }, { "epoch": 0.29879011046817466, "grad_norm": 2.1507785320281982, "learning_rate": 4.978890819153095e-06, "loss": 1.1499, "step": 568 }, { "epoch": 0.29931614939505524, "grad_norm": 1.9633440971374512, "learning_rate": 4.978800640446066e-06, "loss": 1.0667, "step": 569 }, { "epoch": 0.2998421883219358, "grad_norm": 2.1089606285095215, "learning_rate": 4.978710270347214e-06, "loss": 1.0611, "step": 570 }, { "epoch": 0.3003682272488164, "grad_norm": 2.170901298522949, "learning_rate": 4.9786197088635145e-06, "loss": 1.1524, "step": 571 }, { "epoch": 0.300894266175697, "grad_norm": 2.165510892868042, "learning_rate": 4.978528956001964e-06, "loss": 1.0987, "step": 572 }, { "epoch": 0.3014203051025776, "grad_norm": 2.0415878295898438, "learning_rate": 4.978438011769565e-06, "loss": 1.1582, "step": 573 }, { "epoch": 0.3019463440294582, "grad_norm": 2.110260248184204, "learning_rate": 4.978346876173342e-06, "loss": 1.0587, "step": 574 }, { "epoch": 0.30247238295633877, "grad_norm": 2.253488063812256, "learning_rate": 4.9782555492203334e-06, "loss": 1.1038, "step": 575 }, { "epoch": 0.30299842188321935, "grad_norm": 2.0166091918945312, "learning_rate": 4.978164030917587e-06, "loss": 1.0367, "step": 576 }, { "epoch": 0.3035244608100999, "grad_norm": 2.2842600345611572, "learning_rate": 4.978072321272171e-06, "loss": 1.0996, "step": 577 }, { "epoch": 0.30405049973698056, "grad_norm": 2.0563907623291016, "learning_rate": 4.977980420291166e-06, "loss": 1.1219, "step": 578 }, { "epoch": 0.30457653866386114, "grad_norm": 2.059800863265991, "learning_rate": 4.977888327981668e-06, "loss": 1.1193, "step": 579 }, { "epoch": 0.3051025775907417, "grad_norm": 2.242919921875, "learning_rate": 4.977796044350788e-06, "loss": 1.0701, "step": 580 }, { "epoch": 0.3056286165176223, "grad_norm": 1.9749282598495483, "learning_rate": 4.977703569405651e-06, "loss": 1.0771, "step": 581 }, { "epoch": 0.3061546554445029, "grad_norm": 2.2251386642456055, "learning_rate": 4.977610903153397e-06, "loss": 1.084, "step": 582 }, { "epoch": 0.30668069437138346, "grad_norm": 2.0289855003356934, "learning_rate": 4.97751804560118e-06, "loss": 1.0732, "step": 583 }, { "epoch": 0.3072067332982641, "grad_norm": 2.152841806411743, "learning_rate": 4.977424996756171e-06, "loss": 1.0712, "step": 584 }, { "epoch": 0.30773277222514467, "grad_norm": 2.3243937492370605, "learning_rate": 4.977331756625555e-06, "loss": 1.0197, "step": 585 }, { "epoch": 0.30825881115202525, "grad_norm": 2.293274402618408, "learning_rate": 4.97723832521653e-06, "loss": 1.1121, "step": 586 }, { "epoch": 0.3087848500789058, "grad_norm": 2.139958143234253, "learning_rate": 4.97714470253631e-06, "loss": 1.0799, "step": 587 }, { "epoch": 0.3093108890057864, "grad_norm": 2.269357442855835, "learning_rate": 4.977050888592123e-06, "loss": 1.0872, "step": 588 }, { "epoch": 0.30983692793266704, "grad_norm": 2.268691301345825, "learning_rate": 4.976956883391215e-06, "loss": 1.1079, "step": 589 }, { "epoch": 0.3103629668595476, "grad_norm": 2.127131223678589, "learning_rate": 4.976862686940842e-06, "loss": 1.1217, "step": 590 }, { "epoch": 0.3108890057864282, "grad_norm": 2.0126006603240967, "learning_rate": 4.976768299248278e-06, "loss": 1.0719, "step": 591 }, { "epoch": 0.3114150447133088, "grad_norm": 1.965903639793396, "learning_rate": 4.97667372032081e-06, "loss": 1.0843, "step": 592 }, { "epoch": 0.31194108364018935, "grad_norm": 2.1280322074890137, "learning_rate": 4.976578950165742e-06, "loss": 1.0676, "step": 593 }, { "epoch": 0.31246712256707, "grad_norm": 2.2355756759643555, "learning_rate": 4.976483988790391e-06, "loss": 1.0855, "step": 594 }, { "epoch": 0.31299316149395057, "grad_norm": 2.153095245361328, "learning_rate": 4.976388836202088e-06, "loss": 1.0357, "step": 595 }, { "epoch": 0.31351920042083115, "grad_norm": 2.023137092590332, "learning_rate": 4.97629349240818e-06, "loss": 1.0381, "step": 596 }, { "epoch": 0.3140452393477117, "grad_norm": 2.2524759769439697, "learning_rate": 4.97619795741603e-06, "loss": 1.0911, "step": 597 }, { "epoch": 0.3145712782745923, "grad_norm": 2.1904008388519287, "learning_rate": 4.9761022312330135e-06, "loss": 1.047, "step": 598 }, { "epoch": 0.3150973172014729, "grad_norm": 2.3166565895080566, "learning_rate": 4.976006313866521e-06, "loss": 1.0663, "step": 599 }, { "epoch": 0.3156233561283535, "grad_norm": 2.11413836479187, "learning_rate": 4.975910205323959e-06, "loss": 1.0843, "step": 600 }, { "epoch": 0.3161493950552341, "grad_norm": 2.1609344482421875, "learning_rate": 4.975813905612749e-06, "loss": 1.1344, "step": 601 }, { "epoch": 0.3166754339821147, "grad_norm": 2.055330276489258, "learning_rate": 4.975717414740326e-06, "loss": 1.0663, "step": 602 }, { "epoch": 0.31720147290899525, "grad_norm": 2.2735755443573, "learning_rate": 4.975620732714139e-06, "loss": 1.1061, "step": 603 }, { "epoch": 0.31772751183587583, "grad_norm": 2.1966300010681152, "learning_rate": 4.975523859541654e-06, "loss": 1.1498, "step": 604 }, { "epoch": 0.31825355076275647, "grad_norm": 2.20951247215271, "learning_rate": 4.975426795230351e-06, "loss": 1.1057, "step": 605 }, { "epoch": 0.31877958968963704, "grad_norm": 2.0706050395965576, "learning_rate": 4.975329539787725e-06, "loss": 1.0906, "step": 606 }, { "epoch": 0.3193056286165176, "grad_norm": 2.0394089221954346, "learning_rate": 4.975232093221284e-06, "loss": 1.0514, "step": 607 }, { "epoch": 0.3198316675433982, "grad_norm": 2.1639111042022705, "learning_rate": 4.975134455538551e-06, "loss": 1.0787, "step": 608 }, { "epoch": 0.3203577064702788, "grad_norm": 2.025575876235962, "learning_rate": 4.975036626747067e-06, "loss": 1.0451, "step": 609 }, { "epoch": 0.3208837453971594, "grad_norm": 2.060215950012207, "learning_rate": 4.974938606854384e-06, "loss": 1.0821, "step": 610 }, { "epoch": 0.32140978432404, "grad_norm": 2.265155792236328, "learning_rate": 4.974840395868073e-06, "loss": 1.1341, "step": 611 }, { "epoch": 0.32193582325092057, "grad_norm": 2.22503924369812, "learning_rate": 4.974741993795712e-06, "loss": 1.1643, "step": 612 }, { "epoch": 0.32246186217780115, "grad_norm": 2.11155104637146, "learning_rate": 4.9746434006449034e-06, "loss": 1.0548, "step": 613 }, { "epoch": 0.32298790110468173, "grad_norm": 2.0055696964263916, "learning_rate": 4.974544616423258e-06, "loss": 1.0769, "step": 614 }, { "epoch": 0.3235139400315623, "grad_norm": 2.0843770503997803, "learning_rate": 4.974445641138403e-06, "loss": 1.0701, "step": 615 }, { "epoch": 0.32403997895844294, "grad_norm": 2.0580337047576904, "learning_rate": 4.9743464747979785e-06, "loss": 1.0465, "step": 616 }, { "epoch": 0.3245660178853235, "grad_norm": 2.3719844818115234, "learning_rate": 4.974247117409645e-06, "loss": 1.1498, "step": 617 }, { "epoch": 0.3250920568122041, "grad_norm": 1.9926241636276245, "learning_rate": 4.974147568981072e-06, "loss": 1.081, "step": 618 }, { "epoch": 0.3256180957390847, "grad_norm": 2.029318332672119, "learning_rate": 4.974047829519946e-06, "loss": 1.139, "step": 619 }, { "epoch": 0.32614413466596526, "grad_norm": 2.0171804428100586, "learning_rate": 4.973947899033969e-06, "loss": 1.0887, "step": 620 }, { "epoch": 0.3266701735928459, "grad_norm": 2.3209071159362793, "learning_rate": 4.973847777530854e-06, "loss": 1.1156, "step": 621 }, { "epoch": 0.32719621251972647, "grad_norm": 2.360849142074585, "learning_rate": 4.973747465018334e-06, "loss": 1.1305, "step": 622 }, { "epoch": 0.32772225144660705, "grad_norm": 2.1828086376190186, "learning_rate": 4.973646961504154e-06, "loss": 1.091, "step": 623 }, { "epoch": 0.32824829037348763, "grad_norm": 1.9628446102142334, "learning_rate": 4.973546266996074e-06, "loss": 1.0932, "step": 624 }, { "epoch": 0.3287743293003682, "grad_norm": 2.0040283203125, "learning_rate": 4.973445381501868e-06, "loss": 1.0723, "step": 625 }, { "epoch": 0.32930036822724884, "grad_norm": 2.289292097091675, "learning_rate": 4.973344305029326e-06, "loss": 1.1526, "step": 626 }, { "epoch": 0.3298264071541294, "grad_norm": 2.1106910705566406, "learning_rate": 4.973243037586252e-06, "loss": 1.1327, "step": 627 }, { "epoch": 0.33035244608101, "grad_norm": 2.326677083969116, "learning_rate": 4.9731415791804655e-06, "loss": 1.0898, "step": 628 }, { "epoch": 0.3308784850078906, "grad_norm": 2.086299180984497, "learning_rate": 4.9730399298198e-06, "loss": 1.0842, "step": 629 }, { "epoch": 0.33140452393477116, "grad_norm": 2.045738935470581, "learning_rate": 4.972938089512104e-06, "loss": 1.0156, "step": 630 }, { "epoch": 0.33193056286165173, "grad_norm": 2.038058280944824, "learning_rate": 4.97283605826524e-06, "loss": 1.0545, "step": 631 }, { "epoch": 0.33245660178853237, "grad_norm": 2.0892717838287354, "learning_rate": 4.972733836087088e-06, "loss": 1.099, "step": 632 }, { "epoch": 0.33298264071541295, "grad_norm": 2.2152934074401855, "learning_rate": 4.972631422985538e-06, "loss": 1.0775, "step": 633 }, { "epoch": 0.3335086796422935, "grad_norm": 2.3605494499206543, "learning_rate": 4.9725288189685e-06, "loss": 1.0682, "step": 634 }, { "epoch": 0.3340347185691741, "grad_norm": 2.076491117477417, "learning_rate": 4.9724260240438945e-06, "loss": 1.063, "step": 635 }, { "epoch": 0.3345607574960547, "grad_norm": 3.2677767276763916, "learning_rate": 4.97232303821966e-06, "loss": 1.1173, "step": 636 }, { "epoch": 0.3350867964229353, "grad_norm": 2.110320568084717, "learning_rate": 4.972219861503746e-06, "loss": 1.0264, "step": 637 }, { "epoch": 0.3356128353498159, "grad_norm": 2.101353406906128, "learning_rate": 4.972116493904121e-06, "loss": 1.0806, "step": 638 }, { "epoch": 0.3361388742766965, "grad_norm": 2.247091293334961, "learning_rate": 4.972012935428765e-06, "loss": 1.1178, "step": 639 }, { "epoch": 0.33666491320357705, "grad_norm": 2.183757781982422, "learning_rate": 4.971909186085675e-06, "loss": 1.0615, "step": 640 }, { "epoch": 0.33719095213045763, "grad_norm": 2.0801236629486084, "learning_rate": 4.97180524588286e-06, "loss": 1.0441, "step": 641 }, { "epoch": 0.33771699105733827, "grad_norm": 1.9939873218536377, "learning_rate": 4.9717011148283455e-06, "loss": 1.0853, "step": 642 }, { "epoch": 0.33824302998421885, "grad_norm": 2.13399338722229, "learning_rate": 4.971596792930174e-06, "loss": 0.9943, "step": 643 }, { "epoch": 0.3387690689110994, "grad_norm": 2.1221766471862793, "learning_rate": 4.971492280196397e-06, "loss": 1.0088, "step": 644 }, { "epoch": 0.33929510783798, "grad_norm": 2.023320436477661, "learning_rate": 4.971387576635087e-06, "loss": 1.0449, "step": 645 }, { "epoch": 0.3398211467648606, "grad_norm": 2.1422126293182373, "learning_rate": 4.971282682254327e-06, "loss": 1.0987, "step": 646 }, { "epoch": 0.3403471856917412, "grad_norm": 2.136868715286255, "learning_rate": 4.971177597062215e-06, "loss": 1.0983, "step": 647 }, { "epoch": 0.3408732246186218, "grad_norm": 2.1036930084228516, "learning_rate": 4.971072321066868e-06, "loss": 1.1284, "step": 648 }, { "epoch": 0.3413992635455024, "grad_norm": 2.147191286087036, "learning_rate": 4.970966854276411e-06, "loss": 1.1165, "step": 649 }, { "epoch": 0.34192530247238295, "grad_norm": 2.1734893321990967, "learning_rate": 4.970861196698988e-06, "loss": 1.0834, "step": 650 }, { "epoch": 0.34245134139926353, "grad_norm": 2.038435459136963, "learning_rate": 4.97075534834276e-06, "loss": 1.0193, "step": 651 }, { "epoch": 0.3429773803261441, "grad_norm": 2.077822208404541, "learning_rate": 4.970649309215895e-06, "loss": 1.0697, "step": 652 }, { "epoch": 0.34350341925302474, "grad_norm": 2.056907892227173, "learning_rate": 4.970543079326584e-06, "loss": 1.0593, "step": 653 }, { "epoch": 0.3440294581799053, "grad_norm": 2.7795369625091553, "learning_rate": 4.9704366586830275e-06, "loss": 1.122, "step": 654 }, { "epoch": 0.3445554971067859, "grad_norm": 2.0807559490203857, "learning_rate": 4.970330047293443e-06, "loss": 1.0225, "step": 655 }, { "epoch": 0.3450815360336665, "grad_norm": 2.219024658203125, "learning_rate": 4.970223245166062e-06, "loss": 1.1506, "step": 656 }, { "epoch": 0.34560757496054706, "grad_norm": 2.1809475421905518, "learning_rate": 4.970116252309131e-06, "loss": 1.1094, "step": 657 }, { "epoch": 0.3461336138874277, "grad_norm": 2.243777275085449, "learning_rate": 4.970009068730911e-06, "loss": 1.0942, "step": 658 }, { "epoch": 0.3466596528143083, "grad_norm": 2.106391191482544, "learning_rate": 4.969901694439677e-06, "loss": 1.0899, "step": 659 }, { "epoch": 0.34718569174118885, "grad_norm": 2.1109979152679443, "learning_rate": 4.96979412944372e-06, "loss": 1.0622, "step": 660 }, { "epoch": 0.34771173066806943, "grad_norm": 2.292466163635254, "learning_rate": 4.969686373751347e-06, "loss": 1.1081, "step": 661 }, { "epoch": 0.34823776959495, "grad_norm": 1.9919096231460571, "learning_rate": 4.9695784273708755e-06, "loss": 1.0774, "step": 662 }, { "epoch": 0.34876380852183064, "grad_norm": 2.2421789169311523, "learning_rate": 4.969470290310641e-06, "loss": 1.0958, "step": 663 }, { "epoch": 0.3492898474487112, "grad_norm": 2.069939613342285, "learning_rate": 4.969361962578994e-06, "loss": 1.0758, "step": 664 }, { "epoch": 0.3498158863755918, "grad_norm": 2.0892951488494873, "learning_rate": 4.969253444184297e-06, "loss": 1.105, "step": 665 }, { "epoch": 0.3503419253024724, "grad_norm": 2.1536753177642822, "learning_rate": 4.969144735134929e-06, "loss": 1.0655, "step": 666 }, { "epoch": 0.35086796422935296, "grad_norm": 2.031996250152588, "learning_rate": 4.969035835439284e-06, "loss": 1.1107, "step": 667 }, { "epoch": 0.35139400315623354, "grad_norm": 2.068693161010742, "learning_rate": 4.9689267451057714e-06, "loss": 1.0293, "step": 668 }, { "epoch": 0.35192004208311417, "grad_norm": 2.1489906311035156, "learning_rate": 4.9688174641428136e-06, "loss": 1.0656, "step": 669 }, { "epoch": 0.35244608100999475, "grad_norm": 2.5132720470428467, "learning_rate": 4.9687079925588475e-06, "loss": 1.0558, "step": 670 }, { "epoch": 0.35297211993687533, "grad_norm": 1.9639642238616943, "learning_rate": 4.968598330362326e-06, "loss": 1.0498, "step": 671 }, { "epoch": 0.3534981588637559, "grad_norm": 2.2413175106048584, "learning_rate": 4.968488477561716e-06, "loss": 0.986, "step": 672 }, { "epoch": 0.3540241977906365, "grad_norm": 2.0109381675720215, "learning_rate": 4.968378434165501e-06, "loss": 1.1112, "step": 673 }, { "epoch": 0.3545502367175171, "grad_norm": 2.1863934993743896, "learning_rate": 4.968268200182175e-06, "loss": 1.0843, "step": 674 }, { "epoch": 0.3550762756443977, "grad_norm": 2.262173652648926, "learning_rate": 4.968157775620252e-06, "loss": 1.0938, "step": 675 }, { "epoch": 0.3556023145712783, "grad_norm": 2.261918067932129, "learning_rate": 4.968047160488256e-06, "loss": 1.1004, "step": 676 }, { "epoch": 0.35612835349815886, "grad_norm": 2.13324236869812, "learning_rate": 4.967936354794728e-06, "loss": 1.0881, "step": 677 }, { "epoch": 0.35665439242503943, "grad_norm": 2.271207809448242, "learning_rate": 4.967825358548225e-06, "loss": 1.0967, "step": 678 }, { "epoch": 0.35718043135192007, "grad_norm": 2.177339553833008, "learning_rate": 4.967714171757315e-06, "loss": 1.1131, "step": 679 }, { "epoch": 0.35770647027880065, "grad_norm": 2.1329848766326904, "learning_rate": 4.967602794430585e-06, "loss": 1.112, "step": 680 }, { "epoch": 0.3582325092056812, "grad_norm": 2.0018250942230225, "learning_rate": 4.967491226576634e-06, "loss": 1.0853, "step": 681 }, { "epoch": 0.3587585481325618, "grad_norm": 2.06925106048584, "learning_rate": 4.967379468204075e-06, "loss": 1.1405, "step": 682 }, { "epoch": 0.3592845870594424, "grad_norm": 2.0437614917755127, "learning_rate": 4.967267519321538e-06, "loss": 1.1165, "step": 683 }, { "epoch": 0.35981062598632296, "grad_norm": 2.043297290802002, "learning_rate": 4.9671553799376685e-06, "loss": 1.0438, "step": 684 }, { "epoch": 0.3603366649132036, "grad_norm": 2.060760259628296, "learning_rate": 4.967043050061121e-06, "loss": 1.0401, "step": 685 }, { "epoch": 0.3608627038400842, "grad_norm": 2.3929009437561035, "learning_rate": 4.966930529700572e-06, "loss": 1.0812, "step": 686 }, { "epoch": 0.36138874276696475, "grad_norm": 2.2057461738586426, "learning_rate": 4.966817818864708e-06, "loss": 1.0499, "step": 687 }, { "epoch": 0.36191478169384533, "grad_norm": 2.0358550548553467, "learning_rate": 4.966704917562231e-06, "loss": 1.1603, "step": 688 }, { "epoch": 0.3624408206207259, "grad_norm": 2.0840682983398438, "learning_rate": 4.966591825801859e-06, "loss": 1.0967, "step": 689 }, { "epoch": 0.36296685954760655, "grad_norm": 2.0170061588287354, "learning_rate": 4.9664785435923255e-06, "loss": 1.0573, "step": 690 }, { "epoch": 0.3634928984744871, "grad_norm": 2.1349408626556396, "learning_rate": 4.966365070942375e-06, "loss": 1.0665, "step": 691 }, { "epoch": 0.3640189374013677, "grad_norm": 2.1616368293762207, "learning_rate": 4.966251407860769e-06, "loss": 1.0306, "step": 692 }, { "epoch": 0.3645449763282483, "grad_norm": 2.2529335021972656, "learning_rate": 4.966137554356285e-06, "loss": 1.0445, "step": 693 }, { "epoch": 0.36507101525512886, "grad_norm": 2.041102170944214, "learning_rate": 4.966023510437713e-06, "loss": 1.0395, "step": 694 }, { "epoch": 0.3655970541820095, "grad_norm": 2.0450620651245117, "learning_rate": 4.9659092761138585e-06, "loss": 1.064, "step": 695 }, { "epoch": 0.3661230931088901, "grad_norm": 2.163081407546997, "learning_rate": 4.965794851393541e-06, "loss": 1.0729, "step": 696 }, { "epoch": 0.36664913203577065, "grad_norm": 2.1602089405059814, "learning_rate": 4.965680236285596e-06, "loss": 1.0707, "step": 697 }, { "epoch": 0.36717517096265123, "grad_norm": 2.3263938426971436, "learning_rate": 4.965565430798875e-06, "loss": 1.0146, "step": 698 }, { "epoch": 0.3677012098895318, "grad_norm": 2.0192365646362305, "learning_rate": 4.965450434942238e-06, "loss": 1.0751, "step": 699 }, { "epoch": 0.3682272488164124, "grad_norm": 2.0557174682617188, "learning_rate": 4.965335248724568e-06, "loss": 1.0749, "step": 700 }, { "epoch": 0.368753287743293, "grad_norm": 2.29679799079895, "learning_rate": 4.965219872154757e-06, "loss": 1.0516, "step": 701 }, { "epoch": 0.3692793266701736, "grad_norm": 2.2303829193115234, "learning_rate": 4.965104305241713e-06, "loss": 1.1586, "step": 702 }, { "epoch": 0.3698053655970542, "grad_norm": 2.112283706665039, "learning_rate": 4.964988547994361e-06, "loss": 1.0833, "step": 703 }, { "epoch": 0.37033140452393476, "grad_norm": 2.1807613372802734, "learning_rate": 4.9648726004216354e-06, "loss": 1.0786, "step": 704 }, { "epoch": 0.37085744345081534, "grad_norm": 2.0990889072418213, "learning_rate": 4.964756462532492e-06, "loss": 1.0555, "step": 705 }, { "epoch": 0.371383482377696, "grad_norm": 2.2034318447113037, "learning_rate": 4.964640134335896e-06, "loss": 1.0696, "step": 706 }, { "epoch": 0.37190952130457655, "grad_norm": 2.207235813140869, "learning_rate": 4.964523615840831e-06, "loss": 1.0897, "step": 707 }, { "epoch": 0.37243556023145713, "grad_norm": 1.8820483684539795, "learning_rate": 4.964406907056291e-06, "loss": 1.0822, "step": 708 }, { "epoch": 0.3729615991583377, "grad_norm": 2.2243785858154297, "learning_rate": 4.964290007991291e-06, "loss": 1.0958, "step": 709 }, { "epoch": 0.3734876380852183, "grad_norm": 2.208770990371704, "learning_rate": 4.964172918654854e-06, "loss": 1.0803, "step": 710 }, { "epoch": 0.3740136770120989, "grad_norm": 2.1083521842956543, "learning_rate": 4.96405563905602e-06, "loss": 1.0513, "step": 711 }, { "epoch": 0.3745397159389795, "grad_norm": 2.0161774158477783, "learning_rate": 4.963938169203847e-06, "loss": 1.0775, "step": 712 }, { "epoch": 0.3750657548658601, "grad_norm": 2.1578962802886963, "learning_rate": 4.963820509107403e-06, "loss": 1.0695, "step": 713 }, { "epoch": 0.37559179379274066, "grad_norm": 2.1972339153289795, "learning_rate": 4.963702658775774e-06, "loss": 1.0703, "step": 714 }, { "epoch": 0.37611783271962124, "grad_norm": 2.338205575942993, "learning_rate": 4.9635846182180594e-06, "loss": 1.0756, "step": 715 }, { "epoch": 0.3766438716465018, "grad_norm": 2.281242847442627, "learning_rate": 4.963466387443372e-06, "loss": 1.1177, "step": 716 }, { "epoch": 0.37716991057338245, "grad_norm": 2.092036724090576, "learning_rate": 4.963347966460841e-06, "loss": 1.1004, "step": 717 }, { "epoch": 0.37769594950026303, "grad_norm": 2.148244857788086, "learning_rate": 4.963229355279611e-06, "loss": 1.1157, "step": 718 }, { "epoch": 0.3782219884271436, "grad_norm": 1.9961777925491333, "learning_rate": 4.963110553908838e-06, "loss": 1.0703, "step": 719 }, { "epoch": 0.3787480273540242, "grad_norm": 2.299091339111328, "learning_rate": 4.962991562357697e-06, "loss": 1.1265, "step": 720 }, { "epoch": 0.37927406628090476, "grad_norm": 2.1055006980895996, "learning_rate": 4.962872380635374e-06, "loss": 1.0361, "step": 721 }, { "epoch": 0.3798001052077854, "grad_norm": 2.1554667949676514, "learning_rate": 4.9627530087510725e-06, "loss": 1.0603, "step": 722 }, { "epoch": 0.380326144134666, "grad_norm": 2.1003949642181396, "learning_rate": 4.962633446714009e-06, "loss": 1.0714, "step": 723 }, { "epoch": 0.38085218306154656, "grad_norm": 2.1850736141204834, "learning_rate": 4.962513694533414e-06, "loss": 1.0795, "step": 724 }, { "epoch": 0.38137822198842714, "grad_norm": 2.0440175533294678, "learning_rate": 4.962393752218535e-06, "loss": 1.0882, "step": 725 }, { "epoch": 0.3819042609153077, "grad_norm": 2.2579755783081055, "learning_rate": 4.962273619778632e-06, "loss": 1.1066, "step": 726 }, { "epoch": 0.38243029984218835, "grad_norm": 2.0210318565368652, "learning_rate": 4.962153297222981e-06, "loss": 1.0843, "step": 727 }, { "epoch": 0.3829563387690689, "grad_norm": 2.1218135356903076, "learning_rate": 4.962032784560873e-06, "loss": 1.1039, "step": 728 }, { "epoch": 0.3834823776959495, "grad_norm": 2.2498831748962402, "learning_rate": 4.961912081801612e-06, "loss": 1.0389, "step": 729 }, { "epoch": 0.3840084166228301, "grad_norm": 2.6789276599884033, "learning_rate": 4.9617911889545175e-06, "loss": 1.0772, "step": 730 }, { "epoch": 0.38453445554971066, "grad_norm": 1.9847339391708374, "learning_rate": 4.961670106028924e-06, "loss": 1.0804, "step": 731 }, { "epoch": 0.38506049447659124, "grad_norm": 2.048737049102783, "learning_rate": 4.9615488330341814e-06, "loss": 1.1089, "step": 732 }, { "epoch": 0.3855865334034719, "grad_norm": 2.2241313457489014, "learning_rate": 4.961427369979652e-06, "loss": 1.0618, "step": 733 }, { "epoch": 0.38611257233035245, "grad_norm": 1.9084025621414185, "learning_rate": 4.961305716874716e-06, "loss": 1.0316, "step": 734 }, { "epoch": 0.38663861125723303, "grad_norm": 2.0064773559570312, "learning_rate": 4.9611838737287646e-06, "loss": 1.0289, "step": 735 }, { "epoch": 0.3871646501841136, "grad_norm": 2.386962652206421, "learning_rate": 4.961061840551205e-06, "loss": 1.1488, "step": 736 }, { "epoch": 0.3876906891109942, "grad_norm": 2.0626862049102783, "learning_rate": 4.960939617351462e-06, "loss": 1.0793, "step": 737 }, { "epoch": 0.3882167280378748, "grad_norm": 2.1622767448425293, "learning_rate": 4.960817204138971e-06, "loss": 1.0923, "step": 738 }, { "epoch": 0.3887427669647554, "grad_norm": 2.049163818359375, "learning_rate": 4.9606946009231834e-06, "loss": 1.0423, "step": 739 }, { "epoch": 0.389268805891636, "grad_norm": 2.0196399688720703, "learning_rate": 4.960571807713568e-06, "loss": 0.9832, "step": 740 }, { "epoch": 0.38979484481851656, "grad_norm": 1.982647180557251, "learning_rate": 4.960448824519602e-06, "loss": 1.0424, "step": 741 }, { "epoch": 0.39032088374539714, "grad_norm": 2.0468926429748535, "learning_rate": 4.960325651350784e-06, "loss": 1.074, "step": 742 }, { "epoch": 0.3908469226722778, "grad_norm": 2.402381181716919, "learning_rate": 4.960202288216624e-06, "loss": 1.058, "step": 743 }, { "epoch": 0.39137296159915835, "grad_norm": 2.065232753753662, "learning_rate": 4.960078735126646e-06, "loss": 1.0985, "step": 744 }, { "epoch": 0.39189900052603893, "grad_norm": 2.1949756145477295, "learning_rate": 4.95995499209039e-06, "loss": 1.0791, "step": 745 }, { "epoch": 0.3924250394529195, "grad_norm": 2.121232271194458, "learning_rate": 4.959831059117411e-06, "loss": 1.0606, "step": 746 }, { "epoch": 0.3929510783798001, "grad_norm": 2.247145652770996, "learning_rate": 4.959706936217278e-06, "loss": 1.0991, "step": 747 }, { "epoch": 0.39347711730668067, "grad_norm": 2.0540339946746826, "learning_rate": 4.9595826233995735e-06, "loss": 1.0835, "step": 748 }, { "epoch": 0.3940031562335613, "grad_norm": 2.173257350921631, "learning_rate": 4.959458120673898e-06, "loss": 1.0588, "step": 749 }, { "epoch": 0.3945291951604419, "grad_norm": 2.1530778408050537, "learning_rate": 4.959333428049862e-06, "loss": 1.0395, "step": 750 }, { "epoch": 0.39505523408732246, "grad_norm": 2.0705490112304688, "learning_rate": 4.959208545537095e-06, "loss": 1.071, "step": 751 }, { "epoch": 0.39558127301420304, "grad_norm": 1.9439338445663452, "learning_rate": 4.95908347314524e-06, "loss": 1.0224, "step": 752 }, { "epoch": 0.3961073119410836, "grad_norm": 2.1683454513549805, "learning_rate": 4.958958210883952e-06, "loss": 1.0745, "step": 753 }, { "epoch": 0.39663335086796425, "grad_norm": 2.2809042930603027, "learning_rate": 4.958832758762903e-06, "loss": 1.0887, "step": 754 }, { "epoch": 0.39715938979484483, "grad_norm": 2.161447048187256, "learning_rate": 4.9587071167917814e-06, "loss": 1.1447, "step": 755 }, { "epoch": 0.3976854287217254, "grad_norm": 2.1375932693481445, "learning_rate": 4.958581284980285e-06, "loss": 1.0295, "step": 756 }, { "epoch": 0.398211467648606, "grad_norm": 2.0431041717529297, "learning_rate": 4.958455263338133e-06, "loss": 1.0567, "step": 757 }, { "epoch": 0.39873750657548657, "grad_norm": 2.0288238525390625, "learning_rate": 4.958329051875053e-06, "loss": 1.0736, "step": 758 }, { "epoch": 0.3992635455023672, "grad_norm": 2.146132230758667, "learning_rate": 4.958202650600791e-06, "loss": 1.0744, "step": 759 }, { "epoch": 0.3997895844292478, "grad_norm": 2.1740963459014893, "learning_rate": 4.958076059525107e-06, "loss": 1.0263, "step": 760 }, { "epoch": 0.40031562335612836, "grad_norm": 2.1219875812530518, "learning_rate": 4.957949278657773e-06, "loss": 1.0508, "step": 761 }, { "epoch": 0.40084166228300894, "grad_norm": 2.0742340087890625, "learning_rate": 4.9578223080085815e-06, "loss": 1.0455, "step": 762 }, { "epoch": 0.4013677012098895, "grad_norm": 2.1779415607452393, "learning_rate": 4.957695147587334e-06, "loss": 1.1079, "step": 763 }, { "epoch": 0.4018937401367701, "grad_norm": 2.151047706604004, "learning_rate": 4.957567797403848e-06, "loss": 1.0893, "step": 764 }, { "epoch": 0.40241977906365073, "grad_norm": 2.1728570461273193, "learning_rate": 4.9574402574679594e-06, "loss": 1.0726, "step": 765 }, { "epoch": 0.4029458179905313, "grad_norm": 1.982230305671692, "learning_rate": 4.957312527789512e-06, "loss": 1.0629, "step": 766 }, { "epoch": 0.4034718569174119, "grad_norm": 1.953464150428772, "learning_rate": 4.95718460837837e-06, "loss": 1.1093, "step": 767 }, { "epoch": 0.40399789584429247, "grad_norm": 1.9718215465545654, "learning_rate": 4.9570564992444116e-06, "loss": 1.1018, "step": 768 }, { "epoch": 0.40452393477117304, "grad_norm": 2.067629337310791, "learning_rate": 4.956928200397526e-06, "loss": 1.0364, "step": 769 }, { "epoch": 0.4050499736980537, "grad_norm": 2.1172022819519043, "learning_rate": 4.956799711847619e-06, "loss": 1.0693, "step": 770 }, { "epoch": 0.40557601262493426, "grad_norm": 2.0539615154266357, "learning_rate": 4.956671033604613e-06, "loss": 1.0034, "step": 771 }, { "epoch": 0.40610205155181484, "grad_norm": 1.9780375957489014, "learning_rate": 4.956542165678443e-06, "loss": 1.0515, "step": 772 }, { "epoch": 0.4066280904786954, "grad_norm": 2.0974819660186768, "learning_rate": 4.95641310807906e-06, "loss": 1.0754, "step": 773 }, { "epoch": 0.407154129405576, "grad_norm": 2.1018221378326416, "learning_rate": 4.956283860816427e-06, "loss": 1.1102, "step": 774 }, { "epoch": 0.4076801683324566, "grad_norm": 2.3969085216522217, "learning_rate": 4.9561544239005235e-06, "loss": 1.0455, "step": 775 }, { "epoch": 0.4082062072593372, "grad_norm": 2.2645649909973145, "learning_rate": 4.956024797341345e-06, "loss": 0.9724, "step": 776 }, { "epoch": 0.4087322461862178, "grad_norm": 2.3406150341033936, "learning_rate": 4.955894981148898e-06, "loss": 1.1341, "step": 777 }, { "epoch": 0.40925828511309836, "grad_norm": 2.0782880783081055, "learning_rate": 4.955764975333208e-06, "loss": 1.0116, "step": 778 }, { "epoch": 0.40978432403997894, "grad_norm": 2.1269314289093018, "learning_rate": 4.955634779904312e-06, "loss": 1.0967, "step": 779 }, { "epoch": 0.4103103629668595, "grad_norm": 2.198559522628784, "learning_rate": 4.9555043948722625e-06, "loss": 1.0815, "step": 780 }, { "epoch": 0.41083640189374016, "grad_norm": 2.2189719676971436, "learning_rate": 4.9553738202471264e-06, "loss": 1.0559, "step": 781 }, { "epoch": 0.41136244082062073, "grad_norm": 2.2313179969787598, "learning_rate": 4.955243056038986e-06, "loss": 1.046, "step": 782 }, { "epoch": 0.4118884797475013, "grad_norm": 1.9563003778457642, "learning_rate": 4.955112102257939e-06, "loss": 1.0735, "step": 783 }, { "epoch": 0.4124145186743819, "grad_norm": 1.99479341506958, "learning_rate": 4.954980958914093e-06, "loss": 1.0657, "step": 784 }, { "epoch": 0.41294055760126247, "grad_norm": 2.029634714126587, "learning_rate": 4.954849626017577e-06, "loss": 1.0811, "step": 785 }, { "epoch": 0.4134665965281431, "grad_norm": 2.2947723865509033, "learning_rate": 4.9547181035785314e-06, "loss": 1.0807, "step": 786 }, { "epoch": 0.4139926354550237, "grad_norm": 2.0323445796966553, "learning_rate": 4.9545863916071094e-06, "loss": 1.0715, "step": 787 }, { "epoch": 0.41451867438190426, "grad_norm": 2.0068464279174805, "learning_rate": 4.954454490113482e-06, "loss": 1.0447, "step": 788 }, { "epoch": 0.41504471330878484, "grad_norm": 2.132549285888672, "learning_rate": 4.954322399107833e-06, "loss": 1.0454, "step": 789 }, { "epoch": 0.4155707522356654, "grad_norm": 2.0086755752563477, "learning_rate": 4.954190118600361e-06, "loss": 1.0724, "step": 790 }, { "epoch": 0.41609679116254605, "grad_norm": 2.1461241245269775, "learning_rate": 4.95405764860128e-06, "loss": 1.0391, "step": 791 }, { "epoch": 0.41662283008942663, "grad_norm": 2.1352107524871826, "learning_rate": 4.953924989120818e-06, "loss": 0.9898, "step": 792 }, { "epoch": 0.4171488690163072, "grad_norm": 2.0694406032562256, "learning_rate": 4.953792140169219e-06, "loss": 1.0819, "step": 793 }, { "epoch": 0.4176749079431878, "grad_norm": 2.088433027267456, "learning_rate": 4.953659101756739e-06, "loss": 1.0833, "step": 794 }, { "epoch": 0.41820094687006837, "grad_norm": 2.1760306358337402, "learning_rate": 4.95352587389365e-06, "loss": 1.0535, "step": 795 }, { "epoch": 0.41872698579694895, "grad_norm": 2.2031099796295166, "learning_rate": 4.95339245659024e-06, "loss": 1.0389, "step": 796 }, { "epoch": 0.4192530247238296, "grad_norm": 2.247276782989502, "learning_rate": 4.953258849856809e-06, "loss": 1.0839, "step": 797 }, { "epoch": 0.41977906365071016, "grad_norm": 2.24357271194458, "learning_rate": 4.953125053703674e-06, "loss": 1.0666, "step": 798 }, { "epoch": 0.42030510257759074, "grad_norm": 2.240151882171631, "learning_rate": 4.952991068141165e-06, "loss": 1.1009, "step": 799 }, { "epoch": 0.4208311415044713, "grad_norm": 2.2172327041625977, "learning_rate": 4.952856893179628e-06, "loss": 1.0928, "step": 800 }, { "epoch": 0.4213571804313519, "grad_norm": 2.377336025238037, "learning_rate": 4.952722528829422e-06, "loss": 1.0968, "step": 801 }, { "epoch": 0.42188321935823253, "grad_norm": 2.466841459274292, "learning_rate": 4.9525879751009205e-06, "loss": 1.0631, "step": 802 }, { "epoch": 0.4224092582851131, "grad_norm": 2.035644054412842, "learning_rate": 4.952453232004516e-06, "loss": 1.0609, "step": 803 }, { "epoch": 0.4229352972119937, "grad_norm": 2.2472054958343506, "learning_rate": 4.952318299550608e-06, "loss": 1.0613, "step": 804 }, { "epoch": 0.42346133613887427, "grad_norm": 2.175999879837036, "learning_rate": 4.952183177749618e-06, "loss": 1.0954, "step": 805 }, { "epoch": 0.42398737506575485, "grad_norm": 2.206052303314209, "learning_rate": 4.952047866611978e-06, "loss": 1.0965, "step": 806 }, { "epoch": 0.4245134139926355, "grad_norm": 1.9550546407699585, "learning_rate": 4.951912366148135e-06, "loss": 1.0835, "step": 807 }, { "epoch": 0.42503945291951606, "grad_norm": 2.194734811782837, "learning_rate": 4.951776676368552e-06, "loss": 1.1179, "step": 808 }, { "epoch": 0.42556549184639664, "grad_norm": 2.094862222671509, "learning_rate": 4.951640797283704e-06, "loss": 1.0634, "step": 809 }, { "epoch": 0.4260915307732772, "grad_norm": 1.980043888092041, "learning_rate": 4.951504728904085e-06, "loss": 1.0874, "step": 810 }, { "epoch": 0.4266175697001578, "grad_norm": 2.2654919624328613, "learning_rate": 4.9513684712402e-06, "loss": 1.057, "step": 811 }, { "epoch": 0.4271436086270384, "grad_norm": 2.197120189666748, "learning_rate": 4.951232024302569e-06, "loss": 1.1114, "step": 812 }, { "epoch": 0.427669647553919, "grad_norm": 2.143324375152588, "learning_rate": 4.9510953881017275e-06, "loss": 1.07, "step": 813 }, { "epoch": 0.4281956864807996, "grad_norm": 2.1920077800750732, "learning_rate": 4.950958562648226e-06, "loss": 1.0373, "step": 814 }, { "epoch": 0.42872172540768017, "grad_norm": 2.0401923656463623, "learning_rate": 4.950821547952629e-06, "loss": 1.1111, "step": 815 }, { "epoch": 0.42924776433456074, "grad_norm": 1.9541674852371216, "learning_rate": 4.950684344025515e-06, "loss": 1.0153, "step": 816 }, { "epoch": 0.4297738032614413, "grad_norm": 3.5096704959869385, "learning_rate": 4.9505469508774776e-06, "loss": 1.0435, "step": 817 }, { "epoch": 0.43029984218832196, "grad_norm": 2.0304462909698486, "learning_rate": 4.9504093685191255e-06, "loss": 1.0786, "step": 818 }, { "epoch": 0.43082588111520254, "grad_norm": 2.115224599838257, "learning_rate": 4.950271596961082e-06, "loss": 1.0854, "step": 819 }, { "epoch": 0.4313519200420831, "grad_norm": 2.176621913909912, "learning_rate": 4.950133636213984e-06, "loss": 0.9909, "step": 820 }, { "epoch": 0.4318779589689637, "grad_norm": 2.2046449184417725, "learning_rate": 4.949995486288484e-06, "loss": 1.0688, "step": 821 }, { "epoch": 0.43240399789584427, "grad_norm": 2.1462888717651367, "learning_rate": 4.949857147195249e-06, "loss": 1.0644, "step": 822 }, { "epoch": 0.4329300368227249, "grad_norm": 2.0735347270965576, "learning_rate": 4.94971861894496e-06, "loss": 1.022, "step": 823 }, { "epoch": 0.4334560757496055, "grad_norm": 2.086724042892456, "learning_rate": 4.949579901548312e-06, "loss": 1.02, "step": 824 }, { "epoch": 0.43398211467648606, "grad_norm": 2.078622341156006, "learning_rate": 4.949440995016018e-06, "loss": 1.0653, "step": 825 }, { "epoch": 0.43450815360336664, "grad_norm": 2.1504440307617188, "learning_rate": 4.949301899358801e-06, "loss": 1.0708, "step": 826 }, { "epoch": 0.4350341925302472, "grad_norm": 2.2340216636657715, "learning_rate": 4.949162614587401e-06, "loss": 1.0688, "step": 827 }, { "epoch": 0.4355602314571278, "grad_norm": 2.2017569541931152, "learning_rate": 4.949023140712574e-06, "loss": 1.0935, "step": 828 }, { "epoch": 0.43608627038400843, "grad_norm": 2.117745876312256, "learning_rate": 4.948883477745088e-06, "loss": 1.0868, "step": 829 }, { "epoch": 0.436612309310889, "grad_norm": 2.0983524322509766, "learning_rate": 4.948743625695726e-06, "loss": 1.0695, "step": 830 }, { "epoch": 0.4371383482377696, "grad_norm": 2.205693244934082, "learning_rate": 4.948603584575287e-06, "loss": 1.0541, "step": 831 }, { "epoch": 0.43766438716465017, "grad_norm": 1.9967527389526367, "learning_rate": 4.948463354394583e-06, "loss": 0.9933, "step": 832 }, { "epoch": 0.43819042609153075, "grad_norm": 2.113577127456665, "learning_rate": 4.948322935164442e-06, "loss": 1.0199, "step": 833 }, { "epoch": 0.4387164650184114, "grad_norm": 2.0825533866882324, "learning_rate": 4.948182326895705e-06, "loss": 1.0446, "step": 834 }, { "epoch": 0.43924250394529196, "grad_norm": 2.0186421871185303, "learning_rate": 4.94804152959923e-06, "loss": 1.0798, "step": 835 }, { "epoch": 0.43976854287217254, "grad_norm": 2.3025147914886475, "learning_rate": 4.947900543285888e-06, "loss": 0.9977, "step": 836 }, { "epoch": 0.4402945817990531, "grad_norm": 2.1662867069244385, "learning_rate": 4.947759367966564e-06, "loss": 1.048, "step": 837 }, { "epoch": 0.4408206207259337, "grad_norm": 2.0708656311035156, "learning_rate": 4.947618003652158e-06, "loss": 1.0715, "step": 838 }, { "epoch": 0.44134665965281433, "grad_norm": 2.2494263648986816, "learning_rate": 4.947476450353586e-06, "loss": 1.0901, "step": 839 }, { "epoch": 0.4418726985796949, "grad_norm": 2.3319430351257324, "learning_rate": 4.947334708081777e-06, "loss": 1.0308, "step": 840 }, { "epoch": 0.4423987375065755, "grad_norm": 2.134620428085327, "learning_rate": 4.947192776847676e-06, "loss": 1.0459, "step": 841 }, { "epoch": 0.44292477643345607, "grad_norm": 2.075429916381836, "learning_rate": 4.94705065666224e-06, "loss": 1.0733, "step": 842 }, { "epoch": 0.44345081536033665, "grad_norm": 2.173069953918457, "learning_rate": 4.946908347536444e-06, "loss": 1.1092, "step": 843 }, { "epoch": 0.4439768542872172, "grad_norm": 2.1481893062591553, "learning_rate": 4.946765849481274e-06, "loss": 1.0822, "step": 844 }, { "epoch": 0.44450289321409786, "grad_norm": 2.247277021408081, "learning_rate": 4.9466231625077354e-06, "loss": 1.0777, "step": 845 }, { "epoch": 0.44502893214097844, "grad_norm": 2.1181042194366455, "learning_rate": 4.946480286626842e-06, "loss": 1.1139, "step": 846 }, { "epoch": 0.445554971067859, "grad_norm": 2.05195951461792, "learning_rate": 4.946337221849628e-06, "loss": 1.0738, "step": 847 }, { "epoch": 0.4460810099947396, "grad_norm": 2.122732639312744, "learning_rate": 4.946193968187139e-06, "loss": 1.061, "step": 848 }, { "epoch": 0.4466070489216202, "grad_norm": 1.8827515840530396, "learning_rate": 4.946050525650434e-06, "loss": 1.061, "step": 849 }, { "epoch": 0.4471330878485008, "grad_norm": 2.3874471187591553, "learning_rate": 4.945906894250591e-06, "loss": 1.0667, "step": 850 }, { "epoch": 0.4476591267753814, "grad_norm": 2.274724006652832, "learning_rate": 4.945763073998699e-06, "loss": 1.0559, "step": 851 }, { "epoch": 0.44818516570226197, "grad_norm": 2.2730906009674072, "learning_rate": 4.945619064905861e-06, "loss": 1.0952, "step": 852 }, { "epoch": 0.44871120462914255, "grad_norm": 2.190969944000244, "learning_rate": 4.945474866983199e-06, "loss": 1.0816, "step": 853 }, { "epoch": 0.4492372435560231, "grad_norm": 3.6214282512664795, "learning_rate": 4.945330480241844e-06, "loss": 1.09, "step": 854 }, { "epoch": 0.44976328248290376, "grad_norm": 2.0487356185913086, "learning_rate": 4.945185904692946e-06, "loss": 1.0279, "step": 855 }, { "epoch": 0.45028932140978434, "grad_norm": 2.074282646179199, "learning_rate": 4.945041140347669e-06, "loss": 1.0514, "step": 856 }, { "epoch": 0.4508153603366649, "grad_norm": 2.126495838165283, "learning_rate": 4.944896187217187e-06, "loss": 1.0819, "step": 857 }, { "epoch": 0.4513413992635455, "grad_norm": 2.0265605449676514, "learning_rate": 4.944751045312695e-06, "loss": 1.0282, "step": 858 }, { "epoch": 0.4518674381904261, "grad_norm": 2.0557355880737305, "learning_rate": 4.944605714645399e-06, "loss": 1.1052, "step": 859 }, { "epoch": 0.4523934771173067, "grad_norm": 2.026393175125122, "learning_rate": 4.944460195226519e-06, "loss": 0.982, "step": 860 }, { "epoch": 0.4529195160441873, "grad_norm": 2.1781463623046875, "learning_rate": 4.9443144870672925e-06, "loss": 1.1251, "step": 861 }, { "epoch": 0.45344555497106787, "grad_norm": 2.053683042526245, "learning_rate": 4.944168590178968e-06, "loss": 1.0766, "step": 862 }, { "epoch": 0.45397159389794844, "grad_norm": 2.1147496700286865, "learning_rate": 4.944022504572811e-06, "loss": 1.0174, "step": 863 }, { "epoch": 0.454497632824829, "grad_norm": 2.06046199798584, "learning_rate": 4.943876230260102e-06, "loss": 1.0836, "step": 864 }, { "epoch": 0.4550236717517096, "grad_norm": 2.171419382095337, "learning_rate": 4.9437297672521345e-06, "loss": 1.0695, "step": 865 }, { "epoch": 0.45554971067859024, "grad_norm": 2.064301013946533, "learning_rate": 4.943583115560217e-06, "loss": 1.0147, "step": 866 }, { "epoch": 0.4560757496054708, "grad_norm": 2.6638195514678955, "learning_rate": 4.943436275195673e-06, "loss": 1.0565, "step": 867 }, { "epoch": 0.4566017885323514, "grad_norm": 3.9418976306915283, "learning_rate": 4.943289246169839e-06, "loss": 1.0768, "step": 868 }, { "epoch": 0.457127827459232, "grad_norm": 2.114297389984131, "learning_rate": 4.943142028494069e-06, "loss": 1.0687, "step": 869 }, { "epoch": 0.45765386638611255, "grad_norm": 2.139803171157837, "learning_rate": 4.942994622179729e-06, "loss": 1.0464, "step": 870 }, { "epoch": 0.4581799053129932, "grad_norm": 2.011474370956421, "learning_rate": 4.942847027238201e-06, "loss": 1.0181, "step": 871 }, { "epoch": 0.45870594423987376, "grad_norm": 2.1592113971710205, "learning_rate": 4.94269924368088e-06, "loss": 1.0699, "step": 872 }, { "epoch": 0.45923198316675434, "grad_norm": 2.0230283737182617, "learning_rate": 4.942551271519178e-06, "loss": 1.075, "step": 873 }, { "epoch": 0.4597580220936349, "grad_norm": 2.286768913269043, "learning_rate": 4.942403110764518e-06, "loss": 1.0604, "step": 874 }, { "epoch": 0.4602840610205155, "grad_norm": 2.305375337600708, "learning_rate": 4.942254761428343e-06, "loss": 1.0067, "step": 875 }, { "epoch": 0.46081009994739613, "grad_norm": 2.416245698928833, "learning_rate": 4.942106223522104e-06, "loss": 1.1109, "step": 876 }, { "epoch": 0.4613361388742767, "grad_norm": 2.1339962482452393, "learning_rate": 4.941957497057272e-06, "loss": 1.0708, "step": 877 }, { "epoch": 0.4618621778011573, "grad_norm": 1.9983795881271362, "learning_rate": 4.941808582045329e-06, "loss": 1.0032, "step": 878 }, { "epoch": 0.46238821672803787, "grad_norm": 2.1115024089813232, "learning_rate": 4.9416594784977735e-06, "loss": 1.0272, "step": 879 }, { "epoch": 0.46291425565491845, "grad_norm": 2.2785818576812744, "learning_rate": 4.941510186426118e-06, "loss": 1.0538, "step": 880 }, { "epoch": 0.46344029458179903, "grad_norm": 2.009938955307007, "learning_rate": 4.94136070584189e-06, "loss": 1.0432, "step": 881 }, { "epoch": 0.46396633350867966, "grad_norm": 2.119264841079712, "learning_rate": 4.94121103675663e-06, "loss": 1.063, "step": 882 }, { "epoch": 0.46449237243556024, "grad_norm": 2.267575979232788, "learning_rate": 4.941061179181896e-06, "loss": 1.0698, "step": 883 }, { "epoch": 0.4650184113624408, "grad_norm": 2.2345592975616455, "learning_rate": 4.940911133129257e-06, "loss": 1.0898, "step": 884 }, { "epoch": 0.4655444502893214, "grad_norm": 2.175180673599243, "learning_rate": 4.940760898610299e-06, "loss": 1.0915, "step": 885 }, { "epoch": 0.466070489216202, "grad_norm": 2.036628246307373, "learning_rate": 4.940610475636621e-06, "loss": 1.0981, "step": 886 }, { "epoch": 0.4665965281430826, "grad_norm": 2.193129539489746, "learning_rate": 4.9404598642198386e-06, "loss": 1.1237, "step": 887 }, { "epoch": 0.4671225670699632, "grad_norm": 1.920074462890625, "learning_rate": 4.9403090643715804e-06, "loss": 1.0358, "step": 888 }, { "epoch": 0.46764860599684377, "grad_norm": 2.0745346546173096, "learning_rate": 4.940158076103489e-06, "loss": 1.0487, "step": 889 }, { "epoch": 0.46817464492372435, "grad_norm": 1.9645469188690186, "learning_rate": 4.940006899427225e-06, "loss": 1.0256, "step": 890 }, { "epoch": 0.4687006838506049, "grad_norm": 1.9696778059005737, "learning_rate": 4.939855534354458e-06, "loss": 1.0302, "step": 891 }, { "epoch": 0.46922672277748556, "grad_norm": 2.1893057823181152, "learning_rate": 4.939703980896875e-06, "loss": 1.0391, "step": 892 }, { "epoch": 0.46975276170436614, "grad_norm": 2.0537021160125732, "learning_rate": 4.93955223906618e-06, "loss": 1.0498, "step": 893 }, { "epoch": 0.4702788006312467, "grad_norm": 2.4528138637542725, "learning_rate": 4.9394003088740875e-06, "loss": 1.0393, "step": 894 }, { "epoch": 0.4708048395581273, "grad_norm": 2.2085723876953125, "learning_rate": 4.93924819033233e-06, "loss": 1.0789, "step": 895 }, { "epoch": 0.4713308784850079, "grad_norm": 2.0029642581939697, "learning_rate": 4.9390958834526504e-06, "loss": 1.0621, "step": 896 }, { "epoch": 0.47185691741188845, "grad_norm": 2.0400004386901855, "learning_rate": 4.93894338824681e-06, "loss": 1.0426, "step": 897 }, { "epoch": 0.4723829563387691, "grad_norm": 2.3174595832824707, "learning_rate": 4.9387907047265825e-06, "loss": 1.0273, "step": 898 }, { "epoch": 0.47290899526564967, "grad_norm": 1.998889446258545, "learning_rate": 4.938637832903758e-06, "loss": 1.0401, "step": 899 }, { "epoch": 0.47343503419253025, "grad_norm": 2.0847246646881104, "learning_rate": 4.93848477279014e-06, "loss": 1.0677, "step": 900 }, { "epoch": 0.4739610731194108, "grad_norm": 2.086249351501465, "learning_rate": 4.938331524397544e-06, "loss": 1.043, "step": 901 }, { "epoch": 0.4744871120462914, "grad_norm": 2.1909382343292236, "learning_rate": 4.938178087737805e-06, "loss": 0.9977, "step": 902 }, { "epoch": 0.47501315097317204, "grad_norm": 2.066394567489624, "learning_rate": 4.938024462822769e-06, "loss": 1.044, "step": 903 }, { "epoch": 0.4755391899000526, "grad_norm": 2.1768858432769775, "learning_rate": 4.937870649664299e-06, "loss": 0.9886, "step": 904 }, { "epoch": 0.4760652288269332, "grad_norm": 2.0450236797332764, "learning_rate": 4.937716648274269e-06, "loss": 1.0471, "step": 905 }, { "epoch": 0.4765912677538138, "grad_norm": 2.218719720840454, "learning_rate": 4.937562458664571e-06, "loss": 1.0324, "step": 906 }, { "epoch": 0.47711730668069435, "grad_norm": 2.2519423961639404, "learning_rate": 4.937408080847109e-06, "loss": 1.0899, "step": 907 }, { "epoch": 0.477643345607575, "grad_norm": 2.045959234237671, "learning_rate": 4.9372535148338055e-06, "loss": 1.0383, "step": 908 }, { "epoch": 0.47816938453445557, "grad_norm": 2.1137306690216064, "learning_rate": 4.937098760636591e-06, "loss": 1.0223, "step": 909 }, { "epoch": 0.47869542346133614, "grad_norm": 2.2585835456848145, "learning_rate": 4.936943818267418e-06, "loss": 1.027, "step": 910 }, { "epoch": 0.4792214623882167, "grad_norm": 2.161625862121582, "learning_rate": 4.936788687738247e-06, "loss": 1.0318, "step": 911 }, { "epoch": 0.4797475013150973, "grad_norm": 2.0743277072906494, "learning_rate": 4.936633369061057e-06, "loss": 1.1014, "step": 912 }, { "epoch": 0.4802735402419779, "grad_norm": 2.1271307468414307, "learning_rate": 4.936477862247841e-06, "loss": 1.0403, "step": 913 }, { "epoch": 0.4807995791688585, "grad_norm": 2.0820491313934326, "learning_rate": 4.9363221673106046e-06, "loss": 1.069, "step": 914 }, { "epoch": 0.4813256180957391, "grad_norm": 2.0069093704223633, "learning_rate": 4.936166284261369e-06, "loss": 1.0752, "step": 915 }, { "epoch": 0.4818516570226197, "grad_norm": 2.2541720867156982, "learning_rate": 4.936010213112172e-06, "loss": 1.0309, "step": 916 }, { "epoch": 0.48237769594950025, "grad_norm": 2.155980110168457, "learning_rate": 4.9358539538750636e-06, "loss": 1.0078, "step": 917 }, { "epoch": 0.48290373487638083, "grad_norm": 2.217339038848877, "learning_rate": 4.935697506562107e-06, "loss": 1.0522, "step": 918 }, { "epoch": 0.48342977380326146, "grad_norm": 1.963270902633667, "learning_rate": 4.935540871185384e-06, "loss": 1.0692, "step": 919 }, { "epoch": 0.48395581273014204, "grad_norm": 1.9923917055130005, "learning_rate": 4.935384047756987e-06, "loss": 1.0926, "step": 920 }, { "epoch": 0.4844818516570226, "grad_norm": 2.177624464035034, "learning_rate": 4.935227036289026e-06, "loss": 1.0727, "step": 921 }, { "epoch": 0.4850078905839032, "grad_norm": 2.022496461868286, "learning_rate": 4.935069836793622e-06, "loss": 1.0267, "step": 922 }, { "epoch": 0.4855339295107838, "grad_norm": 2.0110666751861572, "learning_rate": 4.9349124492829155e-06, "loss": 1.0911, "step": 923 }, { "epoch": 0.4860599684376644, "grad_norm": 2.1780877113342285, "learning_rate": 4.934754873769057e-06, "loss": 1.0494, "step": 924 }, { "epoch": 0.486586007364545, "grad_norm": 2.0291390419006348, "learning_rate": 4.934597110264212e-06, "loss": 1.0485, "step": 925 }, { "epoch": 0.48711204629142557, "grad_norm": 1.947896957397461, "learning_rate": 4.9344391587805626e-06, "loss": 1.0789, "step": 926 }, { "epoch": 0.48763808521830615, "grad_norm": 1.9520971775054932, "learning_rate": 4.934281019330305e-06, "loss": 1.0644, "step": 927 }, { "epoch": 0.48816412414518673, "grad_norm": 2.0348432064056396, "learning_rate": 4.93412269192565e-06, "loss": 1.0471, "step": 928 }, { "epoch": 0.4886901630720673, "grad_norm": 2.214876651763916, "learning_rate": 4.93396417657882e-06, "loss": 1.0921, "step": 929 }, { "epoch": 0.48921620199894794, "grad_norm": 1.9910991191864014, "learning_rate": 4.933805473302057e-06, "loss": 1.0962, "step": 930 }, { "epoch": 0.4897422409258285, "grad_norm": 2.0497536659240723, "learning_rate": 4.933646582107612e-06, "loss": 1.0502, "step": 931 }, { "epoch": 0.4902682798527091, "grad_norm": 2.102994203567505, "learning_rate": 4.933487503007756e-06, "loss": 1.0676, "step": 932 }, { "epoch": 0.4907943187795897, "grad_norm": 1.885666012763977, "learning_rate": 4.933328236014768e-06, "loss": 1.0005, "step": 933 }, { "epoch": 0.49132035770647026, "grad_norm": 2.1525766849517822, "learning_rate": 4.933168781140949e-06, "loss": 1.0997, "step": 934 }, { "epoch": 0.4918463966333509, "grad_norm": 2.0346620082855225, "learning_rate": 4.9330091383986086e-06, "loss": 1.0651, "step": 935 }, { "epoch": 0.49237243556023147, "grad_norm": 2.0436878204345703, "learning_rate": 4.932849307800074e-06, "loss": 1.0539, "step": 936 }, { "epoch": 0.49289847448711205, "grad_norm": 2.1023032665252686, "learning_rate": 4.932689289357686e-06, "loss": 1.0583, "step": 937 }, { "epoch": 0.4934245134139926, "grad_norm": 2.0781443119049072, "learning_rate": 4.932529083083798e-06, "loss": 1.0753, "step": 938 }, { "epoch": 0.4939505523408732, "grad_norm": 2.0385992527008057, "learning_rate": 4.932368688990783e-06, "loss": 1.0165, "step": 939 }, { "epoch": 0.49447659126775384, "grad_norm": 2.350186586380005, "learning_rate": 4.932208107091022e-06, "loss": 1.0834, "step": 940 }, { "epoch": 0.4950026301946344, "grad_norm": 2.2009286880493164, "learning_rate": 4.932047337396917e-06, "loss": 1.0975, "step": 941 }, { "epoch": 0.495528669121515, "grad_norm": 2.389380931854248, "learning_rate": 4.931886379920878e-06, "loss": 1.0853, "step": 942 }, { "epoch": 0.4960547080483956, "grad_norm": 2.016162157058716, "learning_rate": 4.931725234675334e-06, "loss": 1.039, "step": 943 }, { "epoch": 0.49658074697527615, "grad_norm": 2.116718292236328, "learning_rate": 4.9315639016727286e-06, "loss": 1.0182, "step": 944 }, { "epoch": 0.49710678590215673, "grad_norm": 2.1381125450134277, "learning_rate": 4.931402380925517e-06, "loss": 1.1051, "step": 945 }, { "epoch": 0.49763282482903737, "grad_norm": 2.0954737663269043, "learning_rate": 4.931240672446171e-06, "loss": 1.038, "step": 946 }, { "epoch": 0.49815886375591795, "grad_norm": 2.167865037918091, "learning_rate": 4.931078776247176e-06, "loss": 1.0998, "step": 947 }, { "epoch": 0.4986849026827985, "grad_norm": 2.1278021335601807, "learning_rate": 4.930916692341034e-06, "loss": 1.0374, "step": 948 }, { "epoch": 0.4992109416096791, "grad_norm": 2.088512420654297, "learning_rate": 4.9307544207402565e-06, "loss": 1.0954, "step": 949 }, { "epoch": 0.4997369805365597, "grad_norm": 2.015916109085083, "learning_rate": 4.930591961457375e-06, "loss": 1.0163, "step": 950 }, { "epoch": 0.5002630194634403, "grad_norm": 2.0662143230438232, "learning_rate": 4.930429314504933e-06, "loss": 1.0968, "step": 951 }, { "epoch": 0.5007890583903208, "grad_norm": 2.0692410469055176, "learning_rate": 4.930266479895488e-06, "loss": 1.0772, "step": 952 }, { "epoch": 0.5013150973172015, "grad_norm": 2.0734803676605225, "learning_rate": 4.930103457641613e-06, "loss": 1.1096, "step": 953 }, { "epoch": 0.5018411362440821, "grad_norm": 2.167228937149048, "learning_rate": 4.929940247755896e-06, "loss": 1.0608, "step": 954 }, { "epoch": 0.5023671751709626, "grad_norm": 2.272087574005127, "learning_rate": 4.929776850250937e-06, "loss": 1.0825, "step": 955 }, { "epoch": 0.5028932140978433, "grad_norm": 2.0937726497650146, "learning_rate": 4.929613265139354e-06, "loss": 1.0651, "step": 956 }, { "epoch": 0.5034192530247238, "grad_norm": 2.168090343475342, "learning_rate": 4.929449492433777e-06, "loss": 1.0821, "step": 957 }, { "epoch": 0.5039452919516044, "grad_norm": 2.0708675384521484, "learning_rate": 4.92928553214685e-06, "loss": 1.0655, "step": 958 }, { "epoch": 0.5044713308784851, "grad_norm": 2.067678689956665, "learning_rate": 4.929121384291234e-06, "loss": 1.05, "step": 959 }, { "epoch": 0.5049973698053656, "grad_norm": 1.9181219339370728, "learning_rate": 4.928957048879602e-06, "loss": 0.9935, "step": 960 }, { "epoch": 0.5055234087322462, "grad_norm": 2.217785358428955, "learning_rate": 4.928792525924644e-06, "loss": 0.97, "step": 961 }, { "epoch": 0.5060494476591267, "grad_norm": 2.084656238555908, "learning_rate": 4.928627815439062e-06, "loss": 1.0541, "step": 962 }, { "epoch": 0.5065754865860074, "grad_norm": 2.035367727279663, "learning_rate": 4.928462917435574e-06, "loss": 1.0694, "step": 963 }, { "epoch": 0.507101525512888, "grad_norm": 2.001654624938965, "learning_rate": 4.928297831926912e-06, "loss": 1.0232, "step": 964 }, { "epoch": 0.5076275644397685, "grad_norm": 2.57733154296875, "learning_rate": 4.928132558925822e-06, "loss": 1.0664, "step": 965 }, { "epoch": 0.5081536033666492, "grad_norm": 2.1757423877716064, "learning_rate": 4.927967098445066e-06, "loss": 1.1119, "step": 966 }, { "epoch": 0.5086796422935297, "grad_norm": 2.089594602584839, "learning_rate": 4.927801450497417e-06, "loss": 1.0212, "step": 967 }, { "epoch": 0.5092056812204103, "grad_norm": 2.078519821166992, "learning_rate": 4.927635615095668e-06, "loss": 1.0381, "step": 968 }, { "epoch": 0.5097317201472908, "grad_norm": 2.0807132720947266, "learning_rate": 4.927469592252621e-06, "loss": 1.0272, "step": 969 }, { "epoch": 0.5102577590741715, "grad_norm": 2.1806020736694336, "learning_rate": 4.927303381981098e-06, "loss": 1.0846, "step": 970 }, { "epoch": 0.5107837980010521, "grad_norm": 2.1434948444366455, "learning_rate": 4.927136984293928e-06, "loss": 1.0775, "step": 971 }, { "epoch": 0.5113098369279326, "grad_norm": 2.000924825668335, "learning_rate": 4.926970399203962e-06, "loss": 1.0272, "step": 972 }, { "epoch": 0.5118358758548133, "grad_norm": 2.1742711067199707, "learning_rate": 4.926803626724062e-06, "loss": 1.0253, "step": 973 }, { "epoch": 0.5123619147816938, "grad_norm": 2.1074674129486084, "learning_rate": 4.926636666867103e-06, "loss": 1.0146, "step": 974 }, { "epoch": 0.5128879537085744, "grad_norm": 2.1562392711639404, "learning_rate": 4.926469519645976e-06, "loss": 1.0364, "step": 975 }, { "epoch": 0.5134139926354551, "grad_norm": 2.4177775382995605, "learning_rate": 4.926302185073591e-06, "loss": 1.0658, "step": 976 }, { "epoch": 0.5139400315623356, "grad_norm": 2.322571277618408, "learning_rate": 4.9261346631628635e-06, "loss": 1.0489, "step": 977 }, { "epoch": 0.5144660704892162, "grad_norm": 2.0937836170196533, "learning_rate": 4.925966953926729e-06, "loss": 1.0535, "step": 978 }, { "epoch": 0.5149921094160967, "grad_norm": 2.065213680267334, "learning_rate": 4.925799057378139e-06, "loss": 1.0097, "step": 979 }, { "epoch": 0.5155181483429774, "grad_norm": 2.0844249725341797, "learning_rate": 4.925630973530054e-06, "loss": 1.0719, "step": 980 }, { "epoch": 0.516044187269858, "grad_norm": 2.4148666858673096, "learning_rate": 4.925462702395454e-06, "loss": 1.0906, "step": 981 }, { "epoch": 0.5165702261967385, "grad_norm": 2.071423053741455, "learning_rate": 4.925294243987331e-06, "loss": 1.03, "step": 982 }, { "epoch": 0.5170962651236192, "grad_norm": 2.2397513389587402, "learning_rate": 4.9251255983186915e-06, "loss": 1.0412, "step": 983 }, { "epoch": 0.5176223040504997, "grad_norm": 2.171269655227661, "learning_rate": 4.924956765402557e-06, "loss": 1.1, "step": 984 }, { "epoch": 0.5181483429773803, "grad_norm": 2.1705877780914307, "learning_rate": 4.924787745251963e-06, "loss": 1.0534, "step": 985 }, { "epoch": 0.518674381904261, "grad_norm": 2.178514003753662, "learning_rate": 4.924618537879961e-06, "loss": 1.0759, "step": 986 }, { "epoch": 0.5192004208311415, "grad_norm": 2.072097063064575, "learning_rate": 4.924449143299614e-06, "loss": 1.0321, "step": 987 }, { "epoch": 0.5197264597580221, "grad_norm": 2.124030351638794, "learning_rate": 4.924279561524004e-06, "loss": 1.0465, "step": 988 }, { "epoch": 0.5202524986849026, "grad_norm": 2.0927019119262695, "learning_rate": 4.924109792566222e-06, "loss": 1.0716, "step": 989 }, { "epoch": 0.5207785376117833, "grad_norm": 2.0673232078552246, "learning_rate": 4.923939836439377e-06, "loss": 1.0628, "step": 990 }, { "epoch": 0.5213045765386639, "grad_norm": 2.2538528442382812, "learning_rate": 4.92376969315659e-06, "loss": 1.0687, "step": 991 }, { "epoch": 0.5218306154655444, "grad_norm": 2.120530366897583, "learning_rate": 4.923599362731001e-06, "loss": 1.0893, "step": 992 }, { "epoch": 0.5223566543924251, "grad_norm": 2.0750699043273926, "learning_rate": 4.92342884517576e-06, "loss": 1.0016, "step": 993 }, { "epoch": 0.5228826933193056, "grad_norm": 1.9984569549560547, "learning_rate": 4.923258140504032e-06, "loss": 1.0326, "step": 994 }, { "epoch": 0.5234087322461862, "grad_norm": 2.201758861541748, "learning_rate": 4.923087248729e-06, "loss": 1.0413, "step": 995 }, { "epoch": 0.5239347711730669, "grad_norm": 2.1322648525238037, "learning_rate": 4.922916169863855e-06, "loss": 1.0505, "step": 996 }, { "epoch": 0.5244608100999474, "grad_norm": 2.0557119846343994, "learning_rate": 4.922744903921809e-06, "loss": 0.9761, "step": 997 }, { "epoch": 0.524986849026828, "grad_norm": 2.0989720821380615, "learning_rate": 4.922573450916086e-06, "loss": 1.0436, "step": 998 }, { "epoch": 0.5255128879537085, "grad_norm": 2.152665138244629, "learning_rate": 4.922401810859922e-06, "loss": 1.0567, "step": 999 }, { "epoch": 0.5260389268805892, "grad_norm": 1.959796667098999, "learning_rate": 4.922229983766571e-06, "loss": 1.0694, "step": 1000 }, { "epoch": 0.5265649658074697, "grad_norm": 2.121493101119995, "learning_rate": 4.9220579696493e-06, "loss": 1.1024, "step": 1001 }, { "epoch": 0.5270910047343503, "grad_norm": 1.9629384279251099, "learning_rate": 4.92188576852139e-06, "loss": 1.0538, "step": 1002 }, { "epoch": 0.527617043661231, "grad_norm": 2.396224021911621, "learning_rate": 4.921713380396137e-06, "loss": 1.0711, "step": 1003 }, { "epoch": 0.5281430825881115, "grad_norm": 2.1571781635284424, "learning_rate": 4.921540805286852e-06, "loss": 1.0663, "step": 1004 }, { "epoch": 0.5286691215149921, "grad_norm": 2.032282590866089, "learning_rate": 4.921368043206858e-06, "loss": 1.0658, "step": 1005 }, { "epoch": 0.5291951604418726, "grad_norm": 1.9589232206344604, "learning_rate": 4.921195094169496e-06, "loss": 0.9755, "step": 1006 }, { "epoch": 0.5297211993687533, "grad_norm": 1.9304051399230957, "learning_rate": 4.92102195818812e-06, "loss": 1.011, "step": 1007 }, { "epoch": 0.5302472382956339, "grad_norm": 2.306674003601074, "learning_rate": 4.920848635276096e-06, "loss": 1.0626, "step": 1008 }, { "epoch": 0.5307732772225144, "grad_norm": 2.156906843185425, "learning_rate": 4.920675125446809e-06, "loss": 1.0107, "step": 1009 }, { "epoch": 0.5312993161493951, "grad_norm": 2.2959272861480713, "learning_rate": 4.9205014287136535e-06, "loss": 1.0527, "step": 1010 }, { "epoch": 0.5318253550762756, "grad_norm": 2.046900510787964, "learning_rate": 4.9203275450900426e-06, "loss": 1.0154, "step": 1011 }, { "epoch": 0.5323513940031562, "grad_norm": 1.9947476387023926, "learning_rate": 4.920153474589401e-06, "loss": 1.0456, "step": 1012 }, { "epoch": 0.5328774329300369, "grad_norm": 2.3516438007354736, "learning_rate": 4.919979217225169e-06, "loss": 1.0982, "step": 1013 }, { "epoch": 0.5334034718569174, "grad_norm": 2.5909998416900635, "learning_rate": 4.919804773010802e-06, "loss": 1.0436, "step": 1014 }, { "epoch": 0.533929510783798, "grad_norm": 2.5206117630004883, "learning_rate": 4.91963014195977e-06, "loss": 1.0405, "step": 1015 }, { "epoch": 0.5344555497106785, "grad_norm": 2.21992826461792, "learning_rate": 4.919455324085554e-06, "loss": 1.0594, "step": 1016 }, { "epoch": 0.5349815886375592, "grad_norm": 2.2773711681365967, "learning_rate": 4.919280319401654e-06, "loss": 1.0501, "step": 1017 }, { "epoch": 0.5355076275644398, "grad_norm": 2.075962543487549, "learning_rate": 4.919105127921582e-06, "loss": 1.0052, "step": 1018 }, { "epoch": 0.5360336664913203, "grad_norm": 2.108670473098755, "learning_rate": 4.9189297496588636e-06, "loss": 1.0675, "step": 1019 }, { "epoch": 0.536559705418201, "grad_norm": 2.125927209854126, "learning_rate": 4.918754184627041e-06, "loss": 1.0912, "step": 1020 }, { "epoch": 0.5370857443450815, "grad_norm": 2.1099467277526855, "learning_rate": 4.91857843283967e-06, "loss": 1.0424, "step": 1021 }, { "epoch": 0.5376117832719621, "grad_norm": 2.0880467891693115, "learning_rate": 4.918402494310319e-06, "loss": 1.061, "step": 1022 }, { "epoch": 0.5381378221988428, "grad_norm": 2.1544101238250732, "learning_rate": 4.918226369052575e-06, "loss": 1.0608, "step": 1023 }, { "epoch": 0.5386638611257233, "grad_norm": 2.213214635848999, "learning_rate": 4.918050057080036e-06, "loss": 1.1368, "step": 1024 }, { "epoch": 0.5391899000526039, "grad_norm": 2.062903642654419, "learning_rate": 4.917873558406315e-06, "loss": 1.0861, "step": 1025 }, { "epoch": 0.5397159389794844, "grad_norm": 1.9643436670303345, "learning_rate": 4.917696873045039e-06, "loss": 1.0008, "step": 1026 }, { "epoch": 0.5402419779063651, "grad_norm": 2.276639699935913, "learning_rate": 4.917520001009851e-06, "loss": 0.9812, "step": 1027 }, { "epoch": 0.5407680168332457, "grad_norm": 2.1487631797790527, "learning_rate": 4.917342942314407e-06, "loss": 1.0603, "step": 1028 }, { "epoch": 0.5412940557601262, "grad_norm": 2.1040542125701904, "learning_rate": 4.917165696972379e-06, "loss": 1.0425, "step": 1029 }, { "epoch": 0.5418200946870069, "grad_norm": 2.214475154876709, "learning_rate": 4.916988264997452e-06, "loss": 1.032, "step": 1030 }, { "epoch": 0.5423461336138874, "grad_norm": 2.154320001602173, "learning_rate": 4.916810646403325e-06, "loss": 1.0371, "step": 1031 }, { "epoch": 0.542872172540768, "grad_norm": 2.1565327644348145, "learning_rate": 4.916632841203714e-06, "loss": 1.0866, "step": 1032 }, { "epoch": 0.5433982114676486, "grad_norm": 2.197402238845825, "learning_rate": 4.916454849412344e-06, "loss": 1.0531, "step": 1033 }, { "epoch": 0.5439242503945292, "grad_norm": 2.0249993801116943, "learning_rate": 4.916276671042962e-06, "loss": 1.0485, "step": 1034 }, { "epoch": 0.5444502893214098, "grad_norm": 2.077765703201294, "learning_rate": 4.916098306109323e-06, "loss": 1.0731, "step": 1035 }, { "epoch": 0.5449763282482903, "grad_norm": 2.0669186115264893, "learning_rate": 4.915919754625199e-06, "loss": 1.0912, "step": 1036 }, { "epoch": 0.545502367175171, "grad_norm": 2.160076379776001, "learning_rate": 4.915741016604378e-06, "loss": 1.0523, "step": 1037 }, { "epoch": 0.5460284061020515, "grad_norm": 1.8992373943328857, "learning_rate": 4.915562092060659e-06, "loss": 1.0185, "step": 1038 }, { "epoch": 0.5465544450289321, "grad_norm": 2.0712900161743164, "learning_rate": 4.915382981007857e-06, "loss": 1.0581, "step": 1039 }, { "epoch": 0.5470804839558128, "grad_norm": 2.2600317001342773, "learning_rate": 4.915203683459802e-06, "loss": 1.0154, "step": 1040 }, { "epoch": 0.5476065228826933, "grad_norm": 2.050366163253784, "learning_rate": 4.915024199430338e-06, "loss": 1.0371, "step": 1041 }, { "epoch": 0.5481325618095739, "grad_norm": 2.208393096923828, "learning_rate": 4.914844528933322e-06, "loss": 1.0767, "step": 1042 }, { "epoch": 0.5486586007364544, "grad_norm": 2.1388466358184814, "learning_rate": 4.914664671982629e-06, "loss": 1.1074, "step": 1043 }, { "epoch": 0.5491846396633351, "grad_norm": 2.253007411956787, "learning_rate": 4.914484628592144e-06, "loss": 1.0455, "step": 1044 }, { "epoch": 0.5497106785902157, "grad_norm": 2.2380669116973877, "learning_rate": 4.9143043987757684e-06, "loss": 1.0581, "step": 1045 }, { "epoch": 0.5502367175170962, "grad_norm": 2.136256456375122, "learning_rate": 4.914123982547419e-06, "loss": 1.0588, "step": 1046 }, { "epoch": 0.5507627564439769, "grad_norm": 2.0044257640838623, "learning_rate": 4.913943379921025e-06, "loss": 0.9918, "step": 1047 }, { "epoch": 0.5512887953708574, "grad_norm": 2.089315414428711, "learning_rate": 4.913762590910533e-06, "loss": 1.0675, "step": 1048 }, { "epoch": 0.551814834297738, "grad_norm": 2.048976182937622, "learning_rate": 4.9135816155298985e-06, "loss": 1.0259, "step": 1049 }, { "epoch": 0.5523408732246187, "grad_norm": 2.273501396179199, "learning_rate": 4.913400453793098e-06, "loss": 1.0743, "step": 1050 }, { "epoch": 0.5528669121514992, "grad_norm": 2.0761802196502686, "learning_rate": 4.913219105714117e-06, "loss": 1.0199, "step": 1051 }, { "epoch": 0.5533929510783798, "grad_norm": 1.9552183151245117, "learning_rate": 4.913037571306961e-06, "loss": 1.0582, "step": 1052 }, { "epoch": 0.5539189900052603, "grad_norm": 2.0593061447143555, "learning_rate": 4.9128558505856425e-06, "loss": 0.9626, "step": 1053 }, { "epoch": 0.554445028932141, "grad_norm": 2.026820659637451, "learning_rate": 4.9126739435641955e-06, "loss": 1.0253, "step": 1054 }, { "epoch": 0.5549710678590216, "grad_norm": 2.22835111618042, "learning_rate": 4.9124918502566635e-06, "loss": 1.0176, "step": 1055 }, { "epoch": 0.5554971067859021, "grad_norm": 1.9653559923171997, "learning_rate": 4.9123095706771064e-06, "loss": 0.9886, "step": 1056 }, { "epoch": 0.5560231457127828, "grad_norm": 2.083310842514038, "learning_rate": 4.912127104839599e-06, "loss": 1.0105, "step": 1057 }, { "epoch": 0.5565491846396633, "grad_norm": 2.1681482791900635, "learning_rate": 4.91194445275823e-06, "loss": 1.0359, "step": 1058 }, { "epoch": 0.5570752235665439, "grad_norm": 1.990717887878418, "learning_rate": 4.911761614447101e-06, "loss": 1.0369, "step": 1059 }, { "epoch": 0.5576012624934246, "grad_norm": 2.159813642501831, "learning_rate": 4.91157858992033e-06, "loss": 1.0341, "step": 1060 }, { "epoch": 0.5581273014203051, "grad_norm": 1.9474655389785767, "learning_rate": 4.911395379192048e-06, "loss": 1.0432, "step": 1061 }, { "epoch": 0.5586533403471857, "grad_norm": 2.140634536743164, "learning_rate": 4.911211982276402e-06, "loss": 1.0485, "step": 1062 }, { "epoch": 0.5591793792740662, "grad_norm": 2.2925636768341064, "learning_rate": 4.911028399187552e-06, "loss": 1.0648, "step": 1063 }, { "epoch": 0.5597054182009469, "grad_norm": 2.037755250930786, "learning_rate": 4.910844629939672e-06, "loss": 1.0568, "step": 1064 }, { "epoch": 0.5602314571278275, "grad_norm": 1.997471809387207, "learning_rate": 4.910660674546951e-06, "loss": 1.0109, "step": 1065 }, { "epoch": 0.560757496054708, "grad_norm": 2.109219551086426, "learning_rate": 4.910476533023593e-06, "loss": 1.0658, "step": 1066 }, { "epoch": 0.5612835349815887, "grad_norm": 2.10469388961792, "learning_rate": 4.9102922053838175e-06, "loss": 1.0612, "step": 1067 }, { "epoch": 0.5618095739084692, "grad_norm": 2.2748658657073975, "learning_rate": 4.9101076916418535e-06, "loss": 1.0422, "step": 1068 }, { "epoch": 0.5623356128353498, "grad_norm": 2.0472326278686523, "learning_rate": 4.90992299181195e-06, "loss": 1.0339, "step": 1069 }, { "epoch": 0.5628616517622304, "grad_norm": 2.0694494247436523, "learning_rate": 4.909738105908367e-06, "loss": 1.0083, "step": 1070 }, { "epoch": 0.563387690689111, "grad_norm": 2.0032498836517334, "learning_rate": 4.909553033945379e-06, "loss": 0.9889, "step": 1071 }, { "epoch": 0.5639137296159916, "grad_norm": 2.091393232345581, "learning_rate": 4.909367775937278e-06, "loss": 1.0856, "step": 1072 }, { "epoch": 0.5644397685428721, "grad_norm": 2.0542173385620117, "learning_rate": 4.909182331898366e-06, "loss": 1.0422, "step": 1073 }, { "epoch": 0.5649658074697528, "grad_norm": 2.009228467941284, "learning_rate": 4.908996701842962e-06, "loss": 1.0594, "step": 1074 }, { "epoch": 0.5654918463966333, "grad_norm": 1.9546911716461182, "learning_rate": 4.9088108857853985e-06, "loss": 1.0691, "step": 1075 }, { "epoch": 0.5660178853235139, "grad_norm": 2.1657440662384033, "learning_rate": 4.908624883740023e-06, "loss": 1.0252, "step": 1076 }, { "epoch": 0.5665439242503946, "grad_norm": 2.151035785675049, "learning_rate": 4.9084386957211975e-06, "loss": 1.0587, "step": 1077 }, { "epoch": 0.5670699631772751, "grad_norm": 2.299673557281494, "learning_rate": 4.908252321743296e-06, "loss": 1.0221, "step": 1078 }, { "epoch": 0.5675960021041557, "grad_norm": 2.0144848823547363, "learning_rate": 4.908065761820711e-06, "loss": 1.0256, "step": 1079 }, { "epoch": 0.5681220410310363, "grad_norm": 2.172971725463867, "learning_rate": 4.907879015967846e-06, "loss": 1.0231, "step": 1080 }, { "epoch": 0.5686480799579169, "grad_norm": 2.0427041053771973, "learning_rate": 4.907692084199119e-06, "loss": 1.0433, "step": 1081 }, { "epoch": 0.5691741188847975, "grad_norm": 2.1561834812164307, "learning_rate": 4.907504966528966e-06, "loss": 1.0478, "step": 1082 }, { "epoch": 0.569700157811678, "grad_norm": 2.012385606765747, "learning_rate": 4.907317662971831e-06, "loss": 1.0703, "step": 1083 }, { "epoch": 0.5702261967385587, "grad_norm": 2.137075424194336, "learning_rate": 4.907130173542179e-06, "loss": 1.0527, "step": 1084 }, { "epoch": 0.5707522356654392, "grad_norm": 2.039424180984497, "learning_rate": 4.906942498254485e-06, "loss": 0.9969, "step": 1085 }, { "epoch": 0.5712782745923198, "grad_norm": 2.0207748413085938, "learning_rate": 4.90675463712324e-06, "loss": 1.0157, "step": 1086 }, { "epoch": 0.5718043135192005, "grad_norm": 2.024454116821289, "learning_rate": 4.906566590162949e-06, "loss": 1.0699, "step": 1087 }, { "epoch": 0.572330352446081, "grad_norm": 2.256537675857544, "learning_rate": 4.90637835738813e-06, "loss": 1.1083, "step": 1088 }, { "epoch": 0.5728563913729616, "grad_norm": 2.099698543548584, "learning_rate": 4.90618993881332e-06, "loss": 1.0242, "step": 1089 }, { "epoch": 0.5733824302998421, "grad_norm": 2.0367214679718018, "learning_rate": 4.906001334453064e-06, "loss": 1.0088, "step": 1090 }, { "epoch": 0.5739084692267228, "grad_norm": 1.9988690614700317, "learning_rate": 4.9058125443219245e-06, "loss": 1.044, "step": 1091 }, { "epoch": 0.5744345081536034, "grad_norm": 1.9970273971557617, "learning_rate": 4.9056235684344805e-06, "loss": 1.0847, "step": 1092 }, { "epoch": 0.5749605470804839, "grad_norm": 2.152602434158325, "learning_rate": 4.905434406805322e-06, "loss": 1.0931, "step": 1093 }, { "epoch": 0.5754865860073646, "grad_norm": 2.0728707313537598, "learning_rate": 4.905245059449053e-06, "loss": 1.0401, "step": 1094 }, { "epoch": 0.5760126249342451, "grad_norm": 1.94095778465271, "learning_rate": 4.9050555263802954e-06, "loss": 1.0262, "step": 1095 }, { "epoch": 0.5765386638611257, "grad_norm": 2.126347780227661, "learning_rate": 4.904865807613683e-06, "loss": 1.0678, "step": 1096 }, { "epoch": 0.5770647027880064, "grad_norm": 2.085378646850586, "learning_rate": 4.904675903163864e-06, "loss": 1.0665, "step": 1097 }, { "epoch": 0.5775907417148869, "grad_norm": 2.2276804447174072, "learning_rate": 4.9044858130454995e-06, "loss": 1.0718, "step": 1098 }, { "epoch": 0.5781167806417675, "grad_norm": 2.2318899631500244, "learning_rate": 4.904295537273269e-06, "loss": 1.0663, "step": 1099 }, { "epoch": 0.578642819568648, "grad_norm": 2.0555522441864014, "learning_rate": 4.904105075861864e-06, "loss": 0.9989, "step": 1100 }, { "epoch": 0.5791688584955287, "grad_norm": 2.094501256942749, "learning_rate": 4.9039144288259876e-06, "loss": 1.0802, "step": 1101 }, { "epoch": 0.5796948974224092, "grad_norm": 2.7403769493103027, "learning_rate": 4.903723596180363e-06, "loss": 1.0024, "step": 1102 }, { "epoch": 0.5802209363492898, "grad_norm": 2.1775436401367188, "learning_rate": 4.9035325779397225e-06, "loss": 1.0234, "step": 1103 }, { "epoch": 0.5807469752761705, "grad_norm": 2.2489676475524902, "learning_rate": 4.903341374118816e-06, "loss": 1.0188, "step": 1104 }, { "epoch": 0.581273014203051, "grad_norm": 2.2214367389678955, "learning_rate": 4.903149984732407e-06, "loss": 1.0835, "step": 1105 }, { "epoch": 0.5817990531299316, "grad_norm": 2.203273296356201, "learning_rate": 4.902958409795272e-06, "loss": 1.0547, "step": 1106 }, { "epoch": 0.5823250920568122, "grad_norm": 2.1076622009277344, "learning_rate": 4.902766649322204e-06, "loss": 1.0571, "step": 1107 }, { "epoch": 0.5828511309836928, "grad_norm": 2.1270394325256348, "learning_rate": 4.902574703328007e-06, "loss": 0.9863, "step": 1108 }, { "epoch": 0.5833771699105734, "grad_norm": 2.1030006408691406, "learning_rate": 4.902382571827503e-06, "loss": 1.0404, "step": 1109 }, { "epoch": 0.583903208837454, "grad_norm": 2.1046831607818604, "learning_rate": 4.9021902548355275e-06, "loss": 1.018, "step": 1110 }, { "epoch": 0.5844292477643346, "grad_norm": 2.0193376541137695, "learning_rate": 4.901997752366927e-06, "loss": 1.0035, "step": 1111 }, { "epoch": 0.5849552866912151, "grad_norm": 2.0812923908233643, "learning_rate": 4.9018050644365675e-06, "loss": 0.9928, "step": 1112 }, { "epoch": 0.5854813256180957, "grad_norm": 2.035750150680542, "learning_rate": 4.901612191059325e-06, "loss": 1.0658, "step": 1113 }, { "epoch": 0.5860073645449764, "grad_norm": 2.093606948852539, "learning_rate": 4.901419132250093e-06, "loss": 1.0019, "step": 1114 }, { "epoch": 0.5865334034718569, "grad_norm": 2.4018402099609375, "learning_rate": 4.901225888023776e-06, "loss": 1.0785, "step": 1115 }, { "epoch": 0.5870594423987375, "grad_norm": 2.1731529235839844, "learning_rate": 4.901032458395296e-06, "loss": 1.0437, "step": 1116 }, { "epoch": 0.587585481325618, "grad_norm": 2.085692882537842, "learning_rate": 4.900838843379588e-06, "loss": 1.0122, "step": 1117 }, { "epoch": 0.5881115202524987, "grad_norm": 2.272787094116211, "learning_rate": 4.900645042991601e-06, "loss": 1.0708, "step": 1118 }, { "epoch": 0.5886375591793793, "grad_norm": 2.197758913040161, "learning_rate": 4.900451057246298e-06, "loss": 1.037, "step": 1119 }, { "epoch": 0.5891635981062598, "grad_norm": 2.228980779647827, "learning_rate": 4.900256886158658e-06, "loss": 1.0306, "step": 1120 }, { "epoch": 0.5896896370331405, "grad_norm": 2.010698080062866, "learning_rate": 4.900062529743672e-06, "loss": 1.0777, "step": 1121 }, { "epoch": 0.590215675960021, "grad_norm": 2.0015103816986084, "learning_rate": 4.899867988016348e-06, "loss": 0.9991, "step": 1122 }, { "epoch": 0.5907417148869016, "grad_norm": 1.9307256937026978, "learning_rate": 4.899673260991706e-06, "loss": 1.0655, "step": 1123 }, { "epoch": 0.5912677538137823, "grad_norm": 2.339930295944214, "learning_rate": 4.899478348684782e-06, "loss": 1.0177, "step": 1124 }, { "epoch": 0.5917937927406628, "grad_norm": 2.000337839126587, "learning_rate": 4.899283251110624e-06, "loss": 1.036, "step": 1125 }, { "epoch": 0.5923198316675434, "grad_norm": 2.0116374492645264, "learning_rate": 4.899087968284297e-06, "loss": 0.9666, "step": 1126 }, { "epoch": 0.592845870594424, "grad_norm": 2.27270245552063, "learning_rate": 4.898892500220878e-06, "loss": 1.0526, "step": 1127 }, { "epoch": 0.5933719095213046, "grad_norm": 2.1844749450683594, "learning_rate": 4.89869684693546e-06, "loss": 1.0606, "step": 1128 }, { "epoch": 0.5938979484481852, "grad_norm": 2.112031936645508, "learning_rate": 4.898501008443151e-06, "loss": 1.0846, "step": 1129 }, { "epoch": 0.5944239873750657, "grad_norm": 2.251878499984741, "learning_rate": 4.898304984759069e-06, "loss": 1.023, "step": 1130 }, { "epoch": 0.5949500263019464, "grad_norm": 2.064732074737549, "learning_rate": 4.898108775898351e-06, "loss": 1.066, "step": 1131 }, { "epoch": 0.5954760652288269, "grad_norm": 2.10412335395813, "learning_rate": 4.897912381876147e-06, "loss": 1.0476, "step": 1132 }, { "epoch": 0.5960021041557075, "grad_norm": 2.1343259811401367, "learning_rate": 4.897715802707621e-06, "loss": 1.0264, "step": 1133 }, { "epoch": 0.5965281430825881, "grad_norm": 2.3453173637390137, "learning_rate": 4.89751903840795e-06, "loss": 1.076, "step": 1134 }, { "epoch": 0.5970541820094687, "grad_norm": 2.040123462677002, "learning_rate": 4.897322088992326e-06, "loss": 1.0494, "step": 1135 }, { "epoch": 0.5975802209363493, "grad_norm": 2.070585012435913, "learning_rate": 4.897124954475958e-06, "loss": 1.0904, "step": 1136 }, { "epoch": 0.5981062598632298, "grad_norm": 2.048081159591675, "learning_rate": 4.896927634874065e-06, "loss": 0.9855, "step": 1137 }, { "epoch": 0.5986322987901105, "grad_norm": 2.07633113861084, "learning_rate": 4.896730130201883e-06, "loss": 1.0848, "step": 1138 }, { "epoch": 0.599158337716991, "grad_norm": 2.233821153640747, "learning_rate": 4.8965324404746624e-06, "loss": 1.0419, "step": 1139 }, { "epoch": 0.5996843766438716, "grad_norm": 2.1806929111480713, "learning_rate": 4.896334565707666e-06, "loss": 1.0377, "step": 1140 }, { "epoch": 0.6002104155707523, "grad_norm": 2.056483268737793, "learning_rate": 4.896136505916174e-06, "loss": 1.0269, "step": 1141 }, { "epoch": 0.6007364544976328, "grad_norm": 1.9446007013320923, "learning_rate": 4.895938261115476e-06, "loss": 0.9958, "step": 1142 }, { "epoch": 0.6012624934245134, "grad_norm": 1.9170737266540527, "learning_rate": 4.8957398313208795e-06, "loss": 1.0083, "step": 1143 }, { "epoch": 0.601788532351394, "grad_norm": 2.0455801486968994, "learning_rate": 4.895541216547707e-06, "loss": 1.0819, "step": 1144 }, { "epoch": 0.6023145712782746, "grad_norm": 2.410231828689575, "learning_rate": 4.8953424168112925e-06, "loss": 1.0265, "step": 1145 }, { "epoch": 0.6028406102051552, "grad_norm": 2.0946412086486816, "learning_rate": 4.895143432126986e-06, "loss": 1.014, "step": 1146 }, { "epoch": 0.6033666491320357, "grad_norm": 1.9825836420059204, "learning_rate": 4.894944262510152e-06, "loss": 0.9721, "step": 1147 }, { "epoch": 0.6038926880589164, "grad_norm": 2.1228606700897217, "learning_rate": 4.8947449079761685e-06, "loss": 1.0971, "step": 1148 }, { "epoch": 0.6044187269857969, "grad_norm": 2.1443943977355957, "learning_rate": 4.894545368540427e-06, "loss": 0.9956, "step": 1149 }, { "epoch": 0.6049447659126775, "grad_norm": 1.9651165008544922, "learning_rate": 4.894345644218335e-06, "loss": 1.0103, "step": 1150 }, { "epoch": 0.6054708048395582, "grad_norm": 1.9829816818237305, "learning_rate": 4.8941457350253134e-06, "loss": 1.0425, "step": 1151 }, { "epoch": 0.6059968437664387, "grad_norm": 2.122873067855835, "learning_rate": 4.893945640976798e-06, "loss": 1.0532, "step": 1152 }, { "epoch": 0.6065228826933193, "grad_norm": 2.0714738368988037, "learning_rate": 4.8937453620882365e-06, "loss": 1.0307, "step": 1153 }, { "epoch": 0.6070489216201999, "grad_norm": 1.9049363136291504, "learning_rate": 4.893544898375096e-06, "loss": 0.9805, "step": 1154 }, { "epoch": 0.6075749605470805, "grad_norm": 2.432041645050049, "learning_rate": 4.893344249852851e-06, "loss": 1.0833, "step": 1155 }, { "epoch": 0.6081009994739611, "grad_norm": 2.055748224258423, "learning_rate": 4.893143416536997e-06, "loss": 1.0315, "step": 1156 }, { "epoch": 0.6086270384008416, "grad_norm": 1.9813153743743896, "learning_rate": 4.892942398443037e-06, "loss": 1.0786, "step": 1157 }, { "epoch": 0.6091530773277223, "grad_norm": 2.2038941383361816, "learning_rate": 4.892741195586496e-06, "loss": 1.0604, "step": 1158 }, { "epoch": 0.6096791162546028, "grad_norm": 2.0015673637390137, "learning_rate": 4.892539807982906e-06, "loss": 0.9863, "step": 1159 }, { "epoch": 0.6102051551814834, "grad_norm": 2.0392401218414307, "learning_rate": 4.892338235647818e-06, "loss": 1.0218, "step": 1160 }, { "epoch": 0.6107311941083641, "grad_norm": 2.0060133934020996, "learning_rate": 4.892136478596796e-06, "loss": 1.0134, "step": 1161 }, { "epoch": 0.6112572330352446, "grad_norm": 1.9645148515701294, "learning_rate": 4.8919345368454164e-06, "loss": 1.0206, "step": 1162 }, { "epoch": 0.6117832719621252, "grad_norm": 1.9299581050872803, "learning_rate": 4.8917324104092725e-06, "loss": 1.0243, "step": 1163 }, { "epoch": 0.6123093108890058, "grad_norm": 2.071143388748169, "learning_rate": 4.891530099303971e-06, "loss": 1.0466, "step": 1164 }, { "epoch": 0.6128353498158864, "grad_norm": 2.122020959854126, "learning_rate": 4.891327603545132e-06, "loss": 1.0886, "step": 1165 }, { "epoch": 0.6133613887427669, "grad_norm": 2.0861775875091553, "learning_rate": 4.891124923148391e-06, "loss": 1.0481, "step": 1166 }, { "epoch": 0.6138874276696475, "grad_norm": 2.053553581237793, "learning_rate": 4.890922058129396e-06, "loss": 1.0332, "step": 1167 }, { "epoch": 0.6144134665965282, "grad_norm": 2.0698556900024414, "learning_rate": 4.890719008503813e-06, "loss": 0.9913, "step": 1168 }, { "epoch": 0.6149395055234087, "grad_norm": 2.0626866817474365, "learning_rate": 4.890515774287317e-06, "loss": 1.0383, "step": 1169 }, { "epoch": 0.6154655444502893, "grad_norm": 2.001122236251831, "learning_rate": 4.890312355495602e-06, "loss": 0.997, "step": 1170 }, { "epoch": 0.6159915833771699, "grad_norm": 2.141261577606201, "learning_rate": 4.890108752144373e-06, "loss": 1.0139, "step": 1171 }, { "epoch": 0.6165176223040505, "grad_norm": 2.0430335998535156, "learning_rate": 4.8899049642493514e-06, "loss": 1.0177, "step": 1172 }, { "epoch": 0.6170436612309311, "grad_norm": 2.0376110076904297, "learning_rate": 4.889700991826271e-06, "loss": 1.0306, "step": 1173 }, { "epoch": 0.6175697001578117, "grad_norm": 2.0546419620513916, "learning_rate": 4.889496834890882e-06, "loss": 1.0379, "step": 1174 }, { "epoch": 0.6180957390846923, "grad_norm": 2.004117012023926, "learning_rate": 4.889292493458947e-06, "loss": 1.1014, "step": 1175 }, { "epoch": 0.6186217780115728, "grad_norm": 2.1904101371765137, "learning_rate": 4.889087967546243e-06, "loss": 1.0252, "step": 1176 }, { "epoch": 0.6191478169384534, "grad_norm": 2.2026965618133545, "learning_rate": 4.8888832571685626e-06, "loss": 1.0309, "step": 1177 }, { "epoch": 0.6196738558653341, "grad_norm": 1.9925811290740967, "learning_rate": 4.888678362341711e-06, "loss": 1.0157, "step": 1178 }, { "epoch": 0.6201998947922146, "grad_norm": 2.4098422527313232, "learning_rate": 4.88847328308151e-06, "loss": 0.9825, "step": 1179 }, { "epoch": 0.6207259337190952, "grad_norm": 1.9352220296859741, "learning_rate": 4.888268019403792e-06, "loss": 1.0235, "step": 1180 }, { "epoch": 0.6212519726459758, "grad_norm": 1.9798966646194458, "learning_rate": 4.888062571324407e-06, "loss": 1.0124, "step": 1181 }, { "epoch": 0.6217780115728564, "grad_norm": 1.9737377166748047, "learning_rate": 4.887856938859218e-06, "loss": 1.005, "step": 1182 }, { "epoch": 0.622304050499737, "grad_norm": 2.2528250217437744, "learning_rate": 4.887651122024102e-06, "loss": 1.0207, "step": 1183 }, { "epoch": 0.6228300894266176, "grad_norm": 2.01436185836792, "learning_rate": 4.887445120834949e-06, "loss": 1.0368, "step": 1184 }, { "epoch": 0.6233561283534982, "grad_norm": 2.0212924480438232, "learning_rate": 4.887238935307667e-06, "loss": 1.0136, "step": 1185 }, { "epoch": 0.6238821672803787, "grad_norm": 2.080514669418335, "learning_rate": 4.887032565458174e-06, "loss": 1.0012, "step": 1186 }, { "epoch": 0.6244082062072593, "grad_norm": 2.220168113708496, "learning_rate": 4.886826011302406e-06, "loss": 1.0055, "step": 1187 }, { "epoch": 0.62493424513414, "grad_norm": 2.042325258255005, "learning_rate": 4.886619272856309e-06, "loss": 1.0793, "step": 1188 }, { "epoch": 0.6254602840610205, "grad_norm": 2.0139427185058594, "learning_rate": 4.886412350135848e-06, "loss": 1.0853, "step": 1189 }, { "epoch": 0.6259863229879011, "grad_norm": 2.072531223297119, "learning_rate": 4.886205243156998e-06, "loss": 1.0611, "step": 1190 }, { "epoch": 0.6265123619147817, "grad_norm": 2.1070992946624756, "learning_rate": 4.8859979519357505e-06, "loss": 1.0171, "step": 1191 }, { "epoch": 0.6270384008416623, "grad_norm": 1.9750585556030273, "learning_rate": 4.885790476488111e-06, "loss": 1.01, "step": 1192 }, { "epoch": 0.6275644397685429, "grad_norm": 1.9221036434173584, "learning_rate": 4.885582816830099e-06, "loss": 1.0173, "step": 1193 }, { "epoch": 0.6280904786954234, "grad_norm": 2.0700929164886475, "learning_rate": 4.885374972977748e-06, "loss": 1.0469, "step": 1194 }, { "epoch": 0.6286165176223041, "grad_norm": 2.1358914375305176, "learning_rate": 4.885166944947106e-06, "loss": 1.0144, "step": 1195 }, { "epoch": 0.6291425565491846, "grad_norm": 2.0657570362091064, "learning_rate": 4.884958732754236e-06, "loss": 1.0278, "step": 1196 }, { "epoch": 0.6296685954760652, "grad_norm": 2.050619125366211, "learning_rate": 4.884750336415213e-06, "loss": 1.0401, "step": 1197 }, { "epoch": 0.6301946344029458, "grad_norm": 2.029069423675537, "learning_rate": 4.884541755946127e-06, "loss": 1.0265, "step": 1198 }, { "epoch": 0.6307206733298264, "grad_norm": 2.2242050170898438, "learning_rate": 4.884332991363086e-06, "loss": 1.043, "step": 1199 }, { "epoch": 0.631246712256707, "grad_norm": 1.9235576391220093, "learning_rate": 4.8841240426822056e-06, "loss": 1.0323, "step": 1200 }, { "epoch": 0.6317727511835876, "grad_norm": 2.0110039710998535, "learning_rate": 4.88391490991962e-06, "loss": 0.9861, "step": 1201 }, { "epoch": 0.6322987901104682, "grad_norm": 1.9583542346954346, "learning_rate": 4.883705593091478e-06, "loss": 1.0907, "step": 1202 }, { "epoch": 0.6328248290373487, "grad_norm": 2.046147346496582, "learning_rate": 4.88349609221394e-06, "loss": 1.0264, "step": 1203 }, { "epoch": 0.6333508679642293, "grad_norm": 2.072329521179199, "learning_rate": 4.8832864073031826e-06, "loss": 1.0273, "step": 1204 }, { "epoch": 0.63387690689111, "grad_norm": 2.163562774658203, "learning_rate": 4.883076538375395e-06, "loss": 0.9729, "step": 1205 }, { "epoch": 0.6344029458179905, "grad_norm": 2.018745183944702, "learning_rate": 4.8828664854467825e-06, "loss": 1.0349, "step": 1206 }, { "epoch": 0.6349289847448711, "grad_norm": 1.9641830921173096, "learning_rate": 4.882656248533562e-06, "loss": 1.0254, "step": 1207 }, { "epoch": 0.6354550236717517, "grad_norm": 2.189903736114502, "learning_rate": 4.8824458276519676e-06, "loss": 1.0347, "step": 1208 }, { "epoch": 0.6359810625986323, "grad_norm": 1.9000815153121948, "learning_rate": 4.882235222818245e-06, "loss": 1.0068, "step": 1209 }, { "epoch": 0.6365071015255129, "grad_norm": 2.008253335952759, "learning_rate": 4.882024434048658e-06, "loss": 0.9951, "step": 1210 }, { "epoch": 0.6370331404523935, "grad_norm": 2.254880905151367, "learning_rate": 4.881813461359479e-06, "loss": 1.0254, "step": 1211 }, { "epoch": 0.6375591793792741, "grad_norm": 2.079281806945801, "learning_rate": 4.881602304766999e-06, "loss": 1.0138, "step": 1212 }, { "epoch": 0.6380852183061546, "grad_norm": 1.9515445232391357, "learning_rate": 4.881390964287521e-06, "loss": 0.9896, "step": 1213 }, { "epoch": 0.6386112572330352, "grad_norm": 2.118746757507324, "learning_rate": 4.881179439937363e-06, "loss": 1.0554, "step": 1214 }, { "epoch": 0.6391372961599159, "grad_norm": 1.9809492826461792, "learning_rate": 4.8809677317328574e-06, "loss": 1.0327, "step": 1215 }, { "epoch": 0.6396633350867964, "grad_norm": 2.0196714401245117, "learning_rate": 4.88075583969035e-06, "loss": 1.0072, "step": 1216 }, { "epoch": 0.640189374013677, "grad_norm": 2.075596570968628, "learning_rate": 4.8805437638262024e-06, "loss": 1.0088, "step": 1217 }, { "epoch": 0.6407154129405576, "grad_norm": 1.919331431388855, "learning_rate": 4.880331504156788e-06, "loss": 0.9561, "step": 1218 }, { "epoch": 0.6412414518674382, "grad_norm": 2.1209754943847656, "learning_rate": 4.8801190606984974e-06, "loss": 1.0436, "step": 1219 }, { "epoch": 0.6417674907943188, "grad_norm": 2.1692416667938232, "learning_rate": 4.879906433467731e-06, "loss": 1.0596, "step": 1220 }, { "epoch": 0.6422935297211994, "grad_norm": 2.127383232116699, "learning_rate": 4.879693622480908e-06, "loss": 1.0527, "step": 1221 }, { "epoch": 0.64281956864808, "grad_norm": 2.0686752796173096, "learning_rate": 4.87948062775446e-06, "loss": 1.0161, "step": 1222 }, { "epoch": 0.6433456075749605, "grad_norm": 1.9912559986114502, "learning_rate": 4.879267449304831e-06, "loss": 1.0246, "step": 1223 }, { "epoch": 0.6438716465018411, "grad_norm": 1.9714523553848267, "learning_rate": 4.879054087148483e-06, "loss": 1.0669, "step": 1224 }, { "epoch": 0.6443976854287218, "grad_norm": 2.0122146606445312, "learning_rate": 4.878840541301888e-06, "loss": 1.0383, "step": 1225 }, { "epoch": 0.6449237243556023, "grad_norm": 2.191110134124756, "learning_rate": 4.878626811781536e-06, "loss": 1.0832, "step": 1226 }, { "epoch": 0.6454497632824829, "grad_norm": 2.018800735473633, "learning_rate": 4.8784128986039274e-06, "loss": 1.0588, "step": 1227 }, { "epoch": 0.6459758022093635, "grad_norm": 2.0812923908233643, "learning_rate": 4.87819880178558e-06, "loss": 1.0221, "step": 1228 }, { "epoch": 0.6465018411362441, "grad_norm": 2.110596179962158, "learning_rate": 4.877984521343025e-06, "loss": 1.0252, "step": 1229 }, { "epoch": 0.6470278800631246, "grad_norm": 2.2176296710968018, "learning_rate": 4.877770057292806e-06, "loss": 1.0575, "step": 1230 }, { "epoch": 0.6475539189900053, "grad_norm": 2.0294981002807617, "learning_rate": 4.8775554096514836e-06, "loss": 0.9862, "step": 1231 }, { "epoch": 0.6480799579168859, "grad_norm": 2.03635573387146, "learning_rate": 4.8773405784356285e-06, "loss": 1.0229, "step": 1232 }, { "epoch": 0.6486059968437664, "grad_norm": 2.2391481399536133, "learning_rate": 4.877125563661831e-06, "loss": 1.1258, "step": 1233 }, { "epoch": 0.649132035770647, "grad_norm": 2.1449427604675293, "learning_rate": 4.876910365346691e-06, "loss": 1.039, "step": 1234 }, { "epoch": 0.6496580746975276, "grad_norm": 2.075510025024414, "learning_rate": 4.876694983506826e-06, "loss": 1.047, "step": 1235 }, { "epoch": 0.6501841136244082, "grad_norm": 1.9154462814331055, "learning_rate": 4.876479418158862e-06, "loss": 0.9906, "step": 1236 }, { "epoch": 0.6507101525512888, "grad_norm": 2.2096331119537354, "learning_rate": 4.876263669319449e-06, "loss": 1.0843, "step": 1237 }, { "epoch": 0.6512361914781694, "grad_norm": 2.0682895183563232, "learning_rate": 4.87604773700524e-06, "loss": 1.0262, "step": 1238 }, { "epoch": 0.65176223040505, "grad_norm": 2.0859344005584717, "learning_rate": 4.8758316212329106e-06, "loss": 1.02, "step": 1239 }, { "epoch": 0.6522882693319305, "grad_norm": 2.060521364212036, "learning_rate": 4.875615322019146e-06, "loss": 1.0455, "step": 1240 }, { "epoch": 0.6528143082588111, "grad_norm": 2.049457311630249, "learning_rate": 4.875398839380647e-06, "loss": 1.0763, "step": 1241 }, { "epoch": 0.6533403471856918, "grad_norm": 2.2475039958953857, "learning_rate": 4.875182173334129e-06, "loss": 1.0599, "step": 1242 }, { "epoch": 0.6538663861125723, "grad_norm": 1.9375535249710083, "learning_rate": 4.874965323896321e-06, "loss": 0.9758, "step": 1243 }, { "epoch": 0.6543924250394529, "grad_norm": 2.0157570838928223, "learning_rate": 4.874748291083967e-06, "loss": 1.0491, "step": 1244 }, { "epoch": 0.6549184639663335, "grad_norm": 2.1339237689971924, "learning_rate": 4.874531074913823e-06, "loss": 0.9634, "step": 1245 }, { "epoch": 0.6554445028932141, "grad_norm": 1.946191430091858, "learning_rate": 4.874313675402662e-06, "loss": 1.0407, "step": 1246 }, { "epoch": 0.6559705418200947, "grad_norm": 1.9623258113861084, "learning_rate": 4.874096092567268e-06, "loss": 1.0662, "step": 1247 }, { "epoch": 0.6564965807469753, "grad_norm": 2.092224359512329, "learning_rate": 4.873878326424443e-06, "loss": 1.0802, "step": 1248 }, { "epoch": 0.6570226196738559, "grad_norm": 1.863853931427002, "learning_rate": 4.873660376990999e-06, "loss": 1.0789, "step": 1249 }, { "epoch": 0.6575486586007364, "grad_norm": 2.146857976913452, "learning_rate": 4.8734422442837655e-06, "loss": 1.0132, "step": 1250 }, { "epoch": 0.658074697527617, "grad_norm": 2.022573232650757, "learning_rate": 4.8732239283195844e-06, "loss": 1.0252, "step": 1251 }, { "epoch": 0.6586007364544977, "grad_norm": 2.160632848739624, "learning_rate": 4.873005429115312e-06, "loss": 1.0235, "step": 1252 }, { "epoch": 0.6591267753813782, "grad_norm": 2.0909252166748047, "learning_rate": 4.87278674668782e-06, "loss": 1.0671, "step": 1253 }, { "epoch": 0.6596528143082588, "grad_norm": 1.9689445495605469, "learning_rate": 4.872567881053991e-06, "loss": 1.0323, "step": 1254 }, { "epoch": 0.6601788532351394, "grad_norm": 2.141439914703369, "learning_rate": 4.872348832230727e-06, "loss": 1.0019, "step": 1255 }, { "epoch": 0.66070489216202, "grad_norm": 1.9927963018417358, "learning_rate": 4.872129600234938e-06, "loss": 1.0262, "step": 1256 }, { "epoch": 0.6612309310889006, "grad_norm": 2.1227667331695557, "learning_rate": 4.871910185083554e-06, "loss": 1.0341, "step": 1257 }, { "epoch": 0.6617569700157812, "grad_norm": 2.0554583072662354, "learning_rate": 4.871690586793514e-06, "loss": 1.0458, "step": 1258 }, { "epoch": 0.6622830089426618, "grad_norm": 1.9936654567718506, "learning_rate": 4.871470805381775e-06, "loss": 1.0125, "step": 1259 }, { "epoch": 0.6628090478695423, "grad_norm": 2.0953080654144287, "learning_rate": 4.871250840865306e-06, "loss": 1.0518, "step": 1260 }, { "epoch": 0.663335086796423, "grad_norm": 1.9445053339004517, "learning_rate": 4.871030693261091e-06, "loss": 0.9892, "step": 1261 }, { "epoch": 0.6638611257233035, "grad_norm": 2.054898500442505, "learning_rate": 4.870810362586127e-06, "loss": 1.0712, "step": 1262 }, { "epoch": 0.6643871646501841, "grad_norm": 2.158090114593506, "learning_rate": 4.870589848857428e-06, "loss": 0.9874, "step": 1263 }, { "epoch": 0.6649132035770647, "grad_norm": 2.081550121307373, "learning_rate": 4.870369152092019e-06, "loss": 1.0299, "step": 1264 }, { "epoch": 0.6654392425039453, "grad_norm": 1.9839400053024292, "learning_rate": 4.87014827230694e-06, "loss": 0.9997, "step": 1265 }, { "epoch": 0.6659652814308259, "grad_norm": 2.0596096515655518, "learning_rate": 4.869927209519246e-06, "loss": 1.0655, "step": 1266 }, { "epoch": 0.6664913203577064, "grad_norm": 2.3403422832489014, "learning_rate": 4.8697059637460055e-06, "loss": 1.0551, "step": 1267 }, { "epoch": 0.667017359284587, "grad_norm": 2.072814702987671, "learning_rate": 4.8694845350043004e-06, "loss": 1.0454, "step": 1268 }, { "epoch": 0.6675433982114677, "grad_norm": 2.2819271087646484, "learning_rate": 4.86926292331123e-06, "loss": 1.0076, "step": 1269 }, { "epoch": 0.6680694371383482, "grad_norm": 2.162179708480835, "learning_rate": 4.8690411286839024e-06, "loss": 1.0145, "step": 1270 }, { "epoch": 0.6685954760652288, "grad_norm": 2.1072568893432617, "learning_rate": 4.868819151139443e-06, "loss": 1.0936, "step": 1271 }, { "epoch": 0.6691215149921094, "grad_norm": 2.113056182861328, "learning_rate": 4.868596990694994e-06, "loss": 1.044, "step": 1272 }, { "epoch": 0.66964755391899, "grad_norm": 1.9856184720993042, "learning_rate": 4.868374647367705e-06, "loss": 1.0119, "step": 1273 }, { "epoch": 0.6701735928458706, "grad_norm": 2.013106346130371, "learning_rate": 4.868152121174746e-06, "loss": 1.0913, "step": 1274 }, { "epoch": 0.6706996317727512, "grad_norm": 1.8831686973571777, "learning_rate": 4.867929412133297e-06, "loss": 1.0077, "step": 1275 }, { "epoch": 0.6712256706996318, "grad_norm": 2.035214424133301, "learning_rate": 4.867706520260554e-06, "loss": 0.9683, "step": 1276 }, { "epoch": 0.6717517096265123, "grad_norm": 2.0336945056915283, "learning_rate": 4.867483445573727e-06, "loss": 1.0583, "step": 1277 }, { "epoch": 0.672277748553393, "grad_norm": 1.9241890907287598, "learning_rate": 4.867260188090041e-06, "loss": 1.0162, "step": 1278 }, { "epoch": 0.6728037874802736, "grad_norm": 2.122288942337036, "learning_rate": 4.8670367478267335e-06, "loss": 1.0633, "step": 1279 }, { "epoch": 0.6733298264071541, "grad_norm": 1.964282512664795, "learning_rate": 4.8668131248010555e-06, "loss": 1.0009, "step": 1280 }, { "epoch": 0.6738558653340347, "grad_norm": 2.075181722640991, "learning_rate": 4.866589319030273e-06, "loss": 1.0535, "step": 1281 }, { "epoch": 0.6743819042609153, "grad_norm": 2.086574077606201, "learning_rate": 4.866365330531668e-06, "loss": 1.0125, "step": 1282 }, { "epoch": 0.6749079431877959, "grad_norm": 2.176712989807129, "learning_rate": 4.866141159322535e-06, "loss": 1.0883, "step": 1283 }, { "epoch": 0.6754339821146765, "grad_norm": 2.4133596420288086, "learning_rate": 4.865916805420181e-06, "loss": 1.1115, "step": 1284 }, { "epoch": 0.6759600210415571, "grad_norm": 1.9632985591888428, "learning_rate": 4.865692268841931e-06, "loss": 0.9837, "step": 1285 }, { "epoch": 0.6764860599684377, "grad_norm": 2.320810556411743, "learning_rate": 4.865467549605119e-06, "loss": 1.0307, "step": 1286 }, { "epoch": 0.6770120988953182, "grad_norm": 2.259291172027588, "learning_rate": 4.865242647727097e-06, "loss": 1.0125, "step": 1287 }, { "epoch": 0.6775381378221988, "grad_norm": 2.069227695465088, "learning_rate": 4.8650175632252314e-06, "loss": 1.0348, "step": 1288 }, { "epoch": 0.6780641767490795, "grad_norm": 2.093912363052368, "learning_rate": 4.8647922961169e-06, "loss": 1.0628, "step": 1289 }, { "epoch": 0.67859021567596, "grad_norm": 2.0842857360839844, "learning_rate": 4.864566846419497e-06, "loss": 1.0296, "step": 1290 }, { "epoch": 0.6791162546028406, "grad_norm": 2.1448631286621094, "learning_rate": 4.864341214150428e-06, "loss": 1.0344, "step": 1291 }, { "epoch": 0.6796422935297212, "grad_norm": 2.173478841781616, "learning_rate": 4.864115399327115e-06, "loss": 1.0662, "step": 1292 }, { "epoch": 0.6801683324566018, "grad_norm": 2.1156740188598633, "learning_rate": 4.863889401966995e-06, "loss": 1.0568, "step": 1293 }, { "epoch": 0.6806943713834824, "grad_norm": 2.0641050338745117, "learning_rate": 4.863663222087515e-06, "loss": 1.0508, "step": 1294 }, { "epoch": 0.681220410310363, "grad_norm": 2.050645112991333, "learning_rate": 4.863436859706141e-06, "loss": 1.0198, "step": 1295 }, { "epoch": 0.6817464492372436, "grad_norm": 1.9624086618423462, "learning_rate": 4.86321031484035e-06, "loss": 1.012, "step": 1296 }, { "epoch": 0.6822724881641241, "grad_norm": 2.2763307094573975, "learning_rate": 4.8629835875076325e-06, "loss": 1.0208, "step": 1297 }, { "epoch": 0.6827985270910047, "grad_norm": 1.952094316482544, "learning_rate": 4.862756677725496e-06, "loss": 0.9912, "step": 1298 }, { "epoch": 0.6833245660178853, "grad_norm": 1.9964386224746704, "learning_rate": 4.862529585511461e-06, "loss": 1.0216, "step": 1299 }, { "epoch": 0.6838506049447659, "grad_norm": 2.0915441513061523, "learning_rate": 4.862302310883061e-06, "loss": 1.028, "step": 1300 }, { "epoch": 0.6843766438716465, "grad_norm": 2.239182233810425, "learning_rate": 4.862074853857843e-06, "loss": 1.1119, "step": 1301 }, { "epoch": 0.6849026827985271, "grad_norm": 2.120128870010376, "learning_rate": 4.861847214453371e-06, "loss": 1.0811, "step": 1302 }, { "epoch": 0.6854287217254077, "grad_norm": 1.8495033979415894, "learning_rate": 4.86161939268722e-06, "loss": 0.9559, "step": 1303 }, { "epoch": 0.6859547606522882, "grad_norm": 1.9767253398895264, "learning_rate": 4.861391388576982e-06, "loss": 0.9942, "step": 1304 }, { "epoch": 0.6864807995791689, "grad_norm": 1.9148463010787964, "learning_rate": 4.8611632021402605e-06, "loss": 1.0152, "step": 1305 }, { "epoch": 0.6870068385060495, "grad_norm": 2.036726474761963, "learning_rate": 4.860934833394674e-06, "loss": 1.0692, "step": 1306 }, { "epoch": 0.68753287743293, "grad_norm": 2.03383731842041, "learning_rate": 4.860706282357856e-06, "loss": 1.0429, "step": 1307 }, { "epoch": 0.6880589163598106, "grad_norm": 1.986863374710083, "learning_rate": 4.860477549047452e-06, "loss": 0.9737, "step": 1308 }, { "epoch": 0.6885849552866912, "grad_norm": 1.9917157888412476, "learning_rate": 4.860248633481124e-06, "loss": 0.9808, "step": 1309 }, { "epoch": 0.6891109942135718, "grad_norm": 1.9868308305740356, "learning_rate": 4.860019535676546e-06, "loss": 1.0001, "step": 1310 }, { "epoch": 0.6896370331404524, "grad_norm": 1.9900240898132324, "learning_rate": 4.859790255651408e-06, "loss": 1.0561, "step": 1311 }, { "epoch": 0.690163072067333, "grad_norm": 1.987703800201416, "learning_rate": 4.859560793423412e-06, "loss": 1.013, "step": 1312 }, { "epoch": 0.6906891109942136, "grad_norm": 1.9851711988449097, "learning_rate": 4.859331149010276e-06, "loss": 1.0727, "step": 1313 }, { "epoch": 0.6912151499210941, "grad_norm": 1.9733060598373413, "learning_rate": 4.8591013224297304e-06, "loss": 0.9924, "step": 1314 }, { "epoch": 0.6917411888479748, "grad_norm": 1.9737035036087036, "learning_rate": 4.85887131369952e-06, "loss": 1.0131, "step": 1315 }, { "epoch": 0.6922672277748554, "grad_norm": 2.176969528198242, "learning_rate": 4.858641122837407e-06, "loss": 1.0382, "step": 1316 }, { "epoch": 0.6927932667017359, "grad_norm": 1.951177716255188, "learning_rate": 4.858410749861161e-06, "loss": 1.011, "step": 1317 }, { "epoch": 0.6933193056286165, "grad_norm": 2.009986639022827, "learning_rate": 4.858180194788572e-06, "loss": 1.0999, "step": 1318 }, { "epoch": 0.6938453445554971, "grad_norm": 2.0470845699310303, "learning_rate": 4.857949457637441e-06, "loss": 1.0477, "step": 1319 }, { "epoch": 0.6943713834823777, "grad_norm": 2.163547992706299, "learning_rate": 4.857718538425582e-06, "loss": 1.0229, "step": 1320 }, { "epoch": 0.6948974224092583, "grad_norm": 2.0979368686676025, "learning_rate": 4.857487437170827e-06, "loss": 1.0686, "step": 1321 }, { "epoch": 0.6954234613361389, "grad_norm": 2.0388388633728027, "learning_rate": 4.857256153891017e-06, "loss": 0.991, "step": 1322 }, { "epoch": 0.6959495002630195, "grad_norm": 2.136115312576294, "learning_rate": 4.8570246886040124e-06, "loss": 1.0249, "step": 1323 }, { "epoch": 0.6964755391899, "grad_norm": 2.0932974815368652, "learning_rate": 4.8567930413276835e-06, "loss": 1.0649, "step": 1324 }, { "epoch": 0.6970015781167807, "grad_norm": 2.0559682846069336, "learning_rate": 4.856561212079916e-06, "loss": 0.9931, "step": 1325 }, { "epoch": 0.6975276170436613, "grad_norm": 1.9723689556121826, "learning_rate": 4.856329200878611e-06, "loss": 0.9628, "step": 1326 }, { "epoch": 0.6980536559705418, "grad_norm": 2.054049253463745, "learning_rate": 4.8560970077416805e-06, "loss": 1.0322, "step": 1327 }, { "epoch": 0.6985796948974224, "grad_norm": 2.100574254989624, "learning_rate": 4.855864632687055e-06, "loss": 1.0941, "step": 1328 }, { "epoch": 0.699105733824303, "grad_norm": 2.1415367126464844, "learning_rate": 4.8556320757326735e-06, "loss": 1.0341, "step": 1329 }, { "epoch": 0.6996317727511836, "grad_norm": 1.988004207611084, "learning_rate": 4.855399336896495e-06, "loss": 1.0357, "step": 1330 }, { "epoch": 0.7001578116780641, "grad_norm": 2.0249714851379395, "learning_rate": 4.855166416196487e-06, "loss": 1.0489, "step": 1331 }, { "epoch": 0.7006838506049448, "grad_norm": 1.9197039604187012, "learning_rate": 4.8549333136506356e-06, "loss": 1.0094, "step": 1332 }, { "epoch": 0.7012098895318254, "grad_norm": 2.153716564178467, "learning_rate": 4.854700029276938e-06, "loss": 1.0613, "step": 1333 }, { "epoch": 0.7017359284587059, "grad_norm": 1.9626339673995972, "learning_rate": 4.854466563093407e-06, "loss": 1.024, "step": 1334 }, { "epoch": 0.7022619673855865, "grad_norm": 2.0288281440734863, "learning_rate": 4.854232915118068e-06, "loss": 0.9778, "step": 1335 }, { "epoch": 0.7027880063124671, "grad_norm": 1.9677989482879639, "learning_rate": 4.853999085368963e-06, "loss": 0.9802, "step": 1336 }, { "epoch": 0.7033140452393477, "grad_norm": 2.054617404937744, "learning_rate": 4.853765073864144e-06, "loss": 0.9523, "step": 1337 }, { "epoch": 0.7038400841662283, "grad_norm": 2.0509955883026123, "learning_rate": 4.853530880621681e-06, "loss": 1.0324, "step": 1338 }, { "epoch": 0.7043661230931089, "grad_norm": 2.224724054336548, "learning_rate": 4.853296505659657e-06, "loss": 1.0965, "step": 1339 }, { "epoch": 0.7048921620199895, "grad_norm": 1.9698208570480347, "learning_rate": 4.8530619489961664e-06, "loss": 1.0486, "step": 1340 }, { "epoch": 0.70541820094687, "grad_norm": 2.129383087158203, "learning_rate": 4.85282721064932e-06, "loss": 1.0857, "step": 1341 }, { "epoch": 0.7059442398737507, "grad_norm": 2.2943053245544434, "learning_rate": 4.852592290637244e-06, "loss": 1.0628, "step": 1342 }, { "epoch": 0.7064702788006313, "grad_norm": 2.0792641639709473, "learning_rate": 4.852357188978075e-06, "loss": 1.0604, "step": 1343 }, { "epoch": 0.7069963177275118, "grad_norm": 2.0224812030792236, "learning_rate": 4.852121905689968e-06, "loss": 1.0687, "step": 1344 }, { "epoch": 0.7075223566543924, "grad_norm": 2.4030919075012207, "learning_rate": 4.851886440791087e-06, "loss": 1.0942, "step": 1345 }, { "epoch": 0.708048395581273, "grad_norm": 2.190215826034546, "learning_rate": 4.851650794299614e-06, "loss": 1.0393, "step": 1346 }, { "epoch": 0.7085744345081536, "grad_norm": 2.1099565029144287, "learning_rate": 4.851414966233743e-06, "loss": 1.0452, "step": 1347 }, { "epoch": 0.7091004734350342, "grad_norm": 2.156395673751831, "learning_rate": 4.851178956611682e-06, "loss": 1.0625, "step": 1348 }, { "epoch": 0.7096265123619148, "grad_norm": 2.1840314865112305, "learning_rate": 4.850942765451655e-06, "loss": 1.0467, "step": 1349 }, { "epoch": 0.7101525512887954, "grad_norm": 2.0080723762512207, "learning_rate": 4.850706392771899e-06, "loss": 1.0187, "step": 1350 }, { "epoch": 0.7106785902156759, "grad_norm": 2.1242828369140625, "learning_rate": 4.850469838590664e-06, "loss": 1.0459, "step": 1351 }, { "epoch": 0.7112046291425566, "grad_norm": 1.9652162790298462, "learning_rate": 4.8502331029262125e-06, "loss": 1.0404, "step": 1352 }, { "epoch": 0.7117306680694372, "grad_norm": 2.2363545894622803, "learning_rate": 4.849996185796827e-06, "loss": 1.0182, "step": 1353 }, { "epoch": 0.7122567069963177, "grad_norm": 2.028017044067383, "learning_rate": 4.849759087220798e-06, "loss": 1.0213, "step": 1354 }, { "epoch": 0.7127827459231983, "grad_norm": 2.265037775039673, "learning_rate": 4.849521807216432e-06, "loss": 1.0316, "step": 1355 }, { "epoch": 0.7133087848500789, "grad_norm": 2.083799362182617, "learning_rate": 4.849284345802051e-06, "loss": 1.0133, "step": 1356 }, { "epoch": 0.7138348237769595, "grad_norm": 1.9307647943496704, "learning_rate": 4.8490467029959895e-06, "loss": 1.0023, "step": 1357 }, { "epoch": 0.7143608627038401, "grad_norm": 2.1079766750335693, "learning_rate": 4.848808878816595e-06, "loss": 1.0208, "step": 1358 }, { "epoch": 0.7148869016307207, "grad_norm": 2.0214877128601074, "learning_rate": 4.8485708732822315e-06, "loss": 0.9904, "step": 1359 }, { "epoch": 0.7154129405576013, "grad_norm": 2.150768756866455, "learning_rate": 4.848332686411276e-06, "loss": 0.9969, "step": 1360 }, { "epoch": 0.7159389794844818, "grad_norm": 2.0330607891082764, "learning_rate": 4.8480943182221184e-06, "loss": 0.9865, "step": 1361 }, { "epoch": 0.7164650184113625, "grad_norm": 1.973970651626587, "learning_rate": 4.847855768733163e-06, "loss": 0.9815, "step": 1362 }, { "epoch": 0.716991057338243, "grad_norm": 2.074868679046631, "learning_rate": 4.84761703796283e-06, "loss": 1.0499, "step": 1363 }, { "epoch": 0.7175170962651236, "grad_norm": 1.9750478267669678, "learning_rate": 4.8473781259295514e-06, "loss": 0.9797, "step": 1364 }, { "epoch": 0.7180431351920042, "grad_norm": 1.971375823020935, "learning_rate": 4.847139032651774e-06, "loss": 0.9805, "step": 1365 }, { "epoch": 0.7185691741188848, "grad_norm": 2.0710880756378174, "learning_rate": 4.846899758147958e-06, "loss": 1.0143, "step": 1366 }, { "epoch": 0.7190952130457654, "grad_norm": 1.9696688652038574, "learning_rate": 4.8466603024365785e-06, "loss": 0.9869, "step": 1367 }, { "epoch": 0.7196212519726459, "grad_norm": 2.1022462844848633, "learning_rate": 4.846420665536126e-06, "loss": 1.0048, "step": 1368 }, { "epoch": 0.7201472908995266, "grad_norm": 2.164783000946045, "learning_rate": 4.8461808474651e-06, "loss": 1.0114, "step": 1369 }, { "epoch": 0.7206733298264072, "grad_norm": 2.0148744583129883, "learning_rate": 4.845940848242019e-06, "loss": 1.0232, "step": 1370 }, { "epoch": 0.7211993687532877, "grad_norm": 2.0193605422973633, "learning_rate": 4.845700667885414e-06, "loss": 0.9764, "step": 1371 }, { "epoch": 0.7217254076801684, "grad_norm": 2.005157232284546, "learning_rate": 4.845460306413829e-06, "loss": 1.0242, "step": 1372 }, { "epoch": 0.7222514466070489, "grad_norm": 2.128805637359619, "learning_rate": 4.845219763845823e-06, "loss": 0.9964, "step": 1373 }, { "epoch": 0.7227774855339295, "grad_norm": 1.9924060106277466, "learning_rate": 4.844979040199968e-06, "loss": 1.0185, "step": 1374 }, { "epoch": 0.7233035244608101, "grad_norm": 2.2126121520996094, "learning_rate": 4.844738135494851e-06, "loss": 1.0013, "step": 1375 }, { "epoch": 0.7238295633876907, "grad_norm": 2.0494630336761475, "learning_rate": 4.844497049749073e-06, "loss": 1.0628, "step": 1376 }, { "epoch": 0.7243556023145713, "grad_norm": 2.4115402698516846, "learning_rate": 4.844255782981249e-06, "loss": 1.0623, "step": 1377 }, { "epoch": 0.7248816412414518, "grad_norm": 2.062485933303833, "learning_rate": 4.8440143352100054e-06, "loss": 1.0115, "step": 1378 }, { "epoch": 0.7254076801683325, "grad_norm": 2.995894432067871, "learning_rate": 4.843772706453988e-06, "loss": 1.0805, "step": 1379 }, { "epoch": 0.7259337190952131, "grad_norm": 1.9974204301834106, "learning_rate": 4.84353089673185e-06, "loss": 1.0221, "step": 1380 }, { "epoch": 0.7264597580220936, "grad_norm": 2.1927318572998047, "learning_rate": 4.843288906062264e-06, "loss": 1.0273, "step": 1381 }, { "epoch": 0.7269857969489742, "grad_norm": 2.0213675498962402, "learning_rate": 4.8430467344639136e-06, "loss": 0.968, "step": 1382 }, { "epoch": 0.7275118358758548, "grad_norm": 2.2534306049346924, "learning_rate": 4.842804381955497e-06, "loss": 1.0457, "step": 1383 }, { "epoch": 0.7280378748027354, "grad_norm": 2.003638505935669, "learning_rate": 4.842561848555728e-06, "loss": 1.0471, "step": 1384 }, { "epoch": 0.728563913729616, "grad_norm": 2.217237949371338, "learning_rate": 4.842319134283331e-06, "loss": 1.0348, "step": 1385 }, { "epoch": 0.7290899526564966, "grad_norm": 2.1162800788879395, "learning_rate": 4.842076239157047e-06, "loss": 1.0548, "step": 1386 }, { "epoch": 0.7296159915833772, "grad_norm": 2.043252944946289, "learning_rate": 4.8418331631956325e-06, "loss": 1.0931, "step": 1387 }, { "epoch": 0.7301420305102577, "grad_norm": 2.099283218383789, "learning_rate": 4.841589906417853e-06, "loss": 1.0059, "step": 1388 }, { "epoch": 0.7306680694371384, "grad_norm": 1.9934890270233154, "learning_rate": 4.8413464688424904e-06, "loss": 1.0327, "step": 1389 }, { "epoch": 0.731194108364019, "grad_norm": 1.868202567100525, "learning_rate": 4.841102850488343e-06, "loss": 0.9622, "step": 1390 }, { "epoch": 0.7317201472908995, "grad_norm": 1.9592076539993286, "learning_rate": 4.84085905137422e-06, "loss": 1.0413, "step": 1391 }, { "epoch": 0.7322461862177801, "grad_norm": 2.0478546619415283, "learning_rate": 4.840615071518946e-06, "loss": 1.0343, "step": 1392 }, { "epoch": 0.7327722251446607, "grad_norm": 2.4996554851531982, "learning_rate": 4.840370910941358e-06, "loss": 1.1106, "step": 1393 }, { "epoch": 0.7332982640715413, "grad_norm": 2.0023233890533447, "learning_rate": 4.8401265696603085e-06, "loss": 1.0273, "step": 1394 }, { "epoch": 0.7338243029984218, "grad_norm": 2.0366029739379883, "learning_rate": 4.8398820476946625e-06, "loss": 1.0092, "step": 1395 }, { "epoch": 0.7343503419253025, "grad_norm": 2.2142248153686523, "learning_rate": 4.839637345063302e-06, "loss": 0.9884, "step": 1396 }, { "epoch": 0.7348763808521831, "grad_norm": 1.9955226182937622, "learning_rate": 4.839392461785119e-06, "loss": 1.054, "step": 1397 }, { "epoch": 0.7354024197790636, "grad_norm": 2.0607223510742188, "learning_rate": 4.839147397879023e-06, "loss": 0.9826, "step": 1398 }, { "epoch": 0.7359284587059443, "grad_norm": 2.054483652114868, "learning_rate": 4.8389021533639345e-06, "loss": 1.0738, "step": 1399 }, { "epoch": 0.7364544976328248, "grad_norm": 2.1066908836364746, "learning_rate": 4.8386567282587886e-06, "loss": 1.0937, "step": 1400 }, { "epoch": 0.7369805365597054, "grad_norm": 2.018155097961426, "learning_rate": 4.8384111225825355e-06, "loss": 0.9767, "step": 1401 }, { "epoch": 0.737506575486586, "grad_norm": 2.152189016342163, "learning_rate": 4.83816533635414e-06, "loss": 1.0062, "step": 1402 }, { "epoch": 0.7380326144134666, "grad_norm": 1.9946335554122925, "learning_rate": 4.8379193695925785e-06, "loss": 1.0724, "step": 1403 }, { "epoch": 0.7385586533403472, "grad_norm": 2.077017307281494, "learning_rate": 4.837673222316843e-06, "loss": 1.0991, "step": 1404 }, { "epoch": 0.7390846922672277, "grad_norm": 2.0850563049316406, "learning_rate": 4.837426894545938e-06, "loss": 1.0527, "step": 1405 }, { "epoch": 0.7396107311941084, "grad_norm": 1.9786406755447388, "learning_rate": 4.837180386298883e-06, "loss": 0.9666, "step": 1406 }, { "epoch": 0.740136770120989, "grad_norm": 2.0060155391693115, "learning_rate": 4.836933697594711e-06, "loss": 1.0795, "step": 1407 }, { "epoch": 0.7406628090478695, "grad_norm": 2.086906909942627, "learning_rate": 4.836686828452471e-06, "loss": 0.9925, "step": 1408 }, { "epoch": 0.7411888479747502, "grad_norm": 2.0125632286071777, "learning_rate": 4.836439778891223e-06, "loss": 0.9706, "step": 1409 }, { "epoch": 0.7417148869016307, "grad_norm": 1.8921434879302979, "learning_rate": 4.836192548930041e-06, "loss": 1.0237, "step": 1410 }, { "epoch": 0.7422409258285113, "grad_norm": 1.9400858879089355, "learning_rate": 4.835945138588015e-06, "loss": 1.0444, "step": 1411 }, { "epoch": 0.742766964755392, "grad_norm": 2.083749294281006, "learning_rate": 4.835697547884248e-06, "loss": 1.0136, "step": 1412 }, { "epoch": 0.7432930036822725, "grad_norm": 2.0750844478607178, "learning_rate": 4.8354497768378575e-06, "loss": 1.0863, "step": 1413 }, { "epoch": 0.7438190426091531, "grad_norm": 2.137214183807373, "learning_rate": 4.835201825467973e-06, "loss": 1.0095, "step": 1414 }, { "epoch": 0.7443450815360336, "grad_norm": 2.06549072265625, "learning_rate": 4.834953693793739e-06, "loss": 1.0449, "step": 1415 }, { "epoch": 0.7448711204629143, "grad_norm": 2.0396728515625, "learning_rate": 4.834705381834315e-06, "loss": 1.0093, "step": 1416 }, { "epoch": 0.7453971593897949, "grad_norm": 1.993697166442871, "learning_rate": 4.834456889608874e-06, "loss": 1.0075, "step": 1417 }, { "epoch": 0.7459231983166754, "grad_norm": 2.1017816066741943, "learning_rate": 4.834208217136601e-06, "loss": 1.0687, "step": 1418 }, { "epoch": 0.746449237243556, "grad_norm": 2.0740413665771484, "learning_rate": 4.833959364436698e-06, "loss": 0.9777, "step": 1419 }, { "epoch": 0.7469752761704366, "grad_norm": 2.0858206748962402, "learning_rate": 4.833710331528377e-06, "loss": 1.044, "step": 1420 }, { "epoch": 0.7475013150973172, "grad_norm": 2.33298921585083, "learning_rate": 4.833461118430869e-06, "loss": 1.0602, "step": 1421 }, { "epoch": 0.7480273540241978, "grad_norm": 2.1458897590637207, "learning_rate": 4.833211725163414e-06, "loss": 0.9903, "step": 1422 }, { "epoch": 0.7485533929510784, "grad_norm": 2.15071177482605, "learning_rate": 4.8329621517452685e-06, "loss": 1.011, "step": 1423 }, { "epoch": 0.749079431877959, "grad_norm": 2.0375895500183105, "learning_rate": 4.8327123981957025e-06, "loss": 1.0021, "step": 1424 }, { "epoch": 0.7496054708048395, "grad_norm": 1.9808685779571533, "learning_rate": 4.832462464534e-06, "loss": 1.025, "step": 1425 }, { "epoch": 0.7501315097317202, "grad_norm": 2.046558380126953, "learning_rate": 4.832212350779459e-06, "loss": 1.0435, "step": 1426 }, { "epoch": 0.7506575486586007, "grad_norm": 2.0020248889923096, "learning_rate": 4.831962056951392e-06, "loss": 1.0207, "step": 1427 }, { "epoch": 0.7511835875854813, "grad_norm": 1.9901740550994873, "learning_rate": 4.831711583069122e-06, "loss": 1.0505, "step": 1428 }, { "epoch": 0.751709626512362, "grad_norm": 2.112236738204956, "learning_rate": 4.83146092915199e-06, "loss": 1.0353, "step": 1429 }, { "epoch": 0.7522356654392425, "grad_norm": 2.0244028568267822, "learning_rate": 4.831210095219349e-06, "loss": 1.0169, "step": 1430 }, { "epoch": 0.7527617043661231, "grad_norm": 2.298645257949829, "learning_rate": 4.830959081290567e-06, "loss": 1.0498, "step": 1431 }, { "epoch": 0.7532877432930036, "grad_norm": 2.1593234539031982, "learning_rate": 4.8307078873850244e-06, "loss": 1.0954, "step": 1432 }, { "epoch": 0.7538137822198843, "grad_norm": 1.9387123584747314, "learning_rate": 4.830456513522117e-06, "loss": 0.9784, "step": 1433 }, { "epoch": 0.7543398211467649, "grad_norm": 2.1634531021118164, "learning_rate": 4.830204959721253e-06, "loss": 1.0516, "step": 1434 }, { "epoch": 0.7548658600736454, "grad_norm": 1.9310704469680786, "learning_rate": 4.829953226001855e-06, "loss": 0.9648, "step": 1435 }, { "epoch": 0.7553918990005261, "grad_norm": 2.0547149181365967, "learning_rate": 4.8297013123833605e-06, "loss": 1.0734, "step": 1436 }, { "epoch": 0.7559179379274066, "grad_norm": 2.222872734069824, "learning_rate": 4.829449218885219e-06, "loss": 0.9645, "step": 1437 }, { "epoch": 0.7564439768542872, "grad_norm": 2.128120183944702, "learning_rate": 4.829196945526897e-06, "loss": 1.046, "step": 1438 }, { "epoch": 0.7569700157811678, "grad_norm": 2.0309526920318604, "learning_rate": 4.828944492327872e-06, "loss": 1.0596, "step": 1439 }, { "epoch": 0.7574960547080484, "grad_norm": 2.0946176052093506, "learning_rate": 4.828691859307635e-06, "loss": 1.0134, "step": 1440 }, { "epoch": 0.758022093634929, "grad_norm": 1.9159823656082153, "learning_rate": 4.828439046485693e-06, "loss": 1.0081, "step": 1441 }, { "epoch": 0.7585481325618095, "grad_norm": 2.203627586364746, "learning_rate": 4.828186053881566e-06, "loss": 1.0451, "step": 1442 }, { "epoch": 0.7590741714886902, "grad_norm": 2.065521240234375, "learning_rate": 4.8279328815147895e-06, "loss": 1.0289, "step": 1443 }, { "epoch": 0.7596002104155708, "grad_norm": 2.1597719192504883, "learning_rate": 4.827679529404909e-06, "loss": 1.0373, "step": 1444 }, { "epoch": 0.7601262493424513, "grad_norm": 2.6100237369537354, "learning_rate": 4.827425997571488e-06, "loss": 1.0254, "step": 1445 }, { "epoch": 0.760652288269332, "grad_norm": 2.1975550651550293, "learning_rate": 4.8271722860341e-06, "loss": 1.0254, "step": 1446 }, { "epoch": 0.7611783271962125, "grad_norm": 2.019261360168457, "learning_rate": 4.826918394812336e-06, "loss": 1.0823, "step": 1447 }, { "epoch": 0.7617043661230931, "grad_norm": 1.9351961612701416, "learning_rate": 4.8266643239257996e-06, "loss": 1.0248, "step": 1448 }, { "epoch": 0.7622304050499737, "grad_norm": 1.9437129497528076, "learning_rate": 4.826410073394106e-06, "loss": 0.984, "step": 1449 }, { "epoch": 0.7627564439768543, "grad_norm": 2.277479887008667, "learning_rate": 4.826155643236889e-06, "loss": 1.0264, "step": 1450 }, { "epoch": 0.7632824829037349, "grad_norm": 2.2033772468566895, "learning_rate": 4.825901033473791e-06, "loss": 1.0249, "step": 1451 }, { "epoch": 0.7638085218306154, "grad_norm": 2.1912593841552734, "learning_rate": 4.825646244124472e-06, "loss": 1.0366, "step": 1452 }, { "epoch": 0.7643345607574961, "grad_norm": 2.0046746730804443, "learning_rate": 4.825391275208606e-06, "loss": 1.0411, "step": 1453 }, { "epoch": 0.7648605996843767, "grad_norm": 2.0601322650909424, "learning_rate": 4.825136126745877e-06, "loss": 1.052, "step": 1454 }, { "epoch": 0.7653866386112572, "grad_norm": 2.148794651031494, "learning_rate": 4.824880798755986e-06, "loss": 1.04, "step": 1455 }, { "epoch": 0.7659126775381379, "grad_norm": 2.027374505996704, "learning_rate": 4.824625291258649e-06, "loss": 1.005, "step": 1456 }, { "epoch": 0.7664387164650184, "grad_norm": 2.0703351497650146, "learning_rate": 4.824369604273592e-06, "loss": 1.0157, "step": 1457 }, { "epoch": 0.766964755391899, "grad_norm": 2.1002986431121826, "learning_rate": 4.8241137378205575e-06, "loss": 1.0355, "step": 1458 }, { "epoch": 0.7674907943187795, "grad_norm": 1.9970546960830688, "learning_rate": 4.823857691919302e-06, "loss": 0.9833, "step": 1459 }, { "epoch": 0.7680168332456602, "grad_norm": 2.0489771366119385, "learning_rate": 4.823601466589595e-06, "loss": 1.0351, "step": 1460 }, { "epoch": 0.7685428721725408, "grad_norm": 2.0190834999084473, "learning_rate": 4.823345061851219e-06, "loss": 1.0406, "step": 1461 }, { "epoch": 0.7690689110994213, "grad_norm": 2.0567877292633057, "learning_rate": 4.823088477723973e-06, "loss": 1.0593, "step": 1462 }, { "epoch": 0.769594950026302, "grad_norm": 1.883132815361023, "learning_rate": 4.822831714227667e-06, "loss": 1.0055, "step": 1463 }, { "epoch": 0.7701209889531825, "grad_norm": 1.9520277976989746, "learning_rate": 4.822574771382127e-06, "loss": 0.9831, "step": 1464 }, { "epoch": 0.7706470278800631, "grad_norm": 2.0123813152313232, "learning_rate": 4.822317649207191e-06, "loss": 0.9841, "step": 1465 }, { "epoch": 0.7711730668069438, "grad_norm": 2.089940309524536, "learning_rate": 4.8220603477227124e-06, "loss": 1.0121, "step": 1466 }, { "epoch": 0.7716991057338243, "grad_norm": 1.9485499858856201, "learning_rate": 4.8218028669485585e-06, "loss": 0.9744, "step": 1467 }, { "epoch": 0.7722251446607049, "grad_norm": 2.2764859199523926, "learning_rate": 4.821545206904608e-06, "loss": 1.0018, "step": 1468 }, { "epoch": 0.7727511835875854, "grad_norm": 2.039769411087036, "learning_rate": 4.821287367610756e-06, "loss": 1.0256, "step": 1469 }, { "epoch": 0.7732772225144661, "grad_norm": 2.0036065578460693, "learning_rate": 4.821029349086911e-06, "loss": 1.0399, "step": 1470 }, { "epoch": 0.7738032614413467, "grad_norm": 2.056286573410034, "learning_rate": 4.820771151352996e-06, "loss": 1.0077, "step": 1471 }, { "epoch": 0.7743293003682272, "grad_norm": 2.0001938343048096, "learning_rate": 4.820512774428944e-06, "loss": 1.0109, "step": 1472 }, { "epoch": 0.7748553392951079, "grad_norm": 2.007289409637451, "learning_rate": 4.820254218334707e-06, "loss": 1.0223, "step": 1473 }, { "epoch": 0.7753813782219884, "grad_norm": 2.079768657684326, "learning_rate": 4.8199954830902465e-06, "loss": 1.0565, "step": 1474 }, { "epoch": 0.775907417148869, "grad_norm": 2.030198097229004, "learning_rate": 4.819736568715543e-06, "loss": 1.033, "step": 1475 }, { "epoch": 0.7764334560757497, "grad_norm": 2.6482961177825928, "learning_rate": 4.819477475230584e-06, "loss": 1.0595, "step": 1476 }, { "epoch": 0.7769594950026302, "grad_norm": 2.160472869873047, "learning_rate": 4.8192182026553775e-06, "loss": 1.0214, "step": 1477 }, { "epoch": 0.7774855339295108, "grad_norm": 2.1956963539123535, "learning_rate": 4.818958751009941e-06, "loss": 1.0647, "step": 1478 }, { "epoch": 0.7780115728563913, "grad_norm": 2.346040725708008, "learning_rate": 4.818699120314306e-06, "loss": 1.0289, "step": 1479 }, { "epoch": 0.778537611783272, "grad_norm": 2.049593448638916, "learning_rate": 4.818439310588521e-06, "loss": 1.0188, "step": 1480 }, { "epoch": 0.7790636507101526, "grad_norm": 1.9567065238952637, "learning_rate": 4.818179321852646e-06, "loss": 1.0645, "step": 1481 }, { "epoch": 0.7795896896370331, "grad_norm": 2.0995101928710938, "learning_rate": 4.817919154126753e-06, "loss": 1.0283, "step": 1482 }, { "epoch": 0.7801157285639138, "grad_norm": 2.117649555206299, "learning_rate": 4.817658807430933e-06, "loss": 0.9973, "step": 1483 }, { "epoch": 0.7806417674907943, "grad_norm": 2.058525800704956, "learning_rate": 4.817398281785286e-06, "loss": 1.0278, "step": 1484 }, { "epoch": 0.7811678064176749, "grad_norm": 1.9914313554763794, "learning_rate": 4.817137577209927e-06, "loss": 0.9591, "step": 1485 }, { "epoch": 0.7816938453445555, "grad_norm": 1.9432276487350464, "learning_rate": 4.816876693724987e-06, "loss": 0.9964, "step": 1486 }, { "epoch": 0.7822198842714361, "grad_norm": 2.011399507522583, "learning_rate": 4.816615631350608e-06, "loss": 0.9963, "step": 1487 }, { "epoch": 0.7827459231983167, "grad_norm": 1.9606966972351074, "learning_rate": 4.816354390106947e-06, "loss": 0.9756, "step": 1488 }, { "epoch": 0.7832719621251972, "grad_norm": 2.011887788772583, "learning_rate": 4.816092970014176e-06, "loss": 1.0194, "step": 1489 }, { "epoch": 0.7837980010520779, "grad_norm": 2.0520918369293213, "learning_rate": 4.815831371092478e-06, "loss": 1.02, "step": 1490 }, { "epoch": 0.7843240399789585, "grad_norm": 2.018293619155884, "learning_rate": 4.815569593362053e-06, "loss": 1.0289, "step": 1491 }, { "epoch": 0.784850078905839, "grad_norm": 2.016738176345825, "learning_rate": 4.815307636843112e-06, "loss": 1.0523, "step": 1492 }, { "epoch": 0.7853761178327197, "grad_norm": 2.063619375228882, "learning_rate": 4.815045501555882e-06, "loss": 1.0099, "step": 1493 }, { "epoch": 0.7859021567596002, "grad_norm": 2.122360944747925, "learning_rate": 4.814783187520602e-06, "loss": 1.0346, "step": 1494 }, { "epoch": 0.7864281956864808, "grad_norm": 2.040095329284668, "learning_rate": 4.814520694757526e-06, "loss": 1.0017, "step": 1495 }, { "epoch": 0.7869542346133613, "grad_norm": 2.003471612930298, "learning_rate": 4.814258023286922e-06, "loss": 0.975, "step": 1496 }, { "epoch": 0.787480273540242, "grad_norm": 1.905517816543579, "learning_rate": 4.81399517312907e-06, "loss": 0.9899, "step": 1497 }, { "epoch": 0.7880063124671226, "grad_norm": 2.047112226486206, "learning_rate": 4.813732144304266e-06, "loss": 0.9558, "step": 1498 }, { "epoch": 0.7885323513940031, "grad_norm": 1.9621355533599854, "learning_rate": 4.8134689368328194e-06, "loss": 1.0668, "step": 1499 }, { "epoch": 0.7890583903208838, "grad_norm": 1.9221957921981812, "learning_rate": 4.813205550735052e-06, "loss": 1.0082, "step": 1500 }, { "epoch": 0.7895844292477643, "grad_norm": 2.002659797668457, "learning_rate": 4.812941986031299e-06, "loss": 1.0192, "step": 1501 }, { "epoch": 0.7901104681746449, "grad_norm": 2.1077136993408203, "learning_rate": 4.812678242741913e-06, "loss": 1.0316, "step": 1502 }, { "epoch": 0.7906365071015256, "grad_norm": 2.0782320499420166, "learning_rate": 4.812414320887256e-06, "loss": 1.058, "step": 1503 }, { "epoch": 0.7911625460284061, "grad_norm": 2.049888849258423, "learning_rate": 4.812150220487708e-06, "loss": 1.0033, "step": 1504 }, { "epoch": 0.7916885849552867, "grad_norm": 2.025468587875366, "learning_rate": 4.811885941563659e-06, "loss": 1.0066, "step": 1505 }, { "epoch": 0.7922146238821672, "grad_norm": 2.0612878799438477, "learning_rate": 4.8116214841355145e-06, "loss": 0.9783, "step": 1506 }, { "epoch": 0.7927406628090479, "grad_norm": 1.9370075464248657, "learning_rate": 4.811356848223693e-06, "loss": 1.0171, "step": 1507 }, { "epoch": 0.7932667017359285, "grad_norm": 2.069326877593994, "learning_rate": 4.8110920338486285e-06, "loss": 1.0283, "step": 1508 }, { "epoch": 0.793792740662809, "grad_norm": 2.076786518096924, "learning_rate": 4.810827041030768e-06, "loss": 0.9942, "step": 1509 }, { "epoch": 0.7943187795896897, "grad_norm": 1.8861708641052246, "learning_rate": 4.810561869790571e-06, "loss": 0.9909, "step": 1510 }, { "epoch": 0.7948448185165702, "grad_norm": 2.064493417739868, "learning_rate": 4.810296520148513e-06, "loss": 1.0302, "step": 1511 }, { "epoch": 0.7953708574434508, "grad_norm": 2.0212459564208984, "learning_rate": 4.810030992125081e-06, "loss": 0.9912, "step": 1512 }, { "epoch": 0.7958968963703315, "grad_norm": 2.047384023666382, "learning_rate": 4.809765285740776e-06, "loss": 1.0165, "step": 1513 }, { "epoch": 0.796422935297212, "grad_norm": 2.2222740650177, "learning_rate": 4.809499401016115e-06, "loss": 1.0295, "step": 1514 }, { "epoch": 0.7969489742240926, "grad_norm": 1.9516112804412842, "learning_rate": 4.809233337971627e-06, "loss": 0.9562, "step": 1515 }, { "epoch": 0.7974750131509731, "grad_norm": 2.0002121925354004, "learning_rate": 4.808967096627855e-06, "loss": 1.0076, "step": 1516 }, { "epoch": 0.7980010520778538, "grad_norm": 2.182039260864258, "learning_rate": 4.808700677005357e-06, "loss": 0.9925, "step": 1517 }, { "epoch": 0.7985270910047344, "grad_norm": 2.0578761100769043, "learning_rate": 4.808434079124701e-06, "loss": 0.9831, "step": 1518 }, { "epoch": 0.7990531299316149, "grad_norm": 1.8856642246246338, "learning_rate": 4.8081673030064735e-06, "loss": 1.0309, "step": 1519 }, { "epoch": 0.7995791688584956, "grad_norm": 2.1273880004882812, "learning_rate": 4.807900348671272e-06, "loss": 1.0581, "step": 1520 }, { "epoch": 0.8001052077853761, "grad_norm": 2.0696675777435303, "learning_rate": 4.8076332161397085e-06, "loss": 1.0402, "step": 1521 }, { "epoch": 0.8006312467122567, "grad_norm": 2.034176826477051, "learning_rate": 4.80736590543241e-06, "loss": 1.018, "step": 1522 }, { "epoch": 0.8011572856391374, "grad_norm": 1.9405510425567627, "learning_rate": 4.807098416570014e-06, "loss": 1.037, "step": 1523 }, { "epoch": 0.8016833245660179, "grad_norm": 2.0185844898223877, "learning_rate": 4.806830749573174e-06, "loss": 1.0817, "step": 1524 }, { "epoch": 0.8022093634928985, "grad_norm": 2.0617692470550537, "learning_rate": 4.806562904462559e-06, "loss": 0.989, "step": 1525 }, { "epoch": 0.802735402419779, "grad_norm": 2.022000789642334, "learning_rate": 4.806294881258846e-06, "loss": 1.0245, "step": 1526 }, { "epoch": 0.8032614413466597, "grad_norm": 2.189361572265625, "learning_rate": 4.806026679982733e-06, "loss": 1.0537, "step": 1527 }, { "epoch": 0.8037874802735402, "grad_norm": 2.0094563961029053, "learning_rate": 4.805758300654926e-06, "loss": 1.0437, "step": 1528 }, { "epoch": 0.8043135192004208, "grad_norm": 1.8940585851669312, "learning_rate": 4.805489743296148e-06, "loss": 0.9811, "step": 1529 }, { "epoch": 0.8048395581273015, "grad_norm": 2.0169241428375244, "learning_rate": 4.805221007927134e-06, "loss": 1.0354, "step": 1530 }, { "epoch": 0.805365597054182, "grad_norm": 2.1269545555114746, "learning_rate": 4.804952094568635e-06, "loss": 1.0439, "step": 1531 }, { "epoch": 0.8058916359810626, "grad_norm": 1.99850332736969, "learning_rate": 4.804683003241413e-06, "loss": 1.0313, "step": 1532 }, { "epoch": 0.8064176749079431, "grad_norm": 2.0577683448791504, "learning_rate": 4.804413733966244e-06, "loss": 1.0319, "step": 1533 }, { "epoch": 0.8069437138348238, "grad_norm": 1.993945837020874, "learning_rate": 4.804144286763921e-06, "loss": 1.0571, "step": 1534 }, { "epoch": 0.8074697527617044, "grad_norm": 2.00144624710083, "learning_rate": 4.803874661655246e-06, "loss": 1.0136, "step": 1535 }, { "epoch": 0.8079957916885849, "grad_norm": 2.114583969116211, "learning_rate": 4.8036048586610394e-06, "loss": 0.9996, "step": 1536 }, { "epoch": 0.8085218306154656, "grad_norm": 2.019767999649048, "learning_rate": 4.803334877802131e-06, "loss": 0.9812, "step": 1537 }, { "epoch": 0.8090478695423461, "grad_norm": 2.1253437995910645, "learning_rate": 4.803064719099368e-06, "loss": 1.041, "step": 1538 }, { "epoch": 0.8095739084692267, "grad_norm": 2.055514335632324, "learning_rate": 4.802794382573609e-06, "loss": 0.9733, "step": 1539 }, { "epoch": 0.8100999473961074, "grad_norm": 2.0274434089660645, "learning_rate": 4.802523868245727e-06, "loss": 1.0222, "step": 1540 }, { "epoch": 0.8106259863229879, "grad_norm": 2.1663291454315186, "learning_rate": 4.80225317613661e-06, "loss": 1.0308, "step": 1541 }, { "epoch": 0.8111520252498685, "grad_norm": 1.8864918947219849, "learning_rate": 4.801982306267156e-06, "loss": 0.9551, "step": 1542 }, { "epoch": 0.811678064176749, "grad_norm": 2.1302011013031006, "learning_rate": 4.801711258658281e-06, "loss": 1.0188, "step": 1543 }, { "epoch": 0.8122041031036297, "grad_norm": 1.9002829790115356, "learning_rate": 4.801440033330914e-06, "loss": 1.0278, "step": 1544 }, { "epoch": 0.8127301420305103, "grad_norm": 2.1114113330841064, "learning_rate": 4.801168630305995e-06, "loss": 1.0616, "step": 1545 }, { "epoch": 0.8132561809573908, "grad_norm": 1.9383304119110107, "learning_rate": 4.800897049604479e-06, "loss": 0.9977, "step": 1546 }, { "epoch": 0.8137822198842715, "grad_norm": 1.9206221103668213, "learning_rate": 4.800625291247338e-06, "loss": 0.9758, "step": 1547 }, { "epoch": 0.814308258811152, "grad_norm": 1.9258513450622559, "learning_rate": 4.800353355255552e-06, "loss": 0.985, "step": 1548 }, { "epoch": 0.8148342977380326, "grad_norm": 1.9767898321151733, "learning_rate": 4.800081241650117e-06, "loss": 0.9802, "step": 1549 }, { "epoch": 0.8153603366649133, "grad_norm": 1.9899487495422363, "learning_rate": 4.799808950452047e-06, "loss": 1.0104, "step": 1550 }, { "epoch": 0.8158863755917938, "grad_norm": 1.9970616102218628, "learning_rate": 4.799536481682362e-06, "loss": 1.0125, "step": 1551 }, { "epoch": 0.8164124145186744, "grad_norm": 1.9914542436599731, "learning_rate": 4.799263835362103e-06, "loss": 1.0458, "step": 1552 }, { "epoch": 0.8169384534455549, "grad_norm": 2.072939157485962, "learning_rate": 4.798991011512319e-06, "loss": 1.0663, "step": 1553 }, { "epoch": 0.8174644923724356, "grad_norm": 1.9783833026885986, "learning_rate": 4.798718010154076e-06, "loss": 1.0281, "step": 1554 }, { "epoch": 0.8179905312993162, "grad_norm": 2.4431405067443848, "learning_rate": 4.798444831308454e-06, "loss": 1.0667, "step": 1555 }, { "epoch": 0.8185165702261967, "grad_norm": 2.1270408630371094, "learning_rate": 4.798171474996543e-06, "loss": 1.0217, "step": 1556 }, { "epoch": 0.8190426091530774, "grad_norm": 2.091042995452881, "learning_rate": 4.797897941239452e-06, "loss": 1.0126, "step": 1557 }, { "epoch": 0.8195686480799579, "grad_norm": 2.016575336456299, "learning_rate": 4.797624230058299e-06, "loss": 1.0269, "step": 1558 }, { "epoch": 0.8200946870068385, "grad_norm": 2.1780738830566406, "learning_rate": 4.797350341474218e-06, "loss": 1.0405, "step": 1559 }, { "epoch": 0.820620725933719, "grad_norm": 2.0331525802612305, "learning_rate": 4.797076275508358e-06, "loss": 1.0452, "step": 1560 }, { "epoch": 0.8211467648605997, "grad_norm": 2.0023865699768066, "learning_rate": 4.796802032181877e-06, "loss": 0.9752, "step": 1561 }, { "epoch": 0.8216728037874803, "grad_norm": 2.11030912399292, "learning_rate": 4.796527611515952e-06, "loss": 1.0675, "step": 1562 }, { "epoch": 0.8221988427143608, "grad_norm": 2.0733113288879395, "learning_rate": 4.7962530135317705e-06, "loss": 1.0511, "step": 1563 }, { "epoch": 0.8227248816412415, "grad_norm": 2.0920655727386475, "learning_rate": 4.795978238250535e-06, "loss": 1.0797, "step": 1564 }, { "epoch": 0.823250920568122, "grad_norm": 2.218693256378174, "learning_rate": 4.795703285693461e-06, "loss": 1.0385, "step": 1565 }, { "epoch": 0.8237769594950026, "grad_norm": 1.9661623239517212, "learning_rate": 4.795428155881779e-06, "loss": 1.001, "step": 1566 }, { "epoch": 0.8243029984218833, "grad_norm": 2.1669209003448486, "learning_rate": 4.795152848836731e-06, "loss": 1.0317, "step": 1567 }, { "epoch": 0.8248290373487638, "grad_norm": 1.9323532581329346, "learning_rate": 4.794877364579573e-06, "loss": 1.0182, "step": 1568 }, { "epoch": 0.8253550762756444, "grad_norm": 1.9551295042037964, "learning_rate": 4.794601703131579e-06, "loss": 1.0048, "step": 1569 }, { "epoch": 0.8258811152025249, "grad_norm": 1.9809366464614868, "learning_rate": 4.7943258645140285e-06, "loss": 1.0377, "step": 1570 }, { "epoch": 0.8264071541294056, "grad_norm": 2.0074756145477295, "learning_rate": 4.794049848748224e-06, "loss": 1.0218, "step": 1571 }, { "epoch": 0.8269331930562862, "grad_norm": 2.0177736282348633, "learning_rate": 4.793773655855474e-06, "loss": 1.0402, "step": 1572 }, { "epoch": 0.8274592319831667, "grad_norm": 2.0348360538482666, "learning_rate": 4.7934972858571035e-06, "loss": 1.0312, "step": 1573 }, { "epoch": 0.8279852709100474, "grad_norm": 2.097808599472046, "learning_rate": 4.793220738774455e-06, "loss": 1.0618, "step": 1574 }, { "epoch": 0.8285113098369279, "grad_norm": 2.061023473739624, "learning_rate": 4.792944014628877e-06, "loss": 1.0464, "step": 1575 }, { "epoch": 0.8290373487638085, "grad_norm": 2.1510798931121826, "learning_rate": 4.792667113441738e-06, "loss": 1.0102, "step": 1576 }, { "epoch": 0.8295633876906892, "grad_norm": 2.1446409225463867, "learning_rate": 4.7923900352344185e-06, "loss": 1.0577, "step": 1577 }, { "epoch": 0.8300894266175697, "grad_norm": 2.2582831382751465, "learning_rate": 4.79211278002831e-06, "loss": 1.1042, "step": 1578 }, { "epoch": 0.8306154655444503, "grad_norm": 2.0069401264190674, "learning_rate": 4.791835347844821e-06, "loss": 0.9835, "step": 1579 }, { "epoch": 0.8311415044713308, "grad_norm": 2.0074360370635986, "learning_rate": 4.791557738705372e-06, "loss": 1.0596, "step": 1580 }, { "epoch": 0.8316675433982115, "grad_norm": 2.2237892150878906, "learning_rate": 4.791279952631399e-06, "loss": 1.0162, "step": 1581 }, { "epoch": 0.8321935823250921, "grad_norm": 2.0037453174591064, "learning_rate": 4.791001989644349e-06, "loss": 0.9879, "step": 1582 }, { "epoch": 0.8327196212519726, "grad_norm": 1.994869351387024, "learning_rate": 4.790723849765684e-06, "loss": 0.9908, "step": 1583 }, { "epoch": 0.8332456601788533, "grad_norm": 2.1808955669403076, "learning_rate": 4.790445533016879e-06, "loss": 0.9896, "step": 1584 }, { "epoch": 0.8337716991057338, "grad_norm": 1.9274131059646606, "learning_rate": 4.790167039419424e-06, "loss": 0.9383, "step": 1585 }, { "epoch": 0.8342977380326144, "grad_norm": 2.0095322132110596, "learning_rate": 4.789888368994823e-06, "loss": 1.0282, "step": 1586 }, { "epoch": 0.8348237769594951, "grad_norm": 1.957546353340149, "learning_rate": 4.7896095217645895e-06, "loss": 0.9559, "step": 1587 }, { "epoch": 0.8353498158863756, "grad_norm": 2.1231918334960938, "learning_rate": 4.789330497750258e-06, "loss": 1.0414, "step": 1588 }, { "epoch": 0.8358758548132562, "grad_norm": 2.0618984699249268, "learning_rate": 4.789051296973368e-06, "loss": 0.9931, "step": 1589 }, { "epoch": 0.8364018937401367, "grad_norm": 2.023416042327881, "learning_rate": 4.78877191945548e-06, "loss": 0.963, "step": 1590 }, { "epoch": 0.8369279326670174, "grad_norm": 2.0902810096740723, "learning_rate": 4.788492365218164e-06, "loss": 1.076, "step": 1591 }, { "epoch": 0.8374539715938979, "grad_norm": 1.9094164371490479, "learning_rate": 4.788212634283005e-06, "loss": 0.9444, "step": 1592 }, { "epoch": 0.8379800105207785, "grad_norm": 1.9887592792510986, "learning_rate": 4.7879327266716e-06, "loss": 1.0364, "step": 1593 }, { "epoch": 0.8385060494476592, "grad_norm": 2.0019707679748535, "learning_rate": 4.787652642405564e-06, "loss": 1.0544, "step": 1594 }, { "epoch": 0.8390320883745397, "grad_norm": 2.0776329040527344, "learning_rate": 4.787372381506521e-06, "loss": 0.9949, "step": 1595 }, { "epoch": 0.8395581273014203, "grad_norm": 2.0091662406921387, "learning_rate": 4.7870919439961094e-06, "loss": 1.0165, "step": 1596 }, { "epoch": 0.8400841662283008, "grad_norm": 2.0458288192749023, "learning_rate": 4.786811329895984e-06, "loss": 1.0341, "step": 1597 }, { "epoch": 0.8406102051551815, "grad_norm": 2.0741751194000244, "learning_rate": 4.78653053922781e-06, "loss": 1.0509, "step": 1598 }, { "epoch": 0.8411362440820621, "grad_norm": 2.141406774520874, "learning_rate": 4.7862495720132695e-06, "loss": 1.0665, "step": 1599 }, { "epoch": 0.8416622830089426, "grad_norm": 2.2400975227355957, "learning_rate": 4.785968428274055e-06, "loss": 0.93, "step": 1600 }, { "epoch": 0.8421883219358233, "grad_norm": 1.929742455482483, "learning_rate": 4.785687108031875e-06, "loss": 1.0339, "step": 1601 }, { "epoch": 0.8427143608627038, "grad_norm": 2.012728452682495, "learning_rate": 4.785405611308448e-06, "loss": 0.9945, "step": 1602 }, { "epoch": 0.8432403997895844, "grad_norm": 2.0826306343078613, "learning_rate": 4.785123938125511e-06, "loss": 1.0322, "step": 1603 }, { "epoch": 0.8437664387164651, "grad_norm": 2.0303595066070557, "learning_rate": 4.784842088504813e-06, "loss": 1.0304, "step": 1604 }, { "epoch": 0.8442924776433456, "grad_norm": 2.0710513591766357, "learning_rate": 4.7845600624681145e-06, "loss": 1.0358, "step": 1605 }, { "epoch": 0.8448185165702262, "grad_norm": 2.052515983581543, "learning_rate": 4.784277860037192e-06, "loss": 1.0316, "step": 1606 }, { "epoch": 0.8453445554971067, "grad_norm": 2.1331636905670166, "learning_rate": 4.783995481233835e-06, "loss": 1.0139, "step": 1607 }, { "epoch": 0.8458705944239874, "grad_norm": 1.9738709926605225, "learning_rate": 4.783712926079846e-06, "loss": 1.034, "step": 1608 }, { "epoch": 0.846396633350868, "grad_norm": 2.059412956237793, "learning_rate": 4.78343019459704e-06, "loss": 1.0468, "step": 1609 }, { "epoch": 0.8469226722777485, "grad_norm": 2.027773141860962, "learning_rate": 4.783147286807249e-06, "loss": 1.0028, "step": 1610 }, { "epoch": 0.8474487112046292, "grad_norm": 2.1288933753967285, "learning_rate": 4.782864202732317e-06, "loss": 1.0177, "step": 1611 }, { "epoch": 0.8479747501315097, "grad_norm": 2.160947322845459, "learning_rate": 4.7825809423941e-06, "loss": 0.9814, "step": 1612 }, { "epoch": 0.8485007890583903, "grad_norm": 2.021970272064209, "learning_rate": 4.782297505814469e-06, "loss": 1.0198, "step": 1613 }, { "epoch": 0.849026827985271, "grad_norm": 1.9154043197631836, "learning_rate": 4.7820138930153106e-06, "loss": 1.0044, "step": 1614 }, { "epoch": 0.8495528669121515, "grad_norm": 2.0858964920043945, "learning_rate": 4.781730104018521e-06, "loss": 0.9932, "step": 1615 }, { "epoch": 0.8500789058390321, "grad_norm": 2.236711025238037, "learning_rate": 4.7814461388460105e-06, "loss": 1.0495, "step": 1616 }, { "epoch": 0.8506049447659126, "grad_norm": 2.0810344219207764, "learning_rate": 4.781161997519707e-06, "loss": 1.0617, "step": 1617 }, { "epoch": 0.8511309836927933, "grad_norm": 2.224187135696411, "learning_rate": 4.780877680061551e-06, "loss": 0.9911, "step": 1618 }, { "epoch": 0.8516570226196739, "grad_norm": 1.8846218585968018, "learning_rate": 4.780593186493491e-06, "loss": 1.0185, "step": 1619 }, { "epoch": 0.8521830615465544, "grad_norm": 2.0876333713531494, "learning_rate": 4.780308516837495e-06, "loss": 1.0173, "step": 1620 }, { "epoch": 0.8527091004734351, "grad_norm": 1.942492961883545, "learning_rate": 4.780023671115544e-06, "loss": 1.0154, "step": 1621 }, { "epoch": 0.8532351394003156, "grad_norm": 1.9483400583267212, "learning_rate": 4.779738649349629e-06, "loss": 1.0492, "step": 1622 }, { "epoch": 0.8537611783271962, "grad_norm": 1.8866205215454102, "learning_rate": 4.7794534515617586e-06, "loss": 0.9896, "step": 1623 }, { "epoch": 0.8542872172540767, "grad_norm": 2.146117687225342, "learning_rate": 4.779168077773953e-06, "loss": 1.0391, "step": 1624 }, { "epoch": 0.8548132561809574, "grad_norm": 2.099858283996582, "learning_rate": 4.778882528008245e-06, "loss": 1.0185, "step": 1625 }, { "epoch": 0.855339295107838, "grad_norm": 2.0597662925720215, "learning_rate": 4.7785968022866846e-06, "loss": 1.0373, "step": 1626 }, { "epoch": 0.8558653340347185, "grad_norm": 2.0234663486480713, "learning_rate": 4.7783109006313316e-06, "loss": 1.0471, "step": 1627 }, { "epoch": 0.8563913729615992, "grad_norm": 1.9113049507141113, "learning_rate": 4.778024823064261e-06, "loss": 1.01, "step": 1628 }, { "epoch": 0.8569174118884797, "grad_norm": 2.4924910068511963, "learning_rate": 4.777738569607562e-06, "loss": 1.0267, "step": 1629 }, { "epoch": 0.8574434508153603, "grad_norm": 1.9605613946914673, "learning_rate": 4.777452140283336e-06, "loss": 1.0237, "step": 1630 }, { "epoch": 0.857969489742241, "grad_norm": 2.1404225826263428, "learning_rate": 4.7771655351136996e-06, "loss": 1.0353, "step": 1631 }, { "epoch": 0.8584955286691215, "grad_norm": 2.1174509525299072, "learning_rate": 4.776878754120781e-06, "loss": 1.0517, "step": 1632 }, { "epoch": 0.8590215675960021, "grad_norm": 1.895843267440796, "learning_rate": 4.7765917973267226e-06, "loss": 0.9479, "step": 1633 }, { "epoch": 0.8595476065228826, "grad_norm": 2.080152988433838, "learning_rate": 4.776304664753682e-06, "loss": 1.0642, "step": 1634 }, { "epoch": 0.8600736454497633, "grad_norm": 1.9730490446090698, "learning_rate": 4.776017356423827e-06, "loss": 1.0059, "step": 1635 }, { "epoch": 0.8605996843766439, "grad_norm": 2.19085693359375, "learning_rate": 4.775729872359343e-06, "loss": 1.0368, "step": 1636 }, { "epoch": 0.8611257233035244, "grad_norm": 2.14911150932312, "learning_rate": 4.775442212582428e-06, "loss": 1.0583, "step": 1637 }, { "epoch": 0.8616517622304051, "grad_norm": 1.9603419303894043, "learning_rate": 4.775154377115291e-06, "loss": 1.0336, "step": 1638 }, { "epoch": 0.8621778011572856, "grad_norm": 1.9417442083358765, "learning_rate": 4.774866365980156e-06, "loss": 0.9885, "step": 1639 }, { "epoch": 0.8627038400841662, "grad_norm": 2.092170000076294, "learning_rate": 4.774578179199261e-06, "loss": 1.0496, "step": 1640 }, { "epoch": 0.8632298790110469, "grad_norm": 2.0614163875579834, "learning_rate": 4.774289816794858e-06, "loss": 1.0011, "step": 1641 }, { "epoch": 0.8637559179379274, "grad_norm": 2.168977975845337, "learning_rate": 4.774001278789211e-06, "loss": 1.0342, "step": 1642 }, { "epoch": 0.864281956864808, "grad_norm": 2.0560708045959473, "learning_rate": 4.773712565204599e-06, "loss": 1.0239, "step": 1643 }, { "epoch": 0.8648079957916885, "grad_norm": 1.9980727434158325, "learning_rate": 4.773423676063314e-06, "loss": 1.0312, "step": 1644 }, { "epoch": 0.8653340347185692, "grad_norm": 2.0650413036346436, "learning_rate": 4.773134611387661e-06, "loss": 1.0468, "step": 1645 }, { "epoch": 0.8658600736454498, "grad_norm": 1.954148530960083, "learning_rate": 4.77284537119996e-06, "loss": 1.0138, "step": 1646 }, { "epoch": 0.8663861125723303, "grad_norm": 2.092515468597412, "learning_rate": 4.772555955522543e-06, "loss": 0.987, "step": 1647 }, { "epoch": 0.866912151499211, "grad_norm": 2.007941246032715, "learning_rate": 4.772266364377757e-06, "loss": 0.9918, "step": 1648 }, { "epoch": 0.8674381904260915, "grad_norm": 1.9608757495880127, "learning_rate": 4.77197659778796e-06, "loss": 1.0502, "step": 1649 }, { "epoch": 0.8679642293529721, "grad_norm": 2.0067436695098877, "learning_rate": 4.771686655775527e-06, "loss": 1.0335, "step": 1650 }, { "epoch": 0.8684902682798528, "grad_norm": 2.079745292663574, "learning_rate": 4.771396538362845e-06, "loss": 1.043, "step": 1651 }, { "epoch": 0.8690163072067333, "grad_norm": 1.9542405605316162, "learning_rate": 4.771106245572313e-06, "loss": 0.984, "step": 1652 }, { "epoch": 0.8695423461336139, "grad_norm": 2.028416872024536, "learning_rate": 4.770815777426346e-06, "loss": 0.9933, "step": 1653 }, { "epoch": 0.8700683850604944, "grad_norm": 1.9436818361282349, "learning_rate": 4.77052513394737e-06, "loss": 1.0118, "step": 1654 }, { "epoch": 0.8705944239873751, "grad_norm": 2.028409004211426, "learning_rate": 4.770234315157828e-06, "loss": 1.0494, "step": 1655 }, { "epoch": 0.8711204629142556, "grad_norm": 2.0709540843963623, "learning_rate": 4.769943321080174e-06, "loss": 1.0542, "step": 1656 }, { "epoch": 0.8716465018411362, "grad_norm": 2.0256619453430176, "learning_rate": 4.7696521517368755e-06, "loss": 1.0011, "step": 1657 }, { "epoch": 0.8721725407680169, "grad_norm": 2.0937297344207764, "learning_rate": 4.769360807150414e-06, "loss": 0.9974, "step": 1658 }, { "epoch": 0.8726985796948974, "grad_norm": 2.2346062660217285, "learning_rate": 4.769069287343285e-06, "loss": 1.0128, "step": 1659 }, { "epoch": 0.873224618621778, "grad_norm": 2.1082491874694824, "learning_rate": 4.7687775923379975e-06, "loss": 1.0321, "step": 1660 }, { "epoch": 0.8737506575486585, "grad_norm": 2.0769453048706055, "learning_rate": 4.768485722157074e-06, "loss": 0.973, "step": 1661 }, { "epoch": 0.8742766964755392, "grad_norm": 2.0329558849334717, "learning_rate": 4.768193676823048e-06, "loss": 1.0102, "step": 1662 }, { "epoch": 0.8748027354024198, "grad_norm": 2.0758261680603027, "learning_rate": 4.767901456358471e-06, "loss": 1.0125, "step": 1663 }, { "epoch": 0.8753287743293003, "grad_norm": 2.12320613861084, "learning_rate": 4.767609060785905e-06, "loss": 1.0294, "step": 1664 }, { "epoch": 0.875854813256181, "grad_norm": 1.9771841764450073, "learning_rate": 4.767316490127927e-06, "loss": 0.9886, "step": 1665 }, { "epoch": 0.8763808521830615, "grad_norm": 1.9373329877853394, "learning_rate": 4.7670237444071255e-06, "loss": 0.994, "step": 1666 }, { "epoch": 0.8769068911099421, "grad_norm": 2.0343801975250244, "learning_rate": 4.766730823646105e-06, "loss": 1.0352, "step": 1667 }, { "epoch": 0.8774329300368228, "grad_norm": 2.020343542098999, "learning_rate": 4.766437727867481e-06, "loss": 0.979, "step": 1668 }, { "epoch": 0.8779589689637033, "grad_norm": 2.107820510864258, "learning_rate": 4.766144457093886e-06, "loss": 1.0296, "step": 1669 }, { "epoch": 0.8784850078905839, "grad_norm": 2.1452198028564453, "learning_rate": 4.765851011347962e-06, "loss": 1.0438, "step": 1670 }, { "epoch": 0.8790110468174644, "grad_norm": 2.087686777114868, "learning_rate": 4.7655573906523665e-06, "loss": 0.9788, "step": 1671 }, { "epoch": 0.8795370857443451, "grad_norm": 2.083097457885742, "learning_rate": 4.765263595029771e-06, "loss": 0.9921, "step": 1672 }, { "epoch": 0.8800631246712257, "grad_norm": 2.0001168251037598, "learning_rate": 4.76496962450286e-06, "loss": 0.9784, "step": 1673 }, { "epoch": 0.8805891635981062, "grad_norm": 1.9493898153305054, "learning_rate": 4.7646754790943315e-06, "loss": 1.0145, "step": 1674 }, { "epoch": 0.8811152025249869, "grad_norm": 2.140746831893921, "learning_rate": 4.764381158826896e-06, "loss": 1.0286, "step": 1675 }, { "epoch": 0.8816412414518674, "grad_norm": 2.0411407947540283, "learning_rate": 4.764086663723278e-06, "loss": 1.0297, "step": 1676 }, { "epoch": 0.882167280378748, "grad_norm": 2.164043664932251, "learning_rate": 4.763791993806218e-06, "loss": 1.0246, "step": 1677 }, { "epoch": 0.8826933193056287, "grad_norm": 2.0231616497039795, "learning_rate": 4.7634971490984675e-06, "loss": 0.9692, "step": 1678 }, { "epoch": 0.8832193582325092, "grad_norm": 2.0884130001068115, "learning_rate": 4.763202129622789e-06, "loss": 1.0441, "step": 1679 }, { "epoch": 0.8837453971593898, "grad_norm": 1.959078311920166, "learning_rate": 4.7629069354019654e-06, "loss": 1.0166, "step": 1680 }, { "epoch": 0.8842714360862703, "grad_norm": 1.836121916770935, "learning_rate": 4.762611566458786e-06, "loss": 1.0347, "step": 1681 }, { "epoch": 0.884797475013151, "grad_norm": 2.099907398223877, "learning_rate": 4.762316022816058e-06, "loss": 1.0309, "step": 1682 }, { "epoch": 0.8853235139400316, "grad_norm": 1.941465139389038, "learning_rate": 4.7620203044966004e-06, "loss": 1.0203, "step": 1683 }, { "epoch": 0.8858495528669121, "grad_norm": 1.893522024154663, "learning_rate": 4.761724411523247e-06, "loss": 0.9769, "step": 1684 }, { "epoch": 0.8863755917937928, "grad_norm": 1.9919662475585938, "learning_rate": 4.7614283439188426e-06, "loss": 1.0116, "step": 1685 }, { "epoch": 0.8869016307206733, "grad_norm": 1.9670614004135132, "learning_rate": 4.761132101706249e-06, "loss": 0.9719, "step": 1686 }, { "epoch": 0.8874276696475539, "grad_norm": 1.9545384645462036, "learning_rate": 4.760835684908337e-06, "loss": 0.9986, "step": 1687 }, { "epoch": 0.8879537085744345, "grad_norm": 1.9402283430099487, "learning_rate": 4.7605390935479946e-06, "loss": 0.9911, "step": 1688 }, { "epoch": 0.8884797475013151, "grad_norm": 1.954526424407959, "learning_rate": 4.760242327648122e-06, "loss": 1.0021, "step": 1689 }, { "epoch": 0.8890057864281957, "grad_norm": 1.9458253383636475, "learning_rate": 4.759945387231633e-06, "loss": 1.0346, "step": 1690 }, { "epoch": 0.8895318253550762, "grad_norm": 1.9583990573883057, "learning_rate": 4.7596482723214565e-06, "loss": 1.0509, "step": 1691 }, { "epoch": 0.8900578642819569, "grad_norm": 2.0227482318878174, "learning_rate": 4.75935098294053e-06, "loss": 1.0651, "step": 1692 }, { "epoch": 0.8905839032088374, "grad_norm": 1.977971076965332, "learning_rate": 4.7590535191118096e-06, "loss": 1.0609, "step": 1693 }, { "epoch": 0.891109942135718, "grad_norm": 2.0564186573028564, "learning_rate": 4.758755880858262e-06, "loss": 1.0125, "step": 1694 }, { "epoch": 0.8916359810625987, "grad_norm": 1.9081783294677734, "learning_rate": 4.75845806820287e-06, "loss": 1.007, "step": 1695 }, { "epoch": 0.8921620199894792, "grad_norm": 2.0456745624542236, "learning_rate": 4.758160081168626e-06, "loss": 1.0116, "step": 1696 }, { "epoch": 0.8926880589163598, "grad_norm": 1.9237746000289917, "learning_rate": 4.757861919778539e-06, "loss": 1.0023, "step": 1697 }, { "epoch": 0.8932140978432404, "grad_norm": 1.9402356147766113, "learning_rate": 4.75756358405563e-06, "loss": 1.0264, "step": 1698 }, { "epoch": 0.893740136770121, "grad_norm": 1.9538573026657104, "learning_rate": 4.757265074022935e-06, "loss": 0.9582, "step": 1699 }, { "epoch": 0.8942661756970016, "grad_norm": 2.09053897857666, "learning_rate": 4.756966389703501e-06, "loss": 1.0245, "step": 1700 }, { "epoch": 0.8947922146238821, "grad_norm": 2.071685552597046, "learning_rate": 4.756667531120391e-06, "loss": 1.0124, "step": 1701 }, { "epoch": 0.8953182535507628, "grad_norm": 2.0141103267669678, "learning_rate": 4.75636849829668e-06, "loss": 0.9852, "step": 1702 }, { "epoch": 0.8958442924776433, "grad_norm": 1.9167203903198242, "learning_rate": 4.756069291255456e-06, "loss": 1.0194, "step": 1703 }, { "epoch": 0.8963703314045239, "grad_norm": 2.011918067932129, "learning_rate": 4.755769910019823e-06, "loss": 1.0029, "step": 1704 }, { "epoch": 0.8968963703314046, "grad_norm": 2.1252031326293945, "learning_rate": 4.755470354612895e-06, "loss": 1.0071, "step": 1705 }, { "epoch": 0.8974224092582851, "grad_norm": 2.0214016437530518, "learning_rate": 4.755170625057801e-06, "loss": 1.0371, "step": 1706 }, { "epoch": 0.8979484481851657, "grad_norm": 2.4289193153381348, "learning_rate": 4.754870721377685e-06, "loss": 1.0581, "step": 1707 }, { "epoch": 0.8984744871120462, "grad_norm": 2.1093404293060303, "learning_rate": 4.754570643595702e-06, "loss": 1.0017, "step": 1708 }, { "epoch": 0.8990005260389269, "grad_norm": 2.0420546531677246, "learning_rate": 4.7542703917350215e-06, "loss": 1.0642, "step": 1709 }, { "epoch": 0.8995265649658075, "grad_norm": 1.9818446636199951, "learning_rate": 4.753969965818827e-06, "loss": 1.0313, "step": 1710 }, { "epoch": 0.900052603892688, "grad_norm": 1.897628664970398, "learning_rate": 4.753669365870313e-06, "loss": 0.9875, "step": 1711 }, { "epoch": 0.9005786428195687, "grad_norm": 2.0208487510681152, "learning_rate": 4.753368591912693e-06, "loss": 1.0271, "step": 1712 }, { "epoch": 0.9011046817464492, "grad_norm": 1.9346519708633423, "learning_rate": 4.753067643969186e-06, "loss": 1.0352, "step": 1713 }, { "epoch": 0.9016307206733298, "grad_norm": 2.0617661476135254, "learning_rate": 4.75276652206303e-06, "loss": 0.9806, "step": 1714 }, { "epoch": 0.9021567596002105, "grad_norm": 1.8809938430786133, "learning_rate": 4.752465226217477e-06, "loss": 1.0333, "step": 1715 }, { "epoch": 0.902682798527091, "grad_norm": 2.047309398651123, "learning_rate": 4.752163756455789e-06, "loss": 1.0614, "step": 1716 }, { "epoch": 0.9032088374539716, "grad_norm": 2.1308083534240723, "learning_rate": 4.751862112801242e-06, "loss": 1.0229, "step": 1717 }, { "epoch": 0.9037348763808521, "grad_norm": 2.0333852767944336, "learning_rate": 4.751560295277127e-06, "loss": 1.0077, "step": 1718 }, { "epoch": 0.9042609153077328, "grad_norm": 1.9486128091812134, "learning_rate": 4.7512583039067485e-06, "loss": 1.0026, "step": 1719 }, { "epoch": 0.9047869542346134, "grad_norm": 2.004258394241333, "learning_rate": 4.750956138713424e-06, "loss": 0.986, "step": 1720 }, { "epoch": 0.9053129931614939, "grad_norm": 2.5763192176818848, "learning_rate": 4.750653799720483e-06, "loss": 0.979, "step": 1721 }, { "epoch": 0.9058390320883746, "grad_norm": 2.1086039543151855, "learning_rate": 4.750351286951269e-06, "loss": 1.0368, "step": 1722 }, { "epoch": 0.9063650710152551, "grad_norm": 2.0445361137390137, "learning_rate": 4.750048600429141e-06, "loss": 0.9756, "step": 1723 }, { "epoch": 0.9068911099421357, "grad_norm": 1.8900635242462158, "learning_rate": 4.7497457401774694e-06, "loss": 0.8947, "step": 1724 }, { "epoch": 0.9074171488690163, "grad_norm": 2.116900682449341, "learning_rate": 4.749442706219638e-06, "loss": 1.0502, "step": 1725 }, { "epoch": 0.9079431877958969, "grad_norm": 2.1096391677856445, "learning_rate": 4.749139498579044e-06, "loss": 1.0089, "step": 1726 }, { "epoch": 0.9084692267227775, "grad_norm": 2.2117018699645996, "learning_rate": 4.7488361172791005e-06, "loss": 1.056, "step": 1727 }, { "epoch": 0.908995265649658, "grad_norm": 2.0012335777282715, "learning_rate": 4.748532562343231e-06, "loss": 0.916, "step": 1728 }, { "epoch": 0.9095213045765387, "grad_norm": 1.8673421144485474, "learning_rate": 4.748228833794872e-06, "loss": 0.9844, "step": 1729 }, { "epoch": 0.9100473435034192, "grad_norm": 1.9152559041976929, "learning_rate": 4.747924931657477e-06, "loss": 0.9619, "step": 1730 }, { "epoch": 0.9105733824302998, "grad_norm": 2.107985496520996, "learning_rate": 4.7476208559545104e-06, "loss": 1.017, "step": 1731 }, { "epoch": 0.9110994213571805, "grad_norm": 2.162464141845703, "learning_rate": 4.7473166067094474e-06, "loss": 1.0197, "step": 1732 }, { "epoch": 0.911625460284061, "grad_norm": 2.085958480834961, "learning_rate": 4.747012183945784e-06, "loss": 1.0166, "step": 1733 }, { "epoch": 0.9121514992109416, "grad_norm": 2.0198309421539307, "learning_rate": 4.746707587687022e-06, "loss": 0.9883, "step": 1734 }, { "epoch": 0.9126775381378222, "grad_norm": 2.013784646987915, "learning_rate": 4.746402817956681e-06, "loss": 0.9775, "step": 1735 }, { "epoch": 0.9132035770647028, "grad_norm": 2.1442627906799316, "learning_rate": 4.746097874778293e-06, "loss": 1.0358, "step": 1736 }, { "epoch": 0.9137296159915834, "grad_norm": 2.143627643585205, "learning_rate": 4.745792758175402e-06, "loss": 0.9537, "step": 1737 }, { "epoch": 0.914255654918464, "grad_norm": 1.9581515789031982, "learning_rate": 4.745487468171566e-06, "loss": 0.9756, "step": 1738 }, { "epoch": 0.9147816938453446, "grad_norm": 1.9869537353515625, "learning_rate": 4.74518200479036e-06, "loss": 0.995, "step": 1739 }, { "epoch": 0.9153077327722251, "grad_norm": 1.9129465818405151, "learning_rate": 4.744876368055365e-06, "loss": 1.0088, "step": 1740 }, { "epoch": 0.9158337716991057, "grad_norm": 1.957229733467102, "learning_rate": 4.744570557990183e-06, "loss": 0.9832, "step": 1741 }, { "epoch": 0.9163598106259864, "grad_norm": 2.061002492904663, "learning_rate": 4.744264574618425e-06, "loss": 1.0338, "step": 1742 }, { "epoch": 0.9168858495528669, "grad_norm": 2.0439558029174805, "learning_rate": 4.743958417963715e-06, "loss": 1.0678, "step": 1743 }, { "epoch": 0.9174118884797475, "grad_norm": 2.0407450199127197, "learning_rate": 4.743652088049695e-06, "loss": 1.0219, "step": 1744 }, { "epoch": 0.917937927406628, "grad_norm": 2.2696166038513184, "learning_rate": 4.743345584900014e-06, "loss": 0.9909, "step": 1745 }, { "epoch": 0.9184639663335087, "grad_norm": 1.9783145189285278, "learning_rate": 4.74303890853834e-06, "loss": 0.9423, "step": 1746 }, { "epoch": 0.9189900052603893, "grad_norm": 2.019179344177246, "learning_rate": 4.74273205898835e-06, "loss": 0.9985, "step": 1747 }, { "epoch": 0.9195160441872698, "grad_norm": 1.966417670249939, "learning_rate": 4.742425036273737e-06, "loss": 1.0605, "step": 1748 }, { "epoch": 0.9200420831141505, "grad_norm": 1.9425163269042969, "learning_rate": 4.742117840418207e-06, "loss": 0.9855, "step": 1749 }, { "epoch": 0.920568122041031, "grad_norm": 1.9825159311294556, "learning_rate": 4.741810471445478e-06, "loss": 1.0214, "step": 1750 }, { "epoch": 0.9210941609679116, "grad_norm": 1.9764158725738525, "learning_rate": 4.741502929379284e-06, "loss": 1.0249, "step": 1751 }, { "epoch": 0.9216201998947923, "grad_norm": 2.0177724361419678, "learning_rate": 4.74119521424337e-06, "loss": 1.0434, "step": 1752 }, { "epoch": 0.9221462388216728, "grad_norm": 2.0949506759643555, "learning_rate": 4.740887326061495e-06, "loss": 1.0331, "step": 1753 }, { "epoch": 0.9226722777485534, "grad_norm": 1.9468920230865479, "learning_rate": 4.740579264857431e-06, "loss": 0.9212, "step": 1754 }, { "epoch": 0.923198316675434, "grad_norm": 2.2116925716400146, "learning_rate": 4.740271030654965e-06, "loss": 1.0241, "step": 1755 }, { "epoch": 0.9237243556023146, "grad_norm": 1.9227603673934937, "learning_rate": 4.739962623477896e-06, "loss": 0.98, "step": 1756 }, { "epoch": 0.9242503945291951, "grad_norm": 2.013141632080078, "learning_rate": 4.739654043350036e-06, "loss": 1.0321, "step": 1757 }, { "epoch": 0.9247764334560757, "grad_norm": 2.1053218841552734, "learning_rate": 4.739345290295211e-06, "loss": 1.0359, "step": 1758 }, { "epoch": 0.9253024723829564, "grad_norm": 2.072932243347168, "learning_rate": 4.739036364337261e-06, "loss": 0.9826, "step": 1759 }, { "epoch": 0.9258285113098369, "grad_norm": 2.104072093963623, "learning_rate": 4.738727265500037e-06, "loss": 1.0239, "step": 1760 }, { "epoch": 0.9263545502367175, "grad_norm": 2.0704009532928467, "learning_rate": 4.738417993807407e-06, "loss": 1.0235, "step": 1761 }, { "epoch": 0.9268805891635981, "grad_norm": 1.9992990493774414, "learning_rate": 4.738108549283249e-06, "loss": 0.988, "step": 1762 }, { "epoch": 0.9274066280904787, "grad_norm": 2.150501251220703, "learning_rate": 4.737798931951456e-06, "loss": 1.0574, "step": 1763 }, { "epoch": 0.9279326670173593, "grad_norm": 1.906421184539795, "learning_rate": 4.7374891418359345e-06, "loss": 1.0479, "step": 1764 }, { "epoch": 0.9284587059442398, "grad_norm": 1.8720351457595825, "learning_rate": 4.737179178960603e-06, "loss": 1.038, "step": 1765 }, { "epoch": 0.9289847448711205, "grad_norm": 1.9185991287231445, "learning_rate": 4.736869043349394e-06, "loss": 1.0632, "step": 1766 }, { "epoch": 0.929510783798001, "grad_norm": 2.040290594100952, "learning_rate": 4.736558735026255e-06, "loss": 0.9857, "step": 1767 }, { "epoch": 0.9300368227248816, "grad_norm": 1.9188529253005981, "learning_rate": 4.7362482540151445e-06, "loss": 1.0115, "step": 1768 }, { "epoch": 0.9305628616517623, "grad_norm": 2.092855215072632, "learning_rate": 4.7359376003400345e-06, "loss": 1.0318, "step": 1769 }, { "epoch": 0.9310889005786428, "grad_norm": 1.9537826776504517, "learning_rate": 4.735626774024912e-06, "loss": 1.0005, "step": 1770 }, { "epoch": 0.9316149395055234, "grad_norm": 1.8022964000701904, "learning_rate": 4.735315775093775e-06, "loss": 0.9696, "step": 1771 }, { "epoch": 0.932140978432404, "grad_norm": 2.0534324645996094, "learning_rate": 4.735004603570639e-06, "loss": 1.0647, "step": 1772 }, { "epoch": 0.9326670173592846, "grad_norm": 2.082421064376831, "learning_rate": 4.734693259479527e-06, "loss": 1.0168, "step": 1773 }, { "epoch": 0.9331930562861652, "grad_norm": 2.2331955432891846, "learning_rate": 4.734381742844481e-06, "loss": 1.0288, "step": 1774 }, { "epoch": 0.9337190952130457, "grad_norm": 1.9978649616241455, "learning_rate": 4.73407005368955e-06, "loss": 0.9542, "step": 1775 }, { "epoch": 0.9342451341399264, "grad_norm": 2.054856061935425, "learning_rate": 4.733758192038804e-06, "loss": 1.0457, "step": 1776 }, { "epoch": 0.9347711730668069, "grad_norm": 2.1446175575256348, "learning_rate": 4.733446157916319e-06, "loss": 1.0767, "step": 1777 }, { "epoch": 0.9352972119936875, "grad_norm": 2.149594783782959, "learning_rate": 4.7331339513461905e-06, "loss": 0.9975, "step": 1778 }, { "epoch": 0.9358232509205682, "grad_norm": 2.0066800117492676, "learning_rate": 4.732821572352522e-06, "loss": 1.0296, "step": 1779 }, { "epoch": 0.9363492898474487, "grad_norm": 2.4036574363708496, "learning_rate": 4.732509020959434e-06, "loss": 0.9726, "step": 1780 }, { "epoch": 0.9368753287743293, "grad_norm": 2.0901482105255127, "learning_rate": 4.73219629719106e-06, "loss": 1.0748, "step": 1781 }, { "epoch": 0.9374013677012099, "grad_norm": 2.093503713607788, "learning_rate": 4.731883401071543e-06, "loss": 1.0413, "step": 1782 }, { "epoch": 0.9379274066280905, "grad_norm": 2.1437647342681885, "learning_rate": 4.731570332625044e-06, "loss": 1.0624, "step": 1783 }, { "epoch": 0.9384534455549711, "grad_norm": 2.141866445541382, "learning_rate": 4.731257091875736e-06, "loss": 0.9547, "step": 1784 }, { "epoch": 0.9389794844818516, "grad_norm": 2.138530731201172, "learning_rate": 4.730943678847804e-06, "loss": 1.0498, "step": 1785 }, { "epoch": 0.9395055234087323, "grad_norm": 2.192941188812256, "learning_rate": 4.730630093565447e-06, "loss": 1.0426, "step": 1786 }, { "epoch": 0.9400315623356128, "grad_norm": 1.9256808757781982, "learning_rate": 4.730316336052877e-06, "loss": 0.9864, "step": 1787 }, { "epoch": 0.9405576012624934, "grad_norm": 2.1694893836975098, "learning_rate": 4.730002406334321e-06, "loss": 0.9926, "step": 1788 }, { "epoch": 0.941083640189374, "grad_norm": 1.9891979694366455, "learning_rate": 4.729688304434017e-06, "loss": 0.9835, "step": 1789 }, { "epoch": 0.9416096791162546, "grad_norm": 2.112396240234375, "learning_rate": 4.729374030376217e-06, "loss": 1.0131, "step": 1790 }, { "epoch": 0.9421357180431352, "grad_norm": 2.049139976501465, "learning_rate": 4.729059584185187e-06, "loss": 1.0176, "step": 1791 }, { "epoch": 0.9426617569700158, "grad_norm": 2.259706497192383, "learning_rate": 4.728744965885207e-06, "loss": 1.0566, "step": 1792 }, { "epoch": 0.9431877958968964, "grad_norm": 1.9924520254135132, "learning_rate": 4.728430175500567e-06, "loss": 0.9912, "step": 1793 }, { "epoch": 0.9437138348237769, "grad_norm": 2.1724114418029785, "learning_rate": 4.728115213055573e-06, "loss": 0.9919, "step": 1794 }, { "epoch": 0.9442398737506575, "grad_norm": 2.083853244781494, "learning_rate": 4.7278000785745445e-06, "loss": 1.0368, "step": 1795 }, { "epoch": 0.9447659126775382, "grad_norm": 2.089245080947876, "learning_rate": 4.727484772081814e-06, "loss": 1.0471, "step": 1796 }, { "epoch": 0.9452919516044187, "grad_norm": 1.9880348443984985, "learning_rate": 4.727169293601725e-06, "loss": 0.9752, "step": 1797 }, { "epoch": 0.9458179905312993, "grad_norm": 2.0518887042999268, "learning_rate": 4.7268536431586375e-06, "loss": 0.977, "step": 1798 }, { "epoch": 0.9463440294581799, "grad_norm": 2.3292527198791504, "learning_rate": 4.726537820776922e-06, "loss": 0.9696, "step": 1799 }, { "epoch": 0.9468700683850605, "grad_norm": 2.093759775161743, "learning_rate": 4.7262218264809656e-06, "loss": 1.028, "step": 1800 }, { "epoch": 0.9473961073119411, "grad_norm": 1.9579375982284546, "learning_rate": 4.7259056602951644e-06, "loss": 0.9797, "step": 1801 }, { "epoch": 0.9479221462388217, "grad_norm": 2.1174583435058594, "learning_rate": 4.725589322243932e-06, "loss": 0.9993, "step": 1802 }, { "epoch": 0.9484481851657023, "grad_norm": 2.167732000350952, "learning_rate": 4.725272812351692e-06, "loss": 1.0031, "step": 1803 }, { "epoch": 0.9489742240925828, "grad_norm": 2.1166253089904785, "learning_rate": 4.724956130642883e-06, "loss": 1.0029, "step": 1804 }, { "epoch": 0.9495002630194634, "grad_norm": 2.0212886333465576, "learning_rate": 4.724639277141957e-06, "loss": 1.0202, "step": 1805 }, { "epoch": 0.9500263019463441, "grad_norm": 2.1849446296691895, "learning_rate": 4.7243222518733775e-06, "loss": 0.9847, "step": 1806 }, { "epoch": 0.9505523408732246, "grad_norm": 2.019671678543091, "learning_rate": 4.724005054861623e-06, "loss": 1.0141, "step": 1807 }, { "epoch": 0.9510783798001052, "grad_norm": 2.0654826164245605, "learning_rate": 4.723687686131186e-06, "loss": 1.0266, "step": 1808 }, { "epoch": 0.9516044187269858, "grad_norm": 2.0668342113494873, "learning_rate": 4.7233701457065694e-06, "loss": 1.0249, "step": 1809 }, { "epoch": 0.9521304576538664, "grad_norm": 1.9022929668426514, "learning_rate": 4.723052433612292e-06, "loss": 1.0092, "step": 1810 }, { "epoch": 0.952656496580747, "grad_norm": 2.0411059856414795, "learning_rate": 4.722734549872884e-06, "loss": 0.9896, "step": 1811 }, { "epoch": 0.9531825355076275, "grad_norm": 2.0354626178741455, "learning_rate": 4.722416494512889e-06, "loss": 0.9529, "step": 1812 }, { "epoch": 0.9537085744345082, "grad_norm": 1.866688847541809, "learning_rate": 4.722098267556867e-06, "loss": 0.971, "step": 1813 }, { "epoch": 0.9542346133613887, "grad_norm": 1.9963386058807373, "learning_rate": 4.721779869029387e-06, "loss": 0.9931, "step": 1814 }, { "epoch": 0.9547606522882693, "grad_norm": 1.9810550212860107, "learning_rate": 4.721461298955033e-06, "loss": 1.0335, "step": 1815 }, { "epoch": 0.95528669121515, "grad_norm": 2.0094194412231445, "learning_rate": 4.721142557358402e-06, "loss": 1.0248, "step": 1816 }, { "epoch": 0.9558127301420305, "grad_norm": 2.110318183898926, "learning_rate": 4.720823644264106e-06, "loss": 0.9726, "step": 1817 }, { "epoch": 0.9563387690689111, "grad_norm": 2.051914691925049, "learning_rate": 4.720504559696768e-06, "loss": 1.0205, "step": 1818 }, { "epoch": 0.9568648079957917, "grad_norm": 2.0969302654266357, "learning_rate": 4.7201853036810245e-06, "loss": 1.0313, "step": 1819 }, { "epoch": 0.9573908469226723, "grad_norm": 2.098721742630005, "learning_rate": 4.719865876241525e-06, "loss": 1.0276, "step": 1820 }, { "epoch": 0.9579168858495528, "grad_norm": 1.9741021394729614, "learning_rate": 4.719546277402936e-06, "loss": 1.0142, "step": 1821 }, { "epoch": 0.9584429247764334, "grad_norm": 2.1097187995910645, "learning_rate": 4.71922650718993e-06, "loss": 0.9812, "step": 1822 }, { "epoch": 0.9589689637033141, "grad_norm": 2.1343348026275635, "learning_rate": 4.718906565627201e-06, "loss": 1.0126, "step": 1823 }, { "epoch": 0.9594950026301946, "grad_norm": 2.089698553085327, "learning_rate": 4.71858645273945e-06, "loss": 0.9982, "step": 1824 }, { "epoch": 0.9600210415570752, "grad_norm": 2.1942148208618164, "learning_rate": 4.7182661685513925e-06, "loss": 1.0781, "step": 1825 }, { "epoch": 0.9605470804839558, "grad_norm": 1.92880380153656, "learning_rate": 4.7179457130877605e-06, "loss": 1.0214, "step": 1826 }, { "epoch": 0.9610731194108364, "grad_norm": 2.093219518661499, "learning_rate": 4.717625086373295e-06, "loss": 1.0411, "step": 1827 }, { "epoch": 0.961599158337717, "grad_norm": 1.9406787157058716, "learning_rate": 4.7173042884327525e-06, "loss": 1.0296, "step": 1828 }, { "epoch": 0.9621251972645976, "grad_norm": 1.9737564325332642, "learning_rate": 4.7169833192909025e-06, "loss": 1.0119, "step": 1829 }, { "epoch": 0.9626512361914782, "grad_norm": 1.9281796216964722, "learning_rate": 4.7166621789725276e-06, "loss": 1.0203, "step": 1830 }, { "epoch": 0.9631772751183587, "grad_norm": 2.128120183944702, "learning_rate": 4.716340867502424e-06, "loss": 1.087, "step": 1831 }, { "epoch": 0.9637033140452393, "grad_norm": 2.1313352584838867, "learning_rate": 4.716019384905399e-06, "loss": 1.0049, "step": 1832 }, { "epoch": 0.96422935297212, "grad_norm": 1.882323980331421, "learning_rate": 4.715697731206275e-06, "loss": 1.052, "step": 1833 }, { "epoch": 0.9647553918990005, "grad_norm": 1.902729868888855, "learning_rate": 4.71537590642989e-06, "loss": 1.013, "step": 1834 }, { "epoch": 0.9652814308258811, "grad_norm": 1.9752705097198486, "learning_rate": 4.715053910601089e-06, "loss": 0.9964, "step": 1835 }, { "epoch": 0.9658074697527617, "grad_norm": 2.2092044353485107, "learning_rate": 4.714731743744736e-06, "loss": 1.0142, "step": 1836 }, { "epoch": 0.9663335086796423, "grad_norm": 1.9738699197769165, "learning_rate": 4.714409405885706e-06, "loss": 1.0431, "step": 1837 }, { "epoch": 0.9668595476065229, "grad_norm": 1.94752836227417, "learning_rate": 4.714086897048886e-06, "loss": 0.9776, "step": 1838 }, { "epoch": 0.9673855865334035, "grad_norm": 2.044384717941284, "learning_rate": 4.713764217259178e-06, "loss": 0.9428, "step": 1839 }, { "epoch": 0.9679116254602841, "grad_norm": 2.067378520965576, "learning_rate": 4.713441366541497e-06, "loss": 1.0222, "step": 1840 }, { "epoch": 0.9684376643871646, "grad_norm": 2.0729427337646484, "learning_rate": 4.71311834492077e-06, "loss": 1.0244, "step": 1841 }, { "epoch": 0.9689637033140452, "grad_norm": 1.9986896514892578, "learning_rate": 4.712795152421938e-06, "loss": 1.0246, "step": 1842 }, { "epoch": 0.9694897422409259, "grad_norm": 2.134274482727051, "learning_rate": 4.712471789069956e-06, "loss": 1.0317, "step": 1843 }, { "epoch": 0.9700157811678064, "grad_norm": 2.116116762161255, "learning_rate": 4.7121482548897896e-06, "loss": 1.0431, "step": 1844 }, { "epoch": 0.970541820094687, "grad_norm": 2.146329164505005, "learning_rate": 4.7118245499064205e-06, "loss": 1.0185, "step": 1845 }, { "epoch": 0.9710678590215676, "grad_norm": 2.2587080001831055, "learning_rate": 4.711500674144844e-06, "loss": 1.0172, "step": 1846 }, { "epoch": 0.9715938979484482, "grad_norm": 2.133565902709961, "learning_rate": 4.7111766276300645e-06, "loss": 1.0887, "step": 1847 }, { "epoch": 0.9721199368753288, "grad_norm": 2.4180047512054443, "learning_rate": 4.710852410387103e-06, "loss": 1.0686, "step": 1848 }, { "epoch": 0.9726459758022094, "grad_norm": 1.9758679866790771, "learning_rate": 4.7105280224409936e-06, "loss": 0.9851, "step": 1849 }, { "epoch": 0.97317201472909, "grad_norm": 2.0190632343292236, "learning_rate": 4.710203463816782e-06, "loss": 0.9967, "step": 1850 }, { "epoch": 0.9736980536559705, "grad_norm": 2.0636117458343506, "learning_rate": 4.709878734539527e-06, "loss": 1.0209, "step": 1851 }, { "epoch": 0.9742240925828511, "grad_norm": 2.0756478309631348, "learning_rate": 4.709553834634303e-06, "loss": 0.9793, "step": 1852 }, { "epoch": 0.9747501315097317, "grad_norm": 1.94191312789917, "learning_rate": 4.709228764126195e-06, "loss": 0.9697, "step": 1853 }, { "epoch": 0.9752761704366123, "grad_norm": 2.057345390319824, "learning_rate": 4.708903523040303e-06, "loss": 0.938, "step": 1854 }, { "epoch": 0.9758022093634929, "grad_norm": 2.1611337661743164, "learning_rate": 4.7085781114017384e-06, "loss": 1.0464, "step": 1855 }, { "epoch": 0.9763282482903735, "grad_norm": 1.9461411237716675, "learning_rate": 4.708252529235627e-06, "loss": 0.9934, "step": 1856 }, { "epoch": 0.9768542872172541, "grad_norm": 1.9107236862182617, "learning_rate": 4.707926776567108e-06, "loss": 0.9895, "step": 1857 }, { "epoch": 0.9773803261441346, "grad_norm": 2.0953640937805176, "learning_rate": 4.707600853421332e-06, "loss": 1.0009, "step": 1858 }, { "epoch": 0.9779063650710152, "grad_norm": 2.126648187637329, "learning_rate": 4.707274759823466e-06, "loss": 0.9801, "step": 1859 }, { "epoch": 0.9784324039978959, "grad_norm": 2.0868916511535645, "learning_rate": 4.706948495798687e-06, "loss": 0.9765, "step": 1860 }, { "epoch": 0.9789584429247764, "grad_norm": 2.0332181453704834, "learning_rate": 4.706622061372185e-06, "loss": 1.0216, "step": 1861 }, { "epoch": 0.979484481851657, "grad_norm": 2.05155348777771, "learning_rate": 4.706295456569167e-06, "loss": 1.0594, "step": 1862 }, { "epoch": 0.9800105207785376, "grad_norm": 2.1178739070892334, "learning_rate": 4.7059686814148485e-06, "loss": 1.0463, "step": 1863 }, { "epoch": 0.9805365597054182, "grad_norm": 1.9961886405944824, "learning_rate": 4.705641735934462e-06, "loss": 0.9658, "step": 1864 }, { "epoch": 0.9810625986322988, "grad_norm": 1.9905188083648682, "learning_rate": 4.705314620153251e-06, "loss": 0.9677, "step": 1865 }, { "epoch": 0.9815886375591794, "grad_norm": 1.9200838804244995, "learning_rate": 4.704987334096471e-06, "loss": 1.0011, "step": 1866 }, { "epoch": 0.98211467648606, "grad_norm": 2.069359302520752, "learning_rate": 4.704659877789395e-06, "loss": 1.01, "step": 1867 }, { "epoch": 0.9826407154129405, "grad_norm": 1.8069074153900146, "learning_rate": 4.704332251257304e-06, "loss": 1.037, "step": 1868 }, { "epoch": 0.9831667543398211, "grad_norm": 1.9900349378585815, "learning_rate": 4.704004454525496e-06, "loss": 1.0035, "step": 1869 }, { "epoch": 0.9836927932667018, "grad_norm": 1.902032494544983, "learning_rate": 4.70367648761928e-06, "loss": 1.0001, "step": 1870 }, { "epoch": 0.9842188321935823, "grad_norm": 2.5718839168548584, "learning_rate": 4.703348350563978e-06, "loss": 1.002, "step": 1871 }, { "epoch": 0.9847448711204629, "grad_norm": 1.90852952003479, "learning_rate": 4.703020043384927e-06, "loss": 1.0338, "step": 1872 }, { "epoch": 0.9852709100473435, "grad_norm": 2.0179872512817383, "learning_rate": 4.702691566107477e-06, "loss": 0.9724, "step": 1873 }, { "epoch": 0.9857969489742241, "grad_norm": 2.0315425395965576, "learning_rate": 4.702362918756988e-06, "loss": 1.0256, "step": 1874 }, { "epoch": 0.9863229879011047, "grad_norm": 1.898896336555481, "learning_rate": 4.702034101358837e-06, "loss": 0.9695, "step": 1875 }, { "epoch": 0.9868490268279853, "grad_norm": 2.1176962852478027, "learning_rate": 4.701705113938411e-06, "loss": 1.0217, "step": 1876 }, { "epoch": 0.9873750657548659, "grad_norm": 1.94914972782135, "learning_rate": 4.701375956521113e-06, "loss": 1.0081, "step": 1877 }, { "epoch": 0.9879011046817464, "grad_norm": 1.9665032625198364, "learning_rate": 4.701046629132358e-06, "loss": 1.0174, "step": 1878 }, { "epoch": 0.988427143608627, "grad_norm": 2.005793571472168, "learning_rate": 4.700717131797573e-06, "loss": 0.9653, "step": 1879 }, { "epoch": 0.9889531825355077, "grad_norm": 2.0769705772399902, "learning_rate": 4.700387464542199e-06, "loss": 1.0142, "step": 1880 }, { "epoch": 0.9894792214623882, "grad_norm": 1.9945422410964966, "learning_rate": 4.700057627391689e-06, "loss": 1.0225, "step": 1881 }, { "epoch": 0.9900052603892688, "grad_norm": 2.1121349334716797, "learning_rate": 4.699727620371513e-06, "loss": 1.0056, "step": 1882 }, { "epoch": 0.9905312993161494, "grad_norm": 2.156942844390869, "learning_rate": 4.699397443507148e-06, "loss": 1.0049, "step": 1883 }, { "epoch": 0.99105733824303, "grad_norm": 2.065075159072876, "learning_rate": 4.699067096824091e-06, "loss": 0.9694, "step": 1884 }, { "epoch": 0.9915833771699105, "grad_norm": 2.12490177154541, "learning_rate": 4.698736580347845e-06, "loss": 1.0268, "step": 1885 }, { "epoch": 0.9921094160967912, "grad_norm": 2.039874792098999, "learning_rate": 4.698405894103932e-06, "loss": 1.0122, "step": 1886 }, { "epoch": 0.9926354550236718, "grad_norm": 2.0004734992980957, "learning_rate": 4.698075038117884e-06, "loss": 0.9996, "step": 1887 }, { "epoch": 0.9931614939505523, "grad_norm": 1.996697187423706, "learning_rate": 4.697744012415248e-06, "loss": 1.0658, "step": 1888 }, { "epoch": 0.9936875328774329, "grad_norm": 1.9783189296722412, "learning_rate": 4.69741281702158e-06, "loss": 0.9799, "step": 1889 }, { "epoch": 0.9942135718043135, "grad_norm": 2.054898738861084, "learning_rate": 4.697081451962456e-06, "loss": 1.0302, "step": 1890 }, { "epoch": 0.9947396107311941, "grad_norm": 1.953337550163269, "learning_rate": 4.696749917263458e-06, "loss": 0.9634, "step": 1891 }, { "epoch": 0.9952656496580747, "grad_norm": 2.6126086711883545, "learning_rate": 4.6964182129501855e-06, "loss": 0.9659, "step": 1892 }, { "epoch": 0.9957916885849553, "grad_norm": 1.931026816368103, "learning_rate": 4.69608633904825e-06, "loss": 1.0456, "step": 1893 }, { "epoch": 0.9963177275118359, "grad_norm": 1.9246487617492676, "learning_rate": 4.695754295583276e-06, "loss": 1.0057, "step": 1894 }, { "epoch": 0.9968437664387164, "grad_norm": 1.9731547832489014, "learning_rate": 4.695422082580901e-06, "loss": 0.9619, "step": 1895 }, { "epoch": 0.997369805365597, "grad_norm": 2.1975600719451904, "learning_rate": 4.695089700066776e-06, "loss": 0.9667, "step": 1896 }, { "epoch": 0.9978958442924777, "grad_norm": 1.9038164615631104, "learning_rate": 4.6947571480665636e-06, "loss": 0.9564, "step": 1897 }, { "epoch": 0.9984218832193582, "grad_norm": 1.9997332096099854, "learning_rate": 4.694424426605942e-06, "loss": 0.9717, "step": 1898 }, { "epoch": 0.9989479221462388, "grad_norm": 2.0790839195251465, "learning_rate": 4.6940915357106e-06, "loss": 1.044, "step": 1899 }, { "epoch": 0.9994739610731194, "grad_norm": 2.0779690742492676, "learning_rate": 4.693758475406241e-06, "loss": 1.052, "step": 1900 }, { "epoch": 1.0, "grad_norm": 2.3423078060150146, "learning_rate": 4.693425245718581e-06, "loss": 0.9887, "step": 1901 }, { "epoch": 1.0005260389268806, "grad_norm": 1.9817070960998535, "learning_rate": 4.69309184667335e-06, "loss": 0.9254, "step": 1902 }, { "epoch": 1.0010520778537613, "grad_norm": 1.8153924942016602, "learning_rate": 4.6927582782962886e-06, "loss": 0.9244, "step": 1903 }, { "epoch": 1.0015781167806417, "grad_norm": 1.982853651046753, "learning_rate": 4.6924245406131534e-06, "loss": 0.9473, "step": 1904 }, { "epoch": 1.0021041557075223, "grad_norm": 1.8686907291412354, "learning_rate": 4.692090633649712e-06, "loss": 0.9236, "step": 1905 }, { "epoch": 1.002630194634403, "grad_norm": 2.0107433795928955, "learning_rate": 4.691756557431747e-06, "loss": 0.9597, "step": 1906 }, { "epoch": 1.0031562335612836, "grad_norm": 1.9539220333099365, "learning_rate": 4.691422311985051e-06, "loss": 0.9715, "step": 1907 }, { "epoch": 1.0036822724881642, "grad_norm": 2.040505886077881, "learning_rate": 4.691087897335434e-06, "loss": 0.9907, "step": 1908 }, { "epoch": 1.0042083114150446, "grad_norm": 1.9904555082321167, "learning_rate": 4.690753313508715e-06, "loss": 0.9367, "step": 1909 }, { "epoch": 1.0047343503419253, "grad_norm": 2.037346601486206, "learning_rate": 4.6904185605307276e-06, "loss": 0.9139, "step": 1910 }, { "epoch": 1.005260389268806, "grad_norm": 2.014995574951172, "learning_rate": 4.690083638427318e-06, "loss": 0.9179, "step": 1911 }, { "epoch": 1.0057864281956865, "grad_norm": 2.044022798538208, "learning_rate": 4.689748547224349e-06, "loss": 0.9634, "step": 1912 }, { "epoch": 1.0063124671225672, "grad_norm": 2.0251998901367188, "learning_rate": 4.689413286947691e-06, "loss": 0.9557, "step": 1913 }, { "epoch": 1.0068385060494476, "grad_norm": 2.0683465003967285, "learning_rate": 4.68907785762323e-06, "loss": 0.9334, "step": 1914 }, { "epoch": 1.0073645449763282, "grad_norm": 2.0559067726135254, "learning_rate": 4.688742259276865e-06, "loss": 0.9497, "step": 1915 }, { "epoch": 1.0078905839032088, "grad_norm": 2.0504722595214844, "learning_rate": 4.688406491934509e-06, "loss": 0.9642, "step": 1916 }, { "epoch": 1.0084166228300895, "grad_norm": 1.9897568225860596, "learning_rate": 4.6880705556220865e-06, "loss": 0.9055, "step": 1917 }, { "epoch": 1.0089426617569701, "grad_norm": 2.2071170806884766, "learning_rate": 4.6877344503655365e-06, "loss": 0.9615, "step": 1918 }, { "epoch": 1.0094687006838505, "grad_norm": 1.9466966390609741, "learning_rate": 4.687398176190808e-06, "loss": 0.9088, "step": 1919 }, { "epoch": 1.0099947396107312, "grad_norm": 2.242229461669922, "learning_rate": 4.687061733123868e-06, "loss": 0.9785, "step": 1920 }, { "epoch": 1.0105207785376118, "grad_norm": 2.1745779514312744, "learning_rate": 4.686725121190692e-06, "loss": 0.9806, "step": 1921 }, { "epoch": 1.0110468174644924, "grad_norm": 2.1215994358062744, "learning_rate": 4.686388340417271e-06, "loss": 0.9587, "step": 1922 }, { "epoch": 1.011572856391373, "grad_norm": 2.0110349655151367, "learning_rate": 4.686051390829607e-06, "loss": 0.9798, "step": 1923 }, { "epoch": 1.0120988953182535, "grad_norm": 1.906720519065857, "learning_rate": 4.685714272453717e-06, "loss": 0.9697, "step": 1924 }, { "epoch": 1.0126249342451341, "grad_norm": 2.1385791301727295, "learning_rate": 4.685376985315632e-06, "loss": 1.0257, "step": 1925 }, { "epoch": 1.0131509731720147, "grad_norm": 2.169401168823242, "learning_rate": 4.685039529441393e-06, "loss": 0.9831, "step": 1926 }, { "epoch": 1.0136770120988954, "grad_norm": 2.0535483360290527, "learning_rate": 4.684701904857055e-06, "loss": 0.9376, "step": 1927 }, { "epoch": 1.014203051025776, "grad_norm": 1.9508135318756104, "learning_rate": 4.684364111588688e-06, "loss": 0.9478, "step": 1928 }, { "epoch": 1.0147290899526564, "grad_norm": 1.9286326169967651, "learning_rate": 4.684026149662373e-06, "loss": 0.9279, "step": 1929 }, { "epoch": 1.015255128879537, "grad_norm": 2.037071466445923, "learning_rate": 4.683688019104203e-06, "loss": 0.9323, "step": 1930 }, { "epoch": 1.0157811678064177, "grad_norm": 2.027493715286255, "learning_rate": 4.683349719940288e-06, "loss": 0.9484, "step": 1931 }, { "epoch": 1.0163072067332983, "grad_norm": 2.050152540206909, "learning_rate": 4.683011252196747e-06, "loss": 0.914, "step": 1932 }, { "epoch": 1.016833245660179, "grad_norm": 2.033648729324341, "learning_rate": 4.682672615899713e-06, "loss": 0.9278, "step": 1933 }, { "epoch": 1.0173592845870594, "grad_norm": 1.9856821298599243, "learning_rate": 4.682333811075334e-06, "loss": 0.9069, "step": 1934 }, { "epoch": 1.01788532351394, "grad_norm": 2.2752439975738525, "learning_rate": 4.681994837749769e-06, "loss": 0.9808, "step": 1935 }, { "epoch": 1.0184113624408206, "grad_norm": 1.9650025367736816, "learning_rate": 4.681655695949191e-06, "loss": 0.9287, "step": 1936 }, { "epoch": 1.0189374013677013, "grad_norm": 2.068004608154297, "learning_rate": 4.681316385699786e-06, "loss": 0.9446, "step": 1937 }, { "epoch": 1.0194634402945817, "grad_norm": 1.9954777956008911, "learning_rate": 4.680976907027751e-06, "loss": 0.94, "step": 1938 }, { "epoch": 1.0199894792214623, "grad_norm": 1.9835753440856934, "learning_rate": 4.6806372599593e-06, "loss": 0.912, "step": 1939 }, { "epoch": 1.020515518148343, "grad_norm": 2.903367280960083, "learning_rate": 4.6802974445206554e-06, "loss": 0.9568, "step": 1940 }, { "epoch": 1.0210415570752236, "grad_norm": 2.0142931938171387, "learning_rate": 4.679957460738056e-06, "loss": 0.9534, "step": 1941 }, { "epoch": 1.0215675960021042, "grad_norm": 2.080303192138672, "learning_rate": 4.679617308637752e-06, "loss": 0.9518, "step": 1942 }, { "epoch": 1.0220936349289846, "grad_norm": 1.963465690612793, "learning_rate": 4.679276988246007e-06, "loss": 0.9263, "step": 1943 }, { "epoch": 1.0226196738558653, "grad_norm": 2.035710334777832, "learning_rate": 4.678936499589099e-06, "loss": 0.9576, "step": 1944 }, { "epoch": 1.023145712782746, "grad_norm": 2.036870002746582, "learning_rate": 4.678595842693316e-06, "loss": 0.9179, "step": 1945 }, { "epoch": 1.0236717517096265, "grad_norm": 2.0613720417022705, "learning_rate": 4.678255017584961e-06, "loss": 0.9778, "step": 1946 }, { "epoch": 1.0241977906365072, "grad_norm": 2.149697780609131, "learning_rate": 4.67791402429035e-06, "loss": 0.9222, "step": 1947 }, { "epoch": 1.0247238295633876, "grad_norm": 1.9258294105529785, "learning_rate": 4.677572862835811e-06, "loss": 0.903, "step": 1948 }, { "epoch": 1.0252498684902682, "grad_norm": 1.9599074125289917, "learning_rate": 4.677231533247687e-06, "loss": 0.9484, "step": 1949 }, { "epoch": 1.0257759074171489, "grad_norm": 1.917982816696167, "learning_rate": 4.67689003555233e-06, "loss": 0.8994, "step": 1950 }, { "epoch": 1.0263019463440295, "grad_norm": 2.0358879566192627, "learning_rate": 4.67654836977611e-06, "loss": 0.9367, "step": 1951 }, { "epoch": 1.0268279852709101, "grad_norm": 2.0364022254943848, "learning_rate": 4.6762065359454056e-06, "loss": 0.9272, "step": 1952 }, { "epoch": 1.0273540241977905, "grad_norm": 1.9820277690887451, "learning_rate": 4.675864534086612e-06, "loss": 0.9586, "step": 1953 }, { "epoch": 1.0278800631246712, "grad_norm": 2.1438517570495605, "learning_rate": 4.675522364226135e-06, "loss": 1.0007, "step": 1954 }, { "epoch": 1.0284061020515518, "grad_norm": 1.9437652826309204, "learning_rate": 4.675180026390393e-06, "loss": 0.9622, "step": 1955 }, { "epoch": 1.0289321409784324, "grad_norm": 2.197209596633911, "learning_rate": 4.67483752060582e-06, "loss": 0.9292, "step": 1956 }, { "epoch": 1.029458179905313, "grad_norm": 2.0781588554382324, "learning_rate": 4.674494846898861e-06, "loss": 0.9032, "step": 1957 }, { "epoch": 1.0299842188321935, "grad_norm": 2.0351462364196777, "learning_rate": 4.674152005295974e-06, "loss": 0.9644, "step": 1958 }, { "epoch": 1.0305102577590741, "grad_norm": 2.1569135189056396, "learning_rate": 4.67380899582363e-06, "loss": 0.9658, "step": 1959 }, { "epoch": 1.0310362966859548, "grad_norm": 2.0446736812591553, "learning_rate": 4.6734658185083135e-06, "loss": 0.8667, "step": 1960 }, { "epoch": 1.0315623356128354, "grad_norm": 2.2169229984283447, "learning_rate": 4.673122473376522e-06, "loss": 0.917, "step": 1961 }, { "epoch": 1.032088374539716, "grad_norm": 2.000225067138672, "learning_rate": 4.6727789604547655e-06, "loss": 0.8952, "step": 1962 }, { "epoch": 1.0326144134665964, "grad_norm": 1.986311435699463, "learning_rate": 4.672435279769567e-06, "loss": 0.9558, "step": 1963 }, { "epoch": 1.033140452393477, "grad_norm": 2.138862371444702, "learning_rate": 4.672091431347463e-06, "loss": 0.9604, "step": 1964 }, { "epoch": 1.0336664913203577, "grad_norm": 2.291006565093994, "learning_rate": 4.671747415215002e-06, "loss": 1.0001, "step": 1965 }, { "epoch": 1.0341925302472383, "grad_norm": 2.2395827770233154, "learning_rate": 4.671403231398747e-06, "loss": 0.9705, "step": 1966 }, { "epoch": 1.034718569174119, "grad_norm": 1.9645211696624756, "learning_rate": 4.671058879925271e-06, "loss": 0.9185, "step": 1967 }, { "epoch": 1.0352446081009994, "grad_norm": 2.1107375621795654, "learning_rate": 4.670714360821165e-06, "loss": 0.8937, "step": 1968 }, { "epoch": 1.03577064702788, "grad_norm": 2.1025230884552, "learning_rate": 4.670369674113026e-06, "loss": 0.9537, "step": 1969 }, { "epoch": 1.0362966859547607, "grad_norm": 2.0165746212005615, "learning_rate": 4.67002481982747e-06, "loss": 0.9679, "step": 1970 }, { "epoch": 1.0368227248816413, "grad_norm": 2.054098606109619, "learning_rate": 4.669679797991123e-06, "loss": 0.9748, "step": 1971 }, { "epoch": 1.037348763808522, "grad_norm": 2.160297393798828, "learning_rate": 4.669334608630627e-06, "loss": 0.9211, "step": 1972 }, { "epoch": 1.0378748027354023, "grad_norm": 2.1559903621673584, "learning_rate": 4.668989251772631e-06, "loss": 0.8847, "step": 1973 }, { "epoch": 1.038400841662283, "grad_norm": 1.9618030786514282, "learning_rate": 4.6686437274438025e-06, "loss": 0.9079, "step": 1974 }, { "epoch": 1.0389268805891636, "grad_norm": 1.9198315143585205, "learning_rate": 4.668298035670818e-06, "loss": 0.9138, "step": 1975 }, { "epoch": 1.0394529195160442, "grad_norm": 2.126873731613159, "learning_rate": 4.667952176480373e-06, "loss": 0.9888, "step": 1976 }, { "epoch": 1.0399789584429249, "grad_norm": 1.9329999685287476, "learning_rate": 4.667606149899168e-06, "loss": 0.9022, "step": 1977 }, { "epoch": 1.0405049973698053, "grad_norm": 2.1013362407684326, "learning_rate": 4.667259955953921e-06, "loss": 0.9787, "step": 1978 }, { "epoch": 1.041031036296686, "grad_norm": 1.891451358795166, "learning_rate": 4.666913594671363e-06, "loss": 0.9121, "step": 1979 }, { "epoch": 1.0415570752235666, "grad_norm": 2.084703207015991, "learning_rate": 4.666567066078237e-06, "loss": 0.9561, "step": 1980 }, { "epoch": 1.0420831141504472, "grad_norm": 1.9543099403381348, "learning_rate": 4.666220370201298e-06, "loss": 0.9344, "step": 1981 }, { "epoch": 1.0426091530773278, "grad_norm": 1.93330717086792, "learning_rate": 4.665873507067316e-06, "loss": 0.9802, "step": 1982 }, { "epoch": 1.0431351920042082, "grad_norm": 2.096243381500244, "learning_rate": 4.665526476703072e-06, "loss": 1.0, "step": 1983 }, { "epoch": 1.0436612309310889, "grad_norm": 2.0133352279663086, "learning_rate": 4.66517927913536e-06, "loss": 1.002, "step": 1984 }, { "epoch": 1.0441872698579695, "grad_norm": 1.921587586402893, "learning_rate": 4.6648319143909906e-06, "loss": 0.8968, "step": 1985 }, { "epoch": 1.0447133087848501, "grad_norm": 2.086876630783081, "learning_rate": 4.664484382496781e-06, "loss": 0.9112, "step": 1986 }, { "epoch": 1.0452393477117308, "grad_norm": 2.039179563522339, "learning_rate": 4.6641366834795665e-06, "loss": 0.9897, "step": 1987 }, { "epoch": 1.0457653866386112, "grad_norm": 1.9592386484146118, "learning_rate": 4.663788817366192e-06, "loss": 0.9364, "step": 1988 }, { "epoch": 1.0462914255654918, "grad_norm": 2.0162618160247803, "learning_rate": 4.66344078418352e-06, "loss": 0.9626, "step": 1989 }, { "epoch": 1.0468174644923725, "grad_norm": 1.9067870378494263, "learning_rate": 4.663092583958419e-06, "loss": 0.9235, "step": 1990 }, { "epoch": 1.047343503419253, "grad_norm": 2.151963710784912, "learning_rate": 4.662744216717775e-06, "loss": 0.9567, "step": 1991 }, { "epoch": 1.0478695423461337, "grad_norm": 2.244503974914551, "learning_rate": 4.662395682488487e-06, "loss": 0.9824, "step": 1992 }, { "epoch": 1.0483955812730141, "grad_norm": 2.0067191123962402, "learning_rate": 4.662046981297465e-06, "loss": 0.952, "step": 1993 }, { "epoch": 1.0489216201998948, "grad_norm": 2.012934923171997, "learning_rate": 4.6616981131716335e-06, "loss": 0.9217, "step": 1994 }, { "epoch": 1.0494476591267754, "grad_norm": 1.9931223392486572, "learning_rate": 4.6613490781379276e-06, "loss": 1.0257, "step": 1995 }, { "epoch": 1.049973698053656, "grad_norm": 1.9512076377868652, "learning_rate": 4.660999876223299e-06, "loss": 0.9377, "step": 1996 }, { "epoch": 1.0504997369805364, "grad_norm": 2.249743700027466, "learning_rate": 4.660650507454708e-06, "loss": 0.9249, "step": 1997 }, { "epoch": 1.051025775907417, "grad_norm": 2.1437506675720215, "learning_rate": 4.660300971859131e-06, "loss": 0.9217, "step": 1998 }, { "epoch": 1.0515518148342977, "grad_norm": 2.184690475463867, "learning_rate": 4.659951269463555e-06, "loss": 0.9518, "step": 1999 }, { "epoch": 1.0520778537611783, "grad_norm": 2.054569959640503, "learning_rate": 4.659601400294984e-06, "loss": 0.9346, "step": 2000 }, { "epoch": 1.052603892688059, "grad_norm": 2.049569845199585, "learning_rate": 4.6592513643804285e-06, "loss": 0.9472, "step": 2001 }, { "epoch": 1.0531299316149396, "grad_norm": 2.139266014099121, "learning_rate": 4.658901161746917e-06, "loss": 0.9687, "step": 2002 }, { "epoch": 1.05365597054182, "grad_norm": 2.0240185260772705, "learning_rate": 4.658550792421488e-06, "loss": 0.9403, "step": 2003 }, { "epoch": 1.0541820094687007, "grad_norm": 1.9521421194076538, "learning_rate": 4.6582002564311955e-06, "loss": 0.9716, "step": 2004 }, { "epoch": 1.0547080483955813, "grad_norm": 2.2795963287353516, "learning_rate": 4.657849553803104e-06, "loss": 0.9747, "step": 2005 }, { "epoch": 1.055234087322462, "grad_norm": 2.0353355407714844, "learning_rate": 4.657498684564292e-06, "loss": 0.9618, "step": 2006 }, { "epoch": 1.0557601262493423, "grad_norm": 2.4355268478393555, "learning_rate": 4.657147648741851e-06, "loss": 0.9846, "step": 2007 }, { "epoch": 1.056286165176223, "grad_norm": 2.16408371925354, "learning_rate": 4.656796446362883e-06, "loss": 0.9277, "step": 2008 }, { "epoch": 1.0568122041031036, "grad_norm": 2.2572405338287354, "learning_rate": 4.6564450774545085e-06, "loss": 1.0186, "step": 2009 }, { "epoch": 1.0573382430299842, "grad_norm": 2.01306414604187, "learning_rate": 4.656093542043854e-06, "loss": 0.9083, "step": 2010 }, { "epoch": 1.0578642819568649, "grad_norm": 2.0974233150482178, "learning_rate": 4.6557418401580634e-06, "loss": 0.9568, "step": 2011 }, { "epoch": 1.0583903208837453, "grad_norm": 2.685208797454834, "learning_rate": 4.655389971824292e-06, "loss": 0.9709, "step": 2012 }, { "epoch": 1.058916359810626, "grad_norm": 1.9793546199798584, "learning_rate": 4.6550379370697085e-06, "loss": 0.9482, "step": 2013 }, { "epoch": 1.0594423987375066, "grad_norm": 2.092241048812866, "learning_rate": 4.654685735921493e-06, "loss": 0.9631, "step": 2014 }, { "epoch": 1.0599684376643872, "grad_norm": 2.060007095336914, "learning_rate": 4.65433336840684e-06, "loss": 0.9416, "step": 2015 }, { "epoch": 1.0604944765912678, "grad_norm": 2.1290628910064697, "learning_rate": 4.653980834552956e-06, "loss": 0.9629, "step": 2016 }, { "epoch": 1.0610205155181482, "grad_norm": 2.187473773956299, "learning_rate": 4.6536281343870614e-06, "loss": 0.9476, "step": 2017 }, { "epoch": 1.0615465544450289, "grad_norm": 2.041978359222412, "learning_rate": 4.653275267936388e-06, "loss": 0.9479, "step": 2018 }, { "epoch": 1.0620725933719095, "grad_norm": 2.0300915241241455, "learning_rate": 4.652922235228182e-06, "loss": 0.9654, "step": 2019 }, { "epoch": 1.0625986322987901, "grad_norm": 1.9150184392929077, "learning_rate": 4.6525690362897006e-06, "loss": 0.9248, "step": 2020 }, { "epoch": 1.0631246712256708, "grad_norm": 1.8847254514694214, "learning_rate": 4.6522156711482146e-06, "loss": 0.9171, "step": 2021 }, { "epoch": 1.0636507101525512, "grad_norm": 2.097339630126953, "learning_rate": 4.651862139831008e-06, "loss": 0.9976, "step": 2022 }, { "epoch": 1.0641767490794318, "grad_norm": 2.09112286567688, "learning_rate": 4.651508442365379e-06, "loss": 0.9473, "step": 2023 }, { "epoch": 1.0647027880063125, "grad_norm": 2.2583703994750977, "learning_rate": 4.651154578778636e-06, "loss": 0.9849, "step": 2024 }, { "epoch": 1.065228826933193, "grad_norm": 2.1210403442382812, "learning_rate": 4.650800549098101e-06, "loss": 0.9396, "step": 2025 }, { "epoch": 1.0657548658600737, "grad_norm": 2.198775291442871, "learning_rate": 4.650446353351109e-06, "loss": 0.9, "step": 2026 }, { "epoch": 1.0662809047869541, "grad_norm": 2.0861377716064453, "learning_rate": 4.650091991565009e-06, "loss": 0.9248, "step": 2027 }, { "epoch": 1.0668069437138348, "grad_norm": 2.0532140731811523, "learning_rate": 4.649737463767161e-06, "loss": 0.8827, "step": 2028 }, { "epoch": 1.0673329826407154, "grad_norm": 2.1287131309509277, "learning_rate": 4.649382769984938e-06, "loss": 0.9748, "step": 2029 }, { "epoch": 1.067859021567596, "grad_norm": 2.07169771194458, "learning_rate": 4.649027910245728e-06, "loss": 0.9893, "step": 2030 }, { "epoch": 1.0683850604944767, "grad_norm": 2.0845425128936768, "learning_rate": 4.648672884576929e-06, "loss": 0.9565, "step": 2031 }, { "epoch": 1.068911099421357, "grad_norm": 2.153416395187378, "learning_rate": 4.648317693005955e-06, "loss": 0.9745, "step": 2032 }, { "epoch": 1.0694371383482377, "grad_norm": 2.0058255195617676, "learning_rate": 4.647962335560228e-06, "loss": 0.9396, "step": 2033 }, { "epoch": 1.0699631772751184, "grad_norm": 2.191807270050049, "learning_rate": 4.647606812267187e-06, "loss": 0.9702, "step": 2034 }, { "epoch": 1.070489216201999, "grad_norm": 2.1113696098327637, "learning_rate": 4.647251123154283e-06, "loss": 0.9245, "step": 2035 }, { "epoch": 1.0710152551288796, "grad_norm": 2.034909248352051, "learning_rate": 4.646895268248979e-06, "loss": 0.9886, "step": 2036 }, { "epoch": 1.07154129405576, "grad_norm": 1.967405080795288, "learning_rate": 4.646539247578751e-06, "loss": 0.9364, "step": 2037 }, { "epoch": 1.0720673329826407, "grad_norm": 2.005378484725952, "learning_rate": 4.646183061171087e-06, "loss": 0.9065, "step": 2038 }, { "epoch": 1.0725933719095213, "grad_norm": 2.1951041221618652, "learning_rate": 4.645826709053489e-06, "loss": 0.9649, "step": 2039 }, { "epoch": 1.073119410836402, "grad_norm": 2.10109543800354, "learning_rate": 4.6454701912534736e-06, "loss": 0.9455, "step": 2040 }, { "epoch": 1.0736454497632826, "grad_norm": 2.1643972396850586, "learning_rate": 4.6451135077985655e-06, "loss": 0.9892, "step": 2041 }, { "epoch": 1.074171488690163, "grad_norm": 2.028247833251953, "learning_rate": 4.644756658716305e-06, "loss": 0.9791, "step": 2042 }, { "epoch": 1.0746975276170436, "grad_norm": 2.0592141151428223, "learning_rate": 4.644399644034246e-06, "loss": 0.9557, "step": 2043 }, { "epoch": 1.0752235665439243, "grad_norm": 1.9696271419525146, "learning_rate": 4.644042463779954e-06, "loss": 0.9328, "step": 2044 }, { "epoch": 1.075749605470805, "grad_norm": 2.300461530685425, "learning_rate": 4.6436851179810075e-06, "loss": 0.947, "step": 2045 }, { "epoch": 1.0762756443976855, "grad_norm": 2.0531888008117676, "learning_rate": 4.643327606664996e-06, "loss": 0.939, "step": 2046 }, { "epoch": 1.076801683324566, "grad_norm": 2.0392346382141113, "learning_rate": 4.642969929859526e-06, "loss": 0.9779, "step": 2047 }, { "epoch": 1.0773277222514466, "grad_norm": 2.2741591930389404, "learning_rate": 4.642612087592212e-06, "loss": 0.9658, "step": 2048 }, { "epoch": 1.0778537611783272, "grad_norm": 2.1253297328948975, "learning_rate": 4.642254079890684e-06, "loss": 1.0048, "step": 2049 }, { "epoch": 1.0783798001052078, "grad_norm": 2.121539831161499, "learning_rate": 4.641895906782586e-06, "loss": 0.9418, "step": 2050 }, { "epoch": 1.0789058390320885, "grad_norm": 1.9954118728637695, "learning_rate": 4.641537568295572e-06, "loss": 0.9201, "step": 2051 }, { "epoch": 1.0794318779589689, "grad_norm": 2.434898853302002, "learning_rate": 4.64117906445731e-06, "loss": 1.0158, "step": 2052 }, { "epoch": 1.0799579168858495, "grad_norm": 2.2084853649139404, "learning_rate": 4.640820395295479e-06, "loss": 0.929, "step": 2053 }, { "epoch": 1.0804839558127302, "grad_norm": 2.0358357429504395, "learning_rate": 4.640461560837774e-06, "loss": 0.9432, "step": 2054 }, { "epoch": 1.0810099947396108, "grad_norm": 2.025202751159668, "learning_rate": 4.6401025611119025e-06, "loss": 0.9768, "step": 2055 }, { "epoch": 1.0815360336664912, "grad_norm": 1.9872761964797974, "learning_rate": 4.6397433961455805e-06, "loss": 0.9303, "step": 2056 }, { "epoch": 1.0820620725933718, "grad_norm": 2.1098103523254395, "learning_rate": 4.63938406596654e-06, "loss": 0.9847, "step": 2057 }, { "epoch": 1.0825881115202525, "grad_norm": 2.0892457962036133, "learning_rate": 4.6390245706025275e-06, "loss": 0.9349, "step": 2058 }, { "epoch": 1.083114150447133, "grad_norm": 2.0502517223358154, "learning_rate": 4.638664910081298e-06, "loss": 0.9366, "step": 2059 }, { "epoch": 1.0836401893740137, "grad_norm": 1.979395866394043, "learning_rate": 4.6383050844306234e-06, "loss": 0.9782, "step": 2060 }, { "epoch": 1.0841662283008944, "grad_norm": 1.9518492221832275, "learning_rate": 4.6379450936782854e-06, "loss": 0.9456, "step": 2061 }, { "epoch": 1.0846922672277748, "grad_norm": 2.1265623569488525, "learning_rate": 4.637584937852079e-06, "loss": 1.0054, "step": 2062 }, { "epoch": 1.0852183061546554, "grad_norm": 1.8959511518478394, "learning_rate": 4.637224616979813e-06, "loss": 0.9175, "step": 2063 }, { "epoch": 1.085744345081536, "grad_norm": 1.9260917901992798, "learning_rate": 4.636864131089307e-06, "loss": 0.942, "step": 2064 }, { "epoch": 1.0862703840084167, "grad_norm": 2.039825677871704, "learning_rate": 4.636503480208397e-06, "loss": 0.9008, "step": 2065 }, { "epoch": 1.086796422935297, "grad_norm": 1.9619112014770508, "learning_rate": 4.6361426643649265e-06, "loss": 0.9189, "step": 2066 }, { "epoch": 1.0873224618621777, "grad_norm": 1.9015494585037231, "learning_rate": 4.635781683586758e-06, "loss": 0.8762, "step": 2067 }, { "epoch": 1.0878485007890584, "grad_norm": 2.2694029808044434, "learning_rate": 4.6354205379017606e-06, "loss": 0.9519, "step": 2068 }, { "epoch": 1.088374539715939, "grad_norm": 1.8958660364151, "learning_rate": 4.635059227337819e-06, "loss": 0.972, "step": 2069 }, { "epoch": 1.0889005786428196, "grad_norm": 2.080143451690674, "learning_rate": 4.634697751922832e-06, "loss": 0.9156, "step": 2070 }, { "epoch": 1.0894266175697003, "grad_norm": 2.0264220237731934, "learning_rate": 4.634336111684709e-06, "loss": 0.9619, "step": 2071 }, { "epoch": 1.0899526564965807, "grad_norm": 2.071171998977661, "learning_rate": 4.633974306651373e-06, "loss": 0.9035, "step": 2072 }, { "epoch": 1.0904786954234613, "grad_norm": 1.926443338394165, "learning_rate": 4.633612336850759e-06, "loss": 0.9724, "step": 2073 }, { "epoch": 1.091004734350342, "grad_norm": 2.2006890773773193, "learning_rate": 4.633250202310815e-06, "loss": 0.941, "step": 2074 }, { "epoch": 1.0915307732772226, "grad_norm": 1.9194120168685913, "learning_rate": 4.632887903059503e-06, "loss": 0.9307, "step": 2075 }, { "epoch": 1.092056812204103, "grad_norm": 2.0963993072509766, "learning_rate": 4.632525439124796e-06, "loss": 0.9356, "step": 2076 }, { "epoch": 1.0925828511309836, "grad_norm": 2.040682792663574, "learning_rate": 4.63216281053468e-06, "loss": 0.968, "step": 2077 }, { "epoch": 1.0931088900578643, "grad_norm": 2.061224937438965, "learning_rate": 4.631800017317154e-06, "loss": 0.9381, "step": 2078 }, { "epoch": 1.093634928984745, "grad_norm": 2.0894832611083984, "learning_rate": 4.6314370595002315e-06, "loss": 0.9246, "step": 2079 }, { "epoch": 1.0941609679116255, "grad_norm": 2.0694963932037354, "learning_rate": 4.631073937111936e-06, "loss": 0.9059, "step": 2080 }, { "epoch": 1.094687006838506, "grad_norm": 2.2278406620025635, "learning_rate": 4.6307106501803035e-06, "loss": 0.9149, "step": 2081 }, { "epoch": 1.0952130457653866, "grad_norm": 2.0981225967407227, "learning_rate": 4.630347198733385e-06, "loss": 0.9965, "step": 2082 }, { "epoch": 1.0957390846922672, "grad_norm": 2.153197765350342, "learning_rate": 4.629983582799243e-06, "loss": 0.9471, "step": 2083 }, { "epoch": 1.0962651236191479, "grad_norm": 2.0150113105773926, "learning_rate": 4.629619802405953e-06, "loss": 0.9694, "step": 2084 }, { "epoch": 1.0967911625460285, "grad_norm": 2.080817222595215, "learning_rate": 4.6292558575816035e-06, "loss": 0.9275, "step": 2085 }, { "epoch": 1.097317201472909, "grad_norm": 2.024019956588745, "learning_rate": 4.6288917483542935e-06, "loss": 0.9466, "step": 2086 }, { "epoch": 1.0978432403997895, "grad_norm": 1.9700965881347656, "learning_rate": 4.628527474752138e-06, "loss": 0.9202, "step": 2087 }, { "epoch": 1.0983692793266702, "grad_norm": 1.9840142726898193, "learning_rate": 4.628163036803263e-06, "loss": 0.9836, "step": 2088 }, { "epoch": 1.0988953182535508, "grad_norm": 2.0773203372955322, "learning_rate": 4.627798434535807e-06, "loss": 0.9819, "step": 2089 }, { "epoch": 1.0994213571804314, "grad_norm": 2.5213847160339355, "learning_rate": 4.627433667977921e-06, "loss": 0.917, "step": 2090 }, { "epoch": 1.0999473961073118, "grad_norm": 2.071960687637329, "learning_rate": 4.627068737157769e-06, "loss": 0.9579, "step": 2091 }, { "epoch": 1.1004734350341925, "grad_norm": 2.091217517852783, "learning_rate": 4.626703642103528e-06, "loss": 0.9201, "step": 2092 }, { "epoch": 1.1009994739610731, "grad_norm": 2.1105289459228516, "learning_rate": 4.62633838284339e-06, "loss": 0.9008, "step": 2093 }, { "epoch": 1.1015255128879538, "grad_norm": 2.0329155921936035, "learning_rate": 4.625972959405553e-06, "loss": 0.9629, "step": 2094 }, { "epoch": 1.1020515518148344, "grad_norm": 2.2218475341796875, "learning_rate": 4.625607371818235e-06, "loss": 0.9771, "step": 2095 }, { "epoch": 1.1025775907417148, "grad_norm": 1.9372378587722778, "learning_rate": 4.625241620109662e-06, "loss": 0.8767, "step": 2096 }, { "epoch": 1.1031036296685954, "grad_norm": 2.11558198928833, "learning_rate": 4.624875704308075e-06, "loss": 0.9399, "step": 2097 }, { "epoch": 1.103629668595476, "grad_norm": 2.135223627090454, "learning_rate": 4.624509624441726e-06, "loss": 0.9942, "step": 2098 }, { "epoch": 1.1041557075223567, "grad_norm": 1.995465636253357, "learning_rate": 4.624143380538881e-06, "loss": 0.9041, "step": 2099 }, { "epoch": 1.1046817464492373, "grad_norm": 2.067887783050537, "learning_rate": 4.6237769726278195e-06, "loss": 0.985, "step": 2100 }, { "epoch": 1.1052077853761177, "grad_norm": 2.1130104064941406, "learning_rate": 4.623410400736831e-06, "loss": 0.9802, "step": 2101 }, { "epoch": 1.1057338243029984, "grad_norm": 2.0574796199798584, "learning_rate": 4.623043664894219e-06, "loss": 0.9311, "step": 2102 }, { "epoch": 1.106259863229879, "grad_norm": 2.0544402599334717, "learning_rate": 4.6226767651283e-06, "loss": 0.9446, "step": 2103 }, { "epoch": 1.1067859021567596, "grad_norm": 2.1335291862487793, "learning_rate": 4.622309701467403e-06, "loss": 0.9573, "step": 2104 }, { "epoch": 1.1073119410836403, "grad_norm": 2.5629992485046387, "learning_rate": 4.62194247393987e-06, "loss": 0.9915, "step": 2105 }, { "epoch": 1.1078379800105207, "grad_norm": 2.0296285152435303, "learning_rate": 4.6215750825740545e-06, "loss": 0.9257, "step": 2106 }, { "epoch": 1.1083640189374013, "grad_norm": 2.0325889587402344, "learning_rate": 4.621207527398324e-06, "loss": 0.8907, "step": 2107 }, { "epoch": 1.108890057864282, "grad_norm": 2.0547173023223877, "learning_rate": 4.620839808441056e-06, "loss": 0.9733, "step": 2108 }, { "epoch": 1.1094160967911626, "grad_norm": 1.995935082435608, "learning_rate": 4.620471925730645e-06, "loss": 0.9106, "step": 2109 }, { "epoch": 1.1099421357180432, "grad_norm": 2.371973991394043, "learning_rate": 4.6201038792954945e-06, "loss": 0.9309, "step": 2110 }, { "epoch": 1.1104681746449236, "grad_norm": 2.0574443340301514, "learning_rate": 4.6197356691640225e-06, "loss": 0.9848, "step": 2111 }, { "epoch": 1.1109942135718043, "grad_norm": 2.0146894454956055, "learning_rate": 4.6193672953646585e-06, "loss": 0.973, "step": 2112 }, { "epoch": 1.111520252498685, "grad_norm": 2.1383960247039795, "learning_rate": 4.618998757925846e-06, "loss": 0.9147, "step": 2113 }, { "epoch": 1.1120462914255655, "grad_norm": 2.035088539123535, "learning_rate": 4.618630056876039e-06, "loss": 0.9245, "step": 2114 }, { "epoch": 1.1125723303524462, "grad_norm": 2.038917064666748, "learning_rate": 4.618261192243706e-06, "loss": 1.0075, "step": 2115 }, { "epoch": 1.1130983692793266, "grad_norm": 1.9224687814712524, "learning_rate": 4.617892164057328e-06, "loss": 0.9209, "step": 2116 }, { "epoch": 1.1136244082062072, "grad_norm": 1.9721827507019043, "learning_rate": 4.617522972345398e-06, "loss": 0.8663, "step": 2117 }, { "epoch": 1.1141504471330879, "grad_norm": 1.9861339330673218, "learning_rate": 4.6171536171364225e-06, "loss": 0.9306, "step": 2118 }, { "epoch": 1.1146764860599685, "grad_norm": 2.097487688064575, "learning_rate": 4.616784098458918e-06, "loss": 0.9262, "step": 2119 }, { "epoch": 1.1152025249868491, "grad_norm": 2.0926623344421387, "learning_rate": 4.616414416341418e-06, "loss": 0.945, "step": 2120 }, { "epoch": 1.1157285639137295, "grad_norm": 2.019235849380493, "learning_rate": 4.616044570812465e-06, "loss": 0.9446, "step": 2121 }, { "epoch": 1.1162546028406102, "grad_norm": 2.095885992050171, "learning_rate": 4.615674561900615e-06, "loss": 0.9503, "step": 2122 }, { "epoch": 1.1167806417674908, "grad_norm": 2.0200493335723877, "learning_rate": 4.615304389634437e-06, "loss": 0.9141, "step": 2123 }, { "epoch": 1.1173066806943714, "grad_norm": 2.2150073051452637, "learning_rate": 4.614934054042514e-06, "loss": 0.945, "step": 2124 }, { "epoch": 1.1178327196212519, "grad_norm": 2.169135570526123, "learning_rate": 4.614563555153437e-06, "loss": 1.0375, "step": 2125 }, { "epoch": 1.1183587585481325, "grad_norm": 2.796290874481201, "learning_rate": 4.614192892995817e-06, "loss": 0.8948, "step": 2126 }, { "epoch": 1.1188847974750131, "grad_norm": 2.06923508644104, "learning_rate": 4.613822067598269e-06, "loss": 0.9306, "step": 2127 }, { "epoch": 1.1194108364018938, "grad_norm": 2.1803252696990967, "learning_rate": 4.613451078989428e-06, "loss": 0.9354, "step": 2128 }, { "epoch": 1.1199368753287744, "grad_norm": 1.9848898649215698, "learning_rate": 4.6130799271979385e-06, "loss": 0.9225, "step": 2129 }, { "epoch": 1.120462914255655, "grad_norm": 2.0420773029327393, "learning_rate": 4.612708612252456e-06, "loss": 0.958, "step": 2130 }, { "epoch": 1.1209889531825354, "grad_norm": 2.1113882064819336, "learning_rate": 4.6123371341816506e-06, "loss": 0.9673, "step": 2131 }, { "epoch": 1.121514992109416, "grad_norm": 2.2430622577667236, "learning_rate": 4.611965493014206e-06, "loss": 0.922, "step": 2132 }, { "epoch": 1.1220410310362967, "grad_norm": 2.1610355377197266, "learning_rate": 4.611593688778816e-06, "loss": 0.9566, "step": 2133 }, { "epoch": 1.1225670699631773, "grad_norm": 2.0127573013305664, "learning_rate": 4.611221721504189e-06, "loss": 0.9371, "step": 2134 }, { "epoch": 1.1230931088900578, "grad_norm": 2.0172536373138428, "learning_rate": 4.6108495912190435e-06, "loss": 0.9495, "step": 2135 }, { "epoch": 1.1236191478169384, "grad_norm": 2.1512820720672607, "learning_rate": 4.610477297952114e-06, "loss": 0.9608, "step": 2136 }, { "epoch": 1.124145186743819, "grad_norm": 1.8832893371582031, "learning_rate": 4.610104841732145e-06, "loss": 0.9251, "step": 2137 }, { "epoch": 1.1246712256706997, "grad_norm": 1.8792293071746826, "learning_rate": 4.6097322225878945e-06, "loss": 0.9487, "step": 2138 }, { "epoch": 1.1251972645975803, "grad_norm": 2.035464286804199, "learning_rate": 4.609359440548133e-06, "loss": 0.9327, "step": 2139 }, { "epoch": 1.125723303524461, "grad_norm": 2.070833683013916, "learning_rate": 4.6089864956416445e-06, "loss": 0.9014, "step": 2140 }, { "epoch": 1.1262493424513413, "grad_norm": 2.0978243350982666, "learning_rate": 4.608613387897223e-06, "loss": 0.9477, "step": 2141 }, { "epoch": 1.126775381378222, "grad_norm": 2.3318967819213867, "learning_rate": 4.608240117343677e-06, "loss": 0.9646, "step": 2142 }, { "epoch": 1.1273014203051026, "grad_norm": 2.1404736042022705, "learning_rate": 4.6078666840098275e-06, "loss": 0.9688, "step": 2143 }, { "epoch": 1.1278274592319832, "grad_norm": 2.244619846343994, "learning_rate": 4.607493087924508e-06, "loss": 0.9718, "step": 2144 }, { "epoch": 1.1283534981588637, "grad_norm": 2.287501335144043, "learning_rate": 4.607119329116565e-06, "loss": 0.9263, "step": 2145 }, { "epoch": 1.1288795370857443, "grad_norm": 1.9234886169433594, "learning_rate": 4.606745407614856e-06, "loss": 0.8622, "step": 2146 }, { "epoch": 1.129405576012625, "grad_norm": 2.1752331256866455, "learning_rate": 4.606371323448252e-06, "loss": 1.0183, "step": 2147 }, { "epoch": 1.1299316149395056, "grad_norm": 2.0048513412475586, "learning_rate": 4.605997076645638e-06, "loss": 0.9563, "step": 2148 }, { "epoch": 1.1304576538663862, "grad_norm": 2.272096633911133, "learning_rate": 4.605622667235907e-06, "loss": 1.0203, "step": 2149 }, { "epoch": 1.1309836927932668, "grad_norm": 2.023000717163086, "learning_rate": 4.6052480952479715e-06, "loss": 0.9408, "step": 2150 }, { "epoch": 1.1315097317201472, "grad_norm": 2.0389719009399414, "learning_rate": 4.604873360710751e-06, "loss": 0.9345, "step": 2151 }, { "epoch": 1.1320357706470279, "grad_norm": 2.1599810123443604, "learning_rate": 4.604498463653179e-06, "loss": 0.9609, "step": 2152 }, { "epoch": 1.1325618095739085, "grad_norm": 2.0829246044158936, "learning_rate": 4.604123404104202e-06, "loss": 0.9489, "step": 2153 }, { "epoch": 1.1330878485007891, "grad_norm": 2.0154478549957275, "learning_rate": 4.603748182092779e-06, "loss": 0.9404, "step": 2154 }, { "epoch": 1.1336138874276696, "grad_norm": 1.971787452697754, "learning_rate": 4.603372797647882e-06, "loss": 0.889, "step": 2155 }, { "epoch": 1.1341399263545502, "grad_norm": 2.367708206176758, "learning_rate": 4.602997250798494e-06, "loss": 0.9641, "step": 2156 }, { "epoch": 1.1346659652814308, "grad_norm": 1.9592807292938232, "learning_rate": 4.602621541573613e-06, "loss": 0.9394, "step": 2157 }, { "epoch": 1.1351920042083115, "grad_norm": 2.111332416534424, "learning_rate": 4.602245670002246e-06, "loss": 0.9458, "step": 2158 }, { "epoch": 1.135718043135192, "grad_norm": 2.0517547130584717, "learning_rate": 4.601869636113416e-06, "loss": 0.9487, "step": 2159 }, { "epoch": 1.1362440820620725, "grad_norm": 2.054208993911743, "learning_rate": 4.601493439936156e-06, "loss": 0.9189, "step": 2160 }, { "epoch": 1.1367701209889531, "grad_norm": 2.1155214309692383, "learning_rate": 4.601117081499515e-06, "loss": 0.9308, "step": 2161 }, { "epoch": 1.1372961599158338, "grad_norm": 2.1389827728271484, "learning_rate": 4.600740560832551e-06, "loss": 0.9746, "step": 2162 }, { "epoch": 1.1378221988427144, "grad_norm": 1.9715708494186401, "learning_rate": 4.600363877964334e-06, "loss": 0.9658, "step": 2163 }, { "epoch": 1.138348237769595, "grad_norm": 2.169759750366211, "learning_rate": 4.599987032923949e-06, "loss": 0.9543, "step": 2164 }, { "epoch": 1.1388742766964755, "grad_norm": 1.9511892795562744, "learning_rate": 4.599610025740494e-06, "loss": 0.972, "step": 2165 }, { "epoch": 1.139400315623356, "grad_norm": 2.095147132873535, "learning_rate": 4.599232856443078e-06, "loss": 0.9247, "step": 2166 }, { "epoch": 1.1399263545502367, "grad_norm": 2.0037872791290283, "learning_rate": 4.5988555250608225e-06, "loss": 0.9414, "step": 2167 }, { "epoch": 1.1404523934771174, "grad_norm": 1.9899228811264038, "learning_rate": 4.598478031622862e-06, "loss": 0.9716, "step": 2168 }, { "epoch": 1.140978432403998, "grad_norm": 2.0828182697296143, "learning_rate": 4.598100376158342e-06, "loss": 0.9108, "step": 2169 }, { "epoch": 1.1415044713308784, "grad_norm": 2.2328367233276367, "learning_rate": 4.597722558696424e-06, "loss": 0.9618, "step": 2170 }, { "epoch": 1.142030510257759, "grad_norm": 1.9552658796310425, "learning_rate": 4.5973445792662776e-06, "loss": 0.9713, "step": 2171 }, { "epoch": 1.1425565491846397, "grad_norm": 1.9701144695281982, "learning_rate": 4.596966437897089e-06, "loss": 0.9383, "step": 2172 }, { "epoch": 1.1430825881115203, "grad_norm": 2.198983907699585, "learning_rate": 4.596588134618054e-06, "loss": 0.9355, "step": 2173 }, { "epoch": 1.143608627038401, "grad_norm": 2.20420503616333, "learning_rate": 4.596209669458383e-06, "loss": 0.9992, "step": 2174 }, { "epoch": 1.1441346659652813, "grad_norm": 2.103262186050415, "learning_rate": 4.595831042447296e-06, "loss": 0.966, "step": 2175 }, { "epoch": 1.144660704892162, "grad_norm": 2.0620105266571045, "learning_rate": 4.595452253614029e-06, "loss": 0.9393, "step": 2176 }, { "epoch": 1.1451867438190426, "grad_norm": 2.0859262943267822, "learning_rate": 4.595073302987828e-06, "loss": 0.9824, "step": 2177 }, { "epoch": 1.1457127827459233, "grad_norm": 2.04518985748291, "learning_rate": 4.594694190597953e-06, "loss": 0.9389, "step": 2178 }, { "epoch": 1.1462388216728039, "grad_norm": 1.9948362112045288, "learning_rate": 4.594314916473676e-06, "loss": 0.9434, "step": 2179 }, { "epoch": 1.1467648605996843, "grad_norm": 2.104213237762451, "learning_rate": 4.59393548064428e-06, "loss": 0.9765, "step": 2180 }, { "epoch": 1.147290899526565, "grad_norm": 2.026655673980713, "learning_rate": 4.593555883139062e-06, "loss": 0.9489, "step": 2181 }, { "epoch": 1.1478169384534456, "grad_norm": 2.111635446548462, "learning_rate": 4.593176123987333e-06, "loss": 0.9549, "step": 2182 }, { "epoch": 1.1483429773803262, "grad_norm": 2.0284945964813232, "learning_rate": 4.592796203218413e-06, "loss": 0.9621, "step": 2183 }, { "epoch": 1.1488690163072066, "grad_norm": 2.026057243347168, "learning_rate": 4.592416120861637e-06, "loss": 0.9412, "step": 2184 }, { "epoch": 1.1493950552340872, "grad_norm": 2.0450279712677, "learning_rate": 4.592035876946351e-06, "loss": 0.939, "step": 2185 }, { "epoch": 1.1499210941609679, "grad_norm": 2.177182674407959, "learning_rate": 4.591655471501915e-06, "loss": 0.9546, "step": 2186 }, { "epoch": 1.1504471330878485, "grad_norm": 2.2185943126678467, "learning_rate": 4.591274904557701e-06, "loss": 0.9264, "step": 2187 }, { "epoch": 1.1509731720147292, "grad_norm": 2.103032350540161, "learning_rate": 4.590894176143092e-06, "loss": 0.9784, "step": 2188 }, { "epoch": 1.1514992109416098, "grad_norm": 2.1525919437408447, "learning_rate": 4.590513286287485e-06, "loss": 0.9442, "step": 2189 }, { "epoch": 1.1520252498684902, "grad_norm": 2.0014309883117676, "learning_rate": 4.5901322350202894e-06, "loss": 0.9326, "step": 2190 }, { "epoch": 1.1525512887953708, "grad_norm": 2.0484871864318848, "learning_rate": 4.589751022370926e-06, "loss": 0.9489, "step": 2191 }, { "epoch": 1.1530773277222515, "grad_norm": 2.1447575092315674, "learning_rate": 4.58936964836883e-06, "loss": 0.9073, "step": 2192 }, { "epoch": 1.153603366649132, "grad_norm": 2.328009605407715, "learning_rate": 4.588988113043448e-06, "loss": 0.9433, "step": 2193 }, { "epoch": 1.1541294055760125, "grad_norm": 2.051511526107788, "learning_rate": 4.5886064164242364e-06, "loss": 0.954, "step": 2194 }, { "epoch": 1.1546554445028931, "grad_norm": 2.137183904647827, "learning_rate": 4.588224558540668e-06, "loss": 0.9705, "step": 2195 }, { "epoch": 1.1551814834297738, "grad_norm": 2.0305912494659424, "learning_rate": 4.587842539422228e-06, "loss": 0.988, "step": 2196 }, { "epoch": 1.1557075223566544, "grad_norm": 2.0433285236358643, "learning_rate": 4.587460359098411e-06, "loss": 0.9428, "step": 2197 }, { "epoch": 1.156233561283535, "grad_norm": 2.0450432300567627, "learning_rate": 4.587078017598726e-06, "loss": 0.9209, "step": 2198 }, { "epoch": 1.1567596002104157, "grad_norm": 2.0686779022216797, "learning_rate": 4.5866955149526945e-06, "loss": 0.937, "step": 2199 }, { "epoch": 1.157285639137296, "grad_norm": 2.069709300994873, "learning_rate": 4.58631285118985e-06, "loss": 0.9374, "step": 2200 }, { "epoch": 1.1578116780641767, "grad_norm": 1.9677889347076416, "learning_rate": 4.585930026339738e-06, "loss": 0.9577, "step": 2201 }, { "epoch": 1.1583377169910574, "grad_norm": 2.0871002674102783, "learning_rate": 4.585547040431918e-06, "loss": 0.9573, "step": 2202 }, { "epoch": 1.158863755917938, "grad_norm": 2.0991642475128174, "learning_rate": 4.585163893495961e-06, "loss": 0.9567, "step": 2203 }, { "epoch": 1.1593897948448184, "grad_norm": 2.0081803798675537, "learning_rate": 4.584780585561448e-06, "loss": 0.9739, "step": 2204 }, { "epoch": 1.159915833771699, "grad_norm": 2.147531032562256, "learning_rate": 4.584397116657977e-06, "loss": 0.989, "step": 2205 }, { "epoch": 1.1604418726985797, "grad_norm": 2.0847082138061523, "learning_rate": 4.584013486815155e-06, "loss": 0.9587, "step": 2206 }, { "epoch": 1.1609679116254603, "grad_norm": 2.165778160095215, "learning_rate": 4.583629696062604e-06, "loss": 0.9763, "step": 2207 }, { "epoch": 1.161493950552341, "grad_norm": 1.9898335933685303, "learning_rate": 4.583245744429956e-06, "loss": 0.9406, "step": 2208 }, { "epoch": 1.1620199894792216, "grad_norm": 2.101166248321533, "learning_rate": 4.582861631946857e-06, "loss": 1.0025, "step": 2209 }, { "epoch": 1.162546028406102, "grad_norm": 2.094820022583008, "learning_rate": 4.582477358642964e-06, "loss": 0.9349, "step": 2210 }, { "epoch": 1.1630720673329826, "grad_norm": 2.1106255054473877, "learning_rate": 4.582092924547948e-06, "loss": 0.9975, "step": 2211 }, { "epoch": 1.1635981062598633, "grad_norm": 2.1793222427368164, "learning_rate": 4.581708329691493e-06, "loss": 0.9982, "step": 2212 }, { "epoch": 1.164124145186744, "grad_norm": 2.1435718536376953, "learning_rate": 4.581323574103291e-06, "loss": 0.8832, "step": 2213 }, { "epoch": 1.1646501841136243, "grad_norm": 1.996934175491333, "learning_rate": 4.580938657813052e-06, "loss": 0.8712, "step": 2214 }, { "epoch": 1.165176223040505, "grad_norm": 1.928216814994812, "learning_rate": 4.580553580850495e-06, "loss": 0.9186, "step": 2215 }, { "epoch": 1.1657022619673856, "grad_norm": 2.129814386367798, "learning_rate": 4.580168343245354e-06, "loss": 0.9258, "step": 2216 }, { "epoch": 1.1662283008942662, "grad_norm": 2.2137105464935303, "learning_rate": 4.579782945027371e-06, "loss": 0.9124, "step": 2217 }, { "epoch": 1.1667543398211468, "grad_norm": 2.036886692047119, "learning_rate": 4.5793973862263045e-06, "loss": 0.9172, "step": 2218 }, { "epoch": 1.1672803787480273, "grad_norm": 2.0155868530273438, "learning_rate": 4.579011666871924e-06, "loss": 0.9145, "step": 2219 }, { "epoch": 1.167806417674908, "grad_norm": 2.001176118850708, "learning_rate": 4.5786257869940125e-06, "loss": 0.9237, "step": 2220 }, { "epoch": 1.1683324566017885, "grad_norm": 2.418034791946411, "learning_rate": 4.578239746622363e-06, "loss": 0.9415, "step": 2221 }, { "epoch": 1.1688584955286692, "grad_norm": 2.069413423538208, "learning_rate": 4.577853545786782e-06, "loss": 0.9585, "step": 2222 }, { "epoch": 1.1693845344555498, "grad_norm": 2.1006906032562256, "learning_rate": 4.577467184517089e-06, "loss": 1.0138, "step": 2223 }, { "epoch": 1.1699105733824302, "grad_norm": 2.1174235343933105, "learning_rate": 4.577080662843117e-06, "loss": 0.9108, "step": 2224 }, { "epoch": 1.1704366123093108, "grad_norm": 2.51448392868042, "learning_rate": 4.576693980794708e-06, "loss": 1.0006, "step": 2225 }, { "epoch": 1.1709626512361915, "grad_norm": 2.03072190284729, "learning_rate": 4.57630713840172e-06, "loss": 0.9314, "step": 2226 }, { "epoch": 1.171488690163072, "grad_norm": 2.079005002975464, "learning_rate": 4.575920135694019e-06, "loss": 0.9023, "step": 2227 }, { "epoch": 1.1720147290899527, "grad_norm": 1.9575252532958984, "learning_rate": 4.575532972701488e-06, "loss": 0.9384, "step": 2228 }, { "epoch": 1.1725407680168332, "grad_norm": 2.0194175243377686, "learning_rate": 4.575145649454021e-06, "loss": 0.969, "step": 2229 }, { "epoch": 1.1730668069437138, "grad_norm": 2.0641977787017822, "learning_rate": 4.574758165981523e-06, "loss": 0.9374, "step": 2230 }, { "epoch": 1.1735928458705944, "grad_norm": 2.7739837169647217, "learning_rate": 4.5743705223139115e-06, "loss": 0.9322, "step": 2231 }, { "epoch": 1.174118884797475, "grad_norm": 2.1928372383117676, "learning_rate": 4.573982718481117e-06, "loss": 0.8982, "step": 2232 }, { "epoch": 1.1746449237243557, "grad_norm": 1.8924100399017334, "learning_rate": 4.573594754513083e-06, "loss": 0.9313, "step": 2233 }, { "epoch": 1.175170962651236, "grad_norm": 2.0933282375335693, "learning_rate": 4.573206630439766e-06, "loss": 0.9171, "step": 2234 }, { "epoch": 1.1756970015781167, "grad_norm": 2.0586800575256348, "learning_rate": 4.572818346291133e-06, "loss": 0.9756, "step": 2235 }, { "epoch": 1.1762230405049974, "grad_norm": 1.9426237344741821, "learning_rate": 4.572429902097161e-06, "loss": 0.8887, "step": 2236 }, { "epoch": 1.176749079431878, "grad_norm": 1.9896386861801147, "learning_rate": 4.572041297887846e-06, "loss": 0.927, "step": 2237 }, { "epoch": 1.1772751183587586, "grad_norm": 2.138934373855591, "learning_rate": 4.571652533693192e-06, "loss": 0.9835, "step": 2238 }, { "epoch": 1.177801157285639, "grad_norm": 2.141348361968994, "learning_rate": 4.571263609543215e-06, "loss": 0.9423, "step": 2239 }, { "epoch": 1.1783271962125197, "grad_norm": 2.3880257606506348, "learning_rate": 4.570874525467945e-06, "loss": 0.9188, "step": 2240 }, { "epoch": 1.1788532351394003, "grad_norm": 2.104442834854126, "learning_rate": 4.570485281497423e-06, "loss": 0.937, "step": 2241 }, { "epoch": 1.179379274066281, "grad_norm": 2.215341329574585, "learning_rate": 4.570095877661704e-06, "loss": 0.9318, "step": 2242 }, { "epoch": 1.1799053129931614, "grad_norm": 2.0575337409973145, "learning_rate": 4.569706313990854e-06, "loss": 0.9187, "step": 2243 }, { "epoch": 1.180431351920042, "grad_norm": 2.2835347652435303, "learning_rate": 4.569316590514952e-06, "loss": 0.9305, "step": 2244 }, { "epoch": 1.1809573908469226, "grad_norm": 2.066976308822632, "learning_rate": 4.56892670726409e-06, "loss": 0.9258, "step": 2245 }, { "epoch": 1.1814834297738033, "grad_norm": 2.0150883197784424, "learning_rate": 4.568536664268369e-06, "loss": 0.9259, "step": 2246 }, { "epoch": 1.182009468700684, "grad_norm": 1.9549976587295532, "learning_rate": 4.568146461557908e-06, "loss": 0.9753, "step": 2247 }, { "epoch": 1.1825355076275645, "grad_norm": 2.251574993133545, "learning_rate": 4.5677560991628326e-06, "loss": 1.0101, "step": 2248 }, { "epoch": 1.183061546554445, "grad_norm": 2.047912836074829, "learning_rate": 4.5673655771132835e-06, "loss": 0.9074, "step": 2249 }, { "epoch": 1.1835875854813256, "grad_norm": 2.1108009815216064, "learning_rate": 4.566974895439414e-06, "loss": 0.9252, "step": 2250 }, { "epoch": 1.1841136244082062, "grad_norm": 2.06037974357605, "learning_rate": 4.566584054171391e-06, "loss": 0.9745, "step": 2251 }, { "epoch": 1.1846396633350869, "grad_norm": 2.164132595062256, "learning_rate": 4.566193053339389e-06, "loss": 0.945, "step": 2252 }, { "epoch": 1.1851657022619673, "grad_norm": 2.1002984046936035, "learning_rate": 4.565801892973599e-06, "loss": 0.9341, "step": 2253 }, { "epoch": 1.185691741188848, "grad_norm": 2.0745604038238525, "learning_rate": 4.565410573104223e-06, "loss": 0.9163, "step": 2254 }, { "epoch": 1.1862177801157285, "grad_norm": 2.0689234733581543, "learning_rate": 4.565019093761476e-06, "loss": 0.9619, "step": 2255 }, { "epoch": 1.1867438190426092, "grad_norm": 1.8939684629440308, "learning_rate": 4.564627454975583e-06, "loss": 0.9117, "step": 2256 }, { "epoch": 1.1872698579694898, "grad_norm": 2.0398480892181396, "learning_rate": 4.564235656776784e-06, "loss": 0.9354, "step": 2257 }, { "epoch": 1.1877958968963704, "grad_norm": 2.2121827602386475, "learning_rate": 4.563843699195331e-06, "loss": 0.9898, "step": 2258 }, { "epoch": 1.1883219358232509, "grad_norm": 2.0941290855407715, "learning_rate": 4.563451582261488e-06, "loss": 0.9359, "step": 2259 }, { "epoch": 1.1888479747501315, "grad_norm": 1.958443522453308, "learning_rate": 4.5630593060055285e-06, "loss": 0.8915, "step": 2260 }, { "epoch": 1.1893740136770121, "grad_norm": 2.125643014907837, "learning_rate": 4.562666870457742e-06, "loss": 0.9695, "step": 2261 }, { "epoch": 1.1899000526038928, "grad_norm": 2.0711584091186523, "learning_rate": 4.56227427564843e-06, "loss": 0.9895, "step": 2262 }, { "epoch": 1.1904260915307732, "grad_norm": 1.9734212160110474, "learning_rate": 4.561881521607905e-06, "loss": 0.9276, "step": 2263 }, { "epoch": 1.1909521304576538, "grad_norm": 1.9757225513458252, "learning_rate": 4.561488608366491e-06, "loss": 0.933, "step": 2264 }, { "epoch": 1.1914781693845344, "grad_norm": 2.0021965503692627, "learning_rate": 4.561095535954526e-06, "loss": 0.9517, "step": 2265 }, { "epoch": 1.192004208311415, "grad_norm": 2.054388999938965, "learning_rate": 4.560702304402359e-06, "loss": 0.9494, "step": 2266 }, { "epoch": 1.1925302472382957, "grad_norm": 2.0927810668945312, "learning_rate": 4.560308913740354e-06, "loss": 0.9814, "step": 2267 }, { "epoch": 1.1930562861651763, "grad_norm": 2.0837948322296143, "learning_rate": 4.559915363998883e-06, "loss": 0.9452, "step": 2268 }, { "epoch": 1.1935823250920568, "grad_norm": 2.0685925483703613, "learning_rate": 4.559521655208334e-06, "loss": 0.9683, "step": 2269 }, { "epoch": 1.1941083640189374, "grad_norm": 2.033249855041504, "learning_rate": 4.559127787399106e-06, "loss": 0.9251, "step": 2270 }, { "epoch": 1.194634402945818, "grad_norm": 2.0225706100463867, "learning_rate": 4.558733760601608e-06, "loss": 0.944, "step": 2271 }, { "epoch": 1.1951604418726987, "grad_norm": 2.263442039489746, "learning_rate": 4.558339574846265e-06, "loss": 0.9721, "step": 2272 }, { "epoch": 1.195686480799579, "grad_norm": 1.9405720233917236, "learning_rate": 4.557945230163514e-06, "loss": 0.9235, "step": 2273 }, { "epoch": 1.1962125197264597, "grad_norm": 2.0711405277252197, "learning_rate": 4.5575507265838e-06, "loss": 0.9106, "step": 2274 }, { "epoch": 1.1967385586533403, "grad_norm": 1.9209696054458618, "learning_rate": 4.557156064137585e-06, "loss": 0.9504, "step": 2275 }, { "epoch": 1.197264597580221, "grad_norm": 1.9152365922927856, "learning_rate": 4.5567612428553414e-06, "loss": 0.9134, "step": 2276 }, { "epoch": 1.1977906365071016, "grad_norm": 2.214308738708496, "learning_rate": 4.556366262767554e-06, "loss": 1.0137, "step": 2277 }, { "epoch": 1.1983166754339822, "grad_norm": 1.9821407794952393, "learning_rate": 4.555971123904719e-06, "loss": 0.9552, "step": 2278 }, { "epoch": 1.1988427143608626, "grad_norm": 1.909018874168396, "learning_rate": 4.555575826297346e-06, "loss": 0.911, "step": 2279 }, { "epoch": 1.1993687532877433, "grad_norm": 1.9589500427246094, "learning_rate": 4.555180369975956e-06, "loss": 0.9533, "step": 2280 }, { "epoch": 1.199894792214624, "grad_norm": 1.9503285884857178, "learning_rate": 4.554784754971085e-06, "loss": 0.8835, "step": 2281 }, { "epoch": 1.2004208311415046, "grad_norm": 2.0783655643463135, "learning_rate": 4.554388981313275e-06, "loss": 0.9974, "step": 2282 }, { "epoch": 1.200946870068385, "grad_norm": 2.1446335315704346, "learning_rate": 4.553993049033088e-06, "loss": 0.9428, "step": 2283 }, { "epoch": 1.2014729089952656, "grad_norm": 1.9783402681350708, "learning_rate": 4.553596958161093e-06, "loss": 0.8851, "step": 2284 }, { "epoch": 1.2019989479221462, "grad_norm": 2.1446151733398438, "learning_rate": 4.553200708727873e-06, "loss": 0.9231, "step": 2285 }, { "epoch": 1.2025249868490269, "grad_norm": 1.9833011627197266, "learning_rate": 4.552804300764022e-06, "loss": 0.9165, "step": 2286 }, { "epoch": 1.2030510257759075, "grad_norm": 2.1723484992980957, "learning_rate": 4.55240773430015e-06, "loss": 0.9263, "step": 2287 }, { "epoch": 1.203577064702788, "grad_norm": 2.1445460319519043, "learning_rate": 4.552011009366873e-06, "loss": 0.9686, "step": 2288 }, { "epoch": 1.2041031036296685, "grad_norm": 2.0098588466644287, "learning_rate": 4.551614125994824e-06, "loss": 0.9675, "step": 2289 }, { "epoch": 1.2046291425565492, "grad_norm": 2.2001070976257324, "learning_rate": 4.551217084214649e-06, "loss": 1.0049, "step": 2290 }, { "epoch": 1.2051551814834298, "grad_norm": 1.9142197370529175, "learning_rate": 4.550819884057001e-06, "loss": 0.8986, "step": 2291 }, { "epoch": 1.2056812204103105, "grad_norm": 2.1119284629821777, "learning_rate": 4.55042252555255e-06, "loss": 0.9571, "step": 2292 }, { "epoch": 1.2062072593371909, "grad_norm": 2.2181107997894287, "learning_rate": 4.550025008731977e-06, "loss": 0.9761, "step": 2293 }, { "epoch": 1.2067332982640715, "grad_norm": 1.9644392728805542, "learning_rate": 4.549627333625975e-06, "loss": 0.9717, "step": 2294 }, { "epoch": 1.2072593371909521, "grad_norm": 2.106912851333618, "learning_rate": 4.549229500265246e-06, "loss": 1.0127, "step": 2295 }, { "epoch": 1.2077853761178328, "grad_norm": 2.0657334327697754, "learning_rate": 4.548831508680511e-06, "loss": 0.9563, "step": 2296 }, { "epoch": 1.2083114150447134, "grad_norm": 1.9914659261703491, "learning_rate": 4.548433358902499e-06, "loss": 0.9604, "step": 2297 }, { "epoch": 1.2088374539715938, "grad_norm": 2.0038280487060547, "learning_rate": 4.54803505096195e-06, "loss": 0.9058, "step": 2298 }, { "epoch": 1.2093634928984744, "grad_norm": 2.0413012504577637, "learning_rate": 4.547636584889619e-06, "loss": 0.9904, "step": 2299 }, { "epoch": 1.209889531825355, "grad_norm": 2.122159719467163, "learning_rate": 4.547237960716272e-06, "loss": 0.9509, "step": 2300 }, { "epoch": 1.2104155707522357, "grad_norm": 1.907065510749817, "learning_rate": 4.546839178472688e-06, "loss": 0.8814, "step": 2301 }, { "epoch": 1.2109416096791163, "grad_norm": 1.9999891519546509, "learning_rate": 4.5464402381896565e-06, "loss": 0.9119, "step": 2302 }, { "epoch": 1.2114676486059968, "grad_norm": 2.095425844192505, "learning_rate": 4.546041139897981e-06, "loss": 0.9855, "step": 2303 }, { "epoch": 1.2119936875328774, "grad_norm": 2.1287622451782227, "learning_rate": 4.545641883628475e-06, "loss": 0.9959, "step": 2304 }, { "epoch": 1.212519726459758, "grad_norm": 2.00107479095459, "learning_rate": 4.545242469411968e-06, "loss": 0.9404, "step": 2305 }, { "epoch": 1.2130457653866387, "grad_norm": 2.1348764896392822, "learning_rate": 4.544842897279298e-06, "loss": 0.9103, "step": 2306 }, { "epoch": 1.2135718043135193, "grad_norm": 2.061293125152588, "learning_rate": 4.544443167261317e-06, "loss": 0.9234, "step": 2307 }, { "epoch": 1.2140978432403997, "grad_norm": 2.0079479217529297, "learning_rate": 4.544043279388887e-06, "loss": 0.962, "step": 2308 }, { "epoch": 1.2146238821672803, "grad_norm": 2.0040595531463623, "learning_rate": 4.543643233692887e-06, "loss": 0.9185, "step": 2309 }, { "epoch": 1.215149921094161, "grad_norm": 1.9917961359024048, "learning_rate": 4.543243030204203e-06, "loss": 0.9079, "step": 2310 }, { "epoch": 1.2156759600210416, "grad_norm": 2.262044906616211, "learning_rate": 4.5428426689537355e-06, "loss": 0.9455, "step": 2311 }, { "epoch": 1.216201998947922, "grad_norm": 2.0358335971832275, "learning_rate": 4.5424421499723974e-06, "loss": 0.9794, "step": 2312 }, { "epoch": 1.2167280378748027, "grad_norm": 2.1631860733032227, "learning_rate": 4.542041473291113e-06, "loss": 0.966, "step": 2313 }, { "epoch": 1.2172540768016833, "grad_norm": 2.0367043018341064, "learning_rate": 4.54164063894082e-06, "loss": 0.9377, "step": 2314 }, { "epoch": 1.217780115728564, "grad_norm": 2.015761613845825, "learning_rate": 4.541239646952466e-06, "loss": 0.9207, "step": 2315 }, { "epoch": 1.2183061546554446, "grad_norm": 1.893565058708191, "learning_rate": 4.540838497357014e-06, "loss": 0.963, "step": 2316 }, { "epoch": 1.2188321935823252, "grad_norm": 2.026319742202759, "learning_rate": 4.540437190185435e-06, "loss": 0.9695, "step": 2317 }, { "epoch": 1.2193582325092056, "grad_norm": 2.119520664215088, "learning_rate": 4.540035725468718e-06, "loss": 0.9977, "step": 2318 }, { "epoch": 1.2198842714360862, "grad_norm": 1.9087727069854736, "learning_rate": 4.5396341032378564e-06, "loss": 0.9048, "step": 2319 }, { "epoch": 1.2204103103629669, "grad_norm": 1.9850622415542603, "learning_rate": 4.539232323523862e-06, "loss": 0.9819, "step": 2320 }, { "epoch": 1.2209363492898475, "grad_norm": 1.9811348915100098, "learning_rate": 4.538830386357759e-06, "loss": 0.9397, "step": 2321 }, { "epoch": 1.221462388216728, "grad_norm": 2.0679426193237305, "learning_rate": 4.538428291770578e-06, "loss": 0.9311, "step": 2322 }, { "epoch": 1.2219884271436086, "grad_norm": 2.18583345413208, "learning_rate": 4.538026039793366e-06, "loss": 0.9666, "step": 2323 }, { "epoch": 1.2225144660704892, "grad_norm": 2.1667392253875732, "learning_rate": 4.537623630457184e-06, "loss": 0.9855, "step": 2324 }, { "epoch": 1.2230405049973698, "grad_norm": 2.158278465270996, "learning_rate": 4.5372210637931e-06, "loss": 0.9479, "step": 2325 }, { "epoch": 1.2235665439242505, "grad_norm": 2.0774621963500977, "learning_rate": 4.536818339832197e-06, "loss": 0.9234, "step": 2326 }, { "epoch": 1.224092582851131, "grad_norm": 2.0122995376586914, "learning_rate": 4.536415458605572e-06, "loss": 0.982, "step": 2327 }, { "epoch": 1.2246186217780115, "grad_norm": 1.9868342876434326, "learning_rate": 4.53601242014433e-06, "loss": 0.9153, "step": 2328 }, { "epoch": 1.2251446607048921, "grad_norm": 1.9843225479125977, "learning_rate": 4.535609224479591e-06, "loss": 0.944, "step": 2329 }, { "epoch": 1.2256706996317728, "grad_norm": 2.0100514888763428, "learning_rate": 4.5352058716424855e-06, "loss": 0.9057, "step": 2330 }, { "epoch": 1.2261967385586534, "grad_norm": 1.9764091968536377, "learning_rate": 4.534802361664158e-06, "loss": 0.8985, "step": 2331 }, { "epoch": 1.2267227774855338, "grad_norm": 2.1023499965667725, "learning_rate": 4.534398694575764e-06, "loss": 0.8647, "step": 2332 }, { "epoch": 1.2272488164124145, "grad_norm": 2.141122579574585, "learning_rate": 4.53399487040847e-06, "loss": 0.9913, "step": 2333 }, { "epoch": 1.227774855339295, "grad_norm": 2.193509817123413, "learning_rate": 4.533590889193457e-06, "loss": 0.9396, "step": 2334 }, { "epoch": 1.2283008942661757, "grad_norm": 2.07718563079834, "learning_rate": 4.533186750961917e-06, "loss": 0.9662, "step": 2335 }, { "epoch": 1.2288269331930564, "grad_norm": 2.5485520362854004, "learning_rate": 4.532782455745054e-06, "loss": 0.9923, "step": 2336 }, { "epoch": 1.229352972119937, "grad_norm": 2.1097700595855713, "learning_rate": 4.532378003574084e-06, "loss": 0.9689, "step": 2337 }, { "epoch": 1.2298790110468174, "grad_norm": 2.0095794200897217, "learning_rate": 4.5319733944802345e-06, "loss": 0.956, "step": 2338 }, { "epoch": 1.230405049973698, "grad_norm": 1.9793727397918701, "learning_rate": 4.531568628494748e-06, "loss": 0.8748, "step": 2339 }, { "epoch": 1.2309310889005787, "grad_norm": 1.988430142402649, "learning_rate": 4.531163705648875e-06, "loss": 0.9903, "step": 2340 }, { "epoch": 1.2314571278274593, "grad_norm": 2.1825473308563232, "learning_rate": 4.530758625973882e-06, "loss": 0.9517, "step": 2341 }, { "epoch": 1.2319831667543397, "grad_norm": 2.120129108428955, "learning_rate": 4.530353389501045e-06, "loss": 0.9399, "step": 2342 }, { "epoch": 1.2325092056812204, "grad_norm": 2.168637990951538, "learning_rate": 4.529947996261652e-06, "loss": 0.9493, "step": 2343 }, { "epoch": 1.233035244608101, "grad_norm": 1.9897968769073486, "learning_rate": 4.529542446287005e-06, "loss": 0.963, "step": 2344 }, { "epoch": 1.2335612835349816, "grad_norm": 2.204454183578491, "learning_rate": 4.529136739608418e-06, "loss": 0.9177, "step": 2345 }, { "epoch": 1.2340873224618623, "grad_norm": 1.9778000116348267, "learning_rate": 4.5287308762572135e-06, "loss": 0.9425, "step": 2346 }, { "epoch": 1.234613361388743, "grad_norm": 2.2127273082733154, "learning_rate": 4.528324856264731e-06, "loss": 0.9161, "step": 2347 }, { "epoch": 1.2351394003156233, "grad_norm": 2.1865501403808594, "learning_rate": 4.52791867966232e-06, "loss": 0.9622, "step": 2348 }, { "epoch": 1.235665439242504, "grad_norm": 2.1409499645233154, "learning_rate": 4.527512346481341e-06, "loss": 0.9043, "step": 2349 }, { "epoch": 1.2361914781693846, "grad_norm": 2.0245275497436523, "learning_rate": 4.527105856753168e-06, "loss": 0.8997, "step": 2350 }, { "epoch": 1.2367175170962652, "grad_norm": 2.031557559967041, "learning_rate": 4.526699210509186e-06, "loss": 0.9467, "step": 2351 }, { "epoch": 1.2372435560231456, "grad_norm": 1.9724940061569214, "learning_rate": 4.5262924077807936e-06, "loss": 0.8992, "step": 2352 }, { "epoch": 1.2377695949500263, "grad_norm": 2.120136022567749, "learning_rate": 4.5258854485994e-06, "loss": 0.9568, "step": 2353 }, { "epoch": 1.2382956338769069, "grad_norm": 2.007608413696289, "learning_rate": 4.525478332996428e-06, "loss": 0.928, "step": 2354 }, { "epoch": 1.2388216728037875, "grad_norm": 2.1604855060577393, "learning_rate": 4.525071061003311e-06, "loss": 0.9795, "step": 2355 }, { "epoch": 1.2393477117306682, "grad_norm": 2.2880077362060547, "learning_rate": 4.524663632651495e-06, "loss": 0.96, "step": 2356 }, { "epoch": 1.2398737506575486, "grad_norm": 2.1377370357513428, "learning_rate": 4.524256047972438e-06, "loss": 0.9128, "step": 2357 }, { "epoch": 1.2403997895844292, "grad_norm": 1.9751715660095215, "learning_rate": 4.523848306997611e-06, "loss": 0.9054, "step": 2358 }, { "epoch": 1.2409258285113098, "grad_norm": 2.1510539054870605, "learning_rate": 4.523440409758495e-06, "loss": 0.9007, "step": 2359 }, { "epoch": 1.2414518674381905, "grad_norm": 1.988323450088501, "learning_rate": 4.523032356286587e-06, "loss": 0.8735, "step": 2360 }, { "epoch": 1.241977906365071, "grad_norm": 2.0008699893951416, "learning_rate": 4.522624146613389e-06, "loss": 0.9891, "step": 2361 }, { "epoch": 1.2425039452919515, "grad_norm": 2.041987180709839, "learning_rate": 4.522215780770424e-06, "loss": 0.9366, "step": 2362 }, { "epoch": 1.2430299842188322, "grad_norm": 1.9101216793060303, "learning_rate": 4.521807258789219e-06, "loss": 0.8801, "step": 2363 }, { "epoch": 1.2435560231457128, "grad_norm": 2.129915475845337, "learning_rate": 4.521398580701319e-06, "loss": 0.9313, "step": 2364 }, { "epoch": 1.2440820620725934, "grad_norm": 2.062591314315796, "learning_rate": 4.520989746538277e-06, "loss": 0.9736, "step": 2365 }, { "epoch": 1.244608100999474, "grad_norm": 2.126099109649658, "learning_rate": 4.5205807563316604e-06, "loss": 1.0002, "step": 2366 }, { "epoch": 1.2451341399263545, "grad_norm": 2.038667678833008, "learning_rate": 4.520171610113049e-06, "loss": 0.9156, "step": 2367 }, { "epoch": 1.245660178853235, "grad_norm": 2.0621137619018555, "learning_rate": 4.519762307914032e-06, "loss": 0.9685, "step": 2368 }, { "epoch": 1.2461862177801157, "grad_norm": 2.080672025680542, "learning_rate": 4.519352849766212e-06, "loss": 0.9618, "step": 2369 }, { "epoch": 1.2467122567069964, "grad_norm": 2.064520835876465, "learning_rate": 4.5189432357012055e-06, "loss": 0.9454, "step": 2370 }, { "epoch": 1.247238295633877, "grad_norm": 2.018462657928467, "learning_rate": 4.518533465750638e-06, "loss": 0.9063, "step": 2371 }, { "epoch": 1.2477643345607574, "grad_norm": 1.985949158668518, "learning_rate": 4.5181235399461484e-06, "loss": 0.9448, "step": 2372 }, { "epoch": 1.248290373487638, "grad_norm": 2.068023920059204, "learning_rate": 4.517713458319389e-06, "loss": 0.9186, "step": 2373 }, { "epoch": 1.2488164124145187, "grad_norm": 2.0476534366607666, "learning_rate": 4.517303220902022e-06, "loss": 0.8944, "step": 2374 }, { "epoch": 1.2493424513413993, "grad_norm": 2.1194283962249756, "learning_rate": 4.516892827725722e-06, "loss": 0.9273, "step": 2375 }, { "epoch": 1.24986849026828, "grad_norm": 2.097421884536743, "learning_rate": 4.516482278822177e-06, "loss": 0.9247, "step": 2376 }, { "epoch": 1.2503945291951604, "grad_norm": 2.0427608489990234, "learning_rate": 4.516071574223085e-06, "loss": 0.9542, "step": 2377 }, { "epoch": 1.250920568122041, "grad_norm": 2.206160068511963, "learning_rate": 4.515660713960158e-06, "loss": 0.9822, "step": 2378 }, { "epoch": 1.2514466070489216, "grad_norm": 2.2046313285827637, "learning_rate": 4.515249698065118e-06, "loss": 0.966, "step": 2379 }, { "epoch": 1.2519726459758023, "grad_norm": 2.1513445377349854, "learning_rate": 4.514838526569702e-06, "loss": 0.9193, "step": 2380 }, { "epoch": 1.2524986849026827, "grad_norm": 2.2664003372192383, "learning_rate": 4.514427199505655e-06, "loss": 0.9657, "step": 2381 }, { "epoch": 1.2530247238295633, "grad_norm": 2.037461280822754, "learning_rate": 4.514015716904739e-06, "loss": 0.9666, "step": 2382 }, { "epoch": 1.253550762756444, "grad_norm": 2.117765188217163, "learning_rate": 4.5136040787987225e-06, "loss": 0.9408, "step": 2383 }, { "epoch": 1.2540768016833246, "grad_norm": 2.0627427101135254, "learning_rate": 4.5131922852193884e-06, "loss": 0.9265, "step": 2384 }, { "epoch": 1.2546028406102052, "grad_norm": 2.0914230346679688, "learning_rate": 4.512780336198534e-06, "loss": 0.9483, "step": 2385 }, { "epoch": 1.2551288795370859, "grad_norm": 2.2735514640808105, "learning_rate": 4.5123682317679665e-06, "loss": 0.9732, "step": 2386 }, { "epoch": 1.2556549184639663, "grad_norm": 2.0221076011657715, "learning_rate": 4.511955971959503e-06, "loss": 0.9524, "step": 2387 }, { "epoch": 1.256180957390847, "grad_norm": 2.1264171600341797, "learning_rate": 4.511543556804977e-06, "loss": 0.9613, "step": 2388 }, { "epoch": 1.2567069963177275, "grad_norm": 2.041597604751587, "learning_rate": 4.51113098633623e-06, "loss": 0.9337, "step": 2389 }, { "epoch": 1.2572330352446082, "grad_norm": 1.8505622148513794, "learning_rate": 4.510718260585116e-06, "loss": 0.954, "step": 2390 }, { "epoch": 1.2577590741714886, "grad_norm": 1.9987319707870483, "learning_rate": 4.510305379583506e-06, "loss": 0.942, "step": 2391 }, { "epoch": 1.2582851130983692, "grad_norm": 2.174147844314575, "learning_rate": 4.509892343363276e-06, "loss": 0.9528, "step": 2392 }, { "epoch": 1.2588111520252498, "grad_norm": 2.0779876708984375, "learning_rate": 4.509479151956319e-06, "loss": 0.9426, "step": 2393 }, { "epoch": 1.2593371909521305, "grad_norm": 2.073341131210327, "learning_rate": 4.509065805394535e-06, "loss": 0.9534, "step": 2394 }, { "epoch": 1.2598632298790111, "grad_norm": 1.9890536069869995, "learning_rate": 4.508652303709844e-06, "loss": 0.9873, "step": 2395 }, { "epoch": 1.2603892688058917, "grad_norm": 1.9639276266098022, "learning_rate": 4.508238646934169e-06, "loss": 0.915, "step": 2396 }, { "epoch": 1.2609153077327722, "grad_norm": 2.0988283157348633, "learning_rate": 4.507824835099451e-06, "loss": 0.9225, "step": 2397 }, { "epoch": 1.2614413466596528, "grad_norm": 2.0186917781829834, "learning_rate": 4.50741086823764e-06, "loss": 0.9535, "step": 2398 }, { "epoch": 1.2619673855865334, "grad_norm": 2.1138482093811035, "learning_rate": 4.5069967463807e-06, "loss": 0.9428, "step": 2399 }, { "epoch": 1.262493424513414, "grad_norm": 2.1341164112091064, "learning_rate": 4.5065824695606045e-06, "loss": 0.9368, "step": 2400 }, { "epoch": 1.2630194634402945, "grad_norm": 2.0811543464660645, "learning_rate": 4.506168037809342e-06, "loss": 0.9558, "step": 2401 }, { "epoch": 1.263545502367175, "grad_norm": 2.067427158355713, "learning_rate": 4.505753451158911e-06, "loss": 0.8918, "step": 2402 }, { "epoch": 1.2640715412940557, "grad_norm": 2.081106185913086, "learning_rate": 4.505338709641321e-06, "loss": 0.9488, "step": 2403 }, { "epoch": 1.2645975802209364, "grad_norm": 1.9976449012756348, "learning_rate": 4.5049238132885966e-06, "loss": 0.956, "step": 2404 }, { "epoch": 1.265123619147817, "grad_norm": 1.9591141939163208, "learning_rate": 4.504508762132772e-06, "loss": 0.9195, "step": 2405 }, { "epoch": 1.2656496580746976, "grad_norm": 2.0610501766204834, "learning_rate": 4.504093556205893e-06, "loss": 0.9174, "step": 2406 }, { "epoch": 1.266175697001578, "grad_norm": 2.0975704193115234, "learning_rate": 4.503678195540019e-06, "loss": 0.9364, "step": 2407 }, { "epoch": 1.2667017359284587, "grad_norm": 2.036421775817871, "learning_rate": 4.503262680167221e-06, "loss": 0.855, "step": 2408 }, { "epoch": 1.2672277748553393, "grad_norm": 1.947721242904663, "learning_rate": 4.502847010119581e-06, "loss": 0.9063, "step": 2409 }, { "epoch": 1.26775381378222, "grad_norm": 2.057432174682617, "learning_rate": 4.5024311854291935e-06, "loss": 0.9356, "step": 2410 }, { "epoch": 1.2682798527091004, "grad_norm": 2.3818373680114746, "learning_rate": 4.502015206128165e-06, "loss": 0.9896, "step": 2411 }, { "epoch": 1.268805891635981, "grad_norm": 1.9357292652130127, "learning_rate": 4.501599072248614e-06, "loss": 0.927, "step": 2412 }, { "epoch": 1.2693319305628616, "grad_norm": 2.0625691413879395, "learning_rate": 4.501182783822671e-06, "loss": 0.9391, "step": 2413 }, { "epoch": 1.2698579694897423, "grad_norm": 2.3002283573150635, "learning_rate": 4.5007663408824775e-06, "loss": 0.9798, "step": 2414 }, { "epoch": 1.270384008416623, "grad_norm": 2.030616283416748, "learning_rate": 4.500349743460188e-06, "loss": 0.9863, "step": 2415 }, { "epoch": 1.2709100473435035, "grad_norm": 2.094247817993164, "learning_rate": 4.4999329915879694e-06, "loss": 0.9504, "step": 2416 }, { "epoch": 1.271436086270384, "grad_norm": 2.10866117477417, "learning_rate": 4.499516085297998e-06, "loss": 0.9585, "step": 2417 }, { "epoch": 1.2719621251972646, "grad_norm": 2.2691314220428467, "learning_rate": 4.4990990246224656e-06, "loss": 0.8979, "step": 2418 }, { "epoch": 1.2724881641241452, "grad_norm": 2.1009504795074463, "learning_rate": 4.498681809593574e-06, "loss": 0.9392, "step": 2419 }, { "epoch": 1.2730142030510256, "grad_norm": 2.042738199234009, "learning_rate": 4.498264440243534e-06, "loss": 0.8967, "step": 2420 }, { "epoch": 1.2735402419779063, "grad_norm": 2.00138521194458, "learning_rate": 4.497846916604576e-06, "loss": 0.9642, "step": 2421 }, { "epoch": 1.274066280904787, "grad_norm": 2.086472988128662, "learning_rate": 4.4974292387089334e-06, "loss": 0.9474, "step": 2422 }, { "epoch": 1.2745923198316675, "grad_norm": 1.9719960689544678, "learning_rate": 4.4970114065888585e-06, "loss": 0.9121, "step": 2423 }, { "epoch": 1.2751183587585482, "grad_norm": 2.027043342590332, "learning_rate": 4.49659342027661e-06, "loss": 0.9262, "step": 2424 }, { "epoch": 1.2756443976854288, "grad_norm": 2.1368863582611084, "learning_rate": 4.4961752798044645e-06, "loss": 0.9439, "step": 2425 }, { "epoch": 1.2761704366123094, "grad_norm": 2.1782031059265137, "learning_rate": 4.495756985204705e-06, "loss": 0.9222, "step": 2426 }, { "epoch": 1.2766964755391899, "grad_norm": 2.1297860145568848, "learning_rate": 4.49533853650963e-06, "loss": 0.8776, "step": 2427 }, { "epoch": 1.2772225144660705, "grad_norm": 2.205399990081787, "learning_rate": 4.494919933751548e-06, "loss": 0.9438, "step": 2428 }, { "epoch": 1.2777485533929511, "grad_norm": 2.076138496398926, "learning_rate": 4.494501176962779e-06, "loss": 0.94, "step": 2429 }, { "epoch": 1.2782745923198315, "grad_norm": 2.0347354412078857, "learning_rate": 4.4940822661756566e-06, "loss": 0.9011, "step": 2430 }, { "epoch": 1.2788006312467122, "grad_norm": 2.0521199703216553, "learning_rate": 4.493663201422526e-06, "loss": 0.9039, "step": 2431 }, { "epoch": 1.2793266701735928, "grad_norm": 2.13946795463562, "learning_rate": 4.493243982735742e-06, "loss": 0.9488, "step": 2432 }, { "epoch": 1.2798527091004734, "grad_norm": 2.1136245727539062, "learning_rate": 4.492824610147676e-06, "loss": 0.9265, "step": 2433 }, { "epoch": 1.280378748027354, "grad_norm": 2.0845329761505127, "learning_rate": 4.4924050836907065e-06, "loss": 0.9652, "step": 2434 }, { "epoch": 1.2809047869542347, "grad_norm": 2.2292892932891846, "learning_rate": 4.4919854033972254e-06, "loss": 0.9731, "step": 2435 }, { "epoch": 1.2814308258811151, "grad_norm": 2.179656505584717, "learning_rate": 4.491565569299637e-06, "loss": 0.9456, "step": 2436 }, { "epoch": 1.2819568648079958, "grad_norm": 2.0454165935516357, "learning_rate": 4.49114558143036e-06, "loss": 0.877, "step": 2437 }, { "epoch": 1.2824829037348764, "grad_norm": 2.105604648590088, "learning_rate": 4.490725439821817e-06, "loss": 0.9817, "step": 2438 }, { "epoch": 1.283008942661757, "grad_norm": 2.1095452308654785, "learning_rate": 4.490305144506453e-06, "loss": 0.9423, "step": 2439 }, { "epoch": 1.2835349815886374, "grad_norm": 1.8949556350708008, "learning_rate": 4.489884695516716e-06, "loss": 0.9731, "step": 2440 }, { "epoch": 1.284061020515518, "grad_norm": 1.9607486724853516, "learning_rate": 4.489464092885072e-06, "loss": 0.8632, "step": 2441 }, { "epoch": 1.2845870594423987, "grad_norm": 1.9174182415008545, "learning_rate": 4.489043336643994e-06, "loss": 0.916, "step": 2442 }, { "epoch": 1.2851130983692793, "grad_norm": 2.09295654296875, "learning_rate": 4.488622426825972e-06, "loss": 0.919, "step": 2443 }, { "epoch": 1.28563913729616, "grad_norm": 2.0831458568573, "learning_rate": 4.488201363463503e-06, "loss": 0.9494, "step": 2444 }, { "epoch": 1.2861651762230406, "grad_norm": 2.1123828887939453, "learning_rate": 4.487780146589098e-06, "loss": 0.9913, "step": 2445 }, { "epoch": 1.286691215149921, "grad_norm": 2.1118533611297607, "learning_rate": 4.48735877623528e-06, "loss": 0.9224, "step": 2446 }, { "epoch": 1.2872172540768017, "grad_norm": 1.9321138858795166, "learning_rate": 4.486937252434584e-06, "loss": 0.9135, "step": 2447 }, { "epoch": 1.2877432930036823, "grad_norm": 2.107614755630493, "learning_rate": 4.4865155752195565e-06, "loss": 0.9449, "step": 2448 }, { "epoch": 1.288269331930563, "grad_norm": 2.1365113258361816, "learning_rate": 4.486093744622756e-06, "loss": 0.9571, "step": 2449 }, { "epoch": 1.2887953708574433, "grad_norm": 2.0298421382904053, "learning_rate": 4.4856717606767515e-06, "loss": 0.9162, "step": 2450 }, { "epoch": 1.289321409784324, "grad_norm": 2.0793778896331787, "learning_rate": 4.485249623414125e-06, "loss": 0.9568, "step": 2451 }, { "epoch": 1.2898474487112046, "grad_norm": 2.1142778396606445, "learning_rate": 4.484827332867473e-06, "loss": 1.0266, "step": 2452 }, { "epoch": 1.2903734876380852, "grad_norm": 2.5684921741485596, "learning_rate": 4.484404889069398e-06, "loss": 0.952, "step": 2453 }, { "epoch": 1.2908995265649659, "grad_norm": 1.9950506687164307, "learning_rate": 4.483982292052519e-06, "loss": 0.8993, "step": 2454 }, { "epoch": 1.2914255654918465, "grad_norm": 2.0234720706939697, "learning_rate": 4.483559541849465e-06, "loss": 0.9031, "step": 2455 }, { "epoch": 1.291951604418727, "grad_norm": 2.147324800491333, "learning_rate": 4.483136638492877e-06, "loss": 1.0002, "step": 2456 }, { "epoch": 1.2924776433456076, "grad_norm": 2.3617396354675293, "learning_rate": 4.482713582015409e-06, "loss": 0.9163, "step": 2457 }, { "epoch": 1.2930036822724882, "grad_norm": 2.0739989280700684, "learning_rate": 4.482290372449725e-06, "loss": 0.9832, "step": 2458 }, { "epoch": 1.2935297211993688, "grad_norm": 2.0278656482696533, "learning_rate": 4.4818670098285e-06, "loss": 0.8862, "step": 2459 }, { "epoch": 1.2940557601262492, "grad_norm": 2.222480058670044, "learning_rate": 4.481443494184426e-06, "loss": 0.9608, "step": 2460 }, { "epoch": 1.2945817990531299, "grad_norm": 2.0249786376953125, "learning_rate": 4.481019825550201e-06, "loss": 0.9937, "step": 2461 }, { "epoch": 1.2951078379800105, "grad_norm": 2.045994520187378, "learning_rate": 4.480596003958537e-06, "loss": 0.9731, "step": 2462 }, { "epoch": 1.2956338769068911, "grad_norm": 2.124706268310547, "learning_rate": 4.480172029442158e-06, "loss": 0.9634, "step": 2463 }, { "epoch": 1.2961599158337718, "grad_norm": 1.9319393634796143, "learning_rate": 4.479747902033801e-06, "loss": 0.9429, "step": 2464 }, { "epoch": 1.2966859547606524, "grad_norm": 2.0962777137756348, "learning_rate": 4.479323621766212e-06, "loss": 0.9516, "step": 2465 }, { "epoch": 1.2972119936875328, "grad_norm": 2.1256442070007324, "learning_rate": 4.478899188672151e-06, "loss": 0.948, "step": 2466 }, { "epoch": 1.2977380326144135, "grad_norm": 1.9735897779464722, "learning_rate": 4.4784746027843885e-06, "loss": 0.9133, "step": 2467 }, { "epoch": 1.298264071541294, "grad_norm": 1.9956401586532593, "learning_rate": 4.478049864135708e-06, "loss": 0.9491, "step": 2468 }, { "epoch": 1.2987901104681747, "grad_norm": 1.9241101741790771, "learning_rate": 4.477624972758905e-06, "loss": 0.8982, "step": 2469 }, { "epoch": 1.2993161493950551, "grad_norm": 2.131563425064087, "learning_rate": 4.477199928686784e-06, "loss": 0.9395, "step": 2470 }, { "epoch": 1.2998421883219358, "grad_norm": 2.090785264968872, "learning_rate": 4.476774731952164e-06, "loss": 0.9385, "step": 2471 }, { "epoch": 1.3003682272488164, "grad_norm": 2.0358545780181885, "learning_rate": 4.476349382587876e-06, "loss": 0.9396, "step": 2472 }, { "epoch": 1.300894266175697, "grad_norm": 2.1488404273986816, "learning_rate": 4.475923880626761e-06, "loss": 0.956, "step": 2473 }, { "epoch": 1.3014203051025777, "grad_norm": 2.1079018115997314, "learning_rate": 4.475498226101673e-06, "loss": 0.924, "step": 2474 }, { "epoch": 1.3019463440294583, "grad_norm": 2.0726771354675293, "learning_rate": 4.475072419045477e-06, "loss": 0.8923, "step": 2475 }, { "epoch": 1.3024723829563387, "grad_norm": 2.2027525901794434, "learning_rate": 4.474646459491051e-06, "loss": 0.9192, "step": 2476 }, { "epoch": 1.3029984218832193, "grad_norm": 2.288954019546509, "learning_rate": 4.474220347471282e-06, "loss": 0.9775, "step": 2477 }, { "epoch": 1.3035244608101, "grad_norm": 2.2008302211761475, "learning_rate": 4.473794083019073e-06, "loss": 0.9722, "step": 2478 }, { "epoch": 1.3040504997369806, "grad_norm": 2.234795570373535, "learning_rate": 4.473367666167335e-06, "loss": 0.9365, "step": 2479 }, { "epoch": 1.304576538663861, "grad_norm": 2.0062167644500732, "learning_rate": 4.472941096948994e-06, "loss": 0.9607, "step": 2480 }, { "epoch": 1.3051025775907417, "grad_norm": 1.9761099815368652, "learning_rate": 4.472514375396985e-06, "loss": 0.8885, "step": 2481 }, { "epoch": 1.3056286165176223, "grad_norm": 2.0999045372009277, "learning_rate": 4.4720875015442545e-06, "loss": 0.9752, "step": 2482 }, { "epoch": 1.306154655444503, "grad_norm": 2.0382134914398193, "learning_rate": 4.471660475423764e-06, "loss": 0.9698, "step": 2483 }, { "epoch": 1.3066806943713836, "grad_norm": 1.9761773347854614, "learning_rate": 4.471233297068484e-06, "loss": 0.9182, "step": 2484 }, { "epoch": 1.3072067332982642, "grad_norm": 2.030435800552368, "learning_rate": 4.4708059665113964e-06, "loss": 0.9615, "step": 2485 }, { "epoch": 1.3077327722251446, "grad_norm": 2.6968722343444824, "learning_rate": 4.470378483785499e-06, "loss": 0.8918, "step": 2486 }, { "epoch": 1.3082588111520252, "grad_norm": 1.9564226865768433, "learning_rate": 4.469950848923796e-06, "loss": 0.8998, "step": 2487 }, { "epoch": 1.3087848500789059, "grad_norm": 1.9719346761703491, "learning_rate": 4.469523061959305e-06, "loss": 0.9288, "step": 2488 }, { "epoch": 1.3093108890057863, "grad_norm": 2.0258467197418213, "learning_rate": 4.46909512292506e-06, "loss": 0.8893, "step": 2489 }, { "epoch": 1.309836927932667, "grad_norm": 2.0949339866638184, "learning_rate": 4.4686670318540985e-06, "loss": 0.9722, "step": 2490 }, { "epoch": 1.3103629668595476, "grad_norm": 1.9772303104400635, "learning_rate": 4.468238788779476e-06, "loss": 0.9216, "step": 2491 }, { "epoch": 1.3108890057864282, "grad_norm": 2.2911159992218018, "learning_rate": 4.467810393734258e-06, "loss": 0.9934, "step": 2492 }, { "epoch": 1.3114150447133088, "grad_norm": 2.1708860397338867, "learning_rate": 4.46738184675152e-06, "loss": 0.9508, "step": 2493 }, { "epoch": 1.3119410836401895, "grad_norm": 1.987321138381958, "learning_rate": 4.466953147864352e-06, "loss": 0.9174, "step": 2494 }, { "epoch": 1.31246712256707, "grad_norm": 2.016629695892334, "learning_rate": 4.466524297105855e-06, "loss": 0.9283, "step": 2495 }, { "epoch": 1.3129931614939505, "grad_norm": 2.141136407852173, "learning_rate": 4.46609529450914e-06, "loss": 0.9569, "step": 2496 }, { "epoch": 1.3135192004208311, "grad_norm": 2.0675323009490967, "learning_rate": 4.465666140107331e-06, "loss": 0.9624, "step": 2497 }, { "epoch": 1.3140452393477118, "grad_norm": 2.1058006286621094, "learning_rate": 4.465236833933565e-06, "loss": 0.9828, "step": 2498 }, { "epoch": 1.3145712782745922, "grad_norm": 2.0386290550231934, "learning_rate": 4.464807376020987e-06, "loss": 0.9602, "step": 2499 }, { "epoch": 1.3150973172014728, "grad_norm": 2.048245429992676, "learning_rate": 4.464377766402757e-06, "loss": 0.9539, "step": 2500 }, { "epoch": 1.3156233561283535, "grad_norm": 2.112946033477783, "learning_rate": 4.463948005112048e-06, "loss": 0.9845, "step": 2501 }, { "epoch": 1.316149395055234, "grad_norm": 2.1279125213623047, "learning_rate": 4.46351809218204e-06, "loss": 0.9949, "step": 2502 }, { "epoch": 1.3166754339821147, "grad_norm": 2.1278843879699707, "learning_rate": 4.463088027645927e-06, "loss": 0.9024, "step": 2503 }, { "epoch": 1.3172014729089954, "grad_norm": 2.043644428253174, "learning_rate": 4.462657811536917e-06, "loss": 0.9018, "step": 2504 }, { "epoch": 1.3177275118358758, "grad_norm": 2.12113356590271, "learning_rate": 4.462227443888227e-06, "loss": 0.9548, "step": 2505 }, { "epoch": 1.3182535507627564, "grad_norm": 2.0414116382598877, "learning_rate": 4.461796924733084e-06, "loss": 0.9442, "step": 2506 }, { "epoch": 1.318779589689637, "grad_norm": 2.12378191947937, "learning_rate": 4.4613662541047305e-06, "loss": 0.9504, "step": 2507 }, { "epoch": 1.3193056286165177, "grad_norm": 2.197679281234741, "learning_rate": 4.4609354320364204e-06, "loss": 1.0346, "step": 2508 }, { "epoch": 1.319831667543398, "grad_norm": 2.0174758434295654, "learning_rate": 4.4605044585614174e-06, "loss": 0.9338, "step": 2509 }, { "epoch": 1.3203577064702787, "grad_norm": 2.1691973209381104, "learning_rate": 4.460073333712997e-06, "loss": 0.9806, "step": 2510 }, { "epoch": 1.3208837453971594, "grad_norm": 2.1645984649658203, "learning_rate": 4.459642057524448e-06, "loss": 1.0004, "step": 2511 }, { "epoch": 1.32140978432404, "grad_norm": 2.0355732440948486, "learning_rate": 4.459210630029068e-06, "loss": 0.9044, "step": 2512 }, { "epoch": 1.3219358232509206, "grad_norm": 2.1421124935150146, "learning_rate": 4.45877905126017e-06, "loss": 0.9928, "step": 2513 }, { "epoch": 1.3224618621778013, "grad_norm": 1.9635089635849, "learning_rate": 4.458347321251076e-06, "loss": 0.893, "step": 2514 }, { "epoch": 1.3229879011046817, "grad_norm": 2.012868881225586, "learning_rate": 4.457915440035121e-06, "loss": 0.9701, "step": 2515 }, { "epoch": 1.3235139400315623, "grad_norm": 2.090686559677124, "learning_rate": 4.457483407645651e-06, "loss": 0.9606, "step": 2516 }, { "epoch": 1.324039978958443, "grad_norm": 2.0503368377685547, "learning_rate": 4.457051224116023e-06, "loss": 0.9566, "step": 2517 }, { "epoch": 1.3245660178853236, "grad_norm": 1.9777687788009644, "learning_rate": 4.456618889479608e-06, "loss": 0.986, "step": 2518 }, { "epoch": 1.325092056812204, "grad_norm": 2.0104753971099854, "learning_rate": 4.456186403769786e-06, "loss": 0.9983, "step": 2519 }, { "epoch": 1.3256180957390846, "grad_norm": 2.1226019859313965, "learning_rate": 4.4557537670199505e-06, "loss": 0.9709, "step": 2520 }, { "epoch": 1.3261441346659653, "grad_norm": 2.061079263687134, "learning_rate": 4.4553209792635055e-06, "loss": 0.9499, "step": 2521 }, { "epoch": 1.326670173592846, "grad_norm": 2.1253788471221924, "learning_rate": 4.454888040533867e-06, "loss": 0.9375, "step": 2522 }, { "epoch": 1.3271962125197265, "grad_norm": 2.1977858543395996, "learning_rate": 4.454454950864464e-06, "loss": 0.8861, "step": 2523 }, { "epoch": 1.3277222514466072, "grad_norm": 2.219135284423828, "learning_rate": 4.454021710288735e-06, "loss": 0.9265, "step": 2524 }, { "epoch": 1.3282482903734876, "grad_norm": 2.074660539627075, "learning_rate": 4.4535883188401315e-06, "loss": 0.9912, "step": 2525 }, { "epoch": 1.3287743293003682, "grad_norm": 2.1656036376953125, "learning_rate": 4.453154776552117e-06, "loss": 0.9446, "step": 2526 }, { "epoch": 1.3293003682272488, "grad_norm": 1.962275505065918, "learning_rate": 4.452721083458164e-06, "loss": 0.9025, "step": 2527 }, { "epoch": 1.3298264071541295, "grad_norm": 2.0680644512176514, "learning_rate": 4.45228723959176e-06, "loss": 0.9606, "step": 2528 }, { "epoch": 1.3303524460810099, "grad_norm": 2.117361068725586, "learning_rate": 4.451853244986403e-06, "loss": 0.9622, "step": 2529 }, { "epoch": 1.3308784850078905, "grad_norm": 2.1107771396636963, "learning_rate": 4.4514190996756005e-06, "loss": 0.9792, "step": 2530 }, { "epoch": 1.3314045239347712, "grad_norm": 2.094346761703491, "learning_rate": 4.450984803692876e-06, "loss": 0.9317, "step": 2531 }, { "epoch": 1.3319305628616518, "grad_norm": 2.103429079055786, "learning_rate": 4.45055035707176e-06, "loss": 0.9472, "step": 2532 }, { "epoch": 1.3324566017885324, "grad_norm": 2.033013105392456, "learning_rate": 4.450115759845799e-06, "loss": 0.9561, "step": 2533 }, { "epoch": 1.332982640715413, "grad_norm": 2.102220058441162, "learning_rate": 4.449681012048547e-06, "loss": 0.8936, "step": 2534 }, { "epoch": 1.3335086796422935, "grad_norm": 1.9735665321350098, "learning_rate": 4.4492461137135715e-06, "loss": 0.9421, "step": 2535 }, { "epoch": 1.334034718569174, "grad_norm": 2.1749532222747803, "learning_rate": 4.448811064874453e-06, "loss": 0.9439, "step": 2536 }, { "epoch": 1.3345607574960547, "grad_norm": 1.955908179283142, "learning_rate": 4.448375865564781e-06, "loss": 0.9124, "step": 2537 }, { "epoch": 1.3350867964229354, "grad_norm": 2.041921377182007, "learning_rate": 4.447940515818158e-06, "loss": 0.9186, "step": 2538 }, { "epoch": 1.3356128353498158, "grad_norm": 1.945713996887207, "learning_rate": 4.447505015668199e-06, "loss": 0.9246, "step": 2539 }, { "epoch": 1.3361388742766964, "grad_norm": 2.085697889328003, "learning_rate": 4.447069365148529e-06, "loss": 0.9051, "step": 2540 }, { "epoch": 1.336664913203577, "grad_norm": 2.116046667098999, "learning_rate": 4.446633564292785e-06, "loss": 0.9515, "step": 2541 }, { "epoch": 1.3371909521304577, "grad_norm": 2.1968820095062256, "learning_rate": 4.446197613134616e-06, "loss": 0.9967, "step": 2542 }, { "epoch": 1.3377169910573383, "grad_norm": 2.061464786529541, "learning_rate": 4.445761511707682e-06, "loss": 0.9259, "step": 2543 }, { "epoch": 1.338243029984219, "grad_norm": 1.9848337173461914, "learning_rate": 4.445325260045656e-06, "loss": 0.9775, "step": 2544 }, { "epoch": 1.3387690689110994, "grad_norm": 2.1794087886810303, "learning_rate": 4.444888858182222e-06, "loss": 0.9738, "step": 2545 }, { "epoch": 1.33929510783798, "grad_norm": 2.2552716732025146, "learning_rate": 4.4444523061510725e-06, "loss": 0.9776, "step": 2546 }, { "epoch": 1.3398211467648606, "grad_norm": 2.059893846511841, "learning_rate": 4.444015603985917e-06, "loss": 0.9579, "step": 2547 }, { "epoch": 1.3403471856917413, "grad_norm": 2.019697666168213, "learning_rate": 4.443578751720472e-06, "loss": 0.9773, "step": 2548 }, { "epoch": 1.3408732246186217, "grad_norm": 2.0542571544647217, "learning_rate": 4.44314174938847e-06, "loss": 0.9466, "step": 2549 }, { "epoch": 1.3413992635455023, "grad_norm": 2.059788942337036, "learning_rate": 4.44270459702365e-06, "loss": 0.9203, "step": 2550 }, { "epoch": 1.341925302472383, "grad_norm": 2.0966968536376953, "learning_rate": 4.442267294659767e-06, "loss": 0.9394, "step": 2551 }, { "epoch": 1.3424513413992636, "grad_norm": 1.9987400770187378, "learning_rate": 4.441829842330585e-06, "loss": 0.9614, "step": 2552 }, { "epoch": 1.3429773803261442, "grad_norm": 2.062717914581299, "learning_rate": 4.44139224006988e-06, "loss": 0.9811, "step": 2553 }, { "epoch": 1.3435034192530249, "grad_norm": 2.2932512760162354, "learning_rate": 4.440954487911441e-06, "loss": 0.9232, "step": 2554 }, { "epoch": 1.3440294581799053, "grad_norm": 2.021737813949585, "learning_rate": 4.440516585889066e-06, "loss": 0.984, "step": 2555 }, { "epoch": 1.344555497106786, "grad_norm": 2.053077459335327, "learning_rate": 4.440078534036567e-06, "loss": 1.0011, "step": 2556 }, { "epoch": 1.3450815360336665, "grad_norm": 2.045336961746216, "learning_rate": 4.439640332387767e-06, "loss": 0.9304, "step": 2557 }, { "epoch": 1.345607574960547, "grad_norm": 2.1932125091552734, "learning_rate": 4.439201980976498e-06, "loss": 0.9421, "step": 2558 }, { "epoch": 1.3461336138874276, "grad_norm": 1.9581273794174194, "learning_rate": 4.438763479836609e-06, "loss": 0.9252, "step": 2559 }, { "epoch": 1.3466596528143082, "grad_norm": 1.9648422002792358, "learning_rate": 4.4383248290019555e-06, "loss": 0.932, "step": 2560 }, { "epoch": 1.3471856917411889, "grad_norm": 2.0493264198303223, "learning_rate": 4.437886028506406e-06, "loss": 0.9754, "step": 2561 }, { "epoch": 1.3477117306680695, "grad_norm": 2.2594149112701416, "learning_rate": 4.437447078383843e-06, "loss": 0.9004, "step": 2562 }, { "epoch": 1.3482377695949501, "grad_norm": 2.0411531925201416, "learning_rate": 4.437007978668156e-06, "loss": 0.9635, "step": 2563 }, { "epoch": 1.3487638085218308, "grad_norm": 2.0295286178588867, "learning_rate": 4.436568729393249e-06, "loss": 0.9224, "step": 2564 }, { "epoch": 1.3492898474487112, "grad_norm": 2.0805602073669434, "learning_rate": 4.436129330593038e-06, "loss": 0.9636, "step": 2565 }, { "epoch": 1.3498158863755918, "grad_norm": 2.0708534717559814, "learning_rate": 4.435689782301449e-06, "loss": 0.9452, "step": 2566 }, { "epoch": 1.3503419253024724, "grad_norm": 2.005274534225464, "learning_rate": 4.435250084552422e-06, "loss": 0.9832, "step": 2567 }, { "epoch": 1.3508679642293528, "grad_norm": 2.108915328979492, "learning_rate": 4.434810237379903e-06, "loss": 0.9682, "step": 2568 }, { "epoch": 1.3513940031562335, "grad_norm": 2.290133476257324, "learning_rate": 4.434370240817856e-06, "loss": 0.9577, "step": 2569 }, { "epoch": 1.3519200420831141, "grad_norm": 1.9996196031570435, "learning_rate": 4.433930094900253e-06, "loss": 0.9494, "step": 2570 }, { "epoch": 1.3524460810099947, "grad_norm": 2.068378210067749, "learning_rate": 4.4334897996610795e-06, "loss": 0.9632, "step": 2571 }, { "epoch": 1.3529721199368754, "grad_norm": 2.2263996601104736, "learning_rate": 4.4330493551343286e-06, "loss": 0.9812, "step": 2572 }, { "epoch": 1.353498158863756, "grad_norm": 2.0052454471588135, "learning_rate": 4.432608761354011e-06, "loss": 0.9448, "step": 2573 }, { "epoch": 1.3540241977906364, "grad_norm": 1.876429557800293, "learning_rate": 4.432168018354143e-06, "loss": 0.9026, "step": 2574 }, { "epoch": 1.354550236717517, "grad_norm": 2.1408214569091797, "learning_rate": 4.431727126168757e-06, "loss": 0.9356, "step": 2575 }, { "epoch": 1.3550762756443977, "grad_norm": 1.9882752895355225, "learning_rate": 4.431286084831892e-06, "loss": 0.9186, "step": 2576 }, { "epoch": 1.3556023145712783, "grad_norm": 2.128145456314087, "learning_rate": 4.430844894377605e-06, "loss": 0.9405, "step": 2577 }, { "epoch": 1.3561283534981587, "grad_norm": 2.059305429458618, "learning_rate": 4.4304035548399585e-06, "loss": 0.9209, "step": 2578 }, { "epoch": 1.3566543924250394, "grad_norm": 2.2779366970062256, "learning_rate": 4.429962066253029e-06, "loss": 1.0281, "step": 2579 }, { "epoch": 1.35718043135192, "grad_norm": 1.9767261743545532, "learning_rate": 4.429520428650907e-06, "loss": 0.9581, "step": 2580 }, { "epoch": 1.3577064702788006, "grad_norm": 2.1368567943573, "learning_rate": 4.42907864206769e-06, "loss": 0.952, "step": 2581 }, { "epoch": 1.3582325092056813, "grad_norm": 2.1179497241973877, "learning_rate": 4.428636706537488e-06, "loss": 0.9442, "step": 2582 }, { "epoch": 1.358758548132562, "grad_norm": 2.0669925212860107, "learning_rate": 4.428194622094425e-06, "loss": 0.9727, "step": 2583 }, { "epoch": 1.3592845870594423, "grad_norm": 2.081160068511963, "learning_rate": 4.427752388772635e-06, "loss": 0.9751, "step": 2584 }, { "epoch": 1.359810625986323, "grad_norm": 1.9884616136550903, "learning_rate": 4.427310006606264e-06, "loss": 0.9226, "step": 2585 }, { "epoch": 1.3603366649132036, "grad_norm": 2.130305528640747, "learning_rate": 4.426867475629466e-06, "loss": 0.9072, "step": 2586 }, { "epoch": 1.3608627038400842, "grad_norm": 2.2059152126312256, "learning_rate": 4.426424795876414e-06, "loss": 0.9469, "step": 2587 }, { "epoch": 1.3613887427669646, "grad_norm": 2.043848752975464, "learning_rate": 4.425981967381284e-06, "loss": 0.9374, "step": 2588 }, { "epoch": 1.3619147816938453, "grad_norm": 2.1981630325317383, "learning_rate": 4.4255389901782695e-06, "loss": 0.924, "step": 2589 }, { "epoch": 1.362440820620726, "grad_norm": 2.0194413661956787, "learning_rate": 4.425095864301573e-06, "loss": 0.9838, "step": 2590 }, { "epoch": 1.3629668595476065, "grad_norm": 2.1409754753112793, "learning_rate": 4.424652589785409e-06, "loss": 0.9054, "step": 2591 }, { "epoch": 1.3634928984744872, "grad_norm": 2.040001153945923, "learning_rate": 4.424209166664002e-06, "loss": 0.9419, "step": 2592 }, { "epoch": 1.3640189374013678, "grad_norm": 2.015530824661255, "learning_rate": 4.423765594971592e-06, "loss": 0.8718, "step": 2593 }, { "epoch": 1.3645449763282482, "grad_norm": 2.042180061340332, "learning_rate": 4.423321874742425e-06, "loss": 0.926, "step": 2594 }, { "epoch": 1.3650710152551289, "grad_norm": 2.10259747505188, "learning_rate": 4.422878006010764e-06, "loss": 0.9064, "step": 2595 }, { "epoch": 1.3655970541820095, "grad_norm": 1.9849088191986084, "learning_rate": 4.4224339888108785e-06, "loss": 0.9026, "step": 2596 }, { "epoch": 1.3661230931088901, "grad_norm": 2.0189146995544434, "learning_rate": 4.421989823177053e-06, "loss": 0.9334, "step": 2597 }, { "epoch": 1.3666491320357705, "grad_norm": 2.3741374015808105, "learning_rate": 4.421545509143581e-06, "loss": 0.9743, "step": 2598 }, { "epoch": 1.3671751709626512, "grad_norm": 2.226306438446045, "learning_rate": 4.42110104674477e-06, "loss": 0.9548, "step": 2599 }, { "epoch": 1.3677012098895318, "grad_norm": 2.2016761302948, "learning_rate": 4.420656436014937e-06, "loss": 0.9332, "step": 2600 }, { "epoch": 1.3682272488164124, "grad_norm": 2.193507671356201, "learning_rate": 4.420211676988412e-06, "loss": 0.9941, "step": 2601 }, { "epoch": 1.368753287743293, "grad_norm": 2.064352035522461, "learning_rate": 4.419766769699534e-06, "loss": 0.9172, "step": 2602 }, { "epoch": 1.3692793266701737, "grad_norm": 2.1775360107421875, "learning_rate": 4.419321714182654e-06, "loss": 0.9771, "step": 2603 }, { "epoch": 1.3698053655970541, "grad_norm": 2.454411745071411, "learning_rate": 4.4188765104721386e-06, "loss": 0.9139, "step": 2604 }, { "epoch": 1.3703314045239348, "grad_norm": 2.157207727432251, "learning_rate": 4.41843115860236e-06, "loss": 0.9697, "step": 2605 }, { "epoch": 1.3708574434508154, "grad_norm": 2.005450963973999, "learning_rate": 4.417985658607706e-06, "loss": 0.9366, "step": 2606 }, { "epoch": 1.371383482377696, "grad_norm": 1.9498809576034546, "learning_rate": 4.417540010522573e-06, "loss": 0.8898, "step": 2607 }, { "epoch": 1.3719095213045764, "grad_norm": 2.0493645668029785, "learning_rate": 4.4170942143813715e-06, "loss": 0.9417, "step": 2608 }, { "epoch": 1.372435560231457, "grad_norm": 2.289781093597412, "learning_rate": 4.416648270218521e-06, "loss": 0.9575, "step": 2609 }, { "epoch": 1.3729615991583377, "grad_norm": 2.0118207931518555, "learning_rate": 4.416202178068454e-06, "loss": 0.9282, "step": 2610 }, { "epoch": 1.3734876380852183, "grad_norm": 1.9697855710983276, "learning_rate": 4.415755937965615e-06, "loss": 0.9277, "step": 2611 }, { "epoch": 1.374013677012099, "grad_norm": 1.9677777290344238, "learning_rate": 4.415309549944456e-06, "loss": 0.9322, "step": 2612 }, { "epoch": 1.3745397159389796, "grad_norm": 2.0683321952819824, "learning_rate": 4.414863014039445e-06, "loss": 0.9361, "step": 2613 }, { "epoch": 1.37506575486586, "grad_norm": 2.1321773529052734, "learning_rate": 4.414416330285061e-06, "loss": 0.9093, "step": 2614 }, { "epoch": 1.3755917937927407, "grad_norm": 1.9600199460983276, "learning_rate": 4.413969498715791e-06, "loss": 0.9428, "step": 2615 }, { "epoch": 1.3761178327196213, "grad_norm": 1.8668115139007568, "learning_rate": 4.413522519366136e-06, "loss": 0.9093, "step": 2616 }, { "epoch": 1.3766438716465017, "grad_norm": 1.9672036170959473, "learning_rate": 4.413075392270608e-06, "loss": 0.9426, "step": 2617 }, { "epoch": 1.3771699105733823, "grad_norm": 1.9827178716659546, "learning_rate": 4.412628117463731e-06, "loss": 0.9616, "step": 2618 }, { "epoch": 1.377695949500263, "grad_norm": 2.099527597427368, "learning_rate": 4.4121806949800395e-06, "loss": 0.938, "step": 2619 }, { "epoch": 1.3782219884271436, "grad_norm": 2.0840420722961426, "learning_rate": 4.411733124854079e-06, "loss": 0.9304, "step": 2620 }, { "epoch": 1.3787480273540242, "grad_norm": 1.967368245124817, "learning_rate": 4.411285407120407e-06, "loss": 0.9662, "step": 2621 }, { "epoch": 1.3792740662809049, "grad_norm": 2.0552897453308105, "learning_rate": 4.410837541813594e-06, "loss": 0.929, "step": 2622 }, { "epoch": 1.3798001052077855, "grad_norm": 2.1046664714813232, "learning_rate": 4.410389528968218e-06, "loss": 0.9127, "step": 2623 }, { "epoch": 1.380326144134666, "grad_norm": 2.147519111633301, "learning_rate": 4.409941368618873e-06, "loss": 0.9379, "step": 2624 }, { "epoch": 1.3808521830615466, "grad_norm": 1.9555045366287231, "learning_rate": 4.409493060800161e-06, "loss": 0.9662, "step": 2625 }, { "epoch": 1.3813782219884272, "grad_norm": 2.1241092681884766, "learning_rate": 4.409044605546697e-06, "loss": 0.9444, "step": 2626 }, { "epoch": 1.3819042609153076, "grad_norm": 2.022423028945923, "learning_rate": 4.408596002893105e-06, "loss": 0.9405, "step": 2627 }, { "epoch": 1.3824302998421882, "grad_norm": 1.9746357202529907, "learning_rate": 4.4081472528740254e-06, "loss": 0.9266, "step": 2628 }, { "epoch": 1.3829563387690689, "grad_norm": 2.0098395347595215, "learning_rate": 4.4076983555241036e-06, "loss": 0.9917, "step": 2629 }, { "epoch": 1.3834823776959495, "grad_norm": 2.0168423652648926, "learning_rate": 4.4072493108780024e-06, "loss": 0.9402, "step": 2630 }, { "epoch": 1.3840084166228301, "grad_norm": 2.041032552719116, "learning_rate": 4.406800118970391e-06, "loss": 0.8898, "step": 2631 }, { "epoch": 1.3845344555497108, "grad_norm": 1.9849998950958252, "learning_rate": 4.406350779835955e-06, "loss": 0.9602, "step": 2632 }, { "epoch": 1.3850604944765912, "grad_norm": 2.139587640762329, "learning_rate": 4.405901293509384e-06, "loss": 0.9802, "step": 2633 }, { "epoch": 1.3855865334034718, "grad_norm": 2.1281473636627197, "learning_rate": 4.405451660025388e-06, "loss": 0.9788, "step": 2634 }, { "epoch": 1.3861125723303525, "grad_norm": 2.1347475051879883, "learning_rate": 4.405001879418682e-06, "loss": 0.9587, "step": 2635 }, { "epoch": 1.386638611257233, "grad_norm": 2.028740882873535, "learning_rate": 4.404551951723995e-06, "loss": 0.9376, "step": 2636 }, { "epoch": 1.3871646501841135, "grad_norm": 2.101991891860962, "learning_rate": 4.404101876976064e-06, "loss": 0.9696, "step": 2637 }, { "epoch": 1.3876906891109941, "grad_norm": 2.0458288192749023, "learning_rate": 4.403651655209643e-06, "loss": 1.0081, "step": 2638 }, { "epoch": 1.3882167280378748, "grad_norm": 1.973692536354065, "learning_rate": 4.403201286459493e-06, "loss": 0.8829, "step": 2639 }, { "epoch": 1.3887427669647554, "grad_norm": 2.2197206020355225, "learning_rate": 4.402750770760387e-06, "loss": 1.0196, "step": 2640 }, { "epoch": 1.389268805891636, "grad_norm": 2.0076587200164795, "learning_rate": 4.40230010814711e-06, "loss": 0.911, "step": 2641 }, { "epoch": 1.3897948448185167, "grad_norm": 2.0094308853149414, "learning_rate": 4.4018492986544615e-06, "loss": 0.9587, "step": 2642 }, { "epoch": 1.390320883745397, "grad_norm": 2.03900146484375, "learning_rate": 4.401398342317244e-06, "loss": 0.9515, "step": 2643 }, { "epoch": 1.3908469226722777, "grad_norm": 2.0408756732940674, "learning_rate": 4.400947239170281e-06, "loss": 0.9081, "step": 2644 }, { "epoch": 1.3913729615991584, "grad_norm": 2.020554304122925, "learning_rate": 4.4004959892484e-06, "loss": 0.9323, "step": 2645 }, { "epoch": 1.391899000526039, "grad_norm": 2.05672287940979, "learning_rate": 4.400044592586444e-06, "loss": 0.9031, "step": 2646 }, { "epoch": 1.3924250394529194, "grad_norm": 1.9206362962722778, "learning_rate": 4.399593049219265e-06, "loss": 0.8898, "step": 2647 }, { "epoch": 1.3929510783798, "grad_norm": 2.1542060375213623, "learning_rate": 4.39914135918173e-06, "loss": 0.9092, "step": 2648 }, { "epoch": 1.3934771173066807, "grad_norm": 2.160116195678711, "learning_rate": 4.39868952250871e-06, "loss": 0.9383, "step": 2649 }, { "epoch": 1.3940031562335613, "grad_norm": 2.011389970779419, "learning_rate": 4.398237539235096e-06, "loss": 0.9644, "step": 2650 }, { "epoch": 1.394529195160442, "grad_norm": 2.059589147567749, "learning_rate": 4.397785409395785e-06, "loss": 0.9255, "step": 2651 }, { "epoch": 1.3950552340873226, "grad_norm": 1.9272221326828003, "learning_rate": 4.3973331330256855e-06, "loss": 0.9214, "step": 2652 }, { "epoch": 1.395581273014203, "grad_norm": 1.9866399765014648, "learning_rate": 4.39688071015972e-06, "loss": 0.9349, "step": 2653 }, { "epoch": 1.3961073119410836, "grad_norm": 1.939637303352356, "learning_rate": 4.3964281408328205e-06, "loss": 0.9356, "step": 2654 }, { "epoch": 1.3966333508679643, "grad_norm": 2.0816657543182373, "learning_rate": 4.3959754250799295e-06, "loss": 0.937, "step": 2655 }, { "epoch": 1.3971593897948449, "grad_norm": 2.040522336959839, "learning_rate": 4.395522562936004e-06, "loss": 0.9206, "step": 2656 }, { "epoch": 1.3976854287217253, "grad_norm": 2.075200080871582, "learning_rate": 4.395069554436007e-06, "loss": 0.9118, "step": 2657 }, { "epoch": 1.398211467648606, "grad_norm": 1.8599414825439453, "learning_rate": 4.394616399614918e-06, "loss": 0.9123, "step": 2658 }, { "epoch": 1.3987375065754866, "grad_norm": 1.982853889465332, "learning_rate": 4.394163098507725e-06, "loss": 0.9036, "step": 2659 }, { "epoch": 1.3992635455023672, "grad_norm": 1.9821887016296387, "learning_rate": 4.39370965114943e-06, "loss": 0.979, "step": 2660 }, { "epoch": 1.3997895844292478, "grad_norm": 2.0990328788757324, "learning_rate": 4.393256057575041e-06, "loss": 0.9789, "step": 2661 }, { "epoch": 1.4003156233561285, "grad_norm": 2.0919525623321533, "learning_rate": 4.392802317819582e-06, "loss": 0.9322, "step": 2662 }, { "epoch": 1.4008416622830089, "grad_norm": 2.128913640975952, "learning_rate": 4.392348431918088e-06, "loss": 0.9616, "step": 2663 }, { "epoch": 1.4013677012098895, "grad_norm": 2.1351349353790283, "learning_rate": 4.391894399905604e-06, "loss": 0.9615, "step": 2664 }, { "epoch": 1.4018937401367702, "grad_norm": 2.0462634563446045, "learning_rate": 4.391440221817186e-06, "loss": 0.9211, "step": 2665 }, { "epoch": 1.4024197790636508, "grad_norm": 2.055590867996216, "learning_rate": 4.390985897687902e-06, "loss": 0.9131, "step": 2666 }, { "epoch": 1.4029458179905312, "grad_norm": 2.18259596824646, "learning_rate": 4.390531427552829e-06, "loss": 0.95, "step": 2667 }, { "epoch": 1.4034718569174118, "grad_norm": 2.030566453933716, "learning_rate": 4.39007681144706e-06, "loss": 0.9661, "step": 2668 }, { "epoch": 1.4039978958442925, "grad_norm": 2.1099185943603516, "learning_rate": 4.389622049405696e-06, "loss": 0.9214, "step": 2669 }, { "epoch": 1.404523934771173, "grad_norm": 2.0410983562469482, "learning_rate": 4.389167141463849e-06, "loss": 0.951, "step": 2670 }, { "epoch": 1.4050499736980537, "grad_norm": 2.051135778427124, "learning_rate": 4.388712087656644e-06, "loss": 0.9663, "step": 2671 }, { "epoch": 1.4055760126249344, "grad_norm": 1.989233374595642, "learning_rate": 4.388256888019215e-06, "loss": 0.9677, "step": 2672 }, { "epoch": 1.4061020515518148, "grad_norm": 2.050755262374878, "learning_rate": 4.3878015425867105e-06, "loss": 0.8911, "step": 2673 }, { "epoch": 1.4066280904786954, "grad_norm": 2.1822195053100586, "learning_rate": 4.387346051394287e-06, "loss": 0.922, "step": 2674 }, { "epoch": 1.407154129405576, "grad_norm": 1.9840717315673828, "learning_rate": 4.386890414477114e-06, "loss": 0.9505, "step": 2675 }, { "epoch": 1.4076801683324567, "grad_norm": 2.111574172973633, "learning_rate": 4.386434631870372e-06, "loss": 0.9313, "step": 2676 }, { "epoch": 1.408206207259337, "grad_norm": 2.0208487510681152, "learning_rate": 4.3859787036092525e-06, "loss": 0.9173, "step": 2677 }, { "epoch": 1.4087322461862177, "grad_norm": 2.087120771408081, "learning_rate": 4.3855226297289595e-06, "loss": 0.9596, "step": 2678 }, { "epoch": 1.4092582851130984, "grad_norm": 2.1067450046539307, "learning_rate": 4.385066410264706e-06, "loss": 0.9707, "step": 2679 }, { "epoch": 1.409784324039979, "grad_norm": 1.8783036470413208, "learning_rate": 4.3846100452517156e-06, "loss": 0.9167, "step": 2680 }, { "epoch": 1.4103103629668596, "grad_norm": 2.2781338691711426, "learning_rate": 4.3841535347252284e-06, "loss": 0.8218, "step": 2681 }, { "epoch": 1.4108364018937403, "grad_norm": 1.961376428604126, "learning_rate": 4.38369687872049e-06, "loss": 0.9083, "step": 2682 }, { "epoch": 1.4113624408206207, "grad_norm": 1.8851439952850342, "learning_rate": 4.383240077272761e-06, "loss": 0.932, "step": 2683 }, { "epoch": 1.4118884797475013, "grad_norm": 2.1675922870635986, "learning_rate": 4.3827831304173105e-06, "loss": 0.9404, "step": 2684 }, { "epoch": 1.412414518674382, "grad_norm": 2.210278272628784, "learning_rate": 4.3823260381894205e-06, "loss": 0.9639, "step": 2685 }, { "epoch": 1.4129405576012624, "grad_norm": 2.1129822731018066, "learning_rate": 4.381868800624383e-06, "loss": 0.9686, "step": 2686 }, { "epoch": 1.413466596528143, "grad_norm": 2.0460870265960693, "learning_rate": 4.381411417757503e-06, "loss": 0.9283, "step": 2687 }, { "epoch": 1.4139926354550236, "grad_norm": 1.9752156734466553, "learning_rate": 4.3809538896240956e-06, "loss": 0.9529, "step": 2688 }, { "epoch": 1.4145186743819043, "grad_norm": 1.92936110496521, "learning_rate": 4.3804962162594865e-06, "loss": 0.9012, "step": 2689 }, { "epoch": 1.415044713308785, "grad_norm": 1.996164083480835, "learning_rate": 4.380038397699015e-06, "loss": 0.9451, "step": 2690 }, { "epoch": 1.4155707522356655, "grad_norm": 2.04750394821167, "learning_rate": 4.379580433978027e-06, "loss": 0.8968, "step": 2691 }, { "epoch": 1.4160967911625462, "grad_norm": 2.072307586669922, "learning_rate": 4.379122325131884e-06, "loss": 0.9479, "step": 2692 }, { "epoch": 1.4166228300894266, "grad_norm": 2.035949468612671, "learning_rate": 4.378664071195959e-06, "loss": 0.9246, "step": 2693 }, { "epoch": 1.4171488690163072, "grad_norm": 2.0732228755950928, "learning_rate": 4.3782056722056334e-06, "loss": 0.935, "step": 2694 }, { "epoch": 1.4176749079431878, "grad_norm": 1.9315673112869263, "learning_rate": 4.377747128196299e-06, "loss": 0.9209, "step": 2695 }, { "epoch": 1.4182009468700683, "grad_norm": 2.1663613319396973, "learning_rate": 4.377288439203362e-06, "loss": 0.9496, "step": 2696 }, { "epoch": 1.418726985796949, "grad_norm": 1.9999171495437622, "learning_rate": 4.37682960526224e-06, "loss": 0.9614, "step": 2697 }, { "epoch": 1.4192530247238295, "grad_norm": 2.052579879760742, "learning_rate": 4.376370626408357e-06, "loss": 0.9347, "step": 2698 }, { "epoch": 1.4197790636507102, "grad_norm": 2.0660557746887207, "learning_rate": 4.375911502677154e-06, "loss": 0.922, "step": 2699 }, { "epoch": 1.4203051025775908, "grad_norm": 1.9994977712631226, "learning_rate": 4.37545223410408e-06, "loss": 0.942, "step": 2700 }, { "epoch": 1.4208311415044714, "grad_norm": 2.145674228668213, "learning_rate": 4.3749928207245954e-06, "loss": 0.9308, "step": 2701 }, { "epoch": 1.4213571804313518, "grad_norm": 2.1065597534179688, "learning_rate": 4.374533262574172e-06, "loss": 0.9852, "step": 2702 }, { "epoch": 1.4218832193582325, "grad_norm": 2.435072422027588, "learning_rate": 4.374073559688294e-06, "loss": 0.9525, "step": 2703 }, { "epoch": 1.422409258285113, "grad_norm": 2.01636004447937, "learning_rate": 4.373613712102455e-06, "loss": 0.944, "step": 2704 }, { "epoch": 1.4229352972119937, "grad_norm": 2.146397829055786, "learning_rate": 4.373153719852161e-06, "loss": 0.9631, "step": 2705 }, { "epoch": 1.4234613361388742, "grad_norm": 1.9630359411239624, "learning_rate": 4.372693582972928e-06, "loss": 0.9303, "step": 2706 }, { "epoch": 1.4239873750657548, "grad_norm": 1.8797608613967896, "learning_rate": 4.372233301500284e-06, "loss": 0.9177, "step": 2707 }, { "epoch": 1.4245134139926354, "grad_norm": 1.992043137550354, "learning_rate": 4.371772875469767e-06, "loss": 0.9622, "step": 2708 }, { "epoch": 1.425039452919516, "grad_norm": 1.9467499256134033, "learning_rate": 4.371312304916929e-06, "loss": 0.9322, "step": 2709 }, { "epoch": 1.4255654918463967, "grad_norm": 2.274771213531494, "learning_rate": 4.370851589877332e-06, "loss": 0.9428, "step": 2710 }, { "epoch": 1.4260915307732773, "grad_norm": 2.126898765563965, "learning_rate": 4.370390730386546e-06, "loss": 0.9545, "step": 2711 }, { "epoch": 1.4266175697001577, "grad_norm": 2.096545457839966, "learning_rate": 4.369929726480156e-06, "loss": 0.9079, "step": 2712 }, { "epoch": 1.4271436086270384, "grad_norm": 2.1355035305023193, "learning_rate": 4.369468578193755e-06, "loss": 0.9582, "step": 2713 }, { "epoch": 1.427669647553919, "grad_norm": 2.243448257446289, "learning_rate": 4.369007285562952e-06, "loss": 0.9886, "step": 2714 }, { "epoch": 1.4281956864807996, "grad_norm": 2.0589983463287354, "learning_rate": 4.368545848623362e-06, "loss": 0.9882, "step": 2715 }, { "epoch": 1.42872172540768, "grad_norm": 2.072906732559204, "learning_rate": 4.368084267410614e-06, "loss": 0.9577, "step": 2716 }, { "epoch": 1.4292477643345607, "grad_norm": 2.1737213134765625, "learning_rate": 4.367622541960347e-06, "loss": 0.9759, "step": 2717 }, { "epoch": 1.4297738032614413, "grad_norm": 2.2150609493255615, "learning_rate": 4.367160672308211e-06, "loss": 0.9581, "step": 2718 }, { "epoch": 1.430299842188322, "grad_norm": 2.2187082767486572, "learning_rate": 4.366698658489869e-06, "loss": 1.0004, "step": 2719 }, { "epoch": 1.4308258811152026, "grad_norm": 2.127653121948242, "learning_rate": 4.366236500540992e-06, "loss": 0.9436, "step": 2720 }, { "epoch": 1.4313519200420832, "grad_norm": 2.180147171020508, "learning_rate": 4.365774198497266e-06, "loss": 0.9978, "step": 2721 }, { "epoch": 1.4318779589689636, "grad_norm": 1.9998571872711182, "learning_rate": 4.365311752394384e-06, "loss": 0.9294, "step": 2722 }, { "epoch": 1.4324039978958443, "grad_norm": 1.970004916191101, "learning_rate": 4.364849162268054e-06, "loss": 0.9654, "step": 2723 }, { "epoch": 1.432930036822725, "grad_norm": 2.1497817039489746, "learning_rate": 4.364386428153992e-06, "loss": 0.9413, "step": 2724 }, { "epoch": 1.4334560757496055, "grad_norm": 1.9739991426467896, "learning_rate": 4.363923550087926e-06, "loss": 0.9488, "step": 2725 }, { "epoch": 1.433982114676486, "grad_norm": 1.930517315864563, "learning_rate": 4.363460528105598e-06, "loss": 0.9284, "step": 2726 }, { "epoch": 1.4345081536033666, "grad_norm": 2.066530227661133, "learning_rate": 4.3629973622427545e-06, "loss": 0.965, "step": 2727 }, { "epoch": 1.4350341925302472, "grad_norm": 2.2125234603881836, "learning_rate": 4.362534052535161e-06, "loss": 1.0175, "step": 2728 }, { "epoch": 1.4355602314571279, "grad_norm": 2.2930169105529785, "learning_rate": 4.362070599018587e-06, "loss": 0.9545, "step": 2729 }, { "epoch": 1.4360862703840085, "grad_norm": 1.9766452312469482, "learning_rate": 4.36160700172882e-06, "loss": 0.9357, "step": 2730 }, { "epoch": 1.4366123093108891, "grad_norm": 2.060267210006714, "learning_rate": 4.3611432607016536e-06, "loss": 0.9723, "step": 2731 }, { "epoch": 1.4371383482377695, "grad_norm": 2.0135252475738525, "learning_rate": 4.360679375972893e-06, "loss": 0.9515, "step": 2732 }, { "epoch": 1.4376643871646502, "grad_norm": 1.9998459815979004, "learning_rate": 4.360215347578355e-06, "loss": 0.8722, "step": 2733 }, { "epoch": 1.4381904260915308, "grad_norm": 2.0200717449188232, "learning_rate": 4.3597511755538695e-06, "loss": 0.9758, "step": 2734 }, { "epoch": 1.4387164650184114, "grad_norm": 2.147653341293335, "learning_rate": 4.359286859935276e-06, "loss": 0.964, "step": 2735 }, { "epoch": 1.4392425039452919, "grad_norm": 1.997365951538086, "learning_rate": 4.358822400758424e-06, "loss": 0.9303, "step": 2736 }, { "epoch": 1.4397685428721725, "grad_norm": 2.0461249351501465, "learning_rate": 4.358357798059175e-06, "loss": 0.9045, "step": 2737 }, { "epoch": 1.4402945817990531, "grad_norm": 2.1255886554718018, "learning_rate": 4.357893051873402e-06, "loss": 0.9565, "step": 2738 }, { "epoch": 1.4408206207259338, "grad_norm": 2.0816283226013184, "learning_rate": 4.357428162236988e-06, "loss": 0.9459, "step": 2739 }, { "epoch": 1.4413466596528144, "grad_norm": 2.100107192993164, "learning_rate": 4.35696312918583e-06, "loss": 0.962, "step": 2740 }, { "epoch": 1.441872698579695, "grad_norm": 2.2955291271209717, "learning_rate": 4.356497952755832e-06, "loss": 1.0057, "step": 2741 }, { "epoch": 1.4423987375065754, "grad_norm": 2.0716357231140137, "learning_rate": 4.3560326329829115e-06, "loss": 0.9997, "step": 2742 }, { "epoch": 1.442924776433456, "grad_norm": 1.953810453414917, "learning_rate": 4.3555671699029965e-06, "loss": 0.9002, "step": 2743 }, { "epoch": 1.4434508153603367, "grad_norm": 2.075598955154419, "learning_rate": 4.355101563552026e-06, "loss": 1.0174, "step": 2744 }, { "epoch": 1.4439768542872171, "grad_norm": 2.031865119934082, "learning_rate": 4.35463581396595e-06, "loss": 0.9218, "step": 2745 }, { "epoch": 1.4445028932140977, "grad_norm": 2.2142674922943115, "learning_rate": 4.354169921180729e-06, "loss": 0.9674, "step": 2746 }, { "epoch": 1.4450289321409784, "grad_norm": 1.9982706308364868, "learning_rate": 4.353703885232337e-06, "loss": 0.9983, "step": 2747 }, { "epoch": 1.445554971067859, "grad_norm": 2.209235429763794, "learning_rate": 4.3532377061567564e-06, "loss": 0.95, "step": 2748 }, { "epoch": 1.4460810099947397, "grad_norm": 2.1034419536590576, "learning_rate": 4.352771383989982e-06, "loss": 0.9972, "step": 2749 }, { "epoch": 1.4466070489216203, "grad_norm": 2.0965964794158936, "learning_rate": 4.3523049187680176e-06, "loss": 0.9163, "step": 2750 }, { "epoch": 1.447133087848501, "grad_norm": 2.135223627090454, "learning_rate": 4.351838310526882e-06, "loss": 0.921, "step": 2751 }, { "epoch": 1.4476591267753813, "grad_norm": 2.009145498275757, "learning_rate": 4.351371559302601e-06, "loss": 0.9618, "step": 2752 }, { "epoch": 1.448185165702262, "grad_norm": 2.0639655590057373, "learning_rate": 4.350904665131214e-06, "loss": 0.9415, "step": 2753 }, { "epoch": 1.4487112046291426, "grad_norm": 2.023287773132324, "learning_rate": 4.35043762804877e-06, "loss": 0.934, "step": 2754 }, { "epoch": 1.449237243556023, "grad_norm": 2.6245598793029785, "learning_rate": 4.349970448091331e-06, "loss": 0.98, "step": 2755 }, { "epoch": 1.4497632824829036, "grad_norm": 1.9980888366699219, "learning_rate": 4.349503125294966e-06, "loss": 0.926, "step": 2756 }, { "epoch": 1.4502893214097843, "grad_norm": 2.0972256660461426, "learning_rate": 4.3490356596957606e-06, "loss": 0.9526, "step": 2757 }, { "epoch": 1.450815360336665, "grad_norm": 1.9339475631713867, "learning_rate": 4.348568051329807e-06, "loss": 0.946, "step": 2758 }, { "epoch": 1.4513413992635456, "grad_norm": 2.0820112228393555, "learning_rate": 4.348100300233209e-06, "loss": 0.9795, "step": 2759 }, { "epoch": 1.4518674381904262, "grad_norm": 1.9987796545028687, "learning_rate": 4.347632406442085e-06, "loss": 0.9559, "step": 2760 }, { "epoch": 1.4523934771173068, "grad_norm": 2.133943557739258, "learning_rate": 4.3471643699925594e-06, "loss": 0.9414, "step": 2761 }, { "epoch": 1.4529195160441872, "grad_norm": 1.9641112089157104, "learning_rate": 4.346696190920771e-06, "loss": 0.8977, "step": 2762 }, { "epoch": 1.4534455549710679, "grad_norm": 1.9633978605270386, "learning_rate": 4.3462278692628675e-06, "loss": 0.951, "step": 2763 }, { "epoch": 1.4539715938979485, "grad_norm": 2.073227643966675, "learning_rate": 4.34575940505501e-06, "loss": 0.9436, "step": 2764 }, { "epoch": 1.454497632824829, "grad_norm": 2.080355167388916, "learning_rate": 4.345290798333369e-06, "loss": 0.9302, "step": 2765 }, { "epoch": 1.4550236717517095, "grad_norm": 2.071218967437744, "learning_rate": 4.344822049134126e-06, "loss": 0.979, "step": 2766 }, { "epoch": 1.4555497106785902, "grad_norm": 1.9839423894882202, "learning_rate": 4.344353157493475e-06, "loss": 0.9477, "step": 2767 }, { "epoch": 1.4560757496054708, "grad_norm": 1.9391570091247559, "learning_rate": 4.343884123447618e-06, "loss": 0.9042, "step": 2768 }, { "epoch": 1.4566017885323514, "grad_norm": 2.070699453353882, "learning_rate": 4.343414947032771e-06, "loss": 0.8974, "step": 2769 }, { "epoch": 1.457127827459232, "grad_norm": 1.9143608808517456, "learning_rate": 4.342945628285159e-06, "loss": 1.0173, "step": 2770 }, { "epoch": 1.4576538663861125, "grad_norm": 2.040562868118286, "learning_rate": 4.342476167241019e-06, "loss": 0.9201, "step": 2771 }, { "epoch": 1.4581799053129931, "grad_norm": 2.0661396980285645, "learning_rate": 4.342006563936599e-06, "loss": 0.9206, "step": 2772 }, { "epoch": 1.4587059442398738, "grad_norm": 2.0306687355041504, "learning_rate": 4.341536818408158e-06, "loss": 0.9346, "step": 2773 }, { "epoch": 1.4592319831667544, "grad_norm": 2.020789384841919, "learning_rate": 4.3410669306919666e-06, "loss": 0.9565, "step": 2774 }, { "epoch": 1.4597580220936348, "grad_norm": 2.044773817062378, "learning_rate": 4.340596900824303e-06, "loss": 0.9584, "step": 2775 }, { "epoch": 1.4602840610205154, "grad_norm": 2.112931728363037, "learning_rate": 4.340126728841461e-06, "loss": 0.9221, "step": 2776 }, { "epoch": 1.460810099947396, "grad_norm": 1.990900993347168, "learning_rate": 4.339656414779742e-06, "loss": 0.9116, "step": 2777 }, { "epoch": 1.4613361388742767, "grad_norm": 2.0449421405792236, "learning_rate": 4.33918595867546e-06, "loss": 0.9581, "step": 2778 }, { "epoch": 1.4618621778011573, "grad_norm": 1.8929235935211182, "learning_rate": 4.33871536056494e-06, "loss": 0.9274, "step": 2779 }, { "epoch": 1.462388216728038, "grad_norm": 2.001241445541382, "learning_rate": 4.338244620484517e-06, "loss": 0.92, "step": 2780 }, { "epoch": 1.4629142556549184, "grad_norm": 1.966840147972107, "learning_rate": 4.337773738470539e-06, "loss": 0.9073, "step": 2781 }, { "epoch": 1.463440294581799, "grad_norm": 2.086106777191162, "learning_rate": 4.337302714559361e-06, "loss": 0.8889, "step": 2782 }, { "epoch": 1.4639663335086797, "grad_norm": 1.8999865055084229, "learning_rate": 4.336831548787354e-06, "loss": 0.9005, "step": 2783 }, { "epoch": 1.4644923724355603, "grad_norm": 2.0190553665161133, "learning_rate": 4.336360241190896e-06, "loss": 0.9016, "step": 2784 }, { "epoch": 1.4650184113624407, "grad_norm": 2.081059694290161, "learning_rate": 4.335888791806377e-06, "loss": 0.9042, "step": 2785 }, { "epoch": 1.4655444502893213, "grad_norm": 2.0061800479888916, "learning_rate": 4.3354172006701985e-06, "loss": 0.9404, "step": 2786 }, { "epoch": 1.466070489216202, "grad_norm": 2.066344976425171, "learning_rate": 4.334945467818774e-06, "loss": 1.0027, "step": 2787 }, { "epoch": 1.4665965281430826, "grad_norm": 2.0009186267852783, "learning_rate": 4.3344735932885265e-06, "loss": 0.953, "step": 2788 }, { "epoch": 1.4671225670699632, "grad_norm": 2.0518672466278076, "learning_rate": 4.334001577115888e-06, "loss": 0.9466, "step": 2789 }, { "epoch": 1.4676486059968439, "grad_norm": 2.108964204788208, "learning_rate": 4.333529419337306e-06, "loss": 0.9264, "step": 2790 }, { "epoch": 1.4681746449237243, "grad_norm": 2.1517839431762695, "learning_rate": 4.333057119989235e-06, "loss": 0.9676, "step": 2791 }, { "epoch": 1.468700683850605, "grad_norm": 2.0158376693725586, "learning_rate": 4.332584679108141e-06, "loss": 0.9285, "step": 2792 }, { "epoch": 1.4692267227774856, "grad_norm": 1.9635580778121948, "learning_rate": 4.332112096730505e-06, "loss": 0.9108, "step": 2793 }, { "epoch": 1.4697527617043662, "grad_norm": 4.2303948402404785, "learning_rate": 4.3316393728928145e-06, "loss": 0.9461, "step": 2794 }, { "epoch": 1.4702788006312466, "grad_norm": 2.1729414463043213, "learning_rate": 4.331166507631567e-06, "loss": 0.9954, "step": 2795 }, { "epoch": 1.4708048395581272, "grad_norm": 2.0636355876922607, "learning_rate": 4.330693500983275e-06, "loss": 0.9424, "step": 2796 }, { "epoch": 1.4713308784850079, "grad_norm": 2.0802760124206543, "learning_rate": 4.330220352984461e-06, "loss": 0.9575, "step": 2797 }, { "epoch": 1.4718569174118885, "grad_norm": 1.989311695098877, "learning_rate": 4.329747063671656e-06, "loss": 0.9357, "step": 2798 }, { "epoch": 1.4723829563387691, "grad_norm": 1.855409860610962, "learning_rate": 4.329273633081403e-06, "loss": 0.8957, "step": 2799 }, { "epoch": 1.4729089952656498, "grad_norm": 2.1010360717773438, "learning_rate": 4.328800061250258e-06, "loss": 0.9646, "step": 2800 }, { "epoch": 1.4734350341925302, "grad_norm": 2.0032832622528076, "learning_rate": 4.328326348214784e-06, "loss": 0.8992, "step": 2801 }, { "epoch": 1.4739610731194108, "grad_norm": 2.0246925354003906, "learning_rate": 4.327852494011559e-06, "loss": 0.964, "step": 2802 }, { "epoch": 1.4744871120462915, "grad_norm": 2.136380434036255, "learning_rate": 4.327378498677169e-06, "loss": 0.9054, "step": 2803 }, { "epoch": 1.475013150973172, "grad_norm": 2.098071813583374, "learning_rate": 4.326904362248212e-06, "loss": 1.0004, "step": 2804 }, { "epoch": 1.4755391899000525, "grad_norm": 1.933727741241455, "learning_rate": 4.326430084761296e-06, "loss": 0.9402, "step": 2805 }, { "epoch": 1.4760652288269331, "grad_norm": 1.9036450386047363, "learning_rate": 4.325955666253043e-06, "loss": 0.9109, "step": 2806 }, { "epoch": 1.4765912677538138, "grad_norm": 2.2040369510650635, "learning_rate": 4.325481106760081e-06, "loss": 0.9325, "step": 2807 }, { "epoch": 1.4771173066806944, "grad_norm": 2.009504795074463, "learning_rate": 4.325006406319053e-06, "loss": 0.9026, "step": 2808 }, { "epoch": 1.477643345607575, "grad_norm": 2.3199565410614014, "learning_rate": 4.324531564966611e-06, "loss": 0.9873, "step": 2809 }, { "epoch": 1.4781693845344557, "grad_norm": 2.0439627170562744, "learning_rate": 4.3240565827394175e-06, "loss": 0.9246, "step": 2810 }, { "epoch": 1.478695423461336, "grad_norm": 2.013406753540039, "learning_rate": 4.323581459674147e-06, "loss": 0.9511, "step": 2811 }, { "epoch": 1.4792214623882167, "grad_norm": 1.95964515209198, "learning_rate": 4.323106195807484e-06, "loss": 0.9417, "step": 2812 }, { "epoch": 1.4797475013150974, "grad_norm": 2.0100247859954834, "learning_rate": 4.322630791176125e-06, "loss": 0.9274, "step": 2813 }, { "epoch": 1.4802735402419778, "grad_norm": 2.192779302597046, "learning_rate": 4.322155245816777e-06, "loss": 0.9878, "step": 2814 }, { "epoch": 1.4807995791688584, "grad_norm": 2.030768871307373, "learning_rate": 4.321679559766156e-06, "loss": 0.9025, "step": 2815 }, { "epoch": 1.481325618095739, "grad_norm": 1.9836081266403198, "learning_rate": 4.321203733060993e-06, "loss": 0.949, "step": 2816 }, { "epoch": 1.4818516570226197, "grad_norm": 2.0333588123321533, "learning_rate": 4.3207277657380255e-06, "loss": 0.9459, "step": 2817 }, { "epoch": 1.4823776959495003, "grad_norm": 2.035921812057495, "learning_rate": 4.3202516578340024e-06, "loss": 0.9468, "step": 2818 }, { "epoch": 1.482903734876381, "grad_norm": 1.989184856414795, "learning_rate": 4.319775409385688e-06, "loss": 0.9563, "step": 2819 }, { "epoch": 1.4834297738032616, "grad_norm": 2.0952956676483154, "learning_rate": 4.319299020429851e-06, "loss": 0.9089, "step": 2820 }, { "epoch": 1.483955812730142, "grad_norm": 2.2449991703033447, "learning_rate": 4.318822491003276e-06, "loss": 1.0142, "step": 2821 }, { "epoch": 1.4844818516570226, "grad_norm": 2.065350294113159, "learning_rate": 4.3183458211427554e-06, "loss": 0.9017, "step": 2822 }, { "epoch": 1.4850078905839033, "grad_norm": 2.016639471054077, "learning_rate": 4.317869010885094e-06, "loss": 0.951, "step": 2823 }, { "epoch": 1.4855339295107837, "grad_norm": 2.2200684547424316, "learning_rate": 4.317392060267108e-06, "loss": 0.9102, "step": 2824 }, { "epoch": 1.4860599684376643, "grad_norm": 2.0607798099517822, "learning_rate": 4.316914969325622e-06, "loss": 0.9037, "step": 2825 }, { "epoch": 1.486586007364545, "grad_norm": 2.1300361156463623, "learning_rate": 4.316437738097473e-06, "loss": 0.988, "step": 2826 }, { "epoch": 1.4871120462914256, "grad_norm": 2.0302493572235107, "learning_rate": 4.315960366619511e-06, "loss": 0.9148, "step": 2827 }, { "epoch": 1.4876380852183062, "grad_norm": 2.10941481590271, "learning_rate": 4.315482854928591e-06, "loss": 0.9566, "step": 2828 }, { "epoch": 1.4881641241451868, "grad_norm": 2.1145081520080566, "learning_rate": 4.315005203061584e-06, "loss": 0.9662, "step": 2829 }, { "epoch": 1.4886901630720673, "grad_norm": 2.1365935802459717, "learning_rate": 4.314527411055371e-06, "loss": 0.9707, "step": 2830 }, { "epoch": 1.4892162019989479, "grad_norm": 2.082724094390869, "learning_rate": 4.314049478946842e-06, "loss": 0.99, "step": 2831 }, { "epoch": 1.4897422409258285, "grad_norm": 2.1307926177978516, "learning_rate": 4.313571406772899e-06, "loss": 0.9417, "step": 2832 }, { "epoch": 1.4902682798527092, "grad_norm": 1.963191032409668, "learning_rate": 4.3130931945704554e-06, "loss": 0.9214, "step": 2833 }, { "epoch": 1.4907943187795896, "grad_norm": 2.2791664600372314, "learning_rate": 4.312614842376434e-06, "loss": 0.9229, "step": 2834 }, { "epoch": 1.4913203577064702, "grad_norm": 2.023608922958374, "learning_rate": 4.312136350227769e-06, "loss": 0.9386, "step": 2835 }, { "epoch": 1.4918463966333508, "grad_norm": 2.239856481552124, "learning_rate": 4.311657718161405e-06, "loss": 0.9091, "step": 2836 }, { "epoch": 1.4923724355602315, "grad_norm": 1.9821600914001465, "learning_rate": 4.311178946214299e-06, "loss": 0.9533, "step": 2837 }, { "epoch": 1.492898474487112, "grad_norm": 1.967998743057251, "learning_rate": 4.310700034423417e-06, "loss": 0.9011, "step": 2838 }, { "epoch": 1.4934245134139927, "grad_norm": 1.9766757488250732, "learning_rate": 4.310220982825738e-06, "loss": 0.9295, "step": 2839 }, { "epoch": 1.4939505523408732, "grad_norm": 1.9775986671447754, "learning_rate": 4.3097417914582475e-06, "loss": 0.914, "step": 2840 }, { "epoch": 1.4944765912677538, "grad_norm": 2.0174460411071777, "learning_rate": 4.309262460357946e-06, "loss": 0.9597, "step": 2841 }, { "epoch": 1.4950026301946344, "grad_norm": 2.358562707901001, "learning_rate": 4.308782989561844e-06, "loss": 0.9703, "step": 2842 }, { "epoch": 1.495528669121515, "grad_norm": 2.00032639503479, "learning_rate": 4.308303379106962e-06, "loss": 0.957, "step": 2843 }, { "epoch": 1.4960547080483955, "grad_norm": 2.1930489540100098, "learning_rate": 4.30782362903033e-06, "loss": 0.9834, "step": 2844 }, { "epoch": 1.496580746975276, "grad_norm": 2.0691113471984863, "learning_rate": 4.307343739368991e-06, "loss": 0.9799, "step": 2845 }, { "epoch": 1.4971067859021567, "grad_norm": 2.1371543407440186, "learning_rate": 4.306863710159999e-06, "loss": 0.9436, "step": 2846 }, { "epoch": 1.4976328248290374, "grad_norm": 2.1186442375183105, "learning_rate": 4.306383541440415e-06, "loss": 0.9843, "step": 2847 }, { "epoch": 1.498158863755918, "grad_norm": 2.091587543487549, "learning_rate": 4.3059032332473174e-06, "loss": 0.9357, "step": 2848 }, { "epoch": 1.4986849026827986, "grad_norm": 2.022662401199341, "learning_rate": 4.305422785617789e-06, "loss": 0.9234, "step": 2849 }, { "epoch": 1.499210941609679, "grad_norm": 2.015878915786743, "learning_rate": 4.304942198588926e-06, "loss": 0.9361, "step": 2850 }, { "epoch": 1.4997369805365597, "grad_norm": 1.9839386940002441, "learning_rate": 4.304461472197836e-06, "loss": 0.9103, "step": 2851 }, { "epoch": 1.5002630194634403, "grad_norm": 1.995928168296814, "learning_rate": 4.303980606481636e-06, "loss": 0.9094, "step": 2852 }, { "epoch": 1.5007890583903207, "grad_norm": 2.102646827697754, "learning_rate": 4.303499601477455e-06, "loss": 0.9397, "step": 2853 }, { "epoch": 1.5013150973172014, "grad_norm": 2.030672550201416, "learning_rate": 4.30301845722243e-06, "loss": 0.9101, "step": 2854 }, { "epoch": 1.501841136244082, "grad_norm": 2.064537286758423, "learning_rate": 4.302537173753714e-06, "loss": 0.9662, "step": 2855 }, { "epoch": 1.5023671751709626, "grad_norm": 2.000596284866333, "learning_rate": 4.302055751108465e-06, "loss": 0.9134, "step": 2856 }, { "epoch": 1.5028932140978433, "grad_norm": 2.0901315212249756, "learning_rate": 4.301574189323857e-06, "loss": 0.9528, "step": 2857 }, { "epoch": 1.503419253024724, "grad_norm": 1.9879727363586426, "learning_rate": 4.30109248843707e-06, "loss": 0.8697, "step": 2858 }, { "epoch": 1.5039452919516045, "grad_norm": 1.9458873271942139, "learning_rate": 4.300610648485296e-06, "loss": 0.9542, "step": 2859 }, { "epoch": 1.5044713308784852, "grad_norm": 1.984338402748108, "learning_rate": 4.300128669505741e-06, "loss": 0.9782, "step": 2860 }, { "epoch": 1.5049973698053656, "grad_norm": 1.9819425344467163, "learning_rate": 4.299646551535618e-06, "loss": 0.9478, "step": 2861 }, { "epoch": 1.5055234087322462, "grad_norm": 1.972513198852539, "learning_rate": 4.299164294612153e-06, "loss": 0.9438, "step": 2862 }, { "epoch": 1.5060494476591266, "grad_norm": 1.9727842807769775, "learning_rate": 4.29868189877258e-06, "loss": 0.9008, "step": 2863 }, { "epoch": 1.5065754865860073, "grad_norm": 1.9885272979736328, "learning_rate": 4.298199364054147e-06, "loss": 0.9472, "step": 2864 }, { "epoch": 1.507101525512888, "grad_norm": 3.059291362762451, "learning_rate": 4.2977166904941105e-06, "loss": 0.9379, "step": 2865 }, { "epoch": 1.5076275644397685, "grad_norm": 2.0359585285186768, "learning_rate": 4.297233878129739e-06, "loss": 0.9033, "step": 2866 }, { "epoch": 1.5081536033666492, "grad_norm": 2.0338592529296875, "learning_rate": 4.296750926998311e-06, "loss": 0.9086, "step": 2867 }, { "epoch": 1.5086796422935298, "grad_norm": 2.0874390602111816, "learning_rate": 4.296267837137115e-06, "loss": 0.9548, "step": 2868 }, { "epoch": 1.5092056812204104, "grad_norm": 1.9003558158874512, "learning_rate": 4.295784608583451e-06, "loss": 0.9116, "step": 2869 }, { "epoch": 1.5097317201472908, "grad_norm": 2.1565816402435303, "learning_rate": 4.295301241374632e-06, "loss": 0.9157, "step": 2870 }, { "epoch": 1.5102577590741715, "grad_norm": 1.9842712879180908, "learning_rate": 4.294817735547977e-06, "loss": 0.9291, "step": 2871 }, { "epoch": 1.5107837980010521, "grad_norm": 1.9991481304168701, "learning_rate": 4.29433409114082e-06, "loss": 0.9047, "step": 2872 }, { "epoch": 1.5113098369279325, "grad_norm": 1.9973630905151367, "learning_rate": 4.293850308190502e-06, "loss": 0.9625, "step": 2873 }, { "epoch": 1.5118358758548132, "grad_norm": 1.8854007720947266, "learning_rate": 4.2933663867343785e-06, "loss": 0.9208, "step": 2874 }, { "epoch": 1.5123619147816938, "grad_norm": 2.1708757877349854, "learning_rate": 4.292882326809814e-06, "loss": 1.0043, "step": 2875 }, { "epoch": 1.5128879537085744, "grad_norm": 2.030937671661377, "learning_rate": 4.2923981284541805e-06, "loss": 0.9343, "step": 2876 }, { "epoch": 1.513413992635455, "grad_norm": 1.9324885606765747, "learning_rate": 4.291913791704867e-06, "loss": 0.9047, "step": 2877 }, { "epoch": 1.5139400315623357, "grad_norm": 2.183217763900757, "learning_rate": 4.2914293165992675e-06, "loss": 0.9516, "step": 2878 }, { "epoch": 1.5144660704892163, "grad_norm": 2.1086974143981934, "learning_rate": 4.290944703174791e-06, "loss": 0.9303, "step": 2879 }, { "epoch": 1.5149921094160967, "grad_norm": 2.1121649742126465, "learning_rate": 4.290459951468853e-06, "loss": 0.9018, "step": 2880 }, { "epoch": 1.5155181483429774, "grad_norm": 1.879559874534607, "learning_rate": 4.289975061518884e-06, "loss": 0.9525, "step": 2881 }, { "epoch": 1.516044187269858, "grad_norm": 2.0514028072357178, "learning_rate": 4.289490033362322e-06, "loss": 0.9741, "step": 2882 }, { "epoch": 1.5165702261967384, "grad_norm": 2.1021642684936523, "learning_rate": 4.2890048670366154e-06, "loss": 0.9898, "step": 2883 }, { "epoch": 1.517096265123619, "grad_norm": 1.877241849899292, "learning_rate": 4.2885195625792275e-06, "loss": 0.8746, "step": 2884 }, { "epoch": 1.5176223040504997, "grad_norm": 1.9616249799728394, "learning_rate": 4.288034120027628e-06, "loss": 0.8823, "step": 2885 }, { "epoch": 1.5181483429773803, "grad_norm": 2.1360700130462646, "learning_rate": 4.287548539419298e-06, "loss": 0.9771, "step": 2886 }, { "epoch": 1.518674381904261, "grad_norm": 1.9651004076004028, "learning_rate": 4.287062820791731e-06, "loss": 0.9203, "step": 2887 }, { "epoch": 1.5192004208311416, "grad_norm": 1.9593781232833862, "learning_rate": 4.286576964182429e-06, "loss": 0.9295, "step": 2888 }, { "epoch": 1.5197264597580222, "grad_norm": 2.0677568912506104, "learning_rate": 4.286090969628907e-06, "loss": 0.937, "step": 2889 }, { "epoch": 1.5202524986849026, "grad_norm": 2.019524335861206, "learning_rate": 4.285604837168689e-06, "loss": 0.9655, "step": 2890 }, { "epoch": 1.5207785376117833, "grad_norm": 2.0417678356170654, "learning_rate": 4.285118566839308e-06, "loss": 0.9564, "step": 2891 }, { "epoch": 1.521304576538664, "grad_norm": 2.0766942501068115, "learning_rate": 4.284632158678312e-06, "loss": 0.9795, "step": 2892 }, { "epoch": 1.5218306154655443, "grad_norm": 2.1768674850463867, "learning_rate": 4.284145612723256e-06, "loss": 0.9203, "step": 2893 }, { "epoch": 1.522356654392425, "grad_norm": 2.027491569519043, "learning_rate": 4.283658929011708e-06, "loss": 0.9526, "step": 2894 }, { "epoch": 1.5228826933193056, "grad_norm": 2.1974449157714844, "learning_rate": 4.283172107581245e-06, "loss": 0.9475, "step": 2895 }, { "epoch": 1.5234087322461862, "grad_norm": 2.0966193675994873, "learning_rate": 4.282685148469454e-06, "loss": 0.9261, "step": 2896 }, { "epoch": 1.5239347711730669, "grad_norm": 2.052272319793701, "learning_rate": 4.282198051713936e-06, "loss": 0.9781, "step": 2897 }, { "epoch": 1.5244608100999475, "grad_norm": 2.1015512943267822, "learning_rate": 4.281710817352299e-06, "loss": 0.8729, "step": 2898 }, { "epoch": 1.5249868490268281, "grad_norm": 2.110563039779663, "learning_rate": 4.281223445422165e-06, "loss": 0.9974, "step": 2899 }, { "epoch": 1.5255128879537085, "grad_norm": 2.1721880435943604, "learning_rate": 4.280735935961161e-06, "loss": 0.9293, "step": 2900 }, { "epoch": 1.5260389268805892, "grad_norm": 2.011016845703125, "learning_rate": 4.2802482890069315e-06, "loss": 0.9336, "step": 2901 }, { "epoch": 1.5265649658074696, "grad_norm": 2.1780524253845215, "learning_rate": 4.279760504597128e-06, "loss": 0.9383, "step": 2902 }, { "epoch": 1.5270910047343502, "grad_norm": 2.0925979614257812, "learning_rate": 4.279272582769412e-06, "loss": 0.9651, "step": 2903 }, { "epoch": 1.5276170436612309, "grad_norm": 2.1651055812835693, "learning_rate": 4.278784523561458e-06, "loss": 0.99, "step": 2904 }, { "epoch": 1.5281430825881115, "grad_norm": 1.9780375957489014, "learning_rate": 4.278296327010948e-06, "loss": 0.9006, "step": 2905 }, { "epoch": 1.5286691215149921, "grad_norm": 2.053251028060913, "learning_rate": 4.2778079931555775e-06, "loss": 0.9746, "step": 2906 }, { "epoch": 1.5291951604418728, "grad_norm": 2.0084316730499268, "learning_rate": 4.277319522033051e-06, "loss": 0.9276, "step": 2907 }, { "epoch": 1.5297211993687534, "grad_norm": 2.1033427715301514, "learning_rate": 4.276830913681085e-06, "loss": 0.9612, "step": 2908 }, { "epoch": 1.530247238295634, "grad_norm": 3.971057176589966, "learning_rate": 4.2763421681374045e-06, "loss": 0.9064, "step": 2909 }, { "epoch": 1.5307732772225144, "grad_norm": 2.1207163333892822, "learning_rate": 4.275853285439747e-06, "loss": 0.9463, "step": 2910 }, { "epoch": 1.531299316149395, "grad_norm": 2.7853474617004395, "learning_rate": 4.27536426562586e-06, "loss": 0.9335, "step": 2911 }, { "epoch": 1.5318253550762755, "grad_norm": 2.0105173587799072, "learning_rate": 4.2748751087335e-06, "loss": 0.9734, "step": 2912 }, { "epoch": 1.5323513940031561, "grad_norm": 2.0165185928344727, "learning_rate": 4.274385814800438e-06, "loss": 0.9229, "step": 2913 }, { "epoch": 1.5328774329300368, "grad_norm": 1.9177309274673462, "learning_rate": 4.273896383864451e-06, "loss": 0.9276, "step": 2914 }, { "epoch": 1.5334034718569174, "grad_norm": 2.082285165786743, "learning_rate": 4.273406815963329e-06, "loss": 0.9122, "step": 2915 }, { "epoch": 1.533929510783798, "grad_norm": 1.9629693031311035, "learning_rate": 4.272917111134873e-06, "loss": 0.9476, "step": 2916 }, { "epoch": 1.5344555497106787, "grad_norm": 2.0855002403259277, "learning_rate": 4.272427269416893e-06, "loss": 0.8956, "step": 2917 }, { "epoch": 1.5349815886375593, "grad_norm": 2.0533595085144043, "learning_rate": 4.271937290847212e-06, "loss": 0.9518, "step": 2918 }, { "epoch": 1.53550762756444, "grad_norm": 2.0000126361846924, "learning_rate": 4.27144717546366e-06, "loss": 0.8994, "step": 2919 }, { "epoch": 1.5360336664913203, "grad_norm": 2.0330493450164795, "learning_rate": 4.2709569233040806e-06, "loss": 0.9349, "step": 2920 }, { "epoch": 1.536559705418201, "grad_norm": 2.2399914264678955, "learning_rate": 4.270466534406326e-06, "loss": 0.9755, "step": 2921 }, { "epoch": 1.5370857443450814, "grad_norm": 2.0268807411193848, "learning_rate": 4.269976008808261e-06, "loss": 0.9097, "step": 2922 }, { "epoch": 1.537611783271962, "grad_norm": 1.9109435081481934, "learning_rate": 4.26948534654776e-06, "loss": 0.8683, "step": 2923 }, { "epoch": 1.5381378221988427, "grad_norm": 2.0855066776275635, "learning_rate": 4.268994547662705e-06, "loss": 0.958, "step": 2924 }, { "epoch": 1.5386638611257233, "grad_norm": 2.0536108016967773, "learning_rate": 4.268503612190995e-06, "loss": 0.9176, "step": 2925 }, { "epoch": 1.539189900052604, "grad_norm": 2.137848138809204, "learning_rate": 4.268012540170533e-06, "loss": 0.9535, "step": 2926 }, { "epoch": 1.5397159389794846, "grad_norm": 2.0253493785858154, "learning_rate": 4.267521331639237e-06, "loss": 0.8924, "step": 2927 }, { "epoch": 1.5402419779063652, "grad_norm": 1.972090244293213, "learning_rate": 4.267029986635034e-06, "loss": 0.9428, "step": 2928 }, { "epoch": 1.5407680168332458, "grad_norm": 2.040821075439453, "learning_rate": 4.266538505195861e-06, "loss": 0.9717, "step": 2929 }, { "epoch": 1.5412940557601262, "grad_norm": 1.9857984781265259, "learning_rate": 4.266046887359665e-06, "loss": 0.9474, "step": 2930 }, { "epoch": 1.5418200946870069, "grad_norm": 2.050723075866699, "learning_rate": 4.265555133164406e-06, "loss": 0.9506, "step": 2931 }, { "epoch": 1.5423461336138873, "grad_norm": 1.996460199356079, "learning_rate": 4.265063242648052e-06, "loss": 0.9111, "step": 2932 }, { "epoch": 1.542872172540768, "grad_norm": 2.0638771057128906, "learning_rate": 4.264571215848584e-06, "loss": 0.939, "step": 2933 }, { "epoch": 1.5433982114676486, "grad_norm": 1.990309476852417, "learning_rate": 4.264079052803991e-06, "loss": 0.8726, "step": 2934 }, { "epoch": 1.5439242503945292, "grad_norm": 1.9167861938476562, "learning_rate": 4.263586753552274e-06, "loss": 0.9005, "step": 2935 }, { "epoch": 1.5444502893214098, "grad_norm": 2.025639533996582, "learning_rate": 4.263094318131443e-06, "loss": 0.9234, "step": 2936 }, { "epoch": 1.5449763282482905, "grad_norm": 2.042449474334717, "learning_rate": 4.262601746579521e-06, "loss": 0.9545, "step": 2937 }, { "epoch": 1.545502367175171, "grad_norm": 1.97848641872406, "learning_rate": 4.26210903893454e-06, "loss": 0.8961, "step": 2938 }, { "epoch": 1.5460284061020515, "grad_norm": 1.9675039052963257, "learning_rate": 4.261616195234544e-06, "loss": 1.0115, "step": 2939 }, { "epoch": 1.5465544450289321, "grad_norm": 2.0312752723693848, "learning_rate": 4.261123215517583e-06, "loss": 0.9335, "step": 2940 }, { "epoch": 1.5470804839558128, "grad_norm": 2.0146288871765137, "learning_rate": 4.260630099821722e-06, "loss": 0.9767, "step": 2941 }, { "epoch": 1.5476065228826932, "grad_norm": 1.9374853372573853, "learning_rate": 4.260136848185036e-06, "loss": 0.9363, "step": 2942 }, { "epoch": 1.5481325618095738, "grad_norm": 2.024721384048462, "learning_rate": 4.2596434606456106e-06, "loss": 0.9664, "step": 2943 }, { "epoch": 1.5486586007364544, "grad_norm": 1.9361040592193604, "learning_rate": 4.259149937241538e-06, "loss": 0.9168, "step": 2944 }, { "epoch": 1.549184639663335, "grad_norm": 1.9720629453659058, "learning_rate": 4.258656278010926e-06, "loss": 0.9508, "step": 2945 }, { "epoch": 1.5497106785902157, "grad_norm": 2.2427873611450195, "learning_rate": 4.25816248299189e-06, "loss": 0.929, "step": 2946 }, { "epoch": 1.5502367175170964, "grad_norm": 2.058295965194702, "learning_rate": 4.257668552222558e-06, "loss": 0.9301, "step": 2947 }, { "epoch": 1.550762756443977, "grad_norm": 2.1682353019714355, "learning_rate": 4.257174485741064e-06, "loss": 0.9307, "step": 2948 }, { "epoch": 1.5512887953708574, "grad_norm": 1.8824107646942139, "learning_rate": 4.256680283585559e-06, "loss": 0.9651, "step": 2949 }, { "epoch": 1.551814834297738, "grad_norm": 1.9697431325912476, "learning_rate": 4.2561859457942e-06, "loss": 0.956, "step": 2950 }, { "epoch": 1.5523408732246187, "grad_norm": 2.080841302871704, "learning_rate": 4.255691472405155e-06, "loss": 0.9525, "step": 2951 }, { "epoch": 1.552866912151499, "grad_norm": 2.130826950073242, "learning_rate": 4.255196863456602e-06, "loss": 0.9365, "step": 2952 }, { "epoch": 1.5533929510783797, "grad_norm": 2.101513385772705, "learning_rate": 4.254702118986732e-06, "loss": 0.8871, "step": 2953 }, { "epoch": 1.5539189900052603, "grad_norm": 2.055342197418213, "learning_rate": 4.254207239033746e-06, "loss": 0.9333, "step": 2954 }, { "epoch": 1.554445028932141, "grad_norm": 1.9468308687210083, "learning_rate": 4.253712223635852e-06, "loss": 0.9198, "step": 2955 }, { "epoch": 1.5549710678590216, "grad_norm": 2.011702299118042, "learning_rate": 4.253217072831272e-06, "loss": 0.936, "step": 2956 }, { "epoch": 1.5554971067859023, "grad_norm": 1.9965795278549194, "learning_rate": 4.252721786658237e-06, "loss": 0.8836, "step": 2957 }, { "epoch": 1.5560231457127829, "grad_norm": 2.023535966873169, "learning_rate": 4.252226365154989e-06, "loss": 0.9256, "step": 2958 }, { "epoch": 1.5565491846396633, "grad_norm": 1.9498130083084106, "learning_rate": 4.251730808359781e-06, "loss": 0.9481, "step": 2959 }, { "epoch": 1.557075223566544, "grad_norm": 1.9864977598190308, "learning_rate": 4.251235116310874e-06, "loss": 0.8909, "step": 2960 }, { "epoch": 1.5576012624934246, "grad_norm": 2.0688984394073486, "learning_rate": 4.250739289046542e-06, "loss": 0.9526, "step": 2961 }, { "epoch": 1.558127301420305, "grad_norm": 1.9319058656692505, "learning_rate": 4.250243326605069e-06, "loss": 0.9905, "step": 2962 }, { "epoch": 1.5586533403471856, "grad_norm": 2.342468738555908, "learning_rate": 4.249747229024748e-06, "loss": 0.9612, "step": 2963 }, { "epoch": 1.5591793792740662, "grad_norm": 2.1509134769439697, "learning_rate": 4.249250996343884e-06, "loss": 0.9446, "step": 2964 }, { "epoch": 1.5597054182009469, "grad_norm": 2.132373571395874, "learning_rate": 4.248754628600793e-06, "loss": 0.9379, "step": 2965 }, { "epoch": 1.5602314571278275, "grad_norm": 2.0995333194732666, "learning_rate": 4.248258125833797e-06, "loss": 0.9239, "step": 2966 }, { "epoch": 1.5607574960547081, "grad_norm": 2.122519016265869, "learning_rate": 4.247761488081236e-06, "loss": 0.917, "step": 2967 }, { "epoch": 1.5612835349815888, "grad_norm": 1.9754114151000977, "learning_rate": 4.247264715381453e-06, "loss": 0.9197, "step": 2968 }, { "epoch": 1.5618095739084692, "grad_norm": 2.0227582454681396, "learning_rate": 4.2467678077728044e-06, "loss": 0.9128, "step": 2969 }, { "epoch": 1.5623356128353498, "grad_norm": 1.9663293361663818, "learning_rate": 4.246270765293659e-06, "loss": 0.9485, "step": 2970 }, { "epoch": 1.5628616517622302, "grad_norm": 1.953763484954834, "learning_rate": 4.245773587982394e-06, "loss": 0.9392, "step": 2971 }, { "epoch": 1.5633876906891109, "grad_norm": 2.063044309616089, "learning_rate": 4.245276275877396e-06, "loss": 0.9448, "step": 2972 }, { "epoch": 1.5639137296159915, "grad_norm": 2.148085117340088, "learning_rate": 4.244778829017063e-06, "loss": 0.9352, "step": 2973 }, { "epoch": 1.5644397685428721, "grad_norm": 2.1457817554473877, "learning_rate": 4.244281247439805e-06, "loss": 0.9651, "step": 2974 }, { "epoch": 1.5649658074697528, "grad_norm": 2.127017021179199, "learning_rate": 4.243783531184041e-06, "loss": 0.9157, "step": 2975 }, { "epoch": 1.5654918463966334, "grad_norm": 1.941909670829773, "learning_rate": 4.2432856802882e-06, "loss": 0.93, "step": 2976 }, { "epoch": 1.566017885323514, "grad_norm": 2.045715093612671, "learning_rate": 4.24278769479072e-06, "loss": 0.8973, "step": 2977 }, { "epoch": 1.5665439242503947, "grad_norm": 2.024904727935791, "learning_rate": 4.242289574730053e-06, "loss": 0.9165, "step": 2978 }, { "epoch": 1.567069963177275, "grad_norm": 1.9800280332565308, "learning_rate": 4.241791320144661e-06, "loss": 0.9598, "step": 2979 }, { "epoch": 1.5675960021041557, "grad_norm": 2.0844175815582275, "learning_rate": 4.241292931073012e-06, "loss": 0.9064, "step": 2980 }, { "epoch": 1.5681220410310361, "grad_norm": 1.9466551542282104, "learning_rate": 4.240794407553589e-06, "loss": 0.8825, "step": 2981 }, { "epoch": 1.5686480799579168, "grad_norm": 1.9948161840438843, "learning_rate": 4.240295749624883e-06, "loss": 0.889, "step": 2982 }, { "epoch": 1.5691741188847974, "grad_norm": 2.0572495460510254, "learning_rate": 4.2397969573253965e-06, "loss": 0.9171, "step": 2983 }, { "epoch": 1.569700157811678, "grad_norm": 1.970253348350525, "learning_rate": 4.239298030693643e-06, "loss": 0.9645, "step": 2984 }, { "epoch": 1.5702261967385587, "grad_norm": 2.019176959991455, "learning_rate": 4.238798969768143e-06, "loss": 0.9488, "step": 2985 }, { "epoch": 1.5707522356654393, "grad_norm": 2.239705801010132, "learning_rate": 4.238299774587432e-06, "loss": 0.8997, "step": 2986 }, { "epoch": 1.57127827459232, "grad_norm": 2.076761245727539, "learning_rate": 4.2378004451900515e-06, "loss": 0.9058, "step": 2987 }, { "epoch": 1.5718043135192006, "grad_norm": 2.033193588256836, "learning_rate": 4.237300981614557e-06, "loss": 0.8999, "step": 2988 }, { "epoch": 1.572330352446081, "grad_norm": 2.116682291030884, "learning_rate": 4.236801383899514e-06, "loss": 0.9616, "step": 2989 }, { "epoch": 1.5728563913729616, "grad_norm": 1.901473879814148, "learning_rate": 4.236301652083493e-06, "loss": 0.876, "step": 2990 }, { "epoch": 1.573382430299842, "grad_norm": 2.0186798572540283, "learning_rate": 4.235801786205083e-06, "loss": 0.9549, "step": 2991 }, { "epoch": 1.5739084692267227, "grad_norm": 1.9795666933059692, "learning_rate": 4.235301786302878e-06, "loss": 0.9298, "step": 2992 }, { "epoch": 1.5744345081536033, "grad_norm": 1.9244426488876343, "learning_rate": 4.234801652415484e-06, "loss": 0.9036, "step": 2993 }, { "epoch": 1.574960547080484, "grad_norm": 2.013563394546509, "learning_rate": 4.234301384581516e-06, "loss": 0.9765, "step": 2994 }, { "epoch": 1.5754865860073646, "grad_norm": 2.078366994857788, "learning_rate": 4.2338009828396015e-06, "loss": 0.9556, "step": 2995 }, { "epoch": 1.5760126249342452, "grad_norm": 2.0059587955474854, "learning_rate": 4.233300447228376e-06, "loss": 0.9333, "step": 2996 }, { "epoch": 1.5765386638611258, "grad_norm": 1.874704122543335, "learning_rate": 4.2327997777864895e-06, "loss": 0.8841, "step": 2997 }, { "epoch": 1.5770647027880065, "grad_norm": 2.0818729400634766, "learning_rate": 4.232298974552596e-06, "loss": 0.9115, "step": 2998 }, { "epoch": 1.577590741714887, "grad_norm": 2.0479767322540283, "learning_rate": 4.231798037565365e-06, "loss": 0.9048, "step": 2999 }, { "epoch": 1.5781167806417675, "grad_norm": 1.973334789276123, "learning_rate": 4.2312969668634745e-06, "loss": 0.9308, "step": 3000 }, { "epoch": 1.578642819568648, "grad_norm": 1.9209327697753906, "learning_rate": 4.230795762485612e-06, "loss": 0.9196, "step": 3001 }, { "epoch": 1.5791688584955286, "grad_norm": 2.012134552001953, "learning_rate": 4.230294424470478e-06, "loss": 0.9649, "step": 3002 }, { "epoch": 1.5796948974224092, "grad_norm": 2.057433605194092, "learning_rate": 4.229792952856779e-06, "loss": 0.9747, "step": 3003 }, { "epoch": 1.5802209363492898, "grad_norm": 1.8826886415481567, "learning_rate": 4.2292913476832375e-06, "loss": 0.962, "step": 3004 }, { "epoch": 1.5807469752761705, "grad_norm": 2.063971519470215, "learning_rate": 4.22878960898858e-06, "loss": 0.9498, "step": 3005 }, { "epoch": 1.581273014203051, "grad_norm": 2.021562099456787, "learning_rate": 4.228287736811548e-06, "loss": 0.9084, "step": 3006 }, { "epoch": 1.5817990531299317, "grad_norm": 2.029219388961792, "learning_rate": 4.227785731190893e-06, "loss": 0.9443, "step": 3007 }, { "epoch": 1.5823250920568122, "grad_norm": 1.8719290494918823, "learning_rate": 4.227283592165373e-06, "loss": 0.9135, "step": 3008 }, { "epoch": 1.5828511309836928, "grad_norm": 1.9184664487838745, "learning_rate": 4.226781319773761e-06, "loss": 0.9267, "step": 3009 }, { "epoch": 1.5833771699105734, "grad_norm": 2.0591418743133545, "learning_rate": 4.226278914054837e-06, "loss": 0.9522, "step": 3010 }, { "epoch": 1.5839032088374538, "grad_norm": 1.992311954498291, "learning_rate": 4.225776375047394e-06, "loss": 0.913, "step": 3011 }, { "epoch": 1.5844292477643345, "grad_norm": 1.8933684825897217, "learning_rate": 4.225273702790231e-06, "loss": 0.9486, "step": 3012 }, { "epoch": 1.584955286691215, "grad_norm": 1.999765157699585, "learning_rate": 4.224770897322162e-06, "loss": 0.9097, "step": 3013 }, { "epoch": 1.5854813256180957, "grad_norm": 2.038668632507324, "learning_rate": 4.224267958682009e-06, "loss": 0.9389, "step": 3014 }, { "epoch": 1.5860073645449764, "grad_norm": 1.9329705238342285, "learning_rate": 4.2237648869086055e-06, "loss": 0.9617, "step": 3015 }, { "epoch": 1.586533403471857, "grad_norm": 2.012115955352783, "learning_rate": 4.223261682040793e-06, "loss": 0.9012, "step": 3016 }, { "epoch": 1.5870594423987376, "grad_norm": 2.1576831340789795, "learning_rate": 4.222758344117424e-06, "loss": 0.9212, "step": 3017 }, { "epoch": 1.587585481325618, "grad_norm": 1.9946175813674927, "learning_rate": 4.222254873177366e-06, "loss": 0.9553, "step": 3018 }, { "epoch": 1.5881115202524987, "grad_norm": 2.050882339477539, "learning_rate": 4.221751269259488e-06, "loss": 0.9352, "step": 3019 }, { "epoch": 1.5886375591793793, "grad_norm": 1.9702178239822388, "learning_rate": 4.2212475324026765e-06, "loss": 0.8958, "step": 3020 }, { "epoch": 1.5891635981062597, "grad_norm": 2.0302233695983887, "learning_rate": 4.220743662645825e-06, "loss": 0.96, "step": 3021 }, { "epoch": 1.5896896370331404, "grad_norm": 2.007420063018799, "learning_rate": 4.220239660027839e-06, "loss": 0.9651, "step": 3022 }, { "epoch": 1.590215675960021, "grad_norm": 1.9850428104400635, "learning_rate": 4.219735524587632e-06, "loss": 0.9015, "step": 3023 }, { "epoch": 1.5907417148869016, "grad_norm": 2.006885051727295, "learning_rate": 4.2192312563641285e-06, "loss": 0.9478, "step": 3024 }, { "epoch": 1.5912677538137823, "grad_norm": 1.8564372062683105, "learning_rate": 4.218726855396265e-06, "loss": 0.8902, "step": 3025 }, { "epoch": 1.591793792740663, "grad_norm": 2.064643383026123, "learning_rate": 4.218222321722988e-06, "loss": 0.9758, "step": 3026 }, { "epoch": 1.5923198316675435, "grad_norm": 1.9374020099639893, "learning_rate": 4.217717655383252e-06, "loss": 0.8865, "step": 3027 }, { "epoch": 1.592845870594424, "grad_norm": 2.206101655960083, "learning_rate": 4.217212856416023e-06, "loss": 0.9711, "step": 3028 }, { "epoch": 1.5933719095213046, "grad_norm": 1.9797239303588867, "learning_rate": 4.216707924860277e-06, "loss": 0.9303, "step": 3029 }, { "epoch": 1.5938979484481852, "grad_norm": 2.039867401123047, "learning_rate": 4.216202860755001e-06, "loss": 0.9324, "step": 3030 }, { "epoch": 1.5944239873750656, "grad_norm": 2.0361459255218506, "learning_rate": 4.215697664139192e-06, "loss": 0.9229, "step": 3031 }, { "epoch": 1.5949500263019463, "grad_norm": 2.052764654159546, "learning_rate": 4.215192335051857e-06, "loss": 0.9114, "step": 3032 }, { "epoch": 1.595476065228827, "grad_norm": 2.084118127822876, "learning_rate": 4.214686873532013e-06, "loss": 0.9188, "step": 3033 }, { "epoch": 1.5960021041557075, "grad_norm": 2.352452278137207, "learning_rate": 4.214181279618686e-06, "loss": 0.944, "step": 3034 }, { "epoch": 1.5965281430825882, "grad_norm": 2.076841354370117, "learning_rate": 4.213675553350915e-06, "loss": 0.9896, "step": 3035 }, { "epoch": 1.5970541820094688, "grad_norm": 1.9557950496673584, "learning_rate": 4.213169694767749e-06, "loss": 0.935, "step": 3036 }, { "epoch": 1.5975802209363494, "grad_norm": 2.0926027297973633, "learning_rate": 4.212663703908244e-06, "loss": 0.9745, "step": 3037 }, { "epoch": 1.5981062598632298, "grad_norm": 2.065674066543579, "learning_rate": 4.212157580811469e-06, "loss": 0.9136, "step": 3038 }, { "epoch": 1.5986322987901105, "grad_norm": 1.9909225702285767, "learning_rate": 4.211651325516504e-06, "loss": 0.9501, "step": 3039 }, { "epoch": 1.599158337716991, "grad_norm": 2.153087854385376, "learning_rate": 4.211144938062434e-06, "loss": 0.9155, "step": 3040 }, { "epoch": 1.5996843766438715, "grad_norm": 2.061505079269409, "learning_rate": 4.210638418488363e-06, "loss": 0.9457, "step": 3041 }, { "epoch": 1.6002104155707522, "grad_norm": 1.9815692901611328, "learning_rate": 4.210131766833396e-06, "loss": 0.9528, "step": 3042 }, { "epoch": 1.6007364544976328, "grad_norm": 2.0386016368865967, "learning_rate": 4.2096249831366535e-06, "loss": 0.9551, "step": 3043 }, { "epoch": 1.6012624934245134, "grad_norm": 2.04970645904541, "learning_rate": 4.209118067437266e-06, "loss": 0.9282, "step": 3044 }, { "epoch": 1.601788532351394, "grad_norm": 2.013005018234253, "learning_rate": 4.208611019774372e-06, "loss": 0.9468, "step": 3045 }, { "epoch": 1.6023145712782747, "grad_norm": 1.9524039030075073, "learning_rate": 4.208103840187121e-06, "loss": 0.9581, "step": 3046 }, { "epoch": 1.6028406102051553, "grad_norm": 2.1470460891723633, "learning_rate": 4.207596528714675e-06, "loss": 0.9304, "step": 3047 }, { "epoch": 1.6033666491320357, "grad_norm": 2.1197633743286133, "learning_rate": 4.207089085396203e-06, "loss": 0.9498, "step": 3048 }, { "epoch": 1.6038926880589164, "grad_norm": 1.953028678894043, "learning_rate": 4.206581510270885e-06, "loss": 0.9385, "step": 3049 }, { "epoch": 1.6044187269857968, "grad_norm": 2.1204376220703125, "learning_rate": 4.206073803377913e-06, "loss": 0.9772, "step": 3050 }, { "epoch": 1.6049447659126774, "grad_norm": 2.153193712234497, "learning_rate": 4.2055659647564875e-06, "loss": 0.9488, "step": 3051 }, { "epoch": 1.605470804839558, "grad_norm": 2.1081886291503906, "learning_rate": 4.205057994445818e-06, "loss": 0.9291, "step": 3052 }, { "epoch": 1.6059968437664387, "grad_norm": 2.0400824546813965, "learning_rate": 4.2045498924851266e-06, "loss": 0.9512, "step": 3053 }, { "epoch": 1.6065228826933193, "grad_norm": 2.0144882202148438, "learning_rate": 4.2040416589136445e-06, "loss": 0.9566, "step": 3054 }, { "epoch": 1.6070489216202, "grad_norm": 2.105506181716919, "learning_rate": 4.2035332937706145e-06, "loss": 0.9245, "step": 3055 }, { "epoch": 1.6075749605470806, "grad_norm": 1.857825517654419, "learning_rate": 4.203024797095286e-06, "loss": 0.916, "step": 3056 }, { "epoch": 1.6081009994739612, "grad_norm": 2.0820188522338867, "learning_rate": 4.202516168926921e-06, "loss": 0.9359, "step": 3057 }, { "epoch": 1.6086270384008416, "grad_norm": 2.287496328353882, "learning_rate": 4.202007409304793e-06, "loss": 0.9798, "step": 3058 }, { "epoch": 1.6091530773277223, "grad_norm": 2.085906982421875, "learning_rate": 4.201498518268184e-06, "loss": 0.9623, "step": 3059 }, { "epoch": 1.6096791162546027, "grad_norm": 1.9612935781478882, "learning_rate": 4.200989495856383e-06, "loss": 0.9044, "step": 3060 }, { "epoch": 1.6102051551814833, "grad_norm": 1.9369161128997803, "learning_rate": 4.200480342108698e-06, "loss": 0.9829, "step": 3061 }, { "epoch": 1.610731194108364, "grad_norm": 2.031216621398926, "learning_rate": 4.1999710570644354e-06, "loss": 0.9347, "step": 3062 }, { "epoch": 1.6112572330352446, "grad_norm": 1.9881701469421387, "learning_rate": 4.1994616407629225e-06, "loss": 0.9293, "step": 3063 }, { "epoch": 1.6117832719621252, "grad_norm": 2.299139976501465, "learning_rate": 4.198952093243491e-06, "loss": 1.0016, "step": 3064 }, { "epoch": 1.6123093108890059, "grad_norm": 1.9135477542877197, "learning_rate": 4.198442414545482e-06, "loss": 0.8458, "step": 3065 }, { "epoch": 1.6128353498158865, "grad_norm": 1.994346022605896, "learning_rate": 4.19793260470825e-06, "loss": 0.9244, "step": 3066 }, { "epoch": 1.613361388742767, "grad_norm": 1.923024296760559, "learning_rate": 4.197422663771158e-06, "loss": 0.9403, "step": 3067 }, { "epoch": 1.6138874276696475, "grad_norm": 2.1085264682769775, "learning_rate": 4.196912591773578e-06, "loss": 0.9221, "step": 3068 }, { "epoch": 1.6144134665965282, "grad_norm": 2.141895055770874, "learning_rate": 4.196402388754897e-06, "loss": 0.9547, "step": 3069 }, { "epoch": 1.6149395055234086, "grad_norm": 1.9833990335464478, "learning_rate": 4.195892054754504e-06, "loss": 0.9117, "step": 3070 }, { "epoch": 1.6154655444502892, "grad_norm": 2.216627359390259, "learning_rate": 4.195381589811805e-06, "loss": 0.9246, "step": 3071 }, { "epoch": 1.6159915833771699, "grad_norm": 2.049163579940796, "learning_rate": 4.194870993966214e-06, "loss": 0.9732, "step": 3072 }, { "epoch": 1.6165176223040505, "grad_norm": 2.0937061309814453, "learning_rate": 4.194360267257155e-06, "loss": 0.9848, "step": 3073 }, { "epoch": 1.6170436612309311, "grad_norm": 2.0454630851745605, "learning_rate": 4.19384940972406e-06, "loss": 0.9469, "step": 3074 }, { "epoch": 1.6175697001578118, "grad_norm": 2.1442503929138184, "learning_rate": 4.193338421406375e-06, "loss": 0.9344, "step": 3075 }, { "epoch": 1.6180957390846924, "grad_norm": 2.012234926223755, "learning_rate": 4.192827302343553e-06, "loss": 0.964, "step": 3076 }, { "epoch": 1.6186217780115728, "grad_norm": 2.215226411819458, "learning_rate": 4.192316052575059e-06, "loss": 0.9623, "step": 3077 }, { "epoch": 1.6191478169384534, "grad_norm": 2.119033098220825, "learning_rate": 4.191804672140367e-06, "loss": 0.9899, "step": 3078 }, { "epoch": 1.619673855865334, "grad_norm": 2.088515281677246, "learning_rate": 4.191293161078962e-06, "loss": 0.9402, "step": 3079 }, { "epoch": 1.6201998947922145, "grad_norm": 2.0898115634918213, "learning_rate": 4.190781519430337e-06, "loss": 0.9665, "step": 3080 }, { "epoch": 1.6207259337190951, "grad_norm": 1.9977632761001587, "learning_rate": 4.190269747233998e-06, "loss": 0.9353, "step": 3081 }, { "epoch": 1.6212519726459758, "grad_norm": 2.0618317127227783, "learning_rate": 4.18975784452946e-06, "loss": 0.9394, "step": 3082 }, { "epoch": 1.6217780115728564, "grad_norm": 2.0050265789031982, "learning_rate": 4.189245811356246e-06, "loss": 0.9242, "step": 3083 }, { "epoch": 1.622304050499737, "grad_norm": 2.0560061931610107, "learning_rate": 4.188733647753893e-06, "loss": 0.9436, "step": 3084 }, { "epoch": 1.6228300894266177, "grad_norm": 1.9926953315734863, "learning_rate": 4.188221353761944e-06, "loss": 0.9097, "step": 3085 }, { "epoch": 1.6233561283534983, "grad_norm": 2.0866425037384033, "learning_rate": 4.187708929419956e-06, "loss": 0.9619, "step": 3086 }, { "epoch": 1.6238821672803787, "grad_norm": 2.047328472137451, "learning_rate": 4.1871963747674916e-06, "loss": 0.9068, "step": 3087 }, { "epoch": 1.6244082062072593, "grad_norm": 1.9664018154144287, "learning_rate": 4.1866836898441265e-06, "loss": 0.9652, "step": 3088 }, { "epoch": 1.62493424513414, "grad_norm": 2.173121213912964, "learning_rate": 4.186170874689448e-06, "loss": 0.9887, "step": 3089 }, { "epoch": 1.6254602840610204, "grad_norm": 2.1074130535125732, "learning_rate": 4.185657929343049e-06, "loss": 0.9538, "step": 3090 }, { "epoch": 1.625986322987901, "grad_norm": 2.1423840522766113, "learning_rate": 4.185144853844535e-06, "loss": 0.9835, "step": 3091 }, { "epoch": 1.6265123619147817, "grad_norm": 2.026642084121704, "learning_rate": 4.184631648233523e-06, "loss": 0.9129, "step": 3092 }, { "epoch": 1.6270384008416623, "grad_norm": 1.9563450813293457, "learning_rate": 4.184118312549636e-06, "loss": 0.9475, "step": 3093 }, { "epoch": 1.627564439768543, "grad_norm": 2.0315756797790527, "learning_rate": 4.1836048468325115e-06, "loss": 0.8751, "step": 3094 }, { "epoch": 1.6280904786954236, "grad_norm": 2.045595407485962, "learning_rate": 4.1830912511217935e-06, "loss": 0.892, "step": 3095 }, { "epoch": 1.6286165176223042, "grad_norm": 2.065134048461914, "learning_rate": 4.182577525457138e-06, "loss": 0.9238, "step": 3096 }, { "epoch": 1.6291425565491846, "grad_norm": 1.991685390472412, "learning_rate": 4.182063669878211e-06, "loss": 0.9295, "step": 3097 }, { "epoch": 1.6296685954760652, "grad_norm": 2.017810106277466, "learning_rate": 4.181549684424687e-06, "loss": 0.8972, "step": 3098 }, { "epoch": 1.6301946344029457, "grad_norm": 2.072643756866455, "learning_rate": 4.181035569136252e-06, "loss": 0.8713, "step": 3099 }, { "epoch": 1.6307206733298263, "grad_norm": 2.0642311573028564, "learning_rate": 4.180521324052602e-06, "loss": 0.9312, "step": 3100 }, { "epoch": 1.631246712256707, "grad_norm": 2.052084445953369, "learning_rate": 4.1800069492134425e-06, "loss": 0.889, "step": 3101 }, { "epoch": 1.6317727511835876, "grad_norm": 1.9940053224563599, "learning_rate": 4.179492444658488e-06, "loss": 0.9606, "step": 3102 }, { "epoch": 1.6322987901104682, "grad_norm": 1.9448810815811157, "learning_rate": 4.178977810427466e-06, "loss": 0.9042, "step": 3103 }, { "epoch": 1.6328248290373488, "grad_norm": 2.0380942821502686, "learning_rate": 4.1784630465601114e-06, "loss": 0.8952, "step": 3104 }, { "epoch": 1.6333508679642295, "grad_norm": 2.0342228412628174, "learning_rate": 4.1779481530961695e-06, "loss": 0.8834, "step": 3105 }, { "epoch": 1.63387690689111, "grad_norm": 2.086081027984619, "learning_rate": 4.177433130075397e-06, "loss": 0.949, "step": 3106 }, { "epoch": 1.6344029458179905, "grad_norm": 2.1689419746398926, "learning_rate": 4.176917977537558e-06, "loss": 0.9563, "step": 3107 }, { "epoch": 1.6349289847448711, "grad_norm": 1.9891860485076904, "learning_rate": 4.1764026955224276e-06, "loss": 0.9233, "step": 3108 }, { "epoch": 1.6354550236717516, "grad_norm": 1.9214919805526733, "learning_rate": 4.175887284069795e-06, "loss": 0.8776, "step": 3109 }, { "epoch": 1.6359810625986322, "grad_norm": 1.9802346229553223, "learning_rate": 4.175371743219453e-06, "loss": 0.9275, "step": 3110 }, { "epoch": 1.6365071015255128, "grad_norm": 1.956176519393921, "learning_rate": 4.174856073011208e-06, "loss": 0.9669, "step": 3111 }, { "epoch": 1.6370331404523935, "grad_norm": 1.9850467443466187, "learning_rate": 4.174340273484875e-06, "loss": 0.9446, "step": 3112 }, { "epoch": 1.637559179379274, "grad_norm": 2.1451969146728516, "learning_rate": 4.173824344680281e-06, "loss": 1.0022, "step": 3113 }, { "epoch": 1.6380852183061547, "grad_norm": 2.0854461193084717, "learning_rate": 4.173308286637261e-06, "loss": 1.0051, "step": 3114 }, { "epoch": 1.6386112572330354, "grad_norm": 2.2442920207977295, "learning_rate": 4.1727920993956604e-06, "loss": 0.9611, "step": 3115 }, { "epoch": 1.639137296159916, "grad_norm": 2.0998659133911133, "learning_rate": 4.172275782995334e-06, "loss": 0.9306, "step": 3116 }, { "epoch": 1.6396633350867964, "grad_norm": 2.0424299240112305, "learning_rate": 4.171759337476149e-06, "loss": 0.9085, "step": 3117 }, { "epoch": 1.640189374013677, "grad_norm": 2.0824828147888184, "learning_rate": 4.171242762877981e-06, "loss": 0.9236, "step": 3118 }, { "epoch": 1.6407154129405574, "grad_norm": 2.1286230087280273, "learning_rate": 4.170726059240713e-06, "loss": 1.0158, "step": 3119 }, { "epoch": 1.641241451867438, "grad_norm": 2.1157569885253906, "learning_rate": 4.1702092266042425e-06, "loss": 0.9332, "step": 3120 }, { "epoch": 1.6417674907943187, "grad_norm": 2.1147117614746094, "learning_rate": 4.169692265008475e-06, "loss": 0.9673, "step": 3121 }, { "epoch": 1.6422935297211994, "grad_norm": 2.021984100341797, "learning_rate": 4.169175174493325e-06, "loss": 0.9808, "step": 3122 }, { "epoch": 1.64281956864808, "grad_norm": 1.9503893852233887, "learning_rate": 4.168657955098718e-06, "loss": 0.9293, "step": 3123 }, { "epoch": 1.6433456075749606, "grad_norm": 2.125218629837036, "learning_rate": 4.1681406068645895e-06, "loss": 0.9082, "step": 3124 }, { "epoch": 1.6438716465018413, "grad_norm": 2.25494647026062, "learning_rate": 4.167623129830884e-06, "loss": 0.9515, "step": 3125 }, { "epoch": 1.644397685428722, "grad_norm": 2.164952516555786, "learning_rate": 4.167105524037558e-06, "loss": 0.9344, "step": 3126 }, { "epoch": 1.6449237243556023, "grad_norm": 2.136427164077759, "learning_rate": 4.166587789524576e-06, "loss": 0.9056, "step": 3127 }, { "epoch": 1.645449763282483, "grad_norm": 1.9986604452133179, "learning_rate": 4.166069926331912e-06, "loss": 0.9076, "step": 3128 }, { "epoch": 1.6459758022093633, "grad_norm": 2.1529273986816406, "learning_rate": 4.165551934499553e-06, "loss": 0.9151, "step": 3129 }, { "epoch": 1.646501841136244, "grad_norm": 2.1443097591400146, "learning_rate": 4.1650338140674916e-06, "loss": 0.9907, "step": 3130 }, { "epoch": 1.6470278800631246, "grad_norm": 2.0582191944122314, "learning_rate": 4.164515565075735e-06, "loss": 0.9531, "step": 3131 }, { "epoch": 1.6475539189900053, "grad_norm": 2.032761573791504, "learning_rate": 4.163997187564296e-06, "loss": 0.8946, "step": 3132 }, { "epoch": 1.6480799579168859, "grad_norm": 2.1963999271392822, "learning_rate": 4.163478681573201e-06, "loss": 0.955, "step": 3133 }, { "epoch": 1.6486059968437665, "grad_norm": 2.0777454376220703, "learning_rate": 4.162960047142482e-06, "loss": 0.9267, "step": 3134 }, { "epoch": 1.6491320357706472, "grad_norm": 2.2676467895507812, "learning_rate": 4.162441284312186e-06, "loss": 1.021, "step": 3135 }, { "epoch": 1.6496580746975276, "grad_norm": 2.0743093490600586, "learning_rate": 4.161922393122368e-06, "loss": 0.9285, "step": 3136 }, { "epoch": 1.6501841136244082, "grad_norm": 2.017014980316162, "learning_rate": 4.161403373613089e-06, "loss": 0.932, "step": 3137 }, { "epoch": 1.6507101525512888, "grad_norm": 1.935449242591858, "learning_rate": 4.160884225824427e-06, "loss": 0.8828, "step": 3138 }, { "epoch": 1.6512361914781692, "grad_norm": 2.1415903568267822, "learning_rate": 4.160364949796462e-06, "loss": 0.8867, "step": 3139 }, { "epoch": 1.6517622304050499, "grad_norm": 2.27426815032959, "learning_rate": 4.1598455455692924e-06, "loss": 0.9527, "step": 3140 }, { "epoch": 1.6522882693319305, "grad_norm": 2.1137592792510986, "learning_rate": 4.15932601318302e-06, "loss": 0.9242, "step": 3141 }, { "epoch": 1.6528143082588111, "grad_norm": 2.1169378757476807, "learning_rate": 4.1588063526777586e-06, "loss": 0.9069, "step": 3142 }, { "epoch": 1.6533403471856918, "grad_norm": 2.0287654399871826, "learning_rate": 4.158286564093632e-06, "loss": 0.9316, "step": 3143 }, { "epoch": 1.6538663861125724, "grad_norm": 2.131171226501465, "learning_rate": 4.157766647470774e-06, "loss": 0.9938, "step": 3144 }, { "epoch": 1.654392425039453, "grad_norm": 1.9909673929214478, "learning_rate": 4.1572466028493285e-06, "loss": 0.8913, "step": 3145 }, { "epoch": 1.6549184639663335, "grad_norm": 1.9932284355163574, "learning_rate": 4.1567264302694495e-06, "loss": 0.8966, "step": 3146 }, { "epoch": 1.655444502893214, "grad_norm": 2.0377657413482666, "learning_rate": 4.156206129771298e-06, "loss": 0.9106, "step": 3147 }, { "epoch": 1.6559705418200947, "grad_norm": 2.0442769527435303, "learning_rate": 4.155685701395049e-06, "loss": 1.0005, "step": 3148 }, { "epoch": 1.6564965807469751, "grad_norm": 1.9996798038482666, "learning_rate": 4.155165145180885e-06, "loss": 0.8855, "step": 3149 }, { "epoch": 1.6570226196738558, "grad_norm": 2.041524887084961, "learning_rate": 4.154644461168999e-06, "loss": 0.9461, "step": 3150 }, { "epoch": 1.6575486586007364, "grad_norm": 1.9929088354110718, "learning_rate": 4.154123649399594e-06, "loss": 0.9425, "step": 3151 }, { "epoch": 1.658074697527617, "grad_norm": 1.9028164148330688, "learning_rate": 4.153602709912882e-06, "loss": 0.8973, "step": 3152 }, { "epoch": 1.6586007364544977, "grad_norm": 1.9024345874786377, "learning_rate": 4.1530816427490865e-06, "loss": 0.9084, "step": 3153 }, { "epoch": 1.6591267753813783, "grad_norm": 2.1555325984954834, "learning_rate": 4.152560447948438e-06, "loss": 0.933, "step": 3154 }, { "epoch": 1.659652814308259, "grad_norm": 2.1050522327423096, "learning_rate": 4.152039125551182e-06, "loss": 0.9663, "step": 3155 }, { "epoch": 1.6601788532351394, "grad_norm": 1.9364120960235596, "learning_rate": 4.151517675597566e-06, "loss": 0.8771, "step": 3156 }, { "epoch": 1.66070489216202, "grad_norm": 2.0937752723693848, "learning_rate": 4.150996098127856e-06, "loss": 0.9343, "step": 3157 }, { "epoch": 1.6612309310889006, "grad_norm": 1.9322218894958496, "learning_rate": 4.150474393182322e-06, "loss": 0.9277, "step": 3158 }, { "epoch": 1.661756970015781, "grad_norm": 1.9301400184631348, "learning_rate": 4.149952560801246e-06, "loss": 0.8937, "step": 3159 }, { "epoch": 1.6622830089426617, "grad_norm": 2.0601139068603516, "learning_rate": 4.149430601024919e-06, "loss": 0.9411, "step": 3160 }, { "epoch": 1.6628090478695423, "grad_norm": 2.0804789066314697, "learning_rate": 4.148908513893643e-06, "loss": 0.9271, "step": 3161 }, { "epoch": 1.663335086796423, "grad_norm": 2.0413947105407715, "learning_rate": 4.148386299447728e-06, "loss": 0.9446, "step": 3162 }, { "epoch": 1.6638611257233036, "grad_norm": 2.0702428817749023, "learning_rate": 4.1478639577274956e-06, "loss": 0.9527, "step": 3163 }, { "epoch": 1.6643871646501842, "grad_norm": 2.1859652996063232, "learning_rate": 4.147341488773277e-06, "loss": 0.9219, "step": 3164 }, { "epoch": 1.6649132035770648, "grad_norm": 1.9993692636489868, "learning_rate": 4.146818892625412e-06, "loss": 0.9205, "step": 3165 }, { "epoch": 1.6654392425039453, "grad_norm": 1.9294373989105225, "learning_rate": 4.146296169324251e-06, "loss": 0.9163, "step": 3166 }, { "epoch": 1.665965281430826, "grad_norm": 2.0685832500457764, "learning_rate": 4.145773318910156e-06, "loss": 0.9098, "step": 3167 }, { "epoch": 1.6664913203577063, "grad_norm": 2.0580673217773438, "learning_rate": 4.145250341423494e-06, "loss": 0.9478, "step": 3168 }, { "epoch": 1.667017359284587, "grad_norm": 1.9835233688354492, "learning_rate": 4.144727236904647e-06, "loss": 0.9843, "step": 3169 }, { "epoch": 1.6675433982114676, "grad_norm": 1.9416699409484863, "learning_rate": 4.144204005394005e-06, "loss": 0.8998, "step": 3170 }, { "epoch": 1.6680694371383482, "grad_norm": 2.109788179397583, "learning_rate": 4.143680646931966e-06, "loss": 1.012, "step": 3171 }, { "epoch": 1.6685954760652288, "grad_norm": 1.9718213081359863, "learning_rate": 4.143157161558939e-06, "loss": 0.9549, "step": 3172 }, { "epoch": 1.6691215149921095, "grad_norm": 2.1271800994873047, "learning_rate": 4.142633549315345e-06, "loss": 0.9119, "step": 3173 }, { "epoch": 1.6696475539189901, "grad_norm": 1.9742629528045654, "learning_rate": 4.142109810241611e-06, "loss": 0.9187, "step": 3174 }, { "epoch": 1.6701735928458707, "grad_norm": 2.1130025386810303, "learning_rate": 4.1415859443781775e-06, "loss": 0.9665, "step": 3175 }, { "epoch": 1.6706996317727512, "grad_norm": 2.1361539363861084, "learning_rate": 4.141061951765491e-06, "loss": 0.9287, "step": 3176 }, { "epoch": 1.6712256706996318, "grad_norm": 1.9771889448165894, "learning_rate": 4.140537832444012e-06, "loss": 0.9135, "step": 3177 }, { "epoch": 1.6717517096265122, "grad_norm": 2.1832566261291504, "learning_rate": 4.1400135864542054e-06, "loss": 0.8852, "step": 3178 }, { "epoch": 1.6722777485533928, "grad_norm": 2.119732618331909, "learning_rate": 4.139489213836552e-06, "loss": 0.9804, "step": 3179 }, { "epoch": 1.6728037874802735, "grad_norm": 2.2114458084106445, "learning_rate": 4.138964714631538e-06, "loss": 0.9262, "step": 3180 }, { "epoch": 1.673329826407154, "grad_norm": 2.1006972789764404, "learning_rate": 4.1384400888796604e-06, "loss": 0.9234, "step": 3181 }, { "epoch": 1.6738558653340347, "grad_norm": 2.1315839290618896, "learning_rate": 4.137915336621428e-06, "loss": 0.9986, "step": 3182 }, { "epoch": 1.6743819042609154, "grad_norm": 1.92546546459198, "learning_rate": 4.137390457897356e-06, "loss": 0.9263, "step": 3183 }, { "epoch": 1.674907943187796, "grad_norm": 2.1958208084106445, "learning_rate": 4.136865452747971e-06, "loss": 0.9483, "step": 3184 }, { "epoch": 1.6754339821146766, "grad_norm": 2.2576241493225098, "learning_rate": 4.13634032121381e-06, "loss": 0.9555, "step": 3185 }, { "epoch": 1.675960021041557, "grad_norm": 2.141944646835327, "learning_rate": 4.1358150633354195e-06, "loss": 0.9573, "step": 3186 }, { "epoch": 1.6764860599684377, "grad_norm": 2.1498608589172363, "learning_rate": 4.135289679153356e-06, "loss": 0.9284, "step": 3187 }, { "epoch": 1.677012098895318, "grad_norm": 1.8684433698654175, "learning_rate": 4.134764168708183e-06, "loss": 0.8987, "step": 3188 }, { "epoch": 1.6775381378221987, "grad_norm": 2.0984723567962646, "learning_rate": 4.134238532040479e-06, "loss": 0.9444, "step": 3189 }, { "epoch": 1.6780641767490794, "grad_norm": 1.9849915504455566, "learning_rate": 4.1337127691908255e-06, "loss": 0.961, "step": 3190 }, { "epoch": 1.67859021567596, "grad_norm": 1.9701485633850098, "learning_rate": 4.13318688019982e-06, "loss": 0.9391, "step": 3191 }, { "epoch": 1.6791162546028406, "grad_norm": 2.1401596069335938, "learning_rate": 4.132660865108065e-06, "loss": 0.9557, "step": 3192 }, { "epoch": 1.6796422935297213, "grad_norm": 1.9514589309692383, "learning_rate": 4.132134723956178e-06, "loss": 0.9181, "step": 3193 }, { "epoch": 1.680168332456602, "grad_norm": 1.9245766401290894, "learning_rate": 4.131608456784782e-06, "loss": 0.897, "step": 3194 }, { "epoch": 1.6806943713834825, "grad_norm": 1.9949605464935303, "learning_rate": 4.13108206363451e-06, "loss": 0.9441, "step": 3195 }, { "epoch": 1.681220410310363, "grad_norm": 2.337113857269287, "learning_rate": 4.130555544546005e-06, "loss": 0.8951, "step": 3196 }, { "epoch": 1.6817464492372436, "grad_norm": 1.9845924377441406, "learning_rate": 4.130028899559922e-06, "loss": 0.934, "step": 3197 }, { "epoch": 1.682272488164124, "grad_norm": 1.9929417371749878, "learning_rate": 4.129502128716922e-06, "loss": 0.969, "step": 3198 }, { "epoch": 1.6827985270910046, "grad_norm": 1.9864381551742554, "learning_rate": 4.12897523205768e-06, "loss": 0.9729, "step": 3199 }, { "epoch": 1.6833245660178853, "grad_norm": 2.0064280033111572, "learning_rate": 4.128448209622878e-06, "loss": 0.8764, "step": 3200 }, { "epoch": 1.683850604944766, "grad_norm": 1.9478604793548584, "learning_rate": 4.1279210614532075e-06, "loss": 0.9274, "step": 3201 }, { "epoch": 1.6843766438716465, "grad_norm": 2.097722053527832, "learning_rate": 4.12739378758937e-06, "loss": 0.9318, "step": 3202 }, { "epoch": 1.6849026827985272, "grad_norm": 2.1442971229553223, "learning_rate": 4.126866388072078e-06, "loss": 0.921, "step": 3203 }, { "epoch": 1.6854287217254078, "grad_norm": 2.283609390258789, "learning_rate": 4.126338862942053e-06, "loss": 0.8789, "step": 3204 }, { "epoch": 1.6859547606522882, "grad_norm": 2.0093772411346436, "learning_rate": 4.125811212240024e-06, "loss": 0.9492, "step": 3205 }, { "epoch": 1.6864807995791689, "grad_norm": 2.0583980083465576, "learning_rate": 4.125283436006734e-06, "loss": 0.9086, "step": 3206 }, { "epoch": 1.6870068385060495, "grad_norm": 2.033256769180298, "learning_rate": 4.1247555342829324e-06, "loss": 0.9349, "step": 3207 }, { "epoch": 1.68753287743293, "grad_norm": 2.022223472595215, "learning_rate": 4.124227507109379e-06, "loss": 0.9331, "step": 3208 }, { "epoch": 1.6880589163598105, "grad_norm": 2.046595811843872, "learning_rate": 4.1236993545268435e-06, "loss": 0.9894, "step": 3209 }, { "epoch": 1.6885849552866912, "grad_norm": 1.9218565225601196, "learning_rate": 4.123171076576107e-06, "loss": 0.9271, "step": 3210 }, { "epoch": 1.6891109942135718, "grad_norm": 2.1003799438476562, "learning_rate": 4.122642673297956e-06, "loss": 0.949, "step": 3211 }, { "epoch": 1.6896370331404524, "grad_norm": 1.9997978210449219, "learning_rate": 4.122114144733191e-06, "loss": 0.9741, "step": 3212 }, { "epoch": 1.690163072067333, "grad_norm": 2.0700809955596924, "learning_rate": 4.121585490922621e-06, "loss": 0.9892, "step": 3213 }, { "epoch": 1.6906891109942137, "grad_norm": 1.9868223667144775, "learning_rate": 4.121056711907062e-06, "loss": 0.8757, "step": 3214 }, { "epoch": 1.6912151499210941, "grad_norm": 2.085045337677002, "learning_rate": 4.120527807727343e-06, "loss": 0.9595, "step": 3215 }, { "epoch": 1.6917411888479748, "grad_norm": 1.824670433998108, "learning_rate": 4.1199987784243025e-06, "loss": 0.9565, "step": 3216 }, { "epoch": 1.6922672277748554, "grad_norm": 1.9989205598831177, "learning_rate": 4.119469624038786e-06, "loss": 0.959, "step": 3217 }, { "epoch": 1.6927932667017358, "grad_norm": 1.9225279092788696, "learning_rate": 4.11894034461165e-06, "loss": 0.9166, "step": 3218 }, { "epoch": 1.6933193056286164, "grad_norm": 2.031460762023926, "learning_rate": 4.1184109401837635e-06, "loss": 0.9507, "step": 3219 }, { "epoch": 1.693845344555497, "grad_norm": 2.001452922821045, "learning_rate": 4.117881410795999e-06, "loss": 0.9006, "step": 3220 }, { "epoch": 1.6943713834823777, "grad_norm": 1.879812240600586, "learning_rate": 4.117351756489246e-06, "loss": 0.861, "step": 3221 }, { "epoch": 1.6948974224092583, "grad_norm": 2.122471809387207, "learning_rate": 4.116821977304398e-06, "loss": 0.8968, "step": 3222 }, { "epoch": 1.695423461336139, "grad_norm": 2.114089012145996, "learning_rate": 4.1162920732823595e-06, "loss": 0.9038, "step": 3223 }, { "epoch": 1.6959495002630196, "grad_norm": 1.99599289894104, "learning_rate": 4.115762044464046e-06, "loss": 0.9623, "step": 3224 }, { "epoch": 1.6964755391899, "grad_norm": 1.9803627729415894, "learning_rate": 4.115231890890381e-06, "loss": 0.9592, "step": 3225 }, { "epoch": 1.6970015781167807, "grad_norm": 1.9647092819213867, "learning_rate": 4.114701612602299e-06, "loss": 0.9371, "step": 3226 }, { "epoch": 1.6975276170436613, "grad_norm": 2.1353018283843994, "learning_rate": 4.1141712096407436e-06, "loss": 0.8824, "step": 3227 }, { "epoch": 1.6980536559705417, "grad_norm": 1.999079704284668, "learning_rate": 4.113640682046667e-06, "loss": 0.9609, "step": 3228 }, { "epoch": 1.6985796948974223, "grad_norm": 2.0933756828308105, "learning_rate": 4.113110029861035e-06, "loss": 0.9286, "step": 3229 }, { "epoch": 1.699105733824303, "grad_norm": 2.275855302810669, "learning_rate": 4.1125792531248165e-06, "loss": 0.9584, "step": 3230 }, { "epoch": 1.6996317727511836, "grad_norm": 2.0643019676208496, "learning_rate": 4.112048351878995e-06, "loss": 0.9332, "step": 3231 }, { "epoch": 1.7001578116780642, "grad_norm": 1.9502769708633423, "learning_rate": 4.111517326164562e-06, "loss": 0.9221, "step": 3232 }, { "epoch": 1.7006838506049449, "grad_norm": 2.0422542095184326, "learning_rate": 4.11098617602252e-06, "loss": 0.9784, "step": 3233 }, { "epoch": 1.7012098895318255, "grad_norm": 1.9388501644134521, "learning_rate": 4.110454901493878e-06, "loss": 0.9744, "step": 3234 }, { "epoch": 1.701735928458706, "grad_norm": 1.9232968091964722, "learning_rate": 4.109923502619657e-06, "loss": 0.8971, "step": 3235 }, { "epoch": 1.7022619673855865, "grad_norm": 2.1951730251312256, "learning_rate": 4.109391979440889e-06, "loss": 0.985, "step": 3236 }, { "epoch": 1.702788006312467, "grad_norm": 1.9620068073272705, "learning_rate": 4.108860331998611e-06, "loss": 0.8322, "step": 3237 }, { "epoch": 1.7033140452393476, "grad_norm": 1.961638331413269, "learning_rate": 4.108328560333873e-06, "loss": 0.9242, "step": 3238 }, { "epoch": 1.7038400841662282, "grad_norm": 2.0044515132904053, "learning_rate": 4.107796664487734e-06, "loss": 0.8943, "step": 3239 }, { "epoch": 1.7043661230931089, "grad_norm": 1.8574254512786865, "learning_rate": 4.107264644501264e-06, "loss": 0.9227, "step": 3240 }, { "epoch": 1.7048921620199895, "grad_norm": 1.9833990335464478, "learning_rate": 4.106732500415539e-06, "loss": 0.9405, "step": 3241 }, { "epoch": 1.7054182009468701, "grad_norm": 1.9433012008666992, "learning_rate": 4.106200232271647e-06, "loss": 0.8671, "step": 3242 }, { "epoch": 1.7059442398737508, "grad_norm": 2.0191597938537598, "learning_rate": 4.105667840110686e-06, "loss": 0.9877, "step": 3243 }, { "epoch": 1.7064702788006314, "grad_norm": 2.02719783782959, "learning_rate": 4.105135323973762e-06, "loss": 0.9427, "step": 3244 }, { "epoch": 1.7069963177275118, "grad_norm": 2.0298643112182617, "learning_rate": 4.104602683901991e-06, "loss": 0.9013, "step": 3245 }, { "epoch": 1.7075223566543924, "grad_norm": 2.04894757270813, "learning_rate": 4.104069919936501e-06, "loss": 0.8836, "step": 3246 }, { "epoch": 1.7080483955812729, "grad_norm": 1.8774256706237793, "learning_rate": 4.103537032118426e-06, "loss": 0.9109, "step": 3247 }, { "epoch": 1.7085744345081535, "grad_norm": 1.989170789718628, "learning_rate": 4.10300402048891e-06, "loss": 0.9529, "step": 3248 }, { "epoch": 1.7091004734350341, "grad_norm": 1.9783909320831299, "learning_rate": 4.102470885089109e-06, "loss": 0.9024, "step": 3249 }, { "epoch": 1.7096265123619148, "grad_norm": 2.037511110305786, "learning_rate": 4.101937625960187e-06, "loss": 0.9438, "step": 3250 }, { "epoch": 1.7101525512887954, "grad_norm": 1.983834147453308, "learning_rate": 4.101404243143318e-06, "loss": 0.9009, "step": 3251 }, { "epoch": 1.710678590215676, "grad_norm": 1.9569228887557983, "learning_rate": 4.100870736679684e-06, "loss": 0.9031, "step": 3252 }, { "epoch": 1.7112046291425567, "grad_norm": 2.137965440750122, "learning_rate": 4.100337106610479e-06, "loss": 0.9367, "step": 3253 }, { "epoch": 1.7117306680694373, "grad_norm": 2.038703203201294, "learning_rate": 4.099803352976906e-06, "loss": 0.9436, "step": 3254 }, { "epoch": 1.7122567069963177, "grad_norm": 2.0265614986419678, "learning_rate": 4.0992694758201754e-06, "loss": 0.9541, "step": 3255 }, { "epoch": 1.7127827459231983, "grad_norm": 1.9582267999649048, "learning_rate": 4.098735475181509e-06, "loss": 0.9345, "step": 3256 }, { "epoch": 1.7133087848500788, "grad_norm": 2.1450209617614746, "learning_rate": 4.098201351102138e-06, "loss": 0.9859, "step": 3257 }, { "epoch": 1.7138348237769594, "grad_norm": 2.063988447189331, "learning_rate": 4.097667103623303e-06, "loss": 0.9701, "step": 3258 }, { "epoch": 1.71436086270384, "grad_norm": 2.0069267749786377, "learning_rate": 4.097132732786253e-06, "loss": 0.9743, "step": 3259 }, { "epoch": 1.7148869016307207, "grad_norm": 1.9959112405776978, "learning_rate": 4.096598238632249e-06, "loss": 0.9029, "step": 3260 }, { "epoch": 1.7154129405576013, "grad_norm": 1.9694600105285645, "learning_rate": 4.09606362120256e-06, "loss": 0.9015, "step": 3261 }, { "epoch": 1.715938979484482, "grad_norm": 2.4184322357177734, "learning_rate": 4.0955288805384645e-06, "loss": 0.8851, "step": 3262 }, { "epoch": 1.7164650184113626, "grad_norm": 1.9955581426620483, "learning_rate": 4.094994016681248e-06, "loss": 0.964, "step": 3263 }, { "epoch": 1.716991057338243, "grad_norm": 1.981042504310608, "learning_rate": 4.094459029672213e-06, "loss": 0.931, "step": 3264 }, { "epoch": 1.7175170962651236, "grad_norm": 1.9434010982513428, "learning_rate": 4.093923919552663e-06, "loss": 0.8766, "step": 3265 }, { "epoch": 1.7180431351920042, "grad_norm": 1.9982467889785767, "learning_rate": 4.0933886863639145e-06, "loss": 0.8826, "step": 3266 }, { "epoch": 1.7185691741188847, "grad_norm": 1.9993340969085693, "learning_rate": 4.092853330147296e-06, "loss": 0.9527, "step": 3267 }, { "epoch": 1.7190952130457653, "grad_norm": 2.0537142753601074, "learning_rate": 4.092317850944141e-06, "loss": 0.9147, "step": 3268 }, { "epoch": 1.719621251972646, "grad_norm": 1.9260109663009644, "learning_rate": 4.091782248795796e-06, "loss": 0.8483, "step": 3269 }, { "epoch": 1.7201472908995266, "grad_norm": 2.032158136367798, "learning_rate": 4.091246523743615e-06, "loss": 0.929, "step": 3270 }, { "epoch": 1.7206733298264072, "grad_norm": 2.10296368598938, "learning_rate": 4.090710675828963e-06, "loss": 0.9243, "step": 3271 }, { "epoch": 1.7211993687532878, "grad_norm": 2.0934739112854004, "learning_rate": 4.090174705093212e-06, "loss": 0.9266, "step": 3272 }, { "epoch": 1.7217254076801685, "grad_norm": 1.994555115699768, "learning_rate": 4.089638611577745e-06, "loss": 0.9328, "step": 3273 }, { "epoch": 1.7222514466070489, "grad_norm": 2.169400215148926, "learning_rate": 4.089102395323957e-06, "loss": 0.9347, "step": 3274 }, { "epoch": 1.7227774855339295, "grad_norm": 2.2973196506500244, "learning_rate": 4.088566056373248e-06, "loss": 0.946, "step": 3275 }, { "epoch": 1.7233035244608101, "grad_norm": 1.9845727682113647, "learning_rate": 4.08802959476703e-06, "loss": 0.8932, "step": 3276 }, { "epoch": 1.7238295633876906, "grad_norm": 1.9923732280731201, "learning_rate": 4.087493010546725e-06, "loss": 0.9289, "step": 3277 }, { "epoch": 1.7243556023145712, "grad_norm": 3.3909382820129395, "learning_rate": 4.086956303753761e-06, "loss": 0.9671, "step": 3278 }, { "epoch": 1.7248816412414518, "grad_norm": 1.9383569955825806, "learning_rate": 4.08641947442958e-06, "loss": 0.9332, "step": 3279 }, { "epoch": 1.7254076801683325, "grad_norm": 1.9901137351989746, "learning_rate": 4.085882522615631e-06, "loss": 0.9043, "step": 3280 }, { "epoch": 1.725933719095213, "grad_norm": 1.969484806060791, "learning_rate": 4.085345448353373e-06, "loss": 0.8957, "step": 3281 }, { "epoch": 1.7264597580220937, "grad_norm": 1.9522175788879395, "learning_rate": 4.084808251684274e-06, "loss": 0.9594, "step": 3282 }, { "epoch": 1.7269857969489744, "grad_norm": 1.9766138792037964, "learning_rate": 4.08427093264981e-06, "loss": 0.919, "step": 3283 }, { "epoch": 1.7275118358758548, "grad_norm": 2.008881092071533, "learning_rate": 4.083733491291471e-06, "loss": 0.942, "step": 3284 }, { "epoch": 1.7280378748027354, "grad_norm": 2.085505247116089, "learning_rate": 4.083195927650752e-06, "loss": 0.8945, "step": 3285 }, { "epoch": 1.728563913729616, "grad_norm": 1.9567203521728516, "learning_rate": 4.0826582417691605e-06, "loss": 0.9192, "step": 3286 }, { "epoch": 1.7290899526564965, "grad_norm": 2.1172187328338623, "learning_rate": 4.08212043368821e-06, "loss": 0.8969, "step": 3287 }, { "epoch": 1.729615991583377, "grad_norm": 1.9815617799758911, "learning_rate": 4.081582503449427e-06, "loss": 0.9183, "step": 3288 }, { "epoch": 1.7301420305102577, "grad_norm": 1.9883172512054443, "learning_rate": 4.081044451094346e-06, "loss": 0.9549, "step": 3289 }, { "epoch": 1.7306680694371384, "grad_norm": 1.9001867771148682, "learning_rate": 4.080506276664509e-06, "loss": 0.9247, "step": 3290 }, { "epoch": 1.731194108364019, "grad_norm": 2.0134174823760986, "learning_rate": 4.07996798020147e-06, "loss": 0.9058, "step": 3291 }, { "epoch": 1.7317201472908996, "grad_norm": 2.0924973487854004, "learning_rate": 4.079429561746794e-06, "loss": 0.9548, "step": 3292 }, { "epoch": 1.7322461862177803, "grad_norm": 1.994047999382019, "learning_rate": 4.07889102134205e-06, "loss": 0.9381, "step": 3293 }, { "epoch": 1.7327722251446607, "grad_norm": 1.980707049369812, "learning_rate": 4.078352359028821e-06, "loss": 0.9435, "step": 3294 }, { "epoch": 1.7332982640715413, "grad_norm": 2.001751661300659, "learning_rate": 4.077813574848698e-06, "loss": 0.9294, "step": 3295 }, { "epoch": 1.7338243029984217, "grad_norm": 1.9710110425949097, "learning_rate": 4.07727466884328e-06, "loss": 0.9307, "step": 3296 }, { "epoch": 1.7343503419253024, "grad_norm": 2.128279209136963, "learning_rate": 4.076735641054177e-06, "loss": 0.9091, "step": 3297 }, { "epoch": 1.734876380852183, "grad_norm": 2.0614635944366455, "learning_rate": 4.07619649152301e-06, "loss": 0.9059, "step": 3298 }, { "epoch": 1.7354024197790636, "grad_norm": 2.0709080696105957, "learning_rate": 4.075657220291406e-06, "loss": 0.9665, "step": 3299 }, { "epoch": 1.7359284587059443, "grad_norm": 2.1249303817749023, "learning_rate": 4.075117827401003e-06, "loss": 0.9429, "step": 3300 }, { "epoch": 1.736454497632825, "grad_norm": 2.183804988861084, "learning_rate": 4.074578312893449e-06, "loss": 0.9603, "step": 3301 }, { "epoch": 1.7369805365597055, "grad_norm": 1.9954277276992798, "learning_rate": 4.074038676810399e-06, "loss": 0.8813, "step": 3302 }, { "epoch": 1.7375065754865862, "grad_norm": 2.0698442459106445, "learning_rate": 4.073498919193522e-06, "loss": 0.93, "step": 3303 }, { "epoch": 1.7380326144134666, "grad_norm": 1.9365150928497314, "learning_rate": 4.07295904008449e-06, "loss": 0.9466, "step": 3304 }, { "epoch": 1.7385586533403472, "grad_norm": 2.223771572113037, "learning_rate": 4.0724190395249905e-06, "loss": 0.9252, "step": 3305 }, { "epoch": 1.7390846922672276, "grad_norm": 1.9846419095993042, "learning_rate": 4.071878917556716e-06, "loss": 0.9207, "step": 3306 }, { "epoch": 1.7396107311941083, "grad_norm": 2.1234962940216064, "learning_rate": 4.071338674221373e-06, "loss": 0.9907, "step": 3307 }, { "epoch": 1.7401367701209889, "grad_norm": 2.0506014823913574, "learning_rate": 4.07079830956067e-06, "loss": 0.9653, "step": 3308 }, { "epoch": 1.7406628090478695, "grad_norm": 2.043147563934326, "learning_rate": 4.070257823616332e-06, "loss": 0.9406, "step": 3309 }, { "epoch": 1.7411888479747502, "grad_norm": 2.0823752880096436, "learning_rate": 4.069717216430093e-06, "loss": 0.9736, "step": 3310 }, { "epoch": 1.7417148869016308, "grad_norm": 1.9480175971984863, "learning_rate": 4.069176488043689e-06, "loss": 0.9258, "step": 3311 }, { "epoch": 1.7422409258285114, "grad_norm": 2.002666473388672, "learning_rate": 4.068635638498874e-06, "loss": 0.9734, "step": 3312 }, { "epoch": 1.742766964755392, "grad_norm": 2.2887017726898193, "learning_rate": 4.068094667837407e-06, "loss": 0.9505, "step": 3313 }, { "epoch": 1.7432930036822725, "grad_norm": 2.0582175254821777, "learning_rate": 4.067553576101056e-06, "loss": 0.9222, "step": 3314 }, { "epoch": 1.743819042609153, "grad_norm": 2.223386287689209, "learning_rate": 4.067012363331601e-06, "loss": 0.9363, "step": 3315 }, { "epoch": 1.7443450815360335, "grad_norm": 2.11930251121521, "learning_rate": 4.066471029570829e-06, "loss": 0.9246, "step": 3316 }, { "epoch": 1.7448711204629141, "grad_norm": 2.291123628616333, "learning_rate": 4.0659295748605374e-06, "loss": 0.9042, "step": 3317 }, { "epoch": 1.7453971593897948, "grad_norm": 1.9040521383285522, "learning_rate": 4.065387999242533e-06, "loss": 0.9317, "step": 3318 }, { "epoch": 1.7459231983166754, "grad_norm": 2.213954210281372, "learning_rate": 4.0648463027586316e-06, "loss": 0.9399, "step": 3319 }, { "epoch": 1.746449237243556, "grad_norm": 2.166907787322998, "learning_rate": 4.064304485450657e-06, "loss": 0.9521, "step": 3320 }, { "epoch": 1.7469752761704367, "grad_norm": 2.1662700176239014, "learning_rate": 4.063762547360446e-06, "loss": 0.9706, "step": 3321 }, { "epoch": 1.7475013150973173, "grad_norm": 2.051248073577881, "learning_rate": 4.06322048852984e-06, "loss": 0.9348, "step": 3322 }, { "epoch": 1.748027354024198, "grad_norm": 2.1157615184783936, "learning_rate": 4.062678309000695e-06, "loss": 0.9516, "step": 3323 }, { "epoch": 1.7485533929510784, "grad_norm": 1.9936703443527222, "learning_rate": 4.06213600881487e-06, "loss": 0.9134, "step": 3324 }, { "epoch": 1.749079431877959, "grad_norm": 1.8598010540008545, "learning_rate": 4.0615935880142406e-06, "loss": 0.8601, "step": 3325 }, { "epoch": 1.7496054708048394, "grad_norm": 2.115668773651123, "learning_rate": 4.061051046640685e-06, "loss": 0.9157, "step": 3326 }, { "epoch": 1.75013150973172, "grad_norm": 1.9370783567428589, "learning_rate": 4.060508384736095e-06, "loss": 0.9661, "step": 3327 }, { "epoch": 1.7506575486586007, "grad_norm": 2.0576865673065186, "learning_rate": 4.0599656023423695e-06, "loss": 0.8764, "step": 3328 }, { "epoch": 1.7511835875854813, "grad_norm": 2.078967809677124, "learning_rate": 4.059422699501418e-06, "loss": 0.9192, "step": 3329 }, { "epoch": 1.751709626512362, "grad_norm": 2.1120834350585938, "learning_rate": 4.058879676255158e-06, "loss": 0.9515, "step": 3330 }, { "epoch": 1.7522356654392426, "grad_norm": 1.9745832681655884, "learning_rate": 4.058336532645519e-06, "loss": 0.8756, "step": 3331 }, { "epoch": 1.7527617043661232, "grad_norm": 2.1134536266326904, "learning_rate": 4.057793268714438e-06, "loss": 0.9737, "step": 3332 }, { "epoch": 1.7532877432930036, "grad_norm": 2.158564329147339, "learning_rate": 4.0572498845038575e-06, "loss": 0.8804, "step": 3333 }, { "epoch": 1.7538137822198843, "grad_norm": 2.121643543243408, "learning_rate": 4.056706380055737e-06, "loss": 0.9237, "step": 3334 }, { "epoch": 1.754339821146765, "grad_norm": 1.9843847751617432, "learning_rate": 4.056162755412038e-06, "loss": 0.9501, "step": 3335 }, { "epoch": 1.7548658600736453, "grad_norm": 1.998321294784546, "learning_rate": 4.055619010614738e-06, "loss": 0.9545, "step": 3336 }, { "epoch": 1.755391899000526, "grad_norm": 2.104769229888916, "learning_rate": 4.055075145705819e-06, "loss": 0.9272, "step": 3337 }, { "epoch": 1.7559179379274066, "grad_norm": 2.0450971126556396, "learning_rate": 4.054531160727272e-06, "loss": 0.9261, "step": 3338 }, { "epoch": 1.7564439768542872, "grad_norm": 2.0160062313079834, "learning_rate": 4.053987055721102e-06, "loss": 0.8997, "step": 3339 }, { "epoch": 1.7569700157811678, "grad_norm": 2.213660478591919, "learning_rate": 4.053442830729316e-06, "loss": 0.923, "step": 3340 }, { "epoch": 1.7574960547080485, "grad_norm": 2.109800100326538, "learning_rate": 4.052898485793938e-06, "loss": 0.9012, "step": 3341 }, { "epoch": 1.7580220936349291, "grad_norm": 2.068057060241699, "learning_rate": 4.052354020956995e-06, "loss": 0.9589, "step": 3342 }, { "epoch": 1.7585481325618095, "grad_norm": 1.9910136461257935, "learning_rate": 4.0518094362605285e-06, "loss": 0.9209, "step": 3343 }, { "epoch": 1.7590741714886902, "grad_norm": 2.1302149295806885, "learning_rate": 4.0512647317465856e-06, "loss": 0.9447, "step": 3344 }, { "epoch": 1.7596002104155708, "grad_norm": 2.0052196979522705, "learning_rate": 4.050719907457222e-06, "loss": 0.9138, "step": 3345 }, { "epoch": 1.7601262493424512, "grad_norm": 1.9952465295791626, "learning_rate": 4.050174963434508e-06, "loss": 0.9339, "step": 3346 }, { "epoch": 1.7606522882693318, "grad_norm": 2.0861756801605225, "learning_rate": 4.049629899720516e-06, "loss": 0.913, "step": 3347 }, { "epoch": 1.7611783271962125, "grad_norm": 2.0138020515441895, "learning_rate": 4.0490847163573335e-06, "loss": 0.9264, "step": 3348 }, { "epoch": 1.7617043661230931, "grad_norm": 2.0093681812286377, "learning_rate": 4.048539413387053e-06, "loss": 0.9597, "step": 3349 }, { "epoch": 1.7622304050499737, "grad_norm": 2.0494930744171143, "learning_rate": 4.047993990851781e-06, "loss": 0.974, "step": 3350 }, { "epoch": 1.7627564439768544, "grad_norm": 2.0490882396698, "learning_rate": 4.0474484487936275e-06, "loss": 0.9241, "step": 3351 }, { "epoch": 1.763282482903735, "grad_norm": 1.9500864744186401, "learning_rate": 4.046902787254717e-06, "loss": 0.9448, "step": 3352 }, { "epoch": 1.7638085218306154, "grad_norm": 2.1233692169189453, "learning_rate": 4.046357006277177e-06, "loss": 0.9255, "step": 3353 }, { "epoch": 1.764334560757496, "grad_norm": 2.1407630443573, "learning_rate": 4.0458111059031525e-06, "loss": 0.9697, "step": 3354 }, { "epoch": 1.7648605996843767, "grad_norm": 2.052542209625244, "learning_rate": 4.04526508617479e-06, "loss": 0.9533, "step": 3355 }, { "epoch": 1.765386638611257, "grad_norm": 1.98019540309906, "learning_rate": 4.044718947134251e-06, "loss": 0.9118, "step": 3356 }, { "epoch": 1.7659126775381377, "grad_norm": 1.932976245880127, "learning_rate": 4.0441726888237025e-06, "loss": 0.9289, "step": 3357 }, { "epoch": 1.7664387164650184, "grad_norm": 2.0778164863586426, "learning_rate": 4.0436263112853205e-06, "loss": 0.9055, "step": 3358 }, { "epoch": 1.766964755391899, "grad_norm": 1.998678207397461, "learning_rate": 4.043079814561294e-06, "loss": 0.9465, "step": 3359 }, { "epoch": 1.7674907943187796, "grad_norm": 1.97935950756073, "learning_rate": 4.042533198693818e-06, "loss": 0.9617, "step": 3360 }, { "epoch": 1.7680168332456603, "grad_norm": 2.1482112407684326, "learning_rate": 4.041986463725097e-06, "loss": 0.9664, "step": 3361 }, { "epoch": 1.768542872172541, "grad_norm": 1.997873306274414, "learning_rate": 4.041439609697345e-06, "loss": 0.8919, "step": 3362 }, { "epoch": 1.7690689110994213, "grad_norm": 2.037944793701172, "learning_rate": 4.040892636652787e-06, "loss": 0.9327, "step": 3363 }, { "epoch": 1.769594950026302, "grad_norm": 2.235060691833496, "learning_rate": 4.040345544633654e-06, "loss": 0.9484, "step": 3364 }, { "epoch": 1.7701209889531824, "grad_norm": 2.0589184761047363, "learning_rate": 4.039798333682188e-06, "loss": 0.9324, "step": 3365 }, { "epoch": 1.770647027880063, "grad_norm": 1.9436758756637573, "learning_rate": 4.039251003840641e-06, "loss": 0.9637, "step": 3366 }, { "epoch": 1.7711730668069436, "grad_norm": 1.9861931800842285, "learning_rate": 4.038703555151271e-06, "loss": 0.9415, "step": 3367 }, { "epoch": 1.7716991057338243, "grad_norm": 1.8725347518920898, "learning_rate": 4.03815598765635e-06, "loss": 0.8893, "step": 3368 }, { "epoch": 1.772225144660705, "grad_norm": 2.049994945526123, "learning_rate": 4.037608301398155e-06, "loss": 0.9413, "step": 3369 }, { "epoch": 1.7727511835875855, "grad_norm": 1.93240225315094, "learning_rate": 4.037060496418973e-06, "loss": 0.866, "step": 3370 }, { "epoch": 1.7732772225144662, "grad_norm": 2.157377243041992, "learning_rate": 4.036512572761103e-06, "loss": 0.9135, "step": 3371 }, { "epoch": 1.7738032614413468, "grad_norm": 2.0129494667053223, "learning_rate": 4.03596453046685e-06, "loss": 0.9028, "step": 3372 }, { "epoch": 1.7743293003682272, "grad_norm": 2.0105013847351074, "learning_rate": 4.035416369578527e-06, "loss": 0.9115, "step": 3373 }, { "epoch": 1.7748553392951079, "grad_norm": 2.2003941535949707, "learning_rate": 4.0348680901384604e-06, "loss": 0.9599, "step": 3374 }, { "epoch": 1.7753813782219883, "grad_norm": 2.059209108352661, "learning_rate": 4.034319692188984e-06, "loss": 0.9076, "step": 3375 }, { "epoch": 1.775907417148869, "grad_norm": 1.91313898563385, "learning_rate": 4.033771175772439e-06, "loss": 0.9006, "step": 3376 }, { "epoch": 1.7764334560757495, "grad_norm": 2.1229684352874756, "learning_rate": 4.0332225409311795e-06, "loss": 0.9237, "step": 3377 }, { "epoch": 1.7769594950026302, "grad_norm": 1.9086703062057495, "learning_rate": 4.032673787707564e-06, "loss": 0.9071, "step": 3378 }, { "epoch": 1.7774855339295108, "grad_norm": 1.9538326263427734, "learning_rate": 4.0321249161439635e-06, "loss": 0.8935, "step": 3379 }, { "epoch": 1.7780115728563914, "grad_norm": 2.0689868927001953, "learning_rate": 4.031575926282757e-06, "loss": 0.9726, "step": 3380 }, { "epoch": 1.778537611783272, "grad_norm": 2.095792531967163, "learning_rate": 4.031026818166332e-06, "loss": 0.9441, "step": 3381 }, { "epoch": 1.7790636507101527, "grad_norm": 2.124300003051758, "learning_rate": 4.030477591837088e-06, "loss": 0.9116, "step": 3382 }, { "epoch": 1.7795896896370331, "grad_norm": 2.085632801055908, "learning_rate": 4.02992824733743e-06, "loss": 0.92, "step": 3383 }, { "epoch": 1.7801157285639138, "grad_norm": 1.9192638397216797, "learning_rate": 4.029378784709774e-06, "loss": 0.9261, "step": 3384 }, { "epoch": 1.7806417674907942, "grad_norm": 1.8831740617752075, "learning_rate": 4.028829203996544e-06, "loss": 0.942, "step": 3385 }, { "epoch": 1.7811678064176748, "grad_norm": 2.037264823913574, "learning_rate": 4.028279505240176e-06, "loss": 0.8794, "step": 3386 }, { "epoch": 1.7816938453445554, "grad_norm": 2.0986642837524414, "learning_rate": 4.027729688483112e-06, "loss": 0.9363, "step": 3387 }, { "epoch": 1.782219884271436, "grad_norm": 2.0402517318725586, "learning_rate": 4.027179753767803e-06, "loss": 0.9687, "step": 3388 }, { "epoch": 1.7827459231983167, "grad_norm": 2.0788092613220215, "learning_rate": 4.0266297011367126e-06, "loss": 0.9769, "step": 3389 }, { "epoch": 1.7832719621251973, "grad_norm": 2.0980873107910156, "learning_rate": 4.026079530632309e-06, "loss": 0.9048, "step": 3390 }, { "epoch": 1.783798001052078, "grad_norm": 1.9436454772949219, "learning_rate": 4.025529242297071e-06, "loss": 0.8717, "step": 3391 }, { "epoch": 1.7843240399789586, "grad_norm": 2.1506896018981934, "learning_rate": 4.024978836173491e-06, "loss": 0.9504, "step": 3392 }, { "epoch": 1.784850078905839, "grad_norm": 2.014055013656616, "learning_rate": 4.024428312304064e-06, "loss": 0.9186, "step": 3393 }, { "epoch": 1.7853761178327197, "grad_norm": 2.0455405712127686, "learning_rate": 4.023877670731296e-06, "loss": 0.89, "step": 3394 }, { "epoch": 1.7859021567596, "grad_norm": 2.091071605682373, "learning_rate": 4.023326911497705e-06, "loss": 0.9457, "step": 3395 }, { "epoch": 1.7864281956864807, "grad_norm": 1.9758284091949463, "learning_rate": 4.022776034645814e-06, "loss": 0.9166, "step": 3396 }, { "epoch": 1.7869542346133613, "grad_norm": 2.020355463027954, "learning_rate": 4.022225040218158e-06, "loss": 0.9448, "step": 3397 }, { "epoch": 1.787480273540242, "grad_norm": 2.024291515350342, "learning_rate": 4.021673928257281e-06, "loss": 0.9847, "step": 3398 }, { "epoch": 1.7880063124671226, "grad_norm": 2.0182437896728516, "learning_rate": 4.021122698805733e-06, "loss": 0.9613, "step": 3399 }, { "epoch": 1.7885323513940032, "grad_norm": 2.1047723293304443, "learning_rate": 4.020571351906077e-06, "loss": 0.8924, "step": 3400 }, { "epoch": 1.7890583903208839, "grad_norm": 2.1373345851898193, "learning_rate": 4.020019887600882e-06, "loss": 0.8715, "step": 3401 }, { "epoch": 1.7895844292477643, "grad_norm": 2.0742130279541016, "learning_rate": 4.019468305932728e-06, "loss": 0.9074, "step": 3402 }, { "epoch": 1.790110468174645, "grad_norm": 1.9964598417282104, "learning_rate": 4.018916606944205e-06, "loss": 0.9133, "step": 3403 }, { "epoch": 1.7906365071015256, "grad_norm": 2.060347080230713, "learning_rate": 4.018364790677907e-06, "loss": 0.954, "step": 3404 }, { "epoch": 1.791162546028406, "grad_norm": 1.9850090742111206, "learning_rate": 4.017812857176443e-06, "loss": 0.9305, "step": 3405 }, { "epoch": 1.7916885849552866, "grad_norm": 1.9255491495132446, "learning_rate": 4.0172608064824284e-06, "loss": 0.9121, "step": 3406 }, { "epoch": 1.7922146238821672, "grad_norm": 2.1255195140838623, "learning_rate": 4.016708638638487e-06, "loss": 0.9093, "step": 3407 }, { "epoch": 1.7927406628090479, "grad_norm": 2.060781240463257, "learning_rate": 4.016156353687253e-06, "loss": 0.9531, "step": 3408 }, { "epoch": 1.7932667017359285, "grad_norm": 2.0284042358398438, "learning_rate": 4.01560395167137e-06, "loss": 0.9552, "step": 3409 }, { "epoch": 1.7937927406628091, "grad_norm": 2.0162978172302246, "learning_rate": 4.015051432633487e-06, "loss": 1.0171, "step": 3410 }, { "epoch": 1.7943187795896898, "grad_norm": 1.9135181903839111, "learning_rate": 4.014498796616269e-06, "loss": 0.9062, "step": 3411 }, { "epoch": 1.7948448185165702, "grad_norm": 2.0738272666931152, "learning_rate": 4.013946043662382e-06, "loss": 0.9397, "step": 3412 }, { "epoch": 1.7953708574434508, "grad_norm": 2.1009130477905273, "learning_rate": 4.013393173814507e-06, "loss": 0.9524, "step": 3413 }, { "epoch": 1.7958968963703315, "grad_norm": 2.2217934131622314, "learning_rate": 4.012840187115331e-06, "loss": 0.9275, "step": 3414 }, { "epoch": 1.7964229352972119, "grad_norm": 1.976807951927185, "learning_rate": 4.012287083607552e-06, "loss": 0.9285, "step": 3415 }, { "epoch": 1.7969489742240925, "grad_norm": 2.154989242553711, "learning_rate": 4.011733863333874e-06, "loss": 0.9834, "step": 3416 }, { "epoch": 1.7974750131509731, "grad_norm": 2.0007622241973877, "learning_rate": 4.011180526337014e-06, "loss": 0.9589, "step": 3417 }, { "epoch": 1.7980010520778538, "grad_norm": 2.1359214782714844, "learning_rate": 4.010627072659694e-06, "loss": 0.931, "step": 3418 }, { "epoch": 1.7985270910047344, "grad_norm": 2.0183515548706055, "learning_rate": 4.010073502344648e-06, "loss": 0.9644, "step": 3419 }, { "epoch": 1.799053129931615, "grad_norm": 2.0154995918273926, "learning_rate": 4.009519815434619e-06, "loss": 0.9341, "step": 3420 }, { "epoch": 1.7995791688584957, "grad_norm": 2.030836820602417, "learning_rate": 4.008966011972357e-06, "loss": 0.9219, "step": 3421 }, { "epoch": 1.800105207785376, "grad_norm": 2.099733829498291, "learning_rate": 4.008412092000621e-06, "loss": 0.9248, "step": 3422 }, { "epoch": 1.8006312467122567, "grad_norm": 1.9904857873916626, "learning_rate": 4.007858055562181e-06, "loss": 0.9101, "step": 3423 }, { "epoch": 1.8011572856391374, "grad_norm": 2.0814883708953857, "learning_rate": 4.007303902699815e-06, "loss": 0.9311, "step": 3424 }, { "epoch": 1.8016833245660178, "grad_norm": 2.056016445159912, "learning_rate": 4.00674963345631e-06, "loss": 0.9385, "step": 3425 }, { "epoch": 1.8022093634928984, "grad_norm": 2.0093741416931152, "learning_rate": 4.006195247874462e-06, "loss": 0.9388, "step": 3426 }, { "epoch": 1.802735402419779, "grad_norm": 2.0039663314819336, "learning_rate": 4.005640745997075e-06, "loss": 0.9341, "step": 3427 }, { "epoch": 1.8032614413466597, "grad_norm": 2.116356611251831, "learning_rate": 4.005086127866964e-06, "loss": 0.9732, "step": 3428 }, { "epoch": 1.8037874802735403, "grad_norm": 1.9771263599395752, "learning_rate": 4.004531393526951e-06, "loss": 0.9142, "step": 3429 }, { "epoch": 1.804313519200421, "grad_norm": 2.0317890644073486, "learning_rate": 4.003976543019868e-06, "loss": 0.9494, "step": 3430 }, { "epoch": 1.8048395581273016, "grad_norm": 1.9724228382110596, "learning_rate": 4.003421576388557e-06, "loss": 0.9429, "step": 3431 }, { "epoch": 1.805365597054182, "grad_norm": 2.042203903198242, "learning_rate": 4.002866493675867e-06, "loss": 0.9109, "step": 3432 }, { "epoch": 1.8058916359810626, "grad_norm": 2.16092848777771, "learning_rate": 4.002311294924656e-06, "loss": 0.9762, "step": 3433 }, { "epoch": 1.806417674907943, "grad_norm": 2.238441228866577, "learning_rate": 4.001755980177793e-06, "loss": 0.8959, "step": 3434 }, { "epoch": 1.8069437138348237, "grad_norm": 1.994781494140625, "learning_rate": 4.001200549478153e-06, "loss": 0.9076, "step": 3435 }, { "epoch": 1.8074697527617043, "grad_norm": 2.1131508350372314, "learning_rate": 4.000645002868624e-06, "loss": 0.9805, "step": 3436 }, { "epoch": 1.807995791688585, "grad_norm": 1.9825927019119263, "learning_rate": 4.000089340392098e-06, "loss": 0.9148, "step": 3437 }, { "epoch": 1.8085218306154656, "grad_norm": 1.9437118768692017, "learning_rate": 3.9995335620914795e-06, "loss": 0.8814, "step": 3438 }, { "epoch": 1.8090478695423462, "grad_norm": 2.112123489379883, "learning_rate": 3.9989776680096825e-06, "loss": 0.9005, "step": 3439 }, { "epoch": 1.8095739084692268, "grad_norm": 1.907055139541626, "learning_rate": 3.998421658189626e-06, "loss": 0.9416, "step": 3440 }, { "epoch": 1.8100999473961075, "grad_norm": 2.0174427032470703, "learning_rate": 3.997865532674242e-06, "loss": 0.9194, "step": 3441 }, { "epoch": 1.8106259863229879, "grad_norm": 1.9886671304702759, "learning_rate": 3.997309291506469e-06, "loss": 0.8964, "step": 3442 }, { "epoch": 1.8111520252498685, "grad_norm": 1.8449174165725708, "learning_rate": 3.996752934729256e-06, "loss": 0.9377, "step": 3443 }, { "epoch": 1.811678064176749, "grad_norm": 2.189577341079712, "learning_rate": 3.9961964623855584e-06, "loss": 0.9184, "step": 3444 }, { "epoch": 1.8122041031036296, "grad_norm": 1.9280545711517334, "learning_rate": 3.9956398745183434e-06, "loss": 0.9141, "step": 3445 }, { "epoch": 1.8127301420305102, "grad_norm": 2.119157075881958, "learning_rate": 3.995083171170586e-06, "loss": 0.9477, "step": 3446 }, { "epoch": 1.8132561809573908, "grad_norm": 2.035979747772217, "learning_rate": 3.99452635238527e-06, "loss": 0.8714, "step": 3447 }, { "epoch": 1.8137822198842715, "grad_norm": 2.0809593200683594, "learning_rate": 3.993969418205389e-06, "loss": 0.9573, "step": 3448 }, { "epoch": 1.814308258811152, "grad_norm": 1.9352896213531494, "learning_rate": 3.993412368673942e-06, "loss": 0.8903, "step": 3449 }, { "epoch": 1.8148342977380327, "grad_norm": 2.013093948364258, "learning_rate": 3.992855203833943e-06, "loss": 0.94, "step": 3450 }, { "epoch": 1.8153603366649134, "grad_norm": 2.097919464111328, "learning_rate": 3.9922979237284094e-06, "loss": 0.9373, "step": 3451 }, { "epoch": 1.8158863755917938, "grad_norm": 1.9477840662002563, "learning_rate": 3.99174052840037e-06, "loss": 0.9429, "step": 3452 }, { "epoch": 1.8164124145186744, "grad_norm": 2.093498945236206, "learning_rate": 3.991183017892863e-06, "loss": 0.9699, "step": 3453 }, { "epoch": 1.8169384534455548, "grad_norm": 2.1500258445739746, "learning_rate": 3.990625392248935e-06, "loss": 0.9855, "step": 3454 }, { "epoch": 1.8174644923724355, "grad_norm": 1.9574027061462402, "learning_rate": 3.990067651511637e-06, "loss": 0.983, "step": 3455 }, { "epoch": 1.817990531299316, "grad_norm": 2.1163482666015625, "learning_rate": 3.989509795724038e-06, "loss": 0.84, "step": 3456 }, { "epoch": 1.8185165702261967, "grad_norm": 1.987338662147522, "learning_rate": 3.9889518249292095e-06, "loss": 0.92, "step": 3457 }, { "epoch": 1.8190426091530774, "grad_norm": 2.0121538639068604, "learning_rate": 3.988393739170231e-06, "loss": 0.9392, "step": 3458 }, { "epoch": 1.819568648079958, "grad_norm": 2.9024999141693115, "learning_rate": 3.987835538490194e-06, "loss": 0.9129, "step": 3459 }, { "epoch": 1.8200946870068386, "grad_norm": 2.004316806793213, "learning_rate": 3.9872772229322e-06, "loss": 0.9544, "step": 3460 }, { "epoch": 1.820620725933719, "grad_norm": 2.0067389011383057, "learning_rate": 3.9867187925393566e-06, "loss": 0.9239, "step": 3461 }, { "epoch": 1.8211467648605997, "grad_norm": 2.09218692779541, "learning_rate": 3.986160247354779e-06, "loss": 0.9387, "step": 3462 }, { "epoch": 1.8216728037874803, "grad_norm": 2.061206579208374, "learning_rate": 3.985601587421596e-06, "loss": 0.9259, "step": 3463 }, { "epoch": 1.8221988427143607, "grad_norm": 2.206636428833008, "learning_rate": 3.985042812782941e-06, "loss": 0.929, "step": 3464 }, { "epoch": 1.8227248816412414, "grad_norm": 2.2978951930999756, "learning_rate": 3.9844839234819575e-06, "loss": 0.9518, "step": 3465 }, { "epoch": 1.823250920568122, "grad_norm": 2.1015002727508545, "learning_rate": 3.983924919561799e-06, "loss": 0.9238, "step": 3466 }, { "epoch": 1.8237769594950026, "grad_norm": 2.033895969390869, "learning_rate": 3.983365801065627e-06, "loss": 0.9068, "step": 3467 }, { "epoch": 1.8243029984218833, "grad_norm": 1.9699472188949585, "learning_rate": 3.9828065680366125e-06, "loss": 0.9226, "step": 3468 }, { "epoch": 1.824829037348764, "grad_norm": 2.005648136138916, "learning_rate": 3.982247220517933e-06, "loss": 0.8994, "step": 3469 }, { "epoch": 1.8253550762756445, "grad_norm": 1.9914512634277344, "learning_rate": 3.981687758552777e-06, "loss": 0.9424, "step": 3470 }, { "epoch": 1.825881115202525, "grad_norm": 2.1137712001800537, "learning_rate": 3.981128182184342e-06, "loss": 0.9184, "step": 3471 }, { "epoch": 1.8264071541294056, "grad_norm": 2.2346999645233154, "learning_rate": 3.980568491455834e-06, "loss": 0.8919, "step": 3472 }, { "epoch": 1.8269331930562862, "grad_norm": 2.0023348331451416, "learning_rate": 3.980008686410467e-06, "loss": 0.9461, "step": 3473 }, { "epoch": 1.8274592319831666, "grad_norm": 2.0820887088775635, "learning_rate": 3.979448767091464e-06, "loss": 0.9131, "step": 3474 }, { "epoch": 1.8279852709100473, "grad_norm": 1.8880040645599365, "learning_rate": 3.978888733542058e-06, "loss": 0.9383, "step": 3475 }, { "epoch": 1.828511309836928, "grad_norm": 1.9987270832061768, "learning_rate": 3.978328585805488e-06, "loss": 0.9356, "step": 3476 }, { "epoch": 1.8290373487638085, "grad_norm": 1.984171986579895, "learning_rate": 3.977768323925006e-06, "loss": 0.8697, "step": 3477 }, { "epoch": 1.8295633876906892, "grad_norm": 1.9593433141708374, "learning_rate": 3.977207947943872e-06, "loss": 0.9397, "step": 3478 }, { "epoch": 1.8300894266175698, "grad_norm": 2.1641416549682617, "learning_rate": 3.976647457905349e-06, "loss": 0.9444, "step": 3479 }, { "epoch": 1.8306154655444504, "grad_norm": 2.08544921875, "learning_rate": 3.976086853852718e-06, "loss": 0.9853, "step": 3480 }, { "epoch": 1.8311415044713308, "grad_norm": 2.1902668476104736, "learning_rate": 3.9755261358292604e-06, "loss": 0.8569, "step": 3481 }, { "epoch": 1.8316675433982115, "grad_norm": 2.251384735107422, "learning_rate": 3.9749653038782725e-06, "loss": 0.9372, "step": 3482 }, { "epoch": 1.832193582325092, "grad_norm": 2.038652181625366, "learning_rate": 3.974404358043054e-06, "loss": 0.9624, "step": 3483 }, { "epoch": 1.8327196212519725, "grad_norm": 2.1229982376098633, "learning_rate": 3.9738432983669195e-06, "loss": 0.9698, "step": 3484 }, { "epoch": 1.8332456601788532, "grad_norm": 2.0407466888427734, "learning_rate": 3.9732821248931874e-06, "loss": 0.8918, "step": 3485 }, { "epoch": 1.8337716991057338, "grad_norm": 2.099503993988037, "learning_rate": 3.972720837665188e-06, "loss": 0.9041, "step": 3486 }, { "epoch": 1.8342977380326144, "grad_norm": 1.93677818775177, "learning_rate": 3.972159436726259e-06, "loss": 0.8743, "step": 3487 }, { "epoch": 1.834823776959495, "grad_norm": 2.3074264526367188, "learning_rate": 3.971597922119745e-06, "loss": 0.9377, "step": 3488 }, { "epoch": 1.8353498158863757, "grad_norm": 2.1103453636169434, "learning_rate": 3.971036293889004e-06, "loss": 0.9421, "step": 3489 }, { "epoch": 1.8358758548132563, "grad_norm": 2.43330717086792, "learning_rate": 3.970474552077399e-06, "loss": 0.8997, "step": 3490 }, { "epoch": 1.8364018937401367, "grad_norm": 1.9432933330535889, "learning_rate": 3.969912696728302e-06, "loss": 0.9191, "step": 3491 }, { "epoch": 1.8369279326670174, "grad_norm": 2.0006496906280518, "learning_rate": 3.969350727885095e-06, "loss": 0.9202, "step": 3492 }, { "epoch": 1.8374539715938978, "grad_norm": 2.0259854793548584, "learning_rate": 3.968788645591171e-06, "loss": 0.9747, "step": 3493 }, { "epoch": 1.8379800105207784, "grad_norm": 2.106186866760254, "learning_rate": 3.968226449889926e-06, "loss": 0.9078, "step": 3494 }, { "epoch": 1.838506049447659, "grad_norm": 2.0630154609680176, "learning_rate": 3.967664140824769e-06, "loss": 0.8738, "step": 3495 }, { "epoch": 1.8390320883745397, "grad_norm": 2.037552833557129, "learning_rate": 3.9671017184391165e-06, "loss": 1.0377, "step": 3496 }, { "epoch": 1.8395581273014203, "grad_norm": 2.241241693496704, "learning_rate": 3.966539182776395e-06, "loss": 0.9448, "step": 3497 }, { "epoch": 1.840084166228301, "grad_norm": 2.006606340408325, "learning_rate": 3.965976533880037e-06, "loss": 0.928, "step": 3498 }, { "epoch": 1.8406102051551816, "grad_norm": 1.9464002847671509, "learning_rate": 3.965413771793487e-06, "loss": 0.97, "step": 3499 }, { "epoch": 1.8411362440820622, "grad_norm": 2.0451865196228027, "learning_rate": 3.964850896560196e-06, "loss": 0.9165, "step": 3500 }, { "epoch": 1.8416622830089426, "grad_norm": 1.9204659461975098, "learning_rate": 3.964287908223624e-06, "loss": 0.9142, "step": 3501 }, { "epoch": 1.8421883219358233, "grad_norm": 2.125077486038208, "learning_rate": 3.9637248068272414e-06, "loss": 0.988, "step": 3502 }, { "epoch": 1.8427143608627037, "grad_norm": 1.939674735069275, "learning_rate": 3.963161592414526e-06, "loss": 0.862, "step": 3503 }, { "epoch": 1.8432403997895843, "grad_norm": 1.897651195526123, "learning_rate": 3.9625982650289625e-06, "loss": 0.8733, "step": 3504 }, { "epoch": 1.843766438716465, "grad_norm": 2.00661039352417, "learning_rate": 3.962034824714048e-06, "loss": 0.9172, "step": 3505 }, { "epoch": 1.8442924776433456, "grad_norm": 1.98454749584198, "learning_rate": 3.961471271513286e-06, "loss": 0.9424, "step": 3506 }, { "epoch": 1.8448185165702262, "grad_norm": 2.035205125808716, "learning_rate": 3.960907605470189e-06, "loss": 0.8885, "step": 3507 }, { "epoch": 1.8453445554971069, "grad_norm": 2.179349422454834, "learning_rate": 3.960343826628279e-06, "loss": 0.9111, "step": 3508 }, { "epoch": 1.8458705944239875, "grad_norm": 2.022300958633423, "learning_rate": 3.9597799350310865e-06, "loss": 0.9136, "step": 3509 }, { "epoch": 1.8463966333508681, "grad_norm": 2.047797918319702, "learning_rate": 3.959215930722149e-06, "loss": 0.9462, "step": 3510 }, { "epoch": 1.8469226722777485, "grad_norm": 2.07372784614563, "learning_rate": 3.958651813745015e-06, "loss": 0.9396, "step": 3511 }, { "epoch": 1.8474487112046292, "grad_norm": 2.019566774368286, "learning_rate": 3.958087584143241e-06, "loss": 0.9722, "step": 3512 }, { "epoch": 1.8479747501315096, "grad_norm": 1.9272618293762207, "learning_rate": 3.957523241960391e-06, "loss": 0.8958, "step": 3513 }, { "epoch": 1.8485007890583902, "grad_norm": 2.054652690887451, "learning_rate": 3.956958787240042e-06, "loss": 0.9224, "step": 3514 }, { "epoch": 1.8490268279852708, "grad_norm": 2.078669309616089, "learning_rate": 3.9563942200257715e-06, "loss": 0.9243, "step": 3515 }, { "epoch": 1.8495528669121515, "grad_norm": 2.0470850467681885, "learning_rate": 3.9558295403611735e-06, "loss": 0.9303, "step": 3516 }, { "epoch": 1.8500789058390321, "grad_norm": 2.0248045921325684, "learning_rate": 3.955264748289847e-06, "loss": 0.9309, "step": 3517 }, { "epoch": 1.8506049447659128, "grad_norm": 1.88893723487854, "learning_rate": 3.954699843855401e-06, "loss": 0.8972, "step": 3518 }, { "epoch": 1.8511309836927934, "grad_norm": 2.1490895748138428, "learning_rate": 3.954134827101451e-06, "loss": 0.9634, "step": 3519 }, { "epoch": 1.851657022619674, "grad_norm": 1.9519559144973755, "learning_rate": 3.953569698071625e-06, "loss": 0.9785, "step": 3520 }, { "epoch": 1.8521830615465544, "grad_norm": 1.9176199436187744, "learning_rate": 3.953004456809556e-06, "loss": 0.9059, "step": 3521 }, { "epoch": 1.852709100473435, "grad_norm": 1.8604464530944824, "learning_rate": 3.9524391033588876e-06, "loss": 0.8784, "step": 3522 }, { "epoch": 1.8532351394003155, "grad_norm": 2.101825475692749, "learning_rate": 3.95187363776327e-06, "loss": 0.9172, "step": 3523 }, { "epoch": 1.8537611783271961, "grad_norm": 1.93021559715271, "learning_rate": 3.9513080600663665e-06, "loss": 0.9763, "step": 3524 }, { "epoch": 1.8542872172540767, "grad_norm": 2.113861322402954, "learning_rate": 3.950742370311846e-06, "loss": 0.9769, "step": 3525 }, { "epoch": 1.8548132561809574, "grad_norm": 2.099055051803589, "learning_rate": 3.950176568543382e-06, "loss": 0.8805, "step": 3526 }, { "epoch": 1.855339295107838, "grad_norm": 2.029845952987671, "learning_rate": 3.949610654804666e-06, "loss": 0.9417, "step": 3527 }, { "epoch": 1.8558653340347187, "grad_norm": 2.2449495792388916, "learning_rate": 3.94904462913939e-06, "loss": 0.8459, "step": 3528 }, { "epoch": 1.8563913729615993, "grad_norm": 1.970975399017334, "learning_rate": 3.948478491591259e-06, "loss": 0.9175, "step": 3529 }, { "epoch": 1.8569174118884797, "grad_norm": 2.04496431350708, "learning_rate": 3.947912242203984e-06, "loss": 0.8819, "step": 3530 }, { "epoch": 1.8574434508153603, "grad_norm": 2.012033462524414, "learning_rate": 3.947345881021287e-06, "loss": 0.9318, "step": 3531 }, { "epoch": 1.857969489742241, "grad_norm": 2.1508708000183105, "learning_rate": 3.946779408086898e-06, "loss": 0.9753, "step": 3532 }, { "epoch": 1.8584955286691214, "grad_norm": 1.9989691972732544, "learning_rate": 3.946212823444555e-06, "loss": 0.933, "step": 3533 }, { "epoch": 1.859021567596002, "grad_norm": 2.150106191635132, "learning_rate": 3.945646127138003e-06, "loss": 0.9417, "step": 3534 }, { "epoch": 1.8595476065228826, "grad_norm": 2.0419235229492188, "learning_rate": 3.945079319211001e-06, "loss": 0.9687, "step": 3535 }, { "epoch": 1.8600736454497633, "grad_norm": 2.0383622646331787, "learning_rate": 3.944512399707309e-06, "loss": 0.9552, "step": 3536 }, { "epoch": 1.860599684376644, "grad_norm": 2.070049524307251, "learning_rate": 3.943945368670702e-06, "loss": 0.9677, "step": 3537 }, { "epoch": 1.8611257233035245, "grad_norm": 1.9403014183044434, "learning_rate": 3.94337822614496e-06, "loss": 0.905, "step": 3538 }, { "epoch": 1.8616517622304052, "grad_norm": 1.9676467180252075, "learning_rate": 3.942810972173875e-06, "loss": 0.8961, "step": 3539 }, { "epoch": 1.8621778011572856, "grad_norm": 2.0308592319488525, "learning_rate": 3.942243606801244e-06, "loss": 0.974, "step": 3540 }, { "epoch": 1.8627038400841662, "grad_norm": 2.3764054775238037, "learning_rate": 3.941676130070874e-06, "loss": 0.9501, "step": 3541 }, { "epoch": 1.8632298790110469, "grad_norm": 1.9631781578063965, "learning_rate": 3.941108542026582e-06, "loss": 0.9582, "step": 3542 }, { "epoch": 1.8637559179379273, "grad_norm": 2.015774726867676, "learning_rate": 3.940540842712192e-06, "loss": 0.9663, "step": 3543 }, { "epoch": 1.864281956864808, "grad_norm": 1.9868817329406738, "learning_rate": 3.939973032171534e-06, "loss": 0.9296, "step": 3544 }, { "epoch": 1.8648079957916885, "grad_norm": 2.0739920139312744, "learning_rate": 3.939405110448454e-06, "loss": 0.9182, "step": 3545 }, { "epoch": 1.8653340347185692, "grad_norm": 2.1579036712646484, "learning_rate": 3.938837077586799e-06, "loss": 0.9168, "step": 3546 }, { "epoch": 1.8658600736454498, "grad_norm": 1.8793091773986816, "learning_rate": 3.938268933630428e-06, "loss": 0.8804, "step": 3547 }, { "epoch": 1.8663861125723304, "grad_norm": 2.020940065383911, "learning_rate": 3.937700678623209e-06, "loss": 0.9249, "step": 3548 }, { "epoch": 1.866912151499211, "grad_norm": 2.0007359981536865, "learning_rate": 3.937132312609019e-06, "loss": 0.9348, "step": 3549 }, { "epoch": 1.8674381904260915, "grad_norm": 2.0313973426818848, "learning_rate": 3.936563835631739e-06, "loss": 0.9433, "step": 3550 }, { "epoch": 1.8679642293529721, "grad_norm": 1.9634615182876587, "learning_rate": 3.935995247735265e-06, "loss": 0.9424, "step": 3551 }, { "epoch": 1.8684902682798528, "grad_norm": 2.065763235092163, "learning_rate": 3.935426548963497e-06, "loss": 0.9016, "step": 3552 }, { "epoch": 1.8690163072067332, "grad_norm": 2.0466232299804688, "learning_rate": 3.934857739360345e-06, "loss": 0.9697, "step": 3553 }, { "epoch": 1.8695423461336138, "grad_norm": 2.0537874698638916, "learning_rate": 3.9342888189697295e-06, "loss": 0.9518, "step": 3554 }, { "epoch": 1.8700683850604944, "grad_norm": 1.9860124588012695, "learning_rate": 3.933719787835575e-06, "loss": 0.9465, "step": 3555 }, { "epoch": 1.870594423987375, "grad_norm": 2.1217265129089355, "learning_rate": 3.9331506460018175e-06, "loss": 0.9013, "step": 3556 }, { "epoch": 1.8711204629142557, "grad_norm": 2.0979650020599365, "learning_rate": 3.932581393512404e-06, "loss": 0.966, "step": 3557 }, { "epoch": 1.8716465018411363, "grad_norm": 2.1109416484832764, "learning_rate": 3.932012030411285e-06, "loss": 0.8636, "step": 3558 }, { "epoch": 1.872172540768017, "grad_norm": 1.9996652603149414, "learning_rate": 3.931442556742422e-06, "loss": 0.914, "step": 3559 }, { "epoch": 1.8726985796948974, "grad_norm": 1.9707735776901245, "learning_rate": 3.930872972549786e-06, "loss": 0.9091, "step": 3560 }, { "epoch": 1.873224618621778, "grad_norm": 2.126910924911499, "learning_rate": 3.930303277877354e-06, "loss": 0.9279, "step": 3561 }, { "epoch": 1.8737506575486584, "grad_norm": 1.9631305932998657, "learning_rate": 3.929733472769114e-06, "loss": 0.9808, "step": 3562 }, { "epoch": 1.874276696475539, "grad_norm": 2.0501370429992676, "learning_rate": 3.929163557269061e-06, "loss": 0.9403, "step": 3563 }, { "epoch": 1.8748027354024197, "grad_norm": 1.8951704502105713, "learning_rate": 3.9285935314212e-06, "loss": 0.9056, "step": 3564 }, { "epoch": 1.8753287743293003, "grad_norm": 2.058051109313965, "learning_rate": 3.928023395269543e-06, "loss": 0.9108, "step": 3565 }, { "epoch": 1.875854813256181, "grad_norm": 2.1401350498199463, "learning_rate": 3.927453148858109e-06, "loss": 0.9322, "step": 3566 }, { "epoch": 1.8763808521830616, "grad_norm": 2.0684049129486084, "learning_rate": 3.926882792230929e-06, "loss": 0.946, "step": 3567 }, { "epoch": 1.8769068911099422, "grad_norm": 2.124798059463501, "learning_rate": 3.926312325432043e-06, "loss": 0.908, "step": 3568 }, { "epoch": 1.8774329300368229, "grad_norm": 2.369767665863037, "learning_rate": 3.925741748505496e-06, "loss": 0.9851, "step": 3569 }, { "epoch": 1.8779589689637033, "grad_norm": 2.009350538253784, "learning_rate": 3.925171061495342e-06, "loss": 0.9367, "step": 3570 }, { "epoch": 1.878485007890584, "grad_norm": 2.1008574962615967, "learning_rate": 3.9246002644456475e-06, "loss": 0.9569, "step": 3571 }, { "epoch": 1.8790110468174643, "grad_norm": 2.0691325664520264, "learning_rate": 3.92402935740048e-06, "loss": 0.9193, "step": 3572 }, { "epoch": 1.879537085744345, "grad_norm": 2.1765453815460205, "learning_rate": 3.9234583404039245e-06, "loss": 0.8742, "step": 3573 }, { "epoch": 1.8800631246712256, "grad_norm": 2.004166603088379, "learning_rate": 3.922887213500067e-06, "loss": 0.923, "step": 3574 }, { "epoch": 1.8805891635981062, "grad_norm": 2.0143234729766846, "learning_rate": 3.922315976733008e-06, "loss": 0.9512, "step": 3575 }, { "epoch": 1.8811152025249869, "grad_norm": 2.1664822101593018, "learning_rate": 3.921744630146851e-06, "loss": 0.9997, "step": 3576 }, { "epoch": 1.8816412414518675, "grad_norm": 2.0898940563201904, "learning_rate": 3.921173173785712e-06, "loss": 0.9678, "step": 3577 }, { "epoch": 1.8821672803787481, "grad_norm": 2.0758349895477295, "learning_rate": 3.920601607693712e-06, "loss": 0.8768, "step": 3578 }, { "epoch": 1.8826933193056288, "grad_norm": 2.125844717025757, "learning_rate": 3.920029931914986e-06, "loss": 0.8709, "step": 3579 }, { "epoch": 1.8832193582325092, "grad_norm": 2.0366671085357666, "learning_rate": 3.91945814649367e-06, "loss": 0.8688, "step": 3580 }, { "epoch": 1.8837453971593898, "grad_norm": 2.055398941040039, "learning_rate": 3.918886251473914e-06, "loss": 0.9797, "step": 3581 }, { "epoch": 1.8842714360862702, "grad_norm": 2.0487844944000244, "learning_rate": 3.918314246899876e-06, "loss": 0.9574, "step": 3582 }, { "epoch": 1.8847974750131509, "grad_norm": 1.9624133110046387, "learning_rate": 3.91774213281572e-06, "loss": 0.918, "step": 3583 }, { "epoch": 1.8853235139400315, "grad_norm": 2.0293092727661133, "learning_rate": 3.91716990926562e-06, "loss": 0.9201, "step": 3584 }, { "epoch": 1.8858495528669121, "grad_norm": 2.2962074279785156, "learning_rate": 3.916597576293759e-06, "loss": 0.9452, "step": 3585 }, { "epoch": 1.8863755917937928, "grad_norm": 2.127540349960327, "learning_rate": 3.9160251339443256e-06, "loss": 0.9648, "step": 3586 }, { "epoch": 1.8869016307206734, "grad_norm": 1.877824306488037, "learning_rate": 3.915452582261521e-06, "loss": 0.9235, "step": 3587 }, { "epoch": 1.887427669647554, "grad_norm": 2.057386875152588, "learning_rate": 3.914879921289551e-06, "loss": 0.9292, "step": 3588 }, { "epoch": 1.8879537085744345, "grad_norm": 3.2312283515930176, "learning_rate": 3.914307151072635e-06, "loss": 0.9328, "step": 3589 }, { "epoch": 1.888479747501315, "grad_norm": 1.950832724571228, "learning_rate": 3.9137342716549934e-06, "loss": 0.9605, "step": 3590 }, { "epoch": 1.8890057864281957, "grad_norm": 1.954448938369751, "learning_rate": 3.913161283080861e-06, "loss": 0.9513, "step": 3591 }, { "epoch": 1.8895318253550761, "grad_norm": 2.27455735206604, "learning_rate": 3.912588185394478e-06, "loss": 0.973, "step": 3592 }, { "epoch": 1.8900578642819568, "grad_norm": 2.0080385208129883, "learning_rate": 3.912014978640095e-06, "loss": 0.921, "step": 3593 }, { "epoch": 1.8905839032088374, "grad_norm": 2.099836826324463, "learning_rate": 3.911441662861971e-06, "loss": 0.9357, "step": 3594 }, { "epoch": 1.891109942135718, "grad_norm": 2.0644240379333496, "learning_rate": 3.910868238104371e-06, "loss": 0.9077, "step": 3595 }, { "epoch": 1.8916359810625987, "grad_norm": 2.009563446044922, "learning_rate": 3.910294704411571e-06, "loss": 0.9689, "step": 3596 }, { "epoch": 1.8921620199894793, "grad_norm": 2.054391860961914, "learning_rate": 3.909721061827854e-06, "loss": 0.9415, "step": 3597 }, { "epoch": 1.89268805891636, "grad_norm": 2.0017175674438477, "learning_rate": 3.90914731039751e-06, "loss": 0.9376, "step": 3598 }, { "epoch": 1.8932140978432404, "grad_norm": 2.0345120429992676, "learning_rate": 3.908573450164843e-06, "loss": 0.9262, "step": 3599 }, { "epoch": 1.893740136770121, "grad_norm": 2.150937557220459, "learning_rate": 3.907999481174158e-06, "loss": 0.9221, "step": 3600 }, { "epoch": 1.8942661756970016, "grad_norm": 2.1007256507873535, "learning_rate": 3.907425403469775e-06, "loss": 0.9695, "step": 3601 }, { "epoch": 1.894792214623882, "grad_norm": 1.9815309047698975, "learning_rate": 3.906851217096016e-06, "loss": 0.9216, "step": 3602 }, { "epoch": 1.8953182535507627, "grad_norm": 2.0300369262695312, "learning_rate": 3.9062769220972175e-06, "loss": 0.9402, "step": 3603 }, { "epoch": 1.8958442924776433, "grad_norm": 2.0138251781463623, "learning_rate": 3.905702518517721e-06, "loss": 0.9359, "step": 3604 }, { "epoch": 1.896370331404524, "grad_norm": 1.9801183938980103, "learning_rate": 3.905128006401876e-06, "loss": 0.8944, "step": 3605 }, { "epoch": 1.8968963703314046, "grad_norm": 1.9915120601654053, "learning_rate": 3.904553385794043e-06, "loss": 0.949, "step": 3606 }, { "epoch": 1.8974224092582852, "grad_norm": 2.2659802436828613, "learning_rate": 3.903978656738588e-06, "loss": 0.9494, "step": 3607 }, { "epoch": 1.8979484481851658, "grad_norm": 1.9523696899414062, "learning_rate": 3.903403819279887e-06, "loss": 0.9271, "step": 3608 }, { "epoch": 1.8984744871120462, "grad_norm": 1.9705463647842407, "learning_rate": 3.902828873462325e-06, "loss": 0.9143, "step": 3609 }, { "epoch": 1.8990005260389269, "grad_norm": 2.035841703414917, "learning_rate": 3.902253819330294e-06, "loss": 0.9478, "step": 3610 }, { "epoch": 1.8995265649658075, "grad_norm": 2.091041326522827, "learning_rate": 3.901678656928193e-06, "loss": 0.9618, "step": 3611 }, { "epoch": 1.900052603892688, "grad_norm": 2.0914456844329834, "learning_rate": 3.901103386300433e-06, "loss": 0.938, "step": 3612 }, { "epoch": 1.9005786428195686, "grad_norm": 2.0624167919158936, "learning_rate": 3.900528007491431e-06, "loss": 0.9657, "step": 3613 }, { "epoch": 1.9011046817464492, "grad_norm": 2.2461578845977783, "learning_rate": 3.899952520545613e-06, "loss": 0.9332, "step": 3614 }, { "epoch": 1.9016307206733298, "grad_norm": 2.151547908782959, "learning_rate": 3.899376925507415e-06, "loss": 0.9012, "step": 3615 }, { "epoch": 1.9021567596002105, "grad_norm": 1.9578272104263306, "learning_rate": 3.898801222421275e-06, "loss": 0.9635, "step": 3616 }, { "epoch": 1.902682798527091, "grad_norm": 2.041421890258789, "learning_rate": 3.898225411331647e-06, "loss": 0.9166, "step": 3617 }, { "epoch": 1.9032088374539717, "grad_norm": 2.0090739727020264, "learning_rate": 3.89764949228299e-06, "loss": 0.8683, "step": 3618 }, { "epoch": 1.9037348763808521, "grad_norm": 2.2205519676208496, "learning_rate": 3.89707346531977e-06, "loss": 0.8927, "step": 3619 }, { "epoch": 1.9042609153077328, "grad_norm": 2.2684524059295654, "learning_rate": 3.896497330486466e-06, "loss": 0.9509, "step": 3620 }, { "epoch": 1.9047869542346134, "grad_norm": 2.0757508277893066, "learning_rate": 3.89592108782756e-06, "loss": 0.9806, "step": 3621 }, { "epoch": 1.9053129931614938, "grad_norm": 2.0777761936187744, "learning_rate": 3.8953447373875444e-06, "loss": 0.9496, "step": 3622 }, { "epoch": 1.9058390320883745, "grad_norm": 2.0333797931671143, "learning_rate": 3.89476827921092e-06, "loss": 0.9306, "step": 3623 }, { "epoch": 1.906365071015255, "grad_norm": 1.8876526355743408, "learning_rate": 3.894191713342197e-06, "loss": 0.8991, "step": 3624 }, { "epoch": 1.9068911099421357, "grad_norm": 1.9477458000183105, "learning_rate": 3.893615039825893e-06, "loss": 0.8891, "step": 3625 }, { "epoch": 1.9074171488690164, "grad_norm": 3.033585786819458, "learning_rate": 3.893038258706533e-06, "loss": 0.9388, "step": 3626 }, { "epoch": 1.907943187795897, "grad_norm": 1.9571001529693604, "learning_rate": 3.892461370028651e-06, "loss": 0.9243, "step": 3627 }, { "epoch": 1.9084692267227776, "grad_norm": 2.1075046062469482, "learning_rate": 3.89188437383679e-06, "loss": 0.9102, "step": 3628 }, { "epoch": 1.908995265649658, "grad_norm": 1.987993597984314, "learning_rate": 3.891307270175499e-06, "loss": 0.976, "step": 3629 }, { "epoch": 1.9095213045765387, "grad_norm": 2.0590975284576416, "learning_rate": 3.8907300590893405e-06, "loss": 0.9873, "step": 3630 }, { "epoch": 1.910047343503419, "grad_norm": 1.9899734258651733, "learning_rate": 3.890152740622879e-06, "loss": 0.9302, "step": 3631 }, { "epoch": 1.9105733824302997, "grad_norm": 1.9252071380615234, "learning_rate": 3.88957531482069e-06, "loss": 0.8609, "step": 3632 }, { "epoch": 1.9110994213571804, "grad_norm": 2.0324370861053467, "learning_rate": 3.88899778172736e-06, "loss": 0.959, "step": 3633 }, { "epoch": 1.911625460284061, "grad_norm": 1.955954670906067, "learning_rate": 3.888420141387479e-06, "loss": 0.9463, "step": 3634 }, { "epoch": 1.9121514992109416, "grad_norm": 2.011570930480957, "learning_rate": 3.887842393845647e-06, "loss": 0.9082, "step": 3635 }, { "epoch": 1.9126775381378223, "grad_norm": 1.8616174459457397, "learning_rate": 3.887264539146475e-06, "loss": 0.822, "step": 3636 }, { "epoch": 1.913203577064703, "grad_norm": 1.9932093620300293, "learning_rate": 3.8866865773345775e-06, "loss": 0.9408, "step": 3637 }, { "epoch": 1.9137296159915835, "grad_norm": 2.084038496017456, "learning_rate": 3.8861085084545804e-06, "loss": 0.9423, "step": 3638 }, { "epoch": 1.914255654918464, "grad_norm": 2.0028975009918213, "learning_rate": 3.885530332551119e-06, "loss": 0.9405, "step": 3639 }, { "epoch": 1.9147816938453446, "grad_norm": 2.0257554054260254, "learning_rate": 3.884952049668834e-06, "loss": 0.9229, "step": 3640 }, { "epoch": 1.915307732772225, "grad_norm": 2.041738271713257, "learning_rate": 3.884373659852375e-06, "loss": 0.9212, "step": 3641 }, { "epoch": 1.9158337716991056, "grad_norm": 2.1852664947509766, "learning_rate": 3.883795163146401e-06, "loss": 0.9516, "step": 3642 }, { "epoch": 1.9163598106259863, "grad_norm": 2.118609666824341, "learning_rate": 3.883216559595578e-06, "loss": 0.8979, "step": 3643 }, { "epoch": 1.916885849552867, "grad_norm": 2.0641415119171143, "learning_rate": 3.88263784924458e-06, "loss": 0.9353, "step": 3644 }, { "epoch": 1.9174118884797475, "grad_norm": 2.07556414604187, "learning_rate": 3.882059032138093e-06, "loss": 0.9462, "step": 3645 }, { "epoch": 1.9179379274066282, "grad_norm": 1.8993345499038696, "learning_rate": 3.8814801083208055e-06, "loss": 0.881, "step": 3646 }, { "epoch": 1.9184639663335088, "grad_norm": 2.1719818115234375, "learning_rate": 3.880901077837417e-06, "loss": 0.9887, "step": 3647 }, { "epoch": 1.9189900052603894, "grad_norm": 1.9939392805099487, "learning_rate": 3.880321940732637e-06, "loss": 0.9207, "step": 3648 }, { "epoch": 1.9195160441872698, "grad_norm": 2.0594565868377686, "learning_rate": 3.879742697051182e-06, "loss": 0.9562, "step": 3649 }, { "epoch": 1.9200420831141505, "grad_norm": 2.0248348712921143, "learning_rate": 3.879163346837774e-06, "loss": 0.9588, "step": 3650 }, { "epoch": 1.920568122041031, "grad_norm": 1.984763264656067, "learning_rate": 3.878583890137147e-06, "loss": 0.8868, "step": 3651 }, { "epoch": 1.9210941609679115, "grad_norm": 2.1007652282714844, "learning_rate": 3.878004326994041e-06, "loss": 0.9158, "step": 3652 }, { "epoch": 1.9216201998947922, "grad_norm": 2.0460150241851807, "learning_rate": 3.877424657453206e-06, "loss": 0.9232, "step": 3653 }, { "epoch": 1.9221462388216728, "grad_norm": 2.0245115756988525, "learning_rate": 3.876844881559397e-06, "loss": 0.8974, "step": 3654 }, { "epoch": 1.9226722777485534, "grad_norm": 2.114398956298828, "learning_rate": 3.876264999357381e-06, "loss": 0.9799, "step": 3655 }, { "epoch": 1.923198316675434, "grad_norm": 2.089329481124878, "learning_rate": 3.875685010891932e-06, "loss": 0.9099, "step": 3656 }, { "epoch": 1.9237243556023147, "grad_norm": 2.0064103603363037, "learning_rate": 3.8751049162078306e-06, "loss": 0.9408, "step": 3657 }, { "epoch": 1.924250394529195, "grad_norm": 2.079148769378662, "learning_rate": 3.874524715349867e-06, "loss": 0.883, "step": 3658 }, { "epoch": 1.9247764334560757, "grad_norm": 2.3152129650115967, "learning_rate": 3.873944408362839e-06, "loss": 0.9905, "step": 3659 }, { "epoch": 1.9253024723829564, "grad_norm": 2.098961114883423, "learning_rate": 3.873363995291555e-06, "loss": 0.9313, "step": 3660 }, { "epoch": 1.9258285113098368, "grad_norm": 2.1047542095184326, "learning_rate": 3.8727834761808265e-06, "loss": 0.933, "step": 3661 }, { "epoch": 1.9263545502367174, "grad_norm": 1.9624619483947754, "learning_rate": 3.872202851075478e-06, "loss": 0.9063, "step": 3662 }, { "epoch": 1.926880589163598, "grad_norm": 2.1317026615142822, "learning_rate": 3.87162212002034e-06, "loss": 1.0107, "step": 3663 }, { "epoch": 1.9274066280904787, "grad_norm": 1.983315348625183, "learning_rate": 3.871041283060252e-06, "loss": 0.9195, "step": 3664 }, { "epoch": 1.9279326670173593, "grad_norm": 1.998632550239563, "learning_rate": 3.870460340240061e-06, "loss": 0.908, "step": 3665 }, { "epoch": 1.92845870594424, "grad_norm": 2.03670072555542, "learning_rate": 3.869879291604623e-06, "loss": 0.9694, "step": 3666 }, { "epoch": 1.9289847448711206, "grad_norm": 2.0909924507141113, "learning_rate": 3.869298137198799e-06, "loss": 0.9395, "step": 3667 }, { "epoch": 1.929510783798001, "grad_norm": 1.8925726413726807, "learning_rate": 3.868716877067465e-06, "loss": 0.9029, "step": 3668 }, { "epoch": 1.9300368227248816, "grad_norm": 2.022911548614502, "learning_rate": 3.868135511255498e-06, "loss": 0.9254, "step": 3669 }, { "epoch": 1.9305628616517623, "grad_norm": 2.0514330863952637, "learning_rate": 3.867554039807787e-06, "loss": 0.9092, "step": 3670 }, { "epoch": 1.9310889005786427, "grad_norm": 1.999674677848816, "learning_rate": 3.8669724627692286e-06, "loss": 0.9699, "step": 3671 }, { "epoch": 1.9316149395055233, "grad_norm": 2.05678129196167, "learning_rate": 3.866390780184726e-06, "loss": 0.9735, "step": 3672 }, { "epoch": 1.932140978432404, "grad_norm": 2.085590124130249, "learning_rate": 3.8658089920991925e-06, "loss": 0.8801, "step": 3673 }, { "epoch": 1.9326670173592846, "grad_norm": 1.9885491132736206, "learning_rate": 3.865227098557549e-06, "loss": 0.9209, "step": 3674 }, { "epoch": 1.9331930562861652, "grad_norm": 2.196415662765503, "learning_rate": 3.864645099604724e-06, "loss": 0.9276, "step": 3675 }, { "epoch": 1.9337190952130459, "grad_norm": 1.8344975709915161, "learning_rate": 3.864062995285655e-06, "loss": 0.909, "step": 3676 }, { "epoch": 1.9342451341399265, "grad_norm": 2.0901834964752197, "learning_rate": 3.863480785645286e-06, "loss": 0.9277, "step": 3677 }, { "epoch": 1.934771173066807, "grad_norm": 2.1690406799316406, "learning_rate": 3.8628984707285725e-06, "loss": 0.8746, "step": 3678 }, { "epoch": 1.9352972119936875, "grad_norm": 2.040762424468994, "learning_rate": 3.862316050580473e-06, "loss": 0.9166, "step": 3679 }, { "epoch": 1.9358232509205682, "grad_norm": 1.9970393180847168, "learning_rate": 3.86173352524596e-06, "loss": 0.8848, "step": 3680 }, { "epoch": 1.9363492898474486, "grad_norm": 1.9791969060897827, "learning_rate": 3.861150894770008e-06, "loss": 0.8964, "step": 3681 }, { "epoch": 1.9368753287743292, "grad_norm": 2.1223771572113037, "learning_rate": 3.860568159197606e-06, "loss": 0.9815, "step": 3682 }, { "epoch": 1.9374013677012099, "grad_norm": 1.9766583442687988, "learning_rate": 3.859985318573745e-06, "loss": 0.9412, "step": 3683 }, { "epoch": 1.9379274066280905, "grad_norm": 1.9654362201690674, "learning_rate": 3.859402372943428e-06, "loss": 0.9381, "step": 3684 }, { "epoch": 1.9384534455549711, "grad_norm": 2.3252549171447754, "learning_rate": 3.858819322351667e-06, "loss": 0.947, "step": 3685 }, { "epoch": 1.9389794844818518, "grad_norm": 1.995221495628357, "learning_rate": 3.858236166843476e-06, "loss": 0.9367, "step": 3686 }, { "epoch": 1.9395055234087324, "grad_norm": 1.9967231750488281, "learning_rate": 3.857652906463886e-06, "loss": 0.9408, "step": 3687 }, { "epoch": 1.9400315623356128, "grad_norm": 2.0427780151367188, "learning_rate": 3.857069541257928e-06, "loss": 0.9301, "step": 3688 }, { "epoch": 1.9405576012624934, "grad_norm": 2.0469813346862793, "learning_rate": 3.856486071270645e-06, "loss": 0.9251, "step": 3689 }, { "epoch": 1.9410836401893738, "grad_norm": 2.1560215950012207, "learning_rate": 3.85590249654709e-06, "loss": 0.9303, "step": 3690 }, { "epoch": 1.9416096791162545, "grad_norm": 2.150679111480713, "learning_rate": 3.855318817132318e-06, "loss": 0.9144, "step": 3691 }, { "epoch": 1.9421357180431351, "grad_norm": 2.093212366104126, "learning_rate": 3.854735033071398e-06, "loss": 0.9746, "step": 3692 }, { "epoch": 1.9426617569700158, "grad_norm": 2.0641937255859375, "learning_rate": 3.854151144409405e-06, "loss": 0.8845, "step": 3693 }, { "epoch": 1.9431877958968964, "grad_norm": 2.0783212184906006, "learning_rate": 3.853567151191421e-06, "loss": 0.9351, "step": 3694 }, { "epoch": 1.943713834823777, "grad_norm": 2.0518221855163574, "learning_rate": 3.852983053462536e-06, "loss": 0.9608, "step": 3695 }, { "epoch": 1.9442398737506577, "grad_norm": 1.9672813415527344, "learning_rate": 3.852398851267853e-06, "loss": 0.8898, "step": 3696 }, { "epoch": 1.9447659126775383, "grad_norm": 1.9930319786071777, "learning_rate": 3.851814544652475e-06, "loss": 0.8886, "step": 3697 }, { "epoch": 1.9452919516044187, "grad_norm": 2.096101999282837, "learning_rate": 3.851230133661518e-06, "loss": 0.9094, "step": 3698 }, { "epoch": 1.9458179905312993, "grad_norm": 2.036329746246338, "learning_rate": 3.850645618340107e-06, "loss": 0.9404, "step": 3699 }, { "epoch": 1.9463440294581797, "grad_norm": 2.061896562576294, "learning_rate": 3.850060998733371e-06, "loss": 0.9542, "step": 3700 }, { "epoch": 1.9468700683850604, "grad_norm": 2.0338451862335205, "learning_rate": 3.849476274886451e-06, "loss": 0.887, "step": 3701 }, { "epoch": 1.947396107311941, "grad_norm": 1.9912186861038208, "learning_rate": 3.848891446844494e-06, "loss": 0.896, "step": 3702 }, { "epoch": 1.9479221462388217, "grad_norm": 2.1454694271087646, "learning_rate": 3.848306514652655e-06, "loss": 0.9176, "step": 3703 }, { "epoch": 1.9484481851657023, "grad_norm": 2.037468194961548, "learning_rate": 3.8477214783560975e-06, "loss": 0.9542, "step": 3704 }, { "epoch": 1.948974224092583, "grad_norm": 1.9898576736450195, "learning_rate": 3.8471363379999935e-06, "loss": 0.9174, "step": 3705 }, { "epoch": 1.9495002630194636, "grad_norm": 1.9927200078964233, "learning_rate": 3.846551093629522e-06, "loss": 0.9024, "step": 3706 }, { "epoch": 1.9500263019463442, "grad_norm": 2.0695693492889404, "learning_rate": 3.8459657452898714e-06, "loss": 0.9337, "step": 3707 }, { "epoch": 1.9505523408732246, "grad_norm": 2.1777498722076416, "learning_rate": 3.845380293026236e-06, "loss": 0.8862, "step": 3708 }, { "epoch": 1.9510783798001052, "grad_norm": 1.9665758609771729, "learning_rate": 3.84479473688382e-06, "loss": 0.8591, "step": 3709 }, { "epoch": 1.9516044187269856, "grad_norm": 2.0922954082489014, "learning_rate": 3.844209076907836e-06, "loss": 0.9255, "step": 3710 }, { "epoch": 1.9521304576538663, "grad_norm": 2.1177494525909424, "learning_rate": 3.843623313143503e-06, "loss": 0.9439, "step": 3711 }, { "epoch": 1.952656496580747, "grad_norm": 2.041552782058716, "learning_rate": 3.8430374456360486e-06, "loss": 0.8789, "step": 3712 }, { "epoch": 1.9531825355076275, "grad_norm": 2.0672214031219482, "learning_rate": 3.842451474430707e-06, "loss": 0.9426, "step": 3713 }, { "epoch": 1.9537085744345082, "grad_norm": 2.0467536449432373, "learning_rate": 3.841865399572725e-06, "loss": 0.894, "step": 3714 }, { "epoch": 1.9542346133613888, "grad_norm": 2.0909125804901123, "learning_rate": 3.841279221107352e-06, "loss": 0.8702, "step": 3715 }, { "epoch": 1.9547606522882695, "grad_norm": 2.0857174396514893, "learning_rate": 3.840692939079848e-06, "loss": 0.9279, "step": 3716 }, { "epoch": 1.95528669121515, "grad_norm": 2.0140902996063232, "learning_rate": 3.840106553535482e-06, "loss": 0.9305, "step": 3717 }, { "epoch": 1.9558127301420305, "grad_norm": 1.952136516571045, "learning_rate": 3.839520064519528e-06, "loss": 0.8755, "step": 3718 }, { "epoch": 1.9563387690689111, "grad_norm": 2.0122833251953125, "learning_rate": 3.838933472077271e-06, "loss": 0.8571, "step": 3719 }, { "epoch": 1.9568648079957915, "grad_norm": 1.9882973432540894, "learning_rate": 3.838346776254e-06, "loss": 0.9303, "step": 3720 }, { "epoch": 1.9573908469226722, "grad_norm": 2.374667167663574, "learning_rate": 3.837759977095019e-06, "loss": 0.9209, "step": 3721 }, { "epoch": 1.9579168858495528, "grad_norm": 2.002048969268799, "learning_rate": 3.837173074645632e-06, "loss": 0.9023, "step": 3722 }, { "epoch": 1.9584429247764334, "grad_norm": 2.130359172821045, "learning_rate": 3.836586068951157e-06, "loss": 0.9033, "step": 3723 }, { "epoch": 1.958968963703314, "grad_norm": 2.143298387527466, "learning_rate": 3.835998960056915e-06, "loss": 0.9437, "step": 3724 }, { "epoch": 1.9594950026301947, "grad_norm": 2.0639281272888184, "learning_rate": 3.835411748008241e-06, "loss": 0.9239, "step": 3725 }, { "epoch": 1.9600210415570753, "grad_norm": 2.1186087131500244, "learning_rate": 3.834824432850471e-06, "loss": 0.9517, "step": 3726 }, { "epoch": 1.9605470804839558, "grad_norm": 2.086833953857422, "learning_rate": 3.834237014628954e-06, "loss": 0.9462, "step": 3727 }, { "epoch": 1.9610731194108364, "grad_norm": 2.0423338413238525, "learning_rate": 3.8336494933890455e-06, "loss": 0.9423, "step": 3728 }, { "epoch": 1.961599158337717, "grad_norm": 2.1271145343780518, "learning_rate": 3.833061869176109e-06, "loss": 0.8833, "step": 3729 }, { "epoch": 1.9621251972645974, "grad_norm": 2.020315170288086, "learning_rate": 3.832474142035516e-06, "loss": 0.9602, "step": 3730 }, { "epoch": 1.962651236191478, "grad_norm": 1.994666576385498, "learning_rate": 3.8318863120126445e-06, "loss": 0.9176, "step": 3731 }, { "epoch": 1.9631772751183587, "grad_norm": 2.020279884338379, "learning_rate": 3.831298379152884e-06, "loss": 0.9439, "step": 3732 }, { "epoch": 1.9637033140452393, "grad_norm": 1.978032112121582, "learning_rate": 3.830710343501627e-06, "loss": 0.8713, "step": 3733 }, { "epoch": 1.96422935297212, "grad_norm": 2.230541229248047, "learning_rate": 3.830122205104277e-06, "loss": 0.9199, "step": 3734 }, { "epoch": 1.9647553918990006, "grad_norm": 2.0482237339019775, "learning_rate": 3.829533964006248e-06, "loss": 0.9111, "step": 3735 }, { "epoch": 1.9652814308258812, "grad_norm": 2.1555042266845703, "learning_rate": 3.828945620252956e-06, "loss": 0.9417, "step": 3736 }, { "epoch": 1.9658074697527617, "grad_norm": 1.9371111392974854, "learning_rate": 3.828357173889828e-06, "loss": 0.9222, "step": 3737 }, { "epoch": 1.9663335086796423, "grad_norm": 1.9611694812774658, "learning_rate": 3.8277686249622994e-06, "loss": 0.8849, "step": 3738 }, { "epoch": 1.966859547606523, "grad_norm": 2.1161680221557617, "learning_rate": 3.827179973515815e-06, "loss": 0.9615, "step": 3739 }, { "epoch": 1.9673855865334033, "grad_norm": 2.0131423473358154, "learning_rate": 3.826591219595821e-06, "loss": 0.8836, "step": 3740 }, { "epoch": 1.967911625460284, "grad_norm": 1.9942444562911987, "learning_rate": 3.82600236324778e-06, "loss": 0.9373, "step": 3741 }, { "epoch": 1.9684376643871646, "grad_norm": 1.8967758417129517, "learning_rate": 3.825413404517156e-06, "loss": 0.9021, "step": 3742 }, { "epoch": 1.9689637033140452, "grad_norm": 1.8794455528259277, "learning_rate": 3.824824343449425e-06, "loss": 0.9309, "step": 3743 }, { "epoch": 1.9694897422409259, "grad_norm": 1.946311116218567, "learning_rate": 3.8242351800900674e-06, "loss": 0.8965, "step": 3744 }, { "epoch": 1.9700157811678065, "grad_norm": 2.0435941219329834, "learning_rate": 3.823645914484575e-06, "loss": 0.9217, "step": 3745 }, { "epoch": 1.9705418200946871, "grad_norm": 1.9912713766098022, "learning_rate": 3.823056546678445e-06, "loss": 0.9325, "step": 3746 }, { "epoch": 1.9710678590215676, "grad_norm": 1.891200304031372, "learning_rate": 3.822467076717184e-06, "loss": 0.9494, "step": 3747 }, { "epoch": 1.9715938979484482, "grad_norm": 1.9701929092407227, "learning_rate": 3.821877504646306e-06, "loss": 0.9224, "step": 3748 }, { "epoch": 1.9721199368753288, "grad_norm": 2.028235673904419, "learning_rate": 3.821287830511332e-06, "loss": 0.9088, "step": 3749 }, { "epoch": 1.9726459758022092, "grad_norm": 1.9580707550048828, "learning_rate": 3.820698054357792e-06, "loss": 0.9471, "step": 3750 }, { "epoch": 1.9731720147290899, "grad_norm": 2.0986881256103516, "learning_rate": 3.820108176231223e-06, "loss": 0.8719, "step": 3751 }, { "epoch": 1.9736980536559705, "grad_norm": 1.945340633392334, "learning_rate": 3.81951819617717e-06, "loss": 0.8948, "step": 3752 }, { "epoch": 1.9742240925828511, "grad_norm": 2.095881462097168, "learning_rate": 3.8189281142411885e-06, "loss": 0.893, "step": 3753 }, { "epoch": 1.9747501315097318, "grad_norm": 2.002525568008423, "learning_rate": 3.8183379304688375e-06, "loss": 0.8472, "step": 3754 }, { "epoch": 1.9752761704366124, "grad_norm": 2.4062154293060303, "learning_rate": 3.817747644905685e-06, "loss": 0.9429, "step": 3755 }, { "epoch": 1.975802209363493, "grad_norm": 2.005993604660034, "learning_rate": 3.817157257597311e-06, "loss": 0.935, "step": 3756 }, { "epoch": 1.9763282482903735, "grad_norm": 2.0487518310546875, "learning_rate": 3.8165667685892975e-06, "loss": 0.8916, "step": 3757 }, { "epoch": 1.976854287217254, "grad_norm": 1.9535064697265625, "learning_rate": 3.8159761779272375e-06, "loss": 0.94, "step": 3758 }, { "epoch": 1.9773803261441345, "grad_norm": 2.1313724517822266, "learning_rate": 3.815385485656732e-06, "loss": 0.9342, "step": 3759 }, { "epoch": 1.9779063650710151, "grad_norm": 2.204428195953369, "learning_rate": 3.81479469182339e-06, "loss": 0.9595, "step": 3760 }, { "epoch": 1.9784324039978958, "grad_norm": 2.049039602279663, "learning_rate": 3.814203796472826e-06, "loss": 0.9878, "step": 3761 }, { "epoch": 1.9789584429247764, "grad_norm": 2.3214313983917236, "learning_rate": 3.8136127996506646e-06, "loss": 0.9841, "step": 3762 }, { "epoch": 1.979484481851657, "grad_norm": 2.0491721630096436, "learning_rate": 3.813021701402537e-06, "loss": 0.9442, "step": 3763 }, { "epoch": 1.9800105207785377, "grad_norm": 2.259814500808716, "learning_rate": 3.8124305017740836e-06, "loss": 0.9934, "step": 3764 }, { "epoch": 1.9805365597054183, "grad_norm": 2.1149747371673584, "learning_rate": 3.811839200810952e-06, "loss": 0.901, "step": 3765 }, { "epoch": 1.981062598632299, "grad_norm": 2.0713698863983154, "learning_rate": 3.811247798558796e-06, "loss": 0.8785, "step": 3766 }, { "epoch": 1.9815886375591794, "grad_norm": 1.9660581350326538, "learning_rate": 3.810656295063279e-06, "loss": 0.8642, "step": 3767 }, { "epoch": 1.98211467648606, "grad_norm": 2.0408098697662354, "learning_rate": 3.8100646903700723e-06, "loss": 0.8872, "step": 3768 }, { "epoch": 1.9826407154129404, "grad_norm": 2.0717782974243164, "learning_rate": 3.8094729845248546e-06, "loss": 0.9656, "step": 3769 }, { "epoch": 1.983166754339821, "grad_norm": 2.035109043121338, "learning_rate": 3.8088811775733124e-06, "loss": 0.9504, "step": 3770 }, { "epoch": 1.9836927932667017, "grad_norm": 2.251352071762085, "learning_rate": 3.8082892695611407e-06, "loss": 0.9709, "step": 3771 }, { "epoch": 1.9842188321935823, "grad_norm": 2.101968765258789, "learning_rate": 3.8076972605340397e-06, "loss": 0.9324, "step": 3772 }, { "epoch": 1.984744871120463, "grad_norm": 1.9815831184387207, "learning_rate": 3.807105150537721e-06, "loss": 0.8799, "step": 3773 }, { "epoch": 1.9852709100473436, "grad_norm": 2.0454111099243164, "learning_rate": 3.8065129396179007e-06, "loss": 0.9219, "step": 3774 }, { "epoch": 1.9857969489742242, "grad_norm": 2.024466037750244, "learning_rate": 3.805920627820306e-06, "loss": 0.9341, "step": 3775 }, { "epoch": 1.9863229879011048, "grad_norm": 1.9955847263336182, "learning_rate": 3.805328215190669e-06, "loss": 0.9092, "step": 3776 }, { "epoch": 1.9868490268279853, "grad_norm": 2.1025545597076416, "learning_rate": 3.804735701774731e-06, "loss": 0.9297, "step": 3777 }, { "epoch": 1.9873750657548659, "grad_norm": 2.141676425933838, "learning_rate": 3.804143087618241e-06, "loss": 0.9414, "step": 3778 }, { "epoch": 1.9879011046817463, "grad_norm": 2.0731565952301025, "learning_rate": 3.803550372766956e-06, "loss": 0.9677, "step": 3779 }, { "epoch": 1.988427143608627, "grad_norm": 2.244962453842163, "learning_rate": 3.8029575572666387e-06, "loss": 0.9456, "step": 3780 }, { "epoch": 1.9889531825355076, "grad_norm": 2.0196962356567383, "learning_rate": 3.8023646411630633e-06, "loss": 0.9449, "step": 3781 }, { "epoch": 1.9894792214623882, "grad_norm": 2.038806200027466, "learning_rate": 3.801771624502009e-06, "loss": 0.9313, "step": 3782 }, { "epoch": 1.9900052603892688, "grad_norm": 2.0902819633483887, "learning_rate": 3.8011785073292627e-06, "loss": 0.9408, "step": 3783 }, { "epoch": 1.9905312993161495, "grad_norm": 2.00836181640625, "learning_rate": 3.8005852896906215e-06, "loss": 0.9369, "step": 3784 }, { "epoch": 1.99105733824303, "grad_norm": 2.069345712661743, "learning_rate": 3.7999919716318877e-06, "loss": 0.9748, "step": 3785 }, { "epoch": 1.9915833771699105, "grad_norm": 2.071449041366577, "learning_rate": 3.7993985531988708e-06, "loss": 0.9252, "step": 3786 }, { "epoch": 1.9921094160967912, "grad_norm": 2.245893955230713, "learning_rate": 3.7988050344373915e-06, "loss": 0.9503, "step": 3787 }, { "epoch": 1.9926354550236718, "grad_norm": 1.9542043209075928, "learning_rate": 3.798211415393276e-06, "loss": 0.8913, "step": 3788 }, { "epoch": 1.9931614939505522, "grad_norm": 2.013779401779175, "learning_rate": 3.7976176961123578e-06, "loss": 0.9097, "step": 3789 }, { "epoch": 1.9936875328774328, "grad_norm": 1.9830703735351562, "learning_rate": 3.797023876640479e-06, "loss": 0.9301, "step": 3790 }, { "epoch": 1.9942135718043135, "grad_norm": 1.9405882358551025, "learning_rate": 3.79642995702349e-06, "loss": 0.9477, "step": 3791 }, { "epoch": 1.994739610731194, "grad_norm": 2.159137725830078, "learning_rate": 3.7958359373072468e-06, "loss": 0.9709, "step": 3792 }, { "epoch": 1.9952656496580747, "grad_norm": 2.1361355781555176, "learning_rate": 3.7952418175376153e-06, "loss": 0.921, "step": 3793 }, { "epoch": 1.9957916885849554, "grad_norm": 1.8702547550201416, "learning_rate": 3.794647597760469e-06, "loss": 0.915, "step": 3794 }, { "epoch": 1.996317727511836, "grad_norm": 2.100464105606079, "learning_rate": 3.794053278021688e-06, "loss": 0.9085, "step": 3795 }, { "epoch": 1.9968437664387164, "grad_norm": 2.0043835639953613, "learning_rate": 3.7934588583671594e-06, "loss": 0.8704, "step": 3796 }, { "epoch": 1.997369805365597, "grad_norm": 2.139164924621582, "learning_rate": 3.7928643388427814e-06, "loss": 0.9234, "step": 3797 }, { "epoch": 1.9978958442924777, "grad_norm": 2.084885597229004, "learning_rate": 3.7922697194944564e-06, "loss": 0.9498, "step": 3798 }, { "epoch": 1.998421883219358, "grad_norm": 2.1158130168914795, "learning_rate": 3.791675000368096e-06, "loss": 0.9519, "step": 3799 }, { "epoch": 1.9989479221462387, "grad_norm": 2.0837512016296387, "learning_rate": 3.7910801815096186e-06, "loss": 0.9091, "step": 3800 }, { "epoch": 1.9994739610731194, "grad_norm": 1.8967145681381226, "learning_rate": 3.7904852629649523e-06, "loss": 0.8711, "step": 3801 }, { "epoch": 2.0, "grad_norm": 2.044868230819702, "learning_rate": 3.789890244780031e-06, "loss": 0.9276, "step": 3802 } ], "logging_steps": 1, "max_steps": 11406, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 1901, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9.603360011908547e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }