{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 3222, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.000931098696461825, "grad_norm": 2.354831102749536, "learning_rate": 6.191950464396286e-08, "loss": 0.8585, "step": 1 }, { "epoch": 0.00186219739292365, "grad_norm": 2.356824343625411, "learning_rate": 1.2383900928792572e-07, "loss": 0.8585, "step": 2 }, { "epoch": 0.002793296089385475, "grad_norm": 2.3203557246687048, "learning_rate": 1.8575851393188855e-07, "loss": 0.8437, "step": 3 }, { "epoch": 0.0037243947858473, "grad_norm": 2.3122116213832538, "learning_rate": 2.4767801857585145e-07, "loss": 0.8303, "step": 4 }, { "epoch": 0.004655493482309125, "grad_norm": 2.302904384680434, "learning_rate": 3.0959752321981424e-07, "loss": 0.8522, "step": 5 }, { "epoch": 0.00558659217877095, "grad_norm": 2.349128090192101, "learning_rate": 3.715170278637771e-07, "loss": 0.8539, "step": 6 }, { "epoch": 0.006517690875232775, "grad_norm": 2.267098465504872, "learning_rate": 4.3343653250774e-07, "loss": 0.8399, "step": 7 }, { "epoch": 0.0074487895716946, "grad_norm": 2.2871405138088914, "learning_rate": 4.953560371517029e-07, "loss": 0.8481, "step": 8 }, { "epoch": 0.008379888268156424, "grad_norm": 2.250491646179114, "learning_rate": 5.572755417956656e-07, "loss": 0.8301, "step": 9 }, { "epoch": 0.00931098696461825, "grad_norm": 2.268288006645849, "learning_rate": 6.191950464396285e-07, "loss": 0.8509, "step": 10 }, { "epoch": 0.010242085661080074, "grad_norm": 2.2561893299679983, "learning_rate": 6.811145510835913e-07, "loss": 0.8437, "step": 11 }, { "epoch": 0.0111731843575419, "grad_norm": 2.229287055948286, "learning_rate": 7.430340557275542e-07, "loss": 0.8273, "step": 12 }, { "epoch": 0.012104283054003724, "grad_norm": 2.11062624381255, "learning_rate": 8.049535603715171e-07, "loss": 0.8321, "step": 13 }, { "epoch": 0.01303538175046555, "grad_norm": 2.09305991369358, "learning_rate": 8.6687306501548e-07, "loss": 0.8268, "step": 14 }, { "epoch": 0.013966480446927373, "grad_norm": 2.052210256792575, "learning_rate": 9.287925696594428e-07, "loss": 0.8413, "step": 15 }, { "epoch": 0.0148975791433892, "grad_norm": 2.0384844391885393, "learning_rate": 9.907120743034058e-07, "loss": 0.8489, "step": 16 }, { "epoch": 0.015828677839851025, "grad_norm": 2.0724784812342283, "learning_rate": 1.0526315789473685e-06, "loss": 0.8518, "step": 17 }, { "epoch": 0.01675977653631285, "grad_norm": 1.6742710082401544, "learning_rate": 1.1145510835913313e-06, "loss": 0.8229, "step": 18 }, { "epoch": 0.017690875232774673, "grad_norm": 1.593174559924109, "learning_rate": 1.1764705882352942e-06, "loss": 0.8221, "step": 19 }, { "epoch": 0.0186219739292365, "grad_norm": 1.6166035765880125, "learning_rate": 1.238390092879257e-06, "loss": 0.8216, "step": 20 }, { "epoch": 0.019553072625698324, "grad_norm": 1.5758893157666458, "learning_rate": 1.30030959752322e-06, "loss": 0.8277, "step": 21 }, { "epoch": 0.020484171322160148, "grad_norm": 1.4823980746955772, "learning_rate": 1.3622291021671827e-06, "loss": 0.8218, "step": 22 }, { "epoch": 0.021415270018621976, "grad_norm": 1.5002097761991822, "learning_rate": 1.4241486068111456e-06, "loss": 0.84, "step": 23 }, { "epoch": 0.0223463687150838, "grad_norm": 1.1065958187333222, "learning_rate": 1.4860681114551084e-06, "loss": 0.7969, "step": 24 }, { "epoch": 0.023277467411545624, "grad_norm": 1.1661768888420705, "learning_rate": 1.5479876160990715e-06, "loss": 0.7645, "step": 25 }, { "epoch": 0.024208566108007448, "grad_norm": 1.2491722688756128, "learning_rate": 1.6099071207430343e-06, "loss": 0.8326, "step": 26 }, { "epoch": 0.025139664804469275, "grad_norm": 1.1732580041298464, "learning_rate": 1.671826625386997e-06, "loss": 0.7663, "step": 27 }, { "epoch": 0.0260707635009311, "grad_norm": 1.1465765502934258, "learning_rate": 1.73374613003096e-06, "loss": 0.8084, "step": 28 }, { "epoch": 0.027001862197392923, "grad_norm": 1.1494666847976427, "learning_rate": 1.7956656346749227e-06, "loss": 0.7986, "step": 29 }, { "epoch": 0.027932960893854747, "grad_norm": 1.0509590522604513, "learning_rate": 1.8575851393188857e-06, "loss": 0.805, "step": 30 }, { "epoch": 0.028864059590316574, "grad_norm": 0.9990220400207278, "learning_rate": 1.9195046439628486e-06, "loss": 0.7988, "step": 31 }, { "epoch": 0.0297951582867784, "grad_norm": 0.8241483601951352, "learning_rate": 1.9814241486068116e-06, "loss": 0.8267, "step": 32 }, { "epoch": 0.030726256983240222, "grad_norm": 0.7991089237515523, "learning_rate": 2.043343653250774e-06, "loss": 0.7869, "step": 33 }, { "epoch": 0.03165735567970205, "grad_norm": 0.8072454471072027, "learning_rate": 2.105263157894737e-06, "loss": 0.7528, "step": 34 }, { "epoch": 0.032588454376163874, "grad_norm": 0.8169227578551558, "learning_rate": 2.1671826625387e-06, "loss": 0.788, "step": 35 }, { "epoch": 0.0335195530726257, "grad_norm": 0.7803254151758982, "learning_rate": 2.2291021671826625e-06, "loss": 0.7969, "step": 36 }, { "epoch": 0.03445065176908752, "grad_norm": 0.8031899455652386, "learning_rate": 2.2910216718266255e-06, "loss": 0.7586, "step": 37 }, { "epoch": 0.035381750465549346, "grad_norm": 0.7534020702863516, "learning_rate": 2.3529411764705885e-06, "loss": 0.767, "step": 38 }, { "epoch": 0.036312849162011177, "grad_norm": 0.7741040025100155, "learning_rate": 2.4148606811145514e-06, "loss": 0.7628, "step": 39 }, { "epoch": 0.037243947858473, "grad_norm": 0.6889771133867628, "learning_rate": 2.476780185758514e-06, "loss": 0.7704, "step": 40 }, { "epoch": 0.038175046554934824, "grad_norm": 0.6798405097116249, "learning_rate": 2.538699690402477e-06, "loss": 0.7418, "step": 41 }, { "epoch": 0.03910614525139665, "grad_norm": 0.5829614483653275, "learning_rate": 2.60061919504644e-06, "loss": 0.7324, "step": 42 }, { "epoch": 0.04003724394785847, "grad_norm": 0.5053247651373973, "learning_rate": 2.662538699690403e-06, "loss": 0.7108, "step": 43 }, { "epoch": 0.040968342644320296, "grad_norm": 0.49700451718110983, "learning_rate": 2.7244582043343653e-06, "loss": 0.7152, "step": 44 }, { "epoch": 0.04189944134078212, "grad_norm": 0.575089966878554, "learning_rate": 2.7863777089783283e-06, "loss": 0.7344, "step": 45 }, { "epoch": 0.04283054003724395, "grad_norm": 0.5960002159902736, "learning_rate": 2.8482972136222912e-06, "loss": 0.7353, "step": 46 }, { "epoch": 0.043761638733705775, "grad_norm": 0.566701713712474, "learning_rate": 2.9102167182662538e-06, "loss": 0.7068, "step": 47 }, { "epoch": 0.0446927374301676, "grad_norm": 0.5753611753916037, "learning_rate": 2.9721362229102167e-06, "loss": 0.7344, "step": 48 }, { "epoch": 0.04562383612662942, "grad_norm": 0.5223372789340055, "learning_rate": 3.0340557275541797e-06, "loss": 0.7374, "step": 49 }, { "epoch": 0.04655493482309125, "grad_norm": 0.5022130542870258, "learning_rate": 3.095975232198143e-06, "loss": 0.7211, "step": 50 }, { "epoch": 0.04748603351955307, "grad_norm": 0.47258019907510546, "learning_rate": 3.157894736842105e-06, "loss": 0.7232, "step": 51 }, { "epoch": 0.048417132216014895, "grad_norm": 0.5029982254674935, "learning_rate": 3.2198142414860685e-06, "loss": 0.7144, "step": 52 }, { "epoch": 0.049348230912476726, "grad_norm": 0.500526826487608, "learning_rate": 3.2817337461300315e-06, "loss": 0.6853, "step": 53 }, { "epoch": 0.05027932960893855, "grad_norm": 0.5179271318981962, "learning_rate": 3.343653250773994e-06, "loss": 0.714, "step": 54 }, { "epoch": 0.051210428305400374, "grad_norm": 0.45864158594534976, "learning_rate": 3.405572755417957e-06, "loss": 0.6965, "step": 55 }, { "epoch": 0.0521415270018622, "grad_norm": 0.49554238972484266, "learning_rate": 3.46749226006192e-06, "loss": 0.7285, "step": 56 }, { "epoch": 0.05307262569832402, "grad_norm": 0.43411367265674594, "learning_rate": 3.529411764705883e-06, "loss": 0.696, "step": 57 }, { "epoch": 0.054003724394785846, "grad_norm": 0.3856075029026902, "learning_rate": 3.5913312693498454e-06, "loss": 0.6686, "step": 58 }, { "epoch": 0.05493482309124767, "grad_norm": 0.38637452067087935, "learning_rate": 3.6532507739938084e-06, "loss": 0.6965, "step": 59 }, { "epoch": 0.055865921787709494, "grad_norm": 0.3931471017801562, "learning_rate": 3.7151702786377713e-06, "loss": 0.7096, "step": 60 }, { "epoch": 0.056797020484171325, "grad_norm": 0.37582427325282547, "learning_rate": 3.777089783281734e-06, "loss": 0.6816, "step": 61 }, { "epoch": 0.05772811918063315, "grad_norm": 0.37774305222037574, "learning_rate": 3.839009287925697e-06, "loss": 0.7158, "step": 62 }, { "epoch": 0.05865921787709497, "grad_norm": 0.4102678350608818, "learning_rate": 3.90092879256966e-06, "loss": 0.7023, "step": 63 }, { "epoch": 0.0595903165735568, "grad_norm": 0.3987063189608159, "learning_rate": 3.962848297213623e-06, "loss": 0.6622, "step": 64 }, { "epoch": 0.06052141527001862, "grad_norm": 0.37183619351727204, "learning_rate": 4.024767801857586e-06, "loss": 0.6836, "step": 65 }, { "epoch": 0.061452513966480445, "grad_norm": 0.3342435950159049, "learning_rate": 4.086687306501548e-06, "loss": 0.666, "step": 66 }, { "epoch": 0.06238361266294227, "grad_norm": 0.33540621300836365, "learning_rate": 4.148606811145512e-06, "loss": 0.6544, "step": 67 }, { "epoch": 0.0633147113594041, "grad_norm": 0.3373712531798284, "learning_rate": 4.210526315789474e-06, "loss": 0.669, "step": 68 }, { "epoch": 0.06424581005586592, "grad_norm": 0.37040019045808914, "learning_rate": 4.272445820433437e-06, "loss": 0.676, "step": 69 }, { "epoch": 0.06517690875232775, "grad_norm": 0.3270586252738098, "learning_rate": 4.3343653250774e-06, "loss": 0.6453, "step": 70 }, { "epoch": 0.06610800744878957, "grad_norm": 0.3403007149522075, "learning_rate": 4.3962848297213626e-06, "loss": 0.6711, "step": 71 }, { "epoch": 0.0670391061452514, "grad_norm": 0.31926767449610777, "learning_rate": 4.458204334365325e-06, "loss": 0.6329, "step": 72 }, { "epoch": 0.06797020484171322, "grad_norm": 0.30683941030454465, "learning_rate": 4.5201238390092885e-06, "loss": 0.6828, "step": 73 }, { "epoch": 0.06890130353817504, "grad_norm": 0.34788378492305766, "learning_rate": 4.582043343653251e-06, "loss": 0.6617, "step": 74 }, { "epoch": 0.06983240223463687, "grad_norm": 0.3322307780226603, "learning_rate": 4.6439628482972135e-06, "loss": 0.662, "step": 75 }, { "epoch": 0.07076350093109869, "grad_norm": 0.3555799912137855, "learning_rate": 4.705882352941177e-06, "loss": 0.6786, "step": 76 }, { "epoch": 0.07169459962756052, "grad_norm": 0.3313007827296315, "learning_rate": 4.7678018575851394e-06, "loss": 0.7166, "step": 77 }, { "epoch": 0.07262569832402235, "grad_norm": 0.33087686668262023, "learning_rate": 4.829721362229103e-06, "loss": 0.6489, "step": 78 }, { "epoch": 0.07355679702048418, "grad_norm": 0.31548495828681594, "learning_rate": 4.891640866873065e-06, "loss": 0.6437, "step": 79 }, { "epoch": 0.074487895716946, "grad_norm": 0.3361575887935754, "learning_rate": 4.953560371517028e-06, "loss": 0.685, "step": 80 }, { "epoch": 0.07541899441340782, "grad_norm": 0.3110985122091292, "learning_rate": 5.015479876160991e-06, "loss": 0.6427, "step": 81 }, { "epoch": 0.07635009310986965, "grad_norm": 0.3165346864618634, "learning_rate": 5.077399380804954e-06, "loss": 0.646, "step": 82 }, { "epoch": 0.07728119180633147, "grad_norm": 0.32218585110032416, "learning_rate": 5.139318885448917e-06, "loss": 0.6699, "step": 83 }, { "epoch": 0.0782122905027933, "grad_norm": 0.3052349785400411, "learning_rate": 5.20123839009288e-06, "loss": 0.6336, "step": 84 }, { "epoch": 0.07914338919925512, "grad_norm": 0.3015407892614267, "learning_rate": 5.263157894736842e-06, "loss": 0.643, "step": 85 }, { "epoch": 0.08007448789571694, "grad_norm": 0.29938393653591266, "learning_rate": 5.325077399380806e-06, "loss": 0.6521, "step": 86 }, { "epoch": 0.08100558659217877, "grad_norm": 0.3084874529114082, "learning_rate": 5.386996904024768e-06, "loss": 0.646, "step": 87 }, { "epoch": 0.08193668528864059, "grad_norm": 0.33751944531649386, "learning_rate": 5.448916408668731e-06, "loss": 0.6598, "step": 88 }, { "epoch": 0.08286778398510242, "grad_norm": 0.3131491845609995, "learning_rate": 5.510835913312694e-06, "loss": 0.6265, "step": 89 }, { "epoch": 0.08379888268156424, "grad_norm": 0.3431851131216333, "learning_rate": 5.5727554179566566e-06, "loss": 0.6789, "step": 90 }, { "epoch": 0.08472998137802606, "grad_norm": 0.3167575796672166, "learning_rate": 5.634674922600619e-06, "loss": 0.6952, "step": 91 }, { "epoch": 0.0856610800744879, "grad_norm": 0.31093222843003265, "learning_rate": 5.6965944272445825e-06, "loss": 0.6502, "step": 92 }, { "epoch": 0.08659217877094973, "grad_norm": 0.3108540893202423, "learning_rate": 5.758513931888545e-06, "loss": 0.6201, "step": 93 }, { "epoch": 0.08752327746741155, "grad_norm": 0.31051911475461136, "learning_rate": 5.8204334365325075e-06, "loss": 0.6474, "step": 94 }, { "epoch": 0.08845437616387337, "grad_norm": 0.32246781903051047, "learning_rate": 5.882352941176471e-06, "loss": 0.6865, "step": 95 }, { "epoch": 0.0893854748603352, "grad_norm": 0.3146077218356652, "learning_rate": 5.9442724458204335e-06, "loss": 0.6382, "step": 96 }, { "epoch": 0.09031657355679702, "grad_norm": 0.33398257963201183, "learning_rate": 6.006191950464398e-06, "loss": 0.6675, "step": 97 }, { "epoch": 0.09124767225325885, "grad_norm": 0.30395017822299697, "learning_rate": 6.068111455108359e-06, "loss": 0.6431, "step": 98 }, { "epoch": 0.09217877094972067, "grad_norm": 0.2965779065640429, "learning_rate": 6.130030959752322e-06, "loss": 0.6219, "step": 99 }, { "epoch": 0.0931098696461825, "grad_norm": 0.31640256529301825, "learning_rate": 6.191950464396286e-06, "loss": 0.6567, "step": 100 }, { "epoch": 0.09404096834264432, "grad_norm": 0.34377377377713897, "learning_rate": 6.253869969040249e-06, "loss": 0.662, "step": 101 }, { "epoch": 0.09497206703910614, "grad_norm": 0.29814458094136587, "learning_rate": 6.31578947368421e-06, "loss": 0.6702, "step": 102 }, { "epoch": 0.09590316573556797, "grad_norm": 0.3041484462562524, "learning_rate": 6.3777089783281746e-06, "loss": 0.6482, "step": 103 }, { "epoch": 0.09683426443202979, "grad_norm": 0.3014837892471764, "learning_rate": 6.439628482972137e-06, "loss": 0.6246, "step": 104 }, { "epoch": 0.09776536312849161, "grad_norm": 0.3081838323005241, "learning_rate": 6.501547987616099e-06, "loss": 0.6447, "step": 105 }, { "epoch": 0.09869646182495345, "grad_norm": 0.30689300738474334, "learning_rate": 6.563467492260063e-06, "loss": 0.6346, "step": 106 }, { "epoch": 0.09962756052141528, "grad_norm": 0.29938246604509167, "learning_rate": 6.6253869969040255e-06, "loss": 0.6572, "step": 107 }, { "epoch": 0.1005586592178771, "grad_norm": 0.30140856201790084, "learning_rate": 6.687306501547988e-06, "loss": 0.6288, "step": 108 }, { "epoch": 0.10148975791433892, "grad_norm": 0.28700426881791424, "learning_rate": 6.7492260061919514e-06, "loss": 0.6269, "step": 109 }, { "epoch": 0.10242085661080075, "grad_norm": 0.31229662097132294, "learning_rate": 6.811145510835914e-06, "loss": 0.6198, "step": 110 }, { "epoch": 0.10335195530726257, "grad_norm": 0.3182918331204797, "learning_rate": 6.873065015479877e-06, "loss": 0.6551, "step": 111 }, { "epoch": 0.1042830540037244, "grad_norm": 0.31128974034951157, "learning_rate": 6.93498452012384e-06, "loss": 0.6579, "step": 112 }, { "epoch": 0.10521415270018622, "grad_norm": 0.297182997824584, "learning_rate": 6.996904024767802e-06, "loss": 0.6099, "step": 113 }, { "epoch": 0.10614525139664804, "grad_norm": 0.30004901564651565, "learning_rate": 7.058823529411766e-06, "loss": 0.6046, "step": 114 }, { "epoch": 0.10707635009310987, "grad_norm": 0.30568538404901224, "learning_rate": 7.120743034055728e-06, "loss": 0.6369, "step": 115 }, { "epoch": 0.10800744878957169, "grad_norm": 0.2992528294737455, "learning_rate": 7.182662538699691e-06, "loss": 0.6547, "step": 116 }, { "epoch": 0.10893854748603352, "grad_norm": 0.3223010131293463, "learning_rate": 7.244582043343654e-06, "loss": 0.6141, "step": 117 }, { "epoch": 0.10986964618249534, "grad_norm": 0.3211121365175639, "learning_rate": 7.306501547987617e-06, "loss": 0.6487, "step": 118 }, { "epoch": 0.11080074487895716, "grad_norm": 0.31735106348606507, "learning_rate": 7.368421052631579e-06, "loss": 0.6657, "step": 119 }, { "epoch": 0.11173184357541899, "grad_norm": 0.3180191906089642, "learning_rate": 7.430340557275543e-06, "loss": 0.6442, "step": 120 }, { "epoch": 0.11266294227188083, "grad_norm": 0.29269320196601134, "learning_rate": 7.492260061919505e-06, "loss": 0.6117, "step": 121 }, { "epoch": 0.11359404096834265, "grad_norm": 0.3165454976772469, "learning_rate": 7.554179566563468e-06, "loss": 0.6248, "step": 122 }, { "epoch": 0.11452513966480447, "grad_norm": 0.3274081150784577, "learning_rate": 7.616099071207431e-06, "loss": 0.6053, "step": 123 }, { "epoch": 0.1154562383612663, "grad_norm": 0.32223086769970183, "learning_rate": 7.678018575851394e-06, "loss": 0.6289, "step": 124 }, { "epoch": 0.11638733705772812, "grad_norm": 0.3040414310568357, "learning_rate": 7.739938080495358e-06, "loss": 0.6273, "step": 125 }, { "epoch": 0.11731843575418995, "grad_norm": 0.31787882081744634, "learning_rate": 7.80185758513932e-06, "loss": 0.6147, "step": 126 }, { "epoch": 0.11824953445065177, "grad_norm": 0.3187433717502804, "learning_rate": 7.863777089783283e-06, "loss": 0.625, "step": 127 }, { "epoch": 0.1191806331471136, "grad_norm": 0.32250040189099993, "learning_rate": 7.925696594427246e-06, "loss": 0.6278, "step": 128 }, { "epoch": 0.12011173184357542, "grad_norm": 0.30877752247099743, "learning_rate": 7.987616099071208e-06, "loss": 0.6175, "step": 129 }, { "epoch": 0.12104283054003724, "grad_norm": 0.3100340308894509, "learning_rate": 8.049535603715171e-06, "loss": 0.6301, "step": 130 }, { "epoch": 0.12197392923649907, "grad_norm": 0.29103725389911106, "learning_rate": 8.111455108359135e-06, "loss": 0.618, "step": 131 }, { "epoch": 0.12290502793296089, "grad_norm": 0.3135417606995896, "learning_rate": 8.173374613003096e-06, "loss": 0.6589, "step": 132 }, { "epoch": 0.12383612662942271, "grad_norm": 0.3091985920246209, "learning_rate": 8.23529411764706e-06, "loss": 0.6387, "step": 133 }, { "epoch": 0.12476722532588454, "grad_norm": 0.2849738943759004, "learning_rate": 8.297213622291023e-06, "loss": 0.6016, "step": 134 }, { "epoch": 0.12569832402234637, "grad_norm": 0.3077474489113993, "learning_rate": 8.359133126934985e-06, "loss": 0.6377, "step": 135 }, { "epoch": 0.1266294227188082, "grad_norm": 0.316602364410257, "learning_rate": 8.421052631578948e-06, "loss": 0.6411, "step": 136 }, { "epoch": 0.12756052141527002, "grad_norm": 0.3049892385469189, "learning_rate": 8.482972136222912e-06, "loss": 0.6183, "step": 137 }, { "epoch": 0.12849162011173185, "grad_norm": 0.29818772540424976, "learning_rate": 8.544891640866873e-06, "loss": 0.6176, "step": 138 }, { "epoch": 0.12942271880819367, "grad_norm": 0.32262566751694305, "learning_rate": 8.606811145510837e-06, "loss": 0.6304, "step": 139 }, { "epoch": 0.1303538175046555, "grad_norm": 0.31702912025472474, "learning_rate": 8.6687306501548e-06, "loss": 0.6039, "step": 140 }, { "epoch": 0.13128491620111732, "grad_norm": 0.30916138122104364, "learning_rate": 8.730650154798762e-06, "loss": 0.6266, "step": 141 }, { "epoch": 0.13221601489757914, "grad_norm": 0.33332198435939236, "learning_rate": 8.792569659442725e-06, "loss": 0.6666, "step": 142 }, { "epoch": 0.13314711359404097, "grad_norm": 0.3101169517077026, "learning_rate": 8.854489164086688e-06, "loss": 0.6271, "step": 143 }, { "epoch": 0.1340782122905028, "grad_norm": 0.31609082172612996, "learning_rate": 8.91640866873065e-06, "loss": 0.636, "step": 144 }, { "epoch": 0.13500931098696461, "grad_norm": 0.3282032139364846, "learning_rate": 8.978328173374614e-06, "loss": 0.6175, "step": 145 }, { "epoch": 0.13594040968342644, "grad_norm": 0.3218411586151158, "learning_rate": 9.040247678018577e-06, "loss": 0.6278, "step": 146 }, { "epoch": 0.13687150837988826, "grad_norm": 0.32536115442497804, "learning_rate": 9.102167182662539e-06, "loss": 0.6433, "step": 147 }, { "epoch": 0.1378026070763501, "grad_norm": 0.3034333335922977, "learning_rate": 9.164086687306502e-06, "loss": 0.6133, "step": 148 }, { "epoch": 0.1387337057728119, "grad_norm": 0.3080232789578542, "learning_rate": 9.226006191950465e-06, "loss": 0.5993, "step": 149 }, { "epoch": 0.13966480446927373, "grad_norm": 0.3101184408765573, "learning_rate": 9.287925696594427e-06, "loss": 0.6464, "step": 150 }, { "epoch": 0.14059590316573556, "grad_norm": 0.3099713121932133, "learning_rate": 9.34984520123839e-06, "loss": 0.5868, "step": 151 }, { "epoch": 0.14152700186219738, "grad_norm": 0.316849460990793, "learning_rate": 9.411764705882354e-06, "loss": 0.6059, "step": 152 }, { "epoch": 0.1424581005586592, "grad_norm": 0.3387200402608541, "learning_rate": 9.473684210526315e-06, "loss": 0.6268, "step": 153 }, { "epoch": 0.14338919925512103, "grad_norm": 0.34446588834691183, "learning_rate": 9.535603715170279e-06, "loss": 0.5974, "step": 154 }, { "epoch": 0.14432029795158285, "grad_norm": 0.33652890437261557, "learning_rate": 9.597523219814242e-06, "loss": 0.6037, "step": 155 }, { "epoch": 0.1452513966480447, "grad_norm": 0.3465762916651475, "learning_rate": 9.659442724458206e-06, "loss": 0.5894, "step": 156 }, { "epoch": 0.14618249534450653, "grad_norm": 0.37953586938765826, "learning_rate": 9.721362229102167e-06, "loss": 0.6053, "step": 157 }, { "epoch": 0.14711359404096835, "grad_norm": 0.3654805438066682, "learning_rate": 9.78328173374613e-06, "loss": 0.6223, "step": 158 }, { "epoch": 0.14804469273743018, "grad_norm": 0.3674245364036211, "learning_rate": 9.845201238390094e-06, "loss": 0.6112, "step": 159 }, { "epoch": 0.148975791433892, "grad_norm": 0.38172004846007346, "learning_rate": 9.907120743034056e-06, "loss": 0.6295, "step": 160 }, { "epoch": 0.14990689013035383, "grad_norm": 0.3696751525594143, "learning_rate": 9.969040247678019e-06, "loss": 0.5943, "step": 161 }, { "epoch": 0.15083798882681565, "grad_norm": 0.3345976523151113, "learning_rate": 1.0030959752321983e-05, "loss": 0.6079, "step": 162 }, { "epoch": 0.15176908752327747, "grad_norm": 0.35868887992480564, "learning_rate": 1.0092879256965946e-05, "loss": 0.6157, "step": 163 }, { "epoch": 0.1527001862197393, "grad_norm": 0.3660523361080569, "learning_rate": 1.0154798761609908e-05, "loss": 0.6424, "step": 164 }, { "epoch": 0.15363128491620112, "grad_norm": 0.3321275947616358, "learning_rate": 1.0216718266253871e-05, "loss": 0.6038, "step": 165 }, { "epoch": 0.15456238361266295, "grad_norm": 0.3691455519372419, "learning_rate": 1.0278637770897834e-05, "loss": 0.6284, "step": 166 }, { "epoch": 0.15549348230912477, "grad_norm": 0.3640854776665393, "learning_rate": 1.0340557275541796e-05, "loss": 0.5923, "step": 167 }, { "epoch": 0.1564245810055866, "grad_norm": 0.36750136631845326, "learning_rate": 1.040247678018576e-05, "loss": 0.5794, "step": 168 }, { "epoch": 0.15735567970204842, "grad_norm": 0.3231833796021066, "learning_rate": 1.0464396284829723e-05, "loss": 0.5995, "step": 169 }, { "epoch": 0.15828677839851024, "grad_norm": 0.40468713179261984, "learning_rate": 1.0526315789473684e-05, "loss": 0.6498, "step": 170 }, { "epoch": 0.15921787709497207, "grad_norm": 0.3864727036349536, "learning_rate": 1.0588235294117648e-05, "loss": 0.6348, "step": 171 }, { "epoch": 0.1601489757914339, "grad_norm": 0.3641331668003492, "learning_rate": 1.0650154798761611e-05, "loss": 0.6245, "step": 172 }, { "epoch": 0.1610800744878957, "grad_norm": 0.3825567283894926, "learning_rate": 1.0712074303405573e-05, "loss": 0.6202, "step": 173 }, { "epoch": 0.16201117318435754, "grad_norm": 0.36093840652581055, "learning_rate": 1.0773993808049536e-05, "loss": 0.6404, "step": 174 }, { "epoch": 0.16294227188081936, "grad_norm": 0.3630273289713244, "learning_rate": 1.08359133126935e-05, "loss": 0.5939, "step": 175 }, { "epoch": 0.16387337057728119, "grad_norm": 0.34972937230951895, "learning_rate": 1.0897832817337461e-05, "loss": 0.6217, "step": 176 }, { "epoch": 0.164804469273743, "grad_norm": 0.37350123324962786, "learning_rate": 1.0959752321981425e-05, "loss": 0.625, "step": 177 }, { "epoch": 0.16573556797020483, "grad_norm": 0.3408429211961421, "learning_rate": 1.1021671826625388e-05, "loss": 0.6122, "step": 178 }, { "epoch": 0.16666666666666666, "grad_norm": 0.35516584416010827, "learning_rate": 1.108359133126935e-05, "loss": 0.6237, "step": 179 }, { "epoch": 0.16759776536312848, "grad_norm": 0.374929372903721, "learning_rate": 1.1145510835913313e-05, "loss": 0.5959, "step": 180 }, { "epoch": 0.1685288640595903, "grad_norm": 0.34513881824753084, "learning_rate": 1.1207430340557277e-05, "loss": 0.6048, "step": 181 }, { "epoch": 0.16945996275605213, "grad_norm": 0.34836640753735176, "learning_rate": 1.1269349845201238e-05, "loss": 0.5984, "step": 182 }, { "epoch": 0.17039106145251395, "grad_norm": 0.3303283780352446, "learning_rate": 1.1331269349845202e-05, "loss": 0.6113, "step": 183 }, { "epoch": 0.1713221601489758, "grad_norm": 0.36721671592248845, "learning_rate": 1.1393188854489165e-05, "loss": 0.6182, "step": 184 }, { "epoch": 0.17225325884543763, "grad_norm": 0.34517429467589483, "learning_rate": 1.1455108359133127e-05, "loss": 0.6301, "step": 185 }, { "epoch": 0.17318435754189945, "grad_norm": 0.35338525552672023, "learning_rate": 1.151702786377709e-05, "loss": 0.613, "step": 186 }, { "epoch": 0.17411545623836128, "grad_norm": 0.3837844333299902, "learning_rate": 1.1578947368421053e-05, "loss": 0.6337, "step": 187 }, { "epoch": 0.1750465549348231, "grad_norm": 0.3335828022388588, "learning_rate": 1.1640866873065015e-05, "loss": 0.6047, "step": 188 }, { "epoch": 0.17597765363128492, "grad_norm": 0.38248043002778725, "learning_rate": 1.1702786377708978e-05, "loss": 0.604, "step": 189 }, { "epoch": 0.17690875232774675, "grad_norm": 0.3431373446948525, "learning_rate": 1.1764705882352942e-05, "loss": 0.5745, "step": 190 }, { "epoch": 0.17783985102420857, "grad_norm": 0.3648191289306754, "learning_rate": 1.1826625386996907e-05, "loss": 0.6036, "step": 191 }, { "epoch": 0.1787709497206704, "grad_norm": 0.3418226544537112, "learning_rate": 1.1888544891640867e-05, "loss": 0.6323, "step": 192 }, { "epoch": 0.17970204841713222, "grad_norm": 0.3660222239956115, "learning_rate": 1.195046439628483e-05, "loss": 0.5761, "step": 193 }, { "epoch": 0.18063314711359404, "grad_norm": 0.3354586446559913, "learning_rate": 1.2012383900928795e-05, "loss": 0.6231, "step": 194 }, { "epoch": 0.18156424581005587, "grad_norm": 0.3867375040728262, "learning_rate": 1.2074303405572755e-05, "loss": 0.6217, "step": 195 }, { "epoch": 0.1824953445065177, "grad_norm": 0.3225784046105545, "learning_rate": 1.2136222910216719e-05, "loss": 0.5795, "step": 196 }, { "epoch": 0.18342644320297952, "grad_norm": 0.3323067368877745, "learning_rate": 1.2198142414860684e-05, "loss": 0.5572, "step": 197 }, { "epoch": 0.18435754189944134, "grad_norm": 0.3384698090406128, "learning_rate": 1.2260061919504644e-05, "loss": 0.5988, "step": 198 }, { "epoch": 0.18528864059590316, "grad_norm": 0.3417022956115256, "learning_rate": 1.2321981424148607e-05, "loss": 0.5861, "step": 199 }, { "epoch": 0.186219739292365, "grad_norm": 0.3440594254441988, "learning_rate": 1.2383900928792572e-05, "loss": 0.5763, "step": 200 }, { "epoch": 0.1871508379888268, "grad_norm": 0.3359578566756565, "learning_rate": 1.2445820433436532e-05, "loss": 0.6138, "step": 201 }, { "epoch": 0.18808193668528864, "grad_norm": 0.344503864841421, "learning_rate": 1.2507739938080497e-05, "loss": 0.5825, "step": 202 }, { "epoch": 0.18901303538175046, "grad_norm": 0.3267586477764772, "learning_rate": 1.256965944272446e-05, "loss": 0.6184, "step": 203 }, { "epoch": 0.18994413407821228, "grad_norm": 0.3747054617059497, "learning_rate": 1.263157894736842e-05, "loss": 0.592, "step": 204 }, { "epoch": 0.1908752327746741, "grad_norm": 0.3303448341855896, "learning_rate": 1.2693498452012386e-05, "loss": 0.622, "step": 205 }, { "epoch": 0.19180633147113593, "grad_norm": 0.37382063100948826, "learning_rate": 1.2755417956656349e-05, "loss": 0.6015, "step": 206 }, { "epoch": 0.19273743016759776, "grad_norm": 0.3446869339804956, "learning_rate": 1.2817337461300309e-05, "loss": 0.5986, "step": 207 }, { "epoch": 0.19366852886405958, "grad_norm": 0.3540597598339445, "learning_rate": 1.2879256965944274e-05, "loss": 0.5956, "step": 208 }, { "epoch": 0.1945996275605214, "grad_norm": 0.3368863520177894, "learning_rate": 1.2941176470588238e-05, "loss": 0.5988, "step": 209 }, { "epoch": 0.19553072625698323, "grad_norm": 0.36001119772339957, "learning_rate": 1.3003095975232198e-05, "loss": 0.5703, "step": 210 }, { "epoch": 0.19646182495344505, "grad_norm": 0.35311362024165077, "learning_rate": 1.3065015479876163e-05, "loss": 0.61, "step": 211 }, { "epoch": 0.1973929236499069, "grad_norm": 0.335028020727615, "learning_rate": 1.3126934984520126e-05, "loss": 0.5751, "step": 212 }, { "epoch": 0.19832402234636873, "grad_norm": 0.3689272840230204, "learning_rate": 1.3188854489164088e-05, "loss": 0.6222, "step": 213 }, { "epoch": 0.19925512104283055, "grad_norm": 0.35203113431220434, "learning_rate": 1.3250773993808051e-05, "loss": 0.5958, "step": 214 }, { "epoch": 0.20018621973929238, "grad_norm": 0.3402226750934948, "learning_rate": 1.3312693498452014e-05, "loss": 0.6121, "step": 215 }, { "epoch": 0.2011173184357542, "grad_norm": 0.38324906548034887, "learning_rate": 1.3374613003095976e-05, "loss": 0.6, "step": 216 }, { "epoch": 0.20204841713221602, "grad_norm": 0.36274537605903656, "learning_rate": 1.343653250773994e-05, "loss": 0.6004, "step": 217 }, { "epoch": 0.20297951582867785, "grad_norm": 0.3525556461532976, "learning_rate": 1.3498452012383903e-05, "loss": 0.5918, "step": 218 }, { "epoch": 0.20391061452513967, "grad_norm": 0.3902882446788351, "learning_rate": 1.3560371517027865e-05, "loss": 0.6281, "step": 219 }, { "epoch": 0.2048417132216015, "grad_norm": 0.32028883703526045, "learning_rate": 1.3622291021671828e-05, "loss": 0.5775, "step": 220 }, { "epoch": 0.20577281191806332, "grad_norm": 0.34780480610553377, "learning_rate": 1.3684210526315791e-05, "loss": 0.6008, "step": 221 }, { "epoch": 0.20670391061452514, "grad_norm": 0.35580878141892885, "learning_rate": 1.3746130030959755e-05, "loss": 0.6111, "step": 222 }, { "epoch": 0.20763500931098697, "grad_norm": 0.3566719224445084, "learning_rate": 1.3808049535603716e-05, "loss": 0.6185, "step": 223 }, { "epoch": 0.2085661080074488, "grad_norm": 0.34703755828546295, "learning_rate": 1.386996904024768e-05, "loss": 0.6094, "step": 224 }, { "epoch": 0.20949720670391062, "grad_norm": 0.33813509560031035, "learning_rate": 1.3931888544891643e-05, "loss": 0.5716, "step": 225 }, { "epoch": 0.21042830540037244, "grad_norm": 0.3552049153682771, "learning_rate": 1.3993808049535605e-05, "loss": 0.6194, "step": 226 }, { "epoch": 0.21135940409683426, "grad_norm": 0.3390494086770813, "learning_rate": 1.4055727554179568e-05, "loss": 0.5851, "step": 227 }, { "epoch": 0.2122905027932961, "grad_norm": 0.3571640766676411, "learning_rate": 1.4117647058823532e-05, "loss": 0.5911, "step": 228 }, { "epoch": 0.2132216014897579, "grad_norm": 0.3711500982056998, "learning_rate": 1.4179566563467493e-05, "loss": 0.5955, "step": 229 }, { "epoch": 0.21415270018621974, "grad_norm": 0.3473307294297347, "learning_rate": 1.4241486068111457e-05, "loss": 0.6102, "step": 230 }, { "epoch": 0.21508379888268156, "grad_norm": 0.3489117488134003, "learning_rate": 1.430340557275542e-05, "loss": 0.634, "step": 231 }, { "epoch": 0.21601489757914338, "grad_norm": 0.3221980813705926, "learning_rate": 1.4365325077399382e-05, "loss": 0.5948, "step": 232 }, { "epoch": 0.2169459962756052, "grad_norm": 0.34463495258727916, "learning_rate": 1.4427244582043345e-05, "loss": 0.5753, "step": 233 }, { "epoch": 0.21787709497206703, "grad_norm": 0.3294838461882524, "learning_rate": 1.4489164086687308e-05, "loss": 0.5883, "step": 234 }, { "epoch": 0.21880819366852886, "grad_norm": 0.3448881448915341, "learning_rate": 1.455108359133127e-05, "loss": 0.5782, "step": 235 }, { "epoch": 0.21973929236499068, "grad_norm": 0.35798061311802604, "learning_rate": 1.4613003095975234e-05, "loss": 0.6057, "step": 236 }, { "epoch": 0.2206703910614525, "grad_norm": 0.35129707944162664, "learning_rate": 1.4674922600619197e-05, "loss": 0.5774, "step": 237 }, { "epoch": 0.22160148975791433, "grad_norm": 0.3342988315960284, "learning_rate": 1.4736842105263159e-05, "loss": 0.5572, "step": 238 }, { "epoch": 0.22253258845437615, "grad_norm": 0.46818631313023845, "learning_rate": 1.4798761609907122e-05, "loss": 0.6274, "step": 239 }, { "epoch": 0.22346368715083798, "grad_norm": 0.33616925582421053, "learning_rate": 1.4860681114551085e-05, "loss": 0.5958, "step": 240 }, { "epoch": 0.22439478584729983, "grad_norm": 0.36653953330327915, "learning_rate": 1.4922600619195047e-05, "loss": 0.5817, "step": 241 }, { "epoch": 0.22532588454376165, "grad_norm": 0.3669487389257859, "learning_rate": 1.498452012383901e-05, "loss": 0.5805, "step": 242 }, { "epoch": 0.22625698324022347, "grad_norm": 0.3550127865447363, "learning_rate": 1.5046439628482974e-05, "loss": 0.5874, "step": 243 }, { "epoch": 0.2271880819366853, "grad_norm": 0.369812721314182, "learning_rate": 1.5108359133126935e-05, "loss": 0.5806, "step": 244 }, { "epoch": 0.22811918063314712, "grad_norm": 0.32484393701662667, "learning_rate": 1.5170278637770899e-05, "loss": 0.5669, "step": 245 }, { "epoch": 0.22905027932960895, "grad_norm": 0.37060740032377665, "learning_rate": 1.5232198142414862e-05, "loss": 0.5655, "step": 246 }, { "epoch": 0.22998137802607077, "grad_norm": 0.37192296740047115, "learning_rate": 1.5294117647058822e-05, "loss": 0.5758, "step": 247 }, { "epoch": 0.2309124767225326, "grad_norm": 0.38219241109644353, "learning_rate": 1.535603715170279e-05, "loss": 0.5915, "step": 248 }, { "epoch": 0.23184357541899442, "grad_norm": 0.32085688344490687, "learning_rate": 1.5417956656346752e-05, "loss": 0.5781, "step": 249 }, { "epoch": 0.23277467411545624, "grad_norm": 0.35292461641938716, "learning_rate": 1.5479876160990716e-05, "loss": 0.5743, "step": 250 }, { "epoch": 0.23370577281191807, "grad_norm": 0.3684751532728346, "learning_rate": 1.5541795665634676e-05, "loss": 0.6173, "step": 251 }, { "epoch": 0.2346368715083799, "grad_norm": 0.36444100196858453, "learning_rate": 1.560371517027864e-05, "loss": 0.5939, "step": 252 }, { "epoch": 0.23556797020484171, "grad_norm": 0.3273318758914417, "learning_rate": 1.5665634674922602e-05, "loss": 0.5835, "step": 253 }, { "epoch": 0.23649906890130354, "grad_norm": 0.3329514436983228, "learning_rate": 1.5727554179566566e-05, "loss": 0.5591, "step": 254 }, { "epoch": 0.23743016759776536, "grad_norm": 0.4245548233322997, "learning_rate": 1.578947368421053e-05, "loss": 0.5952, "step": 255 }, { "epoch": 0.2383612662942272, "grad_norm": 0.3662961324127208, "learning_rate": 1.5851393188854493e-05, "loss": 0.5883, "step": 256 }, { "epoch": 0.239292364990689, "grad_norm": 0.3898459505164005, "learning_rate": 1.5913312693498453e-05, "loss": 0.5965, "step": 257 }, { "epoch": 0.24022346368715083, "grad_norm": 0.3486790527982334, "learning_rate": 1.5975232198142416e-05, "loss": 0.572, "step": 258 }, { "epoch": 0.24115456238361266, "grad_norm": 0.3885230309035845, "learning_rate": 1.603715170278638e-05, "loss": 0.5863, "step": 259 }, { "epoch": 0.24208566108007448, "grad_norm": 0.41049702534776494, "learning_rate": 1.6099071207430343e-05, "loss": 0.6339, "step": 260 }, { "epoch": 0.2430167597765363, "grad_norm": 0.38804066070456605, "learning_rate": 1.6160990712074306e-05, "loss": 0.5901, "step": 261 }, { "epoch": 0.24394785847299813, "grad_norm": 0.40162471368571784, "learning_rate": 1.622291021671827e-05, "loss": 0.5659, "step": 262 }, { "epoch": 0.24487895716945995, "grad_norm": 0.3925972718870882, "learning_rate": 1.628482972136223e-05, "loss": 0.5944, "step": 263 }, { "epoch": 0.24581005586592178, "grad_norm": 0.42741993470129025, "learning_rate": 1.6346749226006193e-05, "loss": 0.5916, "step": 264 }, { "epoch": 0.2467411545623836, "grad_norm": 0.36898681168799974, "learning_rate": 1.6408668730650156e-05, "loss": 0.5912, "step": 265 }, { "epoch": 0.24767225325884543, "grad_norm": 0.3943676998742722, "learning_rate": 1.647058823529412e-05, "loss": 0.5722, "step": 266 }, { "epoch": 0.24860335195530725, "grad_norm": 0.41950021723269326, "learning_rate": 1.6532507739938083e-05, "loss": 0.5651, "step": 267 }, { "epoch": 0.24953445065176907, "grad_norm": 0.37920445085531723, "learning_rate": 1.6594427244582046e-05, "loss": 0.5732, "step": 268 }, { "epoch": 0.2504655493482309, "grad_norm": 0.3392353859888486, "learning_rate": 1.6656346749226006e-05, "loss": 0.5863, "step": 269 }, { "epoch": 0.25139664804469275, "grad_norm": 0.37841359461673596, "learning_rate": 1.671826625386997e-05, "loss": 0.5731, "step": 270 }, { "epoch": 0.25232774674115455, "grad_norm": 0.34914845881272194, "learning_rate": 1.6780185758513933e-05, "loss": 0.6197, "step": 271 }, { "epoch": 0.2532588454376164, "grad_norm": 0.34999157151834936, "learning_rate": 1.6842105263157896e-05, "loss": 0.5726, "step": 272 }, { "epoch": 0.2541899441340782, "grad_norm": 0.35387933006648825, "learning_rate": 1.690402476780186e-05, "loss": 0.5798, "step": 273 }, { "epoch": 0.25512104283054005, "grad_norm": 0.364535308281995, "learning_rate": 1.6965944272445823e-05, "loss": 0.5883, "step": 274 }, { "epoch": 0.25605214152700184, "grad_norm": 0.38194828974275813, "learning_rate": 1.7027863777089783e-05, "loss": 0.6085, "step": 275 }, { "epoch": 0.2569832402234637, "grad_norm": 0.36869033856324473, "learning_rate": 1.7089783281733747e-05, "loss": 0.5841, "step": 276 }, { "epoch": 0.2579143389199255, "grad_norm": 0.40653615969845547, "learning_rate": 1.715170278637771e-05, "loss": 0.5869, "step": 277 }, { "epoch": 0.25884543761638734, "grad_norm": 0.3771017047882407, "learning_rate": 1.7213622291021673e-05, "loss": 0.5935, "step": 278 }, { "epoch": 0.25977653631284914, "grad_norm": 0.38979163355720975, "learning_rate": 1.7275541795665637e-05, "loss": 0.5412, "step": 279 }, { "epoch": 0.260707635009311, "grad_norm": 0.40468276763384603, "learning_rate": 1.73374613003096e-05, "loss": 0.5962, "step": 280 }, { "epoch": 0.2616387337057728, "grad_norm": 0.3726739698792466, "learning_rate": 1.7399380804953563e-05, "loss": 0.5485, "step": 281 }, { "epoch": 0.26256983240223464, "grad_norm": 0.42020322842073377, "learning_rate": 1.7461300309597523e-05, "loss": 0.5724, "step": 282 }, { "epoch": 0.2635009310986965, "grad_norm": 0.4663997778182358, "learning_rate": 1.7523219814241487e-05, "loss": 0.61, "step": 283 }, { "epoch": 0.2644320297951583, "grad_norm": 0.3583283816691081, "learning_rate": 1.758513931888545e-05, "loss": 0.5787, "step": 284 }, { "epoch": 0.26536312849162014, "grad_norm": 0.43908327292147237, "learning_rate": 1.7647058823529414e-05, "loss": 0.5787, "step": 285 }, { "epoch": 0.26629422718808193, "grad_norm": 0.37325125474787585, "learning_rate": 1.7708978328173377e-05, "loss": 0.5893, "step": 286 }, { "epoch": 0.2672253258845438, "grad_norm": 0.3797050303369585, "learning_rate": 1.777089783281734e-05, "loss": 0.5802, "step": 287 }, { "epoch": 0.2681564245810056, "grad_norm": 0.3572533299237073, "learning_rate": 1.78328173374613e-05, "loss": 0.5636, "step": 288 }, { "epoch": 0.26908752327746743, "grad_norm": 0.3762627152820312, "learning_rate": 1.7894736842105264e-05, "loss": 0.5728, "step": 289 }, { "epoch": 0.27001862197392923, "grad_norm": 0.4329891879217536, "learning_rate": 1.7956656346749227e-05, "loss": 0.5713, "step": 290 }, { "epoch": 0.2709497206703911, "grad_norm": 0.41565470185655334, "learning_rate": 1.801857585139319e-05, "loss": 0.5754, "step": 291 }, { "epoch": 0.2718808193668529, "grad_norm": 0.3814778443045893, "learning_rate": 1.8080495356037154e-05, "loss": 0.5605, "step": 292 }, { "epoch": 0.27281191806331473, "grad_norm": 0.3901038421958148, "learning_rate": 1.8142414860681117e-05, "loss": 0.5555, "step": 293 }, { "epoch": 0.2737430167597765, "grad_norm": 0.3783090252825516, "learning_rate": 1.8204334365325077e-05, "loss": 0.5687, "step": 294 }, { "epoch": 0.2746741154562384, "grad_norm": 0.40446257653923195, "learning_rate": 1.826625386996904e-05, "loss": 0.5796, "step": 295 }, { "epoch": 0.2756052141527002, "grad_norm": 0.3980861528005157, "learning_rate": 1.8328173374613004e-05, "loss": 0.543, "step": 296 }, { "epoch": 0.276536312849162, "grad_norm": 0.4134371553329208, "learning_rate": 1.8390092879256967e-05, "loss": 0.5829, "step": 297 }, { "epoch": 0.2774674115456238, "grad_norm": 0.35962424327266446, "learning_rate": 1.845201238390093e-05, "loss": 0.5957, "step": 298 }, { "epoch": 0.2783985102420857, "grad_norm": 0.35935588787284584, "learning_rate": 1.8513931888544894e-05, "loss": 0.5728, "step": 299 }, { "epoch": 0.27932960893854747, "grad_norm": 0.3668613152716071, "learning_rate": 1.8575851393188854e-05, "loss": 0.6073, "step": 300 }, { "epoch": 0.2802607076350093, "grad_norm": 0.3636057699692993, "learning_rate": 1.8637770897832817e-05, "loss": 0.5926, "step": 301 }, { "epoch": 0.2811918063314711, "grad_norm": 0.3687087120502099, "learning_rate": 1.869969040247678e-05, "loss": 0.5987, "step": 302 }, { "epoch": 0.28212290502793297, "grad_norm": 0.36959037211519746, "learning_rate": 1.8761609907120744e-05, "loss": 0.5858, "step": 303 }, { "epoch": 0.28305400372439476, "grad_norm": 0.43164477832871706, "learning_rate": 1.8823529411764708e-05, "loss": 0.5892, "step": 304 }, { "epoch": 0.2839851024208566, "grad_norm": 0.35345499632231026, "learning_rate": 1.888544891640867e-05, "loss": 0.5796, "step": 305 }, { "epoch": 0.2849162011173184, "grad_norm": 0.3931155470604422, "learning_rate": 1.894736842105263e-05, "loss": 0.5914, "step": 306 }, { "epoch": 0.28584729981378026, "grad_norm": 0.39651999424782697, "learning_rate": 1.9009287925696594e-05, "loss": 0.5938, "step": 307 }, { "epoch": 0.28677839851024206, "grad_norm": 0.3429872749598439, "learning_rate": 1.9071207430340558e-05, "loss": 0.5386, "step": 308 }, { "epoch": 0.2877094972067039, "grad_norm": 0.41266849806701994, "learning_rate": 1.913312693498452e-05, "loss": 0.5826, "step": 309 }, { "epoch": 0.2886405959031657, "grad_norm": 0.3873378369686832, "learning_rate": 1.9195046439628485e-05, "loss": 0.6095, "step": 310 }, { "epoch": 0.28957169459962756, "grad_norm": 0.47546351009428595, "learning_rate": 1.9256965944272448e-05, "loss": 0.586, "step": 311 }, { "epoch": 0.2905027932960894, "grad_norm": 0.37235131435353797, "learning_rate": 1.931888544891641e-05, "loss": 0.5614, "step": 312 }, { "epoch": 0.2914338919925512, "grad_norm": 0.3520395708695704, "learning_rate": 1.938080495356037e-05, "loss": 0.5725, "step": 313 }, { "epoch": 0.29236499068901306, "grad_norm": 0.4617377040225295, "learning_rate": 1.9442724458204335e-05, "loss": 0.5733, "step": 314 }, { "epoch": 0.29329608938547486, "grad_norm": 0.36218060622767556, "learning_rate": 1.95046439628483e-05, "loss": 0.5566, "step": 315 }, { "epoch": 0.2942271880819367, "grad_norm": 0.47464213329067906, "learning_rate": 1.956656346749226e-05, "loss": 0.5669, "step": 316 }, { "epoch": 0.2951582867783985, "grad_norm": 0.4115942982028366, "learning_rate": 1.9628482972136225e-05, "loss": 0.5662, "step": 317 }, { "epoch": 0.29608938547486036, "grad_norm": 0.45371202984213277, "learning_rate": 1.9690402476780188e-05, "loss": 0.5754, "step": 318 }, { "epoch": 0.29702048417132215, "grad_norm": 0.43154584814746255, "learning_rate": 1.9752321981424148e-05, "loss": 0.6129, "step": 319 }, { "epoch": 0.297951582867784, "grad_norm": 0.39449228158946936, "learning_rate": 1.981424148606811e-05, "loss": 0.5751, "step": 320 }, { "epoch": 0.2988826815642458, "grad_norm": 0.41568317552786965, "learning_rate": 1.9876160990712078e-05, "loss": 0.5524, "step": 321 }, { "epoch": 0.29981378026070765, "grad_norm": 0.4274362242745491, "learning_rate": 1.9938080495356038e-05, "loss": 0.5994, "step": 322 }, { "epoch": 0.30074487895716945, "grad_norm": 0.4477482738949482, "learning_rate": 2e-05, "loss": 0.6037, "step": 323 }, { "epoch": 0.3016759776536313, "grad_norm": 0.3563210184908157, "learning_rate": 1.9993101069334254e-05, "loss": 0.5583, "step": 324 }, { "epoch": 0.3026070763500931, "grad_norm": 0.4973463847818965, "learning_rate": 1.998620213866851e-05, "loss": 0.5947, "step": 325 }, { "epoch": 0.30353817504655495, "grad_norm": 0.3386695364935062, "learning_rate": 1.997930320800276e-05, "loss": 0.5858, "step": 326 }, { "epoch": 0.30446927374301674, "grad_norm": 0.429428351487666, "learning_rate": 1.9972404277337015e-05, "loss": 0.5697, "step": 327 }, { "epoch": 0.3054003724394786, "grad_norm": 0.3817151464879252, "learning_rate": 1.9965505346671267e-05, "loss": 0.5906, "step": 328 }, { "epoch": 0.3063314711359404, "grad_norm": 0.41483553843110765, "learning_rate": 1.9958606416005523e-05, "loss": 0.5505, "step": 329 }, { "epoch": 0.30726256983240224, "grad_norm": 0.4099215506084841, "learning_rate": 1.9951707485339772e-05, "loss": 0.6036, "step": 330 }, { "epoch": 0.30819366852886404, "grad_norm": 0.36213469575129653, "learning_rate": 1.9944808554674028e-05, "loss": 0.5594, "step": 331 }, { "epoch": 0.3091247672253259, "grad_norm": 0.38621070435511884, "learning_rate": 1.993790962400828e-05, "loss": 0.5462, "step": 332 }, { "epoch": 0.3100558659217877, "grad_norm": 0.4236149666137502, "learning_rate": 1.9931010693342532e-05, "loss": 0.572, "step": 333 }, { "epoch": 0.31098696461824954, "grad_norm": 0.3812248903020204, "learning_rate": 1.9924111762676788e-05, "loss": 0.5571, "step": 334 }, { "epoch": 0.31191806331471134, "grad_norm": 0.4385906090258591, "learning_rate": 1.991721283201104e-05, "loss": 0.5673, "step": 335 }, { "epoch": 0.3128491620111732, "grad_norm": 0.40461325544037197, "learning_rate": 1.9910313901345293e-05, "loss": 0.6238, "step": 336 }, { "epoch": 0.313780260707635, "grad_norm": 0.38734471916646224, "learning_rate": 1.9903414970679546e-05, "loss": 0.5471, "step": 337 }, { "epoch": 0.31471135940409684, "grad_norm": 0.3549909745784487, "learning_rate": 1.98965160400138e-05, "loss": 0.5745, "step": 338 }, { "epoch": 0.31564245810055863, "grad_norm": 0.40356114399684484, "learning_rate": 1.988961710934805e-05, "loss": 0.5912, "step": 339 }, { "epoch": 0.3165735567970205, "grad_norm": 0.35507990377925996, "learning_rate": 1.9882718178682306e-05, "loss": 0.5405, "step": 340 }, { "epoch": 0.31750465549348234, "grad_norm": 0.3976103245326853, "learning_rate": 1.987581924801656e-05, "loss": 0.5811, "step": 341 }, { "epoch": 0.31843575418994413, "grad_norm": 0.4048427326244477, "learning_rate": 1.9868920317350814e-05, "loss": 0.5967, "step": 342 }, { "epoch": 0.319366852886406, "grad_norm": 0.3795750596246578, "learning_rate": 1.9862021386685067e-05, "loss": 0.5313, "step": 343 }, { "epoch": 0.3202979515828678, "grad_norm": 0.4537107770843618, "learning_rate": 1.985512245601932e-05, "loss": 0.5637, "step": 344 }, { "epoch": 0.32122905027932963, "grad_norm": 0.3659452227372942, "learning_rate": 1.984822352535357e-05, "loss": 0.5414, "step": 345 }, { "epoch": 0.3221601489757914, "grad_norm": 0.5267885399790172, "learning_rate": 1.9841324594687824e-05, "loss": 0.5684, "step": 346 }, { "epoch": 0.3230912476722533, "grad_norm": 0.38231974927123136, "learning_rate": 1.983442566402208e-05, "loss": 0.5521, "step": 347 }, { "epoch": 0.3240223463687151, "grad_norm": 0.45941858839254457, "learning_rate": 1.982752673335633e-05, "loss": 0.5629, "step": 348 }, { "epoch": 0.3249534450651769, "grad_norm": 0.3824114236195757, "learning_rate": 1.9820627802690585e-05, "loss": 0.5725, "step": 349 }, { "epoch": 0.3258845437616387, "grad_norm": 0.4284872695299513, "learning_rate": 1.9813728872024837e-05, "loss": 0.5764, "step": 350 }, { "epoch": 0.3268156424581006, "grad_norm": 0.3876792225320314, "learning_rate": 1.9806829941359093e-05, "loss": 0.5938, "step": 351 }, { "epoch": 0.32774674115456237, "grad_norm": 0.36253920227924274, "learning_rate": 1.9799931010693345e-05, "loss": 0.5205, "step": 352 }, { "epoch": 0.3286778398510242, "grad_norm": 0.37161054974884944, "learning_rate": 1.9793032080027598e-05, "loss": 0.5276, "step": 353 }, { "epoch": 0.329608938547486, "grad_norm": 0.40169452538414274, "learning_rate": 1.978613314936185e-05, "loss": 0.5732, "step": 354 }, { "epoch": 0.33054003724394787, "grad_norm": 0.3807631269986973, "learning_rate": 1.9779234218696102e-05, "loss": 0.5959, "step": 355 }, { "epoch": 0.33147113594040967, "grad_norm": 0.39384579246953794, "learning_rate": 1.9772335288030358e-05, "loss": 0.5791, "step": 356 }, { "epoch": 0.3324022346368715, "grad_norm": 0.394143947163939, "learning_rate": 1.976543635736461e-05, "loss": 0.5735, "step": 357 }, { "epoch": 0.3333333333333333, "grad_norm": 0.36602367398469204, "learning_rate": 1.9758537426698863e-05, "loss": 0.5687, "step": 358 }, { "epoch": 0.33426443202979517, "grad_norm": 0.3964475226278281, "learning_rate": 1.9751638496033115e-05, "loss": 0.5617, "step": 359 }, { "epoch": 0.33519553072625696, "grad_norm": 0.33555398836400574, "learning_rate": 1.974473956536737e-05, "loss": 0.5866, "step": 360 }, { "epoch": 0.3361266294227188, "grad_norm": 0.36396157588626354, "learning_rate": 1.9737840634701624e-05, "loss": 0.5623, "step": 361 }, { "epoch": 0.3370577281191806, "grad_norm": 0.45270052979288866, "learning_rate": 1.9730941704035876e-05, "loss": 0.6053, "step": 362 }, { "epoch": 0.33798882681564246, "grad_norm": 0.4020901016830801, "learning_rate": 1.972404277337013e-05, "loss": 0.576, "step": 363 }, { "epoch": 0.33891992551210426, "grad_norm": 0.4015934545724578, "learning_rate": 1.9717143842704384e-05, "loss": 0.5637, "step": 364 }, { "epoch": 0.3398510242085661, "grad_norm": 0.3863155836758769, "learning_rate": 1.9710244912038637e-05, "loss": 0.5595, "step": 365 }, { "epoch": 0.3407821229050279, "grad_norm": 0.3699248856035974, "learning_rate": 1.970334598137289e-05, "loss": 0.5536, "step": 366 }, { "epoch": 0.34171322160148976, "grad_norm": 0.401142602274875, "learning_rate": 1.969644705070714e-05, "loss": 0.5955, "step": 367 }, { "epoch": 0.3426443202979516, "grad_norm": 0.3326313895704682, "learning_rate": 1.9689548120041394e-05, "loss": 0.5851, "step": 368 }, { "epoch": 0.3435754189944134, "grad_norm": 0.34608452780271115, "learning_rate": 1.968264918937565e-05, "loss": 0.5769, "step": 369 }, { "epoch": 0.34450651769087526, "grad_norm": 0.37045359469844447, "learning_rate": 1.9675750258709902e-05, "loss": 0.6104, "step": 370 }, { "epoch": 0.34543761638733705, "grad_norm": 0.3700730597904879, "learning_rate": 1.9668851328044154e-05, "loss": 0.5548, "step": 371 }, { "epoch": 0.3463687150837989, "grad_norm": 0.3835434155001392, "learning_rate": 1.9661952397378407e-05, "loss": 0.6047, "step": 372 }, { "epoch": 0.3472998137802607, "grad_norm": 0.3789470639843901, "learning_rate": 1.9655053466712663e-05, "loss": 0.558, "step": 373 }, { "epoch": 0.34823091247672255, "grad_norm": 0.3552699579281307, "learning_rate": 1.9648154536046915e-05, "loss": 0.5796, "step": 374 }, { "epoch": 0.34916201117318435, "grad_norm": 0.4527091611248984, "learning_rate": 1.9641255605381167e-05, "loss": 0.5876, "step": 375 }, { "epoch": 0.3500931098696462, "grad_norm": 0.34952090747531417, "learning_rate": 1.963435667471542e-05, "loss": 0.5455, "step": 376 }, { "epoch": 0.351024208566108, "grad_norm": 0.3881511041892465, "learning_rate": 1.9627457744049672e-05, "loss": 0.5741, "step": 377 }, { "epoch": 0.35195530726256985, "grad_norm": 0.36511883886594815, "learning_rate": 1.9620558813383928e-05, "loss": 0.5745, "step": 378 }, { "epoch": 0.35288640595903165, "grad_norm": 0.3455560959783625, "learning_rate": 1.961365988271818e-05, "loss": 0.5491, "step": 379 }, { "epoch": 0.3538175046554935, "grad_norm": 0.35339031375447805, "learning_rate": 1.9606760952052433e-05, "loss": 0.565, "step": 380 }, { "epoch": 0.3547486033519553, "grad_norm": 0.40692291022594224, "learning_rate": 1.9599862021386685e-05, "loss": 0.5651, "step": 381 }, { "epoch": 0.35567970204841715, "grad_norm": 0.3566585876382158, "learning_rate": 1.959296309072094e-05, "loss": 0.5507, "step": 382 }, { "epoch": 0.35661080074487894, "grad_norm": 0.40054593193914906, "learning_rate": 1.9586064160055193e-05, "loss": 0.5647, "step": 383 }, { "epoch": 0.3575418994413408, "grad_norm": 0.3724684602976093, "learning_rate": 1.9579165229389446e-05, "loss": 0.5523, "step": 384 }, { "epoch": 0.3584729981378026, "grad_norm": 0.38736132835306475, "learning_rate": 1.95722662987237e-05, "loss": 0.5416, "step": 385 }, { "epoch": 0.35940409683426444, "grad_norm": 0.3302823657021269, "learning_rate": 1.9565367368057954e-05, "loss": 0.5296, "step": 386 }, { "epoch": 0.36033519553072624, "grad_norm": 0.39806503145354166, "learning_rate": 1.9558468437392207e-05, "loss": 0.5706, "step": 387 }, { "epoch": 0.3612662942271881, "grad_norm": 0.36827538313767455, "learning_rate": 1.955156950672646e-05, "loss": 0.5848, "step": 388 }, { "epoch": 0.3621973929236499, "grad_norm": 0.3876227068814333, "learning_rate": 1.954467057606071e-05, "loss": 0.5386, "step": 389 }, { "epoch": 0.36312849162011174, "grad_norm": 0.38792482975431836, "learning_rate": 1.9537771645394964e-05, "loss": 0.5829, "step": 390 }, { "epoch": 0.36405959031657353, "grad_norm": 0.3926366774202285, "learning_rate": 1.953087271472922e-05, "loss": 0.565, "step": 391 }, { "epoch": 0.3649906890130354, "grad_norm": 0.4305103225615148, "learning_rate": 1.9523973784063472e-05, "loss": 0.5638, "step": 392 }, { "epoch": 0.3659217877094972, "grad_norm": 0.39747630028682823, "learning_rate": 1.9517074853397724e-05, "loss": 0.554, "step": 393 }, { "epoch": 0.36685288640595903, "grad_norm": 0.4534355880569542, "learning_rate": 1.9510175922731977e-05, "loss": 0.5452, "step": 394 }, { "epoch": 0.36778398510242083, "grad_norm": 0.36859523611640393, "learning_rate": 1.9503276992066233e-05, "loss": 0.5422, "step": 395 }, { "epoch": 0.3687150837988827, "grad_norm": 0.4258799520036439, "learning_rate": 1.9496378061400485e-05, "loss": 0.5521, "step": 396 }, { "epoch": 0.36964618249534453, "grad_norm": 0.36087730515169303, "learning_rate": 1.9489479130734737e-05, "loss": 0.5531, "step": 397 }, { "epoch": 0.37057728119180633, "grad_norm": 0.4555492285649551, "learning_rate": 1.948258020006899e-05, "loss": 0.5512, "step": 398 }, { "epoch": 0.3715083798882682, "grad_norm": 0.35485695497069925, "learning_rate": 1.9475681269403242e-05, "loss": 0.5475, "step": 399 }, { "epoch": 0.37243947858473, "grad_norm": 0.3974586956396829, "learning_rate": 1.9468782338737498e-05, "loss": 0.551, "step": 400 }, { "epoch": 0.37337057728119183, "grad_norm": 0.36745872357290915, "learning_rate": 1.946188340807175e-05, "loss": 0.5683, "step": 401 }, { "epoch": 0.3743016759776536, "grad_norm": 0.35352687555872686, "learning_rate": 1.9454984477406003e-05, "loss": 0.5367, "step": 402 }, { "epoch": 0.3752327746741155, "grad_norm": 0.3562451416269324, "learning_rate": 1.9448085546740255e-05, "loss": 0.5746, "step": 403 }, { "epoch": 0.3761638733705773, "grad_norm": 0.3586513399646623, "learning_rate": 1.944118661607451e-05, "loss": 0.5556, "step": 404 }, { "epoch": 0.3770949720670391, "grad_norm": 0.3908421866959711, "learning_rate": 1.9434287685408763e-05, "loss": 0.5444, "step": 405 }, { "epoch": 0.3780260707635009, "grad_norm": 0.37929971453170236, "learning_rate": 1.9427388754743016e-05, "loss": 0.5721, "step": 406 }, { "epoch": 0.3789571694599628, "grad_norm": 0.36934183160922085, "learning_rate": 1.9420489824077268e-05, "loss": 0.5656, "step": 407 }, { "epoch": 0.37988826815642457, "grad_norm": 0.370016495253698, "learning_rate": 1.9413590893411524e-05, "loss": 0.555, "step": 408 }, { "epoch": 0.3808193668528864, "grad_norm": 0.4048202606886159, "learning_rate": 1.9406691962745776e-05, "loss": 0.5486, "step": 409 }, { "epoch": 0.3817504655493482, "grad_norm": 0.3639811760812688, "learning_rate": 1.939979303208003e-05, "loss": 0.57, "step": 410 }, { "epoch": 0.38268156424581007, "grad_norm": 0.37905624221772855, "learning_rate": 1.939289410141428e-05, "loss": 0.556, "step": 411 }, { "epoch": 0.38361266294227186, "grad_norm": 0.37582696220941963, "learning_rate": 1.9385995170748534e-05, "loss": 0.5656, "step": 412 }, { "epoch": 0.3845437616387337, "grad_norm": 0.40021563604202626, "learning_rate": 1.937909624008279e-05, "loss": 0.5671, "step": 413 }, { "epoch": 0.3854748603351955, "grad_norm": 0.42103961702951714, "learning_rate": 1.9372197309417042e-05, "loss": 0.5663, "step": 414 }, { "epoch": 0.38640595903165736, "grad_norm": 0.3825638820407727, "learning_rate": 1.9365298378751298e-05, "loss": 0.5579, "step": 415 }, { "epoch": 0.38733705772811916, "grad_norm": 0.40235109473056974, "learning_rate": 1.9358399448085547e-05, "loss": 0.5397, "step": 416 }, { "epoch": 0.388268156424581, "grad_norm": 0.35637910999761335, "learning_rate": 1.9351500517419802e-05, "loss": 0.529, "step": 417 }, { "epoch": 0.3891992551210428, "grad_norm": 0.4000334422132714, "learning_rate": 1.9344601586754055e-05, "loss": 0.5515, "step": 418 }, { "epoch": 0.39013035381750466, "grad_norm": 0.35866306117233576, "learning_rate": 1.9337702656088307e-05, "loss": 0.5777, "step": 419 }, { "epoch": 0.39106145251396646, "grad_norm": 0.37863466097413456, "learning_rate": 1.933080372542256e-05, "loss": 0.5524, "step": 420 }, { "epoch": 0.3919925512104283, "grad_norm": 0.3541119313064459, "learning_rate": 1.9323904794756812e-05, "loss": 0.5817, "step": 421 }, { "epoch": 0.3929236499068901, "grad_norm": 0.37969748868592046, "learning_rate": 1.9317005864091068e-05, "loss": 0.5794, "step": 422 }, { "epoch": 0.39385474860335196, "grad_norm": 0.40847740814710165, "learning_rate": 1.931010693342532e-05, "loss": 0.5628, "step": 423 }, { "epoch": 0.3947858472998138, "grad_norm": 0.35785186836006794, "learning_rate": 1.9303208002759576e-05, "loss": 0.574, "step": 424 }, { "epoch": 0.3957169459962756, "grad_norm": 0.39264285240901464, "learning_rate": 1.9296309072093825e-05, "loss": 0.5454, "step": 425 }, { "epoch": 0.39664804469273746, "grad_norm": 0.3946829537675036, "learning_rate": 1.928941014142808e-05, "loss": 0.5717, "step": 426 }, { "epoch": 0.39757914338919925, "grad_norm": 0.3604101210827896, "learning_rate": 1.9282511210762333e-05, "loss": 0.5299, "step": 427 }, { "epoch": 0.3985102420856611, "grad_norm": 0.41871752909299825, "learning_rate": 1.9275612280096586e-05, "loss": 0.5569, "step": 428 }, { "epoch": 0.3994413407821229, "grad_norm": 0.36246451734030494, "learning_rate": 1.9268713349430838e-05, "loss": 0.557, "step": 429 }, { "epoch": 0.40037243947858475, "grad_norm": 0.40993986715765274, "learning_rate": 1.9261814418765094e-05, "loss": 0.5937, "step": 430 }, { "epoch": 0.40130353817504655, "grad_norm": 0.4038027359191298, "learning_rate": 1.9254915488099346e-05, "loss": 0.5761, "step": 431 }, { "epoch": 0.4022346368715084, "grad_norm": 0.3737454994040278, "learning_rate": 1.92480165574336e-05, "loss": 0.5828, "step": 432 }, { "epoch": 0.4031657355679702, "grad_norm": 0.407477785443783, "learning_rate": 1.9241117626767855e-05, "loss": 0.5561, "step": 433 }, { "epoch": 0.40409683426443205, "grad_norm": 0.3823897850247175, "learning_rate": 1.9234218696102104e-05, "loss": 0.5539, "step": 434 }, { "epoch": 0.40502793296089384, "grad_norm": 0.3810082867217002, "learning_rate": 1.922731976543636e-05, "loss": 0.5538, "step": 435 }, { "epoch": 0.4059590316573557, "grad_norm": 0.3961874080055355, "learning_rate": 1.9220420834770612e-05, "loss": 0.5939, "step": 436 }, { "epoch": 0.4068901303538175, "grad_norm": 0.38057392413726304, "learning_rate": 1.9213521904104868e-05, "loss": 0.5683, "step": 437 }, { "epoch": 0.40782122905027934, "grad_norm": 0.39766612895852466, "learning_rate": 1.9206622973439117e-05, "loss": 0.5783, "step": 438 }, { "epoch": 0.40875232774674114, "grad_norm": 0.38554704418989505, "learning_rate": 1.9199724042773372e-05, "loss": 0.5532, "step": 439 }, { "epoch": 0.409683426443203, "grad_norm": 0.3312722342209543, "learning_rate": 1.9192825112107625e-05, "loss": 0.5835, "step": 440 }, { "epoch": 0.4106145251396648, "grad_norm": 0.37836809132196336, "learning_rate": 1.9185926181441877e-05, "loss": 0.5668, "step": 441 }, { "epoch": 0.41154562383612664, "grad_norm": 0.36889529853957, "learning_rate": 1.9179027250776133e-05, "loss": 0.5713, "step": 442 }, { "epoch": 0.41247672253258844, "grad_norm": 0.37060479982275746, "learning_rate": 1.9172128320110382e-05, "loss": 0.5467, "step": 443 }, { "epoch": 0.4134078212290503, "grad_norm": 0.35816275474157455, "learning_rate": 1.9165229389444638e-05, "loss": 0.523, "step": 444 }, { "epoch": 0.4143389199255121, "grad_norm": 0.3459153605354565, "learning_rate": 1.915833045877889e-05, "loss": 0.5337, "step": 445 }, { "epoch": 0.41527001862197394, "grad_norm": 0.37948119332324887, "learning_rate": 1.9151431528113146e-05, "loss": 0.5508, "step": 446 }, { "epoch": 0.41620111731843573, "grad_norm": 0.3705108005323272, "learning_rate": 1.9144532597447395e-05, "loss": 0.5637, "step": 447 }, { "epoch": 0.4171322160148976, "grad_norm": 0.3574040572170811, "learning_rate": 1.913763366678165e-05, "loss": 0.5468, "step": 448 }, { "epoch": 0.4180633147113594, "grad_norm": 0.3732805407516767, "learning_rate": 1.9130734736115903e-05, "loss": 0.5238, "step": 449 }, { "epoch": 0.41899441340782123, "grad_norm": 0.3836494202996082, "learning_rate": 1.9123835805450156e-05, "loss": 0.5822, "step": 450 }, { "epoch": 0.419925512104283, "grad_norm": 0.38531849883568664, "learning_rate": 1.911693687478441e-05, "loss": 0.5704, "step": 451 }, { "epoch": 0.4208566108007449, "grad_norm": 0.3934255062106146, "learning_rate": 1.9110037944118664e-05, "loss": 0.5628, "step": 452 }, { "epoch": 0.42178770949720673, "grad_norm": 0.48235536288173037, "learning_rate": 1.9103139013452916e-05, "loss": 0.591, "step": 453 }, { "epoch": 0.4227188081936685, "grad_norm": 0.4098468726318603, "learning_rate": 1.909624008278717e-05, "loss": 0.5588, "step": 454 }, { "epoch": 0.4236499068901304, "grad_norm": 0.4330073090092441, "learning_rate": 1.9089341152121424e-05, "loss": 0.5876, "step": 455 }, { "epoch": 0.4245810055865922, "grad_norm": 0.411580658000406, "learning_rate": 1.9082442221455677e-05, "loss": 0.5766, "step": 456 }, { "epoch": 0.425512104283054, "grad_norm": 0.3616248004923249, "learning_rate": 1.907554329078993e-05, "loss": 0.5101, "step": 457 }, { "epoch": 0.4264432029795158, "grad_norm": 0.41777654052222485, "learning_rate": 1.906864436012418e-05, "loss": 0.5564, "step": 458 }, { "epoch": 0.4273743016759777, "grad_norm": 0.3439245352061906, "learning_rate": 1.9061745429458437e-05, "loss": 0.5666, "step": 459 }, { "epoch": 0.42830540037243947, "grad_norm": 0.39828757391427033, "learning_rate": 1.905484649879269e-05, "loss": 0.5534, "step": 460 }, { "epoch": 0.4292364990689013, "grad_norm": 0.36478679461295305, "learning_rate": 1.9047947568126942e-05, "loss": 0.5441, "step": 461 }, { "epoch": 0.4301675977653631, "grad_norm": 0.39986902465779234, "learning_rate": 1.9041048637461195e-05, "loss": 0.5642, "step": 462 }, { "epoch": 0.43109869646182497, "grad_norm": 0.36416708723477714, "learning_rate": 1.9034149706795447e-05, "loss": 0.5784, "step": 463 }, { "epoch": 0.43202979515828677, "grad_norm": 0.4098709456310299, "learning_rate": 1.9027250776129703e-05, "loss": 0.5491, "step": 464 }, { "epoch": 0.4329608938547486, "grad_norm": 0.35180255426735263, "learning_rate": 1.9020351845463955e-05, "loss": 0.5609, "step": 465 }, { "epoch": 0.4338919925512104, "grad_norm": 0.40352738016991924, "learning_rate": 1.9013452914798208e-05, "loss": 0.559, "step": 466 }, { "epoch": 0.43482309124767227, "grad_norm": 0.33925771737649146, "learning_rate": 1.900655398413246e-05, "loss": 0.548, "step": 467 }, { "epoch": 0.43575418994413406, "grad_norm": 0.3605507245977634, "learning_rate": 1.8999655053466716e-05, "loss": 0.5595, "step": 468 }, { "epoch": 0.4366852886405959, "grad_norm": 0.3655038052526918, "learning_rate": 1.8992756122800968e-05, "loss": 0.555, "step": 469 }, { "epoch": 0.4376163873370577, "grad_norm": 0.39959328005310196, "learning_rate": 1.898585719213522e-05, "loss": 0.5452, "step": 470 }, { "epoch": 0.43854748603351956, "grad_norm": 0.355759714856853, "learning_rate": 1.8978958261469473e-05, "loss": 0.5821, "step": 471 }, { "epoch": 0.43947858472998136, "grad_norm": 0.3983554144213067, "learning_rate": 1.8972059330803725e-05, "loss": 0.5577, "step": 472 }, { "epoch": 0.4404096834264432, "grad_norm": 0.3443109772391663, "learning_rate": 1.896516040013798e-05, "loss": 0.5647, "step": 473 }, { "epoch": 0.441340782122905, "grad_norm": 0.34748188213196124, "learning_rate": 1.8958261469472234e-05, "loss": 0.5433, "step": 474 }, { "epoch": 0.44227188081936686, "grad_norm": 0.3615379845429285, "learning_rate": 1.8951362538806486e-05, "loss": 0.5721, "step": 475 }, { "epoch": 0.44320297951582865, "grad_norm": 0.3240140779411053, "learning_rate": 1.894446360814074e-05, "loss": 0.5452, "step": 476 }, { "epoch": 0.4441340782122905, "grad_norm": 0.3602176170608408, "learning_rate": 1.8937564677474994e-05, "loss": 0.5502, "step": 477 }, { "epoch": 0.4450651769087523, "grad_norm": 0.39064193761803306, "learning_rate": 1.8930665746809247e-05, "loss": 0.5696, "step": 478 }, { "epoch": 0.44599627560521415, "grad_norm": 0.3308126984906385, "learning_rate": 1.89237668161435e-05, "loss": 0.5426, "step": 479 }, { "epoch": 0.44692737430167595, "grad_norm": 0.3651995064082963, "learning_rate": 1.891686788547775e-05, "loss": 0.5323, "step": 480 }, { "epoch": 0.4478584729981378, "grad_norm": 0.33992526567368175, "learning_rate": 1.8909968954812007e-05, "loss": 0.5508, "step": 481 }, { "epoch": 0.44878957169459965, "grad_norm": 0.39917991718025936, "learning_rate": 1.890307002414626e-05, "loss": 0.5636, "step": 482 }, { "epoch": 0.44972067039106145, "grad_norm": 0.37532390108018954, "learning_rate": 1.8896171093480512e-05, "loss": 0.5532, "step": 483 }, { "epoch": 0.4506517690875233, "grad_norm": 0.35002762134007254, "learning_rate": 1.8889272162814765e-05, "loss": 0.5339, "step": 484 }, { "epoch": 0.4515828677839851, "grad_norm": 0.36764392664398315, "learning_rate": 1.8882373232149017e-05, "loss": 0.5521, "step": 485 }, { "epoch": 0.45251396648044695, "grad_norm": 0.4005079299803608, "learning_rate": 1.8875474301483273e-05, "loss": 0.5639, "step": 486 }, { "epoch": 0.45344506517690875, "grad_norm": 0.3615377279712041, "learning_rate": 1.8868575370817525e-05, "loss": 0.5849, "step": 487 }, { "epoch": 0.4543761638733706, "grad_norm": 0.3638463673690751, "learning_rate": 1.8861676440151778e-05, "loss": 0.5345, "step": 488 }, { "epoch": 0.4553072625698324, "grad_norm": 0.34210859279245903, "learning_rate": 1.885477750948603e-05, "loss": 0.5338, "step": 489 }, { "epoch": 0.45623836126629425, "grad_norm": 0.44923190808842517, "learning_rate": 1.8847878578820286e-05, "loss": 0.5616, "step": 490 }, { "epoch": 0.45716945996275604, "grad_norm": 0.36270001834082827, "learning_rate": 1.8840979648154538e-05, "loss": 0.563, "step": 491 }, { "epoch": 0.4581005586592179, "grad_norm": 0.4862638906240566, "learning_rate": 1.883408071748879e-05, "loss": 0.5612, "step": 492 }, { "epoch": 0.4590316573556797, "grad_norm": 0.33646109620843967, "learning_rate": 1.8827181786823043e-05, "loss": 0.5422, "step": 493 }, { "epoch": 0.45996275605214154, "grad_norm": 0.41782870916909315, "learning_rate": 1.8820282856157295e-05, "loss": 0.5341, "step": 494 }, { "epoch": 0.46089385474860334, "grad_norm": 0.38401729262786205, "learning_rate": 1.881338392549155e-05, "loss": 0.5274, "step": 495 }, { "epoch": 0.4618249534450652, "grad_norm": 0.3824743754733937, "learning_rate": 1.8806484994825804e-05, "loss": 0.5364, "step": 496 }, { "epoch": 0.462756052141527, "grad_norm": 0.3863925775879702, "learning_rate": 1.8799586064160056e-05, "loss": 0.5457, "step": 497 }, { "epoch": 0.46368715083798884, "grad_norm": 0.4218851047356773, "learning_rate": 1.879268713349431e-05, "loss": 0.5698, "step": 498 }, { "epoch": 0.46461824953445063, "grad_norm": 0.3629528186231234, "learning_rate": 1.8785788202828564e-05, "loss": 0.5798, "step": 499 }, { "epoch": 0.4655493482309125, "grad_norm": 0.4119106197488833, "learning_rate": 1.8778889272162817e-05, "loss": 0.5397, "step": 500 }, { "epoch": 0.4664804469273743, "grad_norm": 0.33584918183229284, "learning_rate": 1.877199034149707e-05, "loss": 0.5402, "step": 501 }, { "epoch": 0.46741154562383613, "grad_norm": 0.38029481532889736, "learning_rate": 1.876509141083132e-05, "loss": 0.5548, "step": 502 }, { "epoch": 0.46834264432029793, "grad_norm": 0.3848539436015809, "learning_rate": 1.8758192480165577e-05, "loss": 0.5357, "step": 503 }, { "epoch": 0.4692737430167598, "grad_norm": 0.38215408038282, "learning_rate": 1.875129354949983e-05, "loss": 0.5788, "step": 504 }, { "epoch": 0.4702048417132216, "grad_norm": 0.3296583112843553, "learning_rate": 1.8744394618834082e-05, "loss": 0.5272, "step": 505 }, { "epoch": 0.47113594040968343, "grad_norm": 0.4121273194437246, "learning_rate": 1.8737495688168334e-05, "loss": 0.5469, "step": 506 }, { "epoch": 0.4720670391061452, "grad_norm": 0.34719799737729723, "learning_rate": 1.8730596757502587e-05, "loss": 0.5409, "step": 507 }, { "epoch": 0.4729981378026071, "grad_norm": 0.3692631136549245, "learning_rate": 1.8723697826836843e-05, "loss": 0.5162, "step": 508 }, { "epoch": 0.47392923649906893, "grad_norm": 0.36023820378423854, "learning_rate": 1.8716798896171095e-05, "loss": 0.5537, "step": 509 }, { "epoch": 0.4748603351955307, "grad_norm": 0.35263445651529096, "learning_rate": 1.870989996550535e-05, "loss": 0.5595, "step": 510 }, { "epoch": 0.4757914338919926, "grad_norm": 0.3775230210620991, "learning_rate": 1.87030010348396e-05, "loss": 0.5455, "step": 511 }, { "epoch": 0.4767225325884544, "grad_norm": 0.33696119899645155, "learning_rate": 1.8696102104173856e-05, "loss": 0.554, "step": 512 }, { "epoch": 0.4776536312849162, "grad_norm": 0.38973512439188573, "learning_rate": 1.8689203173508108e-05, "loss": 0.5471, "step": 513 }, { "epoch": 0.478584729981378, "grad_norm": 0.3490688136148917, "learning_rate": 1.868230424284236e-05, "loss": 0.5637, "step": 514 }, { "epoch": 0.4795158286778399, "grad_norm": 0.36995033919989206, "learning_rate": 1.8675405312176613e-05, "loss": 0.5706, "step": 515 }, { "epoch": 0.48044692737430167, "grad_norm": 0.34976826323279964, "learning_rate": 1.8668506381510865e-05, "loss": 0.5437, "step": 516 }, { "epoch": 0.4813780260707635, "grad_norm": 0.37971598875406387, "learning_rate": 1.866160745084512e-05, "loss": 0.566, "step": 517 }, { "epoch": 0.4823091247672253, "grad_norm": 0.3418030893402947, "learning_rate": 1.8654708520179373e-05, "loss": 0.5367, "step": 518 }, { "epoch": 0.48324022346368717, "grad_norm": 0.4383096066638027, "learning_rate": 1.864780958951363e-05, "loss": 0.574, "step": 519 }, { "epoch": 0.48417132216014896, "grad_norm": 0.38491722513287724, "learning_rate": 1.864091065884788e-05, "loss": 0.5339, "step": 520 }, { "epoch": 0.4851024208566108, "grad_norm": 0.3668906700987051, "learning_rate": 1.8634011728182134e-05, "loss": 0.5793, "step": 521 }, { "epoch": 0.4860335195530726, "grad_norm": 0.40390530770517497, "learning_rate": 1.8627112797516387e-05, "loss": 0.5486, "step": 522 }, { "epoch": 0.48696461824953446, "grad_norm": 0.3846003517000977, "learning_rate": 1.862021386685064e-05, "loss": 0.5683, "step": 523 }, { "epoch": 0.48789571694599626, "grad_norm": 0.3226297589137864, "learning_rate": 1.861331493618489e-05, "loss": 0.5557, "step": 524 }, { "epoch": 0.4888268156424581, "grad_norm": 0.37430134741654375, "learning_rate": 1.8606416005519147e-05, "loss": 0.5586, "step": 525 }, { "epoch": 0.4897579143389199, "grad_norm": 0.32597450024377217, "learning_rate": 1.85995170748534e-05, "loss": 0.5373, "step": 526 }, { "epoch": 0.49068901303538176, "grad_norm": 0.3683888791423687, "learning_rate": 1.8592618144187652e-05, "loss": 0.5703, "step": 527 }, { "epoch": 0.49162011173184356, "grad_norm": 0.4113082724898327, "learning_rate": 1.8585719213521908e-05, "loss": 0.5747, "step": 528 }, { "epoch": 0.4925512104283054, "grad_norm": 0.3630040411245967, "learning_rate": 1.8578820282856157e-05, "loss": 0.5171, "step": 529 }, { "epoch": 0.4934823091247672, "grad_norm": 0.35350419257578175, "learning_rate": 1.8571921352190413e-05, "loss": 0.529, "step": 530 }, { "epoch": 0.49441340782122906, "grad_norm": 0.3930483432356105, "learning_rate": 1.8565022421524665e-05, "loss": 0.5463, "step": 531 }, { "epoch": 0.49534450651769085, "grad_norm": 0.3582069883027661, "learning_rate": 1.855812349085892e-05, "loss": 0.5504, "step": 532 }, { "epoch": 0.4962756052141527, "grad_norm": 0.3501226365802424, "learning_rate": 1.855122456019317e-05, "loss": 0.5466, "step": 533 }, { "epoch": 0.4972067039106145, "grad_norm": 0.436790141407754, "learning_rate": 1.8544325629527426e-05, "loss": 0.555, "step": 534 }, { "epoch": 0.49813780260707635, "grad_norm": 0.3423628861258602, "learning_rate": 1.8537426698861678e-05, "loss": 0.5652, "step": 535 }, { "epoch": 0.49906890130353815, "grad_norm": 0.4270239373574108, "learning_rate": 1.853052776819593e-05, "loss": 0.5615, "step": 536 }, { "epoch": 0.5, "grad_norm": 0.3762629238187064, "learning_rate": 1.8523628837530186e-05, "loss": 0.5488, "step": 537 }, { "epoch": 0.5009310986964618, "grad_norm": 0.42099933025995356, "learning_rate": 1.8516729906864435e-05, "loss": 0.5333, "step": 538 }, { "epoch": 0.5018621973929237, "grad_norm": 0.4067442142246971, "learning_rate": 1.850983097619869e-05, "loss": 0.5875, "step": 539 }, { "epoch": 0.5027932960893855, "grad_norm": 0.42596648933285014, "learning_rate": 1.8502932045532943e-05, "loss": 0.5522, "step": 540 }, { "epoch": 0.5037243947858473, "grad_norm": 0.3808593325461599, "learning_rate": 1.84960331148672e-05, "loss": 0.5498, "step": 541 }, { "epoch": 0.5046554934823091, "grad_norm": 0.4363934962607882, "learning_rate": 1.8489134184201448e-05, "loss": 0.5595, "step": 542 }, { "epoch": 0.505586592178771, "grad_norm": 0.39999515591545187, "learning_rate": 1.8482235253535704e-05, "loss": 0.5467, "step": 543 }, { "epoch": 0.5065176908752328, "grad_norm": 0.43983922195986164, "learning_rate": 1.8475336322869956e-05, "loss": 0.5478, "step": 544 }, { "epoch": 0.5074487895716946, "grad_norm": 0.3883318950967464, "learning_rate": 1.846843739220421e-05, "loss": 0.5295, "step": 545 }, { "epoch": 0.5083798882681564, "grad_norm": 0.35217064092050854, "learning_rate": 1.8461538461538465e-05, "loss": 0.5742, "step": 546 }, { "epoch": 0.5093109869646183, "grad_norm": 0.45897755805326984, "learning_rate": 1.8454639530872717e-05, "loss": 0.5473, "step": 547 }, { "epoch": 0.5102420856610801, "grad_norm": 0.3362829018585025, "learning_rate": 1.844774060020697e-05, "loss": 0.5565, "step": 548 }, { "epoch": 0.5111731843575419, "grad_norm": 0.3947425487477197, "learning_rate": 1.8440841669541222e-05, "loss": 0.5576, "step": 549 }, { "epoch": 0.5121042830540037, "grad_norm": 0.41067778826683693, "learning_rate": 1.8433942738875478e-05, "loss": 0.5723, "step": 550 }, { "epoch": 0.5130353817504656, "grad_norm": 0.3350898912227013, "learning_rate": 1.8427043808209727e-05, "loss": 0.5277, "step": 551 }, { "epoch": 0.5139664804469274, "grad_norm": 0.42606723055504714, "learning_rate": 1.8420144877543982e-05, "loss": 0.5661, "step": 552 }, { "epoch": 0.5148975791433892, "grad_norm": 0.36733886905890656, "learning_rate": 1.8413245946878235e-05, "loss": 0.5714, "step": 553 }, { "epoch": 0.515828677839851, "grad_norm": 0.3823615779131506, "learning_rate": 1.840634701621249e-05, "loss": 0.5465, "step": 554 }, { "epoch": 0.5167597765363129, "grad_norm": 0.36831107045192496, "learning_rate": 1.8399448085546743e-05, "loss": 0.5244, "step": 555 }, { "epoch": 0.5176908752327747, "grad_norm": 0.3313748382367959, "learning_rate": 1.8392549154880995e-05, "loss": 0.5421, "step": 556 }, { "epoch": 0.5186219739292365, "grad_norm": 0.37343425675629927, "learning_rate": 1.8385650224215248e-05, "loss": 0.5666, "step": 557 }, { "epoch": 0.5195530726256983, "grad_norm": 0.36865098116661155, "learning_rate": 1.83787512935495e-05, "loss": 0.5551, "step": 558 }, { "epoch": 0.5204841713221602, "grad_norm": 0.3753106101106366, "learning_rate": 1.8371852362883756e-05, "loss": 0.548, "step": 559 }, { "epoch": 0.521415270018622, "grad_norm": 0.4054905293785059, "learning_rate": 1.8364953432218005e-05, "loss": 0.5553, "step": 560 }, { "epoch": 0.5223463687150838, "grad_norm": 0.4316822265578647, "learning_rate": 1.835805450155226e-05, "loss": 0.5783, "step": 561 }, { "epoch": 0.5232774674115456, "grad_norm": 0.4270953683676223, "learning_rate": 1.8351155570886513e-05, "loss": 0.5591, "step": 562 }, { "epoch": 0.5242085661080075, "grad_norm": 0.35490821682919355, "learning_rate": 1.834425664022077e-05, "loss": 0.5553, "step": 563 }, { "epoch": 0.5251396648044693, "grad_norm": 0.46788429157973044, "learning_rate": 1.833735770955502e-05, "loss": 0.5247, "step": 564 }, { "epoch": 0.5260707635009311, "grad_norm": 0.3940455485209773, "learning_rate": 1.8330458778889274e-05, "loss": 0.5334, "step": 565 }, { "epoch": 0.527001862197393, "grad_norm": 0.4043565480083852, "learning_rate": 1.8323559848223526e-05, "loss": 0.5557, "step": 566 }, { "epoch": 0.5279329608938548, "grad_norm": 0.4163732650331135, "learning_rate": 1.831666091755778e-05, "loss": 0.5342, "step": 567 }, { "epoch": 0.5288640595903166, "grad_norm": 0.3765756079466702, "learning_rate": 1.8309761986892035e-05, "loss": 0.5436, "step": 568 }, { "epoch": 0.5297951582867784, "grad_norm": 0.39426008269836343, "learning_rate": 1.8302863056226287e-05, "loss": 0.5624, "step": 569 }, { "epoch": 0.5307262569832403, "grad_norm": 0.36709642959149025, "learning_rate": 1.829596412556054e-05, "loss": 0.547, "step": 570 }, { "epoch": 0.5316573556797021, "grad_norm": 0.36522489481660336, "learning_rate": 1.8289065194894792e-05, "loss": 0.5467, "step": 571 }, { "epoch": 0.5325884543761639, "grad_norm": 0.3395483721677383, "learning_rate": 1.8282166264229048e-05, "loss": 0.5317, "step": 572 }, { "epoch": 0.5335195530726257, "grad_norm": 0.3760102061351917, "learning_rate": 1.82752673335633e-05, "loss": 0.5529, "step": 573 }, { "epoch": 0.5344506517690876, "grad_norm": 0.3593540533018182, "learning_rate": 1.8268368402897552e-05, "loss": 0.5456, "step": 574 }, { "epoch": 0.5353817504655494, "grad_norm": 0.3489443748885651, "learning_rate": 1.8261469472231805e-05, "loss": 0.5559, "step": 575 }, { "epoch": 0.5363128491620112, "grad_norm": 0.3721069670103184, "learning_rate": 1.825457054156606e-05, "loss": 0.5223, "step": 576 }, { "epoch": 0.537243947858473, "grad_norm": 0.34715187314300827, "learning_rate": 1.8247671610900313e-05, "loss": 0.5498, "step": 577 }, { "epoch": 0.5381750465549349, "grad_norm": 0.3746874294938734, "learning_rate": 1.8240772680234565e-05, "loss": 0.5483, "step": 578 }, { "epoch": 0.5391061452513967, "grad_norm": 0.3677809181515882, "learning_rate": 1.8233873749568818e-05, "loss": 0.5775, "step": 579 }, { "epoch": 0.5400372439478585, "grad_norm": 0.35767863499948194, "learning_rate": 1.822697481890307e-05, "loss": 0.5458, "step": 580 }, { "epoch": 0.5409683426443203, "grad_norm": 0.3498518060833482, "learning_rate": 1.8220075888237326e-05, "loss": 0.5566, "step": 581 }, { "epoch": 0.5418994413407822, "grad_norm": 0.3311429059892, "learning_rate": 1.821317695757158e-05, "loss": 0.5284, "step": 582 }, { "epoch": 0.542830540037244, "grad_norm": 0.35423804682937476, "learning_rate": 1.820627802690583e-05, "loss": 0.5359, "step": 583 }, { "epoch": 0.5437616387337058, "grad_norm": 0.3721964023605943, "learning_rate": 1.8199379096240083e-05, "loss": 0.5579, "step": 584 }, { "epoch": 0.5446927374301676, "grad_norm": 0.3458268201294874, "learning_rate": 1.819248016557434e-05, "loss": 0.5304, "step": 585 }, { "epoch": 0.5456238361266295, "grad_norm": 0.39515934580462764, "learning_rate": 1.818558123490859e-05, "loss": 0.536, "step": 586 }, { "epoch": 0.5465549348230913, "grad_norm": 0.33211602653364447, "learning_rate": 1.8178682304242844e-05, "loss": 0.5671, "step": 587 }, { "epoch": 0.547486033519553, "grad_norm": 0.4021702172103218, "learning_rate": 1.8171783373577096e-05, "loss": 0.5557, "step": 588 }, { "epoch": 0.5484171322160148, "grad_norm": 0.3792590797479294, "learning_rate": 1.816488444291135e-05, "loss": 0.5735, "step": 589 }, { "epoch": 0.5493482309124768, "grad_norm": 0.3511217466583204, "learning_rate": 1.8157985512245604e-05, "loss": 0.5722, "step": 590 }, { "epoch": 0.5502793296089385, "grad_norm": 0.3440236746577789, "learning_rate": 1.8151086581579857e-05, "loss": 0.5236, "step": 591 }, { "epoch": 0.5512104283054003, "grad_norm": 0.4202676031510148, "learning_rate": 1.814418765091411e-05, "loss": 0.565, "step": 592 }, { "epoch": 0.5521415270018621, "grad_norm": 0.3992257962248753, "learning_rate": 1.813728872024836e-05, "loss": 0.5463, "step": 593 }, { "epoch": 0.553072625698324, "grad_norm": 0.3548344712945117, "learning_rate": 1.8130389789582617e-05, "loss": 0.5711, "step": 594 }, { "epoch": 0.5540037243947858, "grad_norm": 0.36120050635033096, "learning_rate": 1.812349085891687e-05, "loss": 0.5468, "step": 595 }, { "epoch": 0.5549348230912476, "grad_norm": 0.4221595269324289, "learning_rate": 1.8116591928251122e-05, "loss": 0.5646, "step": 596 }, { "epoch": 0.5558659217877095, "grad_norm": 0.35521536679226506, "learning_rate": 1.8109692997585375e-05, "loss": 0.5519, "step": 597 }, { "epoch": 0.5567970204841713, "grad_norm": 0.3986236961980052, "learning_rate": 1.810279406691963e-05, "loss": 0.5625, "step": 598 }, { "epoch": 0.5577281191806331, "grad_norm": 0.38328760914929927, "learning_rate": 1.8095895136253883e-05, "loss": 0.5475, "step": 599 }, { "epoch": 0.5586592178770949, "grad_norm": 0.41854878497355635, "learning_rate": 1.8088996205588135e-05, "loss": 0.5584, "step": 600 }, { "epoch": 0.5595903165735568, "grad_norm": 0.34693445454183053, "learning_rate": 1.8082097274922388e-05, "loss": 0.5231, "step": 601 }, { "epoch": 0.5605214152700186, "grad_norm": 0.3343345149269928, "learning_rate": 1.807519834425664e-05, "loss": 0.5309, "step": 602 }, { "epoch": 0.5614525139664804, "grad_norm": 0.3940503122301207, "learning_rate": 1.8068299413590896e-05, "loss": 0.523, "step": 603 }, { "epoch": 0.5623836126629422, "grad_norm": 0.31656970611430546, "learning_rate": 1.8061400482925148e-05, "loss": 0.5295, "step": 604 }, { "epoch": 0.5633147113594041, "grad_norm": 0.34899653939939906, "learning_rate": 1.80545015522594e-05, "loss": 0.5275, "step": 605 }, { "epoch": 0.5642458100558659, "grad_norm": 0.3425455757721616, "learning_rate": 1.8047602621593653e-05, "loss": 0.5168, "step": 606 }, { "epoch": 0.5651769087523277, "grad_norm": 0.37914386753321966, "learning_rate": 1.804070369092791e-05, "loss": 0.5581, "step": 607 }, { "epoch": 0.5661080074487895, "grad_norm": 0.33511620146655696, "learning_rate": 1.803380476026216e-05, "loss": 0.5211, "step": 608 }, { "epoch": 0.5670391061452514, "grad_norm": 0.4142018682140536, "learning_rate": 1.8026905829596414e-05, "loss": 0.5831, "step": 609 }, { "epoch": 0.5679702048417132, "grad_norm": 0.3903820612899023, "learning_rate": 1.8020006898930666e-05, "loss": 0.5621, "step": 610 }, { "epoch": 0.568901303538175, "grad_norm": 0.33810187982940765, "learning_rate": 1.801310796826492e-05, "loss": 0.5627, "step": 611 }, { "epoch": 0.5698324022346368, "grad_norm": 0.3921099932617947, "learning_rate": 1.8006209037599174e-05, "loss": 0.5386, "step": 612 }, { "epoch": 0.5707635009310987, "grad_norm": 0.3623918957375319, "learning_rate": 1.7999310106933427e-05, "loss": 0.5267, "step": 613 }, { "epoch": 0.5716945996275605, "grad_norm": 0.3815724587788794, "learning_rate": 1.799241117626768e-05, "loss": 0.5461, "step": 614 }, { "epoch": 0.5726256983240223, "grad_norm": 0.36455682113763155, "learning_rate": 1.798551224560193e-05, "loss": 0.5269, "step": 615 }, { "epoch": 0.5735567970204841, "grad_norm": 0.39067777102765566, "learning_rate": 1.7978613314936187e-05, "loss": 0.5499, "step": 616 }, { "epoch": 0.574487895716946, "grad_norm": 0.34858094717387356, "learning_rate": 1.797171438427044e-05, "loss": 0.5448, "step": 617 }, { "epoch": 0.5754189944134078, "grad_norm": 0.3454869257535463, "learning_rate": 1.7964815453604692e-05, "loss": 0.5407, "step": 618 }, { "epoch": 0.5763500931098696, "grad_norm": 0.3617397770128774, "learning_rate": 1.7957916522938945e-05, "loss": 0.535, "step": 619 }, { "epoch": 0.5772811918063314, "grad_norm": 0.3707985883879738, "learning_rate": 1.79510175922732e-05, "loss": 0.5446, "step": 620 }, { "epoch": 0.5782122905027933, "grad_norm": 0.3227412884630603, "learning_rate": 1.7944118661607453e-05, "loss": 0.5726, "step": 621 }, { "epoch": 0.5791433891992551, "grad_norm": 0.37376844035433265, "learning_rate": 1.7937219730941705e-05, "loss": 0.5535, "step": 622 }, { "epoch": 0.5800744878957169, "grad_norm": 0.34257664434518437, "learning_rate": 1.7930320800275958e-05, "loss": 0.5293, "step": 623 }, { "epoch": 0.5810055865921788, "grad_norm": 0.31986380288195015, "learning_rate": 1.792342186961021e-05, "loss": 0.5344, "step": 624 }, { "epoch": 0.5819366852886406, "grad_norm": 0.3826803089533355, "learning_rate": 1.7916522938944466e-05, "loss": 0.5862, "step": 625 }, { "epoch": 0.5828677839851024, "grad_norm": 0.33554575614855203, "learning_rate": 1.7909624008278718e-05, "loss": 0.5353, "step": 626 }, { "epoch": 0.5837988826815642, "grad_norm": 0.3402087486178513, "learning_rate": 1.7902725077612974e-05, "loss": 0.5359, "step": 627 }, { "epoch": 0.5847299813780261, "grad_norm": 0.3625730792705765, "learning_rate": 1.7895826146947223e-05, "loss": 0.5581, "step": 628 }, { "epoch": 0.5856610800744879, "grad_norm": 0.326410684911989, "learning_rate": 1.788892721628148e-05, "loss": 0.5344, "step": 629 }, { "epoch": 0.5865921787709497, "grad_norm": 0.33916433401137425, "learning_rate": 1.788202828561573e-05, "loss": 0.5337, "step": 630 }, { "epoch": 0.5875232774674115, "grad_norm": 0.32305403486254247, "learning_rate": 1.7875129354949984e-05, "loss": 0.5243, "step": 631 }, { "epoch": 0.5884543761638734, "grad_norm": 0.32734906298741373, "learning_rate": 1.7868230424284236e-05, "loss": 0.5653, "step": 632 }, { "epoch": 0.5893854748603352, "grad_norm": 0.3853640753383497, "learning_rate": 1.786133149361849e-05, "loss": 0.5211, "step": 633 }, { "epoch": 0.590316573556797, "grad_norm": 0.3213296874408562, "learning_rate": 1.7854432562952744e-05, "loss": 0.5779, "step": 634 }, { "epoch": 0.5912476722532588, "grad_norm": 0.34702474176305215, "learning_rate": 1.7847533632286997e-05, "loss": 0.504, "step": 635 }, { "epoch": 0.5921787709497207, "grad_norm": 0.3992332825997294, "learning_rate": 1.7840634701621252e-05, "loss": 0.5601, "step": 636 }, { "epoch": 0.5931098696461825, "grad_norm": 0.33923613117776485, "learning_rate": 1.78337357709555e-05, "loss": 0.528, "step": 637 }, { "epoch": 0.5940409683426443, "grad_norm": 0.3756437986015526, "learning_rate": 1.7826836840289757e-05, "loss": 0.5559, "step": 638 }, { "epoch": 0.5949720670391061, "grad_norm": 0.3327010210455596, "learning_rate": 1.781993790962401e-05, "loss": 0.5289, "step": 639 }, { "epoch": 0.595903165735568, "grad_norm": 0.3541595930127993, "learning_rate": 1.7813038978958262e-05, "loss": 0.5541, "step": 640 }, { "epoch": 0.5968342644320298, "grad_norm": 0.34881211671118906, "learning_rate": 1.7806140048292514e-05, "loss": 0.5495, "step": 641 }, { "epoch": 0.5977653631284916, "grad_norm": 0.3635114644398324, "learning_rate": 1.779924111762677e-05, "loss": 0.5511, "step": 642 }, { "epoch": 0.5986964618249534, "grad_norm": 0.34771871218049255, "learning_rate": 1.7792342186961023e-05, "loss": 0.5462, "step": 643 }, { "epoch": 0.5996275605214153, "grad_norm": 0.3692544572935161, "learning_rate": 1.7785443256295275e-05, "loss": 0.5532, "step": 644 }, { "epoch": 0.6005586592178771, "grad_norm": 0.3425964900588, "learning_rate": 1.777854432562953e-05, "loss": 0.5313, "step": 645 }, { "epoch": 0.6014897579143389, "grad_norm": 0.3677020255406697, "learning_rate": 1.777164539496378e-05, "loss": 0.5455, "step": 646 }, { "epoch": 0.6024208566108007, "grad_norm": 0.40420324778052913, "learning_rate": 1.7764746464298036e-05, "loss": 0.5553, "step": 647 }, { "epoch": 0.6033519553072626, "grad_norm": 0.33446168455485803, "learning_rate": 1.7757847533632288e-05, "loss": 0.5498, "step": 648 }, { "epoch": 0.6042830540037244, "grad_norm": 0.3501899917063689, "learning_rate": 1.7750948602966544e-05, "loss": 0.5242, "step": 649 }, { "epoch": 0.6052141527001862, "grad_norm": 0.3933232338173743, "learning_rate": 1.7744049672300793e-05, "loss": 0.5365, "step": 650 }, { "epoch": 0.6061452513966481, "grad_norm": 0.3563070537471471, "learning_rate": 1.773715074163505e-05, "loss": 0.5325, "step": 651 }, { "epoch": 0.6070763500931099, "grad_norm": 0.3524758990723731, "learning_rate": 1.77302518109693e-05, "loss": 0.5289, "step": 652 }, { "epoch": 0.6080074487895717, "grad_norm": 0.38570065389194486, "learning_rate": 1.7723352880303553e-05, "loss": 0.5695, "step": 653 }, { "epoch": 0.6089385474860335, "grad_norm": 0.370455286054903, "learning_rate": 1.771645394963781e-05, "loss": 0.557, "step": 654 }, { "epoch": 0.6098696461824954, "grad_norm": 0.3524223561333024, "learning_rate": 1.770955501897206e-05, "loss": 0.5528, "step": 655 }, { "epoch": 0.6108007448789572, "grad_norm": 0.343904113361161, "learning_rate": 1.7702656088306314e-05, "loss": 0.5546, "step": 656 }, { "epoch": 0.611731843575419, "grad_norm": 0.41490543575882194, "learning_rate": 1.7695757157640567e-05, "loss": 0.5627, "step": 657 }, { "epoch": 0.6126629422718808, "grad_norm": 0.3318563246293762, "learning_rate": 1.7688858226974822e-05, "loss": 0.5239, "step": 658 }, { "epoch": 0.6135940409683427, "grad_norm": 0.3378744285656808, "learning_rate": 1.768195929630907e-05, "loss": 0.5839, "step": 659 }, { "epoch": 0.6145251396648045, "grad_norm": 0.37221576738101825, "learning_rate": 1.7675060365643327e-05, "loss": 0.5291, "step": 660 }, { "epoch": 0.6154562383612663, "grad_norm": 0.39412476349249703, "learning_rate": 1.766816143497758e-05, "loss": 0.5799, "step": 661 }, { "epoch": 0.6163873370577281, "grad_norm": 0.3441981428337193, "learning_rate": 1.7661262504311832e-05, "loss": 0.5457, "step": 662 }, { "epoch": 0.61731843575419, "grad_norm": 0.33121388358237114, "learning_rate": 1.7654363573646088e-05, "loss": 0.5144, "step": 663 }, { "epoch": 0.6182495344506518, "grad_norm": 0.38675517250562524, "learning_rate": 1.764746464298034e-05, "loss": 0.5273, "step": 664 }, { "epoch": 0.6191806331471136, "grad_norm": 0.3328164945507559, "learning_rate": 1.7640565712314593e-05, "loss": 0.5492, "step": 665 }, { "epoch": 0.6201117318435754, "grad_norm": 0.3662142698595696, "learning_rate": 1.7633666781648845e-05, "loss": 0.5617, "step": 666 }, { "epoch": 0.6210428305400373, "grad_norm": 0.3520129929062195, "learning_rate": 1.76267678509831e-05, "loss": 0.5318, "step": 667 }, { "epoch": 0.6219739292364991, "grad_norm": 0.3553900041492212, "learning_rate": 1.761986892031735e-05, "loss": 0.5459, "step": 668 }, { "epoch": 0.6229050279329609, "grad_norm": 0.3779647202595698, "learning_rate": 1.7612969989651606e-05, "loss": 0.5369, "step": 669 }, { "epoch": 0.6238361266294227, "grad_norm": 0.3783677322360891, "learning_rate": 1.7606071058985858e-05, "loss": 0.5624, "step": 670 }, { "epoch": 0.6247672253258846, "grad_norm": 0.3765549998561985, "learning_rate": 1.7599172128320114e-05, "loss": 0.5327, "step": 671 }, { "epoch": 0.6256983240223464, "grad_norm": 0.3962763697748269, "learning_rate": 1.7592273197654366e-05, "loss": 0.5399, "step": 672 }, { "epoch": 0.6266294227188082, "grad_norm": 0.3830663574257988, "learning_rate": 1.758537426698862e-05, "loss": 0.5415, "step": 673 }, { "epoch": 0.62756052141527, "grad_norm": 0.3992113054963334, "learning_rate": 1.757847533632287e-05, "loss": 0.5416, "step": 674 }, { "epoch": 0.6284916201117319, "grad_norm": 0.4033111839405797, "learning_rate": 1.7571576405657123e-05, "loss": 0.531, "step": 675 }, { "epoch": 0.6294227188081937, "grad_norm": 0.38135886973841265, "learning_rate": 1.756467747499138e-05, "loss": 0.5583, "step": 676 }, { "epoch": 0.6303538175046555, "grad_norm": 0.4016807114531726, "learning_rate": 1.7557778544325628e-05, "loss": 0.5306, "step": 677 }, { "epoch": 0.6312849162011173, "grad_norm": 0.3378744088644672, "learning_rate": 1.7550879613659884e-05, "loss": 0.525, "step": 678 }, { "epoch": 0.6322160148975792, "grad_norm": 0.3928325016168966, "learning_rate": 1.7543980682994136e-05, "loss": 0.5425, "step": 679 }, { "epoch": 0.633147113594041, "grad_norm": 0.36564903221554085, "learning_rate": 1.7537081752328392e-05, "loss": 0.5539, "step": 680 }, { "epoch": 0.6340782122905028, "grad_norm": 0.33781452211686824, "learning_rate": 1.7530182821662645e-05, "loss": 0.5535, "step": 681 }, { "epoch": 0.6350093109869647, "grad_norm": 0.4000155772975672, "learning_rate": 1.7523283890996897e-05, "loss": 0.545, "step": 682 }, { "epoch": 0.6359404096834265, "grad_norm": 0.38505515720252675, "learning_rate": 1.751638496033115e-05, "loss": 0.5678, "step": 683 }, { "epoch": 0.6368715083798883, "grad_norm": 0.3613879863541263, "learning_rate": 1.7509486029665402e-05, "loss": 0.5638, "step": 684 }, { "epoch": 0.6378026070763501, "grad_norm": 0.3593555448609962, "learning_rate": 1.7502587098999658e-05, "loss": 0.5526, "step": 685 }, { "epoch": 0.638733705772812, "grad_norm": 0.3941283397404392, "learning_rate": 1.749568816833391e-05, "loss": 0.529, "step": 686 }, { "epoch": 0.6396648044692738, "grad_norm": 0.37155879738648595, "learning_rate": 1.7488789237668162e-05, "loss": 0.5568, "step": 687 }, { "epoch": 0.6405959031657356, "grad_norm": 0.3883643387350072, "learning_rate": 1.7481890307002415e-05, "loss": 0.526, "step": 688 }, { "epoch": 0.6415270018621974, "grad_norm": 0.3473178777367979, "learning_rate": 1.747499137633667e-05, "loss": 0.5504, "step": 689 }, { "epoch": 0.6424581005586593, "grad_norm": 0.42088697870474834, "learning_rate": 1.7468092445670923e-05, "loss": 0.5518, "step": 690 }, { "epoch": 0.6433891992551211, "grad_norm": 0.37703438821681706, "learning_rate": 1.7461193515005175e-05, "loss": 0.5477, "step": 691 }, { "epoch": 0.6443202979515829, "grad_norm": 0.41061306959160193, "learning_rate": 1.7454294584339428e-05, "loss": 0.5648, "step": 692 }, { "epoch": 0.6452513966480447, "grad_norm": 0.3427458652992462, "learning_rate": 1.7447395653673684e-05, "loss": 0.5361, "step": 693 }, { "epoch": 0.6461824953445066, "grad_norm": 0.3556285907149381, "learning_rate": 1.7440496723007936e-05, "loss": 0.5413, "step": 694 }, { "epoch": 0.6471135940409684, "grad_norm": 0.36556192992055453, "learning_rate": 1.743359779234219e-05, "loss": 0.5247, "step": 695 }, { "epoch": 0.6480446927374302, "grad_norm": 0.38931217266551205, "learning_rate": 1.742669886167644e-05, "loss": 0.5564, "step": 696 }, { "epoch": 0.648975791433892, "grad_norm": 0.3761724372604538, "learning_rate": 1.7419799931010693e-05, "loss": 0.5491, "step": 697 }, { "epoch": 0.6499068901303539, "grad_norm": 0.41716701923007543, "learning_rate": 1.741290100034495e-05, "loss": 0.557, "step": 698 }, { "epoch": 0.6508379888268156, "grad_norm": 0.39899193657230186, "learning_rate": 1.74060020696792e-05, "loss": 0.5223, "step": 699 }, { "epoch": 0.6517690875232774, "grad_norm": 0.3678194548756233, "learning_rate": 1.7399103139013454e-05, "loss": 0.5126, "step": 700 }, { "epoch": 0.6527001862197392, "grad_norm": 0.3912619998925302, "learning_rate": 1.7392204208347706e-05, "loss": 0.5567, "step": 701 }, { "epoch": 0.6536312849162011, "grad_norm": 0.3640664037895845, "learning_rate": 1.7385305277681962e-05, "loss": 0.5369, "step": 702 }, { "epoch": 0.654562383612663, "grad_norm": 0.34187497068482287, "learning_rate": 1.7378406347016215e-05, "loss": 0.5263, "step": 703 }, { "epoch": 0.6554934823091247, "grad_norm": 0.3223765237004805, "learning_rate": 1.7371507416350467e-05, "loss": 0.5231, "step": 704 }, { "epoch": 0.6564245810055865, "grad_norm": 0.30944201585551057, "learning_rate": 1.736460848568472e-05, "loss": 0.5353, "step": 705 }, { "epoch": 0.6573556797020484, "grad_norm": 0.37568370442010457, "learning_rate": 1.7357709555018972e-05, "loss": 0.562, "step": 706 }, { "epoch": 0.6582867783985102, "grad_norm": 0.32548330367600187, "learning_rate": 1.7350810624353228e-05, "loss": 0.5395, "step": 707 }, { "epoch": 0.659217877094972, "grad_norm": 0.3238793970650222, "learning_rate": 1.734391169368748e-05, "loss": 0.5503, "step": 708 }, { "epoch": 0.660148975791434, "grad_norm": 0.31929942373098263, "learning_rate": 1.7337012763021732e-05, "loss": 0.5259, "step": 709 }, { "epoch": 0.6610800744878957, "grad_norm": 0.34062046439341187, "learning_rate": 1.7330113832355985e-05, "loss": 0.5671, "step": 710 }, { "epoch": 0.6620111731843575, "grad_norm": 0.32686657225577054, "learning_rate": 1.732321490169024e-05, "loss": 0.5438, "step": 711 }, { "epoch": 0.6629422718808193, "grad_norm": 0.34345048306655573, "learning_rate": 1.7316315971024493e-05, "loss": 0.5318, "step": 712 }, { "epoch": 0.6638733705772812, "grad_norm": 0.34063425934135994, "learning_rate": 1.7309417040358745e-05, "loss": 0.5658, "step": 713 }, { "epoch": 0.664804469273743, "grad_norm": 0.3031504974949563, "learning_rate": 1.7302518109692998e-05, "loss": 0.5217, "step": 714 }, { "epoch": 0.6657355679702048, "grad_norm": 0.35658483546398234, "learning_rate": 1.7295619179027254e-05, "loss": 0.5539, "step": 715 }, { "epoch": 0.6666666666666666, "grad_norm": 0.3266216673483327, "learning_rate": 1.7288720248361506e-05, "loss": 0.5397, "step": 716 }, { "epoch": 0.6675977653631285, "grad_norm": 0.32674873993696735, "learning_rate": 1.728182131769576e-05, "loss": 0.5281, "step": 717 }, { "epoch": 0.6685288640595903, "grad_norm": 0.3601908883048842, "learning_rate": 1.727492238703001e-05, "loss": 0.5478, "step": 718 }, { "epoch": 0.6694599627560521, "grad_norm": 0.35788336846085306, "learning_rate": 1.7268023456364263e-05, "loss": 0.5281, "step": 719 }, { "epoch": 0.6703910614525139, "grad_norm": 0.3343787435940339, "learning_rate": 1.726112452569852e-05, "loss": 0.5224, "step": 720 }, { "epoch": 0.6713221601489758, "grad_norm": 0.37613957387248953, "learning_rate": 1.725422559503277e-05, "loss": 0.5595, "step": 721 }, { "epoch": 0.6722532588454376, "grad_norm": 0.3158665823793518, "learning_rate": 1.7247326664367027e-05, "loss": 0.5237, "step": 722 }, { "epoch": 0.6731843575418994, "grad_norm": 0.3400676815039199, "learning_rate": 1.7240427733701276e-05, "loss": 0.5277, "step": 723 }, { "epoch": 0.6741154562383612, "grad_norm": 0.3054774818174772, "learning_rate": 1.7233528803035532e-05, "loss": 0.5332, "step": 724 }, { "epoch": 0.6750465549348231, "grad_norm": 0.39923489080724706, "learning_rate": 1.7226629872369784e-05, "loss": 0.5422, "step": 725 }, { "epoch": 0.6759776536312849, "grad_norm": 0.34559810199420204, "learning_rate": 1.7219730941704037e-05, "loss": 0.5167, "step": 726 }, { "epoch": 0.6769087523277467, "grad_norm": 0.3500891867136733, "learning_rate": 1.721283201103829e-05, "loss": 0.5489, "step": 727 }, { "epoch": 0.6778398510242085, "grad_norm": 0.36756153369488204, "learning_rate": 1.720593308037254e-05, "loss": 0.5662, "step": 728 }, { "epoch": 0.6787709497206704, "grad_norm": 0.3339896835523579, "learning_rate": 1.7199034149706797e-05, "loss": 0.5439, "step": 729 }, { "epoch": 0.6797020484171322, "grad_norm": 0.3611910694125025, "learning_rate": 1.719213521904105e-05, "loss": 0.5558, "step": 730 }, { "epoch": 0.680633147113594, "grad_norm": 0.335823947805116, "learning_rate": 1.7185236288375306e-05, "loss": 0.5168, "step": 731 }, { "epoch": 0.6815642458100558, "grad_norm": 0.33597914936013135, "learning_rate": 1.7178337357709555e-05, "loss": 0.5252, "step": 732 }, { "epoch": 0.6824953445065177, "grad_norm": 0.32398901696122634, "learning_rate": 1.717143842704381e-05, "loss": 0.5328, "step": 733 }, { "epoch": 0.6834264432029795, "grad_norm": 0.32525067909783456, "learning_rate": 1.7164539496378063e-05, "loss": 0.5508, "step": 734 }, { "epoch": 0.6843575418994413, "grad_norm": 0.31167101762512517, "learning_rate": 1.7157640565712315e-05, "loss": 0.5401, "step": 735 }, { "epoch": 0.6852886405959032, "grad_norm": 0.33263695825125233, "learning_rate": 1.7150741635046568e-05, "loss": 0.549, "step": 736 }, { "epoch": 0.686219739292365, "grad_norm": 0.3116480698446764, "learning_rate": 1.7143842704380823e-05, "loss": 0.5489, "step": 737 }, { "epoch": 0.6871508379888268, "grad_norm": 0.3157686589258359, "learning_rate": 1.7136943773715076e-05, "loss": 0.5326, "step": 738 }, { "epoch": 0.6880819366852886, "grad_norm": 0.3009830699639409, "learning_rate": 1.7130044843049328e-05, "loss": 0.5522, "step": 739 }, { "epoch": 0.6890130353817505, "grad_norm": 0.3131554531171315, "learning_rate": 1.7123145912383584e-05, "loss": 0.5182, "step": 740 }, { "epoch": 0.6899441340782123, "grad_norm": 0.3107042121429054, "learning_rate": 1.7116246981717833e-05, "loss": 0.5097, "step": 741 }, { "epoch": 0.6908752327746741, "grad_norm": 0.3218589129420941, "learning_rate": 1.710934805105209e-05, "loss": 0.5387, "step": 742 }, { "epoch": 0.6918063314711359, "grad_norm": 0.3084139573453371, "learning_rate": 1.710244912038634e-05, "loss": 0.5161, "step": 743 }, { "epoch": 0.6927374301675978, "grad_norm": 0.3716754003348254, "learning_rate": 1.7095550189720597e-05, "loss": 0.5365, "step": 744 }, { "epoch": 0.6936685288640596, "grad_norm": 0.34079386218762875, "learning_rate": 1.7088651259054846e-05, "loss": 0.5796, "step": 745 }, { "epoch": 0.6945996275605214, "grad_norm": 0.33350447945308787, "learning_rate": 1.7081752328389102e-05, "loss": 0.5293, "step": 746 }, { "epoch": 0.6955307262569832, "grad_norm": 0.3346880415907617, "learning_rate": 1.7074853397723354e-05, "loss": 0.5365, "step": 747 }, { "epoch": 0.6964618249534451, "grad_norm": 0.356454754416294, "learning_rate": 1.7067954467057607e-05, "loss": 0.5411, "step": 748 }, { "epoch": 0.6973929236499069, "grad_norm": 0.45642367064393785, "learning_rate": 1.7061055536391863e-05, "loss": 0.5362, "step": 749 }, { "epoch": 0.6983240223463687, "grad_norm": 0.3452311204642589, "learning_rate": 1.705415660572611e-05, "loss": 0.5456, "step": 750 }, { "epoch": 0.6992551210428305, "grad_norm": 0.3541827631309595, "learning_rate": 1.7047257675060367e-05, "loss": 0.5209, "step": 751 }, { "epoch": 0.7001862197392924, "grad_norm": 0.3186607848693412, "learning_rate": 1.704035874439462e-05, "loss": 0.5447, "step": 752 }, { "epoch": 0.7011173184357542, "grad_norm": 0.3349877330695743, "learning_rate": 1.7033459813728876e-05, "loss": 0.5375, "step": 753 }, { "epoch": 0.702048417132216, "grad_norm": 0.3403372366937618, "learning_rate": 1.7026560883063125e-05, "loss": 0.5316, "step": 754 }, { "epoch": 0.7029795158286778, "grad_norm": 0.3538757075576557, "learning_rate": 1.701966195239738e-05, "loss": 0.4984, "step": 755 }, { "epoch": 0.7039106145251397, "grad_norm": 0.3475410653822629, "learning_rate": 1.7012763021731633e-05, "loss": 0.5458, "step": 756 }, { "epoch": 0.7048417132216015, "grad_norm": 0.33245015439841397, "learning_rate": 1.7005864091065885e-05, "loss": 0.5479, "step": 757 }, { "epoch": 0.7057728119180633, "grad_norm": 0.3581827713967721, "learning_rate": 1.699896516040014e-05, "loss": 0.5448, "step": 758 }, { "epoch": 0.7067039106145251, "grad_norm": 0.3303110014122157, "learning_rate": 1.6992066229734393e-05, "loss": 0.5058, "step": 759 }, { "epoch": 0.707635009310987, "grad_norm": 0.34485785166260263, "learning_rate": 1.6985167299068646e-05, "loss": 0.505, "step": 760 }, { "epoch": 0.7085661080074488, "grad_norm": 0.3746087343690618, "learning_rate": 1.6978268368402898e-05, "loss": 0.5248, "step": 761 }, { "epoch": 0.7094972067039106, "grad_norm": 0.35613984997707504, "learning_rate": 1.6971369437737154e-05, "loss": 0.5571, "step": 762 }, { "epoch": 0.7104283054003724, "grad_norm": 0.36790759309596993, "learning_rate": 1.6964470507071403e-05, "loss": 0.5582, "step": 763 }, { "epoch": 0.7113594040968343, "grad_norm": 0.33208550836950873, "learning_rate": 1.695757157640566e-05, "loss": 0.5463, "step": 764 }, { "epoch": 0.7122905027932961, "grad_norm": 0.3348292192644929, "learning_rate": 1.695067264573991e-05, "loss": 0.5252, "step": 765 }, { "epoch": 0.7132216014897579, "grad_norm": 0.35953951203690016, "learning_rate": 1.6943773715074167e-05, "loss": 0.5113, "step": 766 }, { "epoch": 0.7141527001862198, "grad_norm": 0.3251990793373138, "learning_rate": 1.693687478440842e-05, "loss": 0.5631, "step": 767 }, { "epoch": 0.7150837988826816, "grad_norm": 0.34505592069762064, "learning_rate": 1.6929975853742672e-05, "loss": 0.5517, "step": 768 }, { "epoch": 0.7160148975791434, "grad_norm": 0.33660639607008264, "learning_rate": 1.6923076923076924e-05, "loss": 0.5075, "step": 769 }, { "epoch": 0.7169459962756052, "grad_norm": 0.37610288241493595, "learning_rate": 1.6916177992411177e-05, "loss": 0.5738, "step": 770 }, { "epoch": 0.7178770949720671, "grad_norm": 0.3735164526811979, "learning_rate": 1.6909279061745432e-05, "loss": 0.5462, "step": 771 }, { "epoch": 0.7188081936685289, "grad_norm": 0.3161355089711899, "learning_rate": 1.6902380131079685e-05, "loss": 0.5192, "step": 772 }, { "epoch": 0.7197392923649907, "grad_norm": 0.3829234818363106, "learning_rate": 1.6895481200413937e-05, "loss": 0.5204, "step": 773 }, { "epoch": 0.7206703910614525, "grad_norm": 0.31392835730450785, "learning_rate": 1.688858226974819e-05, "loss": 0.5183, "step": 774 }, { "epoch": 0.7216014897579144, "grad_norm": 0.43749070649390503, "learning_rate": 1.6881683339082445e-05, "loss": 0.5389, "step": 775 }, { "epoch": 0.7225325884543762, "grad_norm": 0.3968163620453453, "learning_rate": 1.6874784408416698e-05, "loss": 0.5686, "step": 776 }, { "epoch": 0.723463687150838, "grad_norm": 0.35723247435528777, "learning_rate": 1.686788547775095e-05, "loss": 0.5425, "step": 777 }, { "epoch": 0.7243947858472998, "grad_norm": 0.3985776207969194, "learning_rate": 1.6860986547085203e-05, "loss": 0.5451, "step": 778 }, { "epoch": 0.7253258845437617, "grad_norm": 0.32651702008547245, "learning_rate": 1.6854087616419455e-05, "loss": 0.5253, "step": 779 }, { "epoch": 0.7262569832402235, "grad_norm": 0.34776649524900943, "learning_rate": 1.684718868575371e-05, "loss": 0.5542, "step": 780 }, { "epoch": 0.7271880819366853, "grad_norm": 0.35704038659127824, "learning_rate": 1.6840289755087963e-05, "loss": 0.5713, "step": 781 }, { "epoch": 0.7281191806331471, "grad_norm": 0.3296795464532764, "learning_rate": 1.6833390824422216e-05, "loss": 0.5185, "step": 782 }, { "epoch": 0.729050279329609, "grad_norm": 0.3442913771431452, "learning_rate": 1.6826491893756468e-05, "loss": 0.5297, "step": 783 }, { "epoch": 0.7299813780260708, "grad_norm": 0.35772417407575463, "learning_rate": 1.6819592963090724e-05, "loss": 0.5399, "step": 784 }, { "epoch": 0.7309124767225326, "grad_norm": 0.3552764429011217, "learning_rate": 1.6812694032424976e-05, "loss": 0.5422, "step": 785 }, { "epoch": 0.7318435754189944, "grad_norm": 0.3683473593187075, "learning_rate": 1.680579510175923e-05, "loss": 0.542, "step": 786 }, { "epoch": 0.7327746741154563, "grad_norm": 0.38010448124997204, "learning_rate": 1.679889617109348e-05, "loss": 0.551, "step": 787 }, { "epoch": 0.7337057728119181, "grad_norm": 0.3719978482343093, "learning_rate": 1.6791997240427737e-05, "loss": 0.4983, "step": 788 }, { "epoch": 0.7346368715083799, "grad_norm": 0.38028693413404285, "learning_rate": 1.678509830976199e-05, "loss": 0.5558, "step": 789 }, { "epoch": 0.7355679702048417, "grad_norm": 0.32100841966316107, "learning_rate": 1.677819937909624e-05, "loss": 0.511, "step": 790 }, { "epoch": 0.7364990689013036, "grad_norm": 0.3455365778662038, "learning_rate": 1.6771300448430494e-05, "loss": 0.5551, "step": 791 }, { "epoch": 0.7374301675977654, "grad_norm": 0.3651455411173019, "learning_rate": 1.6764401517764747e-05, "loss": 0.5713, "step": 792 }, { "epoch": 0.7383612662942272, "grad_norm": 0.3716800296626704, "learning_rate": 1.6757502587099002e-05, "loss": 0.5258, "step": 793 }, { "epoch": 0.7392923649906891, "grad_norm": 0.3764262154809126, "learning_rate": 1.6750603656433255e-05, "loss": 0.5143, "step": 794 }, { "epoch": 0.7402234636871509, "grad_norm": 0.3305481909213889, "learning_rate": 1.6743704725767507e-05, "loss": 0.5223, "step": 795 }, { "epoch": 0.7411545623836127, "grad_norm": 0.3677852911926591, "learning_rate": 1.673680579510176e-05, "loss": 0.5173, "step": 796 }, { "epoch": 0.7420856610800745, "grad_norm": 0.3752606973555675, "learning_rate": 1.6729906864436015e-05, "loss": 0.526, "step": 797 }, { "epoch": 0.7430167597765364, "grad_norm": 0.3336984566286327, "learning_rate": 1.6723007933770268e-05, "loss": 0.5532, "step": 798 }, { "epoch": 0.7439478584729982, "grad_norm": 0.40194108251049976, "learning_rate": 1.671610900310452e-05, "loss": 0.5433, "step": 799 }, { "epoch": 0.74487895716946, "grad_norm": 0.35986233207661733, "learning_rate": 1.6709210072438773e-05, "loss": 0.5369, "step": 800 }, { "epoch": 0.7458100558659218, "grad_norm": 0.3708318657293492, "learning_rate": 1.6702311141773025e-05, "loss": 0.5718, "step": 801 }, { "epoch": 0.7467411545623837, "grad_norm": 0.3444450100592576, "learning_rate": 1.669541221110728e-05, "loss": 0.5616, "step": 802 }, { "epoch": 0.7476722532588455, "grad_norm": 0.35637784752336726, "learning_rate": 1.6688513280441533e-05, "loss": 0.5559, "step": 803 }, { "epoch": 0.7486033519553073, "grad_norm": 0.38398785289677334, "learning_rate": 1.6681614349775786e-05, "loss": 0.5443, "step": 804 }, { "epoch": 0.749534450651769, "grad_norm": 0.34651862479627416, "learning_rate": 1.6674715419110038e-05, "loss": 0.5252, "step": 805 }, { "epoch": 0.750465549348231, "grad_norm": 0.3481378172272939, "learning_rate": 1.6667816488444294e-05, "loss": 0.5309, "step": 806 }, { "epoch": 0.7513966480446927, "grad_norm": 0.427481237449562, "learning_rate": 1.6660917557778546e-05, "loss": 0.5281, "step": 807 }, { "epoch": 0.7523277467411545, "grad_norm": 0.3566703187472058, "learning_rate": 1.66540186271128e-05, "loss": 0.5397, "step": 808 }, { "epoch": 0.7532588454376163, "grad_norm": 0.38359830264619776, "learning_rate": 1.664711969644705e-05, "loss": 0.549, "step": 809 }, { "epoch": 0.7541899441340782, "grad_norm": 0.3513765048048889, "learning_rate": 1.6640220765781307e-05, "loss": 0.5257, "step": 810 }, { "epoch": 0.75512104283054, "grad_norm": 0.35336000400925577, "learning_rate": 1.663332183511556e-05, "loss": 0.5507, "step": 811 }, { "epoch": 0.7560521415270018, "grad_norm": 0.3823841599175915, "learning_rate": 1.662642290444981e-05, "loss": 0.535, "step": 812 }, { "epoch": 0.7569832402234636, "grad_norm": 0.36990772686705153, "learning_rate": 1.6619523973784064e-05, "loss": 0.537, "step": 813 }, { "epoch": 0.7579143389199255, "grad_norm": 0.32018446598424344, "learning_rate": 1.6612625043118316e-05, "loss": 0.5268, "step": 814 }, { "epoch": 0.7588454376163873, "grad_norm": 0.37975656958742515, "learning_rate": 1.6605726112452572e-05, "loss": 0.547, "step": 815 }, { "epoch": 0.7597765363128491, "grad_norm": 0.3198465701584887, "learning_rate": 1.6598827181786825e-05, "loss": 0.5239, "step": 816 }, { "epoch": 0.7607076350093109, "grad_norm": 0.36137609039401203, "learning_rate": 1.6591928251121077e-05, "loss": 0.5202, "step": 817 }, { "epoch": 0.7616387337057728, "grad_norm": 0.3593742407412463, "learning_rate": 1.658502932045533e-05, "loss": 0.5804, "step": 818 }, { "epoch": 0.7625698324022346, "grad_norm": 0.3773435823257264, "learning_rate": 1.6578130389789585e-05, "loss": 0.5481, "step": 819 }, { "epoch": 0.7635009310986964, "grad_norm": 0.3810817379399252, "learning_rate": 1.6571231459123838e-05, "loss": 0.5237, "step": 820 }, { "epoch": 0.7644320297951583, "grad_norm": 0.3994455973329335, "learning_rate": 1.656433252845809e-05, "loss": 0.5581, "step": 821 }, { "epoch": 0.7653631284916201, "grad_norm": 0.33619813096220597, "learning_rate": 1.6557433597792342e-05, "loss": 0.5062, "step": 822 }, { "epoch": 0.7662942271880819, "grad_norm": 0.39653908443738145, "learning_rate": 1.6550534667126595e-05, "loss": 0.5272, "step": 823 }, { "epoch": 0.7672253258845437, "grad_norm": 0.31300303121971074, "learning_rate": 1.654363573646085e-05, "loss": 0.5023, "step": 824 }, { "epoch": 0.7681564245810056, "grad_norm": 0.40621147640873784, "learning_rate": 1.6536736805795103e-05, "loss": 0.5272, "step": 825 }, { "epoch": 0.7690875232774674, "grad_norm": 0.3297019338618259, "learning_rate": 1.6529837875129355e-05, "loss": 0.5355, "step": 826 }, { "epoch": 0.7700186219739292, "grad_norm": 0.34280372839117695, "learning_rate": 1.6522938944463608e-05, "loss": 0.546, "step": 827 }, { "epoch": 0.770949720670391, "grad_norm": 0.3477641980549479, "learning_rate": 1.6516040013797864e-05, "loss": 0.5259, "step": 828 }, { "epoch": 0.7718808193668529, "grad_norm": 0.3577390564794027, "learning_rate": 1.6509141083132116e-05, "loss": 0.5375, "step": 829 }, { "epoch": 0.7728119180633147, "grad_norm": 0.3295156500444926, "learning_rate": 1.650224215246637e-05, "loss": 0.5233, "step": 830 }, { "epoch": 0.7737430167597765, "grad_norm": 0.39147285038294083, "learning_rate": 1.649534322180062e-05, "loss": 0.5515, "step": 831 }, { "epoch": 0.7746741154562383, "grad_norm": 0.3460357228564576, "learning_rate": 1.6488444291134877e-05, "loss": 0.5434, "step": 832 }, { "epoch": 0.7756052141527002, "grad_norm": 0.33240407112633463, "learning_rate": 1.648154536046913e-05, "loss": 0.5237, "step": 833 }, { "epoch": 0.776536312849162, "grad_norm": 0.36363520342592115, "learning_rate": 1.647464642980338e-05, "loss": 0.5536, "step": 834 }, { "epoch": 0.7774674115456238, "grad_norm": 0.33488197831379385, "learning_rate": 1.6467747499137634e-05, "loss": 0.5331, "step": 835 }, { "epoch": 0.7783985102420856, "grad_norm": 0.31766155569417187, "learning_rate": 1.6460848568471886e-05, "loss": 0.5228, "step": 836 }, { "epoch": 0.7793296089385475, "grad_norm": 0.3443522563052823, "learning_rate": 1.6453949637806142e-05, "loss": 0.5129, "step": 837 }, { "epoch": 0.7802607076350093, "grad_norm": 0.3355879618849527, "learning_rate": 1.6447050707140395e-05, "loss": 0.5449, "step": 838 }, { "epoch": 0.7811918063314711, "grad_norm": 0.3192063471552364, "learning_rate": 1.644015177647465e-05, "loss": 0.5196, "step": 839 }, { "epoch": 0.7821229050279329, "grad_norm": 0.34290269985101857, "learning_rate": 1.64332528458089e-05, "loss": 0.5246, "step": 840 }, { "epoch": 0.7830540037243948, "grad_norm": 0.3647507488928593, "learning_rate": 1.6426353915143155e-05, "loss": 0.5564, "step": 841 }, { "epoch": 0.7839851024208566, "grad_norm": 0.3618861387682594, "learning_rate": 1.6419454984477408e-05, "loss": 0.5078, "step": 842 }, { "epoch": 0.7849162011173184, "grad_norm": 0.3833734909704833, "learning_rate": 1.641255605381166e-05, "loss": 0.5492, "step": 843 }, { "epoch": 0.7858472998137802, "grad_norm": 0.35172900504734383, "learning_rate": 1.6405657123145912e-05, "loss": 0.5077, "step": 844 }, { "epoch": 0.7867783985102421, "grad_norm": 0.3672104197322266, "learning_rate": 1.6398758192480165e-05, "loss": 0.5392, "step": 845 }, { "epoch": 0.7877094972067039, "grad_norm": 0.3615823031436058, "learning_rate": 1.639185926181442e-05, "loss": 0.5643, "step": 846 }, { "epoch": 0.7886405959031657, "grad_norm": 0.3546810332854544, "learning_rate": 1.6384960331148673e-05, "loss": 0.5282, "step": 847 }, { "epoch": 0.7895716945996276, "grad_norm": 0.35738573091759435, "learning_rate": 1.637806140048293e-05, "loss": 0.5431, "step": 848 }, { "epoch": 0.7905027932960894, "grad_norm": 0.3349454207158209, "learning_rate": 1.6371162469817178e-05, "loss": 0.5211, "step": 849 }, { "epoch": 0.7914338919925512, "grad_norm": 0.37300375200878044, "learning_rate": 1.6364263539151434e-05, "loss": 0.5166, "step": 850 }, { "epoch": 0.792364990689013, "grad_norm": 0.3275068134232983, "learning_rate": 1.6357364608485686e-05, "loss": 0.5083, "step": 851 }, { "epoch": 0.7932960893854749, "grad_norm": 0.3471961397803821, "learning_rate": 1.635046567781994e-05, "loss": 0.5296, "step": 852 }, { "epoch": 0.7942271880819367, "grad_norm": 0.33869336196625155, "learning_rate": 1.634356674715419e-05, "loss": 0.4947, "step": 853 }, { "epoch": 0.7951582867783985, "grad_norm": 0.36921442352548445, "learning_rate": 1.6336667816488447e-05, "loss": 0.5612, "step": 854 }, { "epoch": 0.7960893854748603, "grad_norm": 0.3806602425506079, "learning_rate": 1.63297688858227e-05, "loss": 0.5272, "step": 855 }, { "epoch": 0.7970204841713222, "grad_norm": 0.3400398391998511, "learning_rate": 1.632286995515695e-05, "loss": 0.5217, "step": 856 }, { "epoch": 0.797951582867784, "grad_norm": 0.35158407934007735, "learning_rate": 1.6315971024491207e-05, "loss": 0.5183, "step": 857 }, { "epoch": 0.7988826815642458, "grad_norm": 0.3378051784543167, "learning_rate": 1.6309072093825456e-05, "loss": 0.5194, "step": 858 }, { "epoch": 0.7998137802607076, "grad_norm": 0.33826444327717925, "learning_rate": 1.6302173163159712e-05, "loss": 0.5432, "step": 859 }, { "epoch": 0.8007448789571695, "grad_norm": 0.3469184062111916, "learning_rate": 1.6295274232493964e-05, "loss": 0.5446, "step": 860 }, { "epoch": 0.8016759776536313, "grad_norm": 0.350757740078764, "learning_rate": 1.628837530182822e-05, "loss": 0.5506, "step": 861 }, { "epoch": 0.8026070763500931, "grad_norm": 0.3320041145922603, "learning_rate": 1.628147637116247e-05, "loss": 0.5319, "step": 862 }, { "epoch": 0.8035381750465549, "grad_norm": 0.3363754588056111, "learning_rate": 1.6274577440496725e-05, "loss": 0.5327, "step": 863 }, { "epoch": 0.8044692737430168, "grad_norm": 0.3335110199897388, "learning_rate": 1.6267678509830977e-05, "loss": 0.5504, "step": 864 }, { "epoch": 0.8054003724394786, "grad_norm": 0.35780904777336214, "learning_rate": 1.626077957916523e-05, "loss": 0.4915, "step": 865 }, { "epoch": 0.8063314711359404, "grad_norm": 0.3027281457499339, "learning_rate": 1.6253880648499486e-05, "loss": 0.5141, "step": 866 }, { "epoch": 0.8072625698324022, "grad_norm": 0.3250130189580202, "learning_rate": 1.6246981717833738e-05, "loss": 0.5381, "step": 867 }, { "epoch": 0.8081936685288641, "grad_norm": 0.3642416082420409, "learning_rate": 1.624008278716799e-05, "loss": 0.543, "step": 868 }, { "epoch": 0.8091247672253259, "grad_norm": 0.31853684161761225, "learning_rate": 1.6233183856502243e-05, "loss": 0.5032, "step": 869 }, { "epoch": 0.8100558659217877, "grad_norm": 0.3511162796096025, "learning_rate": 1.62262849258365e-05, "loss": 0.5207, "step": 870 }, { "epoch": 0.8109869646182495, "grad_norm": 0.3719761992016008, "learning_rate": 1.6219385995170748e-05, "loss": 0.5502, "step": 871 }, { "epoch": 0.8119180633147114, "grad_norm": 0.3235074537598124, "learning_rate": 1.6212487064505003e-05, "loss": 0.5253, "step": 872 }, { "epoch": 0.8128491620111732, "grad_norm": 0.390090525839355, "learning_rate": 1.6205588133839256e-05, "loss": 0.5678, "step": 873 }, { "epoch": 0.813780260707635, "grad_norm": 0.361827918740962, "learning_rate": 1.6198689203173508e-05, "loss": 0.5438, "step": 874 }, { "epoch": 0.8147113594040968, "grad_norm": 0.331095294065717, "learning_rate": 1.6191790272507764e-05, "loss": 0.504, "step": 875 }, { "epoch": 0.8156424581005587, "grad_norm": 0.36886587047192315, "learning_rate": 1.6184891341842016e-05, "loss": 0.5202, "step": 876 }, { "epoch": 0.8165735567970205, "grad_norm": 0.3292595800677433, "learning_rate": 1.617799241117627e-05, "loss": 0.5045, "step": 877 }, { "epoch": 0.8175046554934823, "grad_norm": 0.356962514718744, "learning_rate": 1.617109348051052e-05, "loss": 0.5318, "step": 878 }, { "epoch": 0.8184357541899442, "grad_norm": 0.36541070037645096, "learning_rate": 1.6164194549844777e-05, "loss": 0.5132, "step": 879 }, { "epoch": 0.819366852886406, "grad_norm": 0.3473250306081747, "learning_rate": 1.6157295619179026e-05, "loss": 0.5133, "step": 880 }, { "epoch": 0.8202979515828678, "grad_norm": 0.3486085320195872, "learning_rate": 1.6150396688513282e-05, "loss": 0.5377, "step": 881 }, { "epoch": 0.8212290502793296, "grad_norm": 0.3300390660001715, "learning_rate": 1.6143497757847534e-05, "loss": 0.5211, "step": 882 }, { "epoch": 0.8221601489757915, "grad_norm": 0.34284134850250264, "learning_rate": 1.613659882718179e-05, "loss": 0.52, "step": 883 }, { "epoch": 0.8230912476722533, "grad_norm": 0.35867702306226, "learning_rate": 1.6129699896516043e-05, "loss": 0.5582, "step": 884 }, { "epoch": 0.8240223463687151, "grad_norm": 0.30747286082092223, "learning_rate": 1.6122800965850295e-05, "loss": 0.5483, "step": 885 }, { "epoch": 0.8249534450651769, "grad_norm": 0.36535292169576794, "learning_rate": 1.6115902035184547e-05, "loss": 0.5752, "step": 886 }, { "epoch": 0.8258845437616388, "grad_norm": 0.30928090974642786, "learning_rate": 1.61090031045188e-05, "loss": 0.5258, "step": 887 }, { "epoch": 0.8268156424581006, "grad_norm": 0.31401111377137886, "learning_rate": 1.6102104173853056e-05, "loss": 0.5321, "step": 888 }, { "epoch": 0.8277467411545624, "grad_norm": 0.33550343435077484, "learning_rate": 1.6095205243187308e-05, "loss": 0.5209, "step": 889 }, { "epoch": 0.8286778398510242, "grad_norm": 0.3067640546085496, "learning_rate": 1.608830631252156e-05, "loss": 0.52, "step": 890 }, { "epoch": 0.8296089385474861, "grad_norm": 0.33281515135990064, "learning_rate": 1.6081407381855813e-05, "loss": 0.517, "step": 891 }, { "epoch": 0.8305400372439479, "grad_norm": 0.31127590834056573, "learning_rate": 1.607450845119007e-05, "loss": 0.5402, "step": 892 }, { "epoch": 0.8314711359404097, "grad_norm": 0.3302838663383482, "learning_rate": 1.606760952052432e-05, "loss": 0.5478, "step": 893 }, { "epoch": 0.8324022346368715, "grad_norm": 0.36624794028040175, "learning_rate": 1.6060710589858573e-05, "loss": 0.545, "step": 894 }, { "epoch": 0.8333333333333334, "grad_norm": 0.3447725791687726, "learning_rate": 1.6053811659192826e-05, "loss": 0.528, "step": 895 }, { "epoch": 0.8342644320297952, "grad_norm": 0.342032828069369, "learning_rate": 1.6046912728527078e-05, "loss": 0.5455, "step": 896 }, { "epoch": 0.835195530726257, "grad_norm": 0.3767201277143368, "learning_rate": 1.6040013797861334e-05, "loss": 0.5528, "step": 897 }, { "epoch": 0.8361266294227188, "grad_norm": 0.3057543071371632, "learning_rate": 1.6033114867195586e-05, "loss": 0.503, "step": 898 }, { "epoch": 0.8370577281191807, "grad_norm": 0.3525679290926551, "learning_rate": 1.602621593652984e-05, "loss": 0.5283, "step": 899 }, { "epoch": 0.8379888268156425, "grad_norm": 0.34988672622847167, "learning_rate": 1.601931700586409e-05, "loss": 0.5178, "step": 900 }, { "epoch": 0.8389199255121043, "grad_norm": 0.3114786235908851, "learning_rate": 1.6012418075198347e-05, "loss": 0.5221, "step": 901 }, { "epoch": 0.839851024208566, "grad_norm": 0.3435414456632741, "learning_rate": 1.60055191445326e-05, "loss": 0.5518, "step": 902 }, { "epoch": 0.840782122905028, "grad_norm": 0.3516284493603699, "learning_rate": 1.5998620213866852e-05, "loss": 0.541, "step": 903 }, { "epoch": 0.8417132216014898, "grad_norm": 0.3243425903464578, "learning_rate": 1.5991721283201104e-05, "loss": 0.5164, "step": 904 }, { "epoch": 0.8426443202979516, "grad_norm": 0.3397104544730467, "learning_rate": 1.598482235253536e-05, "loss": 0.5375, "step": 905 }, { "epoch": 0.8435754189944135, "grad_norm": 0.35602481864360375, "learning_rate": 1.5977923421869612e-05, "loss": 0.5369, "step": 906 }, { "epoch": 0.8445065176908753, "grad_norm": 0.3221017811078747, "learning_rate": 1.5971024491203865e-05, "loss": 0.54, "step": 907 }, { "epoch": 0.845437616387337, "grad_norm": 0.3773772302184878, "learning_rate": 1.5964125560538117e-05, "loss": 0.5581, "step": 908 }, { "epoch": 0.8463687150837989, "grad_norm": 0.36480015406835037, "learning_rate": 1.595722662987237e-05, "loss": 0.5514, "step": 909 }, { "epoch": 0.8472998137802608, "grad_norm": 0.31930807535360733, "learning_rate": 1.5950327699206625e-05, "loss": 0.5264, "step": 910 }, { "epoch": 0.8482309124767226, "grad_norm": 0.34113831870590994, "learning_rate": 1.5943428768540878e-05, "loss": 0.5159, "step": 911 }, { "epoch": 0.8491620111731844, "grad_norm": 0.3506623224998369, "learning_rate": 1.593652983787513e-05, "loss": 0.554, "step": 912 }, { "epoch": 0.8500931098696461, "grad_norm": 0.3502810326402818, "learning_rate": 1.5929630907209383e-05, "loss": 0.5201, "step": 913 }, { "epoch": 0.851024208566108, "grad_norm": 0.34597576693807575, "learning_rate": 1.592273197654364e-05, "loss": 0.5148, "step": 914 }, { "epoch": 0.8519553072625698, "grad_norm": 0.3509466236428332, "learning_rate": 1.591583304587789e-05, "loss": 0.5462, "step": 915 }, { "epoch": 0.8528864059590316, "grad_norm": 0.3733260118003614, "learning_rate": 1.5908934115212143e-05, "loss": 0.5035, "step": 916 }, { "epoch": 0.8538175046554934, "grad_norm": 0.3750365475101127, "learning_rate": 1.5902035184546396e-05, "loss": 0.5457, "step": 917 }, { "epoch": 0.8547486033519553, "grad_norm": 0.3618491852450799, "learning_rate": 1.5895136253880648e-05, "loss": 0.5588, "step": 918 }, { "epoch": 0.8556797020484171, "grad_norm": 0.3415418058296431, "learning_rate": 1.5888237323214904e-05, "loss": 0.5299, "step": 919 }, { "epoch": 0.8566108007448789, "grad_norm": 0.36343392316558504, "learning_rate": 1.5881338392549156e-05, "loss": 0.566, "step": 920 }, { "epoch": 0.8575418994413407, "grad_norm": 0.4314712383391316, "learning_rate": 1.587443946188341e-05, "loss": 0.5337, "step": 921 }, { "epoch": 0.8584729981378026, "grad_norm": 0.33618046882753866, "learning_rate": 1.586754053121766e-05, "loss": 0.5398, "step": 922 }, { "epoch": 0.8594040968342644, "grad_norm": 0.3748880286532561, "learning_rate": 1.5860641600551917e-05, "loss": 0.5238, "step": 923 }, { "epoch": 0.8603351955307262, "grad_norm": 0.38436713708639575, "learning_rate": 1.585374266988617e-05, "loss": 0.5385, "step": 924 }, { "epoch": 0.861266294227188, "grad_norm": 0.32495549976432087, "learning_rate": 1.584684373922042e-05, "loss": 0.5285, "step": 925 }, { "epoch": 0.8621973929236499, "grad_norm": 0.36750386099937893, "learning_rate": 1.5839944808554674e-05, "loss": 0.4972, "step": 926 }, { "epoch": 0.8631284916201117, "grad_norm": 0.37020433114417617, "learning_rate": 1.583304587788893e-05, "loss": 0.5339, "step": 927 }, { "epoch": 0.8640595903165735, "grad_norm": 0.353526556202217, "learning_rate": 1.5826146947223182e-05, "loss": 0.5416, "step": 928 }, { "epoch": 0.8649906890130353, "grad_norm": 0.3625491401937832, "learning_rate": 1.5819248016557435e-05, "loss": 0.5498, "step": 929 }, { "epoch": 0.8659217877094972, "grad_norm": 0.3684655911570856, "learning_rate": 1.5812349085891687e-05, "loss": 0.5567, "step": 930 }, { "epoch": 0.866852886405959, "grad_norm": 0.34582263634542687, "learning_rate": 1.580545015522594e-05, "loss": 0.5206, "step": 931 }, { "epoch": 0.8677839851024208, "grad_norm": 0.39121559405126505, "learning_rate": 1.5798551224560195e-05, "loss": 0.5309, "step": 932 }, { "epoch": 0.8687150837988827, "grad_norm": 0.3628533286352421, "learning_rate": 1.5791652293894448e-05, "loss": 0.5291, "step": 933 }, { "epoch": 0.8696461824953445, "grad_norm": 0.3409915407984461, "learning_rate": 1.57847533632287e-05, "loss": 0.5253, "step": 934 }, { "epoch": 0.8705772811918063, "grad_norm": 0.36552762580180725, "learning_rate": 1.5777854432562953e-05, "loss": 0.4994, "step": 935 }, { "epoch": 0.8715083798882681, "grad_norm": 0.3524439996573488, "learning_rate": 1.577095550189721e-05, "loss": 0.5014, "step": 936 }, { "epoch": 0.87243947858473, "grad_norm": 0.31591563766782915, "learning_rate": 1.576405657123146e-05, "loss": 0.5285, "step": 937 }, { "epoch": 0.8733705772811918, "grad_norm": 0.3865264315390415, "learning_rate": 1.5757157640565713e-05, "loss": 0.5101, "step": 938 }, { "epoch": 0.8743016759776536, "grad_norm": 0.3150746220818869, "learning_rate": 1.5750258709899966e-05, "loss": 0.5142, "step": 939 }, { "epoch": 0.8752327746741154, "grad_norm": 0.3202677644784043, "learning_rate": 1.5743359779234218e-05, "loss": 0.5418, "step": 940 }, { "epoch": 0.8761638733705773, "grad_norm": 0.3378716568946025, "learning_rate": 1.5736460848568474e-05, "loss": 0.5304, "step": 941 }, { "epoch": 0.8770949720670391, "grad_norm": 0.33080548861317766, "learning_rate": 1.5729561917902726e-05, "loss": 0.5388, "step": 942 }, { "epoch": 0.8780260707635009, "grad_norm": 0.3182338704492356, "learning_rate": 1.572266298723698e-05, "loss": 0.5297, "step": 943 }, { "epoch": 0.8789571694599627, "grad_norm": 0.32697494554115825, "learning_rate": 1.571576405657123e-05, "loss": 0.5035, "step": 944 }, { "epoch": 0.8798882681564246, "grad_norm": 0.33349790091807757, "learning_rate": 1.5708865125905487e-05, "loss": 0.5288, "step": 945 }, { "epoch": 0.8808193668528864, "grad_norm": 0.3473814145935271, "learning_rate": 1.570196619523974e-05, "loss": 0.5135, "step": 946 }, { "epoch": 0.8817504655493482, "grad_norm": 0.3250853232540784, "learning_rate": 1.569506726457399e-05, "loss": 0.525, "step": 947 }, { "epoch": 0.88268156424581, "grad_norm": 0.3250126052788058, "learning_rate": 1.5688168333908244e-05, "loss": 0.5057, "step": 948 }, { "epoch": 0.8836126629422719, "grad_norm": 0.33100111576055297, "learning_rate": 1.56812694032425e-05, "loss": 0.5198, "step": 949 }, { "epoch": 0.8845437616387337, "grad_norm": 0.3391835957228507, "learning_rate": 1.5674370472576752e-05, "loss": 0.5025, "step": 950 }, { "epoch": 0.8854748603351955, "grad_norm": 0.3381127460813809, "learning_rate": 1.5667471541911005e-05, "loss": 0.5495, "step": 951 }, { "epoch": 0.8864059590316573, "grad_norm": 0.3179355926163187, "learning_rate": 1.5660572611245257e-05, "loss": 0.5356, "step": 952 }, { "epoch": 0.8873370577281192, "grad_norm": 0.33619893408872975, "learning_rate": 1.565367368057951e-05, "loss": 0.5405, "step": 953 }, { "epoch": 0.888268156424581, "grad_norm": 0.32184796957432993, "learning_rate": 1.5646774749913765e-05, "loss": 0.5209, "step": 954 }, { "epoch": 0.8891992551210428, "grad_norm": 0.3513241162809745, "learning_rate": 1.5639875819248018e-05, "loss": 0.5038, "step": 955 }, { "epoch": 0.8901303538175046, "grad_norm": 0.33026755328373014, "learning_rate": 1.5632976888582273e-05, "loss": 0.5287, "step": 956 }, { "epoch": 0.8910614525139665, "grad_norm": 0.3456393031141812, "learning_rate": 1.5626077957916522e-05, "loss": 0.5244, "step": 957 }, { "epoch": 0.8919925512104283, "grad_norm": 0.33304478271819415, "learning_rate": 1.5619179027250778e-05, "loss": 0.5384, "step": 958 }, { "epoch": 0.8929236499068901, "grad_norm": 0.3117164915582247, "learning_rate": 1.561228009658503e-05, "loss": 0.5483, "step": 959 }, { "epoch": 0.8938547486033519, "grad_norm": 0.3423186543735759, "learning_rate": 1.5605381165919283e-05, "loss": 0.5389, "step": 960 }, { "epoch": 0.8947858472998138, "grad_norm": 0.3097311025515559, "learning_rate": 1.5598482235253535e-05, "loss": 0.4941, "step": 961 }, { "epoch": 0.8957169459962756, "grad_norm": 0.3421163781460456, "learning_rate": 1.559158330458779e-05, "loss": 0.5466, "step": 962 }, { "epoch": 0.8966480446927374, "grad_norm": 0.36848047371447495, "learning_rate": 1.5584684373922044e-05, "loss": 0.5601, "step": 963 }, { "epoch": 0.8975791433891993, "grad_norm": 0.32275496245964275, "learning_rate": 1.5577785443256296e-05, "loss": 0.517, "step": 964 }, { "epoch": 0.8985102420856611, "grad_norm": 0.3910552206327622, "learning_rate": 1.5570886512590552e-05, "loss": 0.5304, "step": 965 }, { "epoch": 0.8994413407821229, "grad_norm": 0.31376787509860493, "learning_rate": 1.55639875819248e-05, "loss": 0.5028, "step": 966 }, { "epoch": 0.9003724394785847, "grad_norm": 0.3732316841494711, "learning_rate": 1.5557088651259057e-05, "loss": 0.5584, "step": 967 }, { "epoch": 0.9013035381750466, "grad_norm": 0.3378218234051116, "learning_rate": 1.555018972059331e-05, "loss": 0.5399, "step": 968 }, { "epoch": 0.9022346368715084, "grad_norm": 0.31202924425938966, "learning_rate": 1.554329078992756e-05, "loss": 0.5205, "step": 969 }, { "epoch": 0.9031657355679702, "grad_norm": 0.36122774307603495, "learning_rate": 1.5536391859261814e-05, "loss": 0.544, "step": 970 }, { "epoch": 0.904096834264432, "grad_norm": 0.3353759394443146, "learning_rate": 1.552949292859607e-05, "loss": 0.5417, "step": 971 }, { "epoch": 0.9050279329608939, "grad_norm": 0.34510739443127253, "learning_rate": 1.5522593997930322e-05, "loss": 0.5183, "step": 972 }, { "epoch": 0.9059590316573557, "grad_norm": 0.32747553970597115, "learning_rate": 1.5515695067264575e-05, "loss": 0.5208, "step": 973 }, { "epoch": 0.9068901303538175, "grad_norm": 0.3351056141321513, "learning_rate": 1.550879613659883e-05, "loss": 0.5359, "step": 974 }, { "epoch": 0.9078212290502793, "grad_norm": 0.324659268361095, "learning_rate": 1.550189720593308e-05, "loss": 0.5178, "step": 975 }, { "epoch": 0.9087523277467412, "grad_norm": 0.38487836327362274, "learning_rate": 1.5494998275267335e-05, "loss": 0.546, "step": 976 }, { "epoch": 0.909683426443203, "grad_norm": 0.3454933016572094, "learning_rate": 1.5488099344601588e-05, "loss": 0.5359, "step": 977 }, { "epoch": 0.9106145251396648, "grad_norm": 0.31807936680980037, "learning_rate": 1.5481200413935843e-05, "loss": 0.5283, "step": 978 }, { "epoch": 0.9115456238361266, "grad_norm": 0.3215545832666132, "learning_rate": 1.5474301483270092e-05, "loss": 0.5138, "step": 979 }, { "epoch": 0.9124767225325885, "grad_norm": 0.36538879151431053, "learning_rate": 1.5467402552604348e-05, "loss": 0.5265, "step": 980 }, { "epoch": 0.9134078212290503, "grad_norm": 0.3130438656598645, "learning_rate": 1.54605036219386e-05, "loss": 0.5325, "step": 981 }, { "epoch": 0.9143389199255121, "grad_norm": 0.3676029580006594, "learning_rate": 1.5453604691272853e-05, "loss": 0.5433, "step": 982 }, { "epoch": 0.9152700186219739, "grad_norm": 0.34578970573755213, "learning_rate": 1.544670576060711e-05, "loss": 0.5263, "step": 983 }, { "epoch": 0.9162011173184358, "grad_norm": 0.35333620257862786, "learning_rate": 1.543980682994136e-05, "loss": 0.5467, "step": 984 }, { "epoch": 0.9171322160148976, "grad_norm": 0.38401678851672694, "learning_rate": 1.5432907899275614e-05, "loss": 0.5289, "step": 985 }, { "epoch": 0.9180633147113594, "grad_norm": 0.37950330826959155, "learning_rate": 1.5426008968609866e-05, "loss": 0.5394, "step": 986 }, { "epoch": 0.9189944134078212, "grad_norm": 0.35615103372870593, "learning_rate": 1.5419110037944122e-05, "loss": 0.514, "step": 987 }, { "epoch": 0.9199255121042831, "grad_norm": 0.33638840609272763, "learning_rate": 1.5412211107278374e-05, "loss": 0.4745, "step": 988 }, { "epoch": 0.9208566108007449, "grad_norm": 0.35159906316641815, "learning_rate": 1.5405312176612627e-05, "loss": 0.4945, "step": 989 }, { "epoch": 0.9217877094972067, "grad_norm": 0.34031257584636954, "learning_rate": 1.539841324594688e-05, "loss": 0.495, "step": 990 }, { "epoch": 0.9227188081936686, "grad_norm": 0.38503242634936996, "learning_rate": 1.539151431528113e-05, "loss": 0.5378, "step": 991 }, { "epoch": 0.9236499068901304, "grad_norm": 0.32363493338520977, "learning_rate": 1.5384615384615387e-05, "loss": 0.5035, "step": 992 }, { "epoch": 0.9245810055865922, "grad_norm": 0.3714011587197948, "learning_rate": 1.537771645394964e-05, "loss": 0.5028, "step": 993 }, { "epoch": 0.925512104283054, "grad_norm": 0.36411700147904086, "learning_rate": 1.5370817523283892e-05, "loss": 0.5257, "step": 994 }, { "epoch": 0.9264432029795159, "grad_norm": 0.37440103128325225, "learning_rate": 1.5363918592618144e-05, "loss": 0.5396, "step": 995 }, { "epoch": 0.9273743016759777, "grad_norm": 0.3612285214987829, "learning_rate": 1.53570196619524e-05, "loss": 0.5158, "step": 996 }, { "epoch": 0.9283054003724395, "grad_norm": 0.34575016483555193, "learning_rate": 1.5350120731286653e-05, "loss": 0.561, "step": 997 }, { "epoch": 0.9292364990689013, "grad_norm": 0.38788814657718484, "learning_rate": 1.5343221800620905e-05, "loss": 0.5507, "step": 998 }, { "epoch": 0.9301675977653632, "grad_norm": 0.3361901527473988, "learning_rate": 1.5336322869955157e-05, "loss": 0.5066, "step": 999 }, { "epoch": 0.931098696461825, "grad_norm": 0.3654161259634493, "learning_rate": 1.5329423939289413e-05, "loss": 0.4972, "step": 1000 }, { "epoch": 0.9320297951582868, "grad_norm": 0.36403453117480816, "learning_rate": 1.5322525008623666e-05, "loss": 0.5369, "step": 1001 }, { "epoch": 0.9329608938547486, "grad_norm": 0.4102127579585978, "learning_rate": 1.5315626077957918e-05, "loss": 0.5506, "step": 1002 }, { "epoch": 0.9338919925512105, "grad_norm": 0.313883442776274, "learning_rate": 1.530872714729217e-05, "loss": 0.5456, "step": 1003 }, { "epoch": 0.9348230912476723, "grad_norm": 0.3295527374232079, "learning_rate": 1.5301828216626423e-05, "loss": 0.5129, "step": 1004 }, { "epoch": 0.9357541899441341, "grad_norm": 0.3289209385942994, "learning_rate": 1.529492928596068e-05, "loss": 0.5373, "step": 1005 }, { "epoch": 0.9366852886405959, "grad_norm": 0.3465487942999269, "learning_rate": 1.528803035529493e-05, "loss": 0.5344, "step": 1006 }, { "epoch": 0.9376163873370578, "grad_norm": 0.33902351653444995, "learning_rate": 1.5281131424629183e-05, "loss": 0.5437, "step": 1007 }, { "epoch": 0.9385474860335196, "grad_norm": 0.35188076649082883, "learning_rate": 1.5274232493963436e-05, "loss": 0.5177, "step": 1008 }, { "epoch": 0.9394785847299814, "grad_norm": 0.3322803103096148, "learning_rate": 1.526733356329769e-05, "loss": 0.5042, "step": 1009 }, { "epoch": 0.9404096834264432, "grad_norm": 0.3705172160273387, "learning_rate": 1.5260434632631944e-05, "loss": 0.5192, "step": 1010 }, { "epoch": 0.9413407821229051, "grad_norm": 0.33241454759666117, "learning_rate": 1.5253535701966195e-05, "loss": 0.5592, "step": 1011 }, { "epoch": 0.9422718808193669, "grad_norm": 0.3274846564800393, "learning_rate": 1.5246636771300449e-05, "loss": 0.5055, "step": 1012 }, { "epoch": 0.9432029795158287, "grad_norm": 0.34454411923886635, "learning_rate": 1.5239737840634703e-05, "loss": 0.5258, "step": 1013 }, { "epoch": 0.9441340782122905, "grad_norm": 0.3711938178418708, "learning_rate": 1.5232838909968957e-05, "loss": 0.5418, "step": 1014 }, { "epoch": 0.9450651769087524, "grad_norm": 0.325435151453399, "learning_rate": 1.522593997930321e-05, "loss": 0.4997, "step": 1015 }, { "epoch": 0.9459962756052142, "grad_norm": 0.3454393304333053, "learning_rate": 1.5219041048637462e-05, "loss": 0.5355, "step": 1016 }, { "epoch": 0.946927374301676, "grad_norm": 0.3492183016378392, "learning_rate": 1.5212142117971716e-05, "loss": 0.5237, "step": 1017 }, { "epoch": 0.9478584729981379, "grad_norm": 0.3450950121003664, "learning_rate": 1.5205243187305968e-05, "loss": 0.5457, "step": 1018 }, { "epoch": 0.9487895716945997, "grad_norm": 0.33520331153742444, "learning_rate": 1.5198344256640222e-05, "loss": 0.5564, "step": 1019 }, { "epoch": 0.9497206703910615, "grad_norm": 0.3367394585419816, "learning_rate": 1.5191445325974475e-05, "loss": 0.5193, "step": 1020 }, { "epoch": 0.9506517690875232, "grad_norm": 0.3342960836918999, "learning_rate": 1.5184546395308727e-05, "loss": 0.5261, "step": 1021 }, { "epoch": 0.9515828677839852, "grad_norm": 0.33110398952737985, "learning_rate": 1.5177647464642981e-05, "loss": 0.5301, "step": 1022 }, { "epoch": 0.952513966480447, "grad_norm": 0.33504708783775294, "learning_rate": 1.5170748533977236e-05, "loss": 0.507, "step": 1023 }, { "epoch": 0.9534450651769087, "grad_norm": 0.33851524314178594, "learning_rate": 1.516384960331149e-05, "loss": 0.5273, "step": 1024 }, { "epoch": 0.9543761638733705, "grad_norm": 0.37710532343713205, "learning_rate": 1.515695067264574e-05, "loss": 0.5452, "step": 1025 }, { "epoch": 0.9553072625698324, "grad_norm": 0.31859385815184155, "learning_rate": 1.5150051741979994e-05, "loss": 0.5274, "step": 1026 }, { "epoch": 0.9562383612662942, "grad_norm": 0.3477439333887902, "learning_rate": 1.5143152811314249e-05, "loss": 0.4963, "step": 1027 }, { "epoch": 0.957169459962756, "grad_norm": 0.3056304063597552, "learning_rate": 1.5136253880648501e-05, "loss": 0.5234, "step": 1028 }, { "epoch": 0.9581005586592178, "grad_norm": 0.33232397663139096, "learning_rate": 1.5129354949982753e-05, "loss": 0.5264, "step": 1029 }, { "epoch": 0.9590316573556797, "grad_norm": 0.31244459045582784, "learning_rate": 1.5122456019317007e-05, "loss": 0.5115, "step": 1030 }, { "epoch": 0.9599627560521415, "grad_norm": 0.34182478589823123, "learning_rate": 1.511555708865126e-05, "loss": 0.5286, "step": 1031 }, { "epoch": 0.9608938547486033, "grad_norm": 0.3484850493760991, "learning_rate": 1.5108658157985514e-05, "loss": 0.5183, "step": 1032 }, { "epoch": 0.9618249534450651, "grad_norm": 0.3178502658010698, "learning_rate": 1.5101759227319768e-05, "loss": 0.5291, "step": 1033 }, { "epoch": 0.962756052141527, "grad_norm": 0.32122696617163454, "learning_rate": 1.5094860296654019e-05, "loss": 0.5119, "step": 1034 }, { "epoch": 0.9636871508379888, "grad_norm": 0.31174769298475785, "learning_rate": 1.5087961365988273e-05, "loss": 0.5237, "step": 1035 }, { "epoch": 0.9646182495344506, "grad_norm": 0.323078961046701, "learning_rate": 1.5081062435322527e-05, "loss": 0.5024, "step": 1036 }, { "epoch": 0.9655493482309124, "grad_norm": 0.35663904305403693, "learning_rate": 1.5074163504656781e-05, "loss": 0.5246, "step": 1037 }, { "epoch": 0.9664804469273743, "grad_norm": 0.33193546014974656, "learning_rate": 1.5067264573991032e-05, "loss": 0.5004, "step": 1038 }, { "epoch": 0.9674115456238361, "grad_norm": 0.3294919747313928, "learning_rate": 1.5060365643325286e-05, "loss": 0.5246, "step": 1039 }, { "epoch": 0.9683426443202979, "grad_norm": 0.31630252640405593, "learning_rate": 1.5053466712659538e-05, "loss": 0.5486, "step": 1040 }, { "epoch": 0.9692737430167597, "grad_norm": 0.3261543782136822, "learning_rate": 1.5046567781993792e-05, "loss": 0.5267, "step": 1041 }, { "epoch": 0.9702048417132216, "grad_norm": 0.31267116080717594, "learning_rate": 1.5039668851328046e-05, "loss": 0.5093, "step": 1042 }, { "epoch": 0.9711359404096834, "grad_norm": 0.3208261077506519, "learning_rate": 1.5032769920662297e-05, "loss": 0.5211, "step": 1043 }, { "epoch": 0.9720670391061452, "grad_norm": 0.3260442633860702, "learning_rate": 1.5025870989996551e-05, "loss": 0.5247, "step": 1044 }, { "epoch": 0.972998137802607, "grad_norm": 0.3194601436529867, "learning_rate": 1.5018972059330805e-05, "loss": 0.5317, "step": 1045 }, { "epoch": 0.9739292364990689, "grad_norm": 0.3471485005928702, "learning_rate": 1.501207312866506e-05, "loss": 0.5311, "step": 1046 }, { "epoch": 0.9748603351955307, "grad_norm": 0.3382534602837757, "learning_rate": 1.500517419799931e-05, "loss": 0.5026, "step": 1047 }, { "epoch": 0.9757914338919925, "grad_norm": 0.3576880372589358, "learning_rate": 1.4998275267333564e-05, "loss": 0.5373, "step": 1048 }, { "epoch": 0.9767225325884544, "grad_norm": 0.36005234123170915, "learning_rate": 1.4991376336667818e-05, "loss": 0.5301, "step": 1049 }, { "epoch": 0.9776536312849162, "grad_norm": 0.31897998585018067, "learning_rate": 1.498447740600207e-05, "loss": 0.506, "step": 1050 }, { "epoch": 0.978584729981378, "grad_norm": 0.37594411528726857, "learning_rate": 1.4977578475336325e-05, "loss": 0.5473, "step": 1051 }, { "epoch": 0.9795158286778398, "grad_norm": 0.3260397560116543, "learning_rate": 1.4970679544670577e-05, "loss": 0.5459, "step": 1052 }, { "epoch": 0.9804469273743017, "grad_norm": 0.38972985005547833, "learning_rate": 1.496378061400483e-05, "loss": 0.5063, "step": 1053 }, { "epoch": 0.9813780260707635, "grad_norm": 0.35358951184008364, "learning_rate": 1.4956881683339084e-05, "loss": 0.5204, "step": 1054 }, { "epoch": 0.9823091247672253, "grad_norm": 0.35365538915141853, "learning_rate": 1.4949982752673338e-05, "loss": 0.5027, "step": 1055 }, { "epoch": 0.9832402234636871, "grad_norm": 0.39547168287853884, "learning_rate": 1.4943083822007589e-05, "loss": 0.5254, "step": 1056 }, { "epoch": 0.984171322160149, "grad_norm": 0.3298352405676306, "learning_rate": 1.4936184891341843e-05, "loss": 0.5213, "step": 1057 }, { "epoch": 0.9851024208566108, "grad_norm": 0.4148583533130442, "learning_rate": 1.4929285960676097e-05, "loss": 0.5068, "step": 1058 }, { "epoch": 0.9860335195530726, "grad_norm": 0.3104242719797605, "learning_rate": 1.4922387030010351e-05, "loss": 0.5245, "step": 1059 }, { "epoch": 0.9869646182495344, "grad_norm": 0.3452763876562523, "learning_rate": 1.4915488099344603e-05, "loss": 0.5574, "step": 1060 }, { "epoch": 0.9878957169459963, "grad_norm": 0.4010598937240832, "learning_rate": 1.4908589168678856e-05, "loss": 0.5366, "step": 1061 }, { "epoch": 0.9888268156424581, "grad_norm": 0.3333022988906575, "learning_rate": 1.4901690238013108e-05, "loss": 0.5217, "step": 1062 }, { "epoch": 0.9897579143389199, "grad_norm": 0.34754490449261133, "learning_rate": 1.4894791307347362e-05, "loss": 0.5361, "step": 1063 }, { "epoch": 0.9906890130353817, "grad_norm": 0.3591316240811825, "learning_rate": 1.4887892376681616e-05, "loss": 0.4978, "step": 1064 }, { "epoch": 0.9916201117318436, "grad_norm": 0.3306103131416548, "learning_rate": 1.4880993446015867e-05, "loss": 0.5293, "step": 1065 }, { "epoch": 0.9925512104283054, "grad_norm": 0.3909716184087828, "learning_rate": 1.4874094515350121e-05, "loss": 0.4893, "step": 1066 }, { "epoch": 0.9934823091247672, "grad_norm": 0.321507361228369, "learning_rate": 1.4867195584684375e-05, "loss": 0.5142, "step": 1067 }, { "epoch": 0.994413407821229, "grad_norm": 0.37242804584695494, "learning_rate": 1.486029665401863e-05, "loss": 0.5296, "step": 1068 }, { "epoch": 0.9953445065176909, "grad_norm": 0.3292907480266062, "learning_rate": 1.4853397723352882e-05, "loss": 0.5004, "step": 1069 }, { "epoch": 0.9962756052141527, "grad_norm": 0.33014277171469864, "learning_rate": 1.4846498792687134e-05, "loss": 0.5417, "step": 1070 }, { "epoch": 0.9972067039106145, "grad_norm": 0.3234824599478624, "learning_rate": 1.4839599862021388e-05, "loss": 0.5107, "step": 1071 }, { "epoch": 0.9981378026070763, "grad_norm": 0.33193307330964755, "learning_rate": 1.483270093135564e-05, "loss": 0.5508, "step": 1072 }, { "epoch": 0.9990689013035382, "grad_norm": 0.31652408725038467, "learning_rate": 1.4825802000689895e-05, "loss": 0.523, "step": 1073 }, { "epoch": 1.0, "grad_norm": 0.3018488665435537, "learning_rate": 1.4818903070024147e-05, "loss": 0.537, "step": 1074 }, { "epoch": 1.000931098696462, "grad_norm": 0.3607383588801883, "learning_rate": 1.48120041393584e-05, "loss": 0.5145, "step": 1075 }, { "epoch": 1.0018621973929236, "grad_norm": 0.35222630111736136, "learning_rate": 1.4805105208692654e-05, "loss": 0.5174, "step": 1076 }, { "epoch": 1.0027932960893855, "grad_norm": 0.3314918608224104, "learning_rate": 1.4798206278026908e-05, "loss": 0.5104, "step": 1077 }, { "epoch": 1.0037243947858474, "grad_norm": 0.34523157339657035, "learning_rate": 1.4791307347361162e-05, "loss": 0.4966, "step": 1078 }, { "epoch": 1.004655493482309, "grad_norm": 0.37724081374220136, "learning_rate": 1.4784408416695413e-05, "loss": 0.5071, "step": 1079 }, { "epoch": 1.005586592178771, "grad_norm": 0.34583723604292776, "learning_rate": 1.4777509486029667e-05, "loss": 0.5056, "step": 1080 }, { "epoch": 1.0065176908752327, "grad_norm": 0.3286124617841868, "learning_rate": 1.4770610555363921e-05, "loss": 0.5086, "step": 1081 }, { "epoch": 1.0074487895716946, "grad_norm": 0.3371612941570212, "learning_rate": 1.4763711624698173e-05, "loss": 0.4906, "step": 1082 }, { "epoch": 1.0083798882681565, "grad_norm": 0.3579940242341767, "learning_rate": 1.4756812694032426e-05, "loss": 0.491, "step": 1083 }, { "epoch": 1.0093109869646182, "grad_norm": 0.31176684567675317, "learning_rate": 1.4749913763366678e-05, "loss": 0.4754, "step": 1084 }, { "epoch": 1.01024208566108, "grad_norm": 0.3167388891924566, "learning_rate": 1.4743014832700932e-05, "loss": 0.4774, "step": 1085 }, { "epoch": 1.011173184357542, "grad_norm": 0.342389578082644, "learning_rate": 1.4736115902035186e-05, "loss": 0.5254, "step": 1086 }, { "epoch": 1.0121042830540037, "grad_norm": 0.3045580375355906, "learning_rate": 1.472921697136944e-05, "loss": 0.4977, "step": 1087 }, { "epoch": 1.0130353817504656, "grad_norm": 0.3367253280487915, "learning_rate": 1.4722318040703691e-05, "loss": 0.5145, "step": 1088 }, { "epoch": 1.0139664804469273, "grad_norm": 0.31432015671606317, "learning_rate": 1.4715419110037945e-05, "loss": 0.5294, "step": 1089 }, { "epoch": 1.0148975791433892, "grad_norm": 0.33371854923183225, "learning_rate": 1.47085201793722e-05, "loss": 0.4865, "step": 1090 }, { "epoch": 1.015828677839851, "grad_norm": 0.3469046701918079, "learning_rate": 1.4701621248706452e-05, "loss": 0.5126, "step": 1091 }, { "epoch": 1.0167597765363128, "grad_norm": 0.3079996726829925, "learning_rate": 1.4694722318040704e-05, "loss": 0.5191, "step": 1092 }, { "epoch": 1.0176908752327747, "grad_norm": 0.3330116785237092, "learning_rate": 1.4687823387374958e-05, "loss": 0.4988, "step": 1093 }, { "epoch": 1.0186219739292366, "grad_norm": 0.31470987279417106, "learning_rate": 1.468092445670921e-05, "loss": 0.4932, "step": 1094 }, { "epoch": 1.0195530726256983, "grad_norm": 0.3407315229125257, "learning_rate": 1.4674025526043465e-05, "loss": 0.5165, "step": 1095 }, { "epoch": 1.0204841713221602, "grad_norm": 0.34956718305974976, "learning_rate": 1.4667126595377719e-05, "loss": 0.5211, "step": 1096 }, { "epoch": 1.0214152700186219, "grad_norm": 0.3076831066203146, "learning_rate": 1.466022766471197e-05, "loss": 0.5084, "step": 1097 }, { "epoch": 1.0223463687150838, "grad_norm": 0.420941931786035, "learning_rate": 1.4653328734046224e-05, "loss": 0.4904, "step": 1098 }, { "epoch": 1.0232774674115457, "grad_norm": 0.31047547782101453, "learning_rate": 1.4646429803380478e-05, "loss": 0.4855, "step": 1099 }, { "epoch": 1.0242085661080074, "grad_norm": 0.46005482438892725, "learning_rate": 1.4639530872714732e-05, "loss": 0.5462, "step": 1100 }, { "epoch": 1.0251396648044693, "grad_norm": 0.34626128248348026, "learning_rate": 1.4632631942048983e-05, "loss": 0.4905, "step": 1101 }, { "epoch": 1.0260707635009312, "grad_norm": 0.369638838834237, "learning_rate": 1.4625733011383237e-05, "loss": 0.4896, "step": 1102 }, { "epoch": 1.0270018621973929, "grad_norm": 0.36398818850656217, "learning_rate": 1.461883408071749e-05, "loss": 0.5101, "step": 1103 }, { "epoch": 1.0279329608938548, "grad_norm": 0.35635634230557395, "learning_rate": 1.4611935150051743e-05, "loss": 0.5126, "step": 1104 }, { "epoch": 1.0288640595903167, "grad_norm": 0.3645844121314818, "learning_rate": 1.4605036219385997e-05, "loss": 0.5218, "step": 1105 }, { "epoch": 1.0297951582867784, "grad_norm": 0.3474374493849282, "learning_rate": 1.4598137288720248e-05, "loss": 0.4922, "step": 1106 }, { "epoch": 1.0307262569832403, "grad_norm": 0.35674961655547305, "learning_rate": 1.4591238358054502e-05, "loss": 0.5217, "step": 1107 }, { "epoch": 1.031657355679702, "grad_norm": 0.3195524065841838, "learning_rate": 1.4584339427388756e-05, "loss": 0.5015, "step": 1108 }, { "epoch": 1.0325884543761639, "grad_norm": 0.3487377119808147, "learning_rate": 1.457744049672301e-05, "loss": 0.5205, "step": 1109 }, { "epoch": 1.0335195530726258, "grad_norm": 0.31542767570244173, "learning_rate": 1.4570541566057261e-05, "loss": 0.4736, "step": 1110 }, { "epoch": 1.0344506517690875, "grad_norm": 0.31803957184208065, "learning_rate": 1.4563642635391515e-05, "loss": 0.4809, "step": 1111 }, { "epoch": 1.0353817504655494, "grad_norm": 0.3174439617694399, "learning_rate": 1.455674370472577e-05, "loss": 0.5103, "step": 1112 }, { "epoch": 1.0363128491620113, "grad_norm": 0.36941082533590275, "learning_rate": 1.4549844774060022e-05, "loss": 0.4933, "step": 1113 }, { "epoch": 1.037243947858473, "grad_norm": 0.3078934869233152, "learning_rate": 1.4542945843394276e-05, "loss": 0.5154, "step": 1114 }, { "epoch": 1.0381750465549349, "grad_norm": 0.313266125718791, "learning_rate": 1.4536046912728528e-05, "loss": 0.4782, "step": 1115 }, { "epoch": 1.0391061452513966, "grad_norm": 0.32478120151668377, "learning_rate": 1.452914798206278e-05, "loss": 0.5014, "step": 1116 }, { "epoch": 1.0400372439478585, "grad_norm": 0.3001680318032309, "learning_rate": 1.4522249051397035e-05, "loss": 0.4831, "step": 1117 }, { "epoch": 1.0409683426443204, "grad_norm": 0.3213852935874318, "learning_rate": 1.4515350120731289e-05, "loss": 0.5041, "step": 1118 }, { "epoch": 1.041899441340782, "grad_norm": 0.32084783544312506, "learning_rate": 1.450845119006554e-05, "loss": 0.5092, "step": 1119 }, { "epoch": 1.042830540037244, "grad_norm": 0.34202980080713935, "learning_rate": 1.4501552259399794e-05, "loss": 0.529, "step": 1120 }, { "epoch": 1.0437616387337059, "grad_norm": 0.34620251957409026, "learning_rate": 1.4494653328734048e-05, "loss": 0.4971, "step": 1121 }, { "epoch": 1.0446927374301676, "grad_norm": 0.34917499779190747, "learning_rate": 1.4487754398068302e-05, "loss": 0.5067, "step": 1122 }, { "epoch": 1.0456238361266295, "grad_norm": 0.3353094343575274, "learning_rate": 1.4480855467402554e-05, "loss": 0.5181, "step": 1123 }, { "epoch": 1.0465549348230911, "grad_norm": 0.3186983617614411, "learning_rate": 1.4473956536736807e-05, "loss": 0.5076, "step": 1124 }, { "epoch": 1.047486033519553, "grad_norm": 0.3542486151198647, "learning_rate": 1.446705760607106e-05, "loss": 0.5267, "step": 1125 }, { "epoch": 1.048417132216015, "grad_norm": 0.3093539763857413, "learning_rate": 1.4460158675405313e-05, "loss": 0.5138, "step": 1126 }, { "epoch": 1.0493482309124766, "grad_norm": 0.32393711046976376, "learning_rate": 1.4453259744739567e-05, "loss": 0.4545, "step": 1127 }, { "epoch": 1.0502793296089385, "grad_norm": 0.3048700395729063, "learning_rate": 1.444636081407382e-05, "loss": 0.5002, "step": 1128 }, { "epoch": 1.0512104283054005, "grad_norm": 0.35694158604256687, "learning_rate": 1.4439461883408072e-05, "loss": 0.5324, "step": 1129 }, { "epoch": 1.0521415270018621, "grad_norm": 0.3319128769114891, "learning_rate": 1.4432562952742326e-05, "loss": 0.5014, "step": 1130 }, { "epoch": 1.053072625698324, "grad_norm": 0.311517404802985, "learning_rate": 1.442566402207658e-05, "loss": 0.4911, "step": 1131 }, { "epoch": 1.0540037243947857, "grad_norm": 0.3570061391034149, "learning_rate": 1.4418765091410834e-05, "loss": 0.5218, "step": 1132 }, { "epoch": 1.0549348230912476, "grad_norm": 0.33604460756200655, "learning_rate": 1.4411866160745085e-05, "loss": 0.511, "step": 1133 }, { "epoch": 1.0558659217877095, "grad_norm": 0.30712423151605317, "learning_rate": 1.4404967230079339e-05, "loss": 0.4989, "step": 1134 }, { "epoch": 1.0567970204841712, "grad_norm": 0.3483389785481719, "learning_rate": 1.4398068299413592e-05, "loss": 0.5063, "step": 1135 }, { "epoch": 1.0577281191806331, "grad_norm": 0.319771989209309, "learning_rate": 1.4391169368747846e-05, "loss": 0.5014, "step": 1136 }, { "epoch": 1.058659217877095, "grad_norm": 0.32980609654996046, "learning_rate": 1.4384270438082098e-05, "loss": 0.4982, "step": 1137 }, { "epoch": 1.0595903165735567, "grad_norm": 0.32609671287228365, "learning_rate": 1.437737150741635e-05, "loss": 0.5034, "step": 1138 }, { "epoch": 1.0605214152700186, "grad_norm": 0.31408025973401127, "learning_rate": 1.4370472576750605e-05, "loss": 0.5119, "step": 1139 }, { "epoch": 1.0614525139664805, "grad_norm": 0.2980938834959801, "learning_rate": 1.4363573646084859e-05, "loss": 0.5142, "step": 1140 }, { "epoch": 1.0623836126629422, "grad_norm": 0.33526483061996126, "learning_rate": 1.4356674715419113e-05, "loss": 0.5175, "step": 1141 }, { "epoch": 1.0633147113594041, "grad_norm": 0.33324134499956004, "learning_rate": 1.4349775784753363e-05, "loss": 0.5027, "step": 1142 }, { "epoch": 1.0642458100558658, "grad_norm": 0.3307394852811774, "learning_rate": 1.4342876854087618e-05, "loss": 0.472, "step": 1143 }, { "epoch": 1.0651769087523277, "grad_norm": 0.33781465660277954, "learning_rate": 1.4335977923421872e-05, "loss": 0.5406, "step": 1144 }, { "epoch": 1.0661080074487896, "grad_norm": 0.3231219509686651, "learning_rate": 1.4329078992756124e-05, "loss": 0.4959, "step": 1145 }, { "epoch": 1.0670391061452513, "grad_norm": 0.30777363405678876, "learning_rate": 1.4322180062090376e-05, "loss": 0.5099, "step": 1146 }, { "epoch": 1.0679702048417132, "grad_norm": 0.33007489109610344, "learning_rate": 1.431528113142463e-05, "loss": 0.4991, "step": 1147 }, { "epoch": 1.0689013035381751, "grad_norm": 0.3380176854070317, "learning_rate": 1.4308382200758883e-05, "loss": 0.5155, "step": 1148 }, { "epoch": 1.0698324022346368, "grad_norm": 0.3334749450280891, "learning_rate": 1.4301483270093137e-05, "loss": 0.5014, "step": 1149 }, { "epoch": 1.0707635009310987, "grad_norm": 0.31564294280290034, "learning_rate": 1.4294584339427391e-05, "loss": 0.5031, "step": 1150 }, { "epoch": 1.0716945996275604, "grad_norm": 0.34572985818878743, "learning_rate": 1.4287685408761642e-05, "loss": 0.49, "step": 1151 }, { "epoch": 1.0726256983240223, "grad_norm": 0.34415724810797876, "learning_rate": 1.4280786478095896e-05, "loss": 0.5041, "step": 1152 }, { "epoch": 1.0735567970204842, "grad_norm": 0.353007424375207, "learning_rate": 1.427388754743015e-05, "loss": 0.5314, "step": 1153 }, { "epoch": 1.074487895716946, "grad_norm": 0.3140432351311985, "learning_rate": 1.4266988616764404e-05, "loss": 0.5242, "step": 1154 }, { "epoch": 1.0754189944134078, "grad_norm": 0.36305041908150953, "learning_rate": 1.4260089686098655e-05, "loss": 0.4927, "step": 1155 }, { "epoch": 1.0763500931098697, "grad_norm": 0.3146654072083439, "learning_rate": 1.4253190755432909e-05, "loss": 0.5331, "step": 1156 }, { "epoch": 1.0772811918063314, "grad_norm": 0.34407536629630814, "learning_rate": 1.4246291824767161e-05, "loss": 0.506, "step": 1157 }, { "epoch": 1.0782122905027933, "grad_norm": 0.38296018763090445, "learning_rate": 1.4239392894101416e-05, "loss": 0.5247, "step": 1158 }, { "epoch": 1.0791433891992552, "grad_norm": 0.2997306914737791, "learning_rate": 1.423249396343567e-05, "loss": 0.4936, "step": 1159 }, { "epoch": 1.080074487895717, "grad_norm": 0.3302437754736737, "learning_rate": 1.422559503276992e-05, "loss": 0.5142, "step": 1160 }, { "epoch": 1.0810055865921788, "grad_norm": 0.361064386065145, "learning_rate": 1.4218696102104174e-05, "loss": 0.5039, "step": 1161 }, { "epoch": 1.0819366852886405, "grad_norm": 0.3383527455072968, "learning_rate": 1.4211797171438429e-05, "loss": 0.5363, "step": 1162 }, { "epoch": 1.0828677839851024, "grad_norm": 0.30876931222202075, "learning_rate": 1.4204898240772683e-05, "loss": 0.4957, "step": 1163 }, { "epoch": 1.0837988826815643, "grad_norm": 0.3188910174773383, "learning_rate": 1.4197999310106933e-05, "loss": 0.5068, "step": 1164 }, { "epoch": 1.084729981378026, "grad_norm": 0.36710722522146766, "learning_rate": 1.4191100379441187e-05, "loss": 0.4937, "step": 1165 }, { "epoch": 1.085661080074488, "grad_norm": 0.3099192718309035, "learning_rate": 1.4184201448775442e-05, "loss": 0.537, "step": 1166 }, { "epoch": 1.0865921787709498, "grad_norm": 0.32721331213765315, "learning_rate": 1.4177302518109694e-05, "loss": 0.4897, "step": 1167 }, { "epoch": 1.0875232774674115, "grad_norm": 0.34412831107912156, "learning_rate": 1.4170403587443948e-05, "loss": 0.4995, "step": 1168 }, { "epoch": 1.0884543761638734, "grad_norm": 0.29378888471963666, "learning_rate": 1.41635046567782e-05, "loss": 0.5049, "step": 1169 }, { "epoch": 1.089385474860335, "grad_norm": 0.3289286580825955, "learning_rate": 1.4156605726112453e-05, "loss": 0.4996, "step": 1170 }, { "epoch": 1.090316573556797, "grad_norm": 0.32999524430810623, "learning_rate": 1.4149706795446707e-05, "loss": 0.5356, "step": 1171 }, { "epoch": 1.091247672253259, "grad_norm": 0.30590174540962933, "learning_rate": 1.4142807864780961e-05, "loss": 0.5005, "step": 1172 }, { "epoch": 1.0921787709497206, "grad_norm": 0.33437467162476975, "learning_rate": 1.4135908934115212e-05, "loss": 0.486, "step": 1173 }, { "epoch": 1.0931098696461825, "grad_norm": 0.3206339118705933, "learning_rate": 1.4129010003449466e-05, "loss": 0.5174, "step": 1174 }, { "epoch": 1.0940409683426444, "grad_norm": 0.2966787735334988, "learning_rate": 1.412211107278372e-05, "loss": 0.5006, "step": 1175 }, { "epoch": 1.094972067039106, "grad_norm": 0.3662569464094714, "learning_rate": 1.4115212142117974e-05, "loss": 0.4963, "step": 1176 }, { "epoch": 1.095903165735568, "grad_norm": 0.3311236866300449, "learning_rate": 1.4108313211452226e-05, "loss": 0.5116, "step": 1177 }, { "epoch": 1.0968342644320297, "grad_norm": 0.3400140298806511, "learning_rate": 1.4101414280786479e-05, "loss": 0.5085, "step": 1178 }, { "epoch": 1.0977653631284916, "grad_norm": 0.38689295515456334, "learning_rate": 1.4094515350120731e-05, "loss": 0.485, "step": 1179 }, { "epoch": 1.0986964618249535, "grad_norm": 0.32914117164864315, "learning_rate": 1.4087616419454985e-05, "loss": 0.4986, "step": 1180 }, { "epoch": 1.0996275605214152, "grad_norm": 0.3114388146255812, "learning_rate": 1.408071748878924e-05, "loss": 0.5005, "step": 1181 }, { "epoch": 1.100558659217877, "grad_norm": 0.36528347697895, "learning_rate": 1.407381855812349e-05, "loss": 0.4992, "step": 1182 }, { "epoch": 1.101489757914339, "grad_norm": 0.377454622245841, "learning_rate": 1.4066919627457744e-05, "loss": 0.4937, "step": 1183 }, { "epoch": 1.1024208566108007, "grad_norm": 0.3228500287919699, "learning_rate": 1.4060020696791998e-05, "loss": 0.4948, "step": 1184 }, { "epoch": 1.1033519553072626, "grad_norm": 0.3454915994215628, "learning_rate": 1.4053121766126253e-05, "loss": 0.5017, "step": 1185 }, { "epoch": 1.1042830540037243, "grad_norm": 0.3472088146843883, "learning_rate": 1.4046222835460505e-05, "loss": 0.4735, "step": 1186 }, { "epoch": 1.1052141527001862, "grad_norm": 0.31442029300280777, "learning_rate": 1.4039323904794757e-05, "loss": 0.4737, "step": 1187 }, { "epoch": 1.106145251396648, "grad_norm": 0.3239287828939803, "learning_rate": 1.4032424974129011e-05, "loss": 0.4787, "step": 1188 }, { "epoch": 1.1070763500931098, "grad_norm": 0.3364851647442111, "learning_rate": 1.4025526043463264e-05, "loss": 0.5033, "step": 1189 }, { "epoch": 1.1080074487895717, "grad_norm": 0.3428084089249749, "learning_rate": 1.4018627112797518e-05, "loss": 0.4807, "step": 1190 }, { "epoch": 1.1089385474860336, "grad_norm": 0.31967592532638417, "learning_rate": 1.401172818213177e-05, "loss": 0.5009, "step": 1191 }, { "epoch": 1.1098696461824953, "grad_norm": 0.32315982322639125, "learning_rate": 1.4004829251466023e-05, "loss": 0.4855, "step": 1192 }, { "epoch": 1.1108007448789572, "grad_norm": 0.34295955008350754, "learning_rate": 1.3997930320800277e-05, "loss": 0.4894, "step": 1193 }, { "epoch": 1.111731843575419, "grad_norm": 0.34617247342082424, "learning_rate": 1.3991031390134531e-05, "loss": 0.4829, "step": 1194 }, { "epoch": 1.1126629422718808, "grad_norm": 0.3289561095722787, "learning_rate": 1.3984132459468785e-05, "loss": 0.5113, "step": 1195 }, { "epoch": 1.1135940409683427, "grad_norm": 0.30582078268836044, "learning_rate": 1.3977233528803036e-05, "loss": 0.4756, "step": 1196 }, { "epoch": 1.1145251396648044, "grad_norm": 0.31543267578174317, "learning_rate": 1.397033459813729e-05, "loss": 0.5206, "step": 1197 }, { "epoch": 1.1154562383612663, "grad_norm": 0.3086878625155308, "learning_rate": 1.3963435667471544e-05, "loss": 0.5123, "step": 1198 }, { "epoch": 1.1163873370577282, "grad_norm": 0.33098516936797884, "learning_rate": 1.3956536736805796e-05, "loss": 0.494, "step": 1199 }, { "epoch": 1.1173184357541899, "grad_norm": 0.3637004479075605, "learning_rate": 1.3949637806140049e-05, "loss": 0.5053, "step": 1200 }, { "epoch": 1.1182495344506518, "grad_norm": 0.32781672086284575, "learning_rate": 1.3942738875474301e-05, "loss": 0.5158, "step": 1201 }, { "epoch": 1.1191806331471137, "grad_norm": 0.3846100178066749, "learning_rate": 1.3935839944808555e-05, "loss": 0.5268, "step": 1202 }, { "epoch": 1.1201117318435754, "grad_norm": 0.3346895499959486, "learning_rate": 1.392894101414281e-05, "loss": 0.5022, "step": 1203 }, { "epoch": 1.1210428305400373, "grad_norm": 0.39077393463563287, "learning_rate": 1.3922042083477064e-05, "loss": 0.5216, "step": 1204 }, { "epoch": 1.121973929236499, "grad_norm": 0.34167425670308454, "learning_rate": 1.3915143152811314e-05, "loss": 0.4977, "step": 1205 }, { "epoch": 1.1229050279329609, "grad_norm": 0.3609854150551946, "learning_rate": 1.3908244222145568e-05, "loss": 0.5267, "step": 1206 }, { "epoch": 1.1238361266294228, "grad_norm": 0.3232479533588911, "learning_rate": 1.3901345291479822e-05, "loss": 0.5054, "step": 1207 }, { "epoch": 1.1247672253258845, "grad_norm": 0.35612597353724257, "learning_rate": 1.3894446360814075e-05, "loss": 0.4861, "step": 1208 }, { "epoch": 1.1256983240223464, "grad_norm": 0.3246455133539746, "learning_rate": 1.3887547430148327e-05, "loss": 0.5099, "step": 1209 }, { "epoch": 1.1266294227188083, "grad_norm": 0.36916168468041616, "learning_rate": 1.3880648499482581e-05, "loss": 0.511, "step": 1210 }, { "epoch": 1.12756052141527, "grad_norm": 0.33720608948953795, "learning_rate": 1.3873749568816834e-05, "loss": 0.5087, "step": 1211 }, { "epoch": 1.1284916201117319, "grad_norm": 0.2998686481097845, "learning_rate": 1.3866850638151088e-05, "loss": 0.4954, "step": 1212 }, { "epoch": 1.1294227188081938, "grad_norm": 0.32740195020873103, "learning_rate": 1.3859951707485342e-05, "loss": 0.4787, "step": 1213 }, { "epoch": 1.1303538175046555, "grad_norm": 0.30917026507593043, "learning_rate": 1.3853052776819593e-05, "loss": 0.5078, "step": 1214 }, { "epoch": 1.1312849162011174, "grad_norm": 0.3489900645826049, "learning_rate": 1.3846153846153847e-05, "loss": 0.5177, "step": 1215 }, { "epoch": 1.132216014897579, "grad_norm": 0.3237706950328776, "learning_rate": 1.3839254915488101e-05, "loss": 0.5094, "step": 1216 }, { "epoch": 1.133147113594041, "grad_norm": 0.3083887695566988, "learning_rate": 1.3832355984822355e-05, "loss": 0.4902, "step": 1217 }, { "epoch": 1.1340782122905029, "grad_norm": 0.3501369383066554, "learning_rate": 1.3825457054156606e-05, "loss": 0.5029, "step": 1218 }, { "epoch": 1.1350093109869646, "grad_norm": 0.35226470525907483, "learning_rate": 1.381855812349086e-05, "loss": 0.514, "step": 1219 }, { "epoch": 1.1359404096834265, "grad_norm": 0.32310892008330505, "learning_rate": 1.3811659192825114e-05, "loss": 0.5152, "step": 1220 }, { "epoch": 1.1368715083798882, "grad_norm": 0.324707557276515, "learning_rate": 1.3804760262159366e-05, "loss": 0.4929, "step": 1221 }, { "epoch": 1.13780260707635, "grad_norm": 0.3442415119031479, "learning_rate": 1.379786133149362e-05, "loss": 0.494, "step": 1222 }, { "epoch": 1.138733705772812, "grad_norm": 0.3221551085828865, "learning_rate": 1.3790962400827873e-05, "loss": 0.4834, "step": 1223 }, { "epoch": 1.1396648044692737, "grad_norm": 0.3294215287475715, "learning_rate": 1.3784063470162125e-05, "loss": 0.5047, "step": 1224 }, { "epoch": 1.1405959031657356, "grad_norm": 0.3263493659222321, "learning_rate": 1.377716453949638e-05, "loss": 0.5171, "step": 1225 }, { "epoch": 1.1415270018621975, "grad_norm": 0.32189951443105563, "learning_rate": 1.3770265608830633e-05, "loss": 0.503, "step": 1226 }, { "epoch": 1.1424581005586592, "grad_norm": 0.3412769320487267, "learning_rate": 1.3763366678164884e-05, "loss": 0.4877, "step": 1227 }, { "epoch": 1.143389199255121, "grad_norm": 0.2979717633586282, "learning_rate": 1.3756467747499138e-05, "loss": 0.5338, "step": 1228 }, { "epoch": 1.144320297951583, "grad_norm": 0.31657335671471276, "learning_rate": 1.3749568816833392e-05, "loss": 0.4779, "step": 1229 }, { "epoch": 1.1452513966480447, "grad_norm": 0.32917679125900917, "learning_rate": 1.3742669886167645e-05, "loss": 0.4946, "step": 1230 }, { "epoch": 1.1461824953445066, "grad_norm": 0.30566564736910823, "learning_rate": 1.3735770955501899e-05, "loss": 0.5076, "step": 1231 }, { "epoch": 1.1471135940409685, "grad_norm": 0.3229767790098733, "learning_rate": 1.3728872024836151e-05, "loss": 0.5004, "step": 1232 }, { "epoch": 1.1480446927374302, "grad_norm": 0.3259390432021766, "learning_rate": 1.3721973094170404e-05, "loss": 0.4932, "step": 1233 }, { "epoch": 1.148975791433892, "grad_norm": 0.32045081919633883, "learning_rate": 1.3715074163504658e-05, "loss": 0.5037, "step": 1234 }, { "epoch": 1.1499068901303537, "grad_norm": 0.3197630697083405, "learning_rate": 1.3708175232838912e-05, "loss": 0.512, "step": 1235 }, { "epoch": 1.1508379888268156, "grad_norm": 0.3158524420436749, "learning_rate": 1.3701276302173163e-05, "loss": 0.4917, "step": 1236 }, { "epoch": 1.1517690875232776, "grad_norm": 0.3067554686899198, "learning_rate": 1.3694377371507417e-05, "loss": 0.5051, "step": 1237 }, { "epoch": 1.1527001862197392, "grad_norm": 0.3426750055004949, "learning_rate": 1.368747844084167e-05, "loss": 0.5077, "step": 1238 }, { "epoch": 1.1536312849162011, "grad_norm": 0.3611933616300605, "learning_rate": 1.3680579510175925e-05, "loss": 0.5311, "step": 1239 }, { "epoch": 1.1545623836126628, "grad_norm": 0.3441879735474908, "learning_rate": 1.3673680579510177e-05, "loss": 0.5298, "step": 1240 }, { "epoch": 1.1554934823091247, "grad_norm": 0.3362734156497757, "learning_rate": 1.366678164884443e-05, "loss": 0.4959, "step": 1241 }, { "epoch": 1.1564245810055866, "grad_norm": 0.34002131564197113, "learning_rate": 1.3659882718178684e-05, "loss": 0.5022, "step": 1242 }, { "epoch": 1.1573556797020483, "grad_norm": 0.33863519891279, "learning_rate": 1.3652983787512936e-05, "loss": 0.513, "step": 1243 }, { "epoch": 1.1582867783985102, "grad_norm": 0.3731417178741631, "learning_rate": 1.364608485684719e-05, "loss": 0.5197, "step": 1244 }, { "epoch": 1.1592178770949721, "grad_norm": 0.3076325341636071, "learning_rate": 1.3639185926181443e-05, "loss": 0.5043, "step": 1245 }, { "epoch": 1.1601489757914338, "grad_norm": 0.3761199709010273, "learning_rate": 1.3632286995515695e-05, "loss": 0.5297, "step": 1246 }, { "epoch": 1.1610800744878957, "grad_norm": 0.3133228688723319, "learning_rate": 1.362538806484995e-05, "loss": 0.541, "step": 1247 }, { "epoch": 1.1620111731843576, "grad_norm": 0.320814949333145, "learning_rate": 1.3618489134184203e-05, "loss": 0.5064, "step": 1248 }, { "epoch": 1.1629422718808193, "grad_norm": 0.31776825522929875, "learning_rate": 1.3611590203518457e-05, "loss": 0.4813, "step": 1249 }, { "epoch": 1.1638733705772812, "grad_norm": 0.29539232687987804, "learning_rate": 1.3604691272852708e-05, "loss": 0.5305, "step": 1250 }, { "epoch": 1.164804469273743, "grad_norm": 0.3372431403409422, "learning_rate": 1.3597792342186962e-05, "loss": 0.5339, "step": 1251 }, { "epoch": 1.1657355679702048, "grad_norm": 0.3016010410499711, "learning_rate": 1.3590893411521215e-05, "loss": 0.4744, "step": 1252 }, { "epoch": 1.1666666666666667, "grad_norm": 0.3398380711801422, "learning_rate": 1.3583994480855469e-05, "loss": 0.5029, "step": 1253 }, { "epoch": 1.1675977653631284, "grad_norm": 0.32084830157434163, "learning_rate": 1.3577095550189723e-05, "loss": 0.5213, "step": 1254 }, { "epoch": 1.1685288640595903, "grad_norm": 0.32628680823413175, "learning_rate": 1.3570196619523974e-05, "loss": 0.4984, "step": 1255 }, { "epoch": 1.169459962756052, "grad_norm": 0.34304326902838234, "learning_rate": 1.3563297688858228e-05, "loss": 0.5179, "step": 1256 }, { "epoch": 1.170391061452514, "grad_norm": 0.33094818360556283, "learning_rate": 1.3556398758192482e-05, "loss": 0.5267, "step": 1257 }, { "epoch": 1.1713221601489758, "grad_norm": 0.3584355375605901, "learning_rate": 1.3549499827526736e-05, "loss": 0.5151, "step": 1258 }, { "epoch": 1.1722532588454375, "grad_norm": 0.34968297303643675, "learning_rate": 1.3542600896860987e-05, "loss": 0.498, "step": 1259 }, { "epoch": 1.1731843575418994, "grad_norm": 0.3294463813231704, "learning_rate": 1.353570196619524e-05, "loss": 0.5003, "step": 1260 }, { "epoch": 1.1741154562383613, "grad_norm": 0.32757852067628845, "learning_rate": 1.3528803035529495e-05, "loss": 0.4909, "step": 1261 }, { "epoch": 1.175046554934823, "grad_norm": 0.3365213956149654, "learning_rate": 1.3521904104863747e-05, "loss": 0.4856, "step": 1262 }, { "epoch": 1.175977653631285, "grad_norm": 0.3341489571294647, "learning_rate": 1.3515005174198001e-05, "loss": 0.5254, "step": 1263 }, { "epoch": 1.1769087523277468, "grad_norm": 0.33428107126525136, "learning_rate": 1.3508106243532254e-05, "loss": 0.5085, "step": 1264 }, { "epoch": 1.1778398510242085, "grad_norm": 0.31987735366243447, "learning_rate": 1.3501207312866506e-05, "loss": 0.4902, "step": 1265 }, { "epoch": 1.1787709497206704, "grad_norm": 0.32588230520182804, "learning_rate": 1.349430838220076e-05, "loss": 0.5095, "step": 1266 }, { "epoch": 1.1797020484171323, "grad_norm": 0.34045724519890963, "learning_rate": 1.3487409451535014e-05, "loss": 0.4783, "step": 1267 }, { "epoch": 1.180633147113594, "grad_norm": 0.29105376396145766, "learning_rate": 1.3480510520869265e-05, "loss": 0.4817, "step": 1268 }, { "epoch": 1.181564245810056, "grad_norm": 0.35587288425327407, "learning_rate": 1.3473611590203519e-05, "loss": 0.4985, "step": 1269 }, { "epoch": 1.1824953445065176, "grad_norm": 0.3270749519249929, "learning_rate": 1.3466712659537773e-05, "loss": 0.4993, "step": 1270 }, { "epoch": 1.1834264432029795, "grad_norm": 0.3437090209026815, "learning_rate": 1.3459813728872027e-05, "loss": 0.4847, "step": 1271 }, { "epoch": 1.1843575418994414, "grad_norm": 0.3481077380780853, "learning_rate": 1.345291479820628e-05, "loss": 0.4986, "step": 1272 }, { "epoch": 1.185288640595903, "grad_norm": 0.3194669297410237, "learning_rate": 1.3446015867540532e-05, "loss": 0.5063, "step": 1273 }, { "epoch": 1.186219739292365, "grad_norm": 0.31586133678345796, "learning_rate": 1.3439116936874785e-05, "loss": 0.5131, "step": 1274 }, { "epoch": 1.1871508379888267, "grad_norm": 0.3418531381911588, "learning_rate": 1.3432218006209039e-05, "loss": 0.4867, "step": 1275 }, { "epoch": 1.1880819366852886, "grad_norm": 0.31991826793306605, "learning_rate": 1.3425319075543293e-05, "loss": 0.4702, "step": 1276 }, { "epoch": 1.1890130353817505, "grad_norm": 0.3306898886379889, "learning_rate": 1.3418420144877543e-05, "loss": 0.5447, "step": 1277 }, { "epoch": 1.1899441340782122, "grad_norm": 0.34317713629968044, "learning_rate": 1.3411521214211798e-05, "loss": 0.4831, "step": 1278 }, { "epoch": 1.190875232774674, "grad_norm": 0.35461317459334213, "learning_rate": 1.3404622283546052e-05, "loss": 0.5155, "step": 1279 }, { "epoch": 1.191806331471136, "grad_norm": 0.3247181552165596, "learning_rate": 1.3397723352880306e-05, "loss": 0.5224, "step": 1280 }, { "epoch": 1.1927374301675977, "grad_norm": 0.32000419681022924, "learning_rate": 1.3390824422214558e-05, "loss": 0.506, "step": 1281 }, { "epoch": 1.1936685288640596, "grad_norm": 0.333766523199072, "learning_rate": 1.338392549154881e-05, "loss": 0.5103, "step": 1282 }, { "epoch": 1.1945996275605215, "grad_norm": 0.3344663852145038, "learning_rate": 1.3377026560883065e-05, "loss": 0.5122, "step": 1283 }, { "epoch": 1.1955307262569832, "grad_norm": 0.33053414233045625, "learning_rate": 1.3370127630217317e-05, "loss": 0.5102, "step": 1284 }, { "epoch": 1.196461824953445, "grad_norm": 0.39072377460480345, "learning_rate": 1.3363228699551571e-05, "loss": 0.5027, "step": 1285 }, { "epoch": 1.197392923649907, "grad_norm": 0.3081421064984111, "learning_rate": 1.3356329768885824e-05, "loss": 0.5244, "step": 1286 }, { "epoch": 1.1983240223463687, "grad_norm": 0.34197544737724545, "learning_rate": 1.3349430838220076e-05, "loss": 0.4765, "step": 1287 }, { "epoch": 1.1992551210428306, "grad_norm": 0.3258606195229087, "learning_rate": 1.334253190755433e-05, "loss": 0.4865, "step": 1288 }, { "epoch": 1.2001862197392923, "grad_norm": 0.3730566247659233, "learning_rate": 1.3335632976888584e-05, "loss": 0.4895, "step": 1289 }, { "epoch": 1.2011173184357542, "grad_norm": 0.3229304292936373, "learning_rate": 1.3328734046222838e-05, "loss": 0.4947, "step": 1290 }, { "epoch": 1.202048417132216, "grad_norm": 0.3431200271583498, "learning_rate": 1.3321835115557089e-05, "loss": 0.5128, "step": 1291 }, { "epoch": 1.2029795158286778, "grad_norm": 0.3416244986094625, "learning_rate": 1.3314936184891343e-05, "loss": 0.4974, "step": 1292 }, { "epoch": 1.2039106145251397, "grad_norm": 0.32170426221040066, "learning_rate": 1.3308037254225597e-05, "loss": 0.4943, "step": 1293 }, { "epoch": 1.2048417132216014, "grad_norm": 0.3128153233905717, "learning_rate": 1.330113832355985e-05, "loss": 0.513, "step": 1294 }, { "epoch": 1.2057728119180633, "grad_norm": 0.3286559966779193, "learning_rate": 1.3294239392894102e-05, "loss": 0.4933, "step": 1295 }, { "epoch": 1.2067039106145252, "grad_norm": 0.32235329249137584, "learning_rate": 1.3287340462228356e-05, "loss": 0.4893, "step": 1296 }, { "epoch": 1.2076350093109869, "grad_norm": 0.3407983381895898, "learning_rate": 1.3280441531562609e-05, "loss": 0.5149, "step": 1297 }, { "epoch": 1.2085661080074488, "grad_norm": 0.29423653515313714, "learning_rate": 1.3273542600896863e-05, "loss": 0.4936, "step": 1298 }, { "epoch": 1.2094972067039107, "grad_norm": 0.3232338194067745, "learning_rate": 1.3266643670231117e-05, "loss": 0.4892, "step": 1299 }, { "epoch": 1.2104283054003724, "grad_norm": 0.3010298195629113, "learning_rate": 1.3259744739565367e-05, "loss": 0.5016, "step": 1300 }, { "epoch": 1.2113594040968343, "grad_norm": 0.32941330721920414, "learning_rate": 1.3252845808899622e-05, "loss": 0.5129, "step": 1301 }, { "epoch": 1.2122905027932962, "grad_norm": 0.3288832009611037, "learning_rate": 1.3245946878233876e-05, "loss": 0.5028, "step": 1302 }, { "epoch": 1.2132216014897579, "grad_norm": 0.3131078134972568, "learning_rate": 1.3239047947568128e-05, "loss": 0.5091, "step": 1303 }, { "epoch": 1.2141527001862198, "grad_norm": 0.330521377738156, "learning_rate": 1.323214901690238e-05, "loss": 0.484, "step": 1304 }, { "epoch": 1.2150837988826815, "grad_norm": 0.29827461260366095, "learning_rate": 1.3225250086236635e-05, "loss": 0.5299, "step": 1305 }, { "epoch": 1.2160148975791434, "grad_norm": 0.31986677865853486, "learning_rate": 1.3218351155570887e-05, "loss": 0.5038, "step": 1306 }, { "epoch": 1.2169459962756053, "grad_norm": 0.33509669448544965, "learning_rate": 1.3211452224905141e-05, "loss": 0.5098, "step": 1307 }, { "epoch": 1.217877094972067, "grad_norm": 0.3275630636982507, "learning_rate": 1.3204553294239395e-05, "loss": 0.5044, "step": 1308 }, { "epoch": 1.2188081936685289, "grad_norm": 0.3083386101473164, "learning_rate": 1.3197654363573646e-05, "loss": 0.5049, "step": 1309 }, { "epoch": 1.2197392923649906, "grad_norm": 0.3518093113863356, "learning_rate": 1.31907554329079e-05, "loss": 0.5345, "step": 1310 }, { "epoch": 1.2206703910614525, "grad_norm": 0.32674494460787445, "learning_rate": 1.3183856502242154e-05, "loss": 0.5127, "step": 1311 }, { "epoch": 1.2216014897579144, "grad_norm": 0.32786943259283813, "learning_rate": 1.3176957571576408e-05, "loss": 0.4813, "step": 1312 }, { "epoch": 1.222532588454376, "grad_norm": 0.3016265684182032, "learning_rate": 1.3170058640910659e-05, "loss": 0.4975, "step": 1313 }, { "epoch": 1.223463687150838, "grad_norm": 0.3385258975544585, "learning_rate": 1.3163159710244913e-05, "loss": 0.5168, "step": 1314 }, { "epoch": 1.2243947858472999, "grad_norm": 0.30272517628452156, "learning_rate": 1.3156260779579167e-05, "loss": 0.4999, "step": 1315 }, { "epoch": 1.2253258845437616, "grad_norm": 0.3344735408675697, "learning_rate": 1.314936184891342e-05, "loss": 0.5233, "step": 1316 }, { "epoch": 1.2262569832402235, "grad_norm": 0.3032533661939453, "learning_rate": 1.3142462918247674e-05, "loss": 0.5023, "step": 1317 }, { "epoch": 1.2271880819366854, "grad_norm": 0.29530114638201554, "learning_rate": 1.3135563987581926e-05, "loss": 0.5008, "step": 1318 }, { "epoch": 1.228119180633147, "grad_norm": 0.32349677842377617, "learning_rate": 1.3128665056916178e-05, "loss": 0.5193, "step": 1319 }, { "epoch": 1.229050279329609, "grad_norm": 0.31406499472224336, "learning_rate": 1.3121766126250433e-05, "loss": 0.5153, "step": 1320 }, { "epoch": 1.2299813780260709, "grad_norm": 0.3009477902858821, "learning_rate": 1.3114867195584687e-05, "loss": 0.4995, "step": 1321 }, { "epoch": 1.2309124767225326, "grad_norm": 0.3199570243340866, "learning_rate": 1.3107968264918937e-05, "loss": 0.5139, "step": 1322 }, { "epoch": 1.2318435754189945, "grad_norm": 0.31879402526384637, "learning_rate": 1.3101069334253191e-05, "loss": 0.5108, "step": 1323 }, { "epoch": 1.2327746741154562, "grad_norm": 0.3455817759694552, "learning_rate": 1.3094170403587446e-05, "loss": 0.4798, "step": 1324 }, { "epoch": 1.233705772811918, "grad_norm": 0.300500804267792, "learning_rate": 1.3087271472921698e-05, "loss": 0.4783, "step": 1325 }, { "epoch": 1.23463687150838, "grad_norm": 0.32286664339424487, "learning_rate": 1.3080372542255952e-05, "loss": 0.5103, "step": 1326 }, { "epoch": 1.2355679702048417, "grad_norm": 0.3350087332551326, "learning_rate": 1.3073473611590204e-05, "loss": 0.5183, "step": 1327 }, { "epoch": 1.2364990689013036, "grad_norm": 0.29788756971628405, "learning_rate": 1.3066574680924457e-05, "loss": 0.4926, "step": 1328 }, { "epoch": 1.2374301675977653, "grad_norm": 0.3319382130049833, "learning_rate": 1.3059675750258711e-05, "loss": 0.5128, "step": 1329 }, { "epoch": 1.2383612662942272, "grad_norm": 0.33533872142823556, "learning_rate": 1.3052776819592965e-05, "loss": 0.5128, "step": 1330 }, { "epoch": 1.239292364990689, "grad_norm": 0.33317220361357947, "learning_rate": 1.3045877888927216e-05, "loss": 0.498, "step": 1331 }, { "epoch": 1.2402234636871508, "grad_norm": 0.31349866056764475, "learning_rate": 1.303897895826147e-05, "loss": 0.4929, "step": 1332 }, { "epoch": 1.2411545623836127, "grad_norm": 0.32178823725679123, "learning_rate": 1.3032080027595724e-05, "loss": 0.5081, "step": 1333 }, { "epoch": 1.2420856610800746, "grad_norm": 0.3247566004220211, "learning_rate": 1.3025181096929978e-05, "loss": 0.5257, "step": 1334 }, { "epoch": 1.2430167597765363, "grad_norm": 0.302394178853454, "learning_rate": 1.301828216626423e-05, "loss": 0.4929, "step": 1335 }, { "epoch": 1.2439478584729982, "grad_norm": 0.31965895257345, "learning_rate": 1.3011383235598483e-05, "loss": 0.4914, "step": 1336 }, { "epoch": 1.24487895716946, "grad_norm": 0.3222191213807062, "learning_rate": 1.3004484304932737e-05, "loss": 0.5032, "step": 1337 }, { "epoch": 1.2458100558659218, "grad_norm": 0.30262153160022004, "learning_rate": 1.299758537426699e-05, "loss": 0.4963, "step": 1338 }, { "epoch": 1.2467411545623837, "grad_norm": 0.33551256013396985, "learning_rate": 1.2990686443601244e-05, "loss": 0.5169, "step": 1339 }, { "epoch": 1.2476722532588453, "grad_norm": 0.29064848534855786, "learning_rate": 1.2983787512935496e-05, "loss": 0.4879, "step": 1340 }, { "epoch": 1.2486033519553073, "grad_norm": 0.32698474902319435, "learning_rate": 1.2976888582269748e-05, "loss": 0.5133, "step": 1341 }, { "epoch": 1.2495344506517692, "grad_norm": 0.33035076163337, "learning_rate": 1.2969989651604002e-05, "loss": 0.5025, "step": 1342 }, { "epoch": 1.2504655493482308, "grad_norm": 0.2925146509241728, "learning_rate": 1.2963090720938257e-05, "loss": 0.4825, "step": 1343 }, { "epoch": 1.2513966480446927, "grad_norm": 0.30308541075925166, "learning_rate": 1.295619179027251e-05, "loss": 0.4689, "step": 1344 }, { "epoch": 1.2523277467411544, "grad_norm": 0.2896215481798661, "learning_rate": 1.2949292859606761e-05, "loss": 0.4878, "step": 1345 }, { "epoch": 1.2532588454376163, "grad_norm": 0.31237140350893966, "learning_rate": 1.2942393928941015e-05, "loss": 0.4879, "step": 1346 }, { "epoch": 1.2541899441340782, "grad_norm": 0.32750598858568564, "learning_rate": 1.2935494998275268e-05, "loss": 0.5207, "step": 1347 }, { "epoch": 1.25512104283054, "grad_norm": 0.3142593935115795, "learning_rate": 1.2928596067609522e-05, "loss": 0.5119, "step": 1348 }, { "epoch": 1.2560521415270018, "grad_norm": 0.3279259396584536, "learning_rate": 1.2921697136943774e-05, "loss": 0.5094, "step": 1349 }, { "epoch": 1.2569832402234637, "grad_norm": 0.31944842391648504, "learning_rate": 1.2914798206278027e-05, "loss": 0.474, "step": 1350 }, { "epoch": 1.2579143389199254, "grad_norm": 0.2977131453963581, "learning_rate": 1.2907899275612281e-05, "loss": 0.4865, "step": 1351 }, { "epoch": 1.2588454376163873, "grad_norm": 0.3331898189770233, "learning_rate": 1.2901000344946535e-05, "loss": 0.4981, "step": 1352 }, { "epoch": 1.2597765363128492, "grad_norm": 0.3343936006496151, "learning_rate": 1.2894101414280789e-05, "loss": 0.5212, "step": 1353 }, { "epoch": 1.260707635009311, "grad_norm": 0.3073547491388362, "learning_rate": 1.288720248361504e-05, "loss": 0.4882, "step": 1354 }, { "epoch": 1.2616387337057728, "grad_norm": 0.3529389780186055, "learning_rate": 1.2880303552949294e-05, "loss": 0.4725, "step": 1355 }, { "epoch": 1.2625698324022347, "grad_norm": 0.2993715222854216, "learning_rate": 1.2873404622283548e-05, "loss": 0.4636, "step": 1356 }, { "epoch": 1.2635009310986964, "grad_norm": 0.3131594188349075, "learning_rate": 1.28665056916178e-05, "loss": 0.4926, "step": 1357 }, { "epoch": 1.2644320297951583, "grad_norm": 0.3253546383141496, "learning_rate": 1.2859606760952053e-05, "loss": 0.5007, "step": 1358 }, { "epoch": 1.2653631284916202, "grad_norm": 0.29222367416025785, "learning_rate": 1.2852707830286307e-05, "loss": 0.4838, "step": 1359 }, { "epoch": 1.266294227188082, "grad_norm": 0.2995234802290853, "learning_rate": 1.284580889962056e-05, "loss": 0.4924, "step": 1360 }, { "epoch": 1.2672253258845438, "grad_norm": 0.29553758441746514, "learning_rate": 1.2838909968954813e-05, "loss": 0.481, "step": 1361 }, { "epoch": 1.2681564245810055, "grad_norm": 0.29918911254679975, "learning_rate": 1.2832011038289068e-05, "loss": 0.502, "step": 1362 }, { "epoch": 1.2690875232774674, "grad_norm": 0.32471742996069075, "learning_rate": 1.2825112107623318e-05, "loss": 0.5246, "step": 1363 }, { "epoch": 1.2700186219739291, "grad_norm": 0.29881972380829175, "learning_rate": 1.2818213176957572e-05, "loss": 0.4962, "step": 1364 }, { "epoch": 1.270949720670391, "grad_norm": 0.31735993354027564, "learning_rate": 1.2811314246291826e-05, "loss": 0.5245, "step": 1365 }, { "epoch": 1.271880819366853, "grad_norm": 0.3085927634936413, "learning_rate": 1.280441531562608e-05, "loss": 0.5108, "step": 1366 }, { "epoch": 1.2728119180633146, "grad_norm": 0.3002256582811191, "learning_rate": 1.2797516384960331e-05, "loss": 0.4911, "step": 1367 }, { "epoch": 1.2737430167597765, "grad_norm": 0.3181499779760917, "learning_rate": 1.2790617454294585e-05, "loss": 0.5125, "step": 1368 }, { "epoch": 1.2746741154562384, "grad_norm": 0.3060153162544509, "learning_rate": 1.2783718523628838e-05, "loss": 0.479, "step": 1369 }, { "epoch": 1.2756052141527001, "grad_norm": 0.35280623962628455, "learning_rate": 1.2776819592963092e-05, "loss": 0.5084, "step": 1370 }, { "epoch": 1.276536312849162, "grad_norm": 0.36472648500665944, "learning_rate": 1.2769920662297346e-05, "loss": 0.496, "step": 1371 }, { "epoch": 1.277467411545624, "grad_norm": 0.31660519220388156, "learning_rate": 1.2763021731631597e-05, "loss": 0.5104, "step": 1372 }, { "epoch": 1.2783985102420856, "grad_norm": 0.3485816032374276, "learning_rate": 1.275612280096585e-05, "loss": 0.4936, "step": 1373 }, { "epoch": 1.2793296089385475, "grad_norm": 0.35366881003195116, "learning_rate": 1.2749223870300105e-05, "loss": 0.4906, "step": 1374 }, { "epoch": 1.2802607076350094, "grad_norm": 0.2970176973261587, "learning_rate": 1.2742324939634359e-05, "loss": 0.5085, "step": 1375 }, { "epoch": 1.2811918063314711, "grad_norm": 0.33010066301731467, "learning_rate": 1.273542600896861e-05, "loss": 0.4809, "step": 1376 }, { "epoch": 1.282122905027933, "grad_norm": 0.36734839163118704, "learning_rate": 1.2728527078302864e-05, "loss": 0.524, "step": 1377 }, { "epoch": 1.2830540037243947, "grad_norm": 0.31232106481039645, "learning_rate": 1.2721628147637118e-05, "loss": 0.5076, "step": 1378 }, { "epoch": 1.2839851024208566, "grad_norm": 0.3468481915010924, "learning_rate": 1.271472921697137e-05, "loss": 0.5071, "step": 1379 }, { "epoch": 1.2849162011173183, "grad_norm": 0.3234565673469789, "learning_rate": 1.2707830286305624e-05, "loss": 0.4891, "step": 1380 }, { "epoch": 1.2858472998137802, "grad_norm": 0.2967821517644958, "learning_rate": 1.2700931355639877e-05, "loss": 0.474, "step": 1381 }, { "epoch": 1.2867783985102421, "grad_norm": 0.33604252376429183, "learning_rate": 1.269403242497413e-05, "loss": 0.5201, "step": 1382 }, { "epoch": 1.2877094972067038, "grad_norm": 0.3004869136326838, "learning_rate": 1.2687133494308383e-05, "loss": 0.5005, "step": 1383 }, { "epoch": 1.2886405959031657, "grad_norm": 0.32317756334658243, "learning_rate": 1.2680234563642637e-05, "loss": 0.493, "step": 1384 }, { "epoch": 1.2895716945996276, "grad_norm": 0.30752184628496493, "learning_rate": 1.2673335632976888e-05, "loss": 0.5182, "step": 1385 }, { "epoch": 1.2905027932960893, "grad_norm": 0.31597613973683075, "learning_rate": 1.2666436702311142e-05, "loss": 0.5098, "step": 1386 }, { "epoch": 1.2914338919925512, "grad_norm": 0.33279915863121995, "learning_rate": 1.2659537771645396e-05, "loss": 0.4891, "step": 1387 }, { "epoch": 1.2923649906890131, "grad_norm": 0.3133430202299044, "learning_rate": 1.265263884097965e-05, "loss": 0.502, "step": 1388 }, { "epoch": 1.2932960893854748, "grad_norm": 0.3162896148211173, "learning_rate": 1.2645739910313903e-05, "loss": 0.5259, "step": 1389 }, { "epoch": 1.2942271880819367, "grad_norm": 0.31381683821719414, "learning_rate": 1.2638840979648155e-05, "loss": 0.5093, "step": 1390 }, { "epoch": 1.2951582867783986, "grad_norm": 0.3126786644499965, "learning_rate": 1.263194204898241e-05, "loss": 0.5349, "step": 1391 }, { "epoch": 1.2960893854748603, "grad_norm": 0.3477391021538575, "learning_rate": 1.2625043118316662e-05, "loss": 0.476, "step": 1392 }, { "epoch": 1.2970204841713222, "grad_norm": 0.3432765216333402, "learning_rate": 1.2618144187650916e-05, "loss": 0.488, "step": 1393 }, { "epoch": 1.2979515828677841, "grad_norm": 0.31105638462462915, "learning_rate": 1.2611245256985167e-05, "loss": 0.4807, "step": 1394 }, { "epoch": 1.2988826815642458, "grad_norm": 0.33657555767210207, "learning_rate": 1.260434632631942e-05, "loss": 0.4816, "step": 1395 }, { "epoch": 1.2998137802607077, "grad_norm": 0.4641690284696664, "learning_rate": 1.2597447395653675e-05, "loss": 0.5311, "step": 1396 }, { "epoch": 1.3007448789571694, "grad_norm": 0.3026657960222455, "learning_rate": 1.2590548464987929e-05, "loss": 0.5113, "step": 1397 }, { "epoch": 1.3016759776536313, "grad_norm": 0.34482488955669793, "learning_rate": 1.2583649534322181e-05, "loss": 0.4996, "step": 1398 }, { "epoch": 1.302607076350093, "grad_norm": 0.4135599429140351, "learning_rate": 1.2576750603656434e-05, "loss": 0.5135, "step": 1399 }, { "epoch": 1.303538175046555, "grad_norm": 0.3310741751432443, "learning_rate": 1.2569851672990688e-05, "loss": 0.489, "step": 1400 }, { "epoch": 1.3044692737430168, "grad_norm": 0.3846540922637821, "learning_rate": 1.256295274232494e-05, "loss": 0.5106, "step": 1401 }, { "epoch": 1.3054003724394785, "grad_norm": 0.3361253602183268, "learning_rate": 1.2556053811659194e-05, "loss": 0.4917, "step": 1402 }, { "epoch": 1.3063314711359404, "grad_norm": 0.3494194370320359, "learning_rate": 1.2549154880993447e-05, "loss": 0.4627, "step": 1403 }, { "epoch": 1.3072625698324023, "grad_norm": 0.3476339753211144, "learning_rate": 1.2542255950327699e-05, "loss": 0.4876, "step": 1404 }, { "epoch": 1.308193668528864, "grad_norm": 0.3297228136613133, "learning_rate": 1.2535357019661953e-05, "loss": 0.5231, "step": 1405 }, { "epoch": 1.309124767225326, "grad_norm": 0.4108525615186804, "learning_rate": 1.2528458088996207e-05, "loss": 0.4643, "step": 1406 }, { "epoch": 1.3100558659217878, "grad_norm": 0.3228924312330178, "learning_rate": 1.2521559158330461e-05, "loss": 0.4965, "step": 1407 }, { "epoch": 1.3109869646182495, "grad_norm": 0.3716524519666692, "learning_rate": 1.2514660227664712e-05, "loss": 0.4958, "step": 1408 }, { "epoch": 1.3119180633147114, "grad_norm": 0.3472043112861765, "learning_rate": 1.2507761296998966e-05, "loss": 0.4837, "step": 1409 }, { "epoch": 1.3128491620111733, "grad_norm": 0.3510828948048121, "learning_rate": 1.250086236633322e-05, "loss": 0.5221, "step": 1410 }, { "epoch": 1.313780260707635, "grad_norm": 0.35645912023642196, "learning_rate": 1.2493963435667473e-05, "loss": 0.5079, "step": 1411 }, { "epoch": 1.314711359404097, "grad_norm": 0.37268052847190525, "learning_rate": 1.2487064505001725e-05, "loss": 0.5113, "step": 1412 }, { "epoch": 1.3156424581005586, "grad_norm": 0.38094692729362395, "learning_rate": 1.248016557433598e-05, "loss": 0.5076, "step": 1413 }, { "epoch": 1.3165735567970205, "grad_norm": 0.33091217395396794, "learning_rate": 1.2473266643670232e-05, "loss": 0.5072, "step": 1414 }, { "epoch": 1.3175046554934824, "grad_norm": 0.333665552915627, "learning_rate": 1.2466367713004486e-05, "loss": 0.5311, "step": 1415 }, { "epoch": 1.318435754189944, "grad_norm": 0.3648921123737715, "learning_rate": 1.245946878233874e-05, "loss": 0.5011, "step": 1416 }, { "epoch": 1.319366852886406, "grad_norm": 0.31662495322238426, "learning_rate": 1.245256985167299e-05, "loss": 0.5081, "step": 1417 }, { "epoch": 1.3202979515828677, "grad_norm": 0.29861384571143557, "learning_rate": 1.2445670921007245e-05, "loss": 0.5105, "step": 1418 }, { "epoch": 1.3212290502793296, "grad_norm": 0.3999660793774277, "learning_rate": 1.2438771990341499e-05, "loss": 0.5115, "step": 1419 }, { "epoch": 1.3221601489757915, "grad_norm": 0.3178558002826316, "learning_rate": 1.2431873059675751e-05, "loss": 0.4848, "step": 1420 }, { "epoch": 1.3230912476722532, "grad_norm": 0.35240947393539634, "learning_rate": 1.2424974129010004e-05, "loss": 0.4872, "step": 1421 }, { "epoch": 1.324022346368715, "grad_norm": 0.35909949144769276, "learning_rate": 1.2418075198344258e-05, "loss": 0.5104, "step": 1422 }, { "epoch": 1.324953445065177, "grad_norm": 0.38338129738738846, "learning_rate": 1.241117626767851e-05, "loss": 0.515, "step": 1423 }, { "epoch": 1.3258845437616387, "grad_norm": 0.31173507155662233, "learning_rate": 1.2404277337012764e-05, "loss": 0.4906, "step": 1424 }, { "epoch": 1.3268156424581006, "grad_norm": 0.29623505641053355, "learning_rate": 1.2397378406347018e-05, "loss": 0.4675, "step": 1425 }, { "epoch": 1.3277467411545625, "grad_norm": 0.3315787334362606, "learning_rate": 1.2390479475681269e-05, "loss": 0.5316, "step": 1426 }, { "epoch": 1.3286778398510242, "grad_norm": 0.3025779940495458, "learning_rate": 1.2383580545015523e-05, "loss": 0.4649, "step": 1427 }, { "epoch": 1.329608938547486, "grad_norm": 0.30077706291283224, "learning_rate": 1.2376681614349777e-05, "loss": 0.5175, "step": 1428 }, { "epoch": 1.330540037243948, "grad_norm": 0.3224644700773527, "learning_rate": 1.2369782683684031e-05, "loss": 0.4869, "step": 1429 }, { "epoch": 1.3314711359404097, "grad_norm": 0.3147652989559827, "learning_rate": 1.2362883753018282e-05, "loss": 0.4728, "step": 1430 }, { "epoch": 1.3324022346368716, "grad_norm": 0.2978247891424876, "learning_rate": 1.2355984822352536e-05, "loss": 0.5046, "step": 1431 }, { "epoch": 1.3333333333333333, "grad_norm": 0.32156936372938183, "learning_rate": 1.234908589168679e-05, "loss": 0.5053, "step": 1432 }, { "epoch": 1.3342644320297952, "grad_norm": 0.322534865174073, "learning_rate": 1.2342186961021043e-05, "loss": 0.5092, "step": 1433 }, { "epoch": 1.3351955307262569, "grad_norm": 0.32539816921274844, "learning_rate": 1.2335288030355297e-05, "loss": 0.4749, "step": 1434 }, { "epoch": 1.3361266294227188, "grad_norm": 0.31181188728606696, "learning_rate": 1.2328389099689549e-05, "loss": 0.5035, "step": 1435 }, { "epoch": 1.3370577281191807, "grad_norm": 0.3147691534970954, "learning_rate": 1.2321490169023802e-05, "loss": 0.5123, "step": 1436 }, { "epoch": 1.3379888268156424, "grad_norm": 0.3772671108644651, "learning_rate": 1.2314591238358056e-05, "loss": 0.4978, "step": 1437 }, { "epoch": 1.3389199255121043, "grad_norm": 0.3251980035000102, "learning_rate": 1.230769230769231e-05, "loss": 0.5159, "step": 1438 }, { "epoch": 1.3398510242085662, "grad_norm": 0.3165805423302997, "learning_rate": 1.230079337702656e-05, "loss": 0.4914, "step": 1439 }, { "epoch": 1.3407821229050279, "grad_norm": 0.3073653577723417, "learning_rate": 1.2293894446360815e-05, "loss": 0.4973, "step": 1440 }, { "epoch": 1.3417132216014898, "grad_norm": 0.3175801708977407, "learning_rate": 1.2286995515695069e-05, "loss": 0.5032, "step": 1441 }, { "epoch": 1.3426443202979517, "grad_norm": 0.3210293113211401, "learning_rate": 1.2280096585029321e-05, "loss": 0.5074, "step": 1442 }, { "epoch": 1.3435754189944134, "grad_norm": 0.32361127313381666, "learning_rate": 1.2273197654363575e-05, "loss": 0.5227, "step": 1443 }, { "epoch": 1.3445065176908753, "grad_norm": 0.3062254893275078, "learning_rate": 1.2266298723697828e-05, "loss": 0.4965, "step": 1444 }, { "epoch": 1.3454376163873372, "grad_norm": 0.34645764751903374, "learning_rate": 1.225939979303208e-05, "loss": 0.5329, "step": 1445 }, { "epoch": 1.3463687150837989, "grad_norm": 0.3038492683219338, "learning_rate": 1.2252500862366334e-05, "loss": 0.4965, "step": 1446 }, { "epoch": 1.3472998137802608, "grad_norm": 0.3558740557042653, "learning_rate": 1.2245601931700588e-05, "loss": 0.5214, "step": 1447 }, { "epoch": 1.3482309124767227, "grad_norm": 0.3023610721339846, "learning_rate": 1.2238703001034839e-05, "loss": 0.4869, "step": 1448 }, { "epoch": 1.3491620111731844, "grad_norm": 0.2984734102682279, "learning_rate": 1.2231804070369093e-05, "loss": 0.488, "step": 1449 }, { "epoch": 1.3500931098696463, "grad_norm": 0.35622608242192805, "learning_rate": 1.2224905139703347e-05, "loss": 0.4972, "step": 1450 }, { "epoch": 1.351024208566108, "grad_norm": 0.3088100552919666, "learning_rate": 1.2218006209037601e-05, "loss": 0.5361, "step": 1451 }, { "epoch": 1.3519553072625698, "grad_norm": 0.36712064700394936, "learning_rate": 1.2211107278371854e-05, "loss": 0.487, "step": 1452 }, { "epoch": 1.3528864059590315, "grad_norm": 0.3402914509357792, "learning_rate": 1.2204208347706106e-05, "loss": 0.5099, "step": 1453 }, { "epoch": 1.3538175046554934, "grad_norm": 0.34561875633253475, "learning_rate": 1.219730941704036e-05, "loss": 0.4973, "step": 1454 }, { "epoch": 1.3547486033519553, "grad_norm": 0.346636532360014, "learning_rate": 1.2190410486374613e-05, "loss": 0.5119, "step": 1455 }, { "epoch": 1.355679702048417, "grad_norm": 0.3349947049188536, "learning_rate": 1.2183511555708867e-05, "loss": 0.4714, "step": 1456 }, { "epoch": 1.356610800744879, "grad_norm": 0.3220733719567677, "learning_rate": 1.2176612625043119e-05, "loss": 0.5026, "step": 1457 }, { "epoch": 1.3575418994413408, "grad_norm": 0.3292308116879035, "learning_rate": 1.2169713694377371e-05, "loss": 0.4875, "step": 1458 }, { "epoch": 1.3584729981378025, "grad_norm": 0.3314540118883711, "learning_rate": 1.2162814763711626e-05, "loss": 0.4751, "step": 1459 }, { "epoch": 1.3594040968342644, "grad_norm": 0.36142633174787436, "learning_rate": 1.215591583304588e-05, "loss": 0.4986, "step": 1460 }, { "epoch": 1.3603351955307263, "grad_norm": 0.30134277536258236, "learning_rate": 1.2149016902380134e-05, "loss": 0.5078, "step": 1461 }, { "epoch": 1.361266294227188, "grad_norm": 0.3630382482188497, "learning_rate": 1.2142117971714384e-05, "loss": 0.4849, "step": 1462 }, { "epoch": 1.36219739292365, "grad_norm": 0.32043175312014066, "learning_rate": 1.2135219041048639e-05, "loss": 0.489, "step": 1463 }, { "epoch": 1.3631284916201118, "grad_norm": 0.3154341700013024, "learning_rate": 1.2128320110382893e-05, "loss": 0.5049, "step": 1464 }, { "epoch": 1.3640595903165735, "grad_norm": 0.3497283866238918, "learning_rate": 1.2121421179717145e-05, "loss": 0.5063, "step": 1465 }, { "epoch": 1.3649906890130354, "grad_norm": 0.3198128505781862, "learning_rate": 1.2114522249051397e-05, "loss": 0.5162, "step": 1466 }, { "epoch": 1.3659217877094971, "grad_norm": 0.3637936402282673, "learning_rate": 1.210762331838565e-05, "loss": 0.5195, "step": 1467 }, { "epoch": 1.366852886405959, "grad_norm": 0.32652891585872945, "learning_rate": 1.2100724387719904e-05, "loss": 0.5, "step": 1468 }, { "epoch": 1.3677839851024207, "grad_norm": 0.3187240093728169, "learning_rate": 1.2093825457054158e-05, "loss": 0.4863, "step": 1469 }, { "epoch": 1.3687150837988826, "grad_norm": 0.3222193785777297, "learning_rate": 1.2086926526388412e-05, "loss": 0.4882, "step": 1470 }, { "epoch": 1.3696461824953445, "grad_norm": 0.3378397992863566, "learning_rate": 1.2080027595722663e-05, "loss": 0.5039, "step": 1471 }, { "epoch": 1.3705772811918062, "grad_norm": 0.3123447663210837, "learning_rate": 1.2073128665056917e-05, "loss": 0.4795, "step": 1472 }, { "epoch": 1.3715083798882681, "grad_norm": 0.35187687818818886, "learning_rate": 1.2066229734391171e-05, "loss": 0.5082, "step": 1473 }, { "epoch": 1.37243947858473, "grad_norm": 0.3127844100968807, "learning_rate": 1.2059330803725424e-05, "loss": 0.4591, "step": 1474 }, { "epoch": 1.3733705772811917, "grad_norm": 0.29154885423078747, "learning_rate": 1.2052431873059676e-05, "loss": 0.5156, "step": 1475 }, { "epoch": 1.3743016759776536, "grad_norm": 0.38215559298887475, "learning_rate": 1.204553294239393e-05, "loss": 0.4856, "step": 1476 }, { "epoch": 1.3752327746741155, "grad_norm": 0.3397230564423565, "learning_rate": 1.2038634011728182e-05, "loss": 0.5244, "step": 1477 }, { "epoch": 1.3761638733705772, "grad_norm": 0.3114213599895405, "learning_rate": 1.2031735081062437e-05, "loss": 0.5062, "step": 1478 }, { "epoch": 1.3770949720670391, "grad_norm": 0.3584103886108403, "learning_rate": 1.202483615039669e-05, "loss": 0.4689, "step": 1479 }, { "epoch": 1.378026070763501, "grad_norm": 0.3369556597714181, "learning_rate": 1.2017937219730941e-05, "loss": 0.4858, "step": 1480 }, { "epoch": 1.3789571694599627, "grad_norm": 0.32983382556586366, "learning_rate": 1.2011038289065195e-05, "loss": 0.5086, "step": 1481 }, { "epoch": 1.3798882681564246, "grad_norm": 0.30924807716923824, "learning_rate": 1.200413935839945e-05, "loss": 0.4964, "step": 1482 }, { "epoch": 1.3808193668528865, "grad_norm": 0.3419112054179755, "learning_rate": 1.1997240427733704e-05, "loss": 0.5093, "step": 1483 }, { "epoch": 1.3817504655493482, "grad_norm": 0.30405392984447677, "learning_rate": 1.1990341497067954e-05, "loss": 0.4841, "step": 1484 }, { "epoch": 1.3826815642458101, "grad_norm": 0.3125054830768345, "learning_rate": 1.1983442566402208e-05, "loss": 0.5005, "step": 1485 }, { "epoch": 1.3836126629422718, "grad_norm": 0.319992458644683, "learning_rate": 1.1976543635736463e-05, "loss": 0.4882, "step": 1486 }, { "epoch": 1.3845437616387337, "grad_norm": 0.33413510089646475, "learning_rate": 1.1969644705070715e-05, "loss": 0.5122, "step": 1487 }, { "epoch": 1.3854748603351954, "grad_norm": 0.3260982147780741, "learning_rate": 1.1962745774404969e-05, "loss": 0.4929, "step": 1488 }, { "epoch": 1.3864059590316573, "grad_norm": 0.32272627293859174, "learning_rate": 1.195584684373922e-05, "loss": 0.5422, "step": 1489 }, { "epoch": 1.3873370577281192, "grad_norm": 0.32813901147376906, "learning_rate": 1.1948947913073474e-05, "loss": 0.4853, "step": 1490 }, { "epoch": 1.388268156424581, "grad_norm": 0.31526713704698245, "learning_rate": 1.1942048982407728e-05, "loss": 0.5044, "step": 1491 }, { "epoch": 1.3891992551210428, "grad_norm": 0.306915147393497, "learning_rate": 1.1935150051741982e-05, "loss": 0.5111, "step": 1492 }, { "epoch": 1.3901303538175047, "grad_norm": 0.3213364821774854, "learning_rate": 1.1928251121076233e-05, "loss": 0.4967, "step": 1493 }, { "epoch": 1.3910614525139664, "grad_norm": 0.31508551265350426, "learning_rate": 1.1921352190410487e-05, "loss": 0.4954, "step": 1494 }, { "epoch": 1.3919925512104283, "grad_norm": 0.34406527861909364, "learning_rate": 1.1914453259744741e-05, "loss": 0.5171, "step": 1495 }, { "epoch": 1.3929236499068902, "grad_norm": 0.3418484305449046, "learning_rate": 1.1907554329078993e-05, "loss": 0.5069, "step": 1496 }, { "epoch": 1.393854748603352, "grad_norm": 0.32700937807038205, "learning_rate": 1.1900655398413248e-05, "loss": 0.5032, "step": 1497 }, { "epoch": 1.3947858472998138, "grad_norm": 0.3294857558350186, "learning_rate": 1.18937564677475e-05, "loss": 0.4854, "step": 1498 }, { "epoch": 1.3957169459962757, "grad_norm": 0.30895274179962845, "learning_rate": 1.1886857537081752e-05, "loss": 0.487, "step": 1499 }, { "epoch": 1.3966480446927374, "grad_norm": 0.3385683109426982, "learning_rate": 1.1879958606416006e-05, "loss": 0.4712, "step": 1500 }, { "epoch": 1.3975791433891993, "grad_norm": 0.34572747461494285, "learning_rate": 1.187305967575026e-05, "loss": 0.5031, "step": 1501 }, { "epoch": 1.3985102420856612, "grad_norm": 0.3027167409581621, "learning_rate": 1.1866160745084511e-05, "loss": 0.4933, "step": 1502 }, { "epoch": 1.399441340782123, "grad_norm": 0.35254641358260164, "learning_rate": 1.1859261814418765e-05, "loss": 0.5076, "step": 1503 }, { "epoch": 1.4003724394785848, "grad_norm": 0.32574231113920427, "learning_rate": 1.185236288375302e-05, "loss": 0.4766, "step": 1504 }, { "epoch": 1.4013035381750465, "grad_norm": 0.3355306007675964, "learning_rate": 1.1845463953087274e-05, "loss": 0.5005, "step": 1505 }, { "epoch": 1.4022346368715084, "grad_norm": 0.30246881458515956, "learning_rate": 1.1838565022421526e-05, "loss": 0.5049, "step": 1506 }, { "epoch": 1.40316573556797, "grad_norm": 0.33737917693480096, "learning_rate": 1.1831666091755778e-05, "loss": 0.483, "step": 1507 }, { "epoch": 1.404096834264432, "grad_norm": 0.30317928626158247, "learning_rate": 1.1824767161090032e-05, "loss": 0.4783, "step": 1508 }, { "epoch": 1.405027932960894, "grad_norm": 0.3256550306674302, "learning_rate": 1.1817868230424285e-05, "loss": 0.5128, "step": 1509 }, { "epoch": 1.4059590316573556, "grad_norm": 0.32186629891410673, "learning_rate": 1.1810969299758539e-05, "loss": 0.5027, "step": 1510 }, { "epoch": 1.4068901303538175, "grad_norm": 0.3119047450816145, "learning_rate": 1.1804070369092793e-05, "loss": 0.5192, "step": 1511 }, { "epoch": 1.4078212290502794, "grad_norm": 0.3587579565146891, "learning_rate": 1.1797171438427044e-05, "loss": 0.5222, "step": 1512 }, { "epoch": 1.408752327746741, "grad_norm": 0.31917232846218835, "learning_rate": 1.1790272507761298e-05, "loss": 0.5021, "step": 1513 }, { "epoch": 1.409683426443203, "grad_norm": 0.329828241417836, "learning_rate": 1.1783373577095552e-05, "loss": 0.4986, "step": 1514 }, { "epoch": 1.410614525139665, "grad_norm": 0.31252157222731797, "learning_rate": 1.1776474646429804e-05, "loss": 0.4973, "step": 1515 }, { "epoch": 1.4115456238361266, "grad_norm": 0.3195716698136379, "learning_rate": 1.1769575715764057e-05, "loss": 0.513, "step": 1516 }, { "epoch": 1.4124767225325885, "grad_norm": 0.30803395498806824, "learning_rate": 1.1762676785098311e-05, "loss": 0.4965, "step": 1517 }, { "epoch": 1.4134078212290504, "grad_norm": 0.3197293857641414, "learning_rate": 1.1755777854432563e-05, "loss": 0.4892, "step": 1518 }, { "epoch": 1.414338919925512, "grad_norm": 0.29905180662414516, "learning_rate": 1.1748878923766817e-05, "loss": 0.5004, "step": 1519 }, { "epoch": 1.415270018621974, "grad_norm": 0.3192031391026996, "learning_rate": 1.1741979993101072e-05, "loss": 0.4741, "step": 1520 }, { "epoch": 1.4162011173184357, "grad_norm": 0.290273918187987, "learning_rate": 1.1735081062435322e-05, "loss": 0.4961, "step": 1521 }, { "epoch": 1.4171322160148976, "grad_norm": 0.28305037733499644, "learning_rate": 1.1728182131769576e-05, "loss": 0.4703, "step": 1522 }, { "epoch": 1.4180633147113593, "grad_norm": 0.3179740527250545, "learning_rate": 1.172128320110383e-05, "loss": 0.4964, "step": 1523 }, { "epoch": 1.4189944134078212, "grad_norm": 0.3048756675683885, "learning_rate": 1.1714384270438085e-05, "loss": 0.5054, "step": 1524 }, { "epoch": 1.419925512104283, "grad_norm": 0.29938971744780934, "learning_rate": 1.1707485339772335e-05, "loss": 0.5065, "step": 1525 }, { "epoch": 1.4208566108007448, "grad_norm": 0.33695415669925394, "learning_rate": 1.170058640910659e-05, "loss": 0.4995, "step": 1526 }, { "epoch": 1.4217877094972067, "grad_norm": 0.3173441908159489, "learning_rate": 1.1693687478440843e-05, "loss": 0.4728, "step": 1527 }, { "epoch": 1.4227188081936686, "grad_norm": 0.2923703085233716, "learning_rate": 1.1686788547775096e-05, "loss": 0.4887, "step": 1528 }, { "epoch": 1.4236499068901303, "grad_norm": 0.32648717442149117, "learning_rate": 1.167988961710935e-05, "loss": 0.496, "step": 1529 }, { "epoch": 1.4245810055865922, "grad_norm": 0.3054891491432842, "learning_rate": 1.1672990686443602e-05, "loss": 0.4925, "step": 1530 }, { "epoch": 1.425512104283054, "grad_norm": 0.29638874152120154, "learning_rate": 1.1666091755777855e-05, "loss": 0.4967, "step": 1531 }, { "epoch": 1.4264432029795158, "grad_norm": 0.3142649554976863, "learning_rate": 1.1659192825112109e-05, "loss": 0.4787, "step": 1532 }, { "epoch": 1.4273743016759777, "grad_norm": 0.3037617069811643, "learning_rate": 1.1652293894446363e-05, "loss": 0.5078, "step": 1533 }, { "epoch": 1.4283054003724396, "grad_norm": 0.3149510280737587, "learning_rate": 1.1645394963780614e-05, "loss": 0.5342, "step": 1534 }, { "epoch": 1.4292364990689013, "grad_norm": 0.3123137983142928, "learning_rate": 1.1638496033114868e-05, "loss": 0.5033, "step": 1535 }, { "epoch": 1.4301675977653632, "grad_norm": 0.3266986394428113, "learning_rate": 1.1631597102449122e-05, "loss": 0.4741, "step": 1536 }, { "epoch": 1.431098696461825, "grad_norm": 0.3259392843906023, "learning_rate": 1.1624698171783374e-05, "loss": 0.5064, "step": 1537 }, { "epoch": 1.4320297951582868, "grad_norm": 0.29330446725367476, "learning_rate": 1.1617799241117628e-05, "loss": 0.4883, "step": 1538 }, { "epoch": 1.4329608938547487, "grad_norm": 0.30795655171359587, "learning_rate": 1.161090031045188e-05, "loss": 0.5069, "step": 1539 }, { "epoch": 1.4338919925512104, "grad_norm": 0.3183505599364942, "learning_rate": 1.1604001379786133e-05, "loss": 0.4935, "step": 1540 }, { "epoch": 1.4348230912476723, "grad_norm": 0.29681132949602457, "learning_rate": 1.1597102449120387e-05, "loss": 0.478, "step": 1541 }, { "epoch": 1.435754189944134, "grad_norm": 0.3024007586819468, "learning_rate": 1.1590203518454641e-05, "loss": 0.5089, "step": 1542 }, { "epoch": 1.4366852886405959, "grad_norm": 0.3064324187028734, "learning_rate": 1.1583304587788892e-05, "loss": 0.5012, "step": 1543 }, { "epoch": 1.4376163873370578, "grad_norm": 0.307976708775328, "learning_rate": 1.1576405657123146e-05, "loss": 0.4963, "step": 1544 }, { "epoch": 1.4385474860335195, "grad_norm": 0.3300412794332981, "learning_rate": 1.15695067264574e-05, "loss": 0.491, "step": 1545 }, { "epoch": 1.4394785847299814, "grad_norm": 0.31891388698649076, "learning_rate": 1.1562607795791654e-05, "loss": 0.5024, "step": 1546 }, { "epoch": 1.4404096834264433, "grad_norm": 0.31975141974211474, "learning_rate": 1.1555708865125907e-05, "loss": 0.4992, "step": 1547 }, { "epoch": 1.441340782122905, "grad_norm": 0.2876858125806239, "learning_rate": 1.154880993446016e-05, "loss": 0.4827, "step": 1548 }, { "epoch": 1.4422718808193669, "grad_norm": 0.30025885078234044, "learning_rate": 1.1541911003794413e-05, "loss": 0.4857, "step": 1549 }, { "epoch": 1.4432029795158288, "grad_norm": 0.3047229123150169, "learning_rate": 1.1535012073128666e-05, "loss": 0.4841, "step": 1550 }, { "epoch": 1.4441340782122905, "grad_norm": 0.28874314748475643, "learning_rate": 1.152811314246292e-05, "loss": 0.4973, "step": 1551 }, { "epoch": 1.4450651769087524, "grad_norm": 0.3175522449778512, "learning_rate": 1.1521214211797172e-05, "loss": 0.4911, "step": 1552 }, { "epoch": 1.4459962756052143, "grad_norm": 0.3055883361239215, "learning_rate": 1.1514315281131425e-05, "loss": 0.4876, "step": 1553 }, { "epoch": 1.446927374301676, "grad_norm": 0.28288666629151143, "learning_rate": 1.1507416350465679e-05, "loss": 0.4862, "step": 1554 }, { "epoch": 1.4478584729981379, "grad_norm": 0.2970604830887444, "learning_rate": 1.1500517419799933e-05, "loss": 0.4776, "step": 1555 }, { "epoch": 1.4487895716945998, "grad_norm": 0.31399663502051545, "learning_rate": 1.1493618489134187e-05, "loss": 0.4923, "step": 1556 }, { "epoch": 1.4497206703910615, "grad_norm": 0.3021419007501726, "learning_rate": 1.1486719558468438e-05, "loss": 0.5012, "step": 1557 }, { "epoch": 1.4506517690875234, "grad_norm": 0.3095860030756694, "learning_rate": 1.1479820627802692e-05, "loss": 0.5046, "step": 1558 }, { "epoch": 1.451582867783985, "grad_norm": 0.30506579404740586, "learning_rate": 1.1472921697136946e-05, "loss": 0.4975, "step": 1559 }, { "epoch": 1.452513966480447, "grad_norm": 0.29407994833628265, "learning_rate": 1.1466022766471198e-05, "loss": 0.4854, "step": 1560 }, { "epoch": 1.4534450651769086, "grad_norm": 0.30393694867153903, "learning_rate": 1.145912383580545e-05, "loss": 0.5122, "step": 1561 }, { "epoch": 1.4543761638733705, "grad_norm": 0.29466204937072743, "learning_rate": 1.1452224905139703e-05, "loss": 0.4788, "step": 1562 }, { "epoch": 1.4553072625698324, "grad_norm": 0.32078618487532584, "learning_rate": 1.1445325974473957e-05, "loss": 0.5281, "step": 1563 }, { "epoch": 1.4562383612662941, "grad_norm": 0.2986466912690393, "learning_rate": 1.1438427043808211e-05, "loss": 0.504, "step": 1564 }, { "epoch": 1.457169459962756, "grad_norm": 0.28361214765054754, "learning_rate": 1.1431528113142465e-05, "loss": 0.5023, "step": 1565 }, { "epoch": 1.458100558659218, "grad_norm": 0.3113993267975762, "learning_rate": 1.1424629182476716e-05, "loss": 0.5173, "step": 1566 }, { "epoch": 1.4590316573556796, "grad_norm": 0.30003845768135423, "learning_rate": 1.141773025181097e-05, "loss": 0.5227, "step": 1567 }, { "epoch": 1.4599627560521415, "grad_norm": 0.295404201942223, "learning_rate": 1.1410831321145224e-05, "loss": 0.5371, "step": 1568 }, { "epoch": 1.4608938547486034, "grad_norm": 0.29892566631537903, "learning_rate": 1.1403932390479477e-05, "loss": 0.5346, "step": 1569 }, { "epoch": 1.4618249534450651, "grad_norm": 0.3167631433097792, "learning_rate": 1.1397033459813729e-05, "loss": 0.4948, "step": 1570 }, { "epoch": 1.462756052141527, "grad_norm": 0.2963695235682915, "learning_rate": 1.1390134529147983e-05, "loss": 0.4907, "step": 1571 }, { "epoch": 1.463687150837989, "grad_norm": 0.31446577345203935, "learning_rate": 1.1383235598482236e-05, "loss": 0.5141, "step": 1572 }, { "epoch": 1.4646182495344506, "grad_norm": 0.2985922286584606, "learning_rate": 1.137633666781649e-05, "loss": 0.4802, "step": 1573 }, { "epoch": 1.4655493482309125, "grad_norm": 0.31468805461660426, "learning_rate": 1.1369437737150744e-05, "loss": 0.5123, "step": 1574 }, { "epoch": 1.4664804469273742, "grad_norm": 0.2976810923656446, "learning_rate": 1.1362538806484995e-05, "loss": 0.489, "step": 1575 }, { "epoch": 1.4674115456238361, "grad_norm": 0.2901683093564799, "learning_rate": 1.1355639875819249e-05, "loss": 0.4804, "step": 1576 }, { "epoch": 1.4683426443202978, "grad_norm": 0.3076273418853862, "learning_rate": 1.1348740945153503e-05, "loss": 0.4866, "step": 1577 }, { "epoch": 1.4692737430167597, "grad_norm": 0.29953772988626504, "learning_rate": 1.1341842014487757e-05, "loss": 0.4866, "step": 1578 }, { "epoch": 1.4702048417132216, "grad_norm": 0.31390101315654073, "learning_rate": 1.1334943083822008e-05, "loss": 0.4764, "step": 1579 }, { "epoch": 1.4711359404096833, "grad_norm": 0.3064379844494716, "learning_rate": 1.1328044153156262e-05, "loss": 0.5021, "step": 1580 }, { "epoch": 1.4720670391061452, "grad_norm": 0.31870755028666187, "learning_rate": 1.1321145222490516e-05, "loss": 0.5007, "step": 1581 }, { "epoch": 1.4729981378026071, "grad_norm": 0.33050797733493087, "learning_rate": 1.1314246291824768e-05, "loss": 0.5114, "step": 1582 }, { "epoch": 1.4739292364990688, "grad_norm": 0.2938254025919558, "learning_rate": 1.1307347361159022e-05, "loss": 0.474, "step": 1583 }, { "epoch": 1.4748603351955307, "grad_norm": 0.2815138511331892, "learning_rate": 1.1300448430493273e-05, "loss": 0.4695, "step": 1584 }, { "epoch": 1.4757914338919926, "grad_norm": 0.31209279239173715, "learning_rate": 1.1293549499827527e-05, "loss": 0.5203, "step": 1585 }, { "epoch": 1.4767225325884543, "grad_norm": 0.35482886133555347, "learning_rate": 1.1286650569161781e-05, "loss": 0.5255, "step": 1586 }, { "epoch": 1.4776536312849162, "grad_norm": 0.32638641676130076, "learning_rate": 1.1279751638496035e-05, "loss": 0.4756, "step": 1587 }, { "epoch": 1.4785847299813781, "grad_norm": 0.31479670829553663, "learning_rate": 1.1272852707830286e-05, "loss": 0.4751, "step": 1588 }, { "epoch": 1.4795158286778398, "grad_norm": 0.31974788222990613, "learning_rate": 1.126595377716454e-05, "loss": 0.5094, "step": 1589 }, { "epoch": 1.4804469273743017, "grad_norm": 0.35230154378373413, "learning_rate": 1.1259054846498794e-05, "loss": 0.4957, "step": 1590 }, { "epoch": 1.4813780260707636, "grad_norm": 0.29577140001010394, "learning_rate": 1.1252155915833047e-05, "loss": 0.4886, "step": 1591 }, { "epoch": 1.4823091247672253, "grad_norm": 0.31103113061992416, "learning_rate": 1.12452569851673e-05, "loss": 0.5102, "step": 1592 }, { "epoch": 1.4832402234636872, "grad_norm": 0.31223627259992476, "learning_rate": 1.1238358054501553e-05, "loss": 0.4581, "step": 1593 }, { "epoch": 1.484171322160149, "grad_norm": 0.30258982510268, "learning_rate": 1.1231459123835806e-05, "loss": 0.4956, "step": 1594 }, { "epoch": 1.4851024208566108, "grad_norm": 0.30559413827127735, "learning_rate": 1.122456019317006e-05, "loss": 0.5012, "step": 1595 }, { "epoch": 1.4860335195530725, "grad_norm": 0.30813945379555807, "learning_rate": 1.1217661262504314e-05, "loss": 0.4956, "step": 1596 }, { "epoch": 1.4869646182495344, "grad_norm": 0.2934815991178541, "learning_rate": 1.1210762331838564e-05, "loss": 0.4941, "step": 1597 }, { "epoch": 1.4878957169459963, "grad_norm": 0.33183563594866794, "learning_rate": 1.1203863401172819e-05, "loss": 0.5068, "step": 1598 }, { "epoch": 1.488826815642458, "grad_norm": 0.28260234035666354, "learning_rate": 1.1196964470507073e-05, "loss": 0.4933, "step": 1599 }, { "epoch": 1.48975791433892, "grad_norm": 0.30529116896444486, "learning_rate": 1.1190065539841327e-05, "loss": 0.5079, "step": 1600 }, { "epoch": 1.4906890130353818, "grad_norm": 0.31589367070206553, "learning_rate": 1.118316660917558e-05, "loss": 0.4642, "step": 1601 }, { "epoch": 1.4916201117318435, "grad_norm": 0.292204823966968, "learning_rate": 1.1176267678509832e-05, "loss": 0.5053, "step": 1602 }, { "epoch": 1.4925512104283054, "grad_norm": 0.30373626054086383, "learning_rate": 1.1169368747844086e-05, "loss": 0.4891, "step": 1603 }, { "epoch": 1.4934823091247673, "grad_norm": 0.3054743029050546, "learning_rate": 1.1162469817178338e-05, "loss": 0.5028, "step": 1604 }, { "epoch": 1.494413407821229, "grad_norm": 0.302159680631254, "learning_rate": 1.1155570886512592e-05, "loss": 0.5003, "step": 1605 }, { "epoch": 1.495344506517691, "grad_norm": 0.31878409417525055, "learning_rate": 1.1148671955846843e-05, "loss": 0.4927, "step": 1606 }, { "epoch": 1.4962756052141528, "grad_norm": 0.3150884513754109, "learning_rate": 1.1141773025181097e-05, "loss": 0.5196, "step": 1607 }, { "epoch": 1.4972067039106145, "grad_norm": 0.2902607955616942, "learning_rate": 1.1134874094515351e-05, "loss": 0.4886, "step": 1608 }, { "epoch": 1.4981378026070764, "grad_norm": 0.31101627054821973, "learning_rate": 1.1127975163849605e-05, "loss": 0.5019, "step": 1609 }, { "epoch": 1.499068901303538, "grad_norm": 0.30068797035597483, "learning_rate": 1.1121076233183858e-05, "loss": 0.4759, "step": 1610 }, { "epoch": 1.5, "grad_norm": 0.2972382074262038, "learning_rate": 1.111417730251811e-05, "loss": 0.498, "step": 1611 }, { "epoch": 1.5009310986964617, "grad_norm": 0.30426246447863875, "learning_rate": 1.1107278371852364e-05, "loss": 0.4926, "step": 1612 }, { "epoch": 1.5018621973929238, "grad_norm": 0.3042818323334038, "learning_rate": 1.1100379441186617e-05, "loss": 0.5106, "step": 1613 }, { "epoch": 1.5027932960893855, "grad_norm": 0.2961800040499315, "learning_rate": 1.109348051052087e-05, "loss": 0.4783, "step": 1614 }, { "epoch": 1.5037243947858472, "grad_norm": 0.30752119075444734, "learning_rate": 1.1086581579855123e-05, "loss": 0.5145, "step": 1615 }, { "epoch": 1.504655493482309, "grad_norm": 0.2990979744166731, "learning_rate": 1.1079682649189375e-05, "loss": 0.4874, "step": 1616 }, { "epoch": 1.505586592178771, "grad_norm": 0.31791797866734534, "learning_rate": 1.107278371852363e-05, "loss": 0.5166, "step": 1617 }, { "epoch": 1.5065176908752327, "grad_norm": 0.3055446638813843, "learning_rate": 1.1065884787857884e-05, "loss": 0.4878, "step": 1618 }, { "epoch": 1.5074487895716946, "grad_norm": 0.2914218219355836, "learning_rate": 1.1058985857192138e-05, "loss": 0.4831, "step": 1619 }, { "epoch": 1.5083798882681565, "grad_norm": 0.30631451790285497, "learning_rate": 1.1052086926526388e-05, "loss": 0.5011, "step": 1620 }, { "epoch": 1.5093109869646182, "grad_norm": 0.3061002525659987, "learning_rate": 1.1045187995860643e-05, "loss": 0.5086, "step": 1621 }, { "epoch": 1.51024208566108, "grad_norm": 0.29857176300231497, "learning_rate": 1.1038289065194897e-05, "loss": 0.5124, "step": 1622 }, { "epoch": 1.511173184357542, "grad_norm": 0.2949197430026894, "learning_rate": 1.1031390134529149e-05, "loss": 0.5079, "step": 1623 }, { "epoch": 1.5121042830540037, "grad_norm": 0.2755390106439256, "learning_rate": 1.1024491203863401e-05, "loss": 0.4955, "step": 1624 }, { "epoch": 1.5130353817504656, "grad_norm": 0.2913825527597605, "learning_rate": 1.1017592273197656e-05, "loss": 0.4849, "step": 1625 }, { "epoch": 1.5139664804469275, "grad_norm": 0.32492946493225283, "learning_rate": 1.1010693342531908e-05, "loss": 0.5125, "step": 1626 }, { "epoch": 1.5148975791433892, "grad_norm": 0.29496612113846177, "learning_rate": 1.1003794411866162e-05, "loss": 0.4817, "step": 1627 }, { "epoch": 1.5158286778398509, "grad_norm": 0.28661582660457746, "learning_rate": 1.0996895481200416e-05, "loss": 0.4876, "step": 1628 }, { "epoch": 1.516759776536313, "grad_norm": 0.30211449034585447, "learning_rate": 1.0989996550534667e-05, "loss": 0.4882, "step": 1629 }, { "epoch": 1.5176908752327747, "grad_norm": 0.31832339482036714, "learning_rate": 1.0983097619868921e-05, "loss": 0.5189, "step": 1630 }, { "epoch": 1.5186219739292364, "grad_norm": 0.3093132980895161, "learning_rate": 1.0976198689203175e-05, "loss": 0.515, "step": 1631 }, { "epoch": 1.5195530726256983, "grad_norm": 0.31349470807408897, "learning_rate": 1.0969299758537427e-05, "loss": 0.5105, "step": 1632 }, { "epoch": 1.5204841713221602, "grad_norm": 0.30259745820988543, "learning_rate": 1.096240082787168e-05, "loss": 0.5021, "step": 1633 }, { "epoch": 1.5214152700186219, "grad_norm": 0.2947938521015527, "learning_rate": 1.0955501897205934e-05, "loss": 0.5027, "step": 1634 }, { "epoch": 1.5223463687150838, "grad_norm": 0.28995973075701437, "learning_rate": 1.0948602966540186e-05, "loss": 0.514, "step": 1635 }, { "epoch": 1.5232774674115457, "grad_norm": 0.2962751305142807, "learning_rate": 1.094170403587444e-05, "loss": 0.4901, "step": 1636 }, { "epoch": 1.5242085661080074, "grad_norm": 0.3035574737182286, "learning_rate": 1.0934805105208695e-05, "loss": 0.492, "step": 1637 }, { "epoch": 1.5251396648044693, "grad_norm": 0.32502120681647734, "learning_rate": 1.0927906174542945e-05, "loss": 0.5284, "step": 1638 }, { "epoch": 1.5260707635009312, "grad_norm": 0.3084929880525772, "learning_rate": 1.09210072438772e-05, "loss": 0.4911, "step": 1639 }, { "epoch": 1.5270018621973929, "grad_norm": 0.2915176218061031, "learning_rate": 1.0914108313211454e-05, "loss": 0.506, "step": 1640 }, { "epoch": 1.5279329608938548, "grad_norm": 0.33140964426218683, "learning_rate": 1.0907209382545708e-05, "loss": 0.5092, "step": 1641 }, { "epoch": 1.5288640595903167, "grad_norm": 0.32839033517363225, "learning_rate": 1.0900310451879958e-05, "loss": 0.5183, "step": 1642 }, { "epoch": 1.5297951582867784, "grad_norm": 0.3110225232913441, "learning_rate": 1.0893411521214212e-05, "loss": 0.5063, "step": 1643 }, { "epoch": 1.5307262569832403, "grad_norm": 0.33361846379074056, "learning_rate": 1.0886512590548467e-05, "loss": 0.4905, "step": 1644 }, { "epoch": 1.5316573556797022, "grad_norm": 0.3246538084511171, "learning_rate": 1.0879613659882719e-05, "loss": 0.5159, "step": 1645 }, { "epoch": 1.5325884543761639, "grad_norm": 0.31136010769434747, "learning_rate": 1.0872714729216973e-05, "loss": 0.4795, "step": 1646 }, { "epoch": 1.5335195530726256, "grad_norm": 0.35702681399829506, "learning_rate": 1.0865815798551225e-05, "loss": 0.5063, "step": 1647 }, { "epoch": 1.5344506517690877, "grad_norm": 0.33165408044373107, "learning_rate": 1.0858916867885478e-05, "loss": 0.4943, "step": 1648 }, { "epoch": 1.5353817504655494, "grad_norm": 0.30407614448835574, "learning_rate": 1.0852017937219732e-05, "loss": 0.4868, "step": 1649 }, { "epoch": 1.536312849162011, "grad_norm": 0.3573375401480116, "learning_rate": 1.0845119006553986e-05, "loss": 0.4854, "step": 1650 }, { "epoch": 1.537243947858473, "grad_norm": 0.34687570401872453, "learning_rate": 1.0838220075888237e-05, "loss": 0.4919, "step": 1651 }, { "epoch": 1.5381750465549349, "grad_norm": 0.3028818150521698, "learning_rate": 1.0831321145222491e-05, "loss": 0.4743, "step": 1652 }, { "epoch": 1.5391061452513966, "grad_norm": 0.3695173015305603, "learning_rate": 1.0824422214556745e-05, "loss": 0.4813, "step": 1653 }, { "epoch": 1.5400372439478585, "grad_norm": 0.32510160927817294, "learning_rate": 1.0817523283890999e-05, "loss": 0.5002, "step": 1654 }, { "epoch": 1.5409683426443204, "grad_norm": 0.334270052237101, "learning_rate": 1.0810624353225251e-05, "loss": 0.496, "step": 1655 }, { "epoch": 1.541899441340782, "grad_norm": 0.35075131487174915, "learning_rate": 1.0803725422559504e-05, "loss": 0.4807, "step": 1656 }, { "epoch": 1.542830540037244, "grad_norm": 0.3302798538705529, "learning_rate": 1.0796826491893756e-05, "loss": 0.4783, "step": 1657 }, { "epoch": 1.5437616387337059, "grad_norm": 0.3596876869916029, "learning_rate": 1.078992756122801e-05, "loss": 0.5187, "step": 1658 }, { "epoch": 1.5446927374301676, "grad_norm": 0.36908259739717586, "learning_rate": 1.0783028630562265e-05, "loss": 0.4818, "step": 1659 }, { "epoch": 1.5456238361266295, "grad_norm": 0.32269649794218674, "learning_rate": 1.0776129699896515e-05, "loss": 0.4768, "step": 1660 }, { "epoch": 1.5465549348230914, "grad_norm": 0.3252620466988303, "learning_rate": 1.076923076923077e-05, "loss": 0.4991, "step": 1661 }, { "epoch": 1.547486033519553, "grad_norm": 0.3626211261370736, "learning_rate": 1.0762331838565023e-05, "loss": 0.5069, "step": 1662 }, { "epoch": 1.5484171322160147, "grad_norm": 0.3465130924422516, "learning_rate": 1.0755432907899278e-05, "loss": 0.4988, "step": 1663 }, { "epoch": 1.5493482309124769, "grad_norm": 0.3165800280656323, "learning_rate": 1.074853397723353e-05, "loss": 0.4734, "step": 1664 }, { "epoch": 1.5502793296089385, "grad_norm": 0.3137054984772283, "learning_rate": 1.0741635046567782e-05, "loss": 0.5258, "step": 1665 }, { "epoch": 1.5512104283054002, "grad_norm": 0.3252027049224563, "learning_rate": 1.0734736115902036e-05, "loss": 0.4912, "step": 1666 }, { "epoch": 1.5521415270018621, "grad_norm": 0.34523452261911286, "learning_rate": 1.0727837185236289e-05, "loss": 0.5135, "step": 1667 }, { "epoch": 1.553072625698324, "grad_norm": 0.29399664331317055, "learning_rate": 1.0720938254570543e-05, "loss": 0.4711, "step": 1668 }, { "epoch": 1.5540037243947857, "grad_norm": 0.3280810535073383, "learning_rate": 1.0714039323904795e-05, "loss": 0.5278, "step": 1669 }, { "epoch": 1.5549348230912476, "grad_norm": 0.30336337295329796, "learning_rate": 1.0707140393239048e-05, "loss": 0.4755, "step": 1670 }, { "epoch": 1.5558659217877095, "grad_norm": 0.30879540847182635, "learning_rate": 1.0700241462573302e-05, "loss": 0.4927, "step": 1671 }, { "epoch": 1.5567970204841712, "grad_norm": 0.34677665142371994, "learning_rate": 1.0693342531907556e-05, "loss": 0.519, "step": 1672 }, { "epoch": 1.5577281191806331, "grad_norm": 0.30227736689391965, "learning_rate": 1.068644360124181e-05, "loss": 0.4876, "step": 1673 }, { "epoch": 1.558659217877095, "grad_norm": 0.32575731704389926, "learning_rate": 1.067954467057606e-05, "loss": 0.4779, "step": 1674 }, { "epoch": 1.5595903165735567, "grad_norm": 0.30043670907794623, "learning_rate": 1.0672645739910315e-05, "loss": 0.4742, "step": 1675 }, { "epoch": 1.5605214152700186, "grad_norm": 0.3377169557866303, "learning_rate": 1.0665746809244569e-05, "loss": 0.5124, "step": 1676 }, { "epoch": 1.5614525139664805, "grad_norm": 0.31094763238183254, "learning_rate": 1.0658847878578821e-05, "loss": 0.4852, "step": 1677 }, { "epoch": 1.5623836126629422, "grad_norm": 0.29480525693972875, "learning_rate": 1.0651948947913074e-05, "loss": 0.4893, "step": 1678 }, { "epoch": 1.5633147113594041, "grad_norm": 0.31079872401459097, "learning_rate": 1.0645050017247326e-05, "loss": 0.5073, "step": 1679 }, { "epoch": 1.564245810055866, "grad_norm": 0.3169437856121022, "learning_rate": 1.063815108658158e-05, "loss": 0.5077, "step": 1680 }, { "epoch": 1.5651769087523277, "grad_norm": 0.3031177283795952, "learning_rate": 1.0631252155915834e-05, "loss": 0.4622, "step": 1681 }, { "epoch": 1.5661080074487894, "grad_norm": 0.31585844555720255, "learning_rate": 1.0624353225250089e-05, "loss": 0.4907, "step": 1682 }, { "epoch": 1.5670391061452515, "grad_norm": 0.31482529198853487, "learning_rate": 1.061745429458434e-05, "loss": 0.4825, "step": 1683 }, { "epoch": 1.5679702048417132, "grad_norm": 0.3041976585647666, "learning_rate": 1.0610555363918593e-05, "loss": 0.5069, "step": 1684 }, { "epoch": 1.568901303538175, "grad_norm": 0.35685045037127583, "learning_rate": 1.0603656433252847e-05, "loss": 0.5033, "step": 1685 }, { "epoch": 1.5698324022346368, "grad_norm": 0.312731299722245, "learning_rate": 1.05967575025871e-05, "loss": 0.507, "step": 1686 }, { "epoch": 1.5707635009310987, "grad_norm": 0.33348420736094536, "learning_rate": 1.0589858571921352e-05, "loss": 0.4982, "step": 1687 }, { "epoch": 1.5716945996275604, "grad_norm": 0.3670965234416642, "learning_rate": 1.0582959641255606e-05, "loss": 0.4955, "step": 1688 }, { "epoch": 1.5726256983240223, "grad_norm": 0.3095431092232973, "learning_rate": 1.0576060710589859e-05, "loss": 0.4827, "step": 1689 }, { "epoch": 1.5735567970204842, "grad_norm": 0.3051671373074317, "learning_rate": 1.0569161779924113e-05, "loss": 0.5031, "step": 1690 }, { "epoch": 1.574487895716946, "grad_norm": 0.30256499796311237, "learning_rate": 1.0562262849258367e-05, "loss": 0.492, "step": 1691 }, { "epoch": 1.5754189944134078, "grad_norm": 0.3085920997601109, "learning_rate": 1.0555363918592618e-05, "loss": 0.4878, "step": 1692 }, { "epoch": 1.5763500931098697, "grad_norm": 0.35026460538923676, "learning_rate": 1.0548464987926872e-05, "loss": 0.5261, "step": 1693 }, { "epoch": 1.5772811918063314, "grad_norm": 0.29710765270983036, "learning_rate": 1.0541566057261126e-05, "loss": 0.4854, "step": 1694 }, { "epoch": 1.5782122905027933, "grad_norm": 0.29641554342559756, "learning_rate": 1.053466712659538e-05, "loss": 0.4845, "step": 1695 }, { "epoch": 1.5791433891992552, "grad_norm": 0.3254962583964119, "learning_rate": 1.052776819592963e-05, "loss": 0.504, "step": 1696 }, { "epoch": 1.580074487895717, "grad_norm": 0.32904668221895417, "learning_rate": 1.0520869265263885e-05, "loss": 0.4998, "step": 1697 }, { "epoch": 1.5810055865921788, "grad_norm": 0.3006373346452701, "learning_rate": 1.0513970334598139e-05, "loss": 0.4868, "step": 1698 }, { "epoch": 1.5819366852886407, "grad_norm": 0.3080493566827904, "learning_rate": 1.0507071403932391e-05, "loss": 0.5101, "step": 1699 }, { "epoch": 1.5828677839851024, "grad_norm": 0.3003439301713382, "learning_rate": 1.0500172473266645e-05, "loss": 0.4929, "step": 1700 }, { "epoch": 1.583798882681564, "grad_norm": 0.29885689575508306, "learning_rate": 1.0493273542600896e-05, "loss": 0.5039, "step": 1701 }, { "epoch": 1.5847299813780262, "grad_norm": 0.2932566803598166, "learning_rate": 1.048637461193515e-05, "loss": 0.4758, "step": 1702 }, { "epoch": 1.585661080074488, "grad_norm": 0.3064129056172716, "learning_rate": 1.0479475681269404e-05, "loss": 0.4933, "step": 1703 }, { "epoch": 1.5865921787709496, "grad_norm": 0.3206758946706839, "learning_rate": 1.0472576750603658e-05, "loss": 0.509, "step": 1704 }, { "epoch": 1.5875232774674115, "grad_norm": 0.30558349636864296, "learning_rate": 1.0465677819937909e-05, "loss": 0.4888, "step": 1705 }, { "epoch": 1.5884543761638734, "grad_norm": 0.30397843836596966, "learning_rate": 1.0458778889272163e-05, "loss": 0.4886, "step": 1706 }, { "epoch": 1.589385474860335, "grad_norm": 0.3025840360526038, "learning_rate": 1.0451879958606417e-05, "loss": 0.4775, "step": 1707 }, { "epoch": 1.590316573556797, "grad_norm": 0.30635012963628233, "learning_rate": 1.044498102794067e-05, "loss": 0.5202, "step": 1708 }, { "epoch": 1.591247672253259, "grad_norm": 0.3089496976873633, "learning_rate": 1.0438082097274924e-05, "loss": 0.4772, "step": 1709 }, { "epoch": 1.5921787709497206, "grad_norm": 0.32071822684640156, "learning_rate": 1.0431183166609176e-05, "loss": 0.5231, "step": 1710 }, { "epoch": 1.5931098696461825, "grad_norm": 0.33416374901740076, "learning_rate": 1.0424284235943429e-05, "loss": 0.4959, "step": 1711 }, { "epoch": 1.5940409683426444, "grad_norm": 0.3087953963781966, "learning_rate": 1.0417385305277683e-05, "loss": 0.4897, "step": 1712 }, { "epoch": 1.594972067039106, "grad_norm": 0.322322635500486, "learning_rate": 1.0410486374611937e-05, "loss": 0.5371, "step": 1713 }, { "epoch": 1.595903165735568, "grad_norm": 0.3281588883838323, "learning_rate": 1.0403587443946188e-05, "loss": 0.4922, "step": 1714 }, { "epoch": 1.59683426443203, "grad_norm": 0.29480537574750854, "learning_rate": 1.0396688513280442e-05, "loss": 0.5264, "step": 1715 }, { "epoch": 1.5977653631284916, "grad_norm": 0.2804249874882031, "learning_rate": 1.0389789582614696e-05, "loss": 0.5003, "step": 1716 }, { "epoch": 1.5986964618249533, "grad_norm": 0.33830700554855214, "learning_rate": 1.038289065194895e-05, "loss": 0.4972, "step": 1717 }, { "epoch": 1.5996275605214154, "grad_norm": 0.28969965889330046, "learning_rate": 1.0375991721283202e-05, "loss": 0.5018, "step": 1718 }, { "epoch": 1.600558659217877, "grad_norm": 0.28224548592939436, "learning_rate": 1.0369092790617455e-05, "loss": 0.4842, "step": 1719 }, { "epoch": 1.6014897579143388, "grad_norm": 0.2945088129142804, "learning_rate": 1.0362193859951709e-05, "loss": 0.4643, "step": 1720 }, { "epoch": 1.6024208566108007, "grad_norm": 0.2953034393781217, "learning_rate": 1.0355294929285961e-05, "loss": 0.4926, "step": 1721 }, { "epoch": 1.6033519553072626, "grad_norm": 0.28636005457218583, "learning_rate": 1.0348395998620215e-05, "loss": 0.4964, "step": 1722 }, { "epoch": 1.6042830540037243, "grad_norm": 0.2822353270689103, "learning_rate": 1.0341497067954466e-05, "loss": 0.4756, "step": 1723 }, { "epoch": 1.6052141527001862, "grad_norm": 0.28162136198126486, "learning_rate": 1.033459813728872e-05, "loss": 0.4917, "step": 1724 }, { "epoch": 1.606145251396648, "grad_norm": 0.2958609033308991, "learning_rate": 1.0327699206622974e-05, "loss": 0.5115, "step": 1725 }, { "epoch": 1.6070763500931098, "grad_norm": 0.3045685602895541, "learning_rate": 1.0320800275957228e-05, "loss": 0.5143, "step": 1726 }, { "epoch": 1.6080074487895717, "grad_norm": 0.2882016368229705, "learning_rate": 1.0313901345291482e-05, "loss": 0.5253, "step": 1727 }, { "epoch": 1.6089385474860336, "grad_norm": 0.2786764660022298, "learning_rate": 1.0307002414625733e-05, "loss": 0.4718, "step": 1728 }, { "epoch": 1.6098696461824953, "grad_norm": 0.34765157925738227, "learning_rate": 1.0300103483959987e-05, "loss": 0.529, "step": 1729 }, { "epoch": 1.6108007448789572, "grad_norm": 0.28440882735678846, "learning_rate": 1.029320455329424e-05, "loss": 0.494, "step": 1730 }, { "epoch": 1.611731843575419, "grad_norm": 0.30529016612943355, "learning_rate": 1.0286305622628494e-05, "loss": 0.4609, "step": 1731 }, { "epoch": 1.6126629422718808, "grad_norm": 0.3177670092809712, "learning_rate": 1.0279406691962746e-05, "loss": 0.5204, "step": 1732 }, { "epoch": 1.6135940409683427, "grad_norm": 0.31647894337371263, "learning_rate": 1.0272507761296999e-05, "loss": 0.5078, "step": 1733 }, { "epoch": 1.6145251396648046, "grad_norm": 0.30825461443003127, "learning_rate": 1.0265608830631253e-05, "loss": 0.5033, "step": 1734 }, { "epoch": 1.6154562383612663, "grad_norm": 0.34421812485957126, "learning_rate": 1.0258709899965507e-05, "loss": 0.5182, "step": 1735 }, { "epoch": 1.616387337057728, "grad_norm": 0.3113340104116566, "learning_rate": 1.0251810969299761e-05, "loss": 0.5214, "step": 1736 }, { "epoch": 1.61731843575419, "grad_norm": 0.33014709781376567, "learning_rate": 1.0244912038634012e-05, "loss": 0.5042, "step": 1737 }, { "epoch": 1.6182495344506518, "grad_norm": 0.30728691272293757, "learning_rate": 1.0238013107968266e-05, "loss": 0.4651, "step": 1738 }, { "epoch": 1.6191806331471135, "grad_norm": 0.29067713808296614, "learning_rate": 1.023111417730252e-05, "loss": 0.4388, "step": 1739 }, { "epoch": 1.6201117318435754, "grad_norm": 0.32783480806804494, "learning_rate": 1.0224215246636772e-05, "loss": 0.486, "step": 1740 }, { "epoch": 1.6210428305400373, "grad_norm": 0.29612726318007687, "learning_rate": 1.0217316315971025e-05, "loss": 0.5337, "step": 1741 }, { "epoch": 1.621973929236499, "grad_norm": 0.30515788432211777, "learning_rate": 1.0210417385305279e-05, "loss": 0.5121, "step": 1742 }, { "epoch": 1.6229050279329609, "grad_norm": 0.31259554207363793, "learning_rate": 1.0203518454639531e-05, "loss": 0.5123, "step": 1743 }, { "epoch": 1.6238361266294228, "grad_norm": 0.29292375129484927, "learning_rate": 1.0196619523973785e-05, "loss": 0.5426, "step": 1744 }, { "epoch": 1.6247672253258845, "grad_norm": 0.2871645706686292, "learning_rate": 1.018972059330804e-05, "loss": 0.4992, "step": 1745 }, { "epoch": 1.6256983240223464, "grad_norm": 0.30004212366141436, "learning_rate": 1.018282166264229e-05, "loss": 0.484, "step": 1746 }, { "epoch": 1.6266294227188083, "grad_norm": 0.3213630389700471, "learning_rate": 1.0175922731976544e-05, "loss": 0.4996, "step": 1747 }, { "epoch": 1.62756052141527, "grad_norm": 0.31753547960516704, "learning_rate": 1.0169023801310798e-05, "loss": 0.4783, "step": 1748 }, { "epoch": 1.6284916201117319, "grad_norm": 0.3212562196149284, "learning_rate": 1.0162124870645052e-05, "loss": 0.4881, "step": 1749 }, { "epoch": 1.6294227188081938, "grad_norm": 0.3075563768465655, "learning_rate": 1.0155225939979303e-05, "loss": 0.4914, "step": 1750 }, { "epoch": 1.6303538175046555, "grad_norm": 0.3088845776670974, "learning_rate": 1.0148327009313557e-05, "loss": 0.4819, "step": 1751 }, { "epoch": 1.6312849162011172, "grad_norm": 0.3366461632054172, "learning_rate": 1.014142807864781e-05, "loss": 0.5262, "step": 1752 }, { "epoch": 1.6322160148975793, "grad_norm": 0.32759972044120067, "learning_rate": 1.0134529147982064e-05, "loss": 0.5335, "step": 1753 }, { "epoch": 1.633147113594041, "grad_norm": 0.2947726835720062, "learning_rate": 1.0127630217316318e-05, "loss": 0.4774, "step": 1754 }, { "epoch": 1.6340782122905027, "grad_norm": 0.3330193467883526, "learning_rate": 1.0120731286650568e-05, "loss": 0.5147, "step": 1755 }, { "epoch": 1.6350093109869648, "grad_norm": 0.3281756491092111, "learning_rate": 1.0113832355984823e-05, "loss": 0.4729, "step": 1756 }, { "epoch": 1.6359404096834265, "grad_norm": 0.2905244904216574, "learning_rate": 1.0106933425319077e-05, "loss": 0.5117, "step": 1757 }, { "epoch": 1.6368715083798882, "grad_norm": 0.3214514107690897, "learning_rate": 1.010003449465333e-05, "loss": 0.4845, "step": 1758 }, { "epoch": 1.63780260707635, "grad_norm": 0.32299472842823157, "learning_rate": 1.0093135563987581e-05, "loss": 0.4809, "step": 1759 }, { "epoch": 1.638733705772812, "grad_norm": 0.29187898322881933, "learning_rate": 1.0086236633321836e-05, "loss": 0.4772, "step": 1760 }, { "epoch": 1.6396648044692737, "grad_norm": 0.33193743924950514, "learning_rate": 1.007933770265609e-05, "loss": 0.5167, "step": 1761 }, { "epoch": 1.6405959031657356, "grad_norm": 0.29719168933104867, "learning_rate": 1.0072438771990342e-05, "loss": 0.4628, "step": 1762 }, { "epoch": 1.6415270018621975, "grad_norm": 0.34575165258466445, "learning_rate": 1.0065539841324596e-05, "loss": 0.4841, "step": 1763 }, { "epoch": 1.6424581005586592, "grad_norm": 0.3189221177165702, "learning_rate": 1.0058640910658849e-05, "loss": 0.4925, "step": 1764 }, { "epoch": 1.643389199255121, "grad_norm": 0.3091065181876988, "learning_rate": 1.0051741979993101e-05, "loss": 0.5059, "step": 1765 }, { "epoch": 1.644320297951583, "grad_norm": 0.3384907013396375, "learning_rate": 1.0044843049327355e-05, "loss": 0.49, "step": 1766 }, { "epoch": 1.6452513966480447, "grad_norm": 0.3332252610244633, "learning_rate": 1.003794411866161e-05, "loss": 0.5186, "step": 1767 }, { "epoch": 1.6461824953445066, "grad_norm": 0.28617845971056033, "learning_rate": 1.003104518799586e-05, "loss": 0.4597, "step": 1768 }, { "epoch": 1.6471135940409685, "grad_norm": 0.3252343809972776, "learning_rate": 1.0024146257330114e-05, "loss": 0.4887, "step": 1769 }, { "epoch": 1.6480446927374302, "grad_norm": 0.32338535820772285, "learning_rate": 1.0017247326664368e-05, "loss": 0.4796, "step": 1770 }, { "epoch": 1.6489757914338918, "grad_norm": 0.34479957464516703, "learning_rate": 1.0010348395998622e-05, "loss": 0.5089, "step": 1771 }, { "epoch": 1.649906890130354, "grad_norm": 0.3035193713128041, "learning_rate": 1.0003449465332875e-05, "loss": 0.4995, "step": 1772 }, { "epoch": 1.6508379888268156, "grad_norm": 0.33009856794595965, "learning_rate": 9.996550534667127e-06, "loss": 0.5286, "step": 1773 }, { "epoch": 1.6517690875232773, "grad_norm": 0.3300257851253095, "learning_rate": 9.98965160400138e-06, "loss": 0.4973, "step": 1774 }, { "epoch": 1.6527001862197392, "grad_norm": 0.30199231794291365, "learning_rate": 9.982752673335634e-06, "loss": 0.492, "step": 1775 }, { "epoch": 1.6536312849162011, "grad_norm": 0.32725278471237607, "learning_rate": 9.975853742669886e-06, "loss": 0.4849, "step": 1776 }, { "epoch": 1.6545623836126628, "grad_norm": 0.2860086771640454, "learning_rate": 9.96895481200414e-06, "loss": 0.4704, "step": 1777 }, { "epoch": 1.6554934823091247, "grad_norm": 0.30230533358901973, "learning_rate": 9.962055881338394e-06, "loss": 0.498, "step": 1778 }, { "epoch": 1.6564245810055866, "grad_norm": 0.2993182675915892, "learning_rate": 9.955156950672647e-06, "loss": 0.4913, "step": 1779 }, { "epoch": 1.6573556797020483, "grad_norm": 0.2971846951940615, "learning_rate": 9.9482580200069e-06, "loss": 0.4936, "step": 1780 }, { "epoch": 1.6582867783985102, "grad_norm": 0.28900227295343345, "learning_rate": 9.941359089341153e-06, "loss": 0.4912, "step": 1781 }, { "epoch": 1.6592178770949721, "grad_norm": 0.32537048928480505, "learning_rate": 9.934460158675407e-06, "loss": 0.5224, "step": 1782 }, { "epoch": 1.6601489757914338, "grad_norm": 0.3144551713422396, "learning_rate": 9.92756122800966e-06, "loss": 0.5009, "step": 1783 }, { "epoch": 1.6610800744878957, "grad_norm": 0.31247582805341667, "learning_rate": 9.920662297343912e-06, "loss": 0.4926, "step": 1784 }, { "epoch": 1.6620111731843576, "grad_norm": 0.33274587617105644, "learning_rate": 9.913763366678164e-06, "loss": 0.4919, "step": 1785 }, { "epoch": 1.6629422718808193, "grad_norm": 0.3042316821305636, "learning_rate": 9.906864436012418e-06, "loss": 0.4943, "step": 1786 }, { "epoch": 1.6638733705772812, "grad_norm": 0.31637404209690817, "learning_rate": 9.899965505346673e-06, "loss": 0.5093, "step": 1787 }, { "epoch": 1.6648044692737431, "grad_norm": 0.31679334895250394, "learning_rate": 9.893066574680925e-06, "loss": 0.4846, "step": 1788 }, { "epoch": 1.6657355679702048, "grad_norm": 0.29264279277959865, "learning_rate": 9.886167644015179e-06, "loss": 0.4941, "step": 1789 }, { "epoch": 1.6666666666666665, "grad_norm": 0.33106048187615594, "learning_rate": 9.879268713349431e-06, "loss": 0.4726, "step": 1790 }, { "epoch": 1.6675977653631286, "grad_norm": 0.30700896120128235, "learning_rate": 9.872369782683686e-06, "loss": 0.4771, "step": 1791 }, { "epoch": 1.6685288640595903, "grad_norm": 0.3007005747255259, "learning_rate": 9.865470852017938e-06, "loss": 0.4933, "step": 1792 }, { "epoch": 1.669459962756052, "grad_norm": 0.3028858537707139, "learning_rate": 9.858571921352192e-06, "loss": 0.4785, "step": 1793 }, { "epoch": 1.670391061452514, "grad_norm": 0.33264209283558277, "learning_rate": 9.851672990686445e-06, "loss": 0.4799, "step": 1794 }, { "epoch": 1.6713221601489758, "grad_norm": 0.3072552812122279, "learning_rate": 9.844774060020697e-06, "loss": 0.5051, "step": 1795 }, { "epoch": 1.6722532588454375, "grad_norm": 0.31104226093754395, "learning_rate": 9.837875129354951e-06, "loss": 0.4906, "step": 1796 }, { "epoch": 1.6731843575418994, "grad_norm": 0.29934441050873434, "learning_rate": 9.830976198689203e-06, "loss": 0.5041, "step": 1797 }, { "epoch": 1.6741154562383613, "grad_norm": 0.3028190080784005, "learning_rate": 9.824077268023458e-06, "loss": 0.4841, "step": 1798 }, { "epoch": 1.675046554934823, "grad_norm": 0.3183575965876792, "learning_rate": 9.81717833735771e-06, "loss": 0.486, "step": 1799 }, { "epoch": 1.675977653631285, "grad_norm": 0.2848018942809015, "learning_rate": 9.810279406691964e-06, "loss": 0.4733, "step": 1800 }, { "epoch": 1.6769087523277468, "grad_norm": 0.3466284575913065, "learning_rate": 9.803380476026216e-06, "loss": 0.4923, "step": 1801 }, { "epoch": 1.6778398510242085, "grad_norm": 0.30188304060034443, "learning_rate": 9.79648154536047e-06, "loss": 0.4987, "step": 1802 }, { "epoch": 1.6787709497206704, "grad_norm": 0.33608654462580434, "learning_rate": 9.789582614694723e-06, "loss": 0.501, "step": 1803 }, { "epoch": 1.6797020484171323, "grad_norm": 0.34784294914150954, "learning_rate": 9.782683684028977e-06, "loss": 0.4751, "step": 1804 }, { "epoch": 1.680633147113594, "grad_norm": 0.3052042392650728, "learning_rate": 9.77578475336323e-06, "loss": 0.5099, "step": 1805 }, { "epoch": 1.6815642458100557, "grad_norm": 0.2955871806502211, "learning_rate": 9.768885822697482e-06, "loss": 0.5076, "step": 1806 }, { "epoch": 1.6824953445065178, "grad_norm": 0.3418700032371924, "learning_rate": 9.761986892031736e-06, "loss": 0.4978, "step": 1807 }, { "epoch": 1.6834264432029795, "grad_norm": 0.2919555894322, "learning_rate": 9.755087961365988e-06, "loss": 0.4763, "step": 1808 }, { "epoch": 1.6843575418994412, "grad_norm": 0.3051408067960108, "learning_rate": 9.748189030700242e-06, "loss": 0.4825, "step": 1809 }, { "epoch": 1.6852886405959033, "grad_norm": 0.30557205520900216, "learning_rate": 9.741290100034495e-06, "loss": 0.4976, "step": 1810 }, { "epoch": 1.686219739292365, "grad_norm": 0.28897985010739724, "learning_rate": 9.734391169368749e-06, "loss": 0.4752, "step": 1811 }, { "epoch": 1.6871508379888267, "grad_norm": 0.32564747957292817, "learning_rate": 9.727492238703001e-06, "loss": 0.504, "step": 1812 }, { "epoch": 1.6880819366852886, "grad_norm": 0.3053579022019336, "learning_rate": 9.720593308037255e-06, "loss": 0.4946, "step": 1813 }, { "epoch": 1.6890130353817505, "grad_norm": 0.28880965957737337, "learning_rate": 9.713694377371508e-06, "loss": 0.4975, "step": 1814 }, { "epoch": 1.6899441340782122, "grad_norm": 0.35788325805620314, "learning_rate": 9.706795446705762e-06, "loss": 0.495, "step": 1815 }, { "epoch": 1.690875232774674, "grad_norm": 0.32675812120428155, "learning_rate": 9.699896516040014e-06, "loss": 0.4955, "step": 1816 }, { "epoch": 1.691806331471136, "grad_norm": 0.2865019546469338, "learning_rate": 9.692997585374267e-06, "loss": 0.4868, "step": 1817 }, { "epoch": 1.6927374301675977, "grad_norm": 0.2982119507920427, "learning_rate": 9.686098654708521e-06, "loss": 0.4628, "step": 1818 }, { "epoch": 1.6936685288640596, "grad_norm": 0.31404626659547397, "learning_rate": 9.679199724042773e-06, "loss": 0.464, "step": 1819 }, { "epoch": 1.6945996275605215, "grad_norm": 0.3174349097747208, "learning_rate": 9.672300793377027e-06, "loss": 0.4722, "step": 1820 }, { "epoch": 1.6955307262569832, "grad_norm": 0.30127285326816017, "learning_rate": 9.66540186271128e-06, "loss": 0.4899, "step": 1821 }, { "epoch": 1.696461824953445, "grad_norm": 0.2795975322032741, "learning_rate": 9.658502932045534e-06, "loss": 0.4845, "step": 1822 }, { "epoch": 1.697392923649907, "grad_norm": 0.30563348550172653, "learning_rate": 9.651604001379788e-06, "loss": 0.4975, "step": 1823 }, { "epoch": 1.6983240223463687, "grad_norm": 0.2925893282510288, "learning_rate": 9.64470507071404e-06, "loss": 0.4665, "step": 1824 }, { "epoch": 1.6992551210428304, "grad_norm": 0.27981983663309606, "learning_rate": 9.637806140048293e-06, "loss": 0.4798, "step": 1825 }, { "epoch": 1.7001862197392925, "grad_norm": 0.3057322597797275, "learning_rate": 9.630907209382547e-06, "loss": 0.4883, "step": 1826 }, { "epoch": 1.7011173184357542, "grad_norm": 0.313172093496565, "learning_rate": 9.6240082787168e-06, "loss": 0.4794, "step": 1827 }, { "epoch": 1.7020484171322159, "grad_norm": 0.3007466955555391, "learning_rate": 9.617109348051052e-06, "loss": 0.4586, "step": 1828 }, { "epoch": 1.7029795158286778, "grad_norm": 0.2837829349603267, "learning_rate": 9.610210417385306e-06, "loss": 0.4908, "step": 1829 }, { "epoch": 1.7039106145251397, "grad_norm": 0.30124813784085364, "learning_rate": 9.603311486719558e-06, "loss": 0.5052, "step": 1830 }, { "epoch": 1.7048417132216014, "grad_norm": 0.31976533994269085, "learning_rate": 9.596412556053812e-06, "loss": 0.4818, "step": 1831 }, { "epoch": 1.7057728119180633, "grad_norm": 0.29856334379709054, "learning_rate": 9.589513625388066e-06, "loss": 0.5036, "step": 1832 }, { "epoch": 1.7067039106145252, "grad_norm": 0.2917918188778255, "learning_rate": 9.582614694722319e-06, "loss": 0.4935, "step": 1833 }, { "epoch": 1.7076350093109869, "grad_norm": 0.32226154822967873, "learning_rate": 9.575715764056573e-06, "loss": 0.4926, "step": 1834 }, { "epoch": 1.7085661080074488, "grad_norm": 0.30153994493025904, "learning_rate": 9.568816833390825e-06, "loss": 0.5029, "step": 1835 }, { "epoch": 1.7094972067039107, "grad_norm": 0.3029752863136932, "learning_rate": 9.561917902725078e-06, "loss": 0.4968, "step": 1836 }, { "epoch": 1.7104283054003724, "grad_norm": 0.31257968958626453, "learning_rate": 9.555018972059332e-06, "loss": 0.4898, "step": 1837 }, { "epoch": 1.7113594040968343, "grad_norm": 0.2909987789746104, "learning_rate": 9.548120041393584e-06, "loss": 0.5046, "step": 1838 }, { "epoch": 1.7122905027932962, "grad_norm": 0.32212369941812175, "learning_rate": 9.541221110727838e-06, "loss": 0.4806, "step": 1839 }, { "epoch": 1.7132216014897579, "grad_norm": 0.314869306698511, "learning_rate": 9.53432218006209e-06, "loss": 0.4801, "step": 1840 }, { "epoch": 1.7141527001862198, "grad_norm": 0.31706296568978526, "learning_rate": 9.527423249396345e-06, "loss": 0.4892, "step": 1841 }, { "epoch": 1.7150837988826817, "grad_norm": 0.3072465787330472, "learning_rate": 9.520524318730597e-06, "loss": 0.5061, "step": 1842 }, { "epoch": 1.7160148975791434, "grad_norm": 0.31347210524719843, "learning_rate": 9.513625388064851e-06, "loss": 0.4983, "step": 1843 }, { "epoch": 1.716945996275605, "grad_norm": 0.31561770695642655, "learning_rate": 9.506726457399104e-06, "loss": 0.474, "step": 1844 }, { "epoch": 1.7178770949720672, "grad_norm": 0.29272479061586054, "learning_rate": 9.499827526733358e-06, "loss": 0.4899, "step": 1845 }, { "epoch": 1.7188081936685289, "grad_norm": 0.31449875245977293, "learning_rate": 9.49292859606761e-06, "loss": 0.4849, "step": 1846 }, { "epoch": 1.7197392923649906, "grad_norm": 0.3023298302650537, "learning_rate": 9.486029665401863e-06, "loss": 0.5046, "step": 1847 }, { "epoch": 1.7206703910614525, "grad_norm": 0.3026727917712334, "learning_rate": 9.479130734736117e-06, "loss": 0.4887, "step": 1848 }, { "epoch": 1.7216014897579144, "grad_norm": 0.3062856389989518, "learning_rate": 9.47223180407037e-06, "loss": 0.4981, "step": 1849 }, { "epoch": 1.722532588454376, "grad_norm": 0.3149187090398837, "learning_rate": 9.465332873404623e-06, "loss": 0.5401, "step": 1850 }, { "epoch": 1.723463687150838, "grad_norm": 0.2946580735292078, "learning_rate": 9.458433942738876e-06, "loss": 0.4849, "step": 1851 }, { "epoch": 1.7243947858472999, "grad_norm": 0.3098655948720947, "learning_rate": 9.45153501207313e-06, "loss": 0.4803, "step": 1852 }, { "epoch": 1.7253258845437616, "grad_norm": 0.3079679384397656, "learning_rate": 9.444636081407382e-06, "loss": 0.4825, "step": 1853 }, { "epoch": 1.7262569832402235, "grad_norm": 0.29265808519627295, "learning_rate": 9.437737150741636e-06, "loss": 0.4806, "step": 1854 }, { "epoch": 1.7271880819366854, "grad_norm": 0.3017491332364761, "learning_rate": 9.430838220075889e-06, "loss": 0.4809, "step": 1855 }, { "epoch": 1.728119180633147, "grad_norm": 0.3219991671736399, "learning_rate": 9.423939289410143e-06, "loss": 0.4954, "step": 1856 }, { "epoch": 1.729050279329609, "grad_norm": 0.31448016195995, "learning_rate": 9.417040358744395e-06, "loss": 0.5495, "step": 1857 }, { "epoch": 1.7299813780260709, "grad_norm": 0.29706425398273006, "learning_rate": 9.410141428078648e-06, "loss": 0.5066, "step": 1858 }, { "epoch": 1.7309124767225326, "grad_norm": 0.3042673276726806, "learning_rate": 9.403242497412902e-06, "loss": 0.5053, "step": 1859 }, { "epoch": 1.7318435754189943, "grad_norm": 0.29421857918772526, "learning_rate": 9.396343566747154e-06, "loss": 0.5107, "step": 1860 }, { "epoch": 1.7327746741154564, "grad_norm": 0.3131557330324323, "learning_rate": 9.389444636081408e-06, "loss": 0.486, "step": 1861 }, { "epoch": 1.733705772811918, "grad_norm": 0.2988380194865979, "learning_rate": 9.38254570541566e-06, "loss": 0.4577, "step": 1862 }, { "epoch": 1.7346368715083798, "grad_norm": 0.30679042616380386, "learning_rate": 9.375646774749915e-06, "loss": 0.4839, "step": 1863 }, { "epoch": 1.7355679702048417, "grad_norm": 0.31651194278280154, "learning_rate": 9.368747844084167e-06, "loss": 0.5067, "step": 1864 }, { "epoch": 1.7364990689013036, "grad_norm": 0.327833261203819, "learning_rate": 9.361848913418421e-06, "loss": 0.5026, "step": 1865 }, { "epoch": 1.7374301675977653, "grad_norm": 0.3353866172065466, "learning_rate": 9.354949982752675e-06, "loss": 0.512, "step": 1866 }, { "epoch": 1.7383612662942272, "grad_norm": 0.29564211127598844, "learning_rate": 9.348051052086928e-06, "loss": 0.5049, "step": 1867 }, { "epoch": 1.739292364990689, "grad_norm": 0.33057172783136646, "learning_rate": 9.34115212142118e-06, "loss": 0.4934, "step": 1868 }, { "epoch": 1.7402234636871508, "grad_norm": 0.31531576082765056, "learning_rate": 9.334253190755433e-06, "loss": 0.4894, "step": 1869 }, { "epoch": 1.7411545623836127, "grad_norm": 0.2846542729134606, "learning_rate": 9.327354260089687e-06, "loss": 0.4798, "step": 1870 }, { "epoch": 1.7420856610800746, "grad_norm": 0.28726303112310153, "learning_rate": 9.32045532942394e-06, "loss": 0.5145, "step": 1871 }, { "epoch": 1.7430167597765363, "grad_norm": 0.31549972412279403, "learning_rate": 9.313556398758193e-06, "loss": 0.4965, "step": 1872 }, { "epoch": 1.7439478584729982, "grad_norm": 0.28877667953803615, "learning_rate": 9.306657468092446e-06, "loss": 0.5087, "step": 1873 }, { "epoch": 1.74487895716946, "grad_norm": 0.3098307893661132, "learning_rate": 9.2997585374267e-06, "loss": 0.5438, "step": 1874 }, { "epoch": 1.7458100558659218, "grad_norm": 0.2948168803453685, "learning_rate": 9.292859606760954e-06, "loss": 0.4959, "step": 1875 }, { "epoch": 1.7467411545623837, "grad_norm": 0.28485360154124095, "learning_rate": 9.285960676095206e-06, "loss": 0.516, "step": 1876 }, { "epoch": 1.7476722532588456, "grad_norm": 0.30090904968995835, "learning_rate": 9.27906174542946e-06, "loss": 0.5027, "step": 1877 }, { "epoch": 1.7486033519553073, "grad_norm": 0.298234535281418, "learning_rate": 9.272162814763713e-06, "loss": 0.4912, "step": 1878 }, { "epoch": 1.749534450651769, "grad_norm": 0.2959875178307342, "learning_rate": 9.265263884097965e-06, "loss": 0.5002, "step": 1879 }, { "epoch": 1.750465549348231, "grad_norm": 0.2953603221798023, "learning_rate": 9.258364953432218e-06, "loss": 0.4977, "step": 1880 }, { "epoch": 1.7513966480446927, "grad_norm": 0.30007349626138424, "learning_rate": 9.251466022766472e-06, "loss": 0.5045, "step": 1881 }, { "epoch": 1.7523277467411544, "grad_norm": 0.2911316079985778, "learning_rate": 9.244567092100724e-06, "loss": 0.5032, "step": 1882 }, { "epoch": 1.7532588454376163, "grad_norm": 0.3000457287579537, "learning_rate": 9.237668161434978e-06, "loss": 0.479, "step": 1883 }, { "epoch": 1.7541899441340782, "grad_norm": 0.30925804964646275, "learning_rate": 9.230769230769232e-06, "loss": 0.5046, "step": 1884 }, { "epoch": 1.75512104283054, "grad_norm": 0.3042057518999857, "learning_rate": 9.223870300103485e-06, "loss": 0.5215, "step": 1885 }, { "epoch": 1.7560521415270018, "grad_norm": 0.3109779748602768, "learning_rate": 9.216971369437739e-06, "loss": 0.4828, "step": 1886 }, { "epoch": 1.7569832402234637, "grad_norm": 0.29567675108572883, "learning_rate": 9.210072438771991e-06, "loss": 0.4933, "step": 1887 }, { "epoch": 1.7579143389199254, "grad_norm": 0.3099782630948801, "learning_rate": 9.203173508106245e-06, "loss": 0.5002, "step": 1888 }, { "epoch": 1.7588454376163873, "grad_norm": 0.2917559881885042, "learning_rate": 9.196274577440498e-06, "loss": 0.4903, "step": 1889 }, { "epoch": 1.7597765363128492, "grad_norm": 0.3061360323936315, "learning_rate": 9.18937564677475e-06, "loss": 0.5185, "step": 1890 }, { "epoch": 1.760707635009311, "grad_norm": 0.30795355720030065, "learning_rate": 9.182476716109003e-06, "loss": 0.4867, "step": 1891 }, { "epoch": 1.7616387337057728, "grad_norm": 0.3077755180687435, "learning_rate": 9.175577785443257e-06, "loss": 0.5182, "step": 1892 }, { "epoch": 1.7625698324022347, "grad_norm": 0.2933930890099516, "learning_rate": 9.16867885477751e-06, "loss": 0.4891, "step": 1893 }, { "epoch": 1.7635009310986964, "grad_norm": 0.2976597607595482, "learning_rate": 9.161779924111763e-06, "loss": 0.5246, "step": 1894 }, { "epoch": 1.7644320297951583, "grad_norm": 0.2958252646567403, "learning_rate": 9.154880993446017e-06, "loss": 0.524, "step": 1895 }, { "epoch": 1.7653631284916202, "grad_norm": 0.295989654736897, "learning_rate": 9.14798206278027e-06, "loss": 0.5016, "step": 1896 }, { "epoch": 1.766294227188082, "grad_norm": 0.30992163025920405, "learning_rate": 9.141083132114524e-06, "loss": 0.4998, "step": 1897 }, { "epoch": 1.7672253258845436, "grad_norm": 0.29529346577547533, "learning_rate": 9.134184201448776e-06, "loss": 0.5157, "step": 1898 }, { "epoch": 1.7681564245810057, "grad_norm": 0.31502073232165223, "learning_rate": 9.12728527078303e-06, "loss": 0.4742, "step": 1899 }, { "epoch": 1.7690875232774674, "grad_norm": 0.30894775622626486, "learning_rate": 9.120386340117283e-06, "loss": 0.4952, "step": 1900 }, { "epoch": 1.7700186219739291, "grad_norm": 0.2982960198874359, "learning_rate": 9.113487409451535e-06, "loss": 0.5058, "step": 1901 }, { "epoch": 1.770949720670391, "grad_norm": 0.31569060126521453, "learning_rate": 9.10658847878579e-06, "loss": 0.497, "step": 1902 }, { "epoch": 1.771880819366853, "grad_norm": 0.3119734061374241, "learning_rate": 9.099689548120042e-06, "loss": 0.5041, "step": 1903 }, { "epoch": 1.7728119180633146, "grad_norm": 0.3119653293634104, "learning_rate": 9.092790617454296e-06, "loss": 0.4953, "step": 1904 }, { "epoch": 1.7737430167597765, "grad_norm": 0.3117074657406088, "learning_rate": 9.085891686788548e-06, "loss": 0.4797, "step": 1905 }, { "epoch": 1.7746741154562384, "grad_norm": 0.31971213583844255, "learning_rate": 9.078992756122802e-06, "loss": 0.4921, "step": 1906 }, { "epoch": 1.7756052141527001, "grad_norm": 0.300875796295969, "learning_rate": 9.072093825457055e-06, "loss": 0.5085, "step": 1907 }, { "epoch": 1.776536312849162, "grad_norm": 0.29702598725394425, "learning_rate": 9.065194894791309e-06, "loss": 0.4972, "step": 1908 }, { "epoch": 1.777467411545624, "grad_norm": 0.2964982432368937, "learning_rate": 9.058295964125561e-06, "loss": 0.4477, "step": 1909 }, { "epoch": 1.7783985102420856, "grad_norm": 0.3105978119946033, "learning_rate": 9.051397033459815e-06, "loss": 0.4856, "step": 1910 }, { "epoch": 1.7793296089385475, "grad_norm": 0.29253265894780234, "learning_rate": 9.044498102794068e-06, "loss": 0.4724, "step": 1911 }, { "epoch": 1.7802607076350094, "grad_norm": 0.28760843222453303, "learning_rate": 9.03759917212832e-06, "loss": 0.4839, "step": 1912 }, { "epoch": 1.7811918063314711, "grad_norm": 0.36025186181224533, "learning_rate": 9.030700241462574e-06, "loss": 0.4627, "step": 1913 }, { "epoch": 1.7821229050279328, "grad_norm": 0.31649973711201373, "learning_rate": 9.023801310796827e-06, "loss": 0.4945, "step": 1914 }, { "epoch": 1.783054003724395, "grad_norm": 0.320087909957759, "learning_rate": 9.01690238013108e-06, "loss": 0.4771, "step": 1915 }, { "epoch": 1.7839851024208566, "grad_norm": 0.31453224813198555, "learning_rate": 9.010003449465333e-06, "loss": 0.5157, "step": 1916 }, { "epoch": 1.7849162011173183, "grad_norm": 0.3109490139667831, "learning_rate": 9.003104518799587e-06, "loss": 0.475, "step": 1917 }, { "epoch": 1.7858472998137802, "grad_norm": 0.3162090414116423, "learning_rate": 8.99620558813384e-06, "loss": 0.4849, "step": 1918 }, { "epoch": 1.7867783985102421, "grad_norm": 0.31272233603460564, "learning_rate": 8.989306657468094e-06, "loss": 0.4914, "step": 1919 }, { "epoch": 1.7877094972067038, "grad_norm": 0.2951485623802353, "learning_rate": 8.982407726802346e-06, "loss": 0.4769, "step": 1920 }, { "epoch": 1.7886405959031657, "grad_norm": 0.33632188675709107, "learning_rate": 8.9755087961366e-06, "loss": 0.4754, "step": 1921 }, { "epoch": 1.7895716945996276, "grad_norm": 0.33951521850965455, "learning_rate": 8.968609865470853e-06, "loss": 0.5215, "step": 1922 }, { "epoch": 1.7905027932960893, "grad_norm": 0.297487173477515, "learning_rate": 8.961710934805105e-06, "loss": 0.4535, "step": 1923 }, { "epoch": 1.7914338919925512, "grad_norm": 0.3119671984197883, "learning_rate": 8.954812004139359e-06, "loss": 0.4926, "step": 1924 }, { "epoch": 1.7923649906890131, "grad_norm": 0.3069075780625246, "learning_rate": 8.947913073473611e-06, "loss": 0.5263, "step": 1925 }, { "epoch": 1.7932960893854748, "grad_norm": 0.314635583982731, "learning_rate": 8.941014142807866e-06, "loss": 0.4798, "step": 1926 }, { "epoch": 1.7942271880819367, "grad_norm": 0.3602452707662069, "learning_rate": 8.934115212142118e-06, "loss": 0.5041, "step": 1927 }, { "epoch": 1.7951582867783986, "grad_norm": 0.29674451551857306, "learning_rate": 8.927216281476372e-06, "loss": 0.4925, "step": 1928 }, { "epoch": 1.7960893854748603, "grad_norm": 0.29203385507164126, "learning_rate": 8.920317350810626e-06, "loss": 0.4919, "step": 1929 }, { "epoch": 1.7970204841713222, "grad_norm": 0.3403366181088374, "learning_rate": 8.913418420144879e-06, "loss": 0.4746, "step": 1930 }, { "epoch": 1.7979515828677841, "grad_norm": 0.33882238814467347, "learning_rate": 8.906519489479131e-06, "loss": 0.4844, "step": 1931 }, { "epoch": 1.7988826815642458, "grad_norm": 0.30313855461231515, "learning_rate": 8.899620558813385e-06, "loss": 0.4746, "step": 1932 }, { "epoch": 1.7998137802607075, "grad_norm": 0.3375290585627816, "learning_rate": 8.892721628147638e-06, "loss": 0.5006, "step": 1933 }, { "epoch": 1.8007448789571696, "grad_norm": 0.32793620269323703, "learning_rate": 8.88582269748189e-06, "loss": 0.4768, "step": 1934 }, { "epoch": 1.8016759776536313, "grad_norm": 0.30619399239919864, "learning_rate": 8.878923766816144e-06, "loss": 0.4712, "step": 1935 }, { "epoch": 1.802607076350093, "grad_norm": 0.3084793138965287, "learning_rate": 8.872024836150396e-06, "loss": 0.4885, "step": 1936 }, { "epoch": 1.803538175046555, "grad_norm": 0.29850411984121866, "learning_rate": 8.86512590548465e-06, "loss": 0.4915, "step": 1937 }, { "epoch": 1.8044692737430168, "grad_norm": 0.3064959974387425, "learning_rate": 8.858226974818905e-06, "loss": 0.4959, "step": 1938 }, { "epoch": 1.8054003724394785, "grad_norm": 0.29762227960071497, "learning_rate": 8.851328044153157e-06, "loss": 0.4685, "step": 1939 }, { "epoch": 1.8063314711359404, "grad_norm": 0.27634596026670466, "learning_rate": 8.844429113487411e-06, "loss": 0.4806, "step": 1940 }, { "epoch": 1.8072625698324023, "grad_norm": 0.2947822222074885, "learning_rate": 8.837530182821664e-06, "loss": 0.4947, "step": 1941 }, { "epoch": 1.808193668528864, "grad_norm": 0.2897641203195549, "learning_rate": 8.830631252155916e-06, "loss": 0.4781, "step": 1942 }, { "epoch": 1.809124767225326, "grad_norm": 0.30439469477141917, "learning_rate": 8.82373232149017e-06, "loss": 0.5216, "step": 1943 }, { "epoch": 1.8100558659217878, "grad_norm": 0.2937518093040811, "learning_rate": 8.816833390824422e-06, "loss": 0.503, "step": 1944 }, { "epoch": 1.8109869646182495, "grad_norm": 0.31377809524686723, "learning_rate": 8.809934460158675e-06, "loss": 0.4832, "step": 1945 }, { "epoch": 1.8119180633147114, "grad_norm": 0.2901254467131353, "learning_rate": 8.803035529492929e-06, "loss": 0.472, "step": 1946 }, { "epoch": 1.8128491620111733, "grad_norm": 0.29551559750566253, "learning_rate": 8.796136598827183e-06, "loss": 0.488, "step": 1947 }, { "epoch": 1.813780260707635, "grad_norm": 0.30352040644864137, "learning_rate": 8.789237668161435e-06, "loss": 0.4993, "step": 1948 }, { "epoch": 1.8147113594040967, "grad_norm": 0.3037045941142261, "learning_rate": 8.78233873749569e-06, "loss": 0.4666, "step": 1949 }, { "epoch": 1.8156424581005588, "grad_norm": 0.2991292592791065, "learning_rate": 8.775439806829942e-06, "loss": 0.5121, "step": 1950 }, { "epoch": 1.8165735567970205, "grad_norm": 0.3126399614963099, "learning_rate": 8.768540876164196e-06, "loss": 0.5043, "step": 1951 }, { "epoch": 1.8175046554934822, "grad_norm": 0.29618740156288775, "learning_rate": 8.761641945498449e-06, "loss": 0.5022, "step": 1952 }, { "epoch": 1.8184357541899443, "grad_norm": 0.3045712136688277, "learning_rate": 8.754743014832701e-06, "loss": 0.4907, "step": 1953 }, { "epoch": 1.819366852886406, "grad_norm": 0.32948292129710205, "learning_rate": 8.747844084166955e-06, "loss": 0.4926, "step": 1954 }, { "epoch": 1.8202979515828677, "grad_norm": 0.30697121576182584, "learning_rate": 8.740945153501207e-06, "loss": 0.4627, "step": 1955 }, { "epoch": 1.8212290502793296, "grad_norm": 0.28713638023911164, "learning_rate": 8.734046222835462e-06, "loss": 0.4716, "step": 1956 }, { "epoch": 1.8221601489757915, "grad_norm": 0.29768809694606463, "learning_rate": 8.727147292169714e-06, "loss": 0.5042, "step": 1957 }, { "epoch": 1.8230912476722532, "grad_norm": 0.31399419452931104, "learning_rate": 8.720248361503968e-06, "loss": 0.4948, "step": 1958 }, { "epoch": 1.824022346368715, "grad_norm": 0.329971611314568, "learning_rate": 8.71334943083822e-06, "loss": 0.4961, "step": 1959 }, { "epoch": 1.824953445065177, "grad_norm": 0.2868945158739483, "learning_rate": 8.706450500172475e-06, "loss": 0.4969, "step": 1960 }, { "epoch": 1.8258845437616387, "grad_norm": 0.29915568625874744, "learning_rate": 8.699551569506727e-06, "loss": 0.5014, "step": 1961 }, { "epoch": 1.8268156424581006, "grad_norm": 0.3435320224561459, "learning_rate": 8.692652638840981e-06, "loss": 0.487, "step": 1962 }, { "epoch": 1.8277467411545625, "grad_norm": 0.30705823566937174, "learning_rate": 8.685753708175233e-06, "loss": 0.4956, "step": 1963 }, { "epoch": 1.8286778398510242, "grad_norm": 0.27443940560137775, "learning_rate": 8.678854777509486e-06, "loss": 0.4727, "step": 1964 }, { "epoch": 1.829608938547486, "grad_norm": 0.32543837304559275, "learning_rate": 8.67195584684374e-06, "loss": 0.5127, "step": 1965 }, { "epoch": 1.830540037243948, "grad_norm": 0.3028720490371693, "learning_rate": 8.665056916177992e-06, "loss": 0.5087, "step": 1966 }, { "epoch": 1.8314711359404097, "grad_norm": 0.31501993567428255, "learning_rate": 8.658157985512246e-06, "loss": 0.4779, "step": 1967 }, { "epoch": 1.8324022346368714, "grad_norm": 0.3037945428418458, "learning_rate": 8.651259054846499e-06, "loss": 0.4855, "step": 1968 }, { "epoch": 1.8333333333333335, "grad_norm": 0.31203395562874686, "learning_rate": 8.644360124180753e-06, "loss": 0.4815, "step": 1969 }, { "epoch": 1.8342644320297952, "grad_norm": 0.3052858778946037, "learning_rate": 8.637461193515005e-06, "loss": 0.5008, "step": 1970 }, { "epoch": 1.8351955307262569, "grad_norm": 0.27271863118573614, "learning_rate": 8.63056226284926e-06, "loss": 0.4838, "step": 1971 }, { "epoch": 1.8361266294227188, "grad_norm": 0.31519060630784546, "learning_rate": 8.623663332183514e-06, "loss": 0.4953, "step": 1972 }, { "epoch": 1.8370577281191807, "grad_norm": 0.3197474981802368, "learning_rate": 8.616764401517766e-06, "loss": 0.5021, "step": 1973 }, { "epoch": 1.8379888268156424, "grad_norm": 0.2921744677281897, "learning_rate": 8.609865470852018e-06, "loss": 0.5252, "step": 1974 }, { "epoch": 1.8389199255121043, "grad_norm": 0.3236066768835218, "learning_rate": 8.60296654018627e-06, "loss": 0.4753, "step": 1975 }, { "epoch": 1.8398510242085662, "grad_norm": 0.29384615381939255, "learning_rate": 8.596067609520525e-06, "loss": 0.4891, "step": 1976 }, { "epoch": 1.8407821229050279, "grad_norm": 0.29070098117825327, "learning_rate": 8.589168678854777e-06, "loss": 0.5085, "step": 1977 }, { "epoch": 1.8417132216014898, "grad_norm": 0.335149754201223, "learning_rate": 8.582269748189031e-06, "loss": 0.5019, "step": 1978 }, { "epoch": 1.8426443202979517, "grad_norm": 0.30055249747902674, "learning_rate": 8.575370817523284e-06, "loss": 0.4782, "step": 1979 }, { "epoch": 1.8435754189944134, "grad_norm": 0.28315386428159683, "learning_rate": 8.568471886857538e-06, "loss": 0.4633, "step": 1980 }, { "epoch": 1.8445065176908753, "grad_norm": 0.28096992192540804, "learning_rate": 8.561572956191792e-06, "loss": 0.5021, "step": 1981 }, { "epoch": 1.8454376163873372, "grad_norm": 0.3136654255600861, "learning_rate": 8.554674025526044e-06, "loss": 0.5068, "step": 1982 }, { "epoch": 1.8463687150837989, "grad_norm": 0.3023717510983804, "learning_rate": 8.547775094860299e-06, "loss": 0.4862, "step": 1983 }, { "epoch": 1.8472998137802608, "grad_norm": 0.2944781704532581, "learning_rate": 8.540876164194551e-06, "loss": 0.5092, "step": 1984 }, { "epoch": 1.8482309124767227, "grad_norm": 0.299216347223534, "learning_rate": 8.533977233528803e-06, "loss": 0.5174, "step": 1985 }, { "epoch": 1.8491620111731844, "grad_norm": 0.2912988930123761, "learning_rate": 8.527078302863056e-06, "loss": 0.4768, "step": 1986 }, { "epoch": 1.850093109869646, "grad_norm": 0.29660874951795113, "learning_rate": 8.52017937219731e-06, "loss": 0.4843, "step": 1987 }, { "epoch": 1.8510242085661082, "grad_norm": 0.2832670255499438, "learning_rate": 8.513280441531562e-06, "loss": 0.5073, "step": 1988 }, { "epoch": 1.8519553072625698, "grad_norm": 0.3108961972675386, "learning_rate": 8.506381510865816e-06, "loss": 0.5096, "step": 1989 }, { "epoch": 1.8528864059590315, "grad_norm": 0.32604239432308924, "learning_rate": 8.49948258020007e-06, "loss": 0.5016, "step": 1990 }, { "epoch": 1.8538175046554934, "grad_norm": 0.28501061572787484, "learning_rate": 8.492583649534323e-06, "loss": 0.4922, "step": 1991 }, { "epoch": 1.8547486033519553, "grad_norm": 0.28539291656403504, "learning_rate": 8.485684718868577e-06, "loss": 0.4825, "step": 1992 }, { "epoch": 1.855679702048417, "grad_norm": 0.2976024552089551, "learning_rate": 8.47878578820283e-06, "loss": 0.476, "step": 1993 }, { "epoch": 1.856610800744879, "grad_norm": 0.29463385430503297, "learning_rate": 8.471886857537083e-06, "loss": 0.4791, "step": 1994 }, { "epoch": 1.8575418994413408, "grad_norm": 0.29994321302537136, "learning_rate": 8.464987926871336e-06, "loss": 0.4628, "step": 1995 }, { "epoch": 1.8584729981378025, "grad_norm": 0.3263975412375946, "learning_rate": 8.458088996205588e-06, "loss": 0.5058, "step": 1996 }, { "epoch": 1.8594040968342644, "grad_norm": 0.28425312092839966, "learning_rate": 8.451190065539842e-06, "loss": 0.4853, "step": 1997 }, { "epoch": 1.8603351955307263, "grad_norm": 0.3008631169118913, "learning_rate": 8.444291134874095e-06, "loss": 0.4717, "step": 1998 }, { "epoch": 1.861266294227188, "grad_norm": 0.2973572050723446, "learning_rate": 8.437392204208349e-06, "loss": 0.5076, "step": 1999 }, { "epoch": 1.86219739292365, "grad_norm": 0.29284759096647905, "learning_rate": 8.430493273542601e-06, "loss": 0.4882, "step": 2000 }, { "epoch": 1.8631284916201118, "grad_norm": 0.28731293231547306, "learning_rate": 8.423594342876855e-06, "loss": 0.49, "step": 2001 }, { "epoch": 1.8640595903165735, "grad_norm": 0.29077165109377534, "learning_rate": 8.416695412211108e-06, "loss": 0.4944, "step": 2002 }, { "epoch": 1.8649906890130352, "grad_norm": 0.2843864905572249, "learning_rate": 8.409796481545362e-06, "loss": 0.4892, "step": 2003 }, { "epoch": 1.8659217877094973, "grad_norm": 0.3297130498221757, "learning_rate": 8.402897550879614e-06, "loss": 0.5222, "step": 2004 }, { "epoch": 1.866852886405959, "grad_norm": 0.30751584316958036, "learning_rate": 8.395998620213868e-06, "loss": 0.5204, "step": 2005 }, { "epoch": 1.8677839851024207, "grad_norm": 0.2876847214281747, "learning_rate": 8.38909968954812e-06, "loss": 0.4809, "step": 2006 }, { "epoch": 1.8687150837988828, "grad_norm": 0.2929012195875827, "learning_rate": 8.382200758882373e-06, "loss": 0.4748, "step": 2007 }, { "epoch": 1.8696461824953445, "grad_norm": 0.29814060029539524, "learning_rate": 8.375301828216627e-06, "loss": 0.4906, "step": 2008 }, { "epoch": 1.8705772811918062, "grad_norm": 0.2877110890599968, "learning_rate": 8.36840289755088e-06, "loss": 0.4729, "step": 2009 }, { "epoch": 1.8715083798882681, "grad_norm": 0.2910558376933388, "learning_rate": 8.361503966885134e-06, "loss": 0.4985, "step": 2010 }, { "epoch": 1.87243947858473, "grad_norm": 0.2879904173108091, "learning_rate": 8.354605036219386e-06, "loss": 0.4826, "step": 2011 }, { "epoch": 1.8733705772811917, "grad_norm": 0.28182131499088703, "learning_rate": 8.34770610555364e-06, "loss": 0.5, "step": 2012 }, { "epoch": 1.8743016759776536, "grad_norm": 0.29257909107753083, "learning_rate": 8.340807174887893e-06, "loss": 0.5172, "step": 2013 }, { "epoch": 1.8752327746741155, "grad_norm": 0.2798599032298524, "learning_rate": 8.333908244222147e-06, "loss": 0.4885, "step": 2014 }, { "epoch": 1.8761638733705772, "grad_norm": 0.30517411169358355, "learning_rate": 8.3270093135564e-06, "loss": 0.5373, "step": 2015 }, { "epoch": 1.8770949720670391, "grad_norm": 0.30505616017143317, "learning_rate": 8.320110382890653e-06, "loss": 0.4801, "step": 2016 }, { "epoch": 1.878026070763501, "grad_norm": 0.2942176045251564, "learning_rate": 8.313211452224906e-06, "loss": 0.4883, "step": 2017 }, { "epoch": 1.8789571694599627, "grad_norm": 0.30117169724146564, "learning_rate": 8.306312521559158e-06, "loss": 0.4685, "step": 2018 }, { "epoch": 1.8798882681564246, "grad_norm": 0.2968635112432101, "learning_rate": 8.299413590893412e-06, "loss": 0.4973, "step": 2019 }, { "epoch": 1.8808193668528865, "grad_norm": 0.3174158624429029, "learning_rate": 8.292514660227665e-06, "loss": 0.4982, "step": 2020 }, { "epoch": 1.8817504655493482, "grad_norm": 0.29280500021643324, "learning_rate": 8.285615729561919e-06, "loss": 0.4997, "step": 2021 }, { "epoch": 1.88268156424581, "grad_norm": 0.3017229578591037, "learning_rate": 8.278716798896171e-06, "loss": 0.5051, "step": 2022 }, { "epoch": 1.883612662942272, "grad_norm": 0.314753441166852, "learning_rate": 8.271817868230425e-06, "loss": 0.4883, "step": 2023 }, { "epoch": 1.8845437616387337, "grad_norm": 0.2981749444599253, "learning_rate": 8.264918937564678e-06, "loss": 0.4832, "step": 2024 }, { "epoch": 1.8854748603351954, "grad_norm": 0.2907098797800322, "learning_rate": 8.258020006898932e-06, "loss": 0.4954, "step": 2025 }, { "epoch": 1.8864059590316573, "grad_norm": 0.3258271168916533, "learning_rate": 8.251121076233184e-06, "loss": 0.5023, "step": 2026 }, { "epoch": 1.8873370577281192, "grad_norm": 0.2866417020845537, "learning_rate": 8.244222145567438e-06, "loss": 0.4977, "step": 2027 }, { "epoch": 1.888268156424581, "grad_norm": 0.2802628934179091, "learning_rate": 8.23732321490169e-06, "loss": 0.4791, "step": 2028 }, { "epoch": 1.8891992551210428, "grad_norm": 0.3103625950385665, "learning_rate": 8.230424284235943e-06, "loss": 0.4816, "step": 2029 }, { "epoch": 1.8901303538175047, "grad_norm": 0.2916429223015322, "learning_rate": 8.223525353570197e-06, "loss": 0.4854, "step": 2030 }, { "epoch": 1.8910614525139664, "grad_norm": 0.29261435682025666, "learning_rate": 8.21662642290445e-06, "loss": 0.4779, "step": 2031 }, { "epoch": 1.8919925512104283, "grad_norm": 0.3218079941466753, "learning_rate": 8.209727492238704e-06, "loss": 0.4896, "step": 2032 }, { "epoch": 1.8929236499068902, "grad_norm": 0.32128155698881816, "learning_rate": 8.202828561572956e-06, "loss": 0.4791, "step": 2033 }, { "epoch": 1.893854748603352, "grad_norm": 0.29109010939279506, "learning_rate": 8.19592963090721e-06, "loss": 0.4807, "step": 2034 }, { "epoch": 1.8947858472998138, "grad_norm": 0.2944872781742083, "learning_rate": 8.189030700241464e-06, "loss": 0.4981, "step": 2035 }, { "epoch": 1.8957169459962757, "grad_norm": 0.3106122600751712, "learning_rate": 8.182131769575717e-06, "loss": 0.5175, "step": 2036 }, { "epoch": 1.8966480446927374, "grad_norm": 0.30673055747728395, "learning_rate": 8.17523283890997e-06, "loss": 0.5113, "step": 2037 }, { "epoch": 1.8975791433891993, "grad_norm": 0.3048538655444472, "learning_rate": 8.168333908244223e-06, "loss": 0.4918, "step": 2038 }, { "epoch": 1.8985102420856612, "grad_norm": 0.29028541747353565, "learning_rate": 8.161434977578476e-06, "loss": 0.4667, "step": 2039 }, { "epoch": 1.899441340782123, "grad_norm": 0.2982692783617434, "learning_rate": 8.154536046912728e-06, "loss": 0.5081, "step": 2040 }, { "epoch": 1.9003724394785846, "grad_norm": 0.2956675407294897, "learning_rate": 8.147637116246982e-06, "loss": 0.5049, "step": 2041 }, { "epoch": 1.9013035381750467, "grad_norm": 0.31599434453242786, "learning_rate": 8.140738185581235e-06, "loss": 0.5111, "step": 2042 }, { "epoch": 1.9022346368715084, "grad_norm": 0.30680862107882895, "learning_rate": 8.133839254915489e-06, "loss": 0.5046, "step": 2043 }, { "epoch": 1.90316573556797, "grad_norm": 0.30285673484012315, "learning_rate": 8.126940324249743e-06, "loss": 0.4801, "step": 2044 }, { "epoch": 1.904096834264432, "grad_norm": 0.3377263673002133, "learning_rate": 8.120041393583995e-06, "loss": 0.5229, "step": 2045 }, { "epoch": 1.905027932960894, "grad_norm": 0.3093030189299967, "learning_rate": 8.11314246291825e-06, "loss": 0.477, "step": 2046 }, { "epoch": 1.9059590316573556, "grad_norm": 0.30121587566707814, "learning_rate": 8.106243532252502e-06, "loss": 0.5075, "step": 2047 }, { "epoch": 1.9068901303538175, "grad_norm": 0.321120301576872, "learning_rate": 8.099344601586754e-06, "loss": 0.4858, "step": 2048 }, { "epoch": 1.9078212290502794, "grad_norm": 0.3158265039830501, "learning_rate": 8.092445670921008e-06, "loss": 0.4789, "step": 2049 }, { "epoch": 1.908752327746741, "grad_norm": 0.2856441060009797, "learning_rate": 8.08554674025526e-06, "loss": 0.5014, "step": 2050 }, { "epoch": 1.909683426443203, "grad_norm": 0.3313175620653425, "learning_rate": 8.078647809589513e-06, "loss": 0.5136, "step": 2051 }, { "epoch": 1.910614525139665, "grad_norm": 0.2759229161345923, "learning_rate": 8.071748878923767e-06, "loss": 0.4919, "step": 2052 }, { "epoch": 1.9115456238361266, "grad_norm": 0.3117819711436102, "learning_rate": 8.064849948258021e-06, "loss": 0.4617, "step": 2053 }, { "epoch": 1.9124767225325885, "grad_norm": 0.2969152027198621, "learning_rate": 8.057951017592274e-06, "loss": 0.4882, "step": 2054 }, { "epoch": 1.9134078212290504, "grad_norm": 0.29919842793032897, "learning_rate": 8.051052086926528e-06, "loss": 0.4975, "step": 2055 }, { "epoch": 1.914338919925512, "grad_norm": 0.28363833128738364, "learning_rate": 8.04415315626078e-06, "loss": 0.4788, "step": 2056 }, { "epoch": 1.9152700186219738, "grad_norm": 0.2918203478225537, "learning_rate": 8.037254225595034e-06, "loss": 0.4818, "step": 2057 }, { "epoch": 1.916201117318436, "grad_norm": 0.29574800478633706, "learning_rate": 8.030355294929287e-06, "loss": 0.5007, "step": 2058 }, { "epoch": 1.9171322160148976, "grad_norm": 0.28007959337818733, "learning_rate": 8.023456364263539e-06, "loss": 0.4778, "step": 2059 }, { "epoch": 1.9180633147113593, "grad_norm": 0.2850162358048566, "learning_rate": 8.016557433597793e-06, "loss": 0.4859, "step": 2060 }, { "epoch": 1.9189944134078212, "grad_norm": 0.3021100158467866, "learning_rate": 8.009658502932046e-06, "loss": 0.5125, "step": 2061 }, { "epoch": 1.919925512104283, "grad_norm": 0.2839915772111344, "learning_rate": 8.0027595722663e-06, "loss": 0.4778, "step": 2062 }, { "epoch": 1.9208566108007448, "grad_norm": 0.29354214175029697, "learning_rate": 7.995860641600552e-06, "loss": 0.4993, "step": 2063 }, { "epoch": 1.9217877094972067, "grad_norm": 0.2825006578980611, "learning_rate": 7.988961710934806e-06, "loss": 0.4819, "step": 2064 }, { "epoch": 1.9227188081936686, "grad_norm": 0.3127134588491596, "learning_rate": 7.982062780269059e-06, "loss": 0.4701, "step": 2065 }, { "epoch": 1.9236499068901303, "grad_norm": 0.2703898111679526, "learning_rate": 7.975163849603313e-06, "loss": 0.4801, "step": 2066 }, { "epoch": 1.9245810055865922, "grad_norm": 0.2813912591183113, "learning_rate": 7.968264918937565e-06, "loss": 0.4811, "step": 2067 }, { "epoch": 1.925512104283054, "grad_norm": 0.28925953806670596, "learning_rate": 7.96136598827182e-06, "loss": 0.4854, "step": 2068 }, { "epoch": 1.9264432029795158, "grad_norm": 0.32327093486578146, "learning_rate": 7.954467057606072e-06, "loss": 0.5119, "step": 2069 }, { "epoch": 1.9273743016759777, "grad_norm": 0.30745904928638007, "learning_rate": 7.947568126940324e-06, "loss": 0.4964, "step": 2070 }, { "epoch": 1.9283054003724396, "grad_norm": 0.2924816469246079, "learning_rate": 7.940669196274578e-06, "loss": 0.4874, "step": 2071 }, { "epoch": 1.9292364990689013, "grad_norm": 0.30005157414243083, "learning_rate": 7.93377026560883e-06, "loss": 0.4862, "step": 2072 }, { "epoch": 1.9301675977653632, "grad_norm": 0.29788529640697936, "learning_rate": 7.926871334943085e-06, "loss": 0.4976, "step": 2073 }, { "epoch": 1.931098696461825, "grad_norm": 0.3213852070806741, "learning_rate": 7.919972404277337e-06, "loss": 0.4884, "step": 2074 }, { "epoch": 1.9320297951582868, "grad_norm": 0.2985863710310047, "learning_rate": 7.913073473611591e-06, "loss": 0.4904, "step": 2075 }, { "epoch": 1.9329608938547485, "grad_norm": 0.30691324586918467, "learning_rate": 7.906174542945844e-06, "loss": 0.4739, "step": 2076 }, { "epoch": 1.9338919925512106, "grad_norm": 0.3083978921330899, "learning_rate": 7.899275612280098e-06, "loss": 0.4531, "step": 2077 }, { "epoch": 1.9348230912476723, "grad_norm": 0.30303889995932926, "learning_rate": 7.89237668161435e-06, "loss": 0.5005, "step": 2078 }, { "epoch": 1.935754189944134, "grad_norm": 0.300123617976017, "learning_rate": 7.885477750948604e-06, "loss": 0.4906, "step": 2079 }, { "epoch": 1.9366852886405959, "grad_norm": 0.2976567479151624, "learning_rate": 7.878578820282857e-06, "loss": 0.4902, "step": 2080 }, { "epoch": 1.9376163873370578, "grad_norm": 0.30899843872699556, "learning_rate": 7.871679889617109e-06, "loss": 0.5155, "step": 2081 }, { "epoch": 1.9385474860335195, "grad_norm": 0.2956156750337048, "learning_rate": 7.864780958951363e-06, "loss": 0.4995, "step": 2082 }, { "epoch": 1.9394785847299814, "grad_norm": 0.3245356561189525, "learning_rate": 7.857882028285615e-06, "loss": 0.4969, "step": 2083 }, { "epoch": 1.9404096834264433, "grad_norm": 0.29533240361326035, "learning_rate": 7.85098309761987e-06, "loss": 0.4982, "step": 2084 }, { "epoch": 1.941340782122905, "grad_norm": 0.3097433810496015, "learning_rate": 7.844084166954122e-06, "loss": 0.4899, "step": 2085 }, { "epoch": 1.9422718808193669, "grad_norm": 0.28889635605435304, "learning_rate": 7.837185236288376e-06, "loss": 0.4498, "step": 2086 }, { "epoch": 1.9432029795158288, "grad_norm": 0.2915378445314065, "learning_rate": 7.830286305622629e-06, "loss": 0.4958, "step": 2087 }, { "epoch": 1.9441340782122905, "grad_norm": 0.3169490827700207, "learning_rate": 7.823387374956883e-06, "loss": 0.466, "step": 2088 }, { "epoch": 1.9450651769087524, "grad_norm": 0.28626293917713286, "learning_rate": 7.816488444291137e-06, "loss": 0.4689, "step": 2089 }, { "epoch": 1.9459962756052143, "grad_norm": 0.296170269531376, "learning_rate": 7.809589513625389e-06, "loss": 0.4872, "step": 2090 }, { "epoch": 1.946927374301676, "grad_norm": 0.2975013814085215, "learning_rate": 7.802690582959642e-06, "loss": 0.487, "step": 2091 }, { "epoch": 1.9478584729981379, "grad_norm": 0.29047118605840194, "learning_rate": 7.795791652293896e-06, "loss": 0.4635, "step": 2092 }, { "epoch": 1.9487895716945998, "grad_norm": 0.3080770855526254, "learning_rate": 7.788892721628148e-06, "loss": 0.5058, "step": 2093 }, { "epoch": 1.9497206703910615, "grad_norm": 0.27308708964484046, "learning_rate": 7.7819937909624e-06, "loss": 0.4933, "step": 2094 }, { "epoch": 1.9506517690875231, "grad_norm": 0.2694597857573225, "learning_rate": 7.775094860296655e-06, "loss": 0.4873, "step": 2095 }, { "epoch": 1.9515828677839853, "grad_norm": 0.2965550017256191, "learning_rate": 7.768195929630907e-06, "loss": 0.4904, "step": 2096 }, { "epoch": 1.952513966480447, "grad_norm": 0.3156843248481259, "learning_rate": 7.761296998965161e-06, "loss": 0.5092, "step": 2097 }, { "epoch": 1.9534450651769086, "grad_norm": 0.2970397528556984, "learning_rate": 7.754398068299415e-06, "loss": 0.4894, "step": 2098 }, { "epoch": 1.9543761638733705, "grad_norm": 0.2776073458012902, "learning_rate": 7.747499137633668e-06, "loss": 0.476, "step": 2099 }, { "epoch": 1.9553072625698324, "grad_norm": 0.31705678928990627, "learning_rate": 7.740600206967922e-06, "loss": 0.4597, "step": 2100 }, { "epoch": 1.9562383612662941, "grad_norm": 0.30140284223845903, "learning_rate": 7.733701276302174e-06, "loss": 0.4814, "step": 2101 }, { "epoch": 1.957169459962756, "grad_norm": 0.2872588056541476, "learning_rate": 7.726802345636426e-06, "loss": 0.5028, "step": 2102 }, { "epoch": 1.958100558659218, "grad_norm": 0.2928789083075392, "learning_rate": 7.71990341497068e-06, "loss": 0.5211, "step": 2103 }, { "epoch": 1.9590316573556796, "grad_norm": 0.31976944612330993, "learning_rate": 7.713004484304933e-06, "loss": 0.5021, "step": 2104 }, { "epoch": 1.9599627560521415, "grad_norm": 0.2795052094001323, "learning_rate": 7.706105553639187e-06, "loss": 0.4773, "step": 2105 }, { "epoch": 1.9608938547486034, "grad_norm": 0.29420384408277556, "learning_rate": 7.69920662297344e-06, "loss": 0.4998, "step": 2106 }, { "epoch": 1.9618249534450651, "grad_norm": 0.2791222436616905, "learning_rate": 7.692307692307694e-06, "loss": 0.4879, "step": 2107 }, { "epoch": 1.962756052141527, "grad_norm": 0.2890082410003506, "learning_rate": 7.685408761641946e-06, "loss": 0.457, "step": 2108 }, { "epoch": 1.963687150837989, "grad_norm": 0.3081335505708573, "learning_rate": 7.6785098309762e-06, "loss": 0.4818, "step": 2109 }, { "epoch": 1.9646182495344506, "grad_norm": 0.2662614038675592, "learning_rate": 7.671610900310453e-06, "loss": 0.4657, "step": 2110 }, { "epoch": 1.9655493482309123, "grad_norm": 0.2959730049795449, "learning_rate": 7.664711969644707e-06, "loss": 0.4829, "step": 2111 }, { "epoch": 1.9664804469273744, "grad_norm": 0.3247369384468818, "learning_rate": 7.657813038978959e-06, "loss": 0.4822, "step": 2112 }, { "epoch": 1.9674115456238361, "grad_norm": 0.3169684969346512, "learning_rate": 7.650914108313211e-06, "loss": 0.5323, "step": 2113 }, { "epoch": 1.9683426443202978, "grad_norm": 0.27563851860126165, "learning_rate": 7.644015177647466e-06, "loss": 0.4802, "step": 2114 }, { "epoch": 1.9692737430167597, "grad_norm": 0.2862601601303229, "learning_rate": 7.637116246981718e-06, "loss": 0.4814, "step": 2115 }, { "epoch": 1.9702048417132216, "grad_norm": 0.29563904193922985, "learning_rate": 7.630217316315972e-06, "loss": 0.4871, "step": 2116 }, { "epoch": 1.9711359404096833, "grad_norm": 0.3192988343492819, "learning_rate": 7.6233183856502244e-06, "loss": 0.4743, "step": 2117 }, { "epoch": 1.9720670391061452, "grad_norm": 0.2895745841455862, "learning_rate": 7.6164194549844785e-06, "loss": 0.5154, "step": 2118 }, { "epoch": 1.9729981378026071, "grad_norm": 0.28399505228170674, "learning_rate": 7.609520524318731e-06, "loss": 0.5014, "step": 2119 }, { "epoch": 1.9739292364990688, "grad_norm": 0.3133125432081549, "learning_rate": 7.602621593652984e-06, "loss": 0.5014, "step": 2120 }, { "epoch": 1.9748603351955307, "grad_norm": 0.28339221255543, "learning_rate": 7.5957226629872375e-06, "loss": 0.4753, "step": 2121 }, { "epoch": 1.9757914338919926, "grad_norm": 0.2954879770680269, "learning_rate": 7.588823732321491e-06, "loss": 0.4718, "step": 2122 }, { "epoch": 1.9767225325884543, "grad_norm": 0.29145889999785957, "learning_rate": 7.581924801655745e-06, "loss": 0.4894, "step": 2123 }, { "epoch": 1.9776536312849162, "grad_norm": 0.2848398228513771, "learning_rate": 7.575025870989997e-06, "loss": 0.4922, "step": 2124 }, { "epoch": 1.9785847299813781, "grad_norm": 0.28553382563341045, "learning_rate": 7.5681269403242505e-06, "loss": 0.4839, "step": 2125 }, { "epoch": 1.9795158286778398, "grad_norm": 0.2915432559291673, "learning_rate": 7.561228009658504e-06, "loss": 0.486, "step": 2126 }, { "epoch": 1.9804469273743017, "grad_norm": 0.2790693415135005, "learning_rate": 7.554329078992757e-06, "loss": 0.4938, "step": 2127 }, { "epoch": 1.9813780260707636, "grad_norm": 0.28454125838123845, "learning_rate": 7.547430148327009e-06, "loss": 0.4915, "step": 2128 }, { "epoch": 1.9823091247672253, "grad_norm": 0.28820892052024666, "learning_rate": 7.5405312176612635e-06, "loss": 0.4855, "step": 2129 }, { "epoch": 1.983240223463687, "grad_norm": 0.2681063904987023, "learning_rate": 7.533632286995516e-06, "loss": 0.4774, "step": 2130 }, { "epoch": 1.9841713221601491, "grad_norm": 0.2793816334700124, "learning_rate": 7.526733356329769e-06, "loss": 0.4686, "step": 2131 }, { "epoch": 1.9851024208566108, "grad_norm": 0.29068833261584576, "learning_rate": 7.519834425664023e-06, "loss": 0.5016, "step": 2132 }, { "epoch": 1.9860335195530725, "grad_norm": 0.27950443419141574, "learning_rate": 7.512935494998276e-06, "loss": 0.5153, "step": 2133 }, { "epoch": 1.9869646182495344, "grad_norm": 0.2909688940321916, "learning_rate": 7.50603656433253e-06, "loss": 0.502, "step": 2134 }, { "epoch": 1.9878957169459963, "grad_norm": 0.27786832439365244, "learning_rate": 7.499137633666782e-06, "loss": 0.4808, "step": 2135 }, { "epoch": 1.988826815642458, "grad_norm": 0.29808112165561773, "learning_rate": 7.492238703001035e-06, "loss": 0.4963, "step": 2136 }, { "epoch": 1.98975791433892, "grad_norm": 0.2989046889020995, "learning_rate": 7.485339772335289e-06, "loss": 0.5094, "step": 2137 }, { "epoch": 1.9906890130353818, "grad_norm": 0.2874391532512281, "learning_rate": 7.478440841669542e-06, "loss": 0.4963, "step": 2138 }, { "epoch": 1.9916201117318435, "grad_norm": 0.2949947788142582, "learning_rate": 7.471541911003794e-06, "loss": 0.4591, "step": 2139 }, { "epoch": 1.9925512104283054, "grad_norm": 0.2853969866488652, "learning_rate": 7.4646429803380484e-06, "loss": 0.505, "step": 2140 }, { "epoch": 1.9934823091247673, "grad_norm": 0.2703257603391056, "learning_rate": 7.457744049672302e-06, "loss": 0.4651, "step": 2141 }, { "epoch": 1.994413407821229, "grad_norm": 0.29672799923177684, "learning_rate": 7.450845119006554e-06, "loss": 0.4971, "step": 2142 }, { "epoch": 1.995344506517691, "grad_norm": 0.2809955345824441, "learning_rate": 7.443946188340808e-06, "loss": 0.4883, "step": 2143 }, { "epoch": 1.9962756052141528, "grad_norm": 0.2922215275991972, "learning_rate": 7.437047257675061e-06, "loss": 0.5132, "step": 2144 }, { "epoch": 1.9972067039106145, "grad_norm": 0.2916056530491307, "learning_rate": 7.430148327009315e-06, "loss": 0.5041, "step": 2145 }, { "epoch": 1.9981378026070762, "grad_norm": 0.29147398674338604, "learning_rate": 7.423249396343567e-06, "loss": 0.5032, "step": 2146 }, { "epoch": 1.9990689013035383, "grad_norm": 0.3045198460919613, "learning_rate": 7.41635046567782e-06, "loss": 0.4897, "step": 2147 }, { "epoch": 2.0, "grad_norm": 0.29304550429163917, "learning_rate": 7.409451535012074e-06, "loss": 0.4928, "step": 2148 }, { "epoch": 2.0009310986964617, "grad_norm": 0.3172808514674632, "learning_rate": 7.402552604346327e-06, "loss": 0.4894, "step": 2149 }, { "epoch": 2.001862197392924, "grad_norm": 0.29562537683768664, "learning_rate": 7.395653673680581e-06, "loss": 0.472, "step": 2150 }, { "epoch": 2.0027932960893855, "grad_norm": 0.2762042171676711, "learning_rate": 7.388754743014833e-06, "loss": 0.4704, "step": 2151 }, { "epoch": 2.003724394785847, "grad_norm": 0.2752600094702437, "learning_rate": 7.381855812349087e-06, "loss": 0.4712, "step": 2152 }, { "epoch": 2.0046554934823093, "grad_norm": 0.3078120321348158, "learning_rate": 7.374956881683339e-06, "loss": 0.4874, "step": 2153 }, { "epoch": 2.005586592178771, "grad_norm": 0.2936769071554468, "learning_rate": 7.368057951017593e-06, "loss": 0.4781, "step": 2154 }, { "epoch": 2.0065176908752327, "grad_norm": 0.29095831901405095, "learning_rate": 7.3611590203518456e-06, "loss": 0.4641, "step": 2155 }, { "epoch": 2.007448789571695, "grad_norm": 0.3092220275810145, "learning_rate": 7.3542600896861e-06, "loss": 0.4673, "step": 2156 }, { "epoch": 2.0083798882681565, "grad_norm": 0.28940196646829663, "learning_rate": 7.347361159020352e-06, "loss": 0.4979, "step": 2157 }, { "epoch": 2.009310986964618, "grad_norm": 0.2686755467170517, "learning_rate": 7.340462228354605e-06, "loss": 0.4699, "step": 2158 }, { "epoch": 2.01024208566108, "grad_norm": 0.3097110856672536, "learning_rate": 7.333563297688859e-06, "loss": 0.4563, "step": 2159 }, { "epoch": 2.011173184357542, "grad_norm": 0.2863301587137006, "learning_rate": 7.326664367023112e-06, "loss": 0.4492, "step": 2160 }, { "epoch": 2.0121042830540037, "grad_norm": 0.31224937752611986, "learning_rate": 7.319765436357366e-06, "loss": 0.4906, "step": 2161 }, { "epoch": 2.0130353817504654, "grad_norm": 0.273707626386548, "learning_rate": 7.312866505691618e-06, "loss": 0.4692, "step": 2162 }, { "epoch": 2.0139664804469275, "grad_norm": 0.2844663682055065, "learning_rate": 7.305967575025872e-06, "loss": 0.4724, "step": 2163 }, { "epoch": 2.014897579143389, "grad_norm": 0.3108177364018582, "learning_rate": 7.299068644360124e-06, "loss": 0.4897, "step": 2164 }, { "epoch": 2.015828677839851, "grad_norm": 0.30246213005117695, "learning_rate": 7.292169713694378e-06, "loss": 0.4677, "step": 2165 }, { "epoch": 2.016759776536313, "grad_norm": 0.2793182557540871, "learning_rate": 7.2852707830286305e-06, "loss": 0.4348, "step": 2166 }, { "epoch": 2.0176908752327747, "grad_norm": 0.2942321562318967, "learning_rate": 7.278371852362885e-06, "loss": 0.4765, "step": 2167 }, { "epoch": 2.0186219739292364, "grad_norm": 0.2925575683491867, "learning_rate": 7.271472921697138e-06, "loss": 0.4626, "step": 2168 }, { "epoch": 2.0195530726256985, "grad_norm": 0.301852749166462, "learning_rate": 7.26457399103139e-06, "loss": 0.4434, "step": 2169 }, { "epoch": 2.02048417132216, "grad_norm": 0.28967815335684416, "learning_rate": 7.257675060365644e-06, "loss": 0.4762, "step": 2170 }, { "epoch": 2.021415270018622, "grad_norm": 0.306072775866776, "learning_rate": 7.250776129699897e-06, "loss": 0.4948, "step": 2171 }, { "epoch": 2.022346368715084, "grad_norm": 0.3194008537315497, "learning_rate": 7.243877199034151e-06, "loss": 0.4908, "step": 2172 }, { "epoch": 2.0232774674115457, "grad_norm": 0.31417808077569964, "learning_rate": 7.236978268368403e-06, "loss": 0.4702, "step": 2173 }, { "epoch": 2.0242085661080074, "grad_norm": 0.29167054255330965, "learning_rate": 7.2300793377026565e-06, "loss": 0.4857, "step": 2174 }, { "epoch": 2.0251396648044695, "grad_norm": 0.30075029965371897, "learning_rate": 7.22318040703691e-06, "loss": 0.4935, "step": 2175 }, { "epoch": 2.026070763500931, "grad_norm": 0.2884584572315969, "learning_rate": 7.216281476371163e-06, "loss": 0.4657, "step": 2176 }, { "epoch": 2.027001862197393, "grad_norm": 0.28660403487129, "learning_rate": 7.209382545705417e-06, "loss": 0.462, "step": 2177 }, { "epoch": 2.0279329608938546, "grad_norm": 0.30440078335090837, "learning_rate": 7.2024836150396696e-06, "loss": 0.4723, "step": 2178 }, { "epoch": 2.0288640595903167, "grad_norm": 0.2756958660423787, "learning_rate": 7.195584684373923e-06, "loss": 0.4721, "step": 2179 }, { "epoch": 2.0297951582867784, "grad_norm": 0.2918267375069382, "learning_rate": 7.188685753708175e-06, "loss": 0.4682, "step": 2180 }, { "epoch": 2.03072625698324, "grad_norm": 0.27451905796872983, "learning_rate": 7.181786823042429e-06, "loss": 0.4697, "step": 2181 }, { "epoch": 2.031657355679702, "grad_norm": 0.2987170737520997, "learning_rate": 7.174887892376682e-06, "loss": 0.4775, "step": 2182 }, { "epoch": 2.032588454376164, "grad_norm": 0.28913462295445014, "learning_rate": 7.167988961710936e-06, "loss": 0.4907, "step": 2183 }, { "epoch": 2.0335195530726256, "grad_norm": 0.2768892017062761, "learning_rate": 7.161090031045188e-06, "loss": 0.4576, "step": 2184 }, { "epoch": 2.0344506517690877, "grad_norm": 0.2953442660736149, "learning_rate": 7.1541911003794415e-06, "loss": 0.5054, "step": 2185 }, { "epoch": 2.0353817504655494, "grad_norm": 0.3002964758752958, "learning_rate": 7.147292169713696e-06, "loss": 0.461, "step": 2186 }, { "epoch": 2.036312849162011, "grad_norm": 0.2858810019460043, "learning_rate": 7.140393239047948e-06, "loss": 0.47, "step": 2187 }, { "epoch": 2.037243947858473, "grad_norm": 0.27980378098306385, "learning_rate": 7.133494308382202e-06, "loss": 0.464, "step": 2188 }, { "epoch": 2.038175046554935, "grad_norm": 0.3152646374319794, "learning_rate": 7.1265953777164545e-06, "loss": 0.4868, "step": 2189 }, { "epoch": 2.0391061452513966, "grad_norm": 0.3032508570912446, "learning_rate": 7.119696447050708e-06, "loss": 0.4918, "step": 2190 }, { "epoch": 2.0400372439478587, "grad_norm": 0.2998844103033101, "learning_rate": 7.11279751638496e-06, "loss": 0.4763, "step": 2191 }, { "epoch": 2.0409683426443204, "grad_norm": 0.31016411013612327, "learning_rate": 7.105898585719214e-06, "loss": 0.4761, "step": 2192 }, { "epoch": 2.041899441340782, "grad_norm": 0.2759256389486143, "learning_rate": 7.098999655053467e-06, "loss": 0.4529, "step": 2193 }, { "epoch": 2.0428305400372437, "grad_norm": 0.27648940775647457, "learning_rate": 7.092100724387721e-06, "loss": 0.4945, "step": 2194 }, { "epoch": 2.043761638733706, "grad_norm": 0.3216380124452144, "learning_rate": 7.085201793721974e-06, "loss": 0.4768, "step": 2195 }, { "epoch": 2.0446927374301676, "grad_norm": 0.2970191839074712, "learning_rate": 7.0783028630562264e-06, "loss": 0.4729, "step": 2196 }, { "epoch": 2.0456238361266292, "grad_norm": 0.2809553536681378, "learning_rate": 7.0714039323904805e-06, "loss": 0.4722, "step": 2197 }, { "epoch": 2.0465549348230914, "grad_norm": 0.2963832904660623, "learning_rate": 7.064505001724733e-06, "loss": 0.4606, "step": 2198 }, { "epoch": 2.047486033519553, "grad_norm": 0.32513721378423266, "learning_rate": 7.057606071058987e-06, "loss": 0.4752, "step": 2199 }, { "epoch": 2.0484171322160147, "grad_norm": 0.27562757907512386, "learning_rate": 7.0507071403932395e-06, "loss": 0.4743, "step": 2200 }, { "epoch": 2.049348230912477, "grad_norm": 0.2885220791815333, "learning_rate": 7.043808209727493e-06, "loss": 0.4688, "step": 2201 }, { "epoch": 2.0502793296089385, "grad_norm": 0.281244494875958, "learning_rate": 7.036909279061745e-06, "loss": 0.4724, "step": 2202 }, { "epoch": 2.0512104283054002, "grad_norm": 0.2769176965057692, "learning_rate": 7.030010348395999e-06, "loss": 0.4874, "step": 2203 }, { "epoch": 2.0521415270018624, "grad_norm": 0.29632326854127045, "learning_rate": 7.0231114177302525e-06, "loss": 0.4892, "step": 2204 }, { "epoch": 2.053072625698324, "grad_norm": 0.29925171482181495, "learning_rate": 7.016212487064506e-06, "loss": 0.4767, "step": 2205 }, { "epoch": 2.0540037243947857, "grad_norm": 0.3049661637589387, "learning_rate": 7.009313556398759e-06, "loss": 0.4701, "step": 2206 }, { "epoch": 2.054934823091248, "grad_norm": 0.2941738135014051, "learning_rate": 7.002414625733011e-06, "loss": 0.478, "step": 2207 }, { "epoch": 2.0558659217877095, "grad_norm": 0.2875899787986152, "learning_rate": 6.9955156950672655e-06, "loss": 0.466, "step": 2208 }, { "epoch": 2.0567970204841712, "grad_norm": 0.2809135090487815, "learning_rate": 6.988616764401518e-06, "loss": 0.4519, "step": 2209 }, { "epoch": 2.0577281191806334, "grad_norm": 0.2822868013770668, "learning_rate": 6.981717833735772e-06, "loss": 0.4508, "step": 2210 }, { "epoch": 2.058659217877095, "grad_norm": 0.2923220359164469, "learning_rate": 6.974818903070024e-06, "loss": 0.4766, "step": 2211 }, { "epoch": 2.0595903165735567, "grad_norm": 0.2974367145961149, "learning_rate": 6.967919972404278e-06, "loss": 0.4521, "step": 2212 }, { "epoch": 2.0605214152700184, "grad_norm": 0.2855049014857252, "learning_rate": 6.961021041738532e-06, "loss": 0.4662, "step": 2213 }, { "epoch": 2.0614525139664805, "grad_norm": 0.2719945547050218, "learning_rate": 6.954122111072784e-06, "loss": 0.4554, "step": 2214 }, { "epoch": 2.0623836126629422, "grad_norm": 0.2961535528179908, "learning_rate": 6.947223180407037e-06, "loss": 0.5105, "step": 2215 }, { "epoch": 2.063314711359404, "grad_norm": 0.2877164071336843, "learning_rate": 6.940324249741291e-06, "loss": 0.4639, "step": 2216 }, { "epoch": 2.064245810055866, "grad_norm": 0.2997895713530861, "learning_rate": 6.933425319075544e-06, "loss": 0.4641, "step": 2217 }, { "epoch": 2.0651769087523277, "grad_norm": 0.2838870541826308, "learning_rate": 6.926526388409796e-06, "loss": 0.4916, "step": 2218 }, { "epoch": 2.0661080074487894, "grad_norm": 0.2777431660581448, "learning_rate": 6.9196274577440504e-06, "loss": 0.4787, "step": 2219 }, { "epoch": 2.0670391061452515, "grad_norm": 0.28272770111184486, "learning_rate": 6.912728527078303e-06, "loss": 0.4847, "step": 2220 }, { "epoch": 2.0679702048417132, "grad_norm": 0.29996918980016263, "learning_rate": 6.905829596412557e-06, "loss": 0.4788, "step": 2221 }, { "epoch": 2.068901303538175, "grad_norm": 0.3160975208920299, "learning_rate": 6.89893066574681e-06, "loss": 0.48, "step": 2222 }, { "epoch": 2.069832402234637, "grad_norm": 0.28757997469858987, "learning_rate": 6.892031735081063e-06, "loss": 0.4492, "step": 2223 }, { "epoch": 2.0707635009310987, "grad_norm": 0.3196675394787691, "learning_rate": 6.885132804415317e-06, "loss": 0.5086, "step": 2224 }, { "epoch": 2.0716945996275604, "grad_norm": 0.3203473517761641, "learning_rate": 6.878233873749569e-06, "loss": 0.4369, "step": 2225 }, { "epoch": 2.0726256983240225, "grad_norm": 0.2962635945581439, "learning_rate": 6.871334943083822e-06, "loss": 0.4669, "step": 2226 }, { "epoch": 2.0735567970204842, "grad_norm": 0.29161428920551, "learning_rate": 6.864436012418076e-06, "loss": 0.4833, "step": 2227 }, { "epoch": 2.074487895716946, "grad_norm": 0.2985695091517355, "learning_rate": 6.857537081752329e-06, "loss": 0.476, "step": 2228 }, { "epoch": 2.0754189944134076, "grad_norm": 0.29523604106986445, "learning_rate": 6.850638151086581e-06, "loss": 0.4768, "step": 2229 }, { "epoch": 2.0763500931098697, "grad_norm": 0.30361490201840774, "learning_rate": 6.843739220420835e-06, "loss": 0.4637, "step": 2230 }, { "epoch": 2.0772811918063314, "grad_norm": 0.2878434371568588, "learning_rate": 6.836840289755089e-06, "loss": 0.4424, "step": 2231 }, { "epoch": 2.078212290502793, "grad_norm": 0.287827040374008, "learning_rate": 6.829941359089342e-06, "loss": 0.4833, "step": 2232 }, { "epoch": 2.0791433891992552, "grad_norm": 0.3212119628856486, "learning_rate": 6.823042428423595e-06, "loss": 0.469, "step": 2233 }, { "epoch": 2.080074487895717, "grad_norm": 0.2947494646159234, "learning_rate": 6.8161434977578476e-06, "loss": 0.4782, "step": 2234 }, { "epoch": 2.0810055865921786, "grad_norm": 0.277157363719719, "learning_rate": 6.809244567092102e-06, "loss": 0.4566, "step": 2235 }, { "epoch": 2.0819366852886407, "grad_norm": 0.28636995826320366, "learning_rate": 6.802345636426354e-06, "loss": 0.4809, "step": 2236 }, { "epoch": 2.0828677839851024, "grad_norm": 0.30205714677925494, "learning_rate": 6.795446705760607e-06, "loss": 0.4806, "step": 2237 }, { "epoch": 2.083798882681564, "grad_norm": 0.26409148857977766, "learning_rate": 6.788547775094861e-06, "loss": 0.4787, "step": 2238 }, { "epoch": 2.0847299813780262, "grad_norm": 0.301124976951571, "learning_rate": 6.781648844429114e-06, "loss": 0.4809, "step": 2239 }, { "epoch": 2.085661080074488, "grad_norm": 0.29098042908173355, "learning_rate": 6.774749913763368e-06, "loss": 0.461, "step": 2240 }, { "epoch": 2.0865921787709496, "grad_norm": 0.29627388311184416, "learning_rate": 6.76785098309762e-06, "loss": 0.4828, "step": 2241 }, { "epoch": 2.0875232774674117, "grad_norm": 0.2752133458200151, "learning_rate": 6.760952052431874e-06, "loss": 0.461, "step": 2242 }, { "epoch": 2.0884543761638734, "grad_norm": 0.31709550930259456, "learning_rate": 6.754053121766127e-06, "loss": 0.4598, "step": 2243 }, { "epoch": 2.089385474860335, "grad_norm": 0.2948697084790663, "learning_rate": 6.74715419110038e-06, "loss": 0.4973, "step": 2244 }, { "epoch": 2.0903165735567972, "grad_norm": 0.2919917125816585, "learning_rate": 6.7402552604346325e-06, "loss": 0.485, "step": 2245 }, { "epoch": 2.091247672253259, "grad_norm": 0.2944961887417588, "learning_rate": 6.733356329768887e-06, "loss": 0.4834, "step": 2246 }, { "epoch": 2.0921787709497206, "grad_norm": 0.2789824686290677, "learning_rate": 6.72645739910314e-06, "loss": 0.4712, "step": 2247 }, { "epoch": 2.0931098696461823, "grad_norm": 0.28808120556309347, "learning_rate": 6.719558468437392e-06, "loss": 0.4409, "step": 2248 }, { "epoch": 2.0940409683426444, "grad_norm": 0.2974673077964084, "learning_rate": 6.712659537771646e-06, "loss": 0.4644, "step": 2249 }, { "epoch": 2.094972067039106, "grad_norm": 0.2739344837255603, "learning_rate": 6.705760607105899e-06, "loss": 0.4822, "step": 2250 }, { "epoch": 2.095903165735568, "grad_norm": 0.30057465528232047, "learning_rate": 6.698861676440153e-06, "loss": 0.4589, "step": 2251 }, { "epoch": 2.09683426443203, "grad_norm": 0.30700109915739743, "learning_rate": 6.691962745774405e-06, "loss": 0.5074, "step": 2252 }, { "epoch": 2.0977653631284916, "grad_norm": 0.28075418984219425, "learning_rate": 6.6850638151086585e-06, "loss": 0.4592, "step": 2253 }, { "epoch": 2.0986964618249533, "grad_norm": 0.28693697740573587, "learning_rate": 6.678164884442912e-06, "loss": 0.4874, "step": 2254 }, { "epoch": 2.0996275605214154, "grad_norm": 0.27894160049516825, "learning_rate": 6.671265953777165e-06, "loss": 0.4959, "step": 2255 }, { "epoch": 2.100558659217877, "grad_norm": 0.30721301112862914, "learning_rate": 6.664367023111419e-06, "loss": 0.4532, "step": 2256 }, { "epoch": 2.101489757914339, "grad_norm": 0.3125643155790992, "learning_rate": 6.6574680924456716e-06, "loss": 0.482, "step": 2257 }, { "epoch": 2.102420856610801, "grad_norm": 0.26914260716906707, "learning_rate": 6.650569161779925e-06, "loss": 0.4751, "step": 2258 }, { "epoch": 2.1033519553072626, "grad_norm": 0.28357542000157887, "learning_rate": 6.643670231114178e-06, "loss": 0.4578, "step": 2259 }, { "epoch": 2.1042830540037243, "grad_norm": 0.291387048256196, "learning_rate": 6.636771300448431e-06, "loss": 0.4674, "step": 2260 }, { "epoch": 2.1052141527001864, "grad_norm": 0.28701803580593854, "learning_rate": 6.629872369782684e-06, "loss": 0.4914, "step": 2261 }, { "epoch": 2.106145251396648, "grad_norm": 0.28096467467031794, "learning_rate": 6.622973439116938e-06, "loss": 0.4819, "step": 2262 }, { "epoch": 2.10707635009311, "grad_norm": 0.2794203781528124, "learning_rate": 6.61607450845119e-06, "loss": 0.4907, "step": 2263 }, { "epoch": 2.1080074487895715, "grad_norm": 0.28708033948189404, "learning_rate": 6.6091755777854435e-06, "loss": 0.4931, "step": 2264 }, { "epoch": 2.1089385474860336, "grad_norm": 0.28445734071934226, "learning_rate": 6.602276647119698e-06, "loss": 0.4755, "step": 2265 }, { "epoch": 2.1098696461824953, "grad_norm": 0.2925784013371626, "learning_rate": 6.59537771645395e-06, "loss": 0.5136, "step": 2266 }, { "epoch": 2.110800744878957, "grad_norm": 0.2757355324934379, "learning_rate": 6.588478785788204e-06, "loss": 0.466, "step": 2267 }, { "epoch": 2.111731843575419, "grad_norm": 0.28265423298684395, "learning_rate": 6.5815798551224565e-06, "loss": 0.4635, "step": 2268 }, { "epoch": 2.112662942271881, "grad_norm": 0.3147497081285622, "learning_rate": 6.57468092445671e-06, "loss": 0.475, "step": 2269 }, { "epoch": 2.1135940409683425, "grad_norm": 0.2736542008928024, "learning_rate": 6.567781993790963e-06, "loss": 0.4367, "step": 2270 }, { "epoch": 2.1145251396648046, "grad_norm": 0.2704763512184406, "learning_rate": 6.560883063125216e-06, "loss": 0.4709, "step": 2271 }, { "epoch": 2.1154562383612663, "grad_norm": 0.30797613406659563, "learning_rate": 6.553984132459469e-06, "loss": 0.4799, "step": 2272 }, { "epoch": 2.116387337057728, "grad_norm": 0.2858889036941924, "learning_rate": 6.547085201793723e-06, "loss": 0.4599, "step": 2273 }, { "epoch": 2.11731843575419, "grad_norm": 0.2966029524177579, "learning_rate": 6.540186271127976e-06, "loss": 0.4628, "step": 2274 }, { "epoch": 2.118249534450652, "grad_norm": 0.28170072793827394, "learning_rate": 6.5332873404622284e-06, "loss": 0.4879, "step": 2275 }, { "epoch": 2.1191806331471135, "grad_norm": 0.28834784214519227, "learning_rate": 6.5263884097964825e-06, "loss": 0.4711, "step": 2276 }, { "epoch": 2.1201117318435756, "grad_norm": 0.27598390460972533, "learning_rate": 6.519489479130735e-06, "loss": 0.4601, "step": 2277 }, { "epoch": 2.1210428305400373, "grad_norm": 0.2839199598018159, "learning_rate": 6.512590548464989e-06, "loss": 0.4668, "step": 2278 }, { "epoch": 2.121973929236499, "grad_norm": 0.27847666405943683, "learning_rate": 6.5056916177992414e-06, "loss": 0.4656, "step": 2279 }, { "epoch": 2.122905027932961, "grad_norm": 0.28091358192759025, "learning_rate": 6.498792687133495e-06, "loss": 0.4881, "step": 2280 }, { "epoch": 2.123836126629423, "grad_norm": 0.2813756320616899, "learning_rate": 6.491893756467748e-06, "loss": 0.4729, "step": 2281 }, { "epoch": 2.1247672253258845, "grad_norm": 0.26853651888276625, "learning_rate": 6.484994825802001e-06, "loss": 0.486, "step": 2282 }, { "epoch": 2.1256983240223466, "grad_norm": 0.2849917317507407, "learning_rate": 6.478095895136255e-06, "loss": 0.4936, "step": 2283 }, { "epoch": 2.1266294227188083, "grad_norm": 0.27075400591442833, "learning_rate": 6.471196964470508e-06, "loss": 0.4565, "step": 2284 }, { "epoch": 2.12756052141527, "grad_norm": 0.2819258491011999, "learning_rate": 6.464298033804761e-06, "loss": 0.4723, "step": 2285 }, { "epoch": 2.1284916201117317, "grad_norm": 0.2836178665013942, "learning_rate": 6.457399103139013e-06, "loss": 0.4614, "step": 2286 }, { "epoch": 2.129422718808194, "grad_norm": 0.26789112548880745, "learning_rate": 6.4505001724732675e-06, "loss": 0.4611, "step": 2287 }, { "epoch": 2.1303538175046555, "grad_norm": 0.3283270936202214, "learning_rate": 6.44360124180752e-06, "loss": 0.4863, "step": 2288 }, { "epoch": 2.131284916201117, "grad_norm": 0.2725979007039713, "learning_rate": 6.436702311141774e-06, "loss": 0.4567, "step": 2289 }, { "epoch": 2.1322160148975793, "grad_norm": 0.28642375369309453, "learning_rate": 6.429803380476026e-06, "loss": 0.5099, "step": 2290 }, { "epoch": 2.133147113594041, "grad_norm": 0.30392850240256275, "learning_rate": 6.42290444981028e-06, "loss": 0.4934, "step": 2291 }, { "epoch": 2.1340782122905027, "grad_norm": 0.28902298332443777, "learning_rate": 6.416005519144534e-06, "loss": 0.4725, "step": 2292 }, { "epoch": 2.135009310986965, "grad_norm": 0.3040244220014207, "learning_rate": 6.409106588478786e-06, "loss": 0.5081, "step": 2293 }, { "epoch": 2.1359404096834265, "grad_norm": 0.27702607953828806, "learning_rate": 6.40220765781304e-06, "loss": 0.4642, "step": 2294 }, { "epoch": 2.136871508379888, "grad_norm": 0.2973952786198521, "learning_rate": 6.395308727147293e-06, "loss": 0.4775, "step": 2295 }, { "epoch": 2.1378026070763503, "grad_norm": 0.3207203218933782, "learning_rate": 6.388409796481546e-06, "loss": 0.4808, "step": 2296 }, { "epoch": 2.138733705772812, "grad_norm": 0.29245462224527297, "learning_rate": 6.381510865815798e-06, "loss": 0.502, "step": 2297 }, { "epoch": 2.1396648044692737, "grad_norm": 0.291011573061778, "learning_rate": 6.3746119351500524e-06, "loss": 0.4858, "step": 2298 }, { "epoch": 2.1405959031657353, "grad_norm": 0.301455784604225, "learning_rate": 6.367713004484305e-06, "loss": 0.4894, "step": 2299 }, { "epoch": 2.1415270018621975, "grad_norm": 0.29773270639461297, "learning_rate": 6.360814073818559e-06, "loss": 0.4668, "step": 2300 }, { "epoch": 2.142458100558659, "grad_norm": 0.2822207965117862, "learning_rate": 6.353915143152812e-06, "loss": 0.4801, "step": 2301 }, { "epoch": 2.143389199255121, "grad_norm": 0.2999233735495317, "learning_rate": 6.347016212487065e-06, "loss": 0.4689, "step": 2302 }, { "epoch": 2.144320297951583, "grad_norm": 0.2940863710604639, "learning_rate": 6.340117281821319e-06, "loss": 0.4846, "step": 2303 }, { "epoch": 2.1452513966480447, "grad_norm": 0.26227990360939274, "learning_rate": 6.333218351155571e-06, "loss": 0.4483, "step": 2304 }, { "epoch": 2.1461824953445063, "grad_norm": 0.29779993335396415, "learning_rate": 6.326319420489825e-06, "loss": 0.4878, "step": 2305 }, { "epoch": 2.1471135940409685, "grad_norm": 0.2799941925857918, "learning_rate": 6.319420489824078e-06, "loss": 0.4511, "step": 2306 }, { "epoch": 2.14804469273743, "grad_norm": 0.29016513284945006, "learning_rate": 6.312521559158331e-06, "loss": 0.4706, "step": 2307 }, { "epoch": 2.148975791433892, "grad_norm": 0.281805342903352, "learning_rate": 6.305622628492583e-06, "loss": 0.4459, "step": 2308 }, { "epoch": 2.149906890130354, "grad_norm": 0.29360451730231085, "learning_rate": 6.298723697826837e-06, "loss": 0.4667, "step": 2309 }, { "epoch": 2.1508379888268156, "grad_norm": 0.26221727153812574, "learning_rate": 6.291824767161091e-06, "loss": 0.4468, "step": 2310 }, { "epoch": 2.1517690875232773, "grad_norm": 0.29186440781863765, "learning_rate": 6.284925836495344e-06, "loss": 0.4824, "step": 2311 }, { "epoch": 2.1527001862197395, "grad_norm": 0.2792560172526676, "learning_rate": 6.278026905829597e-06, "loss": 0.4441, "step": 2312 }, { "epoch": 2.153631284916201, "grad_norm": 0.3128669251877039, "learning_rate": 6.2711279751638495e-06, "loss": 0.4752, "step": 2313 }, { "epoch": 2.154562383612663, "grad_norm": 0.28679714479565555, "learning_rate": 6.264229044498104e-06, "loss": 0.4624, "step": 2314 }, { "epoch": 2.155493482309125, "grad_norm": 0.2975992128906353, "learning_rate": 6.257330113832356e-06, "loss": 0.4581, "step": 2315 }, { "epoch": 2.1564245810055866, "grad_norm": 0.2784668870049539, "learning_rate": 6.25043118316661e-06, "loss": 0.4595, "step": 2316 }, { "epoch": 2.1573556797020483, "grad_norm": 0.3065285678463891, "learning_rate": 6.2435322525008626e-06, "loss": 0.4793, "step": 2317 }, { "epoch": 2.1582867783985105, "grad_norm": 0.2750154087843288, "learning_rate": 6.236633321835116e-06, "loss": 0.4674, "step": 2318 }, { "epoch": 2.159217877094972, "grad_norm": 0.28054518076885904, "learning_rate": 6.22973439116937e-06, "loss": 0.4659, "step": 2319 }, { "epoch": 2.160148975791434, "grad_norm": 0.2671024803412535, "learning_rate": 6.222835460503622e-06, "loss": 0.4653, "step": 2320 }, { "epoch": 2.1610800744878955, "grad_norm": 0.2786270221413241, "learning_rate": 6.215936529837876e-06, "loss": 0.4393, "step": 2321 }, { "epoch": 2.1620111731843576, "grad_norm": 0.29538396189908966, "learning_rate": 6.209037599172129e-06, "loss": 0.523, "step": 2322 }, { "epoch": 2.1629422718808193, "grad_norm": 0.29097837066481125, "learning_rate": 6.202138668506382e-06, "loss": 0.4613, "step": 2323 }, { "epoch": 2.163873370577281, "grad_norm": 0.294418884743535, "learning_rate": 6.1952397378406345e-06, "loss": 0.473, "step": 2324 }, { "epoch": 2.164804469273743, "grad_norm": 0.27157818824648106, "learning_rate": 6.188340807174889e-06, "loss": 0.4956, "step": 2325 }, { "epoch": 2.165735567970205, "grad_norm": 0.276796310455125, "learning_rate": 6.181441876509141e-06, "loss": 0.4742, "step": 2326 }, { "epoch": 2.1666666666666665, "grad_norm": 0.2926616887875992, "learning_rate": 6.174542945843395e-06, "loss": 0.4851, "step": 2327 }, { "epoch": 2.1675977653631286, "grad_norm": 0.2640494522794228, "learning_rate": 6.167644015177648e-06, "loss": 0.4769, "step": 2328 }, { "epoch": 2.1685288640595903, "grad_norm": 0.2990976583189707, "learning_rate": 6.160745084511901e-06, "loss": 0.5082, "step": 2329 }, { "epoch": 2.169459962756052, "grad_norm": 0.28998645076497526, "learning_rate": 6.153846153846155e-06, "loss": 0.46, "step": 2330 }, { "epoch": 2.170391061452514, "grad_norm": 0.27547451807894036, "learning_rate": 6.146947223180407e-06, "loss": 0.4446, "step": 2331 }, { "epoch": 2.171322160148976, "grad_norm": 0.27839768407215415, "learning_rate": 6.1400482925146605e-06, "loss": 0.4586, "step": 2332 }, { "epoch": 2.1722532588454375, "grad_norm": 0.28585058503470967, "learning_rate": 6.133149361848914e-06, "loss": 0.4692, "step": 2333 }, { "epoch": 2.1731843575418996, "grad_norm": 0.2909324201356784, "learning_rate": 6.126250431183167e-06, "loss": 0.4747, "step": 2334 }, { "epoch": 2.1741154562383613, "grad_norm": 0.28536107586777854, "learning_rate": 6.1193515005174194e-06, "loss": 0.478, "step": 2335 }, { "epoch": 2.175046554934823, "grad_norm": 0.303808215719886, "learning_rate": 6.1124525698516735e-06, "loss": 0.4775, "step": 2336 }, { "epoch": 2.1759776536312847, "grad_norm": 0.3012859761623921, "learning_rate": 6.105553639185927e-06, "loss": 0.4718, "step": 2337 }, { "epoch": 2.176908752327747, "grad_norm": 0.29401718121198306, "learning_rate": 6.09865470852018e-06, "loss": 0.4808, "step": 2338 }, { "epoch": 2.1778398510242085, "grad_norm": 0.2677175765447863, "learning_rate": 6.091755777854433e-06, "loss": 0.4684, "step": 2339 }, { "epoch": 2.17877094972067, "grad_norm": 0.29064880371971064, "learning_rate": 6.084856847188686e-06, "loss": 0.4905, "step": 2340 }, { "epoch": 2.1797020484171323, "grad_norm": 0.29625179406671465, "learning_rate": 6.07795791652294e-06, "loss": 0.4935, "step": 2341 }, { "epoch": 2.180633147113594, "grad_norm": 0.3199404431615953, "learning_rate": 6.071058985857192e-06, "loss": 0.4788, "step": 2342 }, { "epoch": 2.1815642458100557, "grad_norm": 0.3014341240378109, "learning_rate": 6.064160055191446e-06, "loss": 0.4666, "step": 2343 }, { "epoch": 2.182495344506518, "grad_norm": 0.29692526028868765, "learning_rate": 6.057261124525699e-06, "loss": 0.463, "step": 2344 }, { "epoch": 2.1834264432029795, "grad_norm": 0.26777211665781936, "learning_rate": 6.050362193859952e-06, "loss": 0.4716, "step": 2345 }, { "epoch": 2.184357541899441, "grad_norm": 0.3341731767577368, "learning_rate": 6.043463263194206e-06, "loss": 0.5084, "step": 2346 }, { "epoch": 2.1852886405959033, "grad_norm": 0.3016849354644657, "learning_rate": 6.0365643325284585e-06, "loss": 0.455, "step": 2347 }, { "epoch": 2.186219739292365, "grad_norm": 0.28838994469702606, "learning_rate": 6.029665401862712e-06, "loss": 0.468, "step": 2348 }, { "epoch": 2.1871508379888267, "grad_norm": 0.2876301569908795, "learning_rate": 6.022766471196965e-06, "loss": 0.4611, "step": 2349 }, { "epoch": 2.188081936685289, "grad_norm": 0.29475176321961893, "learning_rate": 6.015867540531218e-06, "loss": 0.4795, "step": 2350 }, { "epoch": 2.1890130353817505, "grad_norm": 0.29317391704453116, "learning_rate": 6.008968609865471e-06, "loss": 0.4611, "step": 2351 }, { "epoch": 2.189944134078212, "grad_norm": 0.2894085104784059, "learning_rate": 6.002069679199725e-06, "loss": 0.447, "step": 2352 }, { "epoch": 2.1908752327746743, "grad_norm": 0.28859974943643524, "learning_rate": 5.995170748533977e-06, "loss": 0.4856, "step": 2353 }, { "epoch": 2.191806331471136, "grad_norm": 0.2957799407460065, "learning_rate": 5.988271817868231e-06, "loss": 0.4856, "step": 2354 }, { "epoch": 2.1927374301675977, "grad_norm": 0.30429834374162756, "learning_rate": 5.9813728872024845e-06, "loss": 0.4511, "step": 2355 }, { "epoch": 2.1936685288640594, "grad_norm": 0.280993186050252, "learning_rate": 5.974473956536737e-06, "loss": 0.4669, "step": 2356 }, { "epoch": 2.1945996275605215, "grad_norm": 0.273806803303172, "learning_rate": 5.967575025870991e-06, "loss": 0.4681, "step": 2357 }, { "epoch": 2.195530726256983, "grad_norm": 0.3272256218852092, "learning_rate": 5.9606760952052434e-06, "loss": 0.4834, "step": 2358 }, { "epoch": 2.196461824953445, "grad_norm": 0.3054110146205404, "learning_rate": 5.953777164539497e-06, "loss": 0.496, "step": 2359 }, { "epoch": 2.197392923649907, "grad_norm": 0.30546762037562925, "learning_rate": 5.94687823387375e-06, "loss": 0.4833, "step": 2360 }, { "epoch": 2.1983240223463687, "grad_norm": 0.2784429818722673, "learning_rate": 5.939979303208003e-06, "loss": 0.4488, "step": 2361 }, { "epoch": 2.1992551210428304, "grad_norm": 0.27959245673975053, "learning_rate": 5.933080372542256e-06, "loss": 0.4723, "step": 2362 }, { "epoch": 2.2001862197392925, "grad_norm": 0.2831353628391782, "learning_rate": 5.92618144187651e-06, "loss": 0.4947, "step": 2363 }, { "epoch": 2.201117318435754, "grad_norm": 0.2829431882740148, "learning_rate": 5.919282511210763e-06, "loss": 0.4764, "step": 2364 }, { "epoch": 2.202048417132216, "grad_norm": 0.2818922043795733, "learning_rate": 5.912383580545016e-06, "loss": 0.4786, "step": 2365 }, { "epoch": 2.202979515828678, "grad_norm": 0.28278976278894513, "learning_rate": 5.9054846498792695e-06, "loss": 0.4849, "step": 2366 }, { "epoch": 2.2039106145251397, "grad_norm": 0.2990803478008309, "learning_rate": 5.898585719213522e-06, "loss": 0.4837, "step": 2367 }, { "epoch": 2.2048417132216014, "grad_norm": 0.2774035967620681, "learning_rate": 5.891686788547776e-06, "loss": 0.4878, "step": 2368 }, { "epoch": 2.2057728119180635, "grad_norm": 0.2827779868979279, "learning_rate": 5.884787857882028e-06, "loss": 0.4737, "step": 2369 }, { "epoch": 2.206703910614525, "grad_norm": 0.30195940773407426, "learning_rate": 5.877888927216282e-06, "loss": 0.4716, "step": 2370 }, { "epoch": 2.207635009310987, "grad_norm": 0.29054992289389364, "learning_rate": 5.870989996550536e-06, "loss": 0.4791, "step": 2371 }, { "epoch": 2.2085661080074486, "grad_norm": 0.2776423357405567, "learning_rate": 5.864091065884788e-06, "loss": 0.4543, "step": 2372 }, { "epoch": 2.2094972067039107, "grad_norm": 0.2906200585064831, "learning_rate": 5.857192135219042e-06, "loss": 0.4888, "step": 2373 }, { "epoch": 2.2104283054003724, "grad_norm": 0.286358263179219, "learning_rate": 5.850293204553295e-06, "loss": 0.478, "step": 2374 }, { "epoch": 2.211359404096834, "grad_norm": 0.2952112017556298, "learning_rate": 5.843394273887548e-06, "loss": 0.4937, "step": 2375 }, { "epoch": 2.212290502793296, "grad_norm": 0.31273692998151903, "learning_rate": 5.836495343221801e-06, "loss": 0.4832, "step": 2376 }, { "epoch": 2.213221601489758, "grad_norm": 0.28359843353699643, "learning_rate": 5.8295964125560544e-06, "loss": 0.4535, "step": 2377 }, { "epoch": 2.2141527001862196, "grad_norm": 0.27538200247552336, "learning_rate": 5.822697481890307e-06, "loss": 0.455, "step": 2378 }, { "epoch": 2.2150837988826817, "grad_norm": 0.28513138212970346, "learning_rate": 5.815798551224561e-06, "loss": 0.4728, "step": 2379 }, { "epoch": 2.2160148975791434, "grad_norm": 0.2907105054991736, "learning_rate": 5.808899620558814e-06, "loss": 0.4866, "step": 2380 }, { "epoch": 2.216945996275605, "grad_norm": 0.32377573426958794, "learning_rate": 5.802000689893067e-06, "loss": 0.4804, "step": 2381 }, { "epoch": 2.217877094972067, "grad_norm": 0.2926379311538932, "learning_rate": 5.795101759227321e-06, "loss": 0.4907, "step": 2382 }, { "epoch": 2.218808193668529, "grad_norm": 0.29689881501034954, "learning_rate": 5.788202828561573e-06, "loss": 0.4905, "step": 2383 }, { "epoch": 2.2197392923649906, "grad_norm": 0.3244923031704521, "learning_rate": 5.781303897895827e-06, "loss": 0.49, "step": 2384 }, { "epoch": 2.2206703910614527, "grad_norm": 0.2770823222786602, "learning_rate": 5.77440496723008e-06, "loss": 0.4741, "step": 2385 }, { "epoch": 2.2216014897579144, "grad_norm": 0.30677414583596585, "learning_rate": 5.767506036564333e-06, "loss": 0.4829, "step": 2386 }, { "epoch": 2.222532588454376, "grad_norm": 0.2979679267048333, "learning_rate": 5.760607105898586e-06, "loss": 0.4842, "step": 2387 }, { "epoch": 2.223463687150838, "grad_norm": 0.290635248800551, "learning_rate": 5.753708175232839e-06, "loss": 0.4794, "step": 2388 }, { "epoch": 2.2243947858473, "grad_norm": 0.28598227377180707, "learning_rate": 5.7468092445670935e-06, "loss": 0.4719, "step": 2389 }, { "epoch": 2.2253258845437616, "grad_norm": 0.26035680915905063, "learning_rate": 5.739910313901346e-06, "loss": 0.4422, "step": 2390 }, { "epoch": 2.2262569832402237, "grad_norm": 0.29614444372368814, "learning_rate": 5.733011383235599e-06, "loss": 0.4806, "step": 2391 }, { "epoch": 2.2271880819366854, "grad_norm": 0.2873421000996187, "learning_rate": 5.7261124525698515e-06, "loss": 0.4791, "step": 2392 }, { "epoch": 2.228119180633147, "grad_norm": 0.2844975239307282, "learning_rate": 5.719213521904106e-06, "loss": 0.4432, "step": 2393 }, { "epoch": 2.2290502793296088, "grad_norm": 0.27738556213673543, "learning_rate": 5.712314591238358e-06, "loss": 0.4782, "step": 2394 }, { "epoch": 2.229981378026071, "grad_norm": 0.267721711116799, "learning_rate": 5.705415660572612e-06, "loss": 0.4908, "step": 2395 }, { "epoch": 2.2309124767225326, "grad_norm": 0.2881245474836227, "learning_rate": 5.6985167299068646e-06, "loss": 0.4556, "step": 2396 }, { "epoch": 2.2318435754189943, "grad_norm": 0.29644425773731714, "learning_rate": 5.691617799241118e-06, "loss": 0.4777, "step": 2397 }, { "epoch": 2.2327746741154564, "grad_norm": 0.2888186766409821, "learning_rate": 5.684718868575372e-06, "loss": 0.4487, "step": 2398 }, { "epoch": 2.233705772811918, "grad_norm": 0.27302404197305, "learning_rate": 5.677819937909624e-06, "loss": 0.459, "step": 2399 }, { "epoch": 2.2346368715083798, "grad_norm": 0.28376915990615653, "learning_rate": 5.6709210072438784e-06, "loss": 0.4768, "step": 2400 }, { "epoch": 2.235567970204842, "grad_norm": 0.28562316548135114, "learning_rate": 5.664022076578131e-06, "loss": 0.4878, "step": 2401 }, { "epoch": 2.2364990689013036, "grad_norm": 0.2786254637386114, "learning_rate": 5.657123145912384e-06, "loss": 0.5016, "step": 2402 }, { "epoch": 2.2374301675977653, "grad_norm": 0.2718291203490611, "learning_rate": 5.6502242152466365e-06, "loss": 0.4678, "step": 2403 }, { "epoch": 2.2383612662942274, "grad_norm": 0.25610119333088904, "learning_rate": 5.643325284580891e-06, "loss": 0.4522, "step": 2404 }, { "epoch": 2.239292364990689, "grad_norm": 0.28817642776602226, "learning_rate": 5.636426353915143e-06, "loss": 0.4658, "step": 2405 }, { "epoch": 2.2402234636871508, "grad_norm": 0.2719364997628893, "learning_rate": 5.629527423249397e-06, "loss": 0.4593, "step": 2406 }, { "epoch": 2.2411545623836124, "grad_norm": 0.2752688936119595, "learning_rate": 5.62262849258365e-06, "loss": 0.4758, "step": 2407 }, { "epoch": 2.2420856610800746, "grad_norm": 0.2704026107169829, "learning_rate": 5.615729561917903e-06, "loss": 0.4653, "step": 2408 }, { "epoch": 2.2430167597765363, "grad_norm": 0.3004368006981677, "learning_rate": 5.608830631252157e-06, "loss": 0.5024, "step": 2409 }, { "epoch": 2.243947858472998, "grad_norm": 0.3050732715109249, "learning_rate": 5.601931700586409e-06, "loss": 0.4523, "step": 2410 }, { "epoch": 2.24487895716946, "grad_norm": 0.2880133807294865, "learning_rate": 5.595032769920663e-06, "loss": 0.4886, "step": 2411 }, { "epoch": 2.2458100558659218, "grad_norm": 0.2845116387516986, "learning_rate": 5.588133839254916e-06, "loss": 0.4664, "step": 2412 }, { "epoch": 2.2467411545623834, "grad_norm": 0.28455643241151646, "learning_rate": 5.581234908589169e-06, "loss": 0.4519, "step": 2413 }, { "epoch": 2.2476722532588456, "grad_norm": 0.29091991302924547, "learning_rate": 5.5743359779234214e-06, "loss": 0.4788, "step": 2414 }, { "epoch": 2.2486033519553073, "grad_norm": 0.2761564624216095, "learning_rate": 5.5674370472576755e-06, "loss": 0.4646, "step": 2415 }, { "epoch": 2.249534450651769, "grad_norm": 0.2882721871561539, "learning_rate": 5.560538116591929e-06, "loss": 0.466, "step": 2416 }, { "epoch": 2.250465549348231, "grad_norm": 0.2637817506184951, "learning_rate": 5.553639185926182e-06, "loss": 0.4215, "step": 2417 }, { "epoch": 2.2513966480446927, "grad_norm": 0.2747404924154154, "learning_rate": 5.546740255260435e-06, "loss": 0.4508, "step": 2418 }, { "epoch": 2.2523277467411544, "grad_norm": 0.2741496821102069, "learning_rate": 5.539841324594688e-06, "loss": 0.4638, "step": 2419 }, { "epoch": 2.2532588454376166, "grad_norm": 0.2857930657542647, "learning_rate": 5.532942393928942e-06, "loss": 0.478, "step": 2420 }, { "epoch": 2.2541899441340782, "grad_norm": 0.27988042974596394, "learning_rate": 5.526043463263194e-06, "loss": 0.4764, "step": 2421 }, { "epoch": 2.25512104283054, "grad_norm": 0.3155988765185957, "learning_rate": 5.519144532597448e-06, "loss": 0.5071, "step": 2422 }, { "epoch": 2.256052141527002, "grad_norm": 0.28018956974601184, "learning_rate": 5.512245601931701e-06, "loss": 0.4848, "step": 2423 }, { "epoch": 2.2569832402234637, "grad_norm": 0.2767317055239114, "learning_rate": 5.505346671265954e-06, "loss": 0.4617, "step": 2424 }, { "epoch": 2.2579143389199254, "grad_norm": 0.28720842618681064, "learning_rate": 5.498447740600208e-06, "loss": 0.4901, "step": 2425 }, { "epoch": 2.2588454376163876, "grad_norm": 0.2793359220787506, "learning_rate": 5.4915488099344605e-06, "loss": 0.4866, "step": 2426 }, { "epoch": 2.2597765363128492, "grad_norm": 0.26222945095421774, "learning_rate": 5.484649879268714e-06, "loss": 0.4352, "step": 2427 }, { "epoch": 2.260707635009311, "grad_norm": 0.29648350251297134, "learning_rate": 5.477750948602967e-06, "loss": 0.4615, "step": 2428 }, { "epoch": 2.2616387337057726, "grad_norm": 0.2778766043671479, "learning_rate": 5.47085201793722e-06, "loss": 0.4609, "step": 2429 }, { "epoch": 2.2625698324022347, "grad_norm": 0.2743650129541051, "learning_rate": 5.463953087271473e-06, "loss": 0.4717, "step": 2430 }, { "epoch": 2.2635009310986964, "grad_norm": 0.29137743644337405, "learning_rate": 5.457054156605727e-06, "loss": 0.4631, "step": 2431 }, { "epoch": 2.264432029795158, "grad_norm": 0.28648122429005746, "learning_rate": 5.450155225939979e-06, "loss": 0.4706, "step": 2432 }, { "epoch": 2.2653631284916202, "grad_norm": 0.2840120334841151, "learning_rate": 5.443256295274233e-06, "loss": 0.4885, "step": 2433 }, { "epoch": 2.266294227188082, "grad_norm": 0.2941909677999846, "learning_rate": 5.4363573646084865e-06, "loss": 0.4584, "step": 2434 }, { "epoch": 2.2672253258845436, "grad_norm": 0.3005195283969719, "learning_rate": 5.429458433942739e-06, "loss": 0.4838, "step": 2435 }, { "epoch": 2.2681564245810057, "grad_norm": 0.27454641086706527, "learning_rate": 5.422559503276993e-06, "loss": 0.457, "step": 2436 }, { "epoch": 2.2690875232774674, "grad_norm": 0.29462682455576267, "learning_rate": 5.4156605726112454e-06, "loss": 0.4654, "step": 2437 }, { "epoch": 2.270018621973929, "grad_norm": 0.2769797551045216, "learning_rate": 5.4087616419454995e-06, "loss": 0.4769, "step": 2438 }, { "epoch": 2.2709497206703912, "grad_norm": 0.2741707957535795, "learning_rate": 5.401862711279752e-06, "loss": 0.4654, "step": 2439 }, { "epoch": 2.271880819366853, "grad_norm": 0.27392046189554886, "learning_rate": 5.394963780614005e-06, "loss": 0.4932, "step": 2440 }, { "epoch": 2.2728119180633146, "grad_norm": 0.2852874841318396, "learning_rate": 5.388064849948258e-06, "loss": 0.4597, "step": 2441 }, { "epoch": 2.2737430167597763, "grad_norm": 0.2820378366077197, "learning_rate": 5.381165919282512e-06, "loss": 0.4653, "step": 2442 }, { "epoch": 2.2746741154562384, "grad_norm": 0.26719287599469566, "learning_rate": 5.374266988616765e-06, "loss": 0.4843, "step": 2443 }, { "epoch": 2.2756052141527, "grad_norm": 0.2868876310666457, "learning_rate": 5.367368057951018e-06, "loss": 0.4592, "step": 2444 }, { "epoch": 2.276536312849162, "grad_norm": 0.2878063421597445, "learning_rate": 5.3604691272852715e-06, "loss": 0.479, "step": 2445 }, { "epoch": 2.277467411545624, "grad_norm": 0.26815068192984465, "learning_rate": 5.353570196619524e-06, "loss": 0.4735, "step": 2446 }, { "epoch": 2.2783985102420856, "grad_norm": 0.25871982114001185, "learning_rate": 5.346671265953778e-06, "loss": 0.4571, "step": 2447 }, { "epoch": 2.2793296089385473, "grad_norm": 0.2737172034515323, "learning_rate": 5.33977233528803e-06, "loss": 0.4876, "step": 2448 }, { "epoch": 2.2802607076350094, "grad_norm": 0.2775802301070179, "learning_rate": 5.3328734046222845e-06, "loss": 0.4586, "step": 2449 }, { "epoch": 2.281191806331471, "grad_norm": 0.2695005399952836, "learning_rate": 5.325974473956537e-06, "loss": 0.4723, "step": 2450 }, { "epoch": 2.282122905027933, "grad_norm": 0.2769956007498579, "learning_rate": 5.31907554329079e-06, "loss": 0.4796, "step": 2451 }, { "epoch": 2.283054003724395, "grad_norm": 0.275146123211173, "learning_rate": 5.312176612625044e-06, "loss": 0.4579, "step": 2452 }, { "epoch": 2.2839851024208566, "grad_norm": 0.26866011137779094, "learning_rate": 5.305277681959297e-06, "loss": 0.4592, "step": 2453 }, { "epoch": 2.2849162011173183, "grad_norm": 0.2833838877106824, "learning_rate": 5.29837875129355e-06, "loss": 0.4645, "step": 2454 }, { "epoch": 2.2858472998137804, "grad_norm": 0.2908782684680362, "learning_rate": 5.291479820627803e-06, "loss": 0.469, "step": 2455 }, { "epoch": 2.286778398510242, "grad_norm": 0.2770356703187919, "learning_rate": 5.2845808899620564e-06, "loss": 0.4619, "step": 2456 }, { "epoch": 2.287709497206704, "grad_norm": 0.26881151440607837, "learning_rate": 5.277681959296309e-06, "loss": 0.4617, "step": 2457 }, { "epoch": 2.288640595903166, "grad_norm": 0.31638293319687544, "learning_rate": 5.270783028630563e-06, "loss": 0.4757, "step": 2458 }, { "epoch": 2.2895716945996276, "grad_norm": 0.2917625798120041, "learning_rate": 5.263884097964815e-06, "loss": 0.4525, "step": 2459 }, { "epoch": 2.2905027932960893, "grad_norm": 0.2695930078406565, "learning_rate": 5.2569851672990694e-06, "loss": 0.4508, "step": 2460 }, { "epoch": 2.2914338919925514, "grad_norm": 0.2912023482609165, "learning_rate": 5.250086236633323e-06, "loss": 0.471, "step": 2461 }, { "epoch": 2.292364990689013, "grad_norm": 0.2984297550176577, "learning_rate": 5.243187305967575e-06, "loss": 0.5289, "step": 2462 }, { "epoch": 2.293296089385475, "grad_norm": 0.29371136205671944, "learning_rate": 5.236288375301829e-06, "loss": 0.4632, "step": 2463 }, { "epoch": 2.294227188081937, "grad_norm": 0.26939455596485645, "learning_rate": 5.229389444636082e-06, "loss": 0.4683, "step": 2464 }, { "epoch": 2.2951582867783986, "grad_norm": 0.2877163896943471, "learning_rate": 5.222490513970335e-06, "loss": 0.4901, "step": 2465 }, { "epoch": 2.2960893854748603, "grad_norm": 0.30357438861810615, "learning_rate": 5.215591583304588e-06, "loss": 0.4598, "step": 2466 }, { "epoch": 2.297020484171322, "grad_norm": 0.2939355123647469, "learning_rate": 5.208692652638841e-06, "loss": 0.4849, "step": 2467 }, { "epoch": 2.297951582867784, "grad_norm": 0.2768204513915434, "learning_rate": 5.201793721973094e-06, "loss": 0.4727, "step": 2468 }, { "epoch": 2.298882681564246, "grad_norm": 0.3169700087644536, "learning_rate": 5.194894791307348e-06, "loss": 0.4752, "step": 2469 }, { "epoch": 2.2998137802607075, "grad_norm": 0.30023015726823926, "learning_rate": 5.187995860641601e-06, "loss": 0.4828, "step": 2470 }, { "epoch": 2.3007448789571696, "grad_norm": 0.2909489380875448, "learning_rate": 5.181096929975854e-06, "loss": 0.4719, "step": 2471 }, { "epoch": 2.3016759776536313, "grad_norm": 0.2892031695773583, "learning_rate": 5.174197999310108e-06, "loss": 0.4831, "step": 2472 }, { "epoch": 2.302607076350093, "grad_norm": 0.31570614951560855, "learning_rate": 5.16729906864436e-06, "loss": 0.4661, "step": 2473 }, { "epoch": 2.303538175046555, "grad_norm": 0.32765113847969823, "learning_rate": 5.160400137978614e-06, "loss": 0.4927, "step": 2474 }, { "epoch": 2.304469273743017, "grad_norm": 0.3030897757586361, "learning_rate": 5.1535012073128666e-06, "loss": 0.4758, "step": 2475 }, { "epoch": 2.3054003724394785, "grad_norm": 0.31723085003041535, "learning_rate": 5.14660227664712e-06, "loss": 0.4884, "step": 2476 }, { "epoch": 2.30633147113594, "grad_norm": 0.3218255459447322, "learning_rate": 5.139703345981373e-06, "loss": 0.4514, "step": 2477 }, { "epoch": 2.3072625698324023, "grad_norm": 0.2862172415203743, "learning_rate": 5.132804415315626e-06, "loss": 0.479, "step": 2478 }, { "epoch": 2.308193668528864, "grad_norm": 0.28861316809186527, "learning_rate": 5.1259054846498804e-06, "loss": 0.4377, "step": 2479 }, { "epoch": 2.3091247672253257, "grad_norm": 0.3003879722616465, "learning_rate": 5.119006553984133e-06, "loss": 0.4921, "step": 2480 }, { "epoch": 2.310055865921788, "grad_norm": 0.2968367047389629, "learning_rate": 5.112107623318386e-06, "loss": 0.5008, "step": 2481 }, { "epoch": 2.3109869646182495, "grad_norm": 0.2858871290740501, "learning_rate": 5.105208692652639e-06, "loss": 0.5001, "step": 2482 }, { "epoch": 2.311918063314711, "grad_norm": 0.27660018385999224, "learning_rate": 5.098309761986893e-06, "loss": 0.465, "step": 2483 }, { "epoch": 2.3128491620111733, "grad_norm": 0.285294762381888, "learning_rate": 5.091410831321145e-06, "loss": 0.4877, "step": 2484 }, { "epoch": 2.313780260707635, "grad_norm": 0.26780407582918353, "learning_rate": 5.084511900655399e-06, "loss": 0.4639, "step": 2485 }, { "epoch": 2.3147113594040967, "grad_norm": 0.29391929154373486, "learning_rate": 5.0776129699896515e-06, "loss": 0.4765, "step": 2486 }, { "epoch": 2.315642458100559, "grad_norm": 0.2840155169341059, "learning_rate": 5.070714039323905e-06, "loss": 0.4831, "step": 2487 }, { "epoch": 2.3165735567970205, "grad_norm": 0.28877637196648825, "learning_rate": 5.063815108658159e-06, "loss": 0.4938, "step": 2488 }, { "epoch": 2.317504655493482, "grad_norm": 0.2784433992931033, "learning_rate": 5.056916177992411e-06, "loss": 0.4838, "step": 2489 }, { "epoch": 2.3184357541899443, "grad_norm": 0.2710044717196588, "learning_rate": 5.050017247326665e-06, "loss": 0.4826, "step": 2490 }, { "epoch": 2.319366852886406, "grad_norm": 0.27694533165769925, "learning_rate": 5.043118316660918e-06, "loss": 0.4786, "step": 2491 }, { "epoch": 2.3202979515828677, "grad_norm": 0.2718436124632574, "learning_rate": 5.036219385995171e-06, "loss": 0.4397, "step": 2492 }, { "epoch": 2.32122905027933, "grad_norm": 0.27834714785164066, "learning_rate": 5.029320455329424e-06, "loss": 0.4686, "step": 2493 }, { "epoch": 2.3221601489757915, "grad_norm": 0.3029317900925325, "learning_rate": 5.0224215246636775e-06, "loss": 0.4962, "step": 2494 }, { "epoch": 2.323091247672253, "grad_norm": 0.2664651373844042, "learning_rate": 5.01552259399793e-06, "loss": 0.4745, "step": 2495 }, { "epoch": 2.3240223463687153, "grad_norm": 0.28144837982744897, "learning_rate": 5.008623663332184e-06, "loss": 0.4756, "step": 2496 }, { "epoch": 2.324953445065177, "grad_norm": 0.26875336874485223, "learning_rate": 5.001724732666437e-06, "loss": 0.4777, "step": 2497 }, { "epoch": 2.3258845437616387, "grad_norm": 0.26565089042790707, "learning_rate": 4.99482580200069e-06, "loss": 0.4546, "step": 2498 }, { "epoch": 2.326815642458101, "grad_norm": 0.27331555097767535, "learning_rate": 4.987926871334943e-06, "loss": 0.4421, "step": 2499 }, { "epoch": 2.3277467411545625, "grad_norm": 0.2689104308431194, "learning_rate": 4.981027940669197e-06, "loss": 0.438, "step": 2500 }, { "epoch": 2.328677839851024, "grad_norm": 0.27889530327188805, "learning_rate": 4.97412901000345e-06, "loss": 0.4754, "step": 2501 }, { "epoch": 2.329608938547486, "grad_norm": 0.2808101862310248, "learning_rate": 4.967230079337704e-06, "loss": 0.4553, "step": 2502 }, { "epoch": 2.330540037243948, "grad_norm": 0.28873132599658047, "learning_rate": 4.960331148671956e-06, "loss": 0.494, "step": 2503 }, { "epoch": 2.3314711359404097, "grad_norm": 0.25872510164932594, "learning_rate": 4.953432218006209e-06, "loss": 0.4559, "step": 2504 }, { "epoch": 2.3324022346368714, "grad_norm": 0.26743449427439586, "learning_rate": 4.9465332873404625e-06, "loss": 0.4669, "step": 2505 }, { "epoch": 2.3333333333333335, "grad_norm": 0.2898613449120438, "learning_rate": 4.939634356674716e-06, "loss": 0.5038, "step": 2506 }, { "epoch": 2.334264432029795, "grad_norm": 0.2620599925095467, "learning_rate": 4.932735426008969e-06, "loss": 0.4325, "step": 2507 }, { "epoch": 2.335195530726257, "grad_norm": 0.27386275792593145, "learning_rate": 4.925836495343222e-06, "loss": 0.4839, "step": 2508 }, { "epoch": 2.336126629422719, "grad_norm": 0.2628175258751735, "learning_rate": 4.9189375646774755e-06, "loss": 0.4575, "step": 2509 }, { "epoch": 2.3370577281191807, "grad_norm": 0.2837263993291428, "learning_rate": 4.912038634011729e-06, "loss": 0.477, "step": 2510 }, { "epoch": 2.3379888268156424, "grad_norm": 0.29696886264446937, "learning_rate": 4.905139703345982e-06, "loss": 0.4656, "step": 2511 }, { "epoch": 2.338919925512104, "grad_norm": 0.3001472245182636, "learning_rate": 4.898240772680235e-06, "loss": 0.4669, "step": 2512 }, { "epoch": 2.339851024208566, "grad_norm": 0.2667329672134847, "learning_rate": 4.8913418420144885e-06, "loss": 0.4771, "step": 2513 }, { "epoch": 2.340782122905028, "grad_norm": 0.288261661511884, "learning_rate": 4.884442911348741e-06, "loss": 0.4702, "step": 2514 }, { "epoch": 2.3417132216014895, "grad_norm": 0.2941810829130148, "learning_rate": 4.877543980682994e-06, "loss": 0.4797, "step": 2515 }, { "epoch": 2.3426443202979517, "grad_norm": 0.2839635999714699, "learning_rate": 4.8706450500172474e-06, "loss": 0.4805, "step": 2516 }, { "epoch": 2.3435754189944134, "grad_norm": 0.27450528989880696, "learning_rate": 4.863746119351501e-06, "loss": 0.474, "step": 2517 }, { "epoch": 2.344506517690875, "grad_norm": 0.28072708215241693, "learning_rate": 4.856847188685754e-06, "loss": 0.4795, "step": 2518 }, { "epoch": 2.345437616387337, "grad_norm": 0.29645505667730454, "learning_rate": 4.849948258020007e-06, "loss": 0.4787, "step": 2519 }, { "epoch": 2.346368715083799, "grad_norm": 0.2910608310472929, "learning_rate": 4.8430493273542605e-06, "loss": 0.4726, "step": 2520 }, { "epoch": 2.3472998137802605, "grad_norm": 0.27949167183356743, "learning_rate": 4.836150396688514e-06, "loss": 0.4588, "step": 2521 }, { "epoch": 2.3482309124767227, "grad_norm": 0.2923520143773042, "learning_rate": 4.829251466022767e-06, "loss": 0.4685, "step": 2522 }, { "epoch": 2.3491620111731844, "grad_norm": 0.28978228067385353, "learning_rate": 4.82235253535702e-06, "loss": 0.469, "step": 2523 }, { "epoch": 2.350093109869646, "grad_norm": 0.28131686630539876, "learning_rate": 4.8154536046912735e-06, "loss": 0.4903, "step": 2524 }, { "epoch": 2.351024208566108, "grad_norm": 0.2715704511571591, "learning_rate": 4.808554674025526e-06, "loss": 0.481, "step": 2525 }, { "epoch": 2.35195530726257, "grad_norm": 0.3040436459363051, "learning_rate": 4.801655743359779e-06, "loss": 0.4862, "step": 2526 }, { "epoch": 2.3528864059590315, "grad_norm": 0.2853571058577906, "learning_rate": 4.794756812694033e-06, "loss": 0.4776, "step": 2527 }, { "epoch": 2.3538175046554937, "grad_norm": 0.29295105784894393, "learning_rate": 4.7878578820282865e-06, "loss": 0.4776, "step": 2528 }, { "epoch": 2.3547486033519553, "grad_norm": 0.2766360695404266, "learning_rate": 4.780958951362539e-06, "loss": 0.4572, "step": 2529 }, { "epoch": 2.355679702048417, "grad_norm": 0.2844834203712477, "learning_rate": 4.774060020696792e-06, "loss": 0.4684, "step": 2530 }, { "epoch": 2.356610800744879, "grad_norm": 0.2676357267794078, "learning_rate": 4.767161090031045e-06, "loss": 0.4427, "step": 2531 }, { "epoch": 2.357541899441341, "grad_norm": 0.2911910418587784, "learning_rate": 4.760262159365299e-06, "loss": 0.4778, "step": 2532 }, { "epoch": 2.3584729981378025, "grad_norm": 0.29426285869771124, "learning_rate": 4.753363228699552e-06, "loss": 0.455, "step": 2533 }, { "epoch": 2.3594040968342647, "grad_norm": 0.2808337062657622, "learning_rate": 4.746464298033805e-06, "loss": 0.4663, "step": 2534 }, { "epoch": 2.3603351955307263, "grad_norm": 0.3060592869579121, "learning_rate": 4.7395653673680584e-06, "loss": 0.4557, "step": 2535 }, { "epoch": 2.361266294227188, "grad_norm": 0.2555848085134311, "learning_rate": 4.732666436702312e-06, "loss": 0.4267, "step": 2536 }, { "epoch": 2.3621973929236497, "grad_norm": 0.2869839392577163, "learning_rate": 4.725767506036565e-06, "loss": 0.4619, "step": 2537 }, { "epoch": 2.363128491620112, "grad_norm": 0.2809394838850736, "learning_rate": 4.718868575370818e-06, "loss": 0.4683, "step": 2538 }, { "epoch": 2.3640595903165735, "grad_norm": 0.29818652048807115, "learning_rate": 4.7119696447050714e-06, "loss": 0.4788, "step": 2539 }, { "epoch": 2.364990689013035, "grad_norm": 0.3055519519875813, "learning_rate": 4.705070714039324e-06, "loss": 0.4512, "step": 2540 }, { "epoch": 2.3659217877094973, "grad_norm": 0.298879396907047, "learning_rate": 4.698171783373577e-06, "loss": 0.5022, "step": 2541 }, { "epoch": 2.366852886405959, "grad_norm": 0.29528208571552805, "learning_rate": 4.69127285270783e-06, "loss": 0.4736, "step": 2542 }, { "epoch": 2.3677839851024207, "grad_norm": 0.2988846416620038, "learning_rate": 4.684373922042084e-06, "loss": 0.4582, "step": 2543 }, { "epoch": 2.368715083798883, "grad_norm": 0.30294655083820926, "learning_rate": 4.677474991376338e-06, "loss": 0.4889, "step": 2544 }, { "epoch": 2.3696461824953445, "grad_norm": 0.26628388238692163, "learning_rate": 4.67057606071059e-06, "loss": 0.4516, "step": 2545 }, { "epoch": 2.370577281191806, "grad_norm": 0.3131952087176523, "learning_rate": 4.663677130044843e-06, "loss": 0.4977, "step": 2546 }, { "epoch": 2.3715083798882683, "grad_norm": 0.28562236752309167, "learning_rate": 4.656778199379097e-06, "loss": 0.4799, "step": 2547 }, { "epoch": 2.37243947858473, "grad_norm": 0.26826055971496954, "learning_rate": 4.64987926871335e-06, "loss": 0.454, "step": 2548 }, { "epoch": 2.3733705772811917, "grad_norm": 0.2691617529543508, "learning_rate": 4.642980338047603e-06, "loss": 0.4752, "step": 2549 }, { "epoch": 2.3743016759776534, "grad_norm": 0.2922160871425005, "learning_rate": 4.636081407381856e-06, "loss": 0.4655, "step": 2550 }, { "epoch": 2.3752327746741155, "grad_norm": 0.31370742010435865, "learning_rate": 4.629182476716109e-06, "loss": 0.4514, "step": 2551 }, { "epoch": 2.376163873370577, "grad_norm": 0.2805175750572198, "learning_rate": 4.622283546050362e-06, "loss": 0.482, "step": 2552 }, { "epoch": 2.377094972067039, "grad_norm": 0.26211934342303383, "learning_rate": 4.615384615384616e-06, "loss": 0.458, "step": 2553 }, { "epoch": 2.378026070763501, "grad_norm": 0.25975119159723203, "learning_rate": 4.608485684718869e-06, "loss": 0.4451, "step": 2554 }, { "epoch": 2.3789571694599627, "grad_norm": 0.2805412046493255, "learning_rate": 4.601586754053123e-06, "loss": 0.4947, "step": 2555 }, { "epoch": 2.3798882681564244, "grad_norm": 0.2921168583400692, "learning_rate": 4.594687823387375e-06, "loss": 0.4723, "step": 2556 }, { "epoch": 2.3808193668528865, "grad_norm": 0.2800425968637399, "learning_rate": 4.587788892721628e-06, "loss": 0.4795, "step": 2557 }, { "epoch": 2.381750465549348, "grad_norm": 0.26364808955933067, "learning_rate": 4.580889962055882e-06, "loss": 0.4694, "step": 2558 }, { "epoch": 2.38268156424581, "grad_norm": 0.2579059054568288, "learning_rate": 4.573991031390135e-06, "loss": 0.4443, "step": 2559 }, { "epoch": 2.383612662942272, "grad_norm": 0.29127359828192234, "learning_rate": 4.567092100724388e-06, "loss": 0.4791, "step": 2560 }, { "epoch": 2.3845437616387337, "grad_norm": 0.2874900398458875, "learning_rate": 4.560193170058641e-06, "loss": 0.4742, "step": 2561 }, { "epoch": 2.3854748603351954, "grad_norm": 0.2671984937875677, "learning_rate": 4.553294239392895e-06, "loss": 0.4637, "step": 2562 }, { "epoch": 2.3864059590316575, "grad_norm": 0.27815045224665086, "learning_rate": 4.546395308727148e-06, "loss": 0.4754, "step": 2563 }, { "epoch": 2.387337057728119, "grad_norm": 0.2818577365537612, "learning_rate": 4.539496378061401e-06, "loss": 0.464, "step": 2564 }, { "epoch": 2.388268156424581, "grad_norm": 0.27473265015043963, "learning_rate": 4.532597447395654e-06, "loss": 0.4783, "step": 2565 }, { "epoch": 2.389199255121043, "grad_norm": 0.2953492338871191, "learning_rate": 4.525698516729908e-06, "loss": 0.4743, "step": 2566 }, { "epoch": 2.3901303538175047, "grad_norm": 0.28555899238211163, "learning_rate": 4.51879958606416e-06, "loss": 0.4817, "step": 2567 }, { "epoch": 2.3910614525139664, "grad_norm": 0.2811653820412045, "learning_rate": 4.511900655398413e-06, "loss": 0.465, "step": 2568 }, { "epoch": 2.3919925512104285, "grad_norm": 0.2739223117067731, "learning_rate": 4.5050017247326665e-06, "loss": 0.4637, "step": 2569 }, { "epoch": 2.39292364990689, "grad_norm": 0.27562261822535583, "learning_rate": 4.49810279406692e-06, "loss": 0.473, "step": 2570 }, { "epoch": 2.393854748603352, "grad_norm": 0.29256766841392456, "learning_rate": 4.491203863401173e-06, "loss": 0.4815, "step": 2571 }, { "epoch": 2.394785847299814, "grad_norm": 0.2821588816756574, "learning_rate": 4.484304932735426e-06, "loss": 0.468, "step": 2572 }, { "epoch": 2.3957169459962757, "grad_norm": 0.28982569877172343, "learning_rate": 4.4774060020696795e-06, "loss": 0.4986, "step": 2573 }, { "epoch": 2.3966480446927374, "grad_norm": 0.27383512695392626, "learning_rate": 4.470507071403933e-06, "loss": 0.4577, "step": 2574 }, { "epoch": 2.397579143389199, "grad_norm": 0.28385992960472445, "learning_rate": 4.463608140738186e-06, "loss": 0.4643, "step": 2575 }, { "epoch": 2.398510242085661, "grad_norm": 0.25543879291267085, "learning_rate": 4.456709210072439e-06, "loss": 0.4484, "step": 2576 }, { "epoch": 2.399441340782123, "grad_norm": 0.25935249253062376, "learning_rate": 4.4498102794066926e-06, "loss": 0.4729, "step": 2577 }, { "epoch": 2.4003724394785846, "grad_norm": 0.28196748953263356, "learning_rate": 4.442911348740945e-06, "loss": 0.4866, "step": 2578 }, { "epoch": 2.4013035381750467, "grad_norm": 0.27511270256822584, "learning_rate": 4.436012418075198e-06, "loss": 0.4441, "step": 2579 }, { "epoch": 2.4022346368715084, "grad_norm": 0.2674332001864754, "learning_rate": 4.429113487409452e-06, "loss": 0.4639, "step": 2580 }, { "epoch": 2.40316573556797, "grad_norm": 0.26121552685039895, "learning_rate": 4.4222145567437056e-06, "loss": 0.45, "step": 2581 }, { "epoch": 2.404096834264432, "grad_norm": 0.2672646300950619, "learning_rate": 4.415315626077958e-06, "loss": 0.4625, "step": 2582 }, { "epoch": 2.405027932960894, "grad_norm": 0.2703590138657534, "learning_rate": 4.408416695412211e-06, "loss": 0.4673, "step": 2583 }, { "epoch": 2.4059590316573556, "grad_norm": 0.27328732842119524, "learning_rate": 4.4015177647464645e-06, "loss": 0.4581, "step": 2584 }, { "epoch": 2.4068901303538173, "grad_norm": 0.27526262247738686, "learning_rate": 4.394618834080718e-06, "loss": 0.4759, "step": 2585 }, { "epoch": 2.4078212290502794, "grad_norm": 0.2701897577280307, "learning_rate": 4.387719903414971e-06, "loss": 0.4479, "step": 2586 }, { "epoch": 2.408752327746741, "grad_norm": 0.2596347808375826, "learning_rate": 4.380820972749224e-06, "loss": 0.4493, "step": 2587 }, { "epoch": 2.4096834264432028, "grad_norm": 0.27353768797464045, "learning_rate": 4.3739220420834775e-06, "loss": 0.4534, "step": 2588 }, { "epoch": 2.410614525139665, "grad_norm": 0.27332167611387226, "learning_rate": 4.367023111417731e-06, "loss": 0.4892, "step": 2589 }, { "epoch": 2.4115456238361266, "grad_norm": 0.2712562167053193, "learning_rate": 4.360124180751984e-06, "loss": 0.4449, "step": 2590 }, { "epoch": 2.4124767225325883, "grad_norm": 0.27747309937503745, "learning_rate": 4.353225250086237e-06, "loss": 0.4732, "step": 2591 }, { "epoch": 2.4134078212290504, "grad_norm": 0.2713300884885494, "learning_rate": 4.3463263194204905e-06, "loss": 0.4636, "step": 2592 }, { "epoch": 2.414338919925512, "grad_norm": 0.29166515878830257, "learning_rate": 4.339427388754743e-06, "loss": 0.4696, "step": 2593 }, { "epoch": 2.4152700186219738, "grad_norm": 0.2916924222871356, "learning_rate": 4.332528458088996e-06, "loss": 0.4986, "step": 2594 }, { "epoch": 2.416201117318436, "grad_norm": 0.26465816830031763, "learning_rate": 4.3256295274232494e-06, "loss": 0.4717, "step": 2595 }, { "epoch": 2.4171322160148976, "grad_norm": 0.26224278949150115, "learning_rate": 4.318730596757503e-06, "loss": 0.4669, "step": 2596 }, { "epoch": 2.4180633147113593, "grad_norm": 0.27484867231776594, "learning_rate": 4.311831666091757e-06, "loss": 0.4841, "step": 2597 }, { "epoch": 2.4189944134078214, "grad_norm": 0.2895307457527586, "learning_rate": 4.304932735426009e-06, "loss": 0.4685, "step": 2598 }, { "epoch": 2.419925512104283, "grad_norm": 0.2679688662947558, "learning_rate": 4.2980338047602625e-06, "loss": 0.4837, "step": 2599 }, { "epoch": 2.4208566108007448, "grad_norm": 0.26928085868074547, "learning_rate": 4.291134874094516e-06, "loss": 0.4721, "step": 2600 }, { "epoch": 2.421787709497207, "grad_norm": 0.2725415625044785, "learning_rate": 4.284235943428769e-06, "loss": 0.4668, "step": 2601 }, { "epoch": 2.4227188081936686, "grad_norm": 0.2921215598157249, "learning_rate": 4.277337012763022e-06, "loss": 0.4581, "step": 2602 }, { "epoch": 2.4236499068901303, "grad_norm": 0.26434882605288185, "learning_rate": 4.2704380820972755e-06, "loss": 0.4585, "step": 2603 }, { "epoch": 2.4245810055865924, "grad_norm": 0.2585934397660806, "learning_rate": 4.263539151431528e-06, "loss": 0.4675, "step": 2604 }, { "epoch": 2.425512104283054, "grad_norm": 0.2815007371912756, "learning_rate": 4.256640220765781e-06, "loss": 0.4716, "step": 2605 }, { "epoch": 2.4264432029795158, "grad_norm": 0.2660036722058796, "learning_rate": 4.249741290100035e-06, "loss": 0.4457, "step": 2606 }, { "epoch": 2.427374301675978, "grad_norm": 0.2577815254292961, "learning_rate": 4.2428423594342885e-06, "loss": 0.4674, "step": 2607 }, { "epoch": 2.4283054003724396, "grad_norm": 0.2816684332156709, "learning_rate": 4.235943428768542e-06, "loss": 0.4731, "step": 2608 }, { "epoch": 2.4292364990689013, "grad_norm": 0.26123488692761176, "learning_rate": 4.229044498102794e-06, "loss": 0.4438, "step": 2609 }, { "epoch": 2.430167597765363, "grad_norm": 0.28901488979100687, "learning_rate": 4.222145567437047e-06, "loss": 0.4652, "step": 2610 }, { "epoch": 2.431098696461825, "grad_norm": 0.2599253967124553, "learning_rate": 4.215246636771301e-06, "loss": 0.4691, "step": 2611 }, { "epoch": 2.4320297951582868, "grad_norm": 0.2750195208471436, "learning_rate": 4.208347706105554e-06, "loss": 0.4626, "step": 2612 }, { "epoch": 2.4329608938547485, "grad_norm": 0.271426305182384, "learning_rate": 4.201448775439807e-06, "loss": 0.4663, "step": 2613 }, { "epoch": 2.4338919925512106, "grad_norm": 0.25566399541414814, "learning_rate": 4.19454984477406e-06, "loss": 0.4638, "step": 2614 }, { "epoch": 2.4348230912476723, "grad_norm": 0.2734308886315062, "learning_rate": 4.187650914108314e-06, "loss": 0.4567, "step": 2615 }, { "epoch": 2.435754189944134, "grad_norm": 0.2558420074352828, "learning_rate": 4.180751983442567e-06, "loss": 0.4541, "step": 2616 }, { "epoch": 2.436685288640596, "grad_norm": 0.28206438373654996, "learning_rate": 4.17385305277682e-06, "loss": 0.4871, "step": 2617 }, { "epoch": 2.4376163873370578, "grad_norm": 0.2587517729248333, "learning_rate": 4.1669541221110734e-06, "loss": 0.4609, "step": 2618 }, { "epoch": 2.4385474860335195, "grad_norm": 0.26554842980660054, "learning_rate": 4.160055191445327e-06, "loss": 0.4836, "step": 2619 }, { "epoch": 2.439478584729981, "grad_norm": 0.2626195715809325, "learning_rate": 4.153156260779579e-06, "loss": 0.4665, "step": 2620 }, { "epoch": 2.4404096834264433, "grad_norm": 0.26581651831579667, "learning_rate": 4.146257330113832e-06, "loss": 0.4796, "step": 2621 }, { "epoch": 2.441340782122905, "grad_norm": 0.2707513314934482, "learning_rate": 4.139358399448086e-06, "loss": 0.4399, "step": 2622 }, { "epoch": 2.4422718808193666, "grad_norm": 0.2719630666053651, "learning_rate": 4.132459468782339e-06, "loss": 0.4862, "step": 2623 }, { "epoch": 2.4432029795158288, "grad_norm": 0.2637374522135825, "learning_rate": 4.125560538116592e-06, "loss": 0.4631, "step": 2624 }, { "epoch": 2.4441340782122905, "grad_norm": 0.27749223274169804, "learning_rate": 4.118661607450845e-06, "loss": 0.4731, "step": 2625 }, { "epoch": 2.445065176908752, "grad_norm": 0.26235383630496134, "learning_rate": 4.111762676785099e-06, "loss": 0.4621, "step": 2626 }, { "epoch": 2.4459962756052143, "grad_norm": 0.2859313726076243, "learning_rate": 4.104863746119352e-06, "loss": 0.4435, "step": 2627 }, { "epoch": 2.446927374301676, "grad_norm": 0.27327600955436543, "learning_rate": 4.097964815453605e-06, "loss": 0.4663, "step": 2628 }, { "epoch": 2.4478584729981376, "grad_norm": 0.2611241967605899, "learning_rate": 4.091065884787858e-06, "loss": 0.4578, "step": 2629 }, { "epoch": 2.4487895716945998, "grad_norm": 0.2755731160257392, "learning_rate": 4.084166954122112e-06, "loss": 0.4829, "step": 2630 }, { "epoch": 2.4497206703910615, "grad_norm": 0.2966909888236147, "learning_rate": 4.077268023456364e-06, "loss": 0.4864, "step": 2631 }, { "epoch": 2.450651769087523, "grad_norm": 0.26171584545759324, "learning_rate": 4.070369092790617e-06, "loss": 0.446, "step": 2632 }, { "epoch": 2.4515828677839853, "grad_norm": 0.280756912382753, "learning_rate": 4.063470162124871e-06, "loss": 0.4956, "step": 2633 }, { "epoch": 2.452513966480447, "grad_norm": 0.26411992950133767, "learning_rate": 4.056571231459125e-06, "loss": 0.489, "step": 2634 }, { "epoch": 2.4534450651769086, "grad_norm": 0.2630563867274606, "learning_rate": 4.049672300793377e-06, "loss": 0.4593, "step": 2635 }, { "epoch": 2.4543761638733708, "grad_norm": 0.27398432677927903, "learning_rate": 4.04277337012763e-06, "loss": 0.4592, "step": 2636 }, { "epoch": 2.4553072625698324, "grad_norm": 0.2815046317725373, "learning_rate": 4.0358744394618836e-06, "loss": 0.488, "step": 2637 }, { "epoch": 2.456238361266294, "grad_norm": 0.2767881733424721, "learning_rate": 4.028975508796137e-06, "loss": 0.4964, "step": 2638 }, { "epoch": 2.4571694599627563, "grad_norm": 0.2647054135077885, "learning_rate": 4.02207657813039e-06, "loss": 0.4461, "step": 2639 }, { "epoch": 2.458100558659218, "grad_norm": 0.28139741059338885, "learning_rate": 4.015177647464643e-06, "loss": 0.477, "step": 2640 }, { "epoch": 2.4590316573556796, "grad_norm": 0.27682897267132406, "learning_rate": 4.008278716798897e-06, "loss": 0.4758, "step": 2641 }, { "epoch": 2.4599627560521418, "grad_norm": 0.267476154108344, "learning_rate": 4.00137978613315e-06, "loss": 0.4707, "step": 2642 }, { "epoch": 2.4608938547486034, "grad_norm": 0.2800344732930239, "learning_rate": 3.994480855467403e-06, "loss": 0.468, "step": 2643 }, { "epoch": 2.461824953445065, "grad_norm": 0.28264827991519575, "learning_rate": 3.987581924801656e-06, "loss": 0.4883, "step": 2644 }, { "epoch": 2.462756052141527, "grad_norm": 0.29694831645160835, "learning_rate": 3.98068299413591e-06, "loss": 0.46, "step": 2645 }, { "epoch": 2.463687150837989, "grad_norm": 0.28119350209994726, "learning_rate": 3.973784063470162e-06, "loss": 0.4999, "step": 2646 }, { "epoch": 2.4646182495344506, "grad_norm": 0.27825520542758425, "learning_rate": 3.966885132804415e-06, "loss": 0.4606, "step": 2647 }, { "epoch": 2.4655493482309123, "grad_norm": 0.2595473041033143, "learning_rate": 3.9599862021386685e-06, "loss": 0.4552, "step": 2648 }, { "epoch": 2.4664804469273744, "grad_norm": 0.2645426477549188, "learning_rate": 3.953087271472922e-06, "loss": 0.4422, "step": 2649 }, { "epoch": 2.467411545623836, "grad_norm": 0.2693696087738344, "learning_rate": 3.946188340807175e-06, "loss": 0.4634, "step": 2650 }, { "epoch": 2.468342644320298, "grad_norm": 0.2711242005760129, "learning_rate": 3.939289410141428e-06, "loss": 0.4916, "step": 2651 }, { "epoch": 2.46927374301676, "grad_norm": 0.2636881424304365, "learning_rate": 3.9323904794756815e-06, "loss": 0.4763, "step": 2652 }, { "epoch": 2.4702048417132216, "grad_norm": 0.26488080756579474, "learning_rate": 3.925491548809935e-06, "loss": 0.4669, "step": 2653 }, { "epoch": 2.4711359404096833, "grad_norm": 0.2521680567112921, "learning_rate": 3.918592618144188e-06, "loss": 0.4554, "step": 2654 }, { "epoch": 2.472067039106145, "grad_norm": 0.2820046628877246, "learning_rate": 3.911693687478441e-06, "loss": 0.4784, "step": 2655 }, { "epoch": 2.472998137802607, "grad_norm": 0.2675584276767396, "learning_rate": 3.9047947568126946e-06, "loss": 0.4697, "step": 2656 }, { "epoch": 2.473929236499069, "grad_norm": 0.2575247248182965, "learning_rate": 3.897895826146948e-06, "loss": 0.4373, "step": 2657 }, { "epoch": 2.4748603351955305, "grad_norm": 0.26954961732931315, "learning_rate": 3.8909968954812e-06, "loss": 0.4708, "step": 2658 }, { "epoch": 2.4757914338919926, "grad_norm": 0.27820793328147697, "learning_rate": 3.8840979648154535e-06, "loss": 0.4794, "step": 2659 }, { "epoch": 2.4767225325884543, "grad_norm": 0.2758557260793894, "learning_rate": 3.8771990341497076e-06, "loss": 0.4687, "step": 2660 }, { "epoch": 2.477653631284916, "grad_norm": 0.274094785342193, "learning_rate": 3.870300103483961e-06, "loss": 0.4796, "step": 2661 }, { "epoch": 2.478584729981378, "grad_norm": 0.25215578589285653, "learning_rate": 3.863401172818213e-06, "loss": 0.4439, "step": 2662 }, { "epoch": 2.47951582867784, "grad_norm": 0.2754967851944646, "learning_rate": 3.8565022421524665e-06, "loss": 0.4514, "step": 2663 }, { "epoch": 2.4804469273743015, "grad_norm": 0.27618218849610726, "learning_rate": 3.84960331148672e-06, "loss": 0.4784, "step": 2664 }, { "epoch": 2.4813780260707636, "grad_norm": 0.2677834927697256, "learning_rate": 3.842704380820973e-06, "loss": 0.4702, "step": 2665 }, { "epoch": 2.4823091247672253, "grad_norm": 0.26995253302433464, "learning_rate": 3.835805450155226e-06, "loss": 0.4584, "step": 2666 }, { "epoch": 2.483240223463687, "grad_norm": 0.2848253076135274, "learning_rate": 3.8289065194894795e-06, "loss": 0.4856, "step": 2667 }, { "epoch": 2.484171322160149, "grad_norm": 0.2718986335726812, "learning_rate": 3.822007588823733e-06, "loss": 0.474, "step": 2668 }, { "epoch": 2.485102420856611, "grad_norm": 0.2704272691115826, "learning_rate": 3.815108658157986e-06, "loss": 0.475, "step": 2669 }, { "epoch": 2.4860335195530725, "grad_norm": 0.2649732685813575, "learning_rate": 3.8082097274922393e-06, "loss": 0.4707, "step": 2670 }, { "epoch": 2.4869646182495346, "grad_norm": 0.27311234469290113, "learning_rate": 3.801310796826492e-06, "loss": 0.4571, "step": 2671 }, { "epoch": 2.4878957169459963, "grad_norm": 0.27065332380914525, "learning_rate": 3.7944118661607454e-06, "loss": 0.4694, "step": 2672 }, { "epoch": 2.488826815642458, "grad_norm": 0.25516516088713864, "learning_rate": 3.7875129354949986e-06, "loss": 0.4453, "step": 2673 }, { "epoch": 2.48975791433892, "grad_norm": 0.2729536180301586, "learning_rate": 3.780614004829252e-06, "loss": 0.4696, "step": 2674 }, { "epoch": 2.490689013035382, "grad_norm": 0.257225983789691, "learning_rate": 3.7737150741635047e-06, "loss": 0.4695, "step": 2675 }, { "epoch": 2.4916201117318435, "grad_norm": 0.2796402285675931, "learning_rate": 3.766816143497758e-06, "loss": 0.4783, "step": 2676 }, { "epoch": 2.4925512104283056, "grad_norm": 0.2631974351745163, "learning_rate": 3.7599172128320116e-06, "loss": 0.4565, "step": 2677 }, { "epoch": 2.4934823091247673, "grad_norm": 0.26495492135587584, "learning_rate": 3.753018282166265e-06, "loss": 0.4739, "step": 2678 }, { "epoch": 2.494413407821229, "grad_norm": 0.2802861908974417, "learning_rate": 3.7461193515005177e-06, "loss": 0.4604, "step": 2679 }, { "epoch": 2.4953445065176907, "grad_norm": 0.2535638201859762, "learning_rate": 3.739220420834771e-06, "loss": 0.4632, "step": 2680 }, { "epoch": 2.496275605214153, "grad_norm": 0.26795811351849325, "learning_rate": 3.7323214901690242e-06, "loss": 0.4745, "step": 2681 }, { "epoch": 2.4972067039106145, "grad_norm": 0.2681239122011456, "learning_rate": 3.725422559503277e-06, "loss": 0.4661, "step": 2682 }, { "epoch": 2.498137802607076, "grad_norm": 0.2660897356343338, "learning_rate": 3.7185236288375303e-06, "loss": 0.4811, "step": 2683 }, { "epoch": 2.4990689013035383, "grad_norm": 0.2688090642959217, "learning_rate": 3.7116246981717836e-06, "loss": 0.4491, "step": 2684 }, { "epoch": 2.5, "grad_norm": 0.2790706155526743, "learning_rate": 3.704725767506037e-06, "loss": 0.4753, "step": 2685 }, { "epoch": 2.5009310986964617, "grad_norm": 0.2668839009469052, "learning_rate": 3.6978268368402905e-06, "loss": 0.4501, "step": 2686 }, { "epoch": 2.501862197392924, "grad_norm": 0.2517296953266348, "learning_rate": 3.6909279061745433e-06, "loss": 0.4457, "step": 2687 }, { "epoch": 2.5027932960893855, "grad_norm": 0.26000607084063015, "learning_rate": 3.6840289755087966e-06, "loss": 0.48, "step": 2688 }, { "epoch": 2.503724394785847, "grad_norm": 0.2640547265107715, "learning_rate": 3.67713004484305e-06, "loss": 0.4459, "step": 2689 }, { "epoch": 2.504655493482309, "grad_norm": 0.27010577517372103, "learning_rate": 3.6702311141773027e-06, "loss": 0.4552, "step": 2690 }, { "epoch": 2.505586592178771, "grad_norm": 0.2629977853096696, "learning_rate": 3.663332183511556e-06, "loss": 0.4436, "step": 2691 }, { "epoch": 2.5065176908752327, "grad_norm": 0.25973989408399906, "learning_rate": 3.656433252845809e-06, "loss": 0.4724, "step": 2692 }, { "epoch": 2.5074487895716944, "grad_norm": 0.2652316012051052, "learning_rate": 3.649534322180062e-06, "loss": 0.4671, "step": 2693 }, { "epoch": 2.5083798882681565, "grad_norm": 0.27044459862924214, "learning_rate": 3.6426353915143153e-06, "loss": 0.4693, "step": 2694 }, { "epoch": 2.509310986964618, "grad_norm": 0.26986552423999965, "learning_rate": 3.635736460848569e-06, "loss": 0.4881, "step": 2695 }, { "epoch": 2.51024208566108, "grad_norm": 0.2745491319605229, "learning_rate": 3.628837530182822e-06, "loss": 0.4768, "step": 2696 }, { "epoch": 2.511173184357542, "grad_norm": 0.27696405021500253, "learning_rate": 3.6219385995170754e-06, "loss": 0.4984, "step": 2697 }, { "epoch": 2.5121042830540037, "grad_norm": 0.2763199719619845, "learning_rate": 3.6150396688513283e-06, "loss": 0.4614, "step": 2698 }, { "epoch": 2.5130353817504654, "grad_norm": 0.2649441556410502, "learning_rate": 3.6081407381855815e-06, "loss": 0.4549, "step": 2699 }, { "epoch": 2.5139664804469275, "grad_norm": 0.2667459707421653, "learning_rate": 3.6012418075198348e-06, "loss": 0.4272, "step": 2700 }, { "epoch": 2.514897579143389, "grad_norm": 0.262084155323977, "learning_rate": 3.5943428768540876e-06, "loss": 0.4509, "step": 2701 }, { "epoch": 2.515828677839851, "grad_norm": 0.2782471147351659, "learning_rate": 3.587443946188341e-06, "loss": 0.4679, "step": 2702 }, { "epoch": 2.516759776536313, "grad_norm": 0.2671119278345049, "learning_rate": 3.580545015522594e-06, "loss": 0.4589, "step": 2703 }, { "epoch": 2.5176908752327747, "grad_norm": 0.2784342259544281, "learning_rate": 3.573646084856848e-06, "loss": 0.4842, "step": 2704 }, { "epoch": 2.5186219739292364, "grad_norm": 0.288333022806477, "learning_rate": 3.566747154191101e-06, "loss": 0.5115, "step": 2705 }, { "epoch": 2.5195530726256985, "grad_norm": 0.2693630873955026, "learning_rate": 3.559848223525354e-06, "loss": 0.4607, "step": 2706 }, { "epoch": 2.52048417132216, "grad_norm": 0.2637803174471486, "learning_rate": 3.552949292859607e-06, "loss": 0.4665, "step": 2707 }, { "epoch": 2.521415270018622, "grad_norm": 0.2689533531304808, "learning_rate": 3.5460503621938604e-06, "loss": 0.4809, "step": 2708 }, { "epoch": 2.522346368715084, "grad_norm": 0.2809295368577882, "learning_rate": 3.5391514315281132e-06, "loss": 0.4573, "step": 2709 }, { "epoch": 2.5232774674115457, "grad_norm": 0.2749990822376515, "learning_rate": 3.5322525008623665e-06, "loss": 0.4656, "step": 2710 }, { "epoch": 2.5242085661080074, "grad_norm": 0.2697058768711801, "learning_rate": 3.5253535701966197e-06, "loss": 0.4595, "step": 2711 }, { "epoch": 2.5251396648044695, "grad_norm": 0.28002768550834517, "learning_rate": 3.5184546395308726e-06, "loss": 0.4719, "step": 2712 }, { "epoch": 2.526070763500931, "grad_norm": 0.2860913270332325, "learning_rate": 3.5115557088651262e-06, "loss": 0.5022, "step": 2713 }, { "epoch": 2.527001862197393, "grad_norm": 0.2792636736961675, "learning_rate": 3.5046567781993795e-06, "loss": 0.4596, "step": 2714 }, { "epoch": 2.527932960893855, "grad_norm": 0.2686897085782147, "learning_rate": 3.4977578475336327e-06, "loss": 0.4775, "step": 2715 }, { "epoch": 2.5288640595903167, "grad_norm": 0.3214301741414197, "learning_rate": 3.490858916867886e-06, "loss": 0.5114, "step": 2716 }, { "epoch": 2.5297951582867784, "grad_norm": 0.2797678594035597, "learning_rate": 3.483959986202139e-06, "loss": 0.4818, "step": 2717 }, { "epoch": 2.5307262569832405, "grad_norm": 0.3013143276306528, "learning_rate": 3.477061055536392e-06, "loss": 0.4834, "step": 2718 }, { "epoch": 2.531657355679702, "grad_norm": 0.2942613560616877, "learning_rate": 3.4701621248706453e-06, "loss": 0.5142, "step": 2719 }, { "epoch": 2.532588454376164, "grad_norm": 0.2671598723920623, "learning_rate": 3.463263194204898e-06, "loss": 0.4531, "step": 2720 }, { "epoch": 2.5335195530726256, "grad_norm": 0.2634770126502687, "learning_rate": 3.4563642635391514e-06, "loss": 0.4668, "step": 2721 }, { "epoch": 2.5344506517690877, "grad_norm": 0.2768891795128559, "learning_rate": 3.449465332873405e-06, "loss": 0.4739, "step": 2722 }, { "epoch": 2.5353817504655494, "grad_norm": 0.2788340380532804, "learning_rate": 3.4425664022076584e-06, "loss": 0.4756, "step": 2723 }, { "epoch": 2.536312849162011, "grad_norm": 0.2757030336096306, "learning_rate": 3.435667471541911e-06, "loss": 0.4484, "step": 2724 }, { "epoch": 2.5372439478584727, "grad_norm": 0.2712487408573745, "learning_rate": 3.4287685408761644e-06, "loss": 0.473, "step": 2725 }, { "epoch": 2.538175046554935, "grad_norm": 0.27170817281318066, "learning_rate": 3.4218696102104177e-06, "loss": 0.4655, "step": 2726 }, { "epoch": 2.5391061452513966, "grad_norm": 0.2856391967061291, "learning_rate": 3.414970679544671e-06, "loss": 0.4716, "step": 2727 }, { "epoch": 2.5400372439478582, "grad_norm": 0.29301052115837173, "learning_rate": 3.4080717488789238e-06, "loss": 0.4715, "step": 2728 }, { "epoch": 2.5409683426443204, "grad_norm": 0.26932990387803035, "learning_rate": 3.401172818213177e-06, "loss": 0.4947, "step": 2729 }, { "epoch": 2.541899441340782, "grad_norm": 0.254000563533845, "learning_rate": 3.3942738875474307e-06, "loss": 0.4875, "step": 2730 }, { "epoch": 2.5428305400372437, "grad_norm": 0.28566220415566107, "learning_rate": 3.387374956881684e-06, "loss": 0.4682, "step": 2731 }, { "epoch": 2.543761638733706, "grad_norm": 0.26413866039763706, "learning_rate": 3.380476026215937e-06, "loss": 0.4513, "step": 2732 }, { "epoch": 2.5446927374301676, "grad_norm": 0.2622794310774501, "learning_rate": 3.37357709555019e-06, "loss": 0.4663, "step": 2733 }, { "epoch": 2.5456238361266292, "grad_norm": 0.2722791209338291, "learning_rate": 3.3666781648844433e-06, "loss": 0.4714, "step": 2734 }, { "epoch": 2.5465549348230914, "grad_norm": 0.2756852074140953, "learning_rate": 3.359779234218696e-06, "loss": 0.465, "step": 2735 }, { "epoch": 2.547486033519553, "grad_norm": 0.25567655516597343, "learning_rate": 3.3528803035529494e-06, "loss": 0.4361, "step": 2736 }, { "epoch": 2.5484171322160147, "grad_norm": 0.26445986460249465, "learning_rate": 3.3459813728872026e-06, "loss": 0.4659, "step": 2737 }, { "epoch": 2.549348230912477, "grad_norm": 0.2964216903069848, "learning_rate": 3.339082442221456e-06, "loss": 0.485, "step": 2738 }, { "epoch": 2.5502793296089385, "grad_norm": 0.2764823534262493, "learning_rate": 3.3321835115557096e-06, "loss": 0.478, "step": 2739 }, { "epoch": 2.5512104283054002, "grad_norm": 0.2863234341697766, "learning_rate": 3.3252845808899624e-06, "loss": 0.453, "step": 2740 }, { "epoch": 2.5521415270018624, "grad_norm": 0.2710271554897618, "learning_rate": 3.3183856502242157e-06, "loss": 0.4869, "step": 2741 }, { "epoch": 2.553072625698324, "grad_norm": 0.25016418461739043, "learning_rate": 3.311486719558469e-06, "loss": 0.4352, "step": 2742 }, { "epoch": 2.5540037243947857, "grad_norm": 0.2555964067421105, "learning_rate": 3.3045877888927217e-06, "loss": 0.4526, "step": 2743 }, { "epoch": 2.554934823091248, "grad_norm": 0.29055970567965317, "learning_rate": 3.297688858226975e-06, "loss": 0.4751, "step": 2744 }, { "epoch": 2.5558659217877095, "grad_norm": 0.32452361490134046, "learning_rate": 3.2907899275612282e-06, "loss": 0.4684, "step": 2745 }, { "epoch": 2.5567970204841712, "grad_norm": 0.2774357264684885, "learning_rate": 3.2838909968954815e-06, "loss": 0.4726, "step": 2746 }, { "epoch": 2.5577281191806334, "grad_norm": 0.2731162733944871, "learning_rate": 3.2769920662297343e-06, "loss": 0.4639, "step": 2747 }, { "epoch": 2.558659217877095, "grad_norm": 0.27262316211904974, "learning_rate": 3.270093135563988e-06, "loss": 0.4743, "step": 2748 }, { "epoch": 2.5595903165735567, "grad_norm": 0.27295524391106474, "learning_rate": 3.2631942048982413e-06, "loss": 0.4558, "step": 2749 }, { "epoch": 2.560521415270019, "grad_norm": 0.2790380090644723, "learning_rate": 3.2562952742324945e-06, "loss": 0.4496, "step": 2750 }, { "epoch": 2.5614525139664805, "grad_norm": 0.26635278249302974, "learning_rate": 3.2493963435667474e-06, "loss": 0.4249, "step": 2751 }, { "epoch": 2.5623836126629422, "grad_norm": 0.25721530723195446, "learning_rate": 3.2424974129010006e-06, "loss": 0.4474, "step": 2752 }, { "epoch": 2.5633147113594044, "grad_norm": 0.2588197124450571, "learning_rate": 3.235598482235254e-06, "loss": 0.4694, "step": 2753 }, { "epoch": 2.564245810055866, "grad_norm": 0.2594911139463708, "learning_rate": 3.2286995515695067e-06, "loss": 0.4706, "step": 2754 }, { "epoch": 2.5651769087523277, "grad_norm": 0.27218319521691825, "learning_rate": 3.22180062090376e-06, "loss": 0.4745, "step": 2755 }, { "epoch": 2.5661080074487894, "grad_norm": 0.27777631991583834, "learning_rate": 3.214901690238013e-06, "loss": 0.497, "step": 2756 }, { "epoch": 2.5670391061452515, "grad_norm": 0.27204995495545475, "learning_rate": 3.208002759572267e-06, "loss": 0.4426, "step": 2757 }, { "epoch": 2.5679702048417132, "grad_norm": 0.27404637962002826, "learning_rate": 3.20110382890652e-06, "loss": 0.4489, "step": 2758 }, { "epoch": 2.568901303538175, "grad_norm": 0.27821371405396206, "learning_rate": 3.194204898240773e-06, "loss": 0.4859, "step": 2759 }, { "epoch": 2.5698324022346366, "grad_norm": 0.268379784177016, "learning_rate": 3.1873059675750262e-06, "loss": 0.4814, "step": 2760 }, { "epoch": 2.5707635009310987, "grad_norm": 0.2826936203121762, "learning_rate": 3.1804070369092795e-06, "loss": 0.4854, "step": 2761 }, { "epoch": 2.5716945996275604, "grad_norm": 0.26796934419394114, "learning_rate": 3.1735081062435323e-06, "loss": 0.4486, "step": 2762 }, { "epoch": 2.572625698324022, "grad_norm": 0.27050113364340495, "learning_rate": 3.1666091755777856e-06, "loss": 0.4662, "step": 2763 }, { "epoch": 2.5735567970204842, "grad_norm": 0.2723114045687636, "learning_rate": 3.159710244912039e-06, "loss": 0.4628, "step": 2764 }, { "epoch": 2.574487895716946, "grad_norm": 0.28150932525448785, "learning_rate": 3.1528113142462916e-06, "loss": 0.4595, "step": 2765 }, { "epoch": 2.5754189944134076, "grad_norm": 0.25863658327459144, "learning_rate": 3.1459123835805453e-06, "loss": 0.4535, "step": 2766 }, { "epoch": 2.5763500931098697, "grad_norm": 0.27085946545045225, "learning_rate": 3.1390134529147986e-06, "loss": 0.48, "step": 2767 }, { "epoch": 2.5772811918063314, "grad_norm": 0.25623634506360876, "learning_rate": 3.132114522249052e-06, "loss": 0.4416, "step": 2768 }, { "epoch": 2.578212290502793, "grad_norm": 0.25857745686833883, "learning_rate": 3.125215591583305e-06, "loss": 0.4702, "step": 2769 }, { "epoch": 2.5791433891992552, "grad_norm": 0.27512081087243473, "learning_rate": 3.118316660917558e-06, "loss": 0.4781, "step": 2770 }, { "epoch": 2.580074487895717, "grad_norm": 0.27532627593768755, "learning_rate": 3.111417730251811e-06, "loss": 0.477, "step": 2771 }, { "epoch": 2.5810055865921786, "grad_norm": 0.2578738484238192, "learning_rate": 3.1045187995860644e-06, "loss": 0.446, "step": 2772 }, { "epoch": 2.5819366852886407, "grad_norm": 0.2613260978604694, "learning_rate": 3.0976198689203172e-06, "loss": 0.4751, "step": 2773 }, { "epoch": 2.5828677839851024, "grad_norm": 0.2674946856792037, "learning_rate": 3.0907209382545705e-06, "loss": 0.4538, "step": 2774 }, { "epoch": 2.583798882681564, "grad_norm": 0.27722665027451837, "learning_rate": 3.083822007588824e-06, "loss": 0.4661, "step": 2775 }, { "epoch": 2.5847299813780262, "grad_norm": 0.2788125211334941, "learning_rate": 3.0769230769230774e-06, "loss": 0.4813, "step": 2776 }, { "epoch": 2.585661080074488, "grad_norm": 0.26344367392540396, "learning_rate": 3.0700241462573303e-06, "loss": 0.4443, "step": 2777 }, { "epoch": 2.5865921787709496, "grad_norm": 0.2747334130955287, "learning_rate": 3.0631252155915835e-06, "loss": 0.4762, "step": 2778 }, { "epoch": 2.5875232774674117, "grad_norm": 0.2743984492616491, "learning_rate": 3.0562262849258368e-06, "loss": 0.4736, "step": 2779 }, { "epoch": 2.5884543761638734, "grad_norm": 0.26170563790916995, "learning_rate": 3.04932735426009e-06, "loss": 0.4751, "step": 2780 }, { "epoch": 2.589385474860335, "grad_norm": 0.28792279705830104, "learning_rate": 3.042428423594343e-06, "loss": 0.4916, "step": 2781 }, { "epoch": 2.5903165735567972, "grad_norm": 0.2587750394075859, "learning_rate": 3.035529492928596e-06, "loss": 0.4767, "step": 2782 }, { "epoch": 2.591247672253259, "grad_norm": 0.26685371210188574, "learning_rate": 3.0286305622628494e-06, "loss": 0.4939, "step": 2783 }, { "epoch": 2.5921787709497206, "grad_norm": 0.26957138307216844, "learning_rate": 3.021731631597103e-06, "loss": 0.4517, "step": 2784 }, { "epoch": 2.5931098696461827, "grad_norm": 0.2750280133781057, "learning_rate": 3.014832700931356e-06, "loss": 0.4694, "step": 2785 }, { "epoch": 2.5940409683426444, "grad_norm": 0.2699214514742673, "learning_rate": 3.007933770265609e-06, "loss": 0.4483, "step": 2786 }, { "epoch": 2.594972067039106, "grad_norm": 0.2847762635085825, "learning_rate": 3.0010348395998624e-06, "loss": 0.492, "step": 2787 }, { "epoch": 2.5959031657355682, "grad_norm": 0.2567358725240794, "learning_rate": 2.9941359089341156e-06, "loss": 0.468, "step": 2788 }, { "epoch": 2.59683426443203, "grad_norm": 0.2838251821062455, "learning_rate": 2.9872369782683685e-06, "loss": 0.5133, "step": 2789 }, { "epoch": 2.5977653631284916, "grad_norm": 0.27054349065102834, "learning_rate": 2.9803380476026217e-06, "loss": 0.4585, "step": 2790 }, { "epoch": 2.5986964618249533, "grad_norm": 0.2627238552513013, "learning_rate": 2.973439116936875e-06, "loss": 0.4632, "step": 2791 }, { "epoch": 2.5996275605214154, "grad_norm": 0.26422073810804764, "learning_rate": 2.966540186271128e-06, "loss": 0.4491, "step": 2792 }, { "epoch": 2.600558659217877, "grad_norm": 0.25735826518133814, "learning_rate": 2.9596412556053815e-06, "loss": 0.4725, "step": 2793 }, { "epoch": 2.601489757914339, "grad_norm": 0.26540951412521785, "learning_rate": 2.9527423249396347e-06, "loss": 0.4707, "step": 2794 }, { "epoch": 2.6024208566108005, "grad_norm": 0.26842169341469696, "learning_rate": 2.945843394273888e-06, "loss": 0.47, "step": 2795 }, { "epoch": 2.6033519553072626, "grad_norm": 0.27444051754970206, "learning_rate": 2.938944463608141e-06, "loss": 0.4967, "step": 2796 }, { "epoch": 2.6042830540037243, "grad_norm": 0.28737519750743473, "learning_rate": 2.932045532942394e-06, "loss": 0.4619, "step": 2797 }, { "epoch": 2.605214152700186, "grad_norm": 0.2621199590891676, "learning_rate": 2.9251466022766473e-06, "loss": 0.4674, "step": 2798 }, { "epoch": 2.606145251396648, "grad_norm": 0.2565908014373118, "learning_rate": 2.9182476716109006e-06, "loss": 0.4461, "step": 2799 }, { "epoch": 2.60707635009311, "grad_norm": 0.27488453224566056, "learning_rate": 2.9113487409451534e-06, "loss": 0.4643, "step": 2800 }, { "epoch": 2.6080074487895715, "grad_norm": 0.2676944419301556, "learning_rate": 2.904449810279407e-06, "loss": 0.4453, "step": 2801 }, { "epoch": 2.6089385474860336, "grad_norm": 0.2654213166420944, "learning_rate": 2.8975508796136603e-06, "loss": 0.465, "step": 2802 }, { "epoch": 2.6098696461824953, "grad_norm": 0.2622870685365415, "learning_rate": 2.8906519489479136e-06, "loss": 0.4507, "step": 2803 }, { "epoch": 2.610800744878957, "grad_norm": 0.27181939115388154, "learning_rate": 2.8837530182821664e-06, "loss": 0.4681, "step": 2804 }, { "epoch": 2.611731843575419, "grad_norm": 0.27024999615564377, "learning_rate": 2.8768540876164197e-06, "loss": 0.4806, "step": 2805 }, { "epoch": 2.612662942271881, "grad_norm": 0.2745430270940133, "learning_rate": 2.869955156950673e-06, "loss": 0.477, "step": 2806 }, { "epoch": 2.6135940409683425, "grad_norm": 0.2528696306337459, "learning_rate": 2.8630562262849258e-06, "loss": 0.4597, "step": 2807 }, { "epoch": 2.6145251396648046, "grad_norm": 0.257304504275568, "learning_rate": 2.856157295619179e-06, "loss": 0.4771, "step": 2808 }, { "epoch": 2.6154562383612663, "grad_norm": 0.24878210431214795, "learning_rate": 2.8492583649534323e-06, "loss": 0.4497, "step": 2809 }, { "epoch": 2.616387337057728, "grad_norm": 0.29562508021370026, "learning_rate": 2.842359434287686e-06, "loss": 0.4889, "step": 2810 }, { "epoch": 2.61731843575419, "grad_norm": 0.2825750441272009, "learning_rate": 2.8354605036219392e-06, "loss": 0.4533, "step": 2811 }, { "epoch": 2.618249534450652, "grad_norm": 0.2710285125275314, "learning_rate": 2.828561572956192e-06, "loss": 0.4764, "step": 2812 }, { "epoch": 2.6191806331471135, "grad_norm": 0.2580026925936322, "learning_rate": 2.8216626422904453e-06, "loss": 0.4715, "step": 2813 }, { "epoch": 2.6201117318435756, "grad_norm": 0.25731374194937195, "learning_rate": 2.8147637116246986e-06, "loss": 0.4652, "step": 2814 }, { "epoch": 2.6210428305400373, "grad_norm": 0.25308353752071566, "learning_rate": 2.8078647809589514e-06, "loss": 0.4482, "step": 2815 }, { "epoch": 2.621973929236499, "grad_norm": 0.26668706127580905, "learning_rate": 2.8009658502932046e-06, "loss": 0.4776, "step": 2816 }, { "epoch": 2.622905027932961, "grad_norm": 0.2538725486888562, "learning_rate": 2.794066919627458e-06, "loss": 0.445, "step": 2817 }, { "epoch": 2.623836126629423, "grad_norm": 0.2800798380599687, "learning_rate": 2.7871679889617107e-06, "loss": 0.4948, "step": 2818 }, { "epoch": 2.6247672253258845, "grad_norm": 0.2659588919474035, "learning_rate": 2.7802690582959644e-06, "loss": 0.4852, "step": 2819 }, { "epoch": 2.6256983240223466, "grad_norm": 0.2660286473194283, "learning_rate": 2.7733701276302177e-06, "loss": 0.4752, "step": 2820 }, { "epoch": 2.6266294227188083, "grad_norm": 0.26035553144100443, "learning_rate": 2.766471196964471e-06, "loss": 0.4764, "step": 2821 }, { "epoch": 2.62756052141527, "grad_norm": 0.2653872832585187, "learning_rate": 2.759572266298724e-06, "loss": 0.4476, "step": 2822 }, { "epoch": 2.628491620111732, "grad_norm": 0.2731447190262752, "learning_rate": 2.752673335632977e-06, "loss": 0.4591, "step": 2823 }, { "epoch": 2.629422718808194, "grad_norm": 0.2696342300372775, "learning_rate": 2.7457744049672302e-06, "loss": 0.5029, "step": 2824 }, { "epoch": 2.6303538175046555, "grad_norm": 0.28852615172778523, "learning_rate": 2.7388754743014835e-06, "loss": 0.4965, "step": 2825 }, { "epoch": 2.631284916201117, "grad_norm": 0.2684643096730522, "learning_rate": 2.7319765436357363e-06, "loss": 0.467, "step": 2826 }, { "epoch": 2.6322160148975793, "grad_norm": 0.2638625922776671, "learning_rate": 2.7250776129699896e-06, "loss": 0.4673, "step": 2827 }, { "epoch": 2.633147113594041, "grad_norm": 0.2832467465173022, "learning_rate": 2.7181786823042433e-06, "loss": 0.456, "step": 2828 }, { "epoch": 2.6340782122905027, "grad_norm": 0.25369687098233307, "learning_rate": 2.7112797516384965e-06, "loss": 0.4357, "step": 2829 }, { "epoch": 2.635009310986965, "grad_norm": 0.3156073845478609, "learning_rate": 2.7043808209727498e-06, "loss": 0.4894, "step": 2830 }, { "epoch": 2.6359404096834265, "grad_norm": 0.2749842830096823, "learning_rate": 2.6974818903070026e-06, "loss": 0.4719, "step": 2831 }, { "epoch": 2.636871508379888, "grad_norm": 0.2866906180423014, "learning_rate": 2.690582959641256e-06, "loss": 0.498, "step": 2832 }, { "epoch": 2.63780260707635, "grad_norm": 0.2580749937730544, "learning_rate": 2.683684028975509e-06, "loss": 0.4577, "step": 2833 }, { "epoch": 2.638733705772812, "grad_norm": 0.2707456157890224, "learning_rate": 2.676785098309762e-06, "loss": 0.4718, "step": 2834 }, { "epoch": 2.6396648044692737, "grad_norm": 0.2649494072511505, "learning_rate": 2.669886167644015e-06, "loss": 0.4864, "step": 2835 }, { "epoch": 2.6405959031657353, "grad_norm": 0.26864482396249195, "learning_rate": 2.6629872369782685e-06, "loss": 0.4938, "step": 2836 }, { "epoch": 2.6415270018621975, "grad_norm": 0.2626693710703146, "learning_rate": 2.656088306312522e-06, "loss": 0.4751, "step": 2837 }, { "epoch": 2.642458100558659, "grad_norm": 0.28818403618860183, "learning_rate": 2.649189375646775e-06, "loss": 0.4798, "step": 2838 }, { "epoch": 2.643389199255121, "grad_norm": 0.26403861980841553, "learning_rate": 2.6422904449810282e-06, "loss": 0.4742, "step": 2839 }, { "epoch": 2.644320297951583, "grad_norm": 0.25817814829410357, "learning_rate": 2.6353915143152815e-06, "loss": 0.4669, "step": 2840 }, { "epoch": 2.6452513966480447, "grad_norm": 0.2691260445740653, "learning_rate": 2.6284925836495347e-06, "loss": 0.4732, "step": 2841 }, { "epoch": 2.6461824953445063, "grad_norm": 0.2509371112071059, "learning_rate": 2.6215936529837876e-06, "loss": 0.4563, "step": 2842 }, { "epoch": 2.6471135940409685, "grad_norm": 0.2681230380921411, "learning_rate": 2.614694722318041e-06, "loss": 0.4968, "step": 2843 }, { "epoch": 2.64804469273743, "grad_norm": 0.25155454297798735, "learning_rate": 2.607795791652294e-06, "loss": 0.4659, "step": 2844 }, { "epoch": 2.648975791433892, "grad_norm": 0.26664355134741385, "learning_rate": 2.600896860986547e-06, "loss": 0.4827, "step": 2845 }, { "epoch": 2.649906890130354, "grad_norm": 0.26685058559336916, "learning_rate": 2.5939979303208006e-06, "loss": 0.4811, "step": 2846 }, { "epoch": 2.6508379888268156, "grad_norm": 0.259678301849592, "learning_rate": 2.587098999655054e-06, "loss": 0.4649, "step": 2847 }, { "epoch": 2.6517690875232773, "grad_norm": 0.2647364980064598, "learning_rate": 2.580200068989307e-06, "loss": 0.483, "step": 2848 }, { "epoch": 2.6527001862197395, "grad_norm": 0.2810021335639368, "learning_rate": 2.57330113832356e-06, "loss": 0.4931, "step": 2849 }, { "epoch": 2.653631284916201, "grad_norm": 0.26311150078134377, "learning_rate": 2.566402207657813e-06, "loss": 0.4483, "step": 2850 }, { "epoch": 2.654562383612663, "grad_norm": 0.2863438427839895, "learning_rate": 2.5595032769920664e-06, "loss": 0.4895, "step": 2851 }, { "epoch": 2.655493482309125, "grad_norm": 0.25821288427947636, "learning_rate": 2.5526043463263197e-06, "loss": 0.4637, "step": 2852 }, { "epoch": 2.6564245810055866, "grad_norm": 0.26401596729962395, "learning_rate": 2.5457054156605725e-06, "loss": 0.4672, "step": 2853 }, { "epoch": 2.6573556797020483, "grad_norm": 0.25194418879902386, "learning_rate": 2.5388064849948258e-06, "loss": 0.4367, "step": 2854 }, { "epoch": 2.6582867783985105, "grad_norm": 0.27027973921706544, "learning_rate": 2.5319075543290794e-06, "loss": 0.4742, "step": 2855 }, { "epoch": 2.659217877094972, "grad_norm": 0.27356899632015524, "learning_rate": 2.5250086236633327e-06, "loss": 0.4845, "step": 2856 }, { "epoch": 2.660148975791434, "grad_norm": 0.25662426446513786, "learning_rate": 2.5181096929975855e-06, "loss": 0.4611, "step": 2857 }, { "epoch": 2.661080074487896, "grad_norm": 0.26298134540433626, "learning_rate": 2.5112107623318388e-06, "loss": 0.4881, "step": 2858 }, { "epoch": 2.6620111731843576, "grad_norm": 0.2752051253869857, "learning_rate": 2.504311831666092e-06, "loss": 0.4706, "step": 2859 }, { "epoch": 2.6629422718808193, "grad_norm": 0.2570634812570687, "learning_rate": 2.497412901000345e-06, "loss": 0.4754, "step": 2860 }, { "epoch": 2.6638733705772815, "grad_norm": 0.2701944575181796, "learning_rate": 2.4905139703345985e-06, "loss": 0.4628, "step": 2861 }, { "epoch": 2.664804469273743, "grad_norm": 0.2700362704720154, "learning_rate": 2.483615039668852e-06, "loss": 0.4958, "step": 2862 }, { "epoch": 2.665735567970205, "grad_norm": 0.27374216133679324, "learning_rate": 2.4767161090031046e-06, "loss": 0.4418, "step": 2863 }, { "epoch": 2.6666666666666665, "grad_norm": 0.25722557374237803, "learning_rate": 2.469817178337358e-06, "loss": 0.4699, "step": 2864 }, { "epoch": 2.6675977653631286, "grad_norm": 0.2737027784135946, "learning_rate": 2.462918247671611e-06, "loss": 0.4932, "step": 2865 }, { "epoch": 2.6685288640595903, "grad_norm": 0.25705424035020635, "learning_rate": 2.4560193170058644e-06, "loss": 0.4418, "step": 2866 }, { "epoch": 2.669459962756052, "grad_norm": 0.25444209790727434, "learning_rate": 2.4491203863401176e-06, "loss": 0.4616, "step": 2867 }, { "epoch": 2.6703910614525137, "grad_norm": 0.2758636352358467, "learning_rate": 2.4422214556743705e-06, "loss": 0.4751, "step": 2868 }, { "epoch": 2.671322160148976, "grad_norm": 0.25892003617694115, "learning_rate": 2.4353225250086237e-06, "loss": 0.4543, "step": 2869 }, { "epoch": 2.6722532588454375, "grad_norm": 0.2627652837329477, "learning_rate": 2.428423594342877e-06, "loss": 0.4874, "step": 2870 }, { "epoch": 2.673184357541899, "grad_norm": 0.2592287553894815, "learning_rate": 2.4215246636771302e-06, "loss": 0.4714, "step": 2871 }, { "epoch": 2.6741154562383613, "grad_norm": 0.2535056173973547, "learning_rate": 2.4146257330113835e-06, "loss": 0.4466, "step": 2872 }, { "epoch": 2.675046554934823, "grad_norm": 0.27433068927162074, "learning_rate": 2.4077268023456367e-06, "loss": 0.4824, "step": 2873 }, { "epoch": 2.6759776536312847, "grad_norm": 0.26098939739200927, "learning_rate": 2.4008278716798896e-06, "loss": 0.4625, "step": 2874 }, { "epoch": 2.676908752327747, "grad_norm": 0.2639869495596336, "learning_rate": 2.3939289410141432e-06, "loss": 0.4835, "step": 2875 }, { "epoch": 2.6778398510242085, "grad_norm": 0.272530898818743, "learning_rate": 2.387030010348396e-06, "loss": 0.4301, "step": 2876 }, { "epoch": 2.67877094972067, "grad_norm": 0.25336547405671533, "learning_rate": 2.3801310796826493e-06, "loss": 0.4409, "step": 2877 }, { "epoch": 2.6797020484171323, "grad_norm": 0.26162853492498883, "learning_rate": 2.3732321490169026e-06, "loss": 0.4584, "step": 2878 }, { "epoch": 2.680633147113594, "grad_norm": 0.2710960169889434, "learning_rate": 2.366333218351156e-06, "loss": 0.484, "step": 2879 }, { "epoch": 2.6815642458100557, "grad_norm": 0.2864721198088171, "learning_rate": 2.359434287685409e-06, "loss": 0.4898, "step": 2880 }, { "epoch": 2.682495344506518, "grad_norm": 0.2688655781074465, "learning_rate": 2.352535357019662e-06, "loss": 0.4794, "step": 2881 }, { "epoch": 2.6834264432029795, "grad_norm": 0.2586085452246171, "learning_rate": 2.345636426353915e-06, "loss": 0.4639, "step": 2882 }, { "epoch": 2.684357541899441, "grad_norm": 0.26515858782083945, "learning_rate": 2.338737495688169e-06, "loss": 0.4852, "step": 2883 }, { "epoch": 2.6852886405959033, "grad_norm": 0.27209102467131063, "learning_rate": 2.3318385650224217e-06, "loss": 0.4848, "step": 2884 }, { "epoch": 2.686219739292365, "grad_norm": 0.2710896884739461, "learning_rate": 2.324939634356675e-06, "loss": 0.4916, "step": 2885 }, { "epoch": 2.6871508379888267, "grad_norm": 0.2585528762919927, "learning_rate": 2.318040703690928e-06, "loss": 0.4641, "step": 2886 }, { "epoch": 2.688081936685289, "grad_norm": 0.2562202167499294, "learning_rate": 2.311141773025181e-06, "loss": 0.4619, "step": 2887 }, { "epoch": 2.6890130353817505, "grad_norm": 0.2613373039907417, "learning_rate": 2.3042428423594347e-06, "loss": 0.4602, "step": 2888 }, { "epoch": 2.689944134078212, "grad_norm": 0.26168251128870107, "learning_rate": 2.2973439116936875e-06, "loss": 0.4834, "step": 2889 }, { "epoch": 2.6908752327746743, "grad_norm": 0.260895816625626, "learning_rate": 2.290444981027941e-06, "loss": 0.4476, "step": 2890 }, { "epoch": 2.691806331471136, "grad_norm": 0.2559393728039647, "learning_rate": 2.283546050362194e-06, "loss": 0.457, "step": 2891 }, { "epoch": 2.6927374301675977, "grad_norm": 0.26823820744231563, "learning_rate": 2.2766471196964473e-06, "loss": 0.4684, "step": 2892 }, { "epoch": 2.69366852886406, "grad_norm": 0.25633990545694674, "learning_rate": 2.2697481890307006e-06, "loss": 0.4697, "step": 2893 }, { "epoch": 2.6945996275605215, "grad_norm": 0.25690463964807064, "learning_rate": 2.262849258364954e-06, "loss": 0.4774, "step": 2894 }, { "epoch": 2.695530726256983, "grad_norm": 0.28875092403196617, "learning_rate": 2.2559503276992066e-06, "loss": 0.4897, "step": 2895 }, { "epoch": 2.6964618249534453, "grad_norm": 0.2658887614861057, "learning_rate": 2.24905139703346e-06, "loss": 0.4778, "step": 2896 }, { "epoch": 2.697392923649907, "grad_norm": 0.2661754652067048, "learning_rate": 2.242152466367713e-06, "loss": 0.4878, "step": 2897 }, { "epoch": 2.6983240223463687, "grad_norm": 0.25204224515003154, "learning_rate": 2.2352535357019664e-06, "loss": 0.457, "step": 2898 }, { "epoch": 2.6992551210428304, "grad_norm": 0.2626139902949437, "learning_rate": 2.2283546050362197e-06, "loss": 0.4652, "step": 2899 }, { "epoch": 2.7001862197392925, "grad_norm": 0.24722487075403185, "learning_rate": 2.2214556743704725e-06, "loss": 0.4477, "step": 2900 }, { "epoch": 2.701117318435754, "grad_norm": 0.2524814363987921, "learning_rate": 2.214556743704726e-06, "loss": 0.4481, "step": 2901 }, { "epoch": 2.702048417132216, "grad_norm": 0.2577468691767339, "learning_rate": 2.207657813038979e-06, "loss": 0.4648, "step": 2902 }, { "epoch": 2.7029795158286776, "grad_norm": 0.2675820144407637, "learning_rate": 2.2007588823732322e-06, "loss": 0.4729, "step": 2903 }, { "epoch": 2.7039106145251397, "grad_norm": 0.2516264377004244, "learning_rate": 2.1938599517074855e-06, "loss": 0.4618, "step": 2904 }, { "epoch": 2.7048417132216014, "grad_norm": 0.26015807692474174, "learning_rate": 2.1869610210417388e-06, "loss": 0.4577, "step": 2905 }, { "epoch": 2.705772811918063, "grad_norm": 0.2622705020730135, "learning_rate": 2.180062090375992e-06, "loss": 0.4785, "step": 2906 }, { "epoch": 2.706703910614525, "grad_norm": 0.26079290931704674, "learning_rate": 2.1731631597102453e-06, "loss": 0.4823, "step": 2907 }, { "epoch": 2.707635009310987, "grad_norm": 0.2570137903322602, "learning_rate": 2.166264229044498e-06, "loss": 0.4601, "step": 2908 }, { "epoch": 2.7085661080074486, "grad_norm": 0.2589097069379333, "learning_rate": 2.1593652983787513e-06, "loss": 0.4479, "step": 2909 }, { "epoch": 2.7094972067039107, "grad_norm": 0.26597236133230745, "learning_rate": 2.1524663677130046e-06, "loss": 0.4642, "step": 2910 }, { "epoch": 2.7104283054003724, "grad_norm": 0.2505620961079026, "learning_rate": 2.145567437047258e-06, "loss": 0.4391, "step": 2911 }, { "epoch": 2.711359404096834, "grad_norm": 0.26249925720026723, "learning_rate": 2.138668506381511e-06, "loss": 0.4661, "step": 2912 }, { "epoch": 2.712290502793296, "grad_norm": 0.2787397461901782, "learning_rate": 2.131769575715764e-06, "loss": 0.5043, "step": 2913 }, { "epoch": 2.713221601489758, "grad_norm": 0.25942001805338816, "learning_rate": 2.1248706450500176e-06, "loss": 0.4651, "step": 2914 }, { "epoch": 2.7141527001862196, "grad_norm": 0.26522023254446886, "learning_rate": 2.117971714384271e-06, "loss": 0.4981, "step": 2915 }, { "epoch": 2.7150837988826817, "grad_norm": 0.2547655237531487, "learning_rate": 2.1110727837185237e-06, "loss": 0.4495, "step": 2916 }, { "epoch": 2.7160148975791434, "grad_norm": 0.26578115476748715, "learning_rate": 2.104173853052777e-06, "loss": 0.4681, "step": 2917 }, { "epoch": 2.716945996275605, "grad_norm": 0.2757641343436877, "learning_rate": 2.09727492238703e-06, "loss": 0.4592, "step": 2918 }, { "epoch": 2.717877094972067, "grad_norm": 0.2577534214426986, "learning_rate": 2.0903759917212835e-06, "loss": 0.4818, "step": 2919 }, { "epoch": 2.718808193668529, "grad_norm": 0.262218767457734, "learning_rate": 2.0834770610555367e-06, "loss": 0.4729, "step": 2920 }, { "epoch": 2.7197392923649906, "grad_norm": 0.2640133496239381, "learning_rate": 2.0765781303897896e-06, "loss": 0.4865, "step": 2921 }, { "epoch": 2.7206703910614527, "grad_norm": 0.2669952750925285, "learning_rate": 2.069679199724043e-06, "loss": 0.494, "step": 2922 }, { "epoch": 2.7216014897579144, "grad_norm": 0.26136346699812607, "learning_rate": 2.062780269058296e-06, "loss": 0.4707, "step": 2923 }, { "epoch": 2.722532588454376, "grad_norm": 0.26692315014622797, "learning_rate": 2.0558813383925493e-06, "loss": 0.4853, "step": 2924 }, { "epoch": 2.723463687150838, "grad_norm": 0.2639353656255586, "learning_rate": 2.0489824077268026e-06, "loss": 0.4638, "step": 2925 }, { "epoch": 2.7243947858473, "grad_norm": 0.2785975172211725, "learning_rate": 2.042083477061056e-06, "loss": 0.5171, "step": 2926 }, { "epoch": 2.7253258845437616, "grad_norm": 0.26365526870882183, "learning_rate": 2.0351845463953087e-06, "loss": 0.4639, "step": 2927 }, { "epoch": 2.7262569832402237, "grad_norm": 0.2485011868210596, "learning_rate": 2.0282856157295623e-06, "loss": 0.4712, "step": 2928 }, { "epoch": 2.7271880819366854, "grad_norm": 0.27207539100429057, "learning_rate": 2.021386685063815e-06, "loss": 0.4777, "step": 2929 }, { "epoch": 2.728119180633147, "grad_norm": 0.24446784511198646, "learning_rate": 2.0144877543980684e-06, "loss": 0.4593, "step": 2930 }, { "epoch": 2.729050279329609, "grad_norm": 0.2576320379061204, "learning_rate": 2.0075888237323217e-06, "loss": 0.4746, "step": 2931 }, { "epoch": 2.729981378026071, "grad_norm": 0.26202801735792247, "learning_rate": 2.000689893066575e-06, "loss": 0.4779, "step": 2932 }, { "epoch": 2.7309124767225326, "grad_norm": 0.2550202290928559, "learning_rate": 1.993790962400828e-06, "loss": 0.4511, "step": 2933 }, { "epoch": 2.7318435754189943, "grad_norm": 0.25652758339908527, "learning_rate": 1.986892031735081e-06, "loss": 0.4523, "step": 2934 }, { "epoch": 2.7327746741154564, "grad_norm": 0.2606354154624924, "learning_rate": 1.9799931010693343e-06, "loss": 0.4689, "step": 2935 }, { "epoch": 2.733705772811918, "grad_norm": 0.2622911085619965, "learning_rate": 1.9730941704035875e-06, "loss": 0.4789, "step": 2936 }, { "epoch": 2.7346368715083798, "grad_norm": 0.2626845062180198, "learning_rate": 1.9661952397378408e-06, "loss": 0.4465, "step": 2937 }, { "epoch": 2.7355679702048414, "grad_norm": 0.255353621411798, "learning_rate": 1.959296309072094e-06, "loss": 0.4719, "step": 2938 }, { "epoch": 2.7364990689013036, "grad_norm": 0.2510782328857392, "learning_rate": 1.9523973784063473e-06, "loss": 0.4301, "step": 2939 }, { "epoch": 2.7374301675977653, "grad_norm": 0.26945896024024907, "learning_rate": 1.9454984477406e-06, "loss": 0.4989, "step": 2940 }, { "epoch": 2.738361266294227, "grad_norm": 0.26688305919252614, "learning_rate": 1.9385995170748538e-06, "loss": 0.4835, "step": 2941 }, { "epoch": 2.739292364990689, "grad_norm": 0.2780436395491767, "learning_rate": 1.9317005864091066e-06, "loss": 0.4641, "step": 2942 }, { "epoch": 2.7402234636871508, "grad_norm": 0.2653027533920554, "learning_rate": 1.92480165574336e-06, "loss": 0.4704, "step": 2943 }, { "epoch": 2.7411545623836124, "grad_norm": 0.2629758034710566, "learning_rate": 1.917902725077613e-06, "loss": 0.4865, "step": 2944 }, { "epoch": 2.7420856610800746, "grad_norm": 0.2641335713484004, "learning_rate": 1.9110037944118664e-06, "loss": 0.475, "step": 2945 }, { "epoch": 2.7430167597765363, "grad_norm": 0.2528596408677863, "learning_rate": 1.9041048637461196e-06, "loss": 0.4639, "step": 2946 }, { "epoch": 2.743947858472998, "grad_norm": 0.25714385664792677, "learning_rate": 1.8972059330803727e-06, "loss": 0.4539, "step": 2947 }, { "epoch": 2.74487895716946, "grad_norm": 0.25962885083821474, "learning_rate": 1.890307002414626e-06, "loss": 0.4656, "step": 2948 }, { "epoch": 2.7458100558659218, "grad_norm": 0.26480801969615364, "learning_rate": 1.883408071748879e-06, "loss": 0.4677, "step": 2949 }, { "epoch": 2.7467411545623834, "grad_norm": 0.26480909109414064, "learning_rate": 1.8765091410831324e-06, "loss": 0.4871, "step": 2950 }, { "epoch": 2.7476722532588456, "grad_norm": 0.253019689721648, "learning_rate": 1.8696102104173855e-06, "loss": 0.4653, "step": 2951 }, { "epoch": 2.7486033519553073, "grad_norm": 0.24399116952595198, "learning_rate": 1.8627112797516385e-06, "loss": 0.4605, "step": 2952 }, { "epoch": 2.749534450651769, "grad_norm": 0.2563975192676726, "learning_rate": 1.8558123490858918e-06, "loss": 0.4575, "step": 2953 }, { "epoch": 2.750465549348231, "grad_norm": 0.2626875521576296, "learning_rate": 1.8489134184201452e-06, "loss": 0.4379, "step": 2954 }, { "epoch": 2.7513966480446927, "grad_norm": 0.2694219138076785, "learning_rate": 1.8420144877543983e-06, "loss": 0.4951, "step": 2955 }, { "epoch": 2.7523277467411544, "grad_norm": 0.24666540270172266, "learning_rate": 1.8351155570886513e-06, "loss": 0.4566, "step": 2956 }, { "epoch": 2.7532588454376166, "grad_norm": 0.25264389772343887, "learning_rate": 1.8282166264229046e-06, "loss": 0.4539, "step": 2957 }, { "epoch": 2.7541899441340782, "grad_norm": 0.2618508325027973, "learning_rate": 1.8213176957571576e-06, "loss": 0.4969, "step": 2958 }, { "epoch": 2.75512104283054, "grad_norm": 0.24745490608051968, "learning_rate": 1.814418765091411e-06, "loss": 0.4693, "step": 2959 }, { "epoch": 2.756052141527002, "grad_norm": 0.26138226364325623, "learning_rate": 1.8075198344256641e-06, "loss": 0.4736, "step": 2960 }, { "epoch": 2.7569832402234637, "grad_norm": 0.25709193191681623, "learning_rate": 1.8006209037599174e-06, "loss": 0.4693, "step": 2961 }, { "epoch": 2.7579143389199254, "grad_norm": 0.2640799681616801, "learning_rate": 1.7937219730941704e-06, "loss": 0.4735, "step": 2962 }, { "epoch": 2.7588454376163876, "grad_norm": 0.2577314602472514, "learning_rate": 1.786823042428424e-06, "loss": 0.4581, "step": 2963 }, { "epoch": 2.7597765363128492, "grad_norm": 0.26999289326994225, "learning_rate": 1.779924111762677e-06, "loss": 0.4822, "step": 2964 }, { "epoch": 2.760707635009311, "grad_norm": 0.2564152026543332, "learning_rate": 1.7730251810969302e-06, "loss": 0.4537, "step": 2965 }, { "epoch": 2.761638733705773, "grad_norm": 0.2524749386730659, "learning_rate": 1.7661262504311832e-06, "loss": 0.4546, "step": 2966 }, { "epoch": 2.7625698324022347, "grad_norm": 0.26211727624935954, "learning_rate": 1.7592273197654363e-06, "loss": 0.4473, "step": 2967 }, { "epoch": 2.7635009310986964, "grad_norm": 0.25959105319819836, "learning_rate": 1.7523283890996897e-06, "loss": 0.4787, "step": 2968 }, { "epoch": 2.7644320297951586, "grad_norm": 0.2697136859422218, "learning_rate": 1.745429458433943e-06, "loss": 0.5042, "step": 2969 }, { "epoch": 2.7653631284916202, "grad_norm": 0.26303342877883984, "learning_rate": 1.738530527768196e-06, "loss": 0.4711, "step": 2970 }, { "epoch": 2.766294227188082, "grad_norm": 0.2618478386450788, "learning_rate": 1.731631597102449e-06, "loss": 0.4427, "step": 2971 }, { "epoch": 2.7672253258845436, "grad_norm": 0.2581954848067317, "learning_rate": 1.7247326664367025e-06, "loss": 0.4536, "step": 2972 }, { "epoch": 2.7681564245810057, "grad_norm": 0.253358380437741, "learning_rate": 1.7178337357709556e-06, "loss": 0.4532, "step": 2973 }, { "epoch": 2.7690875232774674, "grad_norm": 0.26279987711246716, "learning_rate": 1.7109348051052088e-06, "loss": 0.4616, "step": 2974 }, { "epoch": 2.770018621973929, "grad_norm": 0.25812154199473014, "learning_rate": 1.7040358744394619e-06, "loss": 0.494, "step": 2975 }, { "epoch": 2.770949720670391, "grad_norm": 0.25670797193464784, "learning_rate": 1.6971369437737154e-06, "loss": 0.44, "step": 2976 }, { "epoch": 2.771880819366853, "grad_norm": 0.260754360601417, "learning_rate": 1.6902380131079684e-06, "loss": 0.4695, "step": 2977 }, { "epoch": 2.7728119180633146, "grad_norm": 0.25348817013731145, "learning_rate": 1.6833390824422217e-06, "loss": 0.4429, "step": 2978 }, { "epoch": 2.7737430167597763, "grad_norm": 0.2616320558194811, "learning_rate": 1.6764401517764747e-06, "loss": 0.4811, "step": 2979 }, { "epoch": 2.7746741154562384, "grad_norm": 0.2631703010490945, "learning_rate": 1.669541221110728e-06, "loss": 0.481, "step": 2980 }, { "epoch": 2.7756052141527, "grad_norm": 0.26347611387685943, "learning_rate": 1.6626422904449812e-06, "loss": 0.4356, "step": 2981 }, { "epoch": 2.776536312849162, "grad_norm": 0.25458254915884243, "learning_rate": 1.6557433597792345e-06, "loss": 0.4602, "step": 2982 }, { "epoch": 2.777467411545624, "grad_norm": 0.26431809112563925, "learning_rate": 1.6488444291134875e-06, "loss": 0.4867, "step": 2983 }, { "epoch": 2.7783985102420856, "grad_norm": 0.2543413043375341, "learning_rate": 1.6419454984477408e-06, "loss": 0.4673, "step": 2984 }, { "epoch": 2.7793296089385473, "grad_norm": 0.27519722064402136, "learning_rate": 1.635046567781994e-06, "loss": 0.5197, "step": 2985 }, { "epoch": 2.7802607076350094, "grad_norm": 0.25368989054037644, "learning_rate": 1.6281476371162473e-06, "loss": 0.446, "step": 2986 }, { "epoch": 2.781191806331471, "grad_norm": 0.25809700707020095, "learning_rate": 1.6212487064505003e-06, "loss": 0.4568, "step": 2987 }, { "epoch": 2.782122905027933, "grad_norm": 0.262962489047267, "learning_rate": 1.6143497757847533e-06, "loss": 0.4835, "step": 2988 }, { "epoch": 2.783054003724395, "grad_norm": 0.25984697720374406, "learning_rate": 1.6074508451190066e-06, "loss": 0.4697, "step": 2989 }, { "epoch": 2.7839851024208566, "grad_norm": 0.2551093937436251, "learning_rate": 1.60055191445326e-06, "loss": 0.4717, "step": 2990 }, { "epoch": 2.7849162011173183, "grad_norm": 0.26442900572457223, "learning_rate": 1.5936529837875131e-06, "loss": 0.4673, "step": 2991 }, { "epoch": 2.7858472998137804, "grad_norm": 0.2631648324045938, "learning_rate": 1.5867540531217662e-06, "loss": 0.4558, "step": 2992 }, { "epoch": 2.786778398510242, "grad_norm": 0.2586957564524125, "learning_rate": 1.5798551224560194e-06, "loss": 0.452, "step": 2993 }, { "epoch": 2.787709497206704, "grad_norm": 0.24967298925720838, "learning_rate": 1.5729561917902727e-06, "loss": 0.4815, "step": 2994 }, { "epoch": 2.788640595903166, "grad_norm": 0.2660970677378918, "learning_rate": 1.566057261124526e-06, "loss": 0.462, "step": 2995 }, { "epoch": 2.7895716945996276, "grad_norm": 0.26087118168219137, "learning_rate": 1.559158330458779e-06, "loss": 0.479, "step": 2996 }, { "epoch": 2.7905027932960893, "grad_norm": 0.25901008672045334, "learning_rate": 1.5522593997930322e-06, "loss": 0.4756, "step": 2997 }, { "epoch": 2.7914338919925514, "grad_norm": 0.25985975384769244, "learning_rate": 1.5453604691272853e-06, "loss": 0.456, "step": 2998 }, { "epoch": 2.792364990689013, "grad_norm": 0.2648770013020573, "learning_rate": 1.5384615384615387e-06, "loss": 0.4944, "step": 2999 }, { "epoch": 2.793296089385475, "grad_norm": 0.25375853922442393, "learning_rate": 1.5315626077957918e-06, "loss": 0.4738, "step": 3000 }, { "epoch": 2.794227188081937, "grad_norm": 0.2670996248505636, "learning_rate": 1.524663677130045e-06, "loss": 0.4522, "step": 3001 }, { "epoch": 2.7951582867783986, "grad_norm": 0.3108227071888683, "learning_rate": 1.517764746464298e-06, "loss": 0.4708, "step": 3002 }, { "epoch": 2.7960893854748603, "grad_norm": 0.25423718739276, "learning_rate": 1.5108658157985515e-06, "loss": 0.4715, "step": 3003 }, { "epoch": 2.7970204841713224, "grad_norm": 0.256150064779407, "learning_rate": 1.5039668851328046e-06, "loss": 0.4556, "step": 3004 }, { "epoch": 2.797951582867784, "grad_norm": 0.2524594273261065, "learning_rate": 1.4970679544670578e-06, "loss": 0.4591, "step": 3005 }, { "epoch": 2.798882681564246, "grad_norm": 0.25730135000550597, "learning_rate": 1.4901690238013109e-06, "loss": 0.4867, "step": 3006 }, { "epoch": 2.7998137802607075, "grad_norm": 0.2536018104925059, "learning_rate": 1.483270093135564e-06, "loss": 0.4811, "step": 3007 }, { "epoch": 2.8007448789571696, "grad_norm": 0.2611263772160424, "learning_rate": 1.4763711624698174e-06, "loss": 0.485, "step": 3008 }, { "epoch": 2.8016759776536313, "grad_norm": 0.2585801381953188, "learning_rate": 1.4694722318040704e-06, "loss": 0.4559, "step": 3009 }, { "epoch": 2.802607076350093, "grad_norm": 0.26370886388311265, "learning_rate": 1.4625733011383237e-06, "loss": 0.4802, "step": 3010 }, { "epoch": 2.8035381750465547, "grad_norm": 0.2548748347184935, "learning_rate": 1.4556743704725767e-06, "loss": 0.4686, "step": 3011 }, { "epoch": 2.804469273743017, "grad_norm": 0.24332189681129487, "learning_rate": 1.4487754398068302e-06, "loss": 0.4441, "step": 3012 }, { "epoch": 2.8054003724394785, "grad_norm": 0.25986065012438914, "learning_rate": 1.4418765091410832e-06, "loss": 0.4596, "step": 3013 }, { "epoch": 2.80633147113594, "grad_norm": 0.2565099487637232, "learning_rate": 1.4349775784753365e-06, "loss": 0.4463, "step": 3014 }, { "epoch": 2.8072625698324023, "grad_norm": 0.26681174839233773, "learning_rate": 1.4280786478095895e-06, "loss": 0.4548, "step": 3015 }, { "epoch": 2.808193668528864, "grad_norm": 0.2528099128315508, "learning_rate": 1.421179717143843e-06, "loss": 0.4702, "step": 3016 }, { "epoch": 2.8091247672253257, "grad_norm": 0.2614400730251064, "learning_rate": 1.414280786478096e-06, "loss": 0.4704, "step": 3017 }, { "epoch": 2.810055865921788, "grad_norm": 0.2503380774111154, "learning_rate": 1.4073818558123493e-06, "loss": 0.4714, "step": 3018 }, { "epoch": 2.8109869646182495, "grad_norm": 0.2611334409926771, "learning_rate": 1.4004829251466023e-06, "loss": 0.4739, "step": 3019 }, { "epoch": 2.811918063314711, "grad_norm": 0.26019587048752457, "learning_rate": 1.3935839944808554e-06, "loss": 0.4742, "step": 3020 }, { "epoch": 2.8128491620111733, "grad_norm": 0.25174120071582406, "learning_rate": 1.3866850638151088e-06, "loss": 0.4605, "step": 3021 }, { "epoch": 2.813780260707635, "grad_norm": 0.2612444810937935, "learning_rate": 1.379786133149362e-06, "loss": 0.4847, "step": 3022 }, { "epoch": 2.8147113594040967, "grad_norm": 0.2521154339829096, "learning_rate": 1.3728872024836151e-06, "loss": 0.4783, "step": 3023 }, { "epoch": 2.815642458100559, "grad_norm": 0.25273916395940965, "learning_rate": 1.3659882718178682e-06, "loss": 0.4754, "step": 3024 }, { "epoch": 2.8165735567970205, "grad_norm": 0.2574105097302855, "learning_rate": 1.3590893411521216e-06, "loss": 0.4435, "step": 3025 }, { "epoch": 2.817504655493482, "grad_norm": 0.2729163138725232, "learning_rate": 1.3521904104863749e-06, "loss": 0.5079, "step": 3026 }, { "epoch": 2.8184357541899443, "grad_norm": 0.2567760379525526, "learning_rate": 1.345291479820628e-06, "loss": 0.4728, "step": 3027 }, { "epoch": 2.819366852886406, "grad_norm": 0.2600705043756998, "learning_rate": 1.338392549154881e-06, "loss": 0.4748, "step": 3028 }, { "epoch": 2.8202979515828677, "grad_norm": 0.25655979452671585, "learning_rate": 1.3314936184891342e-06, "loss": 0.4182, "step": 3029 }, { "epoch": 2.82122905027933, "grad_norm": 0.2543862406642577, "learning_rate": 1.3245946878233875e-06, "loss": 0.4725, "step": 3030 }, { "epoch": 2.8221601489757915, "grad_norm": 0.2800973613023852, "learning_rate": 1.3176957571576407e-06, "loss": 0.5201, "step": 3031 }, { "epoch": 2.823091247672253, "grad_norm": 0.26176093591166316, "learning_rate": 1.3107968264918938e-06, "loss": 0.4716, "step": 3032 }, { "epoch": 2.8240223463687153, "grad_norm": 0.24678711102086742, "learning_rate": 1.303897895826147e-06, "loss": 0.4319, "step": 3033 }, { "epoch": 2.824953445065177, "grad_norm": 0.25439484129147627, "learning_rate": 1.2969989651604003e-06, "loss": 0.4721, "step": 3034 }, { "epoch": 2.8258845437616387, "grad_norm": 0.24728648292521818, "learning_rate": 1.2901000344946535e-06, "loss": 0.4616, "step": 3035 }, { "epoch": 2.826815642458101, "grad_norm": 0.25439639779647977, "learning_rate": 1.2832011038289066e-06, "loss": 0.4809, "step": 3036 }, { "epoch": 2.8277467411545625, "grad_norm": 0.2554138340931802, "learning_rate": 1.2763021731631598e-06, "loss": 0.4746, "step": 3037 }, { "epoch": 2.828677839851024, "grad_norm": 0.2533571689197992, "learning_rate": 1.2694032424974129e-06, "loss": 0.4769, "step": 3038 }, { "epoch": 2.8296089385474863, "grad_norm": 0.25850965081218497, "learning_rate": 1.2625043118316663e-06, "loss": 0.4915, "step": 3039 }, { "epoch": 2.830540037243948, "grad_norm": 0.26568020007148185, "learning_rate": 1.2556053811659194e-06, "loss": 0.483, "step": 3040 }, { "epoch": 2.8314711359404097, "grad_norm": 0.254955942081663, "learning_rate": 1.2487064505001724e-06, "loss": 0.4817, "step": 3041 }, { "epoch": 2.8324022346368714, "grad_norm": 0.26133843895270314, "learning_rate": 1.241807519834426e-06, "loss": 0.4641, "step": 3042 }, { "epoch": 2.8333333333333335, "grad_norm": 0.26494307031595343, "learning_rate": 1.234908589168679e-06, "loss": 0.4784, "step": 3043 }, { "epoch": 2.834264432029795, "grad_norm": 0.26528919701146836, "learning_rate": 1.2280096585029322e-06, "loss": 0.4748, "step": 3044 }, { "epoch": 2.835195530726257, "grad_norm": 0.25205934724672635, "learning_rate": 1.2211107278371852e-06, "loss": 0.4391, "step": 3045 }, { "epoch": 2.8361266294227185, "grad_norm": 0.25517623776355725, "learning_rate": 1.2142117971714385e-06, "loss": 0.4573, "step": 3046 }, { "epoch": 2.8370577281191807, "grad_norm": 0.2527724644398233, "learning_rate": 1.2073128665056917e-06, "loss": 0.4559, "step": 3047 }, { "epoch": 2.8379888268156424, "grad_norm": 0.267104793995684, "learning_rate": 1.2004139358399448e-06, "loss": 0.4899, "step": 3048 }, { "epoch": 2.838919925512104, "grad_norm": 0.258752879050881, "learning_rate": 1.193515005174198e-06, "loss": 0.4774, "step": 3049 }, { "epoch": 2.839851024208566, "grad_norm": 0.27355268105407815, "learning_rate": 1.1866160745084513e-06, "loss": 0.4846, "step": 3050 }, { "epoch": 2.840782122905028, "grad_norm": 0.24833655345617103, "learning_rate": 1.1797171438427045e-06, "loss": 0.4756, "step": 3051 }, { "epoch": 2.8417132216014895, "grad_norm": 0.2523289972489985, "learning_rate": 1.1728182131769576e-06, "loss": 0.4687, "step": 3052 }, { "epoch": 2.8426443202979517, "grad_norm": 0.26052169209487536, "learning_rate": 1.1659192825112108e-06, "loss": 0.4664, "step": 3053 }, { "epoch": 2.8435754189944134, "grad_norm": 0.2565862340992464, "learning_rate": 1.159020351845464e-06, "loss": 0.4767, "step": 3054 }, { "epoch": 2.844506517690875, "grad_norm": 0.2640092538704146, "learning_rate": 1.1521214211797174e-06, "loss": 0.4768, "step": 3055 }, { "epoch": 2.845437616387337, "grad_norm": 0.2590734344447366, "learning_rate": 1.1452224905139704e-06, "loss": 0.4693, "step": 3056 }, { "epoch": 2.846368715083799, "grad_norm": 0.2555410927508353, "learning_rate": 1.1383235598482236e-06, "loss": 0.4747, "step": 3057 }, { "epoch": 2.8472998137802605, "grad_norm": 0.2518908263642949, "learning_rate": 1.131424629182477e-06, "loss": 0.4932, "step": 3058 }, { "epoch": 2.8482309124767227, "grad_norm": 0.26126706108259207, "learning_rate": 1.12452569851673e-06, "loss": 0.4789, "step": 3059 }, { "epoch": 2.8491620111731844, "grad_norm": 0.2548535689031154, "learning_rate": 1.1176267678509832e-06, "loss": 0.4642, "step": 3060 }, { "epoch": 2.850093109869646, "grad_norm": 0.2605582250626369, "learning_rate": 1.1107278371852362e-06, "loss": 0.4802, "step": 3061 }, { "epoch": 2.851024208566108, "grad_norm": 0.24926840254206212, "learning_rate": 1.1038289065194895e-06, "loss": 0.4728, "step": 3062 }, { "epoch": 2.85195530726257, "grad_norm": 0.2420780844457063, "learning_rate": 1.0969299758537427e-06, "loss": 0.4351, "step": 3063 }, { "epoch": 2.8528864059590315, "grad_norm": 0.27171120285261563, "learning_rate": 1.090031045187996e-06, "loss": 0.4908, "step": 3064 }, { "epoch": 2.8538175046554937, "grad_norm": 0.26128159194149164, "learning_rate": 1.083132114522249e-06, "loss": 0.4678, "step": 3065 }, { "epoch": 2.8547486033519553, "grad_norm": 0.24801337638416504, "learning_rate": 1.0762331838565023e-06, "loss": 0.4526, "step": 3066 }, { "epoch": 2.855679702048417, "grad_norm": 0.26006738378949085, "learning_rate": 1.0693342531907556e-06, "loss": 0.4794, "step": 3067 }, { "epoch": 2.856610800744879, "grad_norm": 0.24850256510963398, "learning_rate": 1.0624353225250088e-06, "loss": 0.4556, "step": 3068 }, { "epoch": 2.857541899441341, "grad_norm": 0.253822771654379, "learning_rate": 1.0555363918592619e-06, "loss": 0.4655, "step": 3069 }, { "epoch": 2.8584729981378025, "grad_norm": 0.2442168217799985, "learning_rate": 1.048637461193515e-06, "loss": 0.4429, "step": 3070 }, { "epoch": 2.8594040968342647, "grad_norm": 0.2542670267143624, "learning_rate": 1.0417385305277684e-06, "loss": 0.4391, "step": 3071 }, { "epoch": 2.8603351955307263, "grad_norm": 0.25844050535043434, "learning_rate": 1.0348395998620214e-06, "loss": 0.5006, "step": 3072 }, { "epoch": 2.861266294227188, "grad_norm": 0.2537248245708744, "learning_rate": 1.0279406691962747e-06, "loss": 0.4606, "step": 3073 }, { "epoch": 2.86219739292365, "grad_norm": 0.24904539590977445, "learning_rate": 1.021041738530528e-06, "loss": 0.459, "step": 3074 }, { "epoch": 2.863128491620112, "grad_norm": 0.25573874916237715, "learning_rate": 1.0141428078647812e-06, "loss": 0.4944, "step": 3075 }, { "epoch": 2.8640595903165735, "grad_norm": 0.2635456367179725, "learning_rate": 1.0072438771990342e-06, "loss": 0.4817, "step": 3076 }, { "epoch": 2.864990689013035, "grad_norm": 0.24474612868533577, "learning_rate": 1.0003449465332875e-06, "loss": 0.4514, "step": 3077 }, { "epoch": 2.8659217877094973, "grad_norm": 0.2477135341409162, "learning_rate": 9.934460158675405e-07, "loss": 0.4592, "step": 3078 }, { "epoch": 2.866852886405959, "grad_norm": 0.23828403104436688, "learning_rate": 9.865470852017938e-07, "loss": 0.4409, "step": 3079 }, { "epoch": 2.8677839851024207, "grad_norm": 0.25277313370997023, "learning_rate": 9.79648154536047e-07, "loss": 0.4614, "step": 3080 }, { "epoch": 2.868715083798883, "grad_norm": 0.24512822418184907, "learning_rate": 9.727492238703e-07, "loss": 0.4611, "step": 3081 }, { "epoch": 2.8696461824953445, "grad_norm": 0.2670351332314113, "learning_rate": 9.658502932045533e-07, "loss": 0.4881, "step": 3082 }, { "epoch": 2.870577281191806, "grad_norm": 0.2531723674701695, "learning_rate": 9.589513625388066e-07, "loss": 0.4705, "step": 3083 }, { "epoch": 2.871508379888268, "grad_norm": 0.2572123678838207, "learning_rate": 9.520524318730598e-07, "loss": 0.4777, "step": 3084 }, { "epoch": 2.87243947858473, "grad_norm": 0.24869634618391767, "learning_rate": 9.45153501207313e-07, "loss": 0.4594, "step": 3085 }, { "epoch": 2.8733705772811917, "grad_norm": 0.25412429172982376, "learning_rate": 9.382545705415662e-07, "loss": 0.4907, "step": 3086 }, { "epoch": 2.8743016759776534, "grad_norm": 0.26104800479364443, "learning_rate": 9.313556398758193e-07, "loss": 0.4814, "step": 3087 }, { "epoch": 2.8752327746741155, "grad_norm": 0.2641530954272684, "learning_rate": 9.244567092100726e-07, "loss": 0.4629, "step": 3088 }, { "epoch": 2.876163873370577, "grad_norm": 0.2556975600184679, "learning_rate": 9.175577785443257e-07, "loss": 0.4767, "step": 3089 }, { "epoch": 2.877094972067039, "grad_norm": 0.2504038227113333, "learning_rate": 9.106588478785788e-07, "loss": 0.4735, "step": 3090 }, { "epoch": 2.878026070763501, "grad_norm": 0.2582874009710989, "learning_rate": 9.037599172128321e-07, "loss": 0.4829, "step": 3091 }, { "epoch": 2.8789571694599627, "grad_norm": 0.2532466651740779, "learning_rate": 8.968609865470852e-07, "loss": 0.4583, "step": 3092 }, { "epoch": 2.8798882681564244, "grad_norm": 0.25624533866415194, "learning_rate": 8.899620558813385e-07, "loss": 0.4634, "step": 3093 }, { "epoch": 2.8808193668528865, "grad_norm": 0.250554887509465, "learning_rate": 8.830631252155916e-07, "loss": 0.4494, "step": 3094 }, { "epoch": 2.881750465549348, "grad_norm": 0.25586322498858854, "learning_rate": 8.761641945498449e-07, "loss": 0.4501, "step": 3095 }, { "epoch": 2.88268156424581, "grad_norm": 0.2541919006756237, "learning_rate": 8.69265263884098e-07, "loss": 0.4827, "step": 3096 }, { "epoch": 2.883612662942272, "grad_norm": 0.261300619752337, "learning_rate": 8.623663332183513e-07, "loss": 0.4849, "step": 3097 }, { "epoch": 2.8845437616387337, "grad_norm": 0.2527131713041992, "learning_rate": 8.554674025526044e-07, "loss": 0.4512, "step": 3098 }, { "epoch": 2.8854748603351954, "grad_norm": 0.24943402602316478, "learning_rate": 8.485684718868577e-07, "loss": 0.454, "step": 3099 }, { "epoch": 2.8864059590316575, "grad_norm": 0.25329388478508874, "learning_rate": 8.416695412211108e-07, "loss": 0.4704, "step": 3100 }, { "epoch": 2.887337057728119, "grad_norm": 0.2577277284046532, "learning_rate": 8.34770610555364e-07, "loss": 0.4838, "step": 3101 }, { "epoch": 2.888268156424581, "grad_norm": 0.25156796265939235, "learning_rate": 8.278716798896172e-07, "loss": 0.4632, "step": 3102 }, { "epoch": 2.889199255121043, "grad_norm": 0.25004166547434004, "learning_rate": 8.209727492238704e-07, "loss": 0.4675, "step": 3103 }, { "epoch": 2.8901303538175047, "grad_norm": 0.25799439395340845, "learning_rate": 8.140738185581236e-07, "loss": 0.4709, "step": 3104 }, { "epoch": 2.8910614525139664, "grad_norm": 0.2659374537868653, "learning_rate": 8.071748878923767e-07, "loss": 0.4946, "step": 3105 }, { "epoch": 2.8919925512104285, "grad_norm": 0.24286322427254448, "learning_rate": 8.0027595722663e-07, "loss": 0.456, "step": 3106 }, { "epoch": 2.89292364990689, "grad_norm": 0.2568002464162302, "learning_rate": 7.933770265608831e-07, "loss": 0.4785, "step": 3107 }, { "epoch": 2.893854748603352, "grad_norm": 0.24727421866776864, "learning_rate": 7.864780958951363e-07, "loss": 0.4319, "step": 3108 }, { "epoch": 2.894785847299814, "grad_norm": 0.2693979708755487, "learning_rate": 7.795791652293895e-07, "loss": 0.4841, "step": 3109 }, { "epoch": 2.8957169459962757, "grad_norm": 0.2512905587421915, "learning_rate": 7.726802345636426e-07, "loss": 0.4683, "step": 3110 }, { "epoch": 2.8966480446927374, "grad_norm": 0.25214538647097495, "learning_rate": 7.657813038978959e-07, "loss": 0.4457, "step": 3111 }, { "epoch": 2.8975791433891995, "grad_norm": 0.2494100056338064, "learning_rate": 7.58882373232149e-07, "loss": 0.4433, "step": 3112 }, { "epoch": 2.898510242085661, "grad_norm": 0.25051687616338886, "learning_rate": 7.519834425664023e-07, "loss": 0.4634, "step": 3113 }, { "epoch": 2.899441340782123, "grad_norm": 0.25343293828885893, "learning_rate": 7.450845119006554e-07, "loss": 0.4658, "step": 3114 }, { "epoch": 2.9003724394785846, "grad_norm": 0.2482570212363127, "learning_rate": 7.381855812349087e-07, "loss": 0.4718, "step": 3115 }, { "epoch": 2.9013035381750467, "grad_norm": 0.24633310435876873, "learning_rate": 7.312866505691618e-07, "loss": 0.4488, "step": 3116 }, { "epoch": 2.9022346368715084, "grad_norm": 0.24562168783769742, "learning_rate": 7.243877199034151e-07, "loss": 0.4764, "step": 3117 }, { "epoch": 2.90316573556797, "grad_norm": 0.24485776924874852, "learning_rate": 7.174887892376682e-07, "loss": 0.4731, "step": 3118 }, { "epoch": 2.9040968342644318, "grad_norm": 0.24841039083376304, "learning_rate": 7.105898585719215e-07, "loss": 0.4579, "step": 3119 }, { "epoch": 2.905027932960894, "grad_norm": 0.25378388017051523, "learning_rate": 7.036909279061746e-07, "loss": 0.4753, "step": 3120 }, { "epoch": 2.9059590316573556, "grad_norm": 0.26683458873739463, "learning_rate": 6.967919972404277e-07, "loss": 0.4779, "step": 3121 }, { "epoch": 2.9068901303538173, "grad_norm": 0.2515854985595953, "learning_rate": 6.89893066574681e-07, "loss": 0.4806, "step": 3122 }, { "epoch": 2.9078212290502794, "grad_norm": 0.25691436839904974, "learning_rate": 6.829941359089341e-07, "loss": 0.4868, "step": 3123 }, { "epoch": 2.908752327746741, "grad_norm": 0.24914037407612755, "learning_rate": 6.760952052431874e-07, "loss": 0.4511, "step": 3124 }, { "epoch": 2.9096834264432028, "grad_norm": 0.24906495603521744, "learning_rate": 6.691962745774405e-07, "loss": 0.4755, "step": 3125 }, { "epoch": 2.910614525139665, "grad_norm": 0.2591428027818596, "learning_rate": 6.622973439116937e-07, "loss": 0.4553, "step": 3126 }, { "epoch": 2.9115456238361266, "grad_norm": 0.25002769128530095, "learning_rate": 6.553984132459469e-07, "loss": 0.4611, "step": 3127 }, { "epoch": 2.9124767225325883, "grad_norm": 0.24895293702400093, "learning_rate": 6.484994825802001e-07, "loss": 0.4518, "step": 3128 }, { "epoch": 2.9134078212290504, "grad_norm": 0.24397250520821256, "learning_rate": 6.416005519144533e-07, "loss": 0.4636, "step": 3129 }, { "epoch": 2.914338919925512, "grad_norm": 0.24007801676707768, "learning_rate": 6.347016212487064e-07, "loss": 0.4384, "step": 3130 }, { "epoch": 2.9152700186219738, "grad_norm": 0.2415374699201975, "learning_rate": 6.278026905829597e-07, "loss": 0.4542, "step": 3131 }, { "epoch": 2.916201117318436, "grad_norm": 0.24489883904987844, "learning_rate": 6.20903759917213e-07, "loss": 0.4549, "step": 3132 }, { "epoch": 2.9171322160148976, "grad_norm": 0.2475606582030005, "learning_rate": 6.140048292514661e-07, "loss": 0.4769, "step": 3133 }, { "epoch": 2.9180633147113593, "grad_norm": 0.2456669620034374, "learning_rate": 6.071058985857192e-07, "loss": 0.4587, "step": 3134 }, { "epoch": 2.9189944134078214, "grad_norm": 0.25812791584473527, "learning_rate": 6.002069679199724e-07, "loss": 0.4642, "step": 3135 }, { "epoch": 2.919925512104283, "grad_norm": 0.25255464743931716, "learning_rate": 5.933080372542256e-07, "loss": 0.4723, "step": 3136 }, { "epoch": 2.9208566108007448, "grad_norm": 0.252063686997156, "learning_rate": 5.864091065884788e-07, "loss": 0.4963, "step": 3137 }, { "epoch": 2.921787709497207, "grad_norm": 0.24458181015715866, "learning_rate": 5.79510175922732e-07, "loss": 0.461, "step": 3138 }, { "epoch": 2.9227188081936686, "grad_norm": 0.2506665847215771, "learning_rate": 5.726112452569852e-07, "loss": 0.4906, "step": 3139 }, { "epoch": 2.9236499068901303, "grad_norm": 0.2441325667439889, "learning_rate": 5.657123145912385e-07, "loss": 0.4541, "step": 3140 }, { "epoch": 2.9245810055865924, "grad_norm": 0.25499546701719444, "learning_rate": 5.588133839254916e-07, "loss": 0.4706, "step": 3141 }, { "epoch": 2.925512104283054, "grad_norm": 0.24100062934069483, "learning_rate": 5.519144532597447e-07, "loss": 0.4535, "step": 3142 }, { "epoch": 2.9264432029795158, "grad_norm": 0.26773140080433844, "learning_rate": 5.45015522593998e-07, "loss": 0.4923, "step": 3143 }, { "epoch": 2.927374301675978, "grad_norm": 0.26547390466410425, "learning_rate": 5.381165919282512e-07, "loss": 0.4992, "step": 3144 }, { "epoch": 2.9283054003724396, "grad_norm": 0.2554943415745726, "learning_rate": 5.312176612625044e-07, "loss": 0.4661, "step": 3145 }, { "epoch": 2.9292364990689013, "grad_norm": 0.2525485267352679, "learning_rate": 5.243187305967576e-07, "loss": 0.4936, "step": 3146 }, { "epoch": 2.9301675977653634, "grad_norm": 0.2533062208582983, "learning_rate": 5.174197999310107e-07, "loss": 0.4798, "step": 3147 }, { "epoch": 2.931098696461825, "grad_norm": 0.25387401243966135, "learning_rate": 5.10520869265264e-07, "loss": 0.4648, "step": 3148 }, { "epoch": 2.9320297951582868, "grad_norm": 0.25097801826877786, "learning_rate": 5.036219385995171e-07, "loss": 0.456, "step": 3149 }, { "epoch": 2.9329608938547485, "grad_norm": 0.24960931013884397, "learning_rate": 4.967230079337703e-07, "loss": 0.4829, "step": 3150 }, { "epoch": 2.9338919925512106, "grad_norm": 0.2469525403064909, "learning_rate": 4.898240772680235e-07, "loss": 0.4634, "step": 3151 }, { "epoch": 2.9348230912476723, "grad_norm": 0.2429118321486131, "learning_rate": 4.829251466022767e-07, "loss": 0.4481, "step": 3152 }, { "epoch": 2.935754189944134, "grad_norm": 0.23952084499286547, "learning_rate": 4.760262159365299e-07, "loss": 0.4656, "step": 3153 }, { "epoch": 2.9366852886405956, "grad_norm": 0.2542038431278334, "learning_rate": 4.691272852707831e-07, "loss": 0.4593, "step": 3154 }, { "epoch": 2.9376163873370578, "grad_norm": 0.25172600250492194, "learning_rate": 4.622283546050363e-07, "loss": 0.4502, "step": 3155 }, { "epoch": 2.9385474860335195, "grad_norm": 0.2505951840881892, "learning_rate": 4.553294239392894e-07, "loss": 0.4746, "step": 3156 }, { "epoch": 2.939478584729981, "grad_norm": 0.25056141052967484, "learning_rate": 4.484304932735426e-07, "loss": 0.4868, "step": 3157 }, { "epoch": 2.9404096834264433, "grad_norm": 0.2519164508891578, "learning_rate": 4.415315626077958e-07, "loss": 0.463, "step": 3158 }, { "epoch": 2.941340782122905, "grad_norm": 0.25257189214344067, "learning_rate": 4.34632631942049e-07, "loss": 0.4556, "step": 3159 }, { "epoch": 2.9422718808193666, "grad_norm": 0.2637115868850094, "learning_rate": 4.277337012763022e-07, "loss": 0.4791, "step": 3160 }, { "epoch": 2.9432029795158288, "grad_norm": 0.25464573562659454, "learning_rate": 4.208347706105554e-07, "loss": 0.4865, "step": 3161 }, { "epoch": 2.9441340782122905, "grad_norm": 0.23522130198429594, "learning_rate": 4.139358399448086e-07, "loss": 0.454, "step": 3162 }, { "epoch": 2.945065176908752, "grad_norm": 0.2513863480437794, "learning_rate": 4.070369092790618e-07, "loss": 0.4596, "step": 3163 }, { "epoch": 2.9459962756052143, "grad_norm": 0.253247735064641, "learning_rate": 4.00137978613315e-07, "loss": 0.4755, "step": 3164 }, { "epoch": 2.946927374301676, "grad_norm": 0.24612784013101235, "learning_rate": 3.9323904794756816e-07, "loss": 0.4884, "step": 3165 }, { "epoch": 2.9478584729981376, "grad_norm": 0.2524516088760415, "learning_rate": 3.863401172818213e-07, "loss": 0.4783, "step": 3166 }, { "epoch": 2.9487895716945998, "grad_norm": 0.25015240108822456, "learning_rate": 3.794411866160745e-07, "loss": 0.4659, "step": 3167 }, { "epoch": 2.9497206703910615, "grad_norm": 0.2435912073661064, "learning_rate": 3.725422559503277e-07, "loss": 0.4585, "step": 3168 }, { "epoch": 2.950651769087523, "grad_norm": 0.2504160510236499, "learning_rate": 3.656433252845809e-07, "loss": 0.4803, "step": 3169 }, { "epoch": 2.9515828677839853, "grad_norm": 0.24617864569248624, "learning_rate": 3.587443946188341e-07, "loss": 0.458, "step": 3170 }, { "epoch": 2.952513966480447, "grad_norm": 0.2476541242814103, "learning_rate": 3.518454639530873e-07, "loss": 0.472, "step": 3171 }, { "epoch": 2.9534450651769086, "grad_norm": 0.2500215694065947, "learning_rate": 3.449465332873405e-07, "loss": 0.4743, "step": 3172 }, { "epoch": 2.9543761638733708, "grad_norm": 0.2463172210498838, "learning_rate": 3.380476026215937e-07, "loss": 0.4594, "step": 3173 }, { "epoch": 2.9553072625698324, "grad_norm": 0.25502993684060604, "learning_rate": 3.3114867195584687e-07, "loss": 0.5019, "step": 3174 }, { "epoch": 2.956238361266294, "grad_norm": 0.2483735427721142, "learning_rate": 3.2424974129010007e-07, "loss": 0.4639, "step": 3175 }, { "epoch": 2.9571694599627563, "grad_norm": 0.24922114137702278, "learning_rate": 3.173508106243532e-07, "loss": 0.466, "step": 3176 }, { "epoch": 2.958100558659218, "grad_norm": 0.24767273473112347, "learning_rate": 3.104518799586065e-07, "loss": 0.4635, "step": 3177 }, { "epoch": 2.9590316573556796, "grad_norm": 0.25114095658757907, "learning_rate": 3.035529492928596e-07, "loss": 0.4765, "step": 3178 }, { "epoch": 2.9599627560521418, "grad_norm": 0.24540090148176172, "learning_rate": 2.966540186271128e-07, "loss": 0.4504, "step": 3179 }, { "epoch": 2.9608938547486034, "grad_norm": 0.24320113350581507, "learning_rate": 2.89755087961366e-07, "loss": 0.4563, "step": 3180 }, { "epoch": 2.961824953445065, "grad_norm": 0.2465321175154487, "learning_rate": 2.828561572956192e-07, "loss": 0.4652, "step": 3181 }, { "epoch": 2.9627560521415273, "grad_norm": 0.23978807967100277, "learning_rate": 2.759572266298724e-07, "loss": 0.4452, "step": 3182 }, { "epoch": 2.963687150837989, "grad_norm": 0.2597756211205095, "learning_rate": 2.690582959641256e-07, "loss": 0.4986, "step": 3183 }, { "epoch": 2.9646182495344506, "grad_norm": 0.252127916101565, "learning_rate": 2.621593652983788e-07, "loss": 0.4498, "step": 3184 }, { "epoch": 2.9655493482309123, "grad_norm": 0.2527235818717744, "learning_rate": 2.55260434632632e-07, "loss": 0.4919, "step": 3185 }, { "epoch": 2.9664804469273744, "grad_norm": 0.2496783999698107, "learning_rate": 2.483615039668851e-07, "loss": 0.471, "step": 3186 }, { "epoch": 2.967411545623836, "grad_norm": 0.24511853654221677, "learning_rate": 2.4146257330113833e-07, "loss": 0.4374, "step": 3187 }, { "epoch": 2.968342644320298, "grad_norm": 0.2525014815443968, "learning_rate": 2.3456364263539155e-07, "loss": 0.4572, "step": 3188 }, { "epoch": 2.9692737430167595, "grad_norm": 0.24788989686090906, "learning_rate": 2.276647119696447e-07, "loss": 0.4618, "step": 3189 }, { "epoch": 2.9702048417132216, "grad_norm": 0.23985866373279385, "learning_rate": 2.207657813038979e-07, "loss": 0.4633, "step": 3190 }, { "epoch": 2.9711359404096833, "grad_norm": 0.24795493650776668, "learning_rate": 2.138668506381511e-07, "loss": 0.4609, "step": 3191 }, { "epoch": 2.972067039106145, "grad_norm": 0.24556007482187814, "learning_rate": 2.069679199724043e-07, "loss": 0.4741, "step": 3192 }, { "epoch": 2.972998137802607, "grad_norm": 0.24645665108020737, "learning_rate": 2.000689893066575e-07, "loss": 0.4698, "step": 3193 }, { "epoch": 2.973929236499069, "grad_norm": 0.25422004534470766, "learning_rate": 1.9317005864091066e-07, "loss": 0.485, "step": 3194 }, { "epoch": 2.9748603351955305, "grad_norm": 0.255926478088744, "learning_rate": 1.8627112797516386e-07, "loss": 0.4856, "step": 3195 }, { "epoch": 2.9757914338919926, "grad_norm": 0.26649466912434433, "learning_rate": 1.7937219730941706e-07, "loss": 0.5187, "step": 3196 }, { "epoch": 2.9767225325884543, "grad_norm": 0.24785471713422488, "learning_rate": 1.7247326664367026e-07, "loss": 0.4681, "step": 3197 }, { "epoch": 2.977653631284916, "grad_norm": 0.2552622060335204, "learning_rate": 1.6557433597792343e-07, "loss": 0.4568, "step": 3198 }, { "epoch": 2.978584729981378, "grad_norm": 0.2557162941220393, "learning_rate": 1.586754053121766e-07, "loss": 0.4729, "step": 3199 }, { "epoch": 2.97951582867784, "grad_norm": 0.25011622997939426, "learning_rate": 1.517764746464298e-07, "loss": 0.4534, "step": 3200 }, { "epoch": 2.9804469273743015, "grad_norm": 0.24979614811798198, "learning_rate": 1.44877543980683e-07, "loss": 0.4629, "step": 3201 }, { "epoch": 2.9813780260707636, "grad_norm": 0.2534589505110616, "learning_rate": 1.379786133149362e-07, "loss": 0.5048, "step": 3202 }, { "epoch": 2.9823091247672253, "grad_norm": 0.2528647200262641, "learning_rate": 1.310796826491894e-07, "loss": 0.476, "step": 3203 }, { "epoch": 2.983240223463687, "grad_norm": 0.25344063247604365, "learning_rate": 1.2418075198344256e-07, "loss": 0.4811, "step": 3204 }, { "epoch": 2.984171322160149, "grad_norm": 0.25488026347368425, "learning_rate": 1.1728182131769578e-07, "loss": 0.4835, "step": 3205 }, { "epoch": 2.985102420856611, "grad_norm": 0.2497437158243685, "learning_rate": 1.1038289065194895e-07, "loss": 0.4555, "step": 3206 }, { "epoch": 2.9860335195530725, "grad_norm": 0.25437336035008135, "learning_rate": 1.0348395998620215e-07, "loss": 0.4692, "step": 3207 }, { "epoch": 2.9869646182495346, "grad_norm": 0.2488997767543899, "learning_rate": 9.658502932045533e-08, "loss": 0.4466, "step": 3208 }, { "epoch": 2.9878957169459963, "grad_norm": 0.2539195039652246, "learning_rate": 8.968609865470853e-08, "loss": 0.4377, "step": 3209 }, { "epoch": 2.988826815642458, "grad_norm": 0.24825730265133988, "learning_rate": 8.278716798896172e-08, "loss": 0.4474, "step": 3210 }, { "epoch": 2.98975791433892, "grad_norm": 0.2514042094382437, "learning_rate": 7.58882373232149e-08, "loss": 0.4519, "step": 3211 }, { "epoch": 2.990689013035382, "grad_norm": 0.24926501697535453, "learning_rate": 6.89893066574681e-08, "loss": 0.4716, "step": 3212 }, { "epoch": 2.9916201117318435, "grad_norm": 0.23766387920784202, "learning_rate": 6.209037599172128e-08, "loss": 0.4503, "step": 3213 }, { "epoch": 2.9925512104283056, "grad_norm": 0.24392854056676833, "learning_rate": 5.5191445325974476e-08, "loss": 0.4234, "step": 3214 }, { "epoch": 2.9934823091247673, "grad_norm": 0.2526051715237374, "learning_rate": 4.8292514660227664e-08, "loss": 0.4692, "step": 3215 }, { "epoch": 2.994413407821229, "grad_norm": 0.24997613671945926, "learning_rate": 4.139358399448086e-08, "loss": 0.4831, "step": 3216 }, { "epoch": 2.995344506517691, "grad_norm": 0.2655674754975516, "learning_rate": 3.449465332873405e-08, "loss": 0.4729, "step": 3217 }, { "epoch": 2.996275605214153, "grad_norm": 0.2461984545477487, "learning_rate": 2.7595722662987238e-08, "loss": 0.4522, "step": 3218 }, { "epoch": 2.9972067039106145, "grad_norm": 0.24915510281003583, "learning_rate": 2.069679199724043e-08, "loss": 0.487, "step": 3219 }, { "epoch": 2.998137802607076, "grad_norm": 0.2664041466004633, "learning_rate": 1.3797861331493619e-08, "loss": 0.4707, "step": 3220 }, { "epoch": 2.9990689013035383, "grad_norm": 0.24476895593526357, "learning_rate": 6.8989306657468095e-09, "loss": 0.4516, "step": 3221 }, { "epoch": 3.0, "grad_norm": 0.25479147536801305, "learning_rate": 0.0, "loss": 0.4681, "step": 3222 }, { "epoch": 3.0, "step": 3222, "total_flos": 1468598568615936.0, "train_loss": 0.5134665109147795, "train_runtime": 107080.96, "train_samples_per_second": 0.481, "train_steps_per_second": 0.03 } ], "logging_steps": 1, "max_steps": 3222, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1468598568615936.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }