{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999254232232083, "eval_steps": 500, "global_step": 6704, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00014915355358341412, "grad_norm": 36.92216491699219, "learning_rate": 0.0, "loss": 1.5458, "step": 1 }, { "epoch": 0.00029830710716682824, "grad_norm": 32.98695373535156, "learning_rate": 9.900990099009901e-08, "loss": 1.5444, "step": 2 }, { "epoch": 0.0004474606607502424, "grad_norm": 32.070518493652344, "learning_rate": 1.9801980198019803e-07, "loss": 1.5341, "step": 3 }, { "epoch": 0.0005966142143336565, "grad_norm": 33.5890998840332, "learning_rate": 2.9702970297029703e-07, "loss": 1.5701, "step": 4 }, { "epoch": 0.0007457677679170706, "grad_norm": 40.25341796875, "learning_rate": 3.9603960396039606e-07, "loss": 1.5968, "step": 5 }, { "epoch": 0.0008949213215004848, "grad_norm": 39.276485443115234, "learning_rate": 4.950495049504951e-07, "loss": 1.554, "step": 6 }, { "epoch": 0.0010440748750838989, "grad_norm": 34.6932258605957, "learning_rate": 5.940594059405941e-07, "loss": 1.5171, "step": 7 }, { "epoch": 0.001193228428667313, "grad_norm": 35.6229248046875, "learning_rate": 6.930693069306931e-07, "loss": 1.3876, "step": 8 }, { "epoch": 0.001342381982250727, "grad_norm": 26.38096046447754, "learning_rate": 7.920792079207921e-07, "loss": 1.346, "step": 9 }, { "epoch": 0.0014915355358341412, "grad_norm": 31.429758071899414, "learning_rate": 8.910891089108911e-07, "loss": 1.4256, "step": 10 }, { "epoch": 0.0016406890894175555, "grad_norm": 19.476186752319336, "learning_rate": 9.900990099009902e-07, "loss": 1.0587, "step": 11 }, { "epoch": 0.0017898426430009696, "grad_norm": 22.096965789794922, "learning_rate": 1.0891089108910893e-06, "loss": 1.0266, "step": 12 }, { "epoch": 0.0019389961965843837, "grad_norm": 20.304607391357422, "learning_rate": 1.1881188118811881e-06, "loss": 0.9451, "step": 13 }, { "epoch": 0.0020881497501677978, "grad_norm": 5.464245796203613, "learning_rate": 1.2871287128712872e-06, "loss": 0.9868, "step": 14 }, { "epoch": 0.002237303303751212, "grad_norm": 4.45393705368042, "learning_rate": 1.3861386138613863e-06, "loss": 0.9804, "step": 15 }, { "epoch": 0.002386456857334626, "grad_norm": 4.536319255828857, "learning_rate": 1.4851485148514852e-06, "loss": 0.7964, "step": 16 }, { "epoch": 0.0025356104109180403, "grad_norm": 3.868274688720703, "learning_rate": 1.5841584158415842e-06, "loss": 0.8265, "step": 17 }, { "epoch": 0.002684763964501454, "grad_norm": 3.1633100509643555, "learning_rate": 1.6831683168316833e-06, "loss": 0.7613, "step": 18 }, { "epoch": 0.0028339175180848684, "grad_norm": 2.932800054550171, "learning_rate": 1.7821782178217822e-06, "loss": 0.8127, "step": 19 }, { "epoch": 0.0029830710716682823, "grad_norm": 2.792567014694214, "learning_rate": 1.8811881188118813e-06, "loss": 0.8336, "step": 20 }, { "epoch": 0.0031322246252516966, "grad_norm": 3.2671587467193604, "learning_rate": 1.9801980198019803e-06, "loss": 0.8721, "step": 21 }, { "epoch": 0.003281378178835111, "grad_norm": 4.015764236450195, "learning_rate": 2.0792079207920794e-06, "loss": 0.8134, "step": 22 }, { "epoch": 0.003430531732418525, "grad_norm": 3.4283528327941895, "learning_rate": 2.1782178217821785e-06, "loss": 0.7923, "step": 23 }, { "epoch": 0.003579685286001939, "grad_norm": 3.314678430557251, "learning_rate": 2.2772277227722776e-06, "loss": 0.8866, "step": 24 }, { "epoch": 0.003728838839585353, "grad_norm": 2.8368282318115234, "learning_rate": 2.3762376237623762e-06, "loss": 0.7355, "step": 25 }, { "epoch": 0.0038779923931687673, "grad_norm": 4.089293003082275, "learning_rate": 2.4752475247524753e-06, "loss": 0.8772, "step": 26 }, { "epoch": 0.004027145946752181, "grad_norm": 2.707050085067749, "learning_rate": 2.5742574257425744e-06, "loss": 0.8271, "step": 27 }, { "epoch": 0.0041762995003355955, "grad_norm": 2.1034603118896484, "learning_rate": 2.6732673267326735e-06, "loss": 0.8321, "step": 28 }, { "epoch": 0.00432545305391901, "grad_norm": 2.0506651401519775, "learning_rate": 2.7722772277227726e-06, "loss": 0.8044, "step": 29 }, { "epoch": 0.004474606607502424, "grad_norm": 2.26688551902771, "learning_rate": 2.8712871287128712e-06, "loss": 0.8328, "step": 30 }, { "epoch": 0.004623760161085838, "grad_norm": 2.272365093231201, "learning_rate": 2.9702970297029703e-06, "loss": 0.807, "step": 31 }, { "epoch": 0.004772913714669252, "grad_norm": 2.2193591594696045, "learning_rate": 3.0693069306930694e-06, "loss": 0.8297, "step": 32 }, { "epoch": 0.004922067268252666, "grad_norm": 2.0150883197784424, "learning_rate": 3.1683168316831685e-06, "loss": 0.7112, "step": 33 }, { "epoch": 0.0050712208218360805, "grad_norm": 2.0374488830566406, "learning_rate": 3.2673267326732676e-06, "loss": 0.7695, "step": 34 }, { "epoch": 0.005220374375419494, "grad_norm": 2.1476571559906006, "learning_rate": 3.3663366336633666e-06, "loss": 0.7712, "step": 35 }, { "epoch": 0.005369527929002908, "grad_norm": 2.1839993000030518, "learning_rate": 3.4653465346534653e-06, "loss": 0.6723, "step": 36 }, { "epoch": 0.005518681482586323, "grad_norm": 2.841822385787964, "learning_rate": 3.5643564356435644e-06, "loss": 0.8285, "step": 37 }, { "epoch": 0.005667835036169737, "grad_norm": 2.1206412315368652, "learning_rate": 3.6633663366336635e-06, "loss": 0.8512, "step": 38 }, { "epoch": 0.005816988589753151, "grad_norm": 1.998833179473877, "learning_rate": 3.7623762376237625e-06, "loss": 0.7719, "step": 39 }, { "epoch": 0.005966142143336565, "grad_norm": 1.9250319004058838, "learning_rate": 3.861386138613862e-06, "loss": 0.7764, "step": 40 }, { "epoch": 0.006115295696919979, "grad_norm": 2.333120107650757, "learning_rate": 3.960396039603961e-06, "loss": 0.8137, "step": 41 }, { "epoch": 0.006264449250503393, "grad_norm": 2.383761167526245, "learning_rate": 4.05940594059406e-06, "loss": 0.778, "step": 42 }, { "epoch": 0.006413602804086808, "grad_norm": 2.1231133937835693, "learning_rate": 4.158415841584159e-06, "loss": 0.6558, "step": 43 }, { "epoch": 0.006562756357670222, "grad_norm": 1.9906963109970093, "learning_rate": 4.2574257425742575e-06, "loss": 0.7309, "step": 44 }, { "epoch": 0.006711909911253635, "grad_norm": 1.8994287252426147, "learning_rate": 4.356435643564357e-06, "loss": 0.7343, "step": 45 }, { "epoch": 0.00686106346483705, "grad_norm": 3.146566867828369, "learning_rate": 4.455445544554456e-06, "loss": 0.6847, "step": 46 }, { "epoch": 0.007010217018420464, "grad_norm": 2.1074068546295166, "learning_rate": 4.554455445544555e-06, "loss": 0.7678, "step": 47 }, { "epoch": 0.007159370572003878, "grad_norm": 1.8405132293701172, "learning_rate": 4.653465346534654e-06, "loss": 0.7398, "step": 48 }, { "epoch": 0.007308524125587292, "grad_norm": 2.2846107482910156, "learning_rate": 4.7524752475247525e-06, "loss": 0.7475, "step": 49 }, { "epoch": 0.007457677679170706, "grad_norm": 2.415766954421997, "learning_rate": 4.851485148514852e-06, "loss": 0.6626, "step": 50 }, { "epoch": 0.00760683123275412, "grad_norm": 1.776612639427185, "learning_rate": 4.950495049504951e-06, "loss": 0.6838, "step": 51 }, { "epoch": 0.007755984786337535, "grad_norm": 2.0669069290161133, "learning_rate": 5.04950495049505e-06, "loss": 0.7193, "step": 52 }, { "epoch": 0.007905138339920948, "grad_norm": 2.0665717124938965, "learning_rate": 5.148514851485149e-06, "loss": 0.8492, "step": 53 }, { "epoch": 0.008054291893504362, "grad_norm": 1.8973782062530518, "learning_rate": 5.247524752475248e-06, "loss": 0.7628, "step": 54 }, { "epoch": 0.008203445447087777, "grad_norm": 2.0421547889709473, "learning_rate": 5.346534653465347e-06, "loss": 0.6775, "step": 55 }, { "epoch": 0.008352599000671191, "grad_norm": 1.7664592266082764, "learning_rate": 5.4455445544554465e-06, "loss": 0.5939, "step": 56 }, { "epoch": 0.008501752554254605, "grad_norm": 1.7726327180862427, "learning_rate": 5.544554455445545e-06, "loss": 0.6817, "step": 57 }, { "epoch": 0.00865090610783802, "grad_norm": 2.1041879653930664, "learning_rate": 5.643564356435644e-06, "loss": 0.7266, "step": 58 }, { "epoch": 0.008800059661421434, "grad_norm": 1.8937770128250122, "learning_rate": 5.7425742574257425e-06, "loss": 0.7575, "step": 59 }, { "epoch": 0.008949213215004848, "grad_norm": 1.9953014850616455, "learning_rate": 5.841584158415842e-06, "loss": 0.6857, "step": 60 }, { "epoch": 0.00909836676858826, "grad_norm": 2.131871223449707, "learning_rate": 5.940594059405941e-06, "loss": 0.7392, "step": 61 }, { "epoch": 0.009247520322171675, "grad_norm": 1.2271580696105957, "learning_rate": 6.03960396039604e-06, "loss": 0.6544, "step": 62 }, { "epoch": 0.00939667387575509, "grad_norm": 2.0793979167938232, "learning_rate": 6.138613861386139e-06, "loss": 0.7338, "step": 63 }, { "epoch": 0.009545827429338504, "grad_norm": 2.0031867027282715, "learning_rate": 6.237623762376238e-06, "loss": 0.5939, "step": 64 }, { "epoch": 0.009694980982921918, "grad_norm": 1.688515067100525, "learning_rate": 6.336633663366337e-06, "loss": 0.6939, "step": 65 }, { "epoch": 0.009844134536505332, "grad_norm": 1.6522775888442993, "learning_rate": 6.4356435643564364e-06, "loss": 0.7193, "step": 66 }, { "epoch": 0.009993288090088747, "grad_norm": 2.083953619003296, "learning_rate": 6.534653465346535e-06, "loss": 0.8143, "step": 67 }, { "epoch": 0.010142441643672161, "grad_norm": 1.7839269638061523, "learning_rate": 6.633663366336635e-06, "loss": 0.6914, "step": 68 }, { "epoch": 0.010291595197255575, "grad_norm": 1.7338614463806152, "learning_rate": 6.732673267326733e-06, "loss": 0.7772, "step": 69 }, { "epoch": 0.010440748750838988, "grad_norm": 1.7594815492630005, "learning_rate": 6.831683168316833e-06, "loss": 0.6365, "step": 70 }, { "epoch": 0.010589902304422402, "grad_norm": 2.106921672821045, "learning_rate": 6.930693069306931e-06, "loss": 0.7567, "step": 71 }, { "epoch": 0.010739055858005817, "grad_norm": 1.904127597808838, "learning_rate": 7.02970297029703e-06, "loss": 0.7792, "step": 72 }, { "epoch": 0.01088820941158923, "grad_norm": 1.100720763206482, "learning_rate": 7.128712871287129e-06, "loss": 0.6307, "step": 73 }, { "epoch": 0.011037362965172645, "grad_norm": 1.8787920475006104, "learning_rate": 7.227722772277228e-06, "loss": 0.7473, "step": 74 }, { "epoch": 0.01118651651875606, "grad_norm": 1.955082654953003, "learning_rate": 7.326732673267327e-06, "loss": 0.7446, "step": 75 }, { "epoch": 0.011335670072339474, "grad_norm": 1.9005959033966064, "learning_rate": 7.425742574257426e-06, "loss": 0.8291, "step": 76 }, { "epoch": 0.011484823625922888, "grad_norm": 1.0956642627716064, "learning_rate": 7.524752475247525e-06, "loss": 0.6157, "step": 77 }, { "epoch": 0.011633977179506302, "grad_norm": 1.8281444311141968, "learning_rate": 7.6237623762376246e-06, "loss": 0.648, "step": 78 }, { "epoch": 0.011783130733089717, "grad_norm": 1.8096727132797241, "learning_rate": 7.722772277227724e-06, "loss": 0.6914, "step": 79 }, { "epoch": 0.01193228428667313, "grad_norm": 1.7675964832305908, "learning_rate": 7.821782178217822e-06, "loss": 0.6627, "step": 80 }, { "epoch": 0.012081437840256544, "grad_norm": 1.780129313468933, "learning_rate": 7.920792079207921e-06, "loss": 0.6975, "step": 81 }, { "epoch": 0.012230591393839958, "grad_norm": 2.240777015686035, "learning_rate": 8.019801980198021e-06, "loss": 0.6936, "step": 82 }, { "epoch": 0.012379744947423372, "grad_norm": 2.040356159210205, "learning_rate": 8.11881188118812e-06, "loss": 0.6192, "step": 83 }, { "epoch": 0.012528898501006787, "grad_norm": 2.420888900756836, "learning_rate": 8.217821782178218e-06, "loss": 0.7008, "step": 84 }, { "epoch": 0.0126780520545902, "grad_norm": 1.9511325359344482, "learning_rate": 8.316831683168318e-06, "loss": 0.6976, "step": 85 }, { "epoch": 0.012827205608173615, "grad_norm": 1.795839548110962, "learning_rate": 8.415841584158416e-06, "loss": 0.6745, "step": 86 }, { "epoch": 0.01297635916175703, "grad_norm": 2.099539279937744, "learning_rate": 8.514851485148515e-06, "loss": 0.6708, "step": 87 }, { "epoch": 0.013125512715340444, "grad_norm": 2.0511834621429443, "learning_rate": 8.613861386138615e-06, "loss": 0.696, "step": 88 }, { "epoch": 0.013274666268923856, "grad_norm": 2.0645246505737305, "learning_rate": 8.712871287128714e-06, "loss": 0.628, "step": 89 }, { "epoch": 0.01342381982250727, "grad_norm": 2.9790573120117188, "learning_rate": 8.811881188118812e-06, "loss": 0.7349, "step": 90 }, { "epoch": 0.013572973376090685, "grad_norm": 2.0905542373657227, "learning_rate": 8.910891089108911e-06, "loss": 0.6846, "step": 91 }, { "epoch": 0.0137221269296741, "grad_norm": 1.8695952892303467, "learning_rate": 9.009900990099011e-06, "loss": 0.6343, "step": 92 }, { "epoch": 0.013871280483257514, "grad_norm": 1.7865002155303955, "learning_rate": 9.10891089108911e-06, "loss": 0.6531, "step": 93 }, { "epoch": 0.014020434036840928, "grad_norm": 1.96958327293396, "learning_rate": 9.20792079207921e-06, "loss": 0.6654, "step": 94 }, { "epoch": 0.014169587590424342, "grad_norm": 1.7037665843963623, "learning_rate": 9.306930693069308e-06, "loss": 0.622, "step": 95 }, { "epoch": 0.014318741144007757, "grad_norm": 1.8750895261764526, "learning_rate": 9.405940594059405e-06, "loss": 0.6397, "step": 96 }, { "epoch": 0.01446789469759117, "grad_norm": 1.8848108053207397, "learning_rate": 9.504950495049505e-06, "loss": 0.7089, "step": 97 }, { "epoch": 0.014617048251174583, "grad_norm": 2.0947322845458984, "learning_rate": 9.603960396039604e-06, "loss": 0.6295, "step": 98 }, { "epoch": 0.014766201804757998, "grad_norm": 1.788132667541504, "learning_rate": 9.702970297029704e-06, "loss": 0.6823, "step": 99 }, { "epoch": 0.014915355358341412, "grad_norm": 1.9677757024765015, "learning_rate": 9.801980198019802e-06, "loss": 0.6934, "step": 100 }, { "epoch": 0.015064508911924826, "grad_norm": 2.0575408935546875, "learning_rate": 9.900990099009901e-06, "loss": 0.6757, "step": 101 }, { "epoch": 0.01521366246550824, "grad_norm": 1.68389892578125, "learning_rate": 1e-05, "loss": 0.6645, "step": 102 }, { "epoch": 0.015362816019091655, "grad_norm": 1.8255438804626465, "learning_rate": 1.00990099009901e-05, "loss": 0.7001, "step": 103 }, { "epoch": 0.01551196957267507, "grad_norm": 1.9196882247924805, "learning_rate": 1.01980198019802e-05, "loss": 0.6646, "step": 104 }, { "epoch": 0.015661123126258482, "grad_norm": 1.8504014015197754, "learning_rate": 1.0297029702970298e-05, "loss": 0.6285, "step": 105 }, { "epoch": 0.015810276679841896, "grad_norm": 2.0914018154144287, "learning_rate": 1.0396039603960397e-05, "loss": 0.7041, "step": 106 }, { "epoch": 0.01595943023342531, "grad_norm": 1.974321961402893, "learning_rate": 1.0495049504950497e-05, "loss": 0.7039, "step": 107 }, { "epoch": 0.016108583787008725, "grad_norm": 2.0240676403045654, "learning_rate": 1.0594059405940596e-05, "loss": 0.6531, "step": 108 }, { "epoch": 0.01625773734059214, "grad_norm": 1.816896677017212, "learning_rate": 1.0693069306930694e-05, "loss": 0.6904, "step": 109 }, { "epoch": 0.016406890894175553, "grad_norm": 1.6403350830078125, "learning_rate": 1.0792079207920793e-05, "loss": 0.7039, "step": 110 }, { "epoch": 0.016556044447758968, "grad_norm": 1.944907307624817, "learning_rate": 1.0891089108910893e-05, "loss": 0.6662, "step": 111 }, { "epoch": 0.016705198001342382, "grad_norm": 1.7091439962387085, "learning_rate": 1.0990099009900992e-05, "loss": 0.719, "step": 112 }, { "epoch": 0.016854351554925796, "grad_norm": 2.0399298667907715, "learning_rate": 1.108910891089109e-05, "loss": 0.6569, "step": 113 }, { "epoch": 0.01700350510850921, "grad_norm": 1.0012022256851196, "learning_rate": 1.118811881188119e-05, "loss": 0.6129, "step": 114 }, { "epoch": 0.017152658662092625, "grad_norm": 1.7217600345611572, "learning_rate": 1.1287128712871288e-05, "loss": 0.7403, "step": 115 }, { "epoch": 0.01730181221567604, "grad_norm": 2.1743268966674805, "learning_rate": 1.1386138613861385e-05, "loss": 0.6843, "step": 116 }, { "epoch": 0.017450965769259454, "grad_norm": 2.0793709754943848, "learning_rate": 1.1485148514851485e-05, "loss": 0.7181, "step": 117 }, { "epoch": 0.017600119322842868, "grad_norm": 1.9270615577697754, "learning_rate": 1.1584158415841584e-05, "loss": 0.7537, "step": 118 }, { "epoch": 0.017749272876426282, "grad_norm": 1.9419066905975342, "learning_rate": 1.1683168316831684e-05, "loss": 0.747, "step": 119 }, { "epoch": 0.017898426430009697, "grad_norm": 2.111229181289673, "learning_rate": 1.1782178217821782e-05, "loss": 0.694, "step": 120 }, { "epoch": 0.01804757998359311, "grad_norm": 1.887784719467163, "learning_rate": 1.1881188118811881e-05, "loss": 0.6498, "step": 121 }, { "epoch": 0.01819673353717652, "grad_norm": 1.6921495199203491, "learning_rate": 1.198019801980198e-05, "loss": 0.6742, "step": 122 }, { "epoch": 0.018345887090759936, "grad_norm": 1.4983669519424438, "learning_rate": 1.207920792079208e-05, "loss": 0.683, "step": 123 }, { "epoch": 0.01849504064434335, "grad_norm": 2.490964889526367, "learning_rate": 1.217821782178218e-05, "loss": 0.6176, "step": 124 }, { "epoch": 0.018644194197926765, "grad_norm": 1.806031346321106, "learning_rate": 1.2277227722772278e-05, "loss": 0.7076, "step": 125 }, { "epoch": 0.01879334775151018, "grad_norm": 1.93258798122406, "learning_rate": 1.2376237623762377e-05, "loss": 0.7495, "step": 126 }, { "epoch": 0.018942501305093593, "grad_norm": 1.9779177904129028, "learning_rate": 1.2475247524752477e-05, "loss": 0.691, "step": 127 }, { "epoch": 0.019091654858677008, "grad_norm": 1.9377275705337524, "learning_rate": 1.2574257425742576e-05, "loss": 0.7859, "step": 128 }, { "epoch": 0.019240808412260422, "grad_norm": 1.8586045503616333, "learning_rate": 1.2673267326732674e-05, "loss": 0.7028, "step": 129 }, { "epoch": 0.019389961965843836, "grad_norm": 1.9445438385009766, "learning_rate": 1.2772277227722773e-05, "loss": 0.7155, "step": 130 }, { "epoch": 0.01953911551942725, "grad_norm": 2.2063252925872803, "learning_rate": 1.2871287128712873e-05, "loss": 0.7902, "step": 131 }, { "epoch": 0.019688269073010665, "grad_norm": 1.8083064556121826, "learning_rate": 1.2970297029702972e-05, "loss": 0.6562, "step": 132 }, { "epoch": 0.01983742262659408, "grad_norm": 2.23406982421875, "learning_rate": 1.306930693069307e-05, "loss": 0.7306, "step": 133 }, { "epoch": 0.019986576180177493, "grad_norm": 2.238241672515869, "learning_rate": 1.316831683168317e-05, "loss": 0.6536, "step": 134 }, { "epoch": 0.020135729733760908, "grad_norm": 1.8440369367599487, "learning_rate": 1.326732673267327e-05, "loss": 0.7768, "step": 135 }, { "epoch": 0.020284883287344322, "grad_norm": 1.8740732669830322, "learning_rate": 1.3366336633663369e-05, "loss": 0.7338, "step": 136 }, { "epoch": 0.020434036840927736, "grad_norm": 1.6911852359771729, "learning_rate": 1.3465346534653467e-05, "loss": 0.6158, "step": 137 }, { "epoch": 0.02058319039451115, "grad_norm": 1.9738850593566895, "learning_rate": 1.3564356435643566e-05, "loss": 0.6436, "step": 138 }, { "epoch": 0.020732343948094565, "grad_norm": 1.9062066078186035, "learning_rate": 1.3663366336633666e-05, "loss": 0.7317, "step": 139 }, { "epoch": 0.020881497501677976, "grad_norm": 1.8795989751815796, "learning_rate": 1.3762376237623762e-05, "loss": 0.6351, "step": 140 }, { "epoch": 0.02103065105526139, "grad_norm": 2.1018590927124023, "learning_rate": 1.3861386138613861e-05, "loss": 0.608, "step": 141 }, { "epoch": 0.021179804608844804, "grad_norm": 2.1595077514648438, "learning_rate": 1.396039603960396e-05, "loss": 0.777, "step": 142 }, { "epoch": 0.02132895816242822, "grad_norm": 1.7263623476028442, "learning_rate": 1.405940594059406e-05, "loss": 0.5648, "step": 143 }, { "epoch": 0.021478111716011633, "grad_norm": 6.379800319671631, "learning_rate": 1.4158415841584158e-05, "loss": 0.6178, "step": 144 }, { "epoch": 0.021627265269595047, "grad_norm": 2.1058671474456787, "learning_rate": 1.4257425742574257e-05, "loss": 0.7284, "step": 145 }, { "epoch": 0.02177641882317846, "grad_norm": 2.1797609329223633, "learning_rate": 1.4356435643564357e-05, "loss": 0.7114, "step": 146 }, { "epoch": 0.021925572376761876, "grad_norm": 1.7628895044326782, "learning_rate": 1.4455445544554456e-05, "loss": 0.6392, "step": 147 }, { "epoch": 0.02207472593034529, "grad_norm": 2.1740307807922363, "learning_rate": 1.4554455445544556e-05, "loss": 0.7132, "step": 148 }, { "epoch": 0.022223879483928705, "grad_norm": 1.8813271522521973, "learning_rate": 1.4653465346534654e-05, "loss": 0.6801, "step": 149 }, { "epoch": 0.02237303303751212, "grad_norm": 1.9921282529830933, "learning_rate": 1.4752475247524753e-05, "loss": 0.7137, "step": 150 }, { "epoch": 0.022522186591095533, "grad_norm": 1.7442659139633179, "learning_rate": 1.4851485148514853e-05, "loss": 0.5836, "step": 151 }, { "epoch": 0.022671340144678948, "grad_norm": 1.8071566820144653, "learning_rate": 1.4950495049504952e-05, "loss": 0.6871, "step": 152 }, { "epoch": 0.022820493698262362, "grad_norm": 1.7797825336456299, "learning_rate": 1.504950495049505e-05, "loss": 0.6651, "step": 153 }, { "epoch": 0.022969647251845776, "grad_norm": 0.9781525731086731, "learning_rate": 1.514851485148515e-05, "loss": 0.5744, "step": 154 }, { "epoch": 0.02311880080542919, "grad_norm": 1.9098384380340576, "learning_rate": 1.5247524752475249e-05, "loss": 0.654, "step": 155 }, { "epoch": 0.023267954359012605, "grad_norm": 2.070625066757202, "learning_rate": 1.534653465346535e-05, "loss": 0.7809, "step": 156 }, { "epoch": 0.02341710791259602, "grad_norm": 2.4339547157287598, "learning_rate": 1.5445544554455448e-05, "loss": 0.7857, "step": 157 }, { "epoch": 0.023566261466179433, "grad_norm": 1.7346545457839966, "learning_rate": 1.5544554455445548e-05, "loss": 0.7042, "step": 158 }, { "epoch": 0.023715415019762844, "grad_norm": 2.0481715202331543, "learning_rate": 1.5643564356435644e-05, "loss": 0.7022, "step": 159 }, { "epoch": 0.02386456857334626, "grad_norm": 1.7905699014663696, "learning_rate": 1.5742574257425743e-05, "loss": 0.6345, "step": 160 }, { "epoch": 0.024013722126929673, "grad_norm": 2.0855937004089355, "learning_rate": 1.5841584158415843e-05, "loss": 0.6793, "step": 161 }, { "epoch": 0.024162875680513087, "grad_norm": 1.980151891708374, "learning_rate": 1.5940594059405942e-05, "loss": 0.5968, "step": 162 }, { "epoch": 0.0243120292340965, "grad_norm": 1.9153319597244263, "learning_rate": 1.6039603960396042e-05, "loss": 0.6305, "step": 163 }, { "epoch": 0.024461182787679916, "grad_norm": 2.2559401988983154, "learning_rate": 1.613861386138614e-05, "loss": 0.687, "step": 164 }, { "epoch": 0.02461033634126333, "grad_norm": 0.9921658039093018, "learning_rate": 1.623762376237624e-05, "loss": 0.6653, "step": 165 }, { "epoch": 0.024759489894846744, "grad_norm": 2.1134514808654785, "learning_rate": 1.6336633663366337e-05, "loss": 0.6611, "step": 166 }, { "epoch": 0.02490864344843016, "grad_norm": 1.9199398756027222, "learning_rate": 1.6435643564356436e-05, "loss": 0.7249, "step": 167 }, { "epoch": 0.025057797002013573, "grad_norm": 1.8748118877410889, "learning_rate": 1.6534653465346536e-05, "loss": 0.7359, "step": 168 }, { "epoch": 0.025206950555596987, "grad_norm": 2.07641339302063, "learning_rate": 1.6633663366336635e-05, "loss": 0.7101, "step": 169 }, { "epoch": 0.0253561041091804, "grad_norm": 0.9470542669296265, "learning_rate": 1.6732673267326735e-05, "loss": 0.6128, "step": 170 }, { "epoch": 0.025505257662763816, "grad_norm": 1.8873318433761597, "learning_rate": 1.683168316831683e-05, "loss": 0.6238, "step": 171 }, { "epoch": 0.02565441121634723, "grad_norm": 2.0497686862945557, "learning_rate": 1.693069306930693e-05, "loss": 0.7471, "step": 172 }, { "epoch": 0.025803564769930645, "grad_norm": 1.8734363317489624, "learning_rate": 1.702970297029703e-05, "loss": 0.7344, "step": 173 }, { "epoch": 0.02595271832351406, "grad_norm": 1.8623331785202026, "learning_rate": 1.712871287128713e-05, "loss": 0.7204, "step": 174 }, { "epoch": 0.026101871877097473, "grad_norm": 2.102417230606079, "learning_rate": 1.722772277227723e-05, "loss": 0.6573, "step": 175 }, { "epoch": 0.026251025430680888, "grad_norm": 0.9768766164779663, "learning_rate": 1.732673267326733e-05, "loss": 0.6472, "step": 176 }, { "epoch": 0.0264001789842643, "grad_norm": 2.617891311645508, "learning_rate": 1.7425742574257428e-05, "loss": 0.6863, "step": 177 }, { "epoch": 0.026549332537847713, "grad_norm": 2.2418034076690674, "learning_rate": 1.7524752475247528e-05, "loss": 0.5911, "step": 178 }, { "epoch": 0.026698486091431127, "grad_norm": 1.727517008781433, "learning_rate": 1.7623762376237624e-05, "loss": 0.6604, "step": 179 }, { "epoch": 0.02684763964501454, "grad_norm": 2.023036479949951, "learning_rate": 1.7722772277227723e-05, "loss": 0.5869, "step": 180 }, { "epoch": 0.026996793198597956, "grad_norm": 2.0967631340026855, "learning_rate": 1.7821782178217823e-05, "loss": 0.7639, "step": 181 }, { "epoch": 0.02714594675218137, "grad_norm": 0.9908057451248169, "learning_rate": 1.7920792079207922e-05, "loss": 0.6105, "step": 182 }, { "epoch": 0.027295100305764784, "grad_norm": 2.137237071990967, "learning_rate": 1.8019801980198022e-05, "loss": 0.6985, "step": 183 }, { "epoch": 0.0274442538593482, "grad_norm": 2.2505269050598145, "learning_rate": 1.811881188118812e-05, "loss": 0.7089, "step": 184 }, { "epoch": 0.027593407412931613, "grad_norm": 2.0244901180267334, "learning_rate": 1.821782178217822e-05, "loss": 0.7437, "step": 185 }, { "epoch": 0.027742560966515027, "grad_norm": 1.9282609224319458, "learning_rate": 1.831683168316832e-05, "loss": 0.6577, "step": 186 }, { "epoch": 0.02789171452009844, "grad_norm": 2.4953572750091553, "learning_rate": 1.841584158415842e-05, "loss": 0.6992, "step": 187 }, { "epoch": 0.028040868073681856, "grad_norm": 1.8070935010910034, "learning_rate": 1.8514851485148516e-05, "loss": 0.7685, "step": 188 }, { "epoch": 0.02819002162726527, "grad_norm": 1.5304025411605835, "learning_rate": 1.8613861386138615e-05, "loss": 0.6673, "step": 189 }, { "epoch": 0.028339175180848684, "grad_norm": 1.9853897094726562, "learning_rate": 1.8712871287128715e-05, "loss": 0.7049, "step": 190 }, { "epoch": 0.0284883287344321, "grad_norm": 2.0258564949035645, "learning_rate": 1.881188118811881e-05, "loss": 0.6649, "step": 191 }, { "epoch": 0.028637482288015513, "grad_norm": 1.782294750213623, "learning_rate": 1.891089108910891e-05, "loss": 0.5955, "step": 192 }, { "epoch": 0.028786635841598927, "grad_norm": 2.1279022693634033, "learning_rate": 1.900990099009901e-05, "loss": 0.6504, "step": 193 }, { "epoch": 0.02893578939518234, "grad_norm": 1.630865454673767, "learning_rate": 1.910891089108911e-05, "loss": 0.6442, "step": 194 }, { "epoch": 0.029084942948765756, "grad_norm": 1.836958646774292, "learning_rate": 1.920792079207921e-05, "loss": 0.6596, "step": 195 }, { "epoch": 0.029234096502349167, "grad_norm": 1.0128411054611206, "learning_rate": 1.930693069306931e-05, "loss": 0.6497, "step": 196 }, { "epoch": 0.02938325005593258, "grad_norm": 2.050478458404541, "learning_rate": 1.9405940594059408e-05, "loss": 0.7102, "step": 197 }, { "epoch": 0.029532403609515995, "grad_norm": 1.8514869213104248, "learning_rate": 1.9504950495049508e-05, "loss": 0.6757, "step": 198 }, { "epoch": 0.02968155716309941, "grad_norm": 2.524900436401367, "learning_rate": 1.9603960396039604e-05, "loss": 0.7139, "step": 199 }, { "epoch": 0.029830710716682824, "grad_norm": 1.824790120124817, "learning_rate": 1.9702970297029703e-05, "loss": 0.6883, "step": 200 }, { "epoch": 0.02997986427026624, "grad_norm": 1.8570177555084229, "learning_rate": 1.9801980198019803e-05, "loss": 0.6359, "step": 201 }, { "epoch": 0.030129017823849653, "grad_norm": 1.6581342220306396, "learning_rate": 1.9900990099009902e-05, "loss": 0.655, "step": 202 }, { "epoch": 0.030278171377433067, "grad_norm": 1.896280288696289, "learning_rate": 2e-05, "loss": 0.6639, "step": 203 }, { "epoch": 0.03042732493101648, "grad_norm": 2.024322271347046, "learning_rate": 1.999999883271794e-05, "loss": 0.6939, "step": 204 }, { "epoch": 0.030576478484599896, "grad_norm": 2.063051223754883, "learning_rate": 1.9999995330872033e-05, "loss": 0.7943, "step": 205 }, { "epoch": 0.03072563203818331, "grad_norm": 1.9201700687408447, "learning_rate": 1.9999989494463094e-05, "loss": 0.681, "step": 206 }, { "epoch": 0.030874785591766724, "grad_norm": 0.9090811610221863, "learning_rate": 1.9999981323492487e-05, "loss": 0.5991, "step": 207 }, { "epoch": 0.03102393914535014, "grad_norm": 1.7964750528335571, "learning_rate": 1.9999970817962122e-05, "loss": 0.5982, "step": 208 }, { "epoch": 0.031173092698933553, "grad_norm": 1.7143542766571045, "learning_rate": 1.999995797787445e-05, "loss": 0.7416, "step": 209 }, { "epoch": 0.031322246252516964, "grad_norm": 1.7925727367401123, "learning_rate": 1.9999942803232467e-05, "loss": 0.7063, "step": 210 }, { "epoch": 0.03147139980610038, "grad_norm": 1.6111809015274048, "learning_rate": 1.999992529403971e-05, "loss": 0.6691, "step": 211 }, { "epoch": 0.03162055335968379, "grad_norm": 1.5845181941986084, "learning_rate": 1.9999905450300284e-05, "loss": 0.7332, "step": 212 }, { "epoch": 0.03176970691326721, "grad_norm": 1.9301520586013794, "learning_rate": 1.9999883272018805e-05, "loss": 0.6931, "step": 213 }, { "epoch": 0.03191886046685062, "grad_norm": 1.8776655197143555, "learning_rate": 1.9999858759200455e-05, "loss": 0.689, "step": 214 }, { "epoch": 0.032068014020434035, "grad_norm": 1.7354377508163452, "learning_rate": 1.999983191185096e-05, "loss": 0.6652, "step": 215 }, { "epoch": 0.03221716757401745, "grad_norm": 1.5700976848602295, "learning_rate": 1.999980272997659e-05, "loss": 0.6222, "step": 216 }, { "epoch": 0.032366321127600864, "grad_norm": 2.075620412826538, "learning_rate": 1.9999771213584147e-05, "loss": 0.7126, "step": 217 }, { "epoch": 0.03251547468118428, "grad_norm": 1.6374626159667969, "learning_rate": 1.9999737362680997e-05, "loss": 0.7856, "step": 218 }, { "epoch": 0.03266462823476769, "grad_norm": 1.7725255489349365, "learning_rate": 1.9999701177275045e-05, "loss": 0.6793, "step": 219 }, { "epoch": 0.03281378178835111, "grad_norm": 1.9540414810180664, "learning_rate": 1.9999662657374732e-05, "loss": 0.5782, "step": 220 }, { "epoch": 0.03296293534193452, "grad_norm": 1.9151517152786255, "learning_rate": 1.999962180298905e-05, "loss": 0.7614, "step": 221 }, { "epoch": 0.033112088895517935, "grad_norm": 1.695085883140564, "learning_rate": 1.9999578614127544e-05, "loss": 0.685, "step": 222 }, { "epoch": 0.03326124244910135, "grad_norm": 1.7316575050354004, "learning_rate": 1.9999533090800293e-05, "loss": 0.6429, "step": 223 }, { "epoch": 0.033410396002684764, "grad_norm": 1.792098045349121, "learning_rate": 1.9999485233017926e-05, "loss": 0.5367, "step": 224 }, { "epoch": 0.03355954955626818, "grad_norm": 1.637241005897522, "learning_rate": 1.9999435040791612e-05, "loss": 0.7331, "step": 225 }, { "epoch": 0.03370870310985159, "grad_norm": 2.0412065982818604, "learning_rate": 1.999938251413307e-05, "loss": 0.587, "step": 226 }, { "epoch": 0.03385785666343501, "grad_norm": 1.6914960145950317, "learning_rate": 1.9999327653054563e-05, "loss": 0.6212, "step": 227 }, { "epoch": 0.03400701021701842, "grad_norm": 0.9368652105331421, "learning_rate": 1.9999270457568904e-05, "loss": 0.5993, "step": 228 }, { "epoch": 0.034156163770601836, "grad_norm": 1.577174186706543, "learning_rate": 1.9999210927689438e-05, "loss": 0.7327, "step": 229 }, { "epoch": 0.03430531732418525, "grad_norm": 0.984321117401123, "learning_rate": 1.9999149063430066e-05, "loss": 0.6367, "step": 230 }, { "epoch": 0.034454470877768664, "grad_norm": 1.6921968460083008, "learning_rate": 1.999908486480523e-05, "loss": 0.6822, "step": 231 }, { "epoch": 0.03460362443135208, "grad_norm": 1.8694889545440674, "learning_rate": 1.9999018331829916e-05, "loss": 0.7481, "step": 232 }, { "epoch": 0.03475277798493549, "grad_norm": 1.8590909242630005, "learning_rate": 1.999894946451966e-05, "loss": 0.7103, "step": 233 }, { "epoch": 0.03490193153851891, "grad_norm": 1.8738954067230225, "learning_rate": 1.999887826289054e-05, "loss": 0.6147, "step": 234 }, { "epoch": 0.03505108509210232, "grad_norm": 1.5443377494812012, "learning_rate": 1.9998804726959173e-05, "loss": 0.642, "step": 235 }, { "epoch": 0.035200238645685736, "grad_norm": 1.6340501308441162, "learning_rate": 1.9998728856742732e-05, "loss": 0.6553, "step": 236 }, { "epoch": 0.03534939219926915, "grad_norm": 1.726060152053833, "learning_rate": 1.9998650652258926e-05, "loss": 0.7165, "step": 237 }, { "epoch": 0.035498545752852564, "grad_norm": 1.8382149934768677, "learning_rate": 1.9998570113526013e-05, "loss": 0.6153, "step": 238 }, { "epoch": 0.03564769930643598, "grad_norm": 1.904850721359253, "learning_rate": 1.9998487240562798e-05, "loss": 0.6351, "step": 239 }, { "epoch": 0.03579685286001939, "grad_norm": 1.9118977785110474, "learning_rate": 1.9998402033388626e-05, "loss": 0.6608, "step": 240 }, { "epoch": 0.03594600641360281, "grad_norm": 1.9714041948318481, "learning_rate": 1.9998314492023387e-05, "loss": 0.6605, "step": 241 }, { "epoch": 0.03609515996718622, "grad_norm": 0.9456652402877808, "learning_rate": 1.9998224616487523e-05, "loss": 0.6158, "step": 242 }, { "epoch": 0.03624431352076963, "grad_norm": 2.241100788116455, "learning_rate": 1.9998132406802008e-05, "loss": 0.7405, "step": 243 }, { "epoch": 0.03639346707435304, "grad_norm": 1.6386393308639526, "learning_rate": 1.999803786298838e-05, "loss": 0.6719, "step": 244 }, { "epoch": 0.03654262062793646, "grad_norm": 1.6826163530349731, "learning_rate": 1.9997940985068702e-05, "loss": 0.7168, "step": 245 }, { "epoch": 0.03669177418151987, "grad_norm": 1.7141472101211548, "learning_rate": 1.9997841773065594e-05, "loss": 0.7708, "step": 246 }, { "epoch": 0.036840927735103286, "grad_norm": 1.6540422439575195, "learning_rate": 1.9997740227002217e-05, "loss": 0.5706, "step": 247 }, { "epoch": 0.0369900812886867, "grad_norm": 1.8182923793792725, "learning_rate": 1.9997636346902284e-05, "loss": 0.6971, "step": 248 }, { "epoch": 0.037139234842270115, "grad_norm": 1.712659239768982, "learning_rate": 1.9997530132790034e-05, "loss": 0.6956, "step": 249 }, { "epoch": 0.03728838839585353, "grad_norm": 0.9497385025024414, "learning_rate": 1.9997421584690272e-05, "loss": 0.6602, "step": 250 }, { "epoch": 0.037437541949436944, "grad_norm": 1.948074460029602, "learning_rate": 1.9997310702628338e-05, "loss": 0.6322, "step": 251 }, { "epoch": 0.03758669550302036, "grad_norm": 1.7413309812545776, "learning_rate": 1.9997197486630116e-05, "loss": 0.7428, "step": 252 }, { "epoch": 0.03773584905660377, "grad_norm": 1.6951396465301514, "learning_rate": 1.9997081936722037e-05, "loss": 0.6651, "step": 253 }, { "epoch": 0.037885002610187186, "grad_norm": 2.2283527851104736, "learning_rate": 1.9996964052931082e-05, "loss": 0.7202, "step": 254 }, { "epoch": 0.0380341561637706, "grad_norm": 1.8530325889587402, "learning_rate": 1.9996843835284765e-05, "loss": 0.6927, "step": 255 }, { "epoch": 0.038183309717354015, "grad_norm": 1.9539196491241455, "learning_rate": 1.9996721283811157e-05, "loss": 0.7317, "step": 256 }, { "epoch": 0.03833246327093743, "grad_norm": 2.5016028881073, "learning_rate": 1.9996596398538865e-05, "loss": 0.6578, "step": 257 }, { "epoch": 0.038481616824520844, "grad_norm": 1.8400236368179321, "learning_rate": 1.9996469179497045e-05, "loss": 0.6845, "step": 258 }, { "epoch": 0.03863077037810426, "grad_norm": 1.7261146306991577, "learning_rate": 1.99963396267154e-05, "loss": 0.6783, "step": 259 }, { "epoch": 0.03877992393168767, "grad_norm": 1.5373321771621704, "learning_rate": 1.999620774022417e-05, "loss": 0.6583, "step": 260 }, { "epoch": 0.03892907748527109, "grad_norm": 1.7068219184875488, "learning_rate": 1.9996073520054143e-05, "loss": 0.7067, "step": 261 }, { "epoch": 0.0390782310388545, "grad_norm": 1.7001949548721313, "learning_rate": 1.999593696623666e-05, "loss": 0.6809, "step": 262 }, { "epoch": 0.039227384592437915, "grad_norm": 1.6629447937011719, "learning_rate": 1.99957980788036e-05, "loss": 0.5827, "step": 263 }, { "epoch": 0.03937653814602133, "grad_norm": 0.945839524269104, "learning_rate": 1.9995656857787384e-05, "loss": 0.6179, "step": 264 }, { "epoch": 0.039525691699604744, "grad_norm": 1.9057893753051758, "learning_rate": 1.999551330322098e-05, "loss": 0.6139, "step": 265 }, { "epoch": 0.03967484525318816, "grad_norm": 1.7867391109466553, "learning_rate": 1.9995367415137906e-05, "loss": 0.673, "step": 266 }, { "epoch": 0.03982399880677157, "grad_norm": 2.1136422157287598, "learning_rate": 1.9995219193572216e-05, "loss": 0.6706, "step": 267 }, { "epoch": 0.03997315236035499, "grad_norm": 1.7092766761779785, "learning_rate": 1.9995068638558522e-05, "loss": 0.7293, "step": 268 }, { "epoch": 0.0401223059139384, "grad_norm": 0.9085705280303955, "learning_rate": 1.999491575013196e-05, "loss": 0.6408, "step": 269 }, { "epoch": 0.040271459467521815, "grad_norm": 1.7525678873062134, "learning_rate": 1.9994760528328226e-05, "loss": 0.6188, "step": 270 }, { "epoch": 0.04042061302110523, "grad_norm": 0.9064345359802246, "learning_rate": 1.999460297318357e-05, "loss": 0.592, "step": 271 }, { "epoch": 0.040569766574688644, "grad_norm": 1.689382553100586, "learning_rate": 1.9994443084734754e-05, "loss": 0.6563, "step": 272 }, { "epoch": 0.04071892012827206, "grad_norm": 5.918508052825928, "learning_rate": 1.999428086301912e-05, "loss": 0.6128, "step": 273 }, { "epoch": 0.04086807368185547, "grad_norm": 1.8249995708465576, "learning_rate": 1.9994116308074532e-05, "loss": 0.7415, "step": 274 }, { "epoch": 0.04101722723543889, "grad_norm": 1.6792097091674805, "learning_rate": 1.9993949419939412e-05, "loss": 0.6715, "step": 275 }, { "epoch": 0.0411663807890223, "grad_norm": 1.5557347536087036, "learning_rate": 1.9993780198652716e-05, "loss": 0.6836, "step": 276 }, { "epoch": 0.041315534342605716, "grad_norm": 1.6975330114364624, "learning_rate": 1.9993608644253954e-05, "loss": 0.6943, "step": 277 }, { "epoch": 0.04146468789618913, "grad_norm": 1.9052762985229492, "learning_rate": 1.9993434756783173e-05, "loss": 0.5835, "step": 278 }, { "epoch": 0.041613841449772544, "grad_norm": 1.7515053749084473, "learning_rate": 1.999325853628097e-05, "loss": 0.6762, "step": 279 }, { "epoch": 0.04176299500335595, "grad_norm": 1.834414005279541, "learning_rate": 1.9993079982788486e-05, "loss": 0.6995, "step": 280 }, { "epoch": 0.041912148556939366, "grad_norm": 1.795310139656067, "learning_rate": 1.9992899096347403e-05, "loss": 0.7143, "step": 281 }, { "epoch": 0.04206130211052278, "grad_norm": 1.8297550678253174, "learning_rate": 1.9992715876999953e-05, "loss": 0.746, "step": 282 }, { "epoch": 0.042210455664106195, "grad_norm": 2.1686792373657227, "learning_rate": 1.9992530324788903e-05, "loss": 0.6, "step": 283 }, { "epoch": 0.04235960921768961, "grad_norm": 1.9349682331085205, "learning_rate": 1.999234243975758e-05, "loss": 0.7762, "step": 284 }, { "epoch": 0.04250876277127302, "grad_norm": 1.5931382179260254, "learning_rate": 1.9992152221949842e-05, "loss": 0.6931, "step": 285 }, { "epoch": 0.04265791632485644, "grad_norm": 1.7943066358566284, "learning_rate": 1.99919596714101e-05, "loss": 0.7198, "step": 286 }, { "epoch": 0.04280706987843985, "grad_norm": 1.5613676309585571, "learning_rate": 1.9991764788183303e-05, "loss": 0.704, "step": 287 }, { "epoch": 0.042956223432023266, "grad_norm": 1.742148518562317, "learning_rate": 1.9991567572314948e-05, "loss": 0.6283, "step": 288 }, { "epoch": 0.04310537698560668, "grad_norm": 1.7323133945465088, "learning_rate": 1.9991368023851078e-05, "loss": 0.603, "step": 289 }, { "epoch": 0.043254530539190095, "grad_norm": 1.8742575645446777, "learning_rate": 1.9991166142838276e-05, "loss": 0.7048, "step": 290 }, { "epoch": 0.04340368409277351, "grad_norm": 1.8628225326538086, "learning_rate": 1.9990961929323674e-05, "loss": 0.7252, "step": 291 }, { "epoch": 0.04355283764635692, "grad_norm": 1.8010063171386719, "learning_rate": 1.999075538335495e-05, "loss": 0.7591, "step": 292 }, { "epoch": 0.04370199119994034, "grad_norm": 1.547349214553833, "learning_rate": 1.9990546504980318e-05, "loss": 0.6899, "step": 293 }, { "epoch": 0.04385114475352375, "grad_norm": 1.7552978992462158, "learning_rate": 1.9990335294248543e-05, "loss": 0.7076, "step": 294 }, { "epoch": 0.044000298307107166, "grad_norm": 1.5564199686050415, "learning_rate": 1.999012175120894e-05, "loss": 0.6339, "step": 295 }, { "epoch": 0.04414945186069058, "grad_norm": 1.6074401140213013, "learning_rate": 1.9989905875911353e-05, "loss": 0.6982, "step": 296 }, { "epoch": 0.044298605414273995, "grad_norm": 1.6962569952011108, "learning_rate": 1.9989687668406184e-05, "loss": 0.6374, "step": 297 }, { "epoch": 0.04444775896785741, "grad_norm": 1.758829951286316, "learning_rate": 1.998946712874438e-05, "loss": 0.7177, "step": 298 }, { "epoch": 0.044596912521440824, "grad_norm": 1.6178417205810547, "learning_rate": 1.9989244256977415e-05, "loss": 0.6831, "step": 299 }, { "epoch": 0.04474606607502424, "grad_norm": 0.9710322618484497, "learning_rate": 1.998901905315733e-05, "loss": 0.6479, "step": 300 }, { "epoch": 0.04489521962860765, "grad_norm": 1.7336548566818237, "learning_rate": 1.99887915173367e-05, "loss": 0.6309, "step": 301 }, { "epoch": 0.045044373182191066, "grad_norm": 1.655758261680603, "learning_rate": 1.9988561649568636e-05, "loss": 0.6875, "step": 302 }, { "epoch": 0.04519352673577448, "grad_norm": 1.5989564657211304, "learning_rate": 1.998832944990681e-05, "loss": 0.6796, "step": 303 }, { "epoch": 0.045342680289357895, "grad_norm": 1.759684443473816, "learning_rate": 1.9988094918405427e-05, "loss": 0.6817, "step": 304 }, { "epoch": 0.04549183384294131, "grad_norm": 1.9051567316055298, "learning_rate": 1.9987858055119243e-05, "loss": 0.7153, "step": 305 }, { "epoch": 0.045640987396524724, "grad_norm": 1.590834379196167, "learning_rate": 1.9987618860103554e-05, "loss": 0.6612, "step": 306 }, { "epoch": 0.04579014095010814, "grad_norm": 1.8708422183990479, "learning_rate": 1.9987377333414203e-05, "loss": 0.6505, "step": 307 }, { "epoch": 0.04593929450369155, "grad_norm": 1.620811104774475, "learning_rate": 1.998713347510757e-05, "loss": 0.7304, "step": 308 }, { "epoch": 0.04608844805727497, "grad_norm": 1.8140910863876343, "learning_rate": 1.9986887285240592e-05, "loss": 0.6616, "step": 309 }, { "epoch": 0.04623760161085838, "grad_norm": 1.9545270204544067, "learning_rate": 1.998663876387074e-05, "loss": 0.7244, "step": 310 }, { "epoch": 0.046386755164441795, "grad_norm": 1.7029718160629272, "learning_rate": 1.9986387911056034e-05, "loss": 0.7491, "step": 311 }, { "epoch": 0.04653590871802521, "grad_norm": 1.9743213653564453, "learning_rate": 1.9986134726855036e-05, "loss": 0.7271, "step": 312 }, { "epoch": 0.046685062271608624, "grad_norm": 1.7432786226272583, "learning_rate": 1.9985879211326857e-05, "loss": 0.6876, "step": 313 }, { "epoch": 0.04683421582519204, "grad_norm": 1.528729796409607, "learning_rate": 1.9985621364531144e-05, "loss": 0.6007, "step": 314 }, { "epoch": 0.04698336937877545, "grad_norm": 1.8192017078399658, "learning_rate": 1.9985361186528097e-05, "loss": 0.6983, "step": 315 }, { "epoch": 0.04713252293235887, "grad_norm": 1.8742650747299194, "learning_rate": 1.9985098677378456e-05, "loss": 0.7005, "step": 316 }, { "epoch": 0.047281676485942274, "grad_norm": 1.9126719236373901, "learning_rate": 1.99848338371435e-05, "loss": 0.5241, "step": 317 }, { "epoch": 0.04743083003952569, "grad_norm": 1.7431330680847168, "learning_rate": 1.9984566665885064e-05, "loss": 0.6252, "step": 318 }, { "epoch": 0.0475799835931091, "grad_norm": 1.6938427686691284, "learning_rate": 1.9984297163665518e-05, "loss": 0.6724, "step": 319 }, { "epoch": 0.04772913714669252, "grad_norm": 2.319845676422119, "learning_rate": 1.998402533054778e-05, "loss": 0.7634, "step": 320 }, { "epoch": 0.04787829070027593, "grad_norm": 1.871701955795288, "learning_rate": 1.998375116659531e-05, "loss": 0.7144, "step": 321 }, { "epoch": 0.048027444253859346, "grad_norm": 1.608954668045044, "learning_rate": 1.9983474671872112e-05, "loss": 0.6903, "step": 322 }, { "epoch": 0.04817659780744276, "grad_norm": 1.6130424737930298, "learning_rate": 1.998319584644274e-05, "loss": 0.657, "step": 323 }, { "epoch": 0.048325751361026174, "grad_norm": 1.6585006713867188, "learning_rate": 1.9982914690372282e-05, "loss": 0.6751, "step": 324 }, { "epoch": 0.04847490491460959, "grad_norm": 1.7420029640197754, "learning_rate": 1.9982631203726385e-05, "loss": 0.7124, "step": 325 }, { "epoch": 0.048624058468193, "grad_norm": 1.534053921699524, "learning_rate": 1.9982345386571217e-05, "loss": 0.6428, "step": 326 }, { "epoch": 0.04877321202177642, "grad_norm": 1.7740097045898438, "learning_rate": 1.9982057238973516e-05, "loss": 0.5917, "step": 327 }, { "epoch": 0.04892236557535983, "grad_norm": 1.9452265501022339, "learning_rate": 1.998176676100055e-05, "loss": 0.6266, "step": 328 }, { "epoch": 0.049071519128943246, "grad_norm": 1.811617136001587, "learning_rate": 1.9981473952720122e-05, "loss": 0.6402, "step": 329 }, { "epoch": 0.04922067268252666, "grad_norm": 1.4412835836410522, "learning_rate": 1.9981178814200603e-05, "loss": 0.632, "step": 330 }, { "epoch": 0.049369826236110075, "grad_norm": 1.8284388780593872, "learning_rate": 1.998088134551089e-05, "loss": 0.7324, "step": 331 }, { "epoch": 0.04951897978969349, "grad_norm": 1.7711963653564453, "learning_rate": 1.998058154672043e-05, "loss": 0.6594, "step": 332 }, { "epoch": 0.0496681333432769, "grad_norm": 1.6581401824951172, "learning_rate": 1.998027941789921e-05, "loss": 0.7524, "step": 333 }, { "epoch": 0.04981728689686032, "grad_norm": 1.8812575340270996, "learning_rate": 1.997997495911777e-05, "loss": 0.7379, "step": 334 }, { "epoch": 0.04996644045044373, "grad_norm": 1.630303978919983, "learning_rate": 1.9979668170447176e-05, "loss": 0.6875, "step": 335 }, { "epoch": 0.050115594004027146, "grad_norm": 1.566950798034668, "learning_rate": 1.9979359051959063e-05, "loss": 0.6288, "step": 336 }, { "epoch": 0.05026474755761056, "grad_norm": 1.7833932638168335, "learning_rate": 1.997904760372559e-05, "loss": 0.6287, "step": 337 }, { "epoch": 0.050413901111193975, "grad_norm": 1.4972172975540161, "learning_rate": 1.997873382581947e-05, "loss": 0.6406, "step": 338 }, { "epoch": 0.05056305466477739, "grad_norm": 1.9563583135604858, "learning_rate": 1.9978417718313953e-05, "loss": 0.7044, "step": 339 }, { "epoch": 0.0507122082183608, "grad_norm": 1.7578548192977905, "learning_rate": 1.997809928128284e-05, "loss": 0.7192, "step": 340 }, { "epoch": 0.05086136177194422, "grad_norm": 1.8881940841674805, "learning_rate": 1.9977778514800462e-05, "loss": 0.7352, "step": 341 }, { "epoch": 0.05101051532552763, "grad_norm": 1.704276204109192, "learning_rate": 1.997745541894172e-05, "loss": 0.7321, "step": 342 }, { "epoch": 0.051159668879111046, "grad_norm": 1.8175841569900513, "learning_rate": 1.997712999378203e-05, "loss": 0.677, "step": 343 }, { "epoch": 0.05130882243269446, "grad_norm": 1.9780632257461548, "learning_rate": 1.9976802239397373e-05, "loss": 0.7508, "step": 344 }, { "epoch": 0.051457975986277875, "grad_norm": 1.8277007341384888, "learning_rate": 1.9976472155864258e-05, "loss": 0.687, "step": 345 }, { "epoch": 0.05160712953986129, "grad_norm": 1.5946344137191772, "learning_rate": 1.997613974325975e-05, "loss": 0.6768, "step": 346 }, { "epoch": 0.051756283093444704, "grad_norm": 1.9946075677871704, "learning_rate": 1.997580500166145e-05, "loss": 0.7311, "step": 347 }, { "epoch": 0.05190543664702812, "grad_norm": 1.6564384698867798, "learning_rate": 1.9975467931147512e-05, "loss": 0.6745, "step": 348 }, { "epoch": 0.05205459020061153, "grad_norm": 2.0186665058135986, "learning_rate": 1.997512853179662e-05, "loss": 0.6911, "step": 349 }, { "epoch": 0.052203743754194946, "grad_norm": 1.6721726655960083, "learning_rate": 1.997478680368801e-05, "loss": 0.6945, "step": 350 }, { "epoch": 0.05235289730777836, "grad_norm": 1.6082340478897095, "learning_rate": 1.9974442746901464e-05, "loss": 0.6293, "step": 351 }, { "epoch": 0.052502050861361775, "grad_norm": 3.0723226070404053, "learning_rate": 1.9974096361517302e-05, "loss": 0.7003, "step": 352 }, { "epoch": 0.05265120441494519, "grad_norm": 4.430475234985352, "learning_rate": 1.9973747647616387e-05, "loss": 0.7422, "step": 353 }, { "epoch": 0.0528003579685286, "grad_norm": 1.7926065921783447, "learning_rate": 1.9973396605280135e-05, "loss": 0.7012, "step": 354 }, { "epoch": 0.05294951152211201, "grad_norm": 1.9095968008041382, "learning_rate": 1.9973043234590495e-05, "loss": 0.6983, "step": 355 }, { "epoch": 0.053098665075695425, "grad_norm": 1.7210172414779663, "learning_rate": 1.9972687535629962e-05, "loss": 0.6448, "step": 356 }, { "epoch": 0.05324781862927884, "grad_norm": 1.6274832487106323, "learning_rate": 1.997232950848158e-05, "loss": 0.6517, "step": 357 }, { "epoch": 0.053396972182862254, "grad_norm": 1.6906979084014893, "learning_rate": 1.9971969153228934e-05, "loss": 0.6855, "step": 358 }, { "epoch": 0.05354612573644567, "grad_norm": 1.5578092336654663, "learning_rate": 1.9971606469956146e-05, "loss": 0.5922, "step": 359 }, { "epoch": 0.05369527929002908, "grad_norm": 1.6812094449996948, "learning_rate": 1.997124145874789e-05, "loss": 0.6564, "step": 360 }, { "epoch": 0.0538444328436125, "grad_norm": 1.4542714357376099, "learning_rate": 1.997087411968938e-05, "loss": 0.7163, "step": 361 }, { "epoch": 0.05399358639719591, "grad_norm": 1.8421802520751953, "learning_rate": 1.997050445286637e-05, "loss": 0.6806, "step": 362 }, { "epoch": 0.054142739950779326, "grad_norm": 1.5713063478469849, "learning_rate": 1.9970132458365165e-05, "loss": 0.6678, "step": 363 }, { "epoch": 0.05429189350436274, "grad_norm": 1.5525243282318115, "learning_rate": 1.9969758136272614e-05, "loss": 0.5847, "step": 364 }, { "epoch": 0.054441047057946154, "grad_norm": 2.168562412261963, "learning_rate": 1.9969381486676092e-05, "loss": 0.6186, "step": 365 }, { "epoch": 0.05459020061152957, "grad_norm": 1.5176540613174438, "learning_rate": 1.9969002509663543e-05, "loss": 0.6394, "step": 366 }, { "epoch": 0.05473935416511298, "grad_norm": 1.7966879606246948, "learning_rate": 1.9968621205323434e-05, "loss": 0.6703, "step": 367 }, { "epoch": 0.0548885077186964, "grad_norm": 2.400258779525757, "learning_rate": 1.9968237573744788e-05, "loss": 0.6129, "step": 368 }, { "epoch": 0.05503766127227981, "grad_norm": 1.5387853384017944, "learning_rate": 1.9967851615017164e-05, "loss": 0.6635, "step": 369 }, { "epoch": 0.055186814825863226, "grad_norm": 1.947782278060913, "learning_rate": 1.9967463329230665e-05, "loss": 0.6853, "step": 370 }, { "epoch": 0.05533596837944664, "grad_norm": 1.5605324506759644, "learning_rate": 1.9967072716475938e-05, "loss": 0.6153, "step": 371 }, { "epoch": 0.055485121933030054, "grad_norm": 1.6440119743347168, "learning_rate": 1.996667977684418e-05, "loss": 0.6398, "step": 372 }, { "epoch": 0.05563427548661347, "grad_norm": 1.650449514389038, "learning_rate": 1.9966284510427118e-05, "loss": 0.7317, "step": 373 }, { "epoch": 0.05578342904019688, "grad_norm": 0.9983797073364258, "learning_rate": 1.9965886917317034e-05, "loss": 0.6257, "step": 374 }, { "epoch": 0.0559325825937803, "grad_norm": 1.9520258903503418, "learning_rate": 1.9965486997606747e-05, "loss": 0.6683, "step": 375 }, { "epoch": 0.05608173614736371, "grad_norm": 1.760480523109436, "learning_rate": 1.996508475138962e-05, "loss": 0.6317, "step": 376 }, { "epoch": 0.056230889700947126, "grad_norm": 2.3690285682678223, "learning_rate": 1.9964680178759565e-05, "loss": 0.6598, "step": 377 }, { "epoch": 0.05638004325453054, "grad_norm": 2.67075777053833, "learning_rate": 1.9964273279811026e-05, "loss": 0.7498, "step": 378 }, { "epoch": 0.056529196808113955, "grad_norm": 0.9335999488830566, "learning_rate": 1.9963864054639e-05, "loss": 0.6267, "step": 379 }, { "epoch": 0.05667835036169737, "grad_norm": 1.7975751161575317, "learning_rate": 1.996345250333902e-05, "loss": 0.6682, "step": 380 }, { "epoch": 0.05682750391528078, "grad_norm": 1.7660634517669678, "learning_rate": 1.996303862600717e-05, "loss": 0.6867, "step": 381 }, { "epoch": 0.0569766574688642, "grad_norm": 1.665769338607788, "learning_rate": 1.9962622422740067e-05, "loss": 0.6812, "step": 382 }, { "epoch": 0.05712581102244761, "grad_norm": 1.8262207508087158, "learning_rate": 1.996220389363488e-05, "loss": 0.5901, "step": 383 }, { "epoch": 0.057274964576031026, "grad_norm": 1.6281687021255493, "learning_rate": 1.9961783038789314e-05, "loss": 0.6812, "step": 384 }, { "epoch": 0.05742411812961444, "grad_norm": 1.859866976737976, "learning_rate": 1.9961359858301622e-05, "loss": 0.5931, "step": 385 }, { "epoch": 0.057573271683197855, "grad_norm": 1.8121986389160156, "learning_rate": 1.99609343522706e-05, "loss": 0.7031, "step": 386 }, { "epoch": 0.05772242523678127, "grad_norm": 1.6583836078643799, "learning_rate": 1.9960506520795585e-05, "loss": 0.7203, "step": 387 }, { "epoch": 0.05787157879036468, "grad_norm": 1.7455133199691772, "learning_rate": 1.9960076363976454e-05, "loss": 0.6401, "step": 388 }, { "epoch": 0.0580207323439481, "grad_norm": 1.8479375839233398, "learning_rate": 1.995964388191363e-05, "loss": 0.6756, "step": 389 }, { "epoch": 0.05816988589753151, "grad_norm": 2.093024969100952, "learning_rate": 1.9959209074708084e-05, "loss": 0.5751, "step": 390 }, { "epoch": 0.058319039451114926, "grad_norm": 1.9123907089233398, "learning_rate": 1.995877194246132e-05, "loss": 0.6946, "step": 391 }, { "epoch": 0.058468193004698334, "grad_norm": 1.8215664625167847, "learning_rate": 1.9958332485275386e-05, "loss": 0.5982, "step": 392 }, { "epoch": 0.05861734655828175, "grad_norm": 1.7527741193771362, "learning_rate": 1.9957890703252882e-05, "loss": 0.6453, "step": 393 }, { "epoch": 0.05876650011186516, "grad_norm": 1.504239797592163, "learning_rate": 1.9957446596496945e-05, "loss": 0.5988, "step": 394 }, { "epoch": 0.05891565366544858, "grad_norm": 1.7933719158172607, "learning_rate": 1.995700016511125e-05, "loss": 0.6817, "step": 395 }, { "epoch": 0.05906480721903199, "grad_norm": 1.55682373046875, "learning_rate": 1.995655140920002e-05, "loss": 0.6272, "step": 396 }, { "epoch": 0.059213960772615405, "grad_norm": 1.997829556465149, "learning_rate": 1.995610032886803e-05, "loss": 0.6589, "step": 397 }, { "epoch": 0.05936311432619882, "grad_norm": 1.7128522396087646, "learning_rate": 1.995564692422057e-05, "loss": 0.6675, "step": 398 }, { "epoch": 0.059512267879782234, "grad_norm": 1.9805389642715454, "learning_rate": 1.9955191195363505e-05, "loss": 0.6816, "step": 399 }, { "epoch": 0.05966142143336565, "grad_norm": 1.6636357307434082, "learning_rate": 1.995473314240322e-05, "loss": 0.5775, "step": 400 }, { "epoch": 0.05981057498694906, "grad_norm": 1.8860646486282349, "learning_rate": 1.9954272765446656e-05, "loss": 0.6918, "step": 401 }, { "epoch": 0.05995972854053248, "grad_norm": 1.3999172449111938, "learning_rate": 1.9953810064601284e-05, "loss": 0.6598, "step": 402 }, { "epoch": 0.06010888209411589, "grad_norm": 1.7088838815689087, "learning_rate": 1.995334503997513e-05, "loss": 0.6943, "step": 403 }, { "epoch": 0.060258035647699305, "grad_norm": 1.741254448890686, "learning_rate": 1.9952877691676754e-05, "loss": 0.5539, "step": 404 }, { "epoch": 0.06040718920128272, "grad_norm": 1.7632094621658325, "learning_rate": 1.9952408019815266e-05, "loss": 0.6658, "step": 405 }, { "epoch": 0.060556342754866134, "grad_norm": 1.7868708372116089, "learning_rate": 1.9951936024500306e-05, "loss": 0.7108, "step": 406 }, { "epoch": 0.06070549630844955, "grad_norm": 1.8950071334838867, "learning_rate": 1.9951461705842073e-05, "loss": 0.6769, "step": 407 }, { "epoch": 0.06085464986203296, "grad_norm": 1.653320074081421, "learning_rate": 1.995098506395129e-05, "loss": 0.6766, "step": 408 }, { "epoch": 0.06100380341561638, "grad_norm": 1.6158607006072998, "learning_rate": 1.9950506098939243e-05, "loss": 0.6617, "step": 409 }, { "epoch": 0.06115295696919979, "grad_norm": 1.7413448095321655, "learning_rate": 1.9950024810917745e-05, "loss": 0.642, "step": 410 }, { "epoch": 0.061302110522783206, "grad_norm": 1.8516916036605835, "learning_rate": 1.994954119999915e-05, "loss": 0.6541, "step": 411 }, { "epoch": 0.06145126407636662, "grad_norm": 1.692361831665039, "learning_rate": 1.994905526629637e-05, "loss": 0.6558, "step": 412 }, { "epoch": 0.061600417629950034, "grad_norm": 1.6203035116195679, "learning_rate": 1.9948567009922842e-05, "loss": 0.6876, "step": 413 }, { "epoch": 0.06174957118353345, "grad_norm": 1.4859381914138794, "learning_rate": 1.9948076430992557e-05, "loss": 0.6658, "step": 414 }, { "epoch": 0.06189872473711686, "grad_norm": 1.5086331367492676, "learning_rate": 1.9947583529620038e-05, "loss": 0.5785, "step": 415 }, { "epoch": 0.06204787829070028, "grad_norm": 1.9302666187286377, "learning_rate": 1.994708830592036e-05, "loss": 0.6036, "step": 416 }, { "epoch": 0.06219703184428369, "grad_norm": 1.684299349784851, "learning_rate": 1.9946590760009137e-05, "loss": 0.6895, "step": 417 }, { "epoch": 0.062346185397867106, "grad_norm": 1.8487755060195923, "learning_rate": 1.9946090892002524e-05, "loss": 0.7083, "step": 418 }, { "epoch": 0.06249533895145052, "grad_norm": 1.6743108034133911, "learning_rate": 1.9945588702017215e-05, "loss": 0.6089, "step": 419 }, { "epoch": 0.06264449250503393, "grad_norm": 1.5953309535980225, "learning_rate": 1.9945084190170456e-05, "loss": 0.6289, "step": 420 }, { "epoch": 0.06279364605861734, "grad_norm": 1.4322932958602905, "learning_rate": 1.9944577356580023e-05, "loss": 0.6578, "step": 421 }, { "epoch": 0.06294279961220076, "grad_norm": 1.5158798694610596, "learning_rate": 1.9944068201364238e-05, "loss": 0.7161, "step": 422 }, { "epoch": 0.06309195316578417, "grad_norm": 0.9372950792312622, "learning_rate": 1.9943556724641975e-05, "loss": 0.5857, "step": 423 }, { "epoch": 0.06324110671936758, "grad_norm": 1.9623982906341553, "learning_rate": 1.9943042926532634e-05, "loss": 0.6803, "step": 424 }, { "epoch": 0.063390260272951, "grad_norm": 0.9115292429924011, "learning_rate": 1.9942526807156166e-05, "loss": 0.6338, "step": 425 }, { "epoch": 0.06353941382653441, "grad_norm": 2.170140266418457, "learning_rate": 1.9942008366633063e-05, "loss": 0.5681, "step": 426 }, { "epoch": 0.06368856738011783, "grad_norm": 1.7907644510269165, "learning_rate": 1.994148760508436e-05, "loss": 0.7485, "step": 427 }, { "epoch": 0.06383772093370124, "grad_norm": 1.6841803789138794, "learning_rate": 1.994096452263163e-05, "loss": 0.6409, "step": 428 }, { "epoch": 0.06398687448728466, "grad_norm": 1.751373529434204, "learning_rate": 1.9940439119396985e-05, "loss": 0.6005, "step": 429 }, { "epoch": 0.06413602804086807, "grad_norm": 1.6386817693710327, "learning_rate": 1.9939911395503094e-05, "loss": 0.6299, "step": 430 }, { "epoch": 0.06428518159445148, "grad_norm": 1.8094133138656616, "learning_rate": 1.9939381351073153e-05, "loss": 0.6252, "step": 431 }, { "epoch": 0.0644343351480349, "grad_norm": 2.282388925552368, "learning_rate": 1.9938848986230904e-05, "loss": 0.6061, "step": 432 }, { "epoch": 0.06458348870161831, "grad_norm": 1.7226208448410034, "learning_rate": 1.993831430110063e-05, "loss": 0.7372, "step": 433 }, { "epoch": 0.06473264225520173, "grad_norm": 2.0204222202301025, "learning_rate": 1.9937777295807156e-05, "loss": 0.686, "step": 434 }, { "epoch": 0.06488179580878514, "grad_norm": 0.9400020241737366, "learning_rate": 1.9937237970475857e-05, "loss": 0.5749, "step": 435 }, { "epoch": 0.06503094936236856, "grad_norm": 0.9852834939956665, "learning_rate": 1.993669632523263e-05, "loss": 0.6391, "step": 436 }, { "epoch": 0.06518010291595197, "grad_norm": 1.7794992923736572, "learning_rate": 1.993615236020393e-05, "loss": 0.7029, "step": 437 }, { "epoch": 0.06532925646953539, "grad_norm": 0.964280903339386, "learning_rate": 1.9935606075516754e-05, "loss": 0.6221, "step": 438 }, { "epoch": 0.0654784100231188, "grad_norm": 1.728346824645996, "learning_rate": 1.9935057471298633e-05, "loss": 0.6612, "step": 439 }, { "epoch": 0.06562756357670221, "grad_norm": 1.733012318611145, "learning_rate": 1.993450654767764e-05, "loss": 0.7926, "step": 440 }, { "epoch": 0.06577671713028563, "grad_norm": 1.3469960689544678, "learning_rate": 1.993395330478239e-05, "loss": 0.6557, "step": 441 }, { "epoch": 0.06592587068386904, "grad_norm": 1.592687726020813, "learning_rate": 1.993339774274205e-05, "loss": 0.701, "step": 442 }, { "epoch": 0.06607502423745246, "grad_norm": 1.8214818239212036, "learning_rate": 1.993283986168631e-05, "loss": 0.6305, "step": 443 }, { "epoch": 0.06622417779103587, "grad_norm": 1.627408504486084, "learning_rate": 1.9932279661745416e-05, "loss": 0.5097, "step": 444 }, { "epoch": 0.06637333134461929, "grad_norm": 1.7463032007217407, "learning_rate": 1.9931717143050147e-05, "loss": 0.6544, "step": 445 }, { "epoch": 0.0665224848982027, "grad_norm": 1.6122454404830933, "learning_rate": 1.9931152305731828e-05, "loss": 0.6049, "step": 446 }, { "epoch": 0.06667163845178611, "grad_norm": 1.6395748853683472, "learning_rate": 1.9930585149922325e-05, "loss": 0.6615, "step": 447 }, { "epoch": 0.06682079200536953, "grad_norm": 1.094444751739502, "learning_rate": 1.9930015675754047e-05, "loss": 0.6349, "step": 448 }, { "epoch": 0.06696994555895294, "grad_norm": 1.569772481918335, "learning_rate": 1.9929443883359934e-05, "loss": 0.6104, "step": 449 }, { "epoch": 0.06711909911253636, "grad_norm": 1.7605676651000977, "learning_rate": 1.992886977287348e-05, "loss": 0.6532, "step": 450 }, { "epoch": 0.06726825266611977, "grad_norm": 1.7532607316970825, "learning_rate": 1.9928293344428714e-05, "loss": 0.624, "step": 451 }, { "epoch": 0.06741740621970319, "grad_norm": 1.6727925539016724, "learning_rate": 1.9927714598160204e-05, "loss": 0.6426, "step": 452 }, { "epoch": 0.0675665597732866, "grad_norm": 1.5993363857269287, "learning_rate": 1.9927133534203064e-05, "loss": 0.5968, "step": 453 }, { "epoch": 0.06771571332687001, "grad_norm": 1.6163758039474487, "learning_rate": 1.992655015269295e-05, "loss": 0.6698, "step": 454 }, { "epoch": 0.06786486688045343, "grad_norm": 1.4537652730941772, "learning_rate": 1.992596445376605e-05, "loss": 0.6456, "step": 455 }, { "epoch": 0.06801402043403684, "grad_norm": 2.1640355587005615, "learning_rate": 1.9925376437559106e-05, "loss": 0.6924, "step": 456 }, { "epoch": 0.06816317398762026, "grad_norm": 1.479079008102417, "learning_rate": 1.992478610420939e-05, "loss": 0.5863, "step": 457 }, { "epoch": 0.06831232754120367, "grad_norm": 1.5800102949142456, "learning_rate": 1.992419345385472e-05, "loss": 0.6544, "step": 458 }, { "epoch": 0.06846148109478709, "grad_norm": 1.6458271741867065, "learning_rate": 1.992359848663345e-05, "loss": 0.6087, "step": 459 }, { "epoch": 0.0686106346483705, "grad_norm": 1.6239736080169678, "learning_rate": 1.992300120268449e-05, "loss": 0.6775, "step": 460 }, { "epoch": 0.06875978820195391, "grad_norm": 1.945278525352478, "learning_rate": 1.9922401602147266e-05, "loss": 0.6788, "step": 461 }, { "epoch": 0.06890894175553733, "grad_norm": 1.7496908903121948, "learning_rate": 1.992179968516177e-05, "loss": 0.6714, "step": 462 }, { "epoch": 0.06905809530912074, "grad_norm": 1.6961671113967896, "learning_rate": 1.9921195451868514e-05, "loss": 0.6628, "step": 463 }, { "epoch": 0.06920724886270416, "grad_norm": 1.5949809551239014, "learning_rate": 1.9920588902408567e-05, "loss": 0.7165, "step": 464 }, { "epoch": 0.06935640241628757, "grad_norm": 1.533654808998108, "learning_rate": 1.991998003692353e-05, "loss": 0.6439, "step": 465 }, { "epoch": 0.06950555596987099, "grad_norm": 2.0545060634613037, "learning_rate": 1.9919368855555546e-05, "loss": 0.5611, "step": 466 }, { "epoch": 0.0696547095234544, "grad_norm": 1.690652847290039, "learning_rate": 1.9918755358447298e-05, "loss": 0.6946, "step": 467 }, { "epoch": 0.06980386307703781, "grad_norm": 1.5265007019042969, "learning_rate": 1.991813954574201e-05, "loss": 0.6232, "step": 468 }, { "epoch": 0.06995301663062123, "grad_norm": 1.7482165098190308, "learning_rate": 1.9917521417583456e-05, "loss": 0.5999, "step": 469 }, { "epoch": 0.07010217018420464, "grad_norm": 1.495908498764038, "learning_rate": 1.9916900974115932e-05, "loss": 0.6148, "step": 470 }, { "epoch": 0.07025132373778806, "grad_norm": 1.657746434211731, "learning_rate": 1.9916278215484288e-05, "loss": 0.6627, "step": 471 }, { "epoch": 0.07040047729137147, "grad_norm": 0.9675084352493286, "learning_rate": 1.991565314183391e-05, "loss": 0.6097, "step": 472 }, { "epoch": 0.07054963084495489, "grad_norm": 1.6570855379104614, "learning_rate": 1.9915025753310727e-05, "loss": 0.6851, "step": 473 }, { "epoch": 0.0706987843985383, "grad_norm": 1.5346611738204956, "learning_rate": 1.9914396050061212e-05, "loss": 0.6398, "step": 474 }, { "epoch": 0.07084793795212171, "grad_norm": 1.6426540613174438, "learning_rate": 1.9913764032232362e-05, "loss": 0.6328, "step": 475 }, { "epoch": 0.07099709150570513, "grad_norm": 2.075326681137085, "learning_rate": 1.991312969997173e-05, "loss": 0.6032, "step": 476 }, { "epoch": 0.07114624505928854, "grad_norm": 1.7251144647598267, "learning_rate": 1.991249305342741e-05, "loss": 0.677, "step": 477 }, { "epoch": 0.07129539861287196, "grad_norm": 1.7077943086624146, "learning_rate": 1.9911854092748023e-05, "loss": 0.6616, "step": 478 }, { "epoch": 0.07144455216645537, "grad_norm": 1.6846380233764648, "learning_rate": 1.9911212818082746e-05, "loss": 0.6302, "step": 479 }, { "epoch": 0.07159370572003879, "grad_norm": 3.0815391540527344, "learning_rate": 1.9910569229581288e-05, "loss": 0.6424, "step": 480 }, { "epoch": 0.0717428592736222, "grad_norm": 1.0207934379577637, "learning_rate": 1.990992332739389e-05, "loss": 0.6445, "step": 481 }, { "epoch": 0.07189201282720561, "grad_norm": 0.9705840945243835, "learning_rate": 1.9909275111671354e-05, "loss": 0.6357, "step": 482 }, { "epoch": 0.07204116638078903, "grad_norm": 1.8309470415115356, "learning_rate": 1.9908624582565002e-05, "loss": 0.5882, "step": 483 }, { "epoch": 0.07219031993437244, "grad_norm": 1.5031229257583618, "learning_rate": 1.9907971740226708e-05, "loss": 0.622, "step": 484 }, { "epoch": 0.07233947348795584, "grad_norm": 1.6272138357162476, "learning_rate": 1.990731658480888e-05, "loss": 0.6444, "step": 485 }, { "epoch": 0.07248862704153926, "grad_norm": 1.5642133951187134, "learning_rate": 1.9906659116464467e-05, "loss": 0.6886, "step": 486 }, { "epoch": 0.07263778059512267, "grad_norm": 1.6990296840667725, "learning_rate": 1.9905999335346967e-05, "loss": 0.6869, "step": 487 }, { "epoch": 0.07278693414870609, "grad_norm": 1.7603625059127808, "learning_rate": 1.99053372416104e-05, "loss": 0.7116, "step": 488 }, { "epoch": 0.0729360877022895, "grad_norm": 1.5168344974517822, "learning_rate": 1.990467283540934e-05, "loss": 0.6749, "step": 489 }, { "epoch": 0.07308524125587292, "grad_norm": 1.5461933612823486, "learning_rate": 1.9904006116898903e-05, "loss": 0.6746, "step": 490 }, { "epoch": 0.07323439480945633, "grad_norm": 1.6643576622009277, "learning_rate": 1.990333708623473e-05, "loss": 0.6274, "step": 491 }, { "epoch": 0.07338354836303974, "grad_norm": 1.5289549827575684, "learning_rate": 1.9902665743573012e-05, "loss": 0.6561, "step": 492 }, { "epoch": 0.07353270191662316, "grad_norm": 1.5743873119354248, "learning_rate": 1.9901992089070483e-05, "loss": 0.6542, "step": 493 }, { "epoch": 0.07368185547020657, "grad_norm": 1.7291470766067505, "learning_rate": 1.9901316122884405e-05, "loss": 0.6062, "step": 494 }, { "epoch": 0.07383100902378999, "grad_norm": 2.0846285820007324, "learning_rate": 1.9900637845172594e-05, "loss": 0.58, "step": 495 }, { "epoch": 0.0739801625773734, "grad_norm": 1.4196741580963135, "learning_rate": 1.9899957256093393e-05, "loss": 0.5982, "step": 496 }, { "epoch": 0.07412931613095682, "grad_norm": 1.764605164527893, "learning_rate": 1.989927435580569e-05, "loss": 0.6588, "step": 497 }, { "epoch": 0.07427846968454023, "grad_norm": 1.6349411010742188, "learning_rate": 1.9898589144468916e-05, "loss": 0.6958, "step": 498 }, { "epoch": 0.07442762323812364, "grad_norm": 1.8132356405258179, "learning_rate": 1.9897901622243038e-05, "loss": 0.755, "step": 499 }, { "epoch": 0.07457677679170706, "grad_norm": 1.4702504873275757, "learning_rate": 1.9897211789288556e-05, "loss": 0.627, "step": 500 }, { "epoch": 0.07472593034529047, "grad_norm": 1.652934193611145, "learning_rate": 1.989651964576653e-05, "loss": 0.6147, "step": 501 }, { "epoch": 0.07487508389887389, "grad_norm": 1.756883978843689, "learning_rate": 1.9895825191838524e-05, "loss": 0.7263, "step": 502 }, { "epoch": 0.0750242374524573, "grad_norm": 1.1431188583374023, "learning_rate": 1.989512842766668e-05, "loss": 0.6178, "step": 503 }, { "epoch": 0.07517339100604072, "grad_norm": 1.7256568670272827, "learning_rate": 1.989442935341366e-05, "loss": 0.5597, "step": 504 }, { "epoch": 0.07532254455962413, "grad_norm": 1.6098023653030396, "learning_rate": 1.9893727969242657e-05, "loss": 0.6395, "step": 505 }, { "epoch": 0.07547169811320754, "grad_norm": 1.7647732496261597, "learning_rate": 1.9893024275317424e-05, "loss": 0.6544, "step": 506 }, { "epoch": 0.07562085166679096, "grad_norm": 1.6159120798110962, "learning_rate": 1.989231827180224e-05, "loss": 0.6337, "step": 507 }, { "epoch": 0.07577000522037437, "grad_norm": 1.6595507860183716, "learning_rate": 1.9891609958861926e-05, "loss": 0.711, "step": 508 }, { "epoch": 0.07591915877395779, "grad_norm": 1.6622313261032104, "learning_rate": 1.989089933666184e-05, "loss": 0.7888, "step": 509 }, { "epoch": 0.0760683123275412, "grad_norm": 1.676362156867981, "learning_rate": 1.9890186405367884e-05, "loss": 0.6429, "step": 510 }, { "epoch": 0.07621746588112462, "grad_norm": 1.751158595085144, "learning_rate": 1.9889471165146495e-05, "loss": 0.6911, "step": 511 }, { "epoch": 0.07636661943470803, "grad_norm": 1.4496623277664185, "learning_rate": 1.988875361616465e-05, "loss": 0.6286, "step": 512 }, { "epoch": 0.07651577298829144, "grad_norm": 1.6740806102752686, "learning_rate": 1.988803375858987e-05, "loss": 0.651, "step": 513 }, { "epoch": 0.07666492654187486, "grad_norm": 1.6429758071899414, "learning_rate": 1.9887311592590205e-05, "loss": 0.6659, "step": 514 }, { "epoch": 0.07681408009545827, "grad_norm": 1.5839036703109741, "learning_rate": 1.9886587118334248e-05, "loss": 0.62, "step": 515 }, { "epoch": 0.07696323364904169, "grad_norm": 1.6791614294052124, "learning_rate": 1.9885860335991136e-05, "loss": 0.6833, "step": 516 }, { "epoch": 0.0771123872026251, "grad_norm": 1.4896997213363647, "learning_rate": 1.988513124573054e-05, "loss": 0.6326, "step": 517 }, { "epoch": 0.07726154075620852, "grad_norm": 1.5892245769500732, "learning_rate": 1.9884399847722676e-05, "loss": 0.7278, "step": 518 }, { "epoch": 0.07741069430979193, "grad_norm": 2.1928975582122803, "learning_rate": 1.9883666142138282e-05, "loss": 0.6592, "step": 519 }, { "epoch": 0.07755984786337534, "grad_norm": 1.6396502256393433, "learning_rate": 1.9882930129148653e-05, "loss": 0.5904, "step": 520 }, { "epoch": 0.07770900141695876, "grad_norm": 1.6556516885757446, "learning_rate": 1.988219180892562e-05, "loss": 0.7258, "step": 521 }, { "epoch": 0.07785815497054217, "grad_norm": 1.478659987449646, "learning_rate": 1.9881451181641542e-05, "loss": 0.6816, "step": 522 }, { "epoch": 0.07800730852412559, "grad_norm": 1.564424753189087, "learning_rate": 1.9880708247469328e-05, "loss": 0.6041, "step": 523 }, { "epoch": 0.078156462077709, "grad_norm": 1.5228307247161865, "learning_rate": 1.9879963006582413e-05, "loss": 0.638, "step": 524 }, { "epoch": 0.07830561563129242, "grad_norm": 1.342201590538025, "learning_rate": 1.9879215459154787e-05, "loss": 0.6, "step": 525 }, { "epoch": 0.07845476918487583, "grad_norm": 1.4340630769729614, "learning_rate": 1.9878465605360963e-05, "loss": 0.6543, "step": 526 }, { "epoch": 0.07860392273845924, "grad_norm": 1.4723255634307861, "learning_rate": 1.9877713445376005e-05, "loss": 0.6697, "step": 527 }, { "epoch": 0.07875307629204266, "grad_norm": 2.275023937225342, "learning_rate": 1.9876958979375507e-05, "loss": 0.5788, "step": 528 }, { "epoch": 0.07890222984562607, "grad_norm": 1.6123937368392944, "learning_rate": 1.98762022075356e-05, "loss": 0.681, "step": 529 }, { "epoch": 0.07905138339920949, "grad_norm": 1.8401548862457275, "learning_rate": 1.9875443130032968e-05, "loss": 0.7319, "step": 530 }, { "epoch": 0.0792005369527929, "grad_norm": 1.4318791627883911, "learning_rate": 1.987468174704481e-05, "loss": 0.6509, "step": 531 }, { "epoch": 0.07934969050637632, "grad_norm": 1.668291449546814, "learning_rate": 1.9873918058748886e-05, "loss": 0.6098, "step": 532 }, { "epoch": 0.07949884405995973, "grad_norm": 1.7580995559692383, "learning_rate": 1.9873152065323476e-05, "loss": 0.6311, "step": 533 }, { "epoch": 0.07964799761354315, "grad_norm": 1.7447493076324463, "learning_rate": 1.987238376694741e-05, "loss": 0.7222, "step": 534 }, { "epoch": 0.07979715116712656, "grad_norm": 1.7200292348861694, "learning_rate": 1.987161316380005e-05, "loss": 0.5882, "step": 535 }, { "epoch": 0.07994630472070997, "grad_norm": 1.6165827512741089, "learning_rate": 1.98708402560613e-05, "loss": 0.7271, "step": 536 }, { "epoch": 0.08009545827429339, "grad_norm": 1.5224027633666992, "learning_rate": 1.9870065043911603e-05, "loss": 0.6391, "step": 537 }, { "epoch": 0.0802446118278768, "grad_norm": 1.527768611907959, "learning_rate": 1.986928752753193e-05, "loss": 0.6368, "step": 538 }, { "epoch": 0.08039376538146022, "grad_norm": 1.5529346466064453, "learning_rate": 1.9868507707103806e-05, "loss": 0.5707, "step": 539 }, { "epoch": 0.08054291893504363, "grad_norm": 1.703432559967041, "learning_rate": 1.9867725582809278e-05, "loss": 0.5549, "step": 540 }, { "epoch": 0.08069207248862705, "grad_norm": 1.4534913301467896, "learning_rate": 1.986694115483094e-05, "loss": 0.6353, "step": 541 }, { "epoch": 0.08084122604221046, "grad_norm": 1.5535482168197632, "learning_rate": 1.9866154423351923e-05, "loss": 0.6488, "step": 542 }, { "epoch": 0.08099037959579387, "grad_norm": 1.4707636833190918, "learning_rate": 1.9865365388555896e-05, "loss": 0.6849, "step": 543 }, { "epoch": 0.08113953314937729, "grad_norm": 1.097051978111267, "learning_rate": 1.986457405062706e-05, "loss": 0.6514, "step": 544 }, { "epoch": 0.0812886867029607, "grad_norm": 1.8710356950759888, "learning_rate": 1.986378040975016e-05, "loss": 0.7304, "step": 545 }, { "epoch": 0.08143784025654412, "grad_norm": 1.7394551038742065, "learning_rate": 1.9862984466110476e-05, "loss": 0.6502, "step": 546 }, { "epoch": 0.08158699381012753, "grad_norm": 1.9933334589004517, "learning_rate": 1.9862186219893825e-05, "loss": 0.6224, "step": 547 }, { "epoch": 0.08173614736371095, "grad_norm": 1.5962252616882324, "learning_rate": 1.9861385671286565e-05, "loss": 0.6699, "step": 548 }, { "epoch": 0.08188530091729436, "grad_norm": 1.6047147512435913, "learning_rate": 1.9860582820475593e-05, "loss": 0.7011, "step": 549 }, { "epoch": 0.08203445447087777, "grad_norm": 1.8674390316009521, "learning_rate": 1.9859777667648326e-05, "loss": 0.6979, "step": 550 }, { "epoch": 0.08218360802446119, "grad_norm": 1.5255403518676758, "learning_rate": 1.985897021299275e-05, "loss": 0.6117, "step": 551 }, { "epoch": 0.0823327615780446, "grad_norm": 1.7299890518188477, "learning_rate": 1.985816045669736e-05, "loss": 0.7918, "step": 552 }, { "epoch": 0.08248191513162802, "grad_norm": 0.9653944373130798, "learning_rate": 1.98573483989512e-05, "loss": 0.6487, "step": 553 }, { "epoch": 0.08263106868521143, "grad_norm": 1.661624550819397, "learning_rate": 1.985653403994385e-05, "loss": 0.7244, "step": 554 }, { "epoch": 0.08278022223879485, "grad_norm": 1.5255143642425537, "learning_rate": 1.9855717379865424e-05, "loss": 0.6564, "step": 555 }, { "epoch": 0.08292937579237826, "grad_norm": 0.8525955677032471, "learning_rate": 1.9854898418906585e-05, "loss": 0.5891, "step": 556 }, { "epoch": 0.08307852934596167, "grad_norm": 0.8858622908592224, "learning_rate": 1.985407715725852e-05, "loss": 0.6282, "step": 557 }, { "epoch": 0.08322768289954509, "grad_norm": 1.5129183530807495, "learning_rate": 1.9853253595112955e-05, "loss": 0.639, "step": 558 }, { "epoch": 0.08337683645312849, "grad_norm": 1.810118317604065, "learning_rate": 1.985242773266216e-05, "loss": 0.6308, "step": 559 }, { "epoch": 0.0835259900067119, "grad_norm": 1.6203738451004028, "learning_rate": 1.985159957009894e-05, "loss": 0.6727, "step": 560 }, { "epoch": 0.08367514356029532, "grad_norm": 1.8611032962799072, "learning_rate": 1.985076910761663e-05, "loss": 0.7429, "step": 561 }, { "epoch": 0.08382429711387873, "grad_norm": 1.7393643856048584, "learning_rate": 1.9849936345409105e-05, "loss": 0.6942, "step": 562 }, { "epoch": 0.08397345066746215, "grad_norm": 4.72689962387085, "learning_rate": 1.9849101283670787e-05, "loss": 0.655, "step": 563 }, { "epoch": 0.08412260422104556, "grad_norm": 1.7221858501434326, "learning_rate": 1.9848263922596617e-05, "loss": 0.6812, "step": 564 }, { "epoch": 0.08427175777462897, "grad_norm": 1.9679824113845825, "learning_rate": 1.9847424262382087e-05, "loss": 0.7605, "step": 565 }, { "epoch": 0.08442091132821239, "grad_norm": 1.523980975151062, "learning_rate": 1.9846582303223224e-05, "loss": 0.6956, "step": 566 }, { "epoch": 0.0845700648817958, "grad_norm": 1.7927173376083374, "learning_rate": 1.9845738045316584e-05, "loss": 0.6283, "step": 567 }, { "epoch": 0.08471921843537922, "grad_norm": 1.589636206626892, "learning_rate": 1.9844891488859267e-05, "loss": 0.6297, "step": 568 }, { "epoch": 0.08486837198896263, "grad_norm": 1.6904953718185425, "learning_rate": 1.9844042634048905e-05, "loss": 0.7178, "step": 569 }, { "epoch": 0.08501752554254605, "grad_norm": 1.7502079010009766, "learning_rate": 1.984319148108367e-05, "loss": 0.7124, "step": 570 }, { "epoch": 0.08516667909612946, "grad_norm": 1.708628535270691, "learning_rate": 1.9842338030162273e-05, "loss": 0.6609, "step": 571 }, { "epoch": 0.08531583264971287, "grad_norm": 1.5743129253387451, "learning_rate": 1.9841482281483946e-05, "loss": 0.544, "step": 572 }, { "epoch": 0.08546498620329629, "grad_norm": 1.8545080423355103, "learning_rate": 1.984062423524848e-05, "loss": 0.733, "step": 573 }, { "epoch": 0.0856141397568797, "grad_norm": 1.51149582862854, "learning_rate": 1.9839763891656186e-05, "loss": 0.6685, "step": 574 }, { "epoch": 0.08576329331046312, "grad_norm": 1.5100282430648804, "learning_rate": 1.9838901250907924e-05, "loss": 0.6859, "step": 575 }, { "epoch": 0.08591244686404653, "grad_norm": 1.3929991722106934, "learning_rate": 1.9838036313205073e-05, "loss": 0.6466, "step": 576 }, { "epoch": 0.08606160041762995, "grad_norm": 1.570647954940796, "learning_rate": 1.9837169078749567e-05, "loss": 0.6903, "step": 577 }, { "epoch": 0.08621075397121336, "grad_norm": 1.8447463512420654, "learning_rate": 1.9836299547743856e-05, "loss": 0.6547, "step": 578 }, { "epoch": 0.08635990752479678, "grad_norm": 1.7672159671783447, "learning_rate": 1.983542772039095e-05, "loss": 0.6682, "step": 579 }, { "epoch": 0.08650906107838019, "grad_norm": 1.5601093769073486, "learning_rate": 1.9834553596894377e-05, "loss": 0.6391, "step": 580 }, { "epoch": 0.0866582146319636, "grad_norm": 1.9348547458648682, "learning_rate": 1.9833677177458207e-05, "loss": 0.7544, "step": 581 }, { "epoch": 0.08680736818554702, "grad_norm": 1.6732280254364014, "learning_rate": 1.9832798462287047e-05, "loss": 0.5781, "step": 582 }, { "epoch": 0.08695652173913043, "grad_norm": 1.618187665939331, "learning_rate": 1.9831917451586036e-05, "loss": 0.6868, "step": 583 }, { "epoch": 0.08710567529271385, "grad_norm": 1.6078842878341675, "learning_rate": 1.9831034145560854e-05, "loss": 0.6666, "step": 584 }, { "epoch": 0.08725482884629726, "grad_norm": 1.743135690689087, "learning_rate": 1.983014854441771e-05, "loss": 0.542, "step": 585 }, { "epoch": 0.08740398239988068, "grad_norm": 1.9819692373275757, "learning_rate": 1.9829260648363366e-05, "loss": 0.6646, "step": 586 }, { "epoch": 0.08755313595346409, "grad_norm": 1.6588596105575562, "learning_rate": 1.982837045760509e-05, "loss": 0.6487, "step": 587 }, { "epoch": 0.0877022895070475, "grad_norm": 1.5473445653915405, "learning_rate": 1.9827477972350713e-05, "loss": 0.6213, "step": 588 }, { "epoch": 0.08785144306063092, "grad_norm": 1.744753360748291, "learning_rate": 1.982658319280859e-05, "loss": 0.6384, "step": 589 }, { "epoch": 0.08800059661421433, "grad_norm": 1.5509107112884521, "learning_rate": 1.9825686119187613e-05, "loss": 0.6429, "step": 590 }, { "epoch": 0.08814975016779775, "grad_norm": 1.8104336261749268, "learning_rate": 1.9824786751697206e-05, "loss": 0.6315, "step": 591 }, { "epoch": 0.08829890372138116, "grad_norm": 1.8194185495376587, "learning_rate": 1.9823885090547338e-05, "loss": 0.5669, "step": 592 }, { "epoch": 0.08844805727496458, "grad_norm": 1.5641429424285889, "learning_rate": 1.98229811359485e-05, "loss": 0.6609, "step": 593 }, { "epoch": 0.08859721082854799, "grad_norm": 3.177111864089966, "learning_rate": 1.9822074888111738e-05, "loss": 0.6831, "step": 594 }, { "epoch": 0.0887463643821314, "grad_norm": 1.812391757965088, "learning_rate": 1.9821166347248607e-05, "loss": 0.7603, "step": 595 }, { "epoch": 0.08889551793571482, "grad_norm": 1.7165594100952148, "learning_rate": 1.9820255513571222e-05, "loss": 0.5898, "step": 596 }, { "epoch": 0.08904467148929823, "grad_norm": 1.4585093259811401, "learning_rate": 1.981934238729222e-05, "loss": 0.5535, "step": 597 }, { "epoch": 0.08919382504288165, "grad_norm": 1.4909954071044922, "learning_rate": 1.9818426968624772e-05, "loss": 0.6456, "step": 598 }, { "epoch": 0.08934297859646506, "grad_norm": 1.7065155506134033, "learning_rate": 1.9817509257782595e-05, "loss": 0.7047, "step": 599 }, { "epoch": 0.08949213215004848, "grad_norm": 1.1883465051651, "learning_rate": 1.9816589254979932e-05, "loss": 0.6378, "step": 600 }, { "epoch": 0.08964128570363189, "grad_norm": 1.7346761226654053, "learning_rate": 1.981566696043156e-05, "loss": 0.7996, "step": 601 }, { "epoch": 0.0897904392572153, "grad_norm": 1.8153162002563477, "learning_rate": 1.98147423743528e-05, "loss": 0.7904, "step": 602 }, { "epoch": 0.08993959281079872, "grad_norm": 1.5983500480651855, "learning_rate": 1.98138154969595e-05, "loss": 0.6961, "step": 603 }, { "epoch": 0.09008874636438213, "grad_norm": 1.952443242073059, "learning_rate": 1.9812886328468047e-05, "loss": 0.6234, "step": 604 }, { "epoch": 0.09023789991796555, "grad_norm": 1.51041841506958, "learning_rate": 1.981195486909536e-05, "loss": 0.6412, "step": 605 }, { "epoch": 0.09038705347154896, "grad_norm": 1.929039716720581, "learning_rate": 1.981102111905889e-05, "loss": 0.6737, "step": 606 }, { "epoch": 0.09053620702513238, "grad_norm": 1.6209332942962646, "learning_rate": 1.981008507857664e-05, "loss": 0.7546, "step": 607 }, { "epoch": 0.09068536057871579, "grad_norm": 1.7273919582366943, "learning_rate": 1.9809146747867116e-05, "loss": 0.6159, "step": 608 }, { "epoch": 0.0908345141322992, "grad_norm": 1.5036380290985107, "learning_rate": 1.980820612714939e-05, "loss": 0.7039, "step": 609 }, { "epoch": 0.09098366768588262, "grad_norm": 1.6828439235687256, "learning_rate": 1.9807263216643053e-05, "loss": 0.705, "step": 610 }, { "epoch": 0.09113282123946603, "grad_norm": 1.6459155082702637, "learning_rate": 1.9806318016568235e-05, "loss": 0.7257, "step": 611 }, { "epoch": 0.09128197479304945, "grad_norm": 1.5694105625152588, "learning_rate": 1.98053705271456e-05, "loss": 0.6283, "step": 612 }, { "epoch": 0.09143112834663286, "grad_norm": 1.7626816034317017, "learning_rate": 1.980442074859634e-05, "loss": 0.6463, "step": 613 }, { "epoch": 0.09158028190021628, "grad_norm": 1.5983401536941528, "learning_rate": 1.980346868114219e-05, "loss": 0.6051, "step": 614 }, { "epoch": 0.09172943545379969, "grad_norm": 1.702836513519287, "learning_rate": 1.9802514325005414e-05, "loss": 0.7102, "step": 615 }, { "epoch": 0.0918785890073831, "grad_norm": 1.7529423236846924, "learning_rate": 1.980155768040882e-05, "loss": 0.5616, "step": 616 }, { "epoch": 0.09202774256096652, "grad_norm": 1.4259239435195923, "learning_rate": 1.9800598747575734e-05, "loss": 0.6262, "step": 617 }, { "epoch": 0.09217689611454993, "grad_norm": 1.509128212928772, "learning_rate": 1.9799637526730027e-05, "loss": 0.6435, "step": 618 }, { "epoch": 0.09232604966813335, "grad_norm": 1.6280028820037842, "learning_rate": 1.9798674018096106e-05, "loss": 0.5761, "step": 619 }, { "epoch": 0.09247520322171676, "grad_norm": 1.7150768041610718, "learning_rate": 1.9797708221898906e-05, "loss": 0.6041, "step": 620 }, { "epoch": 0.09262435677530018, "grad_norm": 1.4660648107528687, "learning_rate": 1.97967401383639e-05, "loss": 0.7061, "step": 621 }, { "epoch": 0.09277351032888359, "grad_norm": 1.6705474853515625, "learning_rate": 1.9795769767717087e-05, "loss": 0.5899, "step": 622 }, { "epoch": 0.092922663882467, "grad_norm": 1.7004029750823975, "learning_rate": 1.9794797110185015e-05, "loss": 0.6726, "step": 623 }, { "epoch": 0.09307181743605042, "grad_norm": 1.6491236686706543, "learning_rate": 1.979382216599475e-05, "loss": 0.6347, "step": 624 }, { "epoch": 0.09322097098963383, "grad_norm": 1.6576178073883057, "learning_rate": 1.9792844935373905e-05, "loss": 0.5969, "step": 625 }, { "epoch": 0.09337012454321725, "grad_norm": 2.6877191066741943, "learning_rate": 1.9791865418550618e-05, "loss": 0.6511, "step": 626 }, { "epoch": 0.09351927809680066, "grad_norm": 1.5791690349578857, "learning_rate": 1.979088361575356e-05, "loss": 0.7142, "step": 627 }, { "epoch": 0.09366843165038408, "grad_norm": 1.1901443004608154, "learning_rate": 1.9789899527211943e-05, "loss": 0.6628, "step": 628 }, { "epoch": 0.09381758520396749, "grad_norm": 1.60323166847229, "learning_rate": 1.9788913153155512e-05, "loss": 0.6385, "step": 629 }, { "epoch": 0.0939667387575509, "grad_norm": 1.645762324333191, "learning_rate": 1.9787924493814533e-05, "loss": 0.6696, "step": 630 }, { "epoch": 0.09411589231113432, "grad_norm": 1.6455073356628418, "learning_rate": 1.9786933549419826e-05, "loss": 0.7324, "step": 631 }, { "epoch": 0.09426504586471773, "grad_norm": 0.9274824857711792, "learning_rate": 1.9785940320202726e-05, "loss": 0.6459, "step": 632 }, { "epoch": 0.09441419941830115, "grad_norm": 1.4830785989761353, "learning_rate": 1.978494480639511e-05, "loss": 0.5665, "step": 633 }, { "epoch": 0.09456335297188455, "grad_norm": 1.6041507720947266, "learning_rate": 1.9783947008229387e-05, "loss": 0.7368, "step": 634 }, { "epoch": 0.09471250652546796, "grad_norm": 1.750638723373413, "learning_rate": 1.97829469259385e-05, "loss": 0.6584, "step": 635 }, { "epoch": 0.09486166007905138, "grad_norm": 1.707998514175415, "learning_rate": 1.9781944559755924e-05, "loss": 0.7292, "step": 636 }, { "epoch": 0.09501081363263479, "grad_norm": 1.3763935565948486, "learning_rate": 1.9780939909915666e-05, "loss": 0.6145, "step": 637 }, { "epoch": 0.0951599671862182, "grad_norm": 1.6598960161209106, "learning_rate": 1.9779932976652272e-05, "loss": 0.749, "step": 638 }, { "epoch": 0.09530912073980162, "grad_norm": 1.4198235273361206, "learning_rate": 1.977892376020082e-05, "loss": 0.5958, "step": 639 }, { "epoch": 0.09545827429338503, "grad_norm": 1.5932530164718628, "learning_rate": 1.977791226079691e-05, "loss": 0.6449, "step": 640 }, { "epoch": 0.09560742784696845, "grad_norm": 1.5460433959960938, "learning_rate": 1.9776898478676684e-05, "loss": 0.601, "step": 641 }, { "epoch": 0.09575658140055186, "grad_norm": 1.5033069849014282, "learning_rate": 1.9775882414076822e-05, "loss": 0.636, "step": 642 }, { "epoch": 0.09590573495413528, "grad_norm": 1.4372020959854126, "learning_rate": 1.9774864067234525e-05, "loss": 0.6619, "step": 643 }, { "epoch": 0.09605488850771869, "grad_norm": 1.6321178674697876, "learning_rate": 1.9773843438387534e-05, "loss": 0.6064, "step": 644 }, { "epoch": 0.0962040420613021, "grad_norm": 1.3786038160324097, "learning_rate": 1.9772820527774127e-05, "loss": 0.5586, "step": 645 }, { "epoch": 0.09635319561488552, "grad_norm": 1.419584035873413, "learning_rate": 1.9771795335633098e-05, "loss": 0.6762, "step": 646 }, { "epoch": 0.09650234916846893, "grad_norm": 1.6361159086227417, "learning_rate": 1.9770767862203795e-05, "loss": 0.6312, "step": 647 }, { "epoch": 0.09665150272205235, "grad_norm": 1.5973362922668457, "learning_rate": 1.976973810772608e-05, "loss": 0.6578, "step": 648 }, { "epoch": 0.09680065627563576, "grad_norm": 1.6495882272720337, "learning_rate": 1.976870607244036e-05, "loss": 0.7275, "step": 649 }, { "epoch": 0.09694980982921918, "grad_norm": 1.5989162921905518, "learning_rate": 1.9767671756587577e-05, "loss": 0.6404, "step": 650 }, { "epoch": 0.09709896338280259, "grad_norm": 1.630240797996521, "learning_rate": 1.9766635160409186e-05, "loss": 0.6415, "step": 651 }, { "epoch": 0.097248116936386, "grad_norm": 1.9630850553512573, "learning_rate": 1.9765596284147192e-05, "loss": 0.6911, "step": 652 }, { "epoch": 0.09739727048996942, "grad_norm": 1.8684608936309814, "learning_rate": 1.9764555128044128e-05, "loss": 0.7054, "step": 653 }, { "epoch": 0.09754642404355283, "grad_norm": 1.814929485321045, "learning_rate": 1.9763511692343062e-05, "loss": 0.6985, "step": 654 }, { "epoch": 0.09769557759713625, "grad_norm": 1.6655855178833008, "learning_rate": 1.9762465977287587e-05, "loss": 0.6597, "step": 655 }, { "epoch": 0.09784473115071966, "grad_norm": 1.42969810962677, "learning_rate": 1.976141798312183e-05, "loss": 0.6015, "step": 656 }, { "epoch": 0.09799388470430308, "grad_norm": 1.5775527954101562, "learning_rate": 1.976036771009046e-05, "loss": 0.7099, "step": 657 }, { "epoch": 0.09814303825788649, "grad_norm": 1.567527413368225, "learning_rate": 1.9759315158438658e-05, "loss": 0.692, "step": 658 }, { "epoch": 0.0982921918114699, "grad_norm": 1.6750284433364868, "learning_rate": 1.9758260328412154e-05, "loss": 0.6952, "step": 659 }, { "epoch": 0.09844134536505332, "grad_norm": 1.6112927198410034, "learning_rate": 1.975720322025721e-05, "loss": 0.7344, "step": 660 }, { "epoch": 0.09859049891863673, "grad_norm": 1.305984377861023, "learning_rate": 1.975614383422061e-05, "loss": 0.6428, "step": 661 }, { "epoch": 0.09873965247222015, "grad_norm": 1.4494379758834839, "learning_rate": 1.9755082170549675e-05, "loss": 0.6648, "step": 662 }, { "epoch": 0.09888880602580356, "grad_norm": 1.6487088203430176, "learning_rate": 1.9754018229492254e-05, "loss": 0.7613, "step": 663 }, { "epoch": 0.09903795957938698, "grad_norm": 1.354744553565979, "learning_rate": 1.975295201129674e-05, "loss": 0.5979, "step": 664 }, { "epoch": 0.09918711313297039, "grad_norm": 1.718622088432312, "learning_rate": 1.975188351621204e-05, "loss": 0.6849, "step": 665 }, { "epoch": 0.0993362666865538, "grad_norm": 1.743534803390503, "learning_rate": 1.9750812744487605e-05, "loss": 0.6615, "step": 666 }, { "epoch": 0.09948542024013722, "grad_norm": 1.44237220287323, "learning_rate": 1.974973969637341e-05, "loss": 0.6524, "step": 667 }, { "epoch": 0.09963457379372063, "grad_norm": 1.4789397716522217, "learning_rate": 1.974866437211997e-05, "loss": 0.6996, "step": 668 }, { "epoch": 0.09978372734730405, "grad_norm": 1.7508409023284912, "learning_rate": 1.974758677197832e-05, "loss": 0.7054, "step": 669 }, { "epoch": 0.09993288090088746, "grad_norm": 1.676405668258667, "learning_rate": 1.974650689620004e-05, "loss": 0.6407, "step": 670 }, { "epoch": 0.10008203445447088, "grad_norm": 1.4089930057525635, "learning_rate": 1.9745424745037226e-05, "loss": 0.5884, "step": 671 }, { "epoch": 0.10023118800805429, "grad_norm": 1.6362996101379395, "learning_rate": 1.974434031874252e-05, "loss": 0.6606, "step": 672 }, { "epoch": 0.1003803415616377, "grad_norm": 1.5411051511764526, "learning_rate": 1.9743253617569085e-05, "loss": 0.6068, "step": 673 }, { "epoch": 0.10052949511522112, "grad_norm": 1.4013516902923584, "learning_rate": 1.9742164641770617e-05, "loss": 0.6014, "step": 674 }, { "epoch": 0.10067864866880454, "grad_norm": 1.661703109741211, "learning_rate": 1.974107339160135e-05, "loss": 0.6741, "step": 675 }, { "epoch": 0.10082780222238795, "grad_norm": 1.6580383777618408, "learning_rate": 1.9739979867316035e-05, "loss": 0.7371, "step": 676 }, { "epoch": 0.10097695577597136, "grad_norm": 1.761466145515442, "learning_rate": 1.9738884069169972e-05, "loss": 0.6889, "step": 677 }, { "epoch": 0.10112610932955478, "grad_norm": 1.5424734354019165, "learning_rate": 1.9737785997418973e-05, "loss": 0.6739, "step": 678 }, { "epoch": 0.10127526288313819, "grad_norm": 1.337469220161438, "learning_rate": 1.9736685652319398e-05, "loss": 0.6493, "step": 679 }, { "epoch": 0.1014244164367216, "grad_norm": 1.9204374551773071, "learning_rate": 1.973558303412812e-05, "loss": 0.6531, "step": 680 }, { "epoch": 0.10157356999030502, "grad_norm": 1.4878952503204346, "learning_rate": 1.973447814310256e-05, "loss": 0.6467, "step": 681 }, { "epoch": 0.10172272354388844, "grad_norm": 1.430120825767517, "learning_rate": 1.973337097950066e-05, "loss": 0.6826, "step": 682 }, { "epoch": 0.10187187709747185, "grad_norm": 1.8645867109298706, "learning_rate": 1.9732261543580894e-05, "loss": 0.6681, "step": 683 }, { "epoch": 0.10202103065105526, "grad_norm": 1.425575852394104, "learning_rate": 1.973114983560227e-05, "loss": 0.6631, "step": 684 }, { "epoch": 0.10217018420463868, "grad_norm": 1.4835702180862427, "learning_rate": 1.9730035855824317e-05, "loss": 0.6519, "step": 685 }, { "epoch": 0.10231933775822209, "grad_norm": 1.5084668397903442, "learning_rate": 1.9728919604507105e-05, "loss": 0.6935, "step": 686 }, { "epoch": 0.1024684913118055, "grad_norm": 1.4400990009307861, "learning_rate": 1.9727801081911227e-05, "loss": 0.7308, "step": 687 }, { "epoch": 0.10261764486538892, "grad_norm": 1.6669526100158691, "learning_rate": 1.9726680288297815e-05, "loss": 0.7041, "step": 688 }, { "epoch": 0.10276679841897234, "grad_norm": 1.6296944618225098, "learning_rate": 1.972555722392852e-05, "loss": 0.6502, "step": 689 }, { "epoch": 0.10291595197255575, "grad_norm": 1.4060951471328735, "learning_rate": 1.972443188906553e-05, "loss": 0.6117, "step": 690 }, { "epoch": 0.10306510552613916, "grad_norm": 1.3975706100463867, "learning_rate": 1.9723304283971566e-05, "loss": 0.6245, "step": 691 }, { "epoch": 0.10321425907972258, "grad_norm": 1.6328262090682983, "learning_rate": 1.9722174408909866e-05, "loss": 0.6639, "step": 692 }, { "epoch": 0.10336341263330599, "grad_norm": 1.471756100654602, "learning_rate": 1.9721042264144214e-05, "loss": 0.5751, "step": 693 }, { "epoch": 0.10351256618688941, "grad_norm": 1.6198612451553345, "learning_rate": 1.971990784993891e-05, "loss": 0.6388, "step": 694 }, { "epoch": 0.10366171974047282, "grad_norm": 1.6549899578094482, "learning_rate": 1.9718771166558796e-05, "loss": 0.6221, "step": 695 }, { "epoch": 0.10381087329405624, "grad_norm": 1.6740245819091797, "learning_rate": 1.971763221426924e-05, "loss": 0.753, "step": 696 }, { "epoch": 0.10396002684763965, "grad_norm": 1.5455256700515747, "learning_rate": 1.971649099333613e-05, "loss": 0.7152, "step": 697 }, { "epoch": 0.10410918040122306, "grad_norm": 1.4143493175506592, "learning_rate": 1.971534750402589e-05, "loss": 0.662, "step": 698 }, { "epoch": 0.10425833395480648, "grad_norm": 1.2760438919067383, "learning_rate": 1.971420174660549e-05, "loss": 0.6686, "step": 699 }, { "epoch": 0.10440748750838989, "grad_norm": 1.6183202266693115, "learning_rate": 1.97130537213424e-05, "loss": 0.6603, "step": 700 }, { "epoch": 0.10455664106197331, "grad_norm": 1.9950424432754517, "learning_rate": 1.971190342850464e-05, "loss": 0.678, "step": 701 }, { "epoch": 0.10470579461555672, "grad_norm": 1.9291073083877563, "learning_rate": 1.971075086836075e-05, "loss": 0.6952, "step": 702 }, { "epoch": 0.10485494816914014, "grad_norm": 1.4365795850753784, "learning_rate": 1.9709596041179802e-05, "loss": 0.6259, "step": 703 }, { "epoch": 0.10500410172272355, "grad_norm": 1.574944019317627, "learning_rate": 1.9708438947231402e-05, "loss": 0.6969, "step": 704 }, { "epoch": 0.10515325527630696, "grad_norm": 1.4736754894256592, "learning_rate": 1.970727958678568e-05, "loss": 0.6793, "step": 705 }, { "epoch": 0.10530240882989038, "grad_norm": 1.7148264646530151, "learning_rate": 1.970611796011329e-05, "loss": 0.673, "step": 706 }, { "epoch": 0.1054515623834738, "grad_norm": 1.5313961505889893, "learning_rate": 1.9704954067485432e-05, "loss": 0.6867, "step": 707 }, { "epoch": 0.1056007159370572, "grad_norm": 1.4607203006744385, "learning_rate": 1.9703787909173816e-05, "loss": 0.6475, "step": 708 }, { "epoch": 0.10574986949064061, "grad_norm": 1.445754051208496, "learning_rate": 1.970261948545069e-05, "loss": 0.6689, "step": 709 }, { "epoch": 0.10589902304422402, "grad_norm": 1.5795613527297974, "learning_rate": 1.9701448796588837e-05, "loss": 0.7951, "step": 710 }, { "epoch": 0.10604817659780744, "grad_norm": 1.41666841506958, "learning_rate": 1.970027584286155e-05, "loss": 0.6101, "step": 711 }, { "epoch": 0.10619733015139085, "grad_norm": 1.5283411741256714, "learning_rate": 1.9699100624542673e-05, "loss": 0.6122, "step": 712 }, { "epoch": 0.10634648370497427, "grad_norm": 1.5275121927261353, "learning_rate": 1.9697923141906563e-05, "loss": 0.6337, "step": 713 }, { "epoch": 0.10649563725855768, "grad_norm": 1.4900450706481934, "learning_rate": 1.9696743395228113e-05, "loss": 0.5792, "step": 714 }, { "epoch": 0.1066447908121411, "grad_norm": 1.6324076652526855, "learning_rate": 1.9695561384782743e-05, "loss": 0.6369, "step": 715 }, { "epoch": 0.10679394436572451, "grad_norm": 1.3274974822998047, "learning_rate": 1.9694377110846393e-05, "loss": 0.6181, "step": 716 }, { "epoch": 0.10694309791930792, "grad_norm": 1.3294596672058105, "learning_rate": 1.969319057369555e-05, "loss": 0.5524, "step": 717 }, { "epoch": 0.10709225147289134, "grad_norm": 1.446699857711792, "learning_rate": 1.9692001773607215e-05, "loss": 0.6086, "step": 718 }, { "epoch": 0.10724140502647475, "grad_norm": 1.7938623428344727, "learning_rate": 1.969081071085892e-05, "loss": 0.6821, "step": 719 }, { "epoch": 0.10739055858005817, "grad_norm": 1.5191060304641724, "learning_rate": 1.9689617385728726e-05, "loss": 0.6592, "step": 720 }, { "epoch": 0.10753971213364158, "grad_norm": 1.3964184522628784, "learning_rate": 1.9688421798495225e-05, "loss": 0.6035, "step": 721 }, { "epoch": 0.107688865687225, "grad_norm": 1.8221533298492432, "learning_rate": 1.968722394943753e-05, "loss": 0.7072, "step": 722 }, { "epoch": 0.10783801924080841, "grad_norm": 1.6124447584152222, "learning_rate": 1.9686023838835292e-05, "loss": 0.6386, "step": 723 }, { "epoch": 0.10798717279439182, "grad_norm": 1.6130274534225464, "learning_rate": 1.968482146696868e-05, "loss": 0.6519, "step": 724 }, { "epoch": 0.10813632634797524, "grad_norm": 1.5068434476852417, "learning_rate": 1.9683616834118398e-05, "loss": 0.5952, "step": 725 }, { "epoch": 0.10828547990155865, "grad_norm": 1.4619890451431274, "learning_rate": 1.968240994056567e-05, "loss": 0.6199, "step": 726 }, { "epoch": 0.10843463345514207, "grad_norm": 1.8968325853347778, "learning_rate": 1.9681200786592265e-05, "loss": 0.5373, "step": 727 }, { "epoch": 0.10858378700872548, "grad_norm": 1.9057029485702515, "learning_rate": 1.9679989372480456e-05, "loss": 0.7027, "step": 728 }, { "epoch": 0.1087329405623089, "grad_norm": 1.5562832355499268, "learning_rate": 1.967877569851306e-05, "loss": 0.6509, "step": 729 }, { "epoch": 0.10888209411589231, "grad_norm": 1.530210018157959, "learning_rate": 1.9677559764973416e-05, "loss": 0.658, "step": 730 }, { "epoch": 0.10903124766947572, "grad_norm": 1.441637635231018, "learning_rate": 1.967634157214539e-05, "loss": 0.5928, "step": 731 }, { "epoch": 0.10918040122305914, "grad_norm": 1.5987063646316528, "learning_rate": 1.967512112031338e-05, "loss": 0.7228, "step": 732 }, { "epoch": 0.10932955477664255, "grad_norm": 1.6674848794937134, "learning_rate": 1.9673898409762315e-05, "loss": 0.7061, "step": 733 }, { "epoch": 0.10947870833022597, "grad_norm": 1.8856816291809082, "learning_rate": 1.9672673440777628e-05, "loss": 0.5697, "step": 734 }, { "epoch": 0.10962786188380938, "grad_norm": 1.3696047067642212, "learning_rate": 1.9671446213645306e-05, "loss": 0.616, "step": 735 }, { "epoch": 0.1097770154373928, "grad_norm": 1.4925870895385742, "learning_rate": 1.9670216728651854e-05, "loss": 0.5685, "step": 736 }, { "epoch": 0.10992616899097621, "grad_norm": 1.7102123498916626, "learning_rate": 1.96689849860843e-05, "loss": 0.643, "step": 737 }, { "epoch": 0.11007532254455962, "grad_norm": 1.5816441774368286, "learning_rate": 1.9667750986230203e-05, "loss": 0.6051, "step": 738 }, { "epoch": 0.11022447609814304, "grad_norm": 1.4925551414489746, "learning_rate": 1.966651472937765e-05, "loss": 0.6151, "step": 739 }, { "epoch": 0.11037362965172645, "grad_norm": 1.4708541631698608, "learning_rate": 1.9665276215815247e-05, "loss": 0.6744, "step": 740 }, { "epoch": 0.11052278320530987, "grad_norm": 1.6490968465805054, "learning_rate": 1.966403544583214e-05, "loss": 0.6806, "step": 741 }, { "epoch": 0.11067193675889328, "grad_norm": 1.553739309310913, "learning_rate": 1.966279241971799e-05, "loss": 0.7313, "step": 742 }, { "epoch": 0.1108210903124767, "grad_norm": 1.4706971645355225, "learning_rate": 1.9661547137762994e-05, "loss": 0.6757, "step": 743 }, { "epoch": 0.11097024386606011, "grad_norm": 0.9246963262557983, "learning_rate": 1.966029960025787e-05, "loss": 0.6298, "step": 744 }, { "epoch": 0.11111939741964352, "grad_norm": 1.6134155988693237, "learning_rate": 1.965904980749386e-05, "loss": 0.6665, "step": 745 }, { "epoch": 0.11126855097322694, "grad_norm": 1.981446385383606, "learning_rate": 1.9657797759762735e-05, "loss": 0.6729, "step": 746 }, { "epoch": 0.11141770452681035, "grad_norm": 1.487993836402893, "learning_rate": 1.96565434573568e-05, "loss": 0.6428, "step": 747 }, { "epoch": 0.11156685808039377, "grad_norm": 1.7229114770889282, "learning_rate": 1.965528690056888e-05, "loss": 0.6745, "step": 748 }, { "epoch": 0.11171601163397718, "grad_norm": 2.2232961654663086, "learning_rate": 1.9654028089692317e-05, "loss": 0.5807, "step": 749 }, { "epoch": 0.1118651651875606, "grad_norm": 1.5640078783035278, "learning_rate": 1.9652767025020997e-05, "loss": 0.6646, "step": 750 }, { "epoch": 0.11201431874114401, "grad_norm": 1.601431965827942, "learning_rate": 1.965150370684932e-05, "loss": 0.6624, "step": 751 }, { "epoch": 0.11216347229472742, "grad_norm": 1.4411399364471436, "learning_rate": 1.965023813547222e-05, "loss": 0.6084, "step": 752 }, { "epoch": 0.11231262584831084, "grad_norm": 1.619117259979248, "learning_rate": 1.964897031118515e-05, "loss": 0.6407, "step": 753 }, { "epoch": 0.11246177940189425, "grad_norm": 1.5343226194381714, "learning_rate": 1.9647700234284087e-05, "loss": 0.5914, "step": 754 }, { "epoch": 0.11261093295547767, "grad_norm": 1.7336783409118652, "learning_rate": 1.9646427905065545e-05, "loss": 0.5886, "step": 755 }, { "epoch": 0.11276008650906108, "grad_norm": 1.5465288162231445, "learning_rate": 1.9645153323826558e-05, "loss": 0.6884, "step": 756 }, { "epoch": 0.1129092400626445, "grad_norm": 1.4849708080291748, "learning_rate": 1.9643876490864678e-05, "loss": 0.7081, "step": 757 }, { "epoch": 0.11305839361622791, "grad_norm": 1.543946385383606, "learning_rate": 1.9642597406478e-05, "loss": 0.6871, "step": 758 }, { "epoch": 0.11320754716981132, "grad_norm": 0.9407850503921509, "learning_rate": 1.9641316070965123e-05, "loss": 0.6788, "step": 759 }, { "epoch": 0.11335670072339474, "grad_norm": 1.662463665008545, "learning_rate": 1.964003248462519e-05, "loss": 0.6511, "step": 760 }, { "epoch": 0.11350585427697815, "grad_norm": 1.81804621219635, "learning_rate": 1.963874664775786e-05, "loss": 0.6471, "step": 761 }, { "epoch": 0.11365500783056157, "grad_norm": 1.7145870923995972, "learning_rate": 1.9637458560663323e-05, "loss": 0.7346, "step": 762 }, { "epoch": 0.11380416138414498, "grad_norm": 1.6137089729309082, "learning_rate": 1.9636168223642288e-05, "loss": 0.6782, "step": 763 }, { "epoch": 0.1139533149377284, "grad_norm": 0.8750501871109009, "learning_rate": 1.9634875636996e-05, "loss": 0.6565, "step": 764 }, { "epoch": 0.11410246849131181, "grad_norm": 1.8352198600769043, "learning_rate": 1.9633580801026207e-05, "loss": 0.6684, "step": 765 }, { "epoch": 0.11425162204489522, "grad_norm": 1.5297694206237793, "learning_rate": 1.963228371603521e-05, "loss": 0.6086, "step": 766 }, { "epoch": 0.11440077559847864, "grad_norm": 1.5526490211486816, "learning_rate": 1.9630984382325816e-05, "loss": 0.5903, "step": 767 }, { "epoch": 0.11454992915206205, "grad_norm": 1.817728877067566, "learning_rate": 1.9629682800201363e-05, "loss": 0.6742, "step": 768 }, { "epoch": 0.11469908270564547, "grad_norm": 1.8117587566375732, "learning_rate": 1.9628378969965712e-05, "loss": 0.6357, "step": 769 }, { "epoch": 0.11484823625922888, "grad_norm": 1.662339210510254, "learning_rate": 1.9627072891923258e-05, "loss": 0.6014, "step": 770 }, { "epoch": 0.1149973898128123, "grad_norm": 1.4866374731063843, "learning_rate": 1.9625764566378903e-05, "loss": 0.492, "step": 771 }, { "epoch": 0.11514654336639571, "grad_norm": 1.3596997261047363, "learning_rate": 1.9624453993638094e-05, "loss": 0.6262, "step": 772 }, { "epoch": 0.11529569691997912, "grad_norm": 1.6935088634490967, "learning_rate": 1.9623141174006785e-05, "loss": 0.6678, "step": 773 }, { "epoch": 0.11544485047356254, "grad_norm": 1.3606353998184204, "learning_rate": 1.9621826107791465e-05, "loss": 0.6707, "step": 774 }, { "epoch": 0.11559400402714595, "grad_norm": 1.6679459810256958, "learning_rate": 1.9620508795299148e-05, "loss": 0.7196, "step": 775 }, { "epoch": 0.11574315758072937, "grad_norm": 1.6773704290390015, "learning_rate": 1.961918923683736e-05, "loss": 0.6087, "step": 776 }, { "epoch": 0.11589231113431278, "grad_norm": 1.4964251518249512, "learning_rate": 1.961786743271417e-05, "loss": 0.633, "step": 777 }, { "epoch": 0.1160414646878962, "grad_norm": 0.851463794708252, "learning_rate": 1.9616543383238158e-05, "loss": 0.5866, "step": 778 }, { "epoch": 0.11619061824147961, "grad_norm": 1.5142499208450317, "learning_rate": 1.961521708871843e-05, "loss": 0.5595, "step": 779 }, { "epoch": 0.11633977179506302, "grad_norm": 1.6164716482162476, "learning_rate": 1.961388854946462e-05, "loss": 0.7111, "step": 780 }, { "epoch": 0.11648892534864644, "grad_norm": 1.4496195316314697, "learning_rate": 1.9612557765786884e-05, "loss": 0.6725, "step": 781 }, { "epoch": 0.11663807890222985, "grad_norm": 1.2521450519561768, "learning_rate": 1.96112247379959e-05, "loss": 0.5825, "step": 782 }, { "epoch": 0.11678723245581325, "grad_norm": 1.5116175413131714, "learning_rate": 1.9609889466402877e-05, "loss": 0.6324, "step": 783 }, { "epoch": 0.11693638600939667, "grad_norm": 1.5167490243911743, "learning_rate": 1.9608551951319535e-05, "loss": 0.6268, "step": 784 }, { "epoch": 0.11708553956298008, "grad_norm": 1.5131114721298218, "learning_rate": 1.960721219305813e-05, "loss": 0.6704, "step": 785 }, { "epoch": 0.1172346931165635, "grad_norm": 1.677506446838379, "learning_rate": 1.960587019193144e-05, "loss": 0.6383, "step": 786 }, { "epoch": 0.11738384667014691, "grad_norm": 1.4031912088394165, "learning_rate": 1.9604525948252758e-05, "loss": 0.6542, "step": 787 }, { "epoch": 0.11753300022373032, "grad_norm": 1.657568335533142, "learning_rate": 1.9603179462335907e-05, "loss": 0.7375, "step": 788 }, { "epoch": 0.11768215377731374, "grad_norm": 1.594401478767395, "learning_rate": 1.9601830734495236e-05, "loss": 0.7024, "step": 789 }, { "epoch": 0.11783130733089715, "grad_norm": 1.4124023914337158, "learning_rate": 1.9600479765045615e-05, "loss": 0.5891, "step": 790 }, { "epoch": 0.11798046088448057, "grad_norm": 1.6087177991867065, "learning_rate": 1.959912655430243e-05, "loss": 0.6559, "step": 791 }, { "epoch": 0.11812961443806398, "grad_norm": 0.9645440578460693, "learning_rate": 1.9597771102581607e-05, "loss": 0.6595, "step": 792 }, { "epoch": 0.1182787679916474, "grad_norm": 1.820672869682312, "learning_rate": 1.9596413410199574e-05, "loss": 0.6921, "step": 793 }, { "epoch": 0.11842792154523081, "grad_norm": 0.8966417908668518, "learning_rate": 1.9595053477473302e-05, "loss": 0.6651, "step": 794 }, { "epoch": 0.11857707509881422, "grad_norm": 1.3886945247650146, "learning_rate": 1.959369130472027e-05, "loss": 0.6065, "step": 795 }, { "epoch": 0.11872622865239764, "grad_norm": 1.6320503950119019, "learning_rate": 1.9592326892258486e-05, "loss": 0.6423, "step": 796 }, { "epoch": 0.11887538220598105, "grad_norm": 1.681839942932129, "learning_rate": 1.9590960240406483e-05, "loss": 0.6494, "step": 797 }, { "epoch": 0.11902453575956447, "grad_norm": 1.579268217086792, "learning_rate": 1.9589591349483316e-05, "loss": 0.6275, "step": 798 }, { "epoch": 0.11917368931314788, "grad_norm": 1.7285776138305664, "learning_rate": 1.9588220219808554e-05, "loss": 0.6832, "step": 799 }, { "epoch": 0.1193228428667313, "grad_norm": 1.6061307191848755, "learning_rate": 1.9586846851702307e-05, "loss": 0.6532, "step": 800 }, { "epoch": 0.11947199642031471, "grad_norm": 1.6383086442947388, "learning_rate": 1.9585471245485193e-05, "loss": 0.5704, "step": 801 }, { "epoch": 0.11962114997389812, "grad_norm": 1.564918041229248, "learning_rate": 1.958409340147835e-05, "loss": 0.6428, "step": 802 }, { "epoch": 0.11977030352748154, "grad_norm": 1.5666872262954712, "learning_rate": 1.9582713320003454e-05, "loss": 0.7696, "step": 803 }, { "epoch": 0.11991945708106495, "grad_norm": 1.3370929956436157, "learning_rate": 1.9581331001382683e-05, "loss": 0.6362, "step": 804 }, { "epoch": 0.12006861063464837, "grad_norm": 1.7126227617263794, "learning_rate": 1.9579946445938755e-05, "loss": 0.6741, "step": 805 }, { "epoch": 0.12021776418823178, "grad_norm": 1.483420491218567, "learning_rate": 1.9578559653994905e-05, "loss": 0.6817, "step": 806 }, { "epoch": 0.1203669177418152, "grad_norm": 1.4730356931686401, "learning_rate": 1.9577170625874885e-05, "loss": 0.6301, "step": 807 }, { "epoch": 0.12051607129539861, "grad_norm": 1.6696966886520386, "learning_rate": 1.957577936190297e-05, "loss": 0.6898, "step": 808 }, { "epoch": 0.12066522484898203, "grad_norm": 1.5378910303115845, "learning_rate": 1.9574385862403965e-05, "loss": 0.6539, "step": 809 }, { "epoch": 0.12081437840256544, "grad_norm": 2.118105888366699, "learning_rate": 1.957299012770319e-05, "loss": 0.6633, "step": 810 }, { "epoch": 0.12096353195614885, "grad_norm": 1.5070902109146118, "learning_rate": 1.9571592158126488e-05, "loss": 0.6649, "step": 811 }, { "epoch": 0.12111268550973227, "grad_norm": 1.552781105041504, "learning_rate": 1.9570191954000225e-05, "loss": 0.6155, "step": 812 }, { "epoch": 0.12126183906331568, "grad_norm": 1.5785325765609741, "learning_rate": 1.956878951565128e-05, "loss": 0.7281, "step": 813 }, { "epoch": 0.1214109926168991, "grad_norm": 1.492634892463684, "learning_rate": 1.9567384843407068e-05, "loss": 0.6798, "step": 814 }, { "epoch": 0.12156014617048251, "grad_norm": 1.8530266284942627, "learning_rate": 1.9565977937595524e-05, "loss": 0.6683, "step": 815 }, { "epoch": 0.12170929972406593, "grad_norm": 2.7555248737335205, "learning_rate": 1.9564568798545086e-05, "loss": 0.6464, "step": 816 }, { "epoch": 0.12185845327764934, "grad_norm": 1.4758299589157104, "learning_rate": 1.9563157426584737e-05, "loss": 0.6571, "step": 817 }, { "epoch": 0.12200760683123275, "grad_norm": 1.5490100383758545, "learning_rate": 1.9561743822043968e-05, "loss": 0.6578, "step": 818 }, { "epoch": 0.12215676038481617, "grad_norm": 0.9287109971046448, "learning_rate": 1.9560327985252794e-05, "loss": 0.6749, "step": 819 }, { "epoch": 0.12230591393839958, "grad_norm": 1.6672943830490112, "learning_rate": 1.9558909916541746e-05, "loss": 0.7421, "step": 820 }, { "epoch": 0.122455067491983, "grad_norm": 1.4506025314331055, "learning_rate": 1.955748961624189e-05, "loss": 0.5689, "step": 821 }, { "epoch": 0.12260422104556641, "grad_norm": 1.6330589056015015, "learning_rate": 1.95560670846848e-05, "loss": 0.6544, "step": 822 }, { "epoch": 0.12275337459914983, "grad_norm": 1.54534912109375, "learning_rate": 1.9554642322202574e-05, "loss": 0.6323, "step": 823 }, { "epoch": 0.12290252815273324, "grad_norm": 1.7351492643356323, "learning_rate": 1.9553215329127834e-05, "loss": 0.6339, "step": 824 }, { "epoch": 0.12305168170631665, "grad_norm": 1.6160366535186768, "learning_rate": 1.955178610579372e-05, "loss": 0.6269, "step": 825 }, { "epoch": 0.12320083525990007, "grad_norm": 1.7346217632293701, "learning_rate": 1.955035465253389e-05, "loss": 0.6767, "step": 826 }, { "epoch": 0.12334998881348348, "grad_norm": 1.5567959547042847, "learning_rate": 1.9548920969682535e-05, "loss": 0.6922, "step": 827 }, { "epoch": 0.1234991423670669, "grad_norm": 1.4904392957687378, "learning_rate": 1.954748505757435e-05, "loss": 0.613, "step": 828 }, { "epoch": 0.12364829592065031, "grad_norm": 0.9151891469955444, "learning_rate": 1.9546046916544555e-05, "loss": 0.6429, "step": 829 }, { "epoch": 0.12379744947423373, "grad_norm": 1.7624964714050293, "learning_rate": 1.95446065469289e-05, "loss": 0.6034, "step": 830 }, { "epoch": 0.12394660302781714, "grad_norm": 1.5199700593948364, "learning_rate": 1.9543163949063648e-05, "loss": 0.6861, "step": 831 }, { "epoch": 0.12409575658140055, "grad_norm": 1.3501672744750977, "learning_rate": 1.954171912328558e-05, "loss": 0.6029, "step": 832 }, { "epoch": 0.12424491013498397, "grad_norm": 1.476468801498413, "learning_rate": 1.9540272069932e-05, "loss": 0.6206, "step": 833 }, { "epoch": 0.12439406368856738, "grad_norm": 1.4222196340560913, "learning_rate": 1.9538822789340734e-05, "loss": 0.6645, "step": 834 }, { "epoch": 0.1245432172421508, "grad_norm": 1.5910958051681519, "learning_rate": 1.9537371281850123e-05, "loss": 0.7078, "step": 835 }, { "epoch": 0.12469237079573421, "grad_norm": 1.5304949283599854, "learning_rate": 1.9535917547799036e-05, "loss": 0.6836, "step": 836 }, { "epoch": 0.12484152434931763, "grad_norm": 1.5092002153396606, "learning_rate": 1.9534461587526847e-05, "loss": 0.6417, "step": 837 }, { "epoch": 0.12499067790290104, "grad_norm": 1.613373041152954, "learning_rate": 1.953300340137347e-05, "loss": 0.6935, "step": 838 }, { "epoch": 0.12513983145648444, "grad_norm": 1.988693118095398, "learning_rate": 1.953154298967932e-05, "loss": 0.6106, "step": 839 }, { "epoch": 0.12528898501006785, "grad_norm": 1.802764892578125, "learning_rate": 1.9530080352785343e-05, "loss": 0.7409, "step": 840 }, { "epoch": 0.12543813856365127, "grad_norm": 1.5118399858474731, "learning_rate": 1.9528615491033e-05, "loss": 0.6628, "step": 841 }, { "epoch": 0.12558729211723468, "grad_norm": 1.438130497932434, "learning_rate": 1.9527148404764275e-05, "loss": 0.6594, "step": 842 }, { "epoch": 0.1257364456708181, "grad_norm": 1.5122684240341187, "learning_rate": 1.9525679094321667e-05, "loss": 0.6744, "step": 843 }, { "epoch": 0.1258855992244015, "grad_norm": 1.3045170307159424, "learning_rate": 1.952420756004819e-05, "loss": 0.5894, "step": 844 }, { "epoch": 0.12603475277798493, "grad_norm": 1.3779053688049316, "learning_rate": 1.9522733802287394e-05, "loss": 0.5264, "step": 845 }, { "epoch": 0.12618390633156834, "grad_norm": 1.3755486011505127, "learning_rate": 1.952125782138333e-05, "loss": 0.629, "step": 846 }, { "epoch": 0.12633305988515175, "grad_norm": 1.525646448135376, "learning_rate": 1.9519779617680577e-05, "loss": 0.7794, "step": 847 }, { "epoch": 0.12648221343873517, "grad_norm": 1.7408534288406372, "learning_rate": 1.9518299191524232e-05, "loss": 0.6808, "step": 848 }, { "epoch": 0.12663136699231858, "grad_norm": 1.6876170635223389, "learning_rate": 1.9516816543259908e-05, "loss": 0.6076, "step": 849 }, { "epoch": 0.126780520545902, "grad_norm": 1.6301829814910889, "learning_rate": 1.951533167323374e-05, "loss": 0.6692, "step": 850 }, { "epoch": 0.1269296740994854, "grad_norm": 1.4304838180541992, "learning_rate": 1.951384458179238e-05, "loss": 0.681, "step": 851 }, { "epoch": 0.12707882765306883, "grad_norm": 1.401353359222412, "learning_rate": 1.9512355269283e-05, "loss": 0.6212, "step": 852 }, { "epoch": 0.12722798120665224, "grad_norm": 1.4479111433029175, "learning_rate": 1.9510863736053286e-05, "loss": 0.6569, "step": 853 }, { "epoch": 0.12737713476023566, "grad_norm": 1.7036678791046143, "learning_rate": 1.950936998245145e-05, "loss": 0.6994, "step": 854 }, { "epoch": 0.12752628831381907, "grad_norm": 1.756085991859436, "learning_rate": 1.950787400882622e-05, "loss": 0.6496, "step": 855 }, { "epoch": 0.12767544186740248, "grad_norm": 1.911574363708496, "learning_rate": 1.9506375815526833e-05, "loss": 0.6762, "step": 856 }, { "epoch": 0.1278245954209859, "grad_norm": 1.4940425157546997, "learning_rate": 1.950487540290306e-05, "loss": 0.6581, "step": 857 }, { "epoch": 0.1279737489745693, "grad_norm": 1.5660769939422607, "learning_rate": 1.950337277130518e-05, "loss": 0.6485, "step": 858 }, { "epoch": 0.12812290252815273, "grad_norm": 1.5064774751663208, "learning_rate": 1.950186792108399e-05, "loss": 0.5884, "step": 859 }, { "epoch": 0.12827205608173614, "grad_norm": 1.7257388830184937, "learning_rate": 1.9500360852590806e-05, "loss": 0.6393, "step": 860 }, { "epoch": 0.12842120963531956, "grad_norm": 1.550993800163269, "learning_rate": 1.9498851566177462e-05, "loss": 0.6854, "step": 861 }, { "epoch": 0.12857036318890297, "grad_norm": 0.9539319276809692, "learning_rate": 1.9497340062196318e-05, "loss": 0.6633, "step": 862 }, { "epoch": 0.12871951674248638, "grad_norm": 1.3828151226043701, "learning_rate": 1.9495826341000237e-05, "loss": 0.6309, "step": 863 }, { "epoch": 0.1288686702960698, "grad_norm": 1.4001799821853638, "learning_rate": 1.9494310402942607e-05, "loss": 0.5948, "step": 864 }, { "epoch": 0.1290178238496532, "grad_norm": 1.564426302909851, "learning_rate": 1.9492792248377337e-05, "loss": 0.6807, "step": 865 }, { "epoch": 0.12916697740323663, "grad_norm": 1.4127323627471924, "learning_rate": 1.949127187765885e-05, "loss": 0.6751, "step": 866 }, { "epoch": 0.12931613095682004, "grad_norm": 1.5546022653579712, "learning_rate": 1.948974929114208e-05, "loss": 0.7203, "step": 867 }, { "epoch": 0.12946528451040346, "grad_norm": 1.5511524677276611, "learning_rate": 1.9488224489182496e-05, "loss": 0.6372, "step": 868 }, { "epoch": 0.12961443806398687, "grad_norm": 1.5529199838638306, "learning_rate": 1.9486697472136063e-05, "loss": 0.6566, "step": 869 }, { "epoch": 0.12976359161757028, "grad_norm": 1.516981840133667, "learning_rate": 1.9485168240359277e-05, "loss": 0.6456, "step": 870 }, { "epoch": 0.1299127451711537, "grad_norm": 0.9110028743743896, "learning_rate": 1.9483636794209143e-05, "loss": 0.6747, "step": 871 }, { "epoch": 0.1300618987247371, "grad_norm": 1.5483478307724, "learning_rate": 1.9482103134043194e-05, "loss": 0.5916, "step": 872 }, { "epoch": 0.13021105227832053, "grad_norm": 1.5689467191696167, "learning_rate": 1.9480567260219466e-05, "loss": 0.6605, "step": 873 }, { "epoch": 0.13036020583190394, "grad_norm": 1.5118343830108643, "learning_rate": 1.9479029173096523e-05, "loss": 0.6816, "step": 874 }, { "epoch": 0.13050935938548736, "grad_norm": 1.5046688318252563, "learning_rate": 1.9477488873033435e-05, "loss": 0.6112, "step": 875 }, { "epoch": 0.13065851293907077, "grad_norm": 1.5160242319107056, "learning_rate": 1.947594636038981e-05, "loss": 0.6925, "step": 876 }, { "epoch": 0.13080766649265418, "grad_norm": 1.6549893617630005, "learning_rate": 1.9474401635525738e-05, "loss": 0.6048, "step": 877 }, { "epoch": 0.1309568200462376, "grad_norm": 1.4058926105499268, "learning_rate": 1.9472854698801855e-05, "loss": 0.5244, "step": 878 }, { "epoch": 0.131105973599821, "grad_norm": 0.8700395226478577, "learning_rate": 1.9471305550579305e-05, "loss": 0.6611, "step": 879 }, { "epoch": 0.13125512715340443, "grad_norm": 1.4892795085906982, "learning_rate": 1.9469754191219743e-05, "loss": 0.5838, "step": 880 }, { "epoch": 0.13140428070698784, "grad_norm": 1.4120880365371704, "learning_rate": 1.946820062108534e-05, "loss": 0.5746, "step": 881 }, { "epoch": 0.13155343426057126, "grad_norm": 1.6421972513198853, "learning_rate": 1.94666448405388e-05, "loss": 0.7007, "step": 882 }, { "epoch": 0.13170258781415467, "grad_norm": 1.6275463104248047, "learning_rate": 1.9465086849943318e-05, "loss": 0.5919, "step": 883 }, { "epoch": 0.13185174136773808, "grad_norm": 1.324291706085205, "learning_rate": 1.9463526649662617e-05, "loss": 0.5089, "step": 884 }, { "epoch": 0.1320008949213215, "grad_norm": 1.4657506942749023, "learning_rate": 1.9461964240060944e-05, "loss": 0.6289, "step": 885 }, { "epoch": 0.1321500484749049, "grad_norm": 1.3500118255615234, "learning_rate": 1.9460399621503047e-05, "loss": 0.6446, "step": 886 }, { "epoch": 0.13229920202848833, "grad_norm": 1.6964609622955322, "learning_rate": 1.9458832794354198e-05, "loss": 0.6932, "step": 887 }, { "epoch": 0.13244835558207174, "grad_norm": 1.6575273275375366, "learning_rate": 1.9457263758980182e-05, "loss": 0.5917, "step": 888 }, { "epoch": 0.13259750913565516, "grad_norm": 1.3738632202148438, "learning_rate": 1.9455692515747298e-05, "loss": 0.6299, "step": 889 }, { "epoch": 0.13274666268923857, "grad_norm": 1.4597760438919067, "learning_rate": 1.945411906502237e-05, "loss": 0.7082, "step": 890 }, { "epoch": 0.13289581624282198, "grad_norm": 1.5865765810012817, "learning_rate": 1.9452543407172727e-05, "loss": 0.6495, "step": 891 }, { "epoch": 0.1330449697964054, "grad_norm": 1.4424782991409302, "learning_rate": 1.9450965542566217e-05, "loss": 0.6247, "step": 892 }, { "epoch": 0.1331941233499888, "grad_norm": 1.4119563102722168, "learning_rate": 1.9449385471571197e-05, "loss": 0.5567, "step": 893 }, { "epoch": 0.13334327690357223, "grad_norm": 1.480054259300232, "learning_rate": 1.9447803194556548e-05, "loss": 0.6463, "step": 894 }, { "epoch": 0.13349243045715564, "grad_norm": 0.9054480195045471, "learning_rate": 1.9446218711891666e-05, "loss": 0.6671, "step": 895 }, { "epoch": 0.13364158401073906, "grad_norm": 1.5539562702178955, "learning_rate": 1.9444632023946456e-05, "loss": 0.5763, "step": 896 }, { "epoch": 0.13379073756432247, "grad_norm": 1.3084995746612549, "learning_rate": 1.9443043131091343e-05, "loss": 0.5252, "step": 897 }, { "epoch": 0.13393989111790588, "grad_norm": 0.8488853573799133, "learning_rate": 1.944145203369726e-05, "loss": 0.6633, "step": 898 }, { "epoch": 0.1340890446714893, "grad_norm": 1.421862006187439, "learning_rate": 1.9439858732135657e-05, "loss": 0.6963, "step": 899 }, { "epoch": 0.1342381982250727, "grad_norm": 2.0769102573394775, "learning_rate": 1.9438263226778508e-05, "loss": 0.7024, "step": 900 }, { "epoch": 0.13438735177865613, "grad_norm": 1.4536478519439697, "learning_rate": 1.943666551799829e-05, "loss": 0.6365, "step": 901 }, { "epoch": 0.13453650533223954, "grad_norm": 1.5668044090270996, "learning_rate": 1.9435065606168e-05, "loss": 0.6198, "step": 902 }, { "epoch": 0.13468565888582296, "grad_norm": 1.8230884075164795, "learning_rate": 1.9433463491661143e-05, "loss": 0.7108, "step": 903 }, { "epoch": 0.13483481243940637, "grad_norm": 1.7247576713562012, "learning_rate": 1.9431859174851748e-05, "loss": 0.6483, "step": 904 }, { "epoch": 0.13498396599298979, "grad_norm": 1.3521872758865356, "learning_rate": 1.943025265611435e-05, "loss": 0.611, "step": 905 }, { "epoch": 0.1351331195465732, "grad_norm": 1.3995323181152344, "learning_rate": 1.9428643935824006e-05, "loss": 0.6387, "step": 906 }, { "epoch": 0.1352822731001566, "grad_norm": 1.4333465099334717, "learning_rate": 1.9427033014356276e-05, "loss": 0.6545, "step": 907 }, { "epoch": 0.13543142665374003, "grad_norm": 1.8076012134552002, "learning_rate": 1.942541989208724e-05, "loss": 0.7267, "step": 908 }, { "epoch": 0.13558058020732344, "grad_norm": 1.3868751525878906, "learning_rate": 1.9423804569393497e-05, "loss": 0.5543, "step": 909 }, { "epoch": 0.13572973376090686, "grad_norm": 1.603040337562561, "learning_rate": 1.942218704665215e-05, "loss": 0.581, "step": 910 }, { "epoch": 0.13587888731449027, "grad_norm": 1.9827330112457275, "learning_rate": 1.9420567324240822e-05, "loss": 0.6402, "step": 911 }, { "epoch": 0.13602804086807369, "grad_norm": 1.5093046426773071, "learning_rate": 1.9418945402537647e-05, "loss": 0.6176, "step": 912 }, { "epoch": 0.1361771944216571, "grad_norm": 1.5751417875289917, "learning_rate": 1.9417321281921275e-05, "loss": 0.593, "step": 913 }, { "epoch": 0.1363263479752405, "grad_norm": 1.8224979639053345, "learning_rate": 1.941569496277086e-05, "loss": 0.7302, "step": 914 }, { "epoch": 0.13647550152882393, "grad_norm": 1.5081151723861694, "learning_rate": 1.941406644546609e-05, "loss": 0.7286, "step": 915 }, { "epoch": 0.13662465508240734, "grad_norm": 0.948645293712616, "learning_rate": 1.941243573038714e-05, "loss": 0.7084, "step": 916 }, { "epoch": 0.13677380863599076, "grad_norm": 1.6339545249938965, "learning_rate": 1.9410802817914715e-05, "loss": 0.6057, "step": 917 }, { "epoch": 0.13692296218957417, "grad_norm": 1.4897102117538452, "learning_rate": 1.9409167708430036e-05, "loss": 0.5659, "step": 918 }, { "epoch": 0.13707211574315759, "grad_norm": 1.393404483795166, "learning_rate": 1.9407530402314818e-05, "loss": 0.661, "step": 919 }, { "epoch": 0.137221269296741, "grad_norm": 1.3721895217895508, "learning_rate": 1.9405890899951306e-05, "loss": 0.6574, "step": 920 }, { "epoch": 0.13737042285032441, "grad_norm": 1.5507398843765259, "learning_rate": 1.9404249201722255e-05, "loss": 0.6102, "step": 921 }, { "epoch": 0.13751957640390783, "grad_norm": 1.832520842552185, "learning_rate": 1.9402605308010924e-05, "loss": 0.6646, "step": 922 }, { "epoch": 0.13766872995749124, "grad_norm": 1.5094544887542725, "learning_rate": 1.9400959219201096e-05, "loss": 0.649, "step": 923 }, { "epoch": 0.13781788351107466, "grad_norm": 1.6475598812103271, "learning_rate": 1.939931093567706e-05, "loss": 0.6247, "step": 924 }, { "epoch": 0.13796703706465807, "grad_norm": 1.6897364854812622, "learning_rate": 1.9397660457823618e-05, "loss": 0.7016, "step": 925 }, { "epoch": 0.13811619061824149, "grad_norm": 1.3586102724075317, "learning_rate": 1.9396007786026085e-05, "loss": 0.6432, "step": 926 }, { "epoch": 0.1382653441718249, "grad_norm": 1.4555892944335938, "learning_rate": 1.9394352920670282e-05, "loss": 0.663, "step": 927 }, { "epoch": 0.13841449772540831, "grad_norm": 1.4866385459899902, "learning_rate": 1.9392695862142556e-05, "loss": 0.6762, "step": 928 }, { "epoch": 0.13856365127899173, "grad_norm": 1.4807230234146118, "learning_rate": 1.9391036610829753e-05, "loss": 0.6846, "step": 929 }, { "epoch": 0.13871280483257514, "grad_norm": 1.6604470014572144, "learning_rate": 1.9389375167119237e-05, "loss": 0.6318, "step": 930 }, { "epoch": 0.13886195838615856, "grad_norm": 1.718672275543213, "learning_rate": 1.9387711531398883e-05, "loss": 0.6067, "step": 931 }, { "epoch": 0.13901111193974197, "grad_norm": 1.5362226963043213, "learning_rate": 1.9386045704057083e-05, "loss": 0.7255, "step": 932 }, { "epoch": 0.13916026549332539, "grad_norm": 1.5970402956008911, "learning_rate": 1.9384377685482725e-05, "loss": 0.6544, "step": 933 }, { "epoch": 0.1393094190469088, "grad_norm": 1.6372041702270508, "learning_rate": 1.9382707476065224e-05, "loss": 0.6786, "step": 934 }, { "epoch": 0.13945857260049221, "grad_norm": 1.5289205312728882, "learning_rate": 1.9381035076194502e-05, "loss": 0.7005, "step": 935 }, { "epoch": 0.13960772615407563, "grad_norm": 1.690678596496582, "learning_rate": 1.9379360486260988e-05, "loss": 0.6754, "step": 936 }, { "epoch": 0.13975687970765904, "grad_norm": 1.5176031589508057, "learning_rate": 1.9377683706655626e-05, "loss": 0.6797, "step": 937 }, { "epoch": 0.13990603326124246, "grad_norm": 1.4545443058013916, "learning_rate": 1.9376004737769878e-05, "loss": 0.6127, "step": 938 }, { "epoch": 0.14005518681482587, "grad_norm": 1.4286227226257324, "learning_rate": 1.93743235799957e-05, "loss": 0.6737, "step": 939 }, { "epoch": 0.14020434036840929, "grad_norm": 1.7515792846679688, "learning_rate": 1.9372640233725576e-05, "loss": 0.5687, "step": 940 }, { "epoch": 0.1403534939219927, "grad_norm": 1.4060001373291016, "learning_rate": 1.937095469935249e-05, "loss": 0.6478, "step": 941 }, { "epoch": 0.14050264747557611, "grad_norm": 0.942544162273407, "learning_rate": 1.9369266977269946e-05, "loss": 0.6563, "step": 942 }, { "epoch": 0.14065180102915953, "grad_norm": 1.399582862854004, "learning_rate": 1.9367577067871948e-05, "loss": 0.6016, "step": 943 }, { "epoch": 0.14080095458274294, "grad_norm": 1.4572498798370361, "learning_rate": 1.9365884971553014e-05, "loss": 0.7329, "step": 944 }, { "epoch": 0.14095010813632636, "grad_norm": 1.5974618196487427, "learning_rate": 1.9364190688708184e-05, "loss": 0.6479, "step": 945 }, { "epoch": 0.14109926168990977, "grad_norm": 1.4281435012817383, "learning_rate": 1.9362494219732994e-05, "loss": 0.6153, "step": 946 }, { "epoch": 0.14124841524349319, "grad_norm": 1.6174651384353638, "learning_rate": 1.9360795565023494e-05, "loss": 0.7675, "step": 947 }, { "epoch": 0.1413975687970766, "grad_norm": 1.5844557285308838, "learning_rate": 1.9359094724976248e-05, "loss": 0.6116, "step": 948 }, { "epoch": 0.14154672235066001, "grad_norm": 1.3768600225448608, "learning_rate": 1.935739169998833e-05, "loss": 0.6101, "step": 949 }, { "epoch": 0.14169587590424343, "grad_norm": 1.521358609199524, "learning_rate": 1.9355686490457318e-05, "loss": 0.6723, "step": 950 }, { "epoch": 0.14184502945782684, "grad_norm": 2.1254284381866455, "learning_rate": 1.9353979096781304e-05, "loss": 0.6899, "step": 951 }, { "epoch": 0.14199418301141026, "grad_norm": 1.7371901273727417, "learning_rate": 1.9352269519358895e-05, "loss": 0.6062, "step": 952 }, { "epoch": 0.14214333656499367, "grad_norm": 1.6049017906188965, "learning_rate": 1.9350557758589195e-05, "loss": 0.6638, "step": 953 }, { "epoch": 0.1422924901185771, "grad_norm": 1.616687536239624, "learning_rate": 1.9348843814871836e-05, "loss": 0.5761, "step": 954 }, { "epoch": 0.1424416436721605, "grad_norm": 1.8408141136169434, "learning_rate": 1.934712768860694e-05, "loss": 0.5766, "step": 955 }, { "epoch": 0.14259079722574391, "grad_norm": 1.3992584943771362, "learning_rate": 1.9345409380195154e-05, "loss": 0.5988, "step": 956 }, { "epoch": 0.14273995077932733, "grad_norm": 1.2747400999069214, "learning_rate": 1.934368889003762e-05, "loss": 0.589, "step": 957 }, { "epoch": 0.14288910433291074, "grad_norm": 1.550287127494812, "learning_rate": 1.9341966218536007e-05, "loss": 0.6665, "step": 958 }, { "epoch": 0.14303825788649416, "grad_norm": 1.6733554601669312, "learning_rate": 1.9340241366092475e-05, "loss": 0.5455, "step": 959 }, { "epoch": 0.14318741144007757, "grad_norm": 1.2762788534164429, "learning_rate": 1.933851433310971e-05, "loss": 0.6051, "step": 960 }, { "epoch": 0.143336564993661, "grad_norm": 1.5110527276992798, "learning_rate": 1.9336785119990894e-05, "loss": 0.5517, "step": 961 }, { "epoch": 0.1434857185472444, "grad_norm": 1.5689961910247803, "learning_rate": 1.933505372713972e-05, "loss": 0.6791, "step": 962 }, { "epoch": 0.14363487210082782, "grad_norm": 4.542744159698486, "learning_rate": 1.9333320154960403e-05, "loss": 0.7387, "step": 963 }, { "epoch": 0.14378402565441123, "grad_norm": 1.7783539295196533, "learning_rate": 1.9331584403857645e-05, "loss": 0.7024, "step": 964 }, { "epoch": 0.14393317920799464, "grad_norm": 1.552791953086853, "learning_rate": 1.9329846474236676e-05, "loss": 0.6278, "step": 965 }, { "epoch": 0.14408233276157806, "grad_norm": 1.507901906967163, "learning_rate": 1.9328106366503227e-05, "loss": 0.5924, "step": 966 }, { "epoch": 0.14423148631516147, "grad_norm": 1.407757043838501, "learning_rate": 1.932636408106353e-05, "loss": 0.584, "step": 967 }, { "epoch": 0.1443806398687449, "grad_norm": 1.305061936378479, "learning_rate": 1.9324619618324338e-05, "loss": 0.6469, "step": 968 }, { "epoch": 0.14452979342232827, "grad_norm": 1.2945342063903809, "learning_rate": 1.9322872978692907e-05, "loss": 0.6095, "step": 969 }, { "epoch": 0.1446789469759117, "grad_norm": 1.415724277496338, "learning_rate": 1.9321124162577e-05, "loss": 0.7019, "step": 970 }, { "epoch": 0.1448281005294951, "grad_norm": 1.4556816816329956, "learning_rate": 1.9319373170384895e-05, "loss": 0.6325, "step": 971 }, { "epoch": 0.14497725408307852, "grad_norm": 1.5119601488113403, "learning_rate": 1.931762000252536e-05, "loss": 0.654, "step": 972 }, { "epoch": 0.14512640763666193, "grad_norm": 1.3662675619125366, "learning_rate": 1.9315864659407696e-05, "loss": 0.6273, "step": 973 }, { "epoch": 0.14527556119024534, "grad_norm": 1.5338358879089355, "learning_rate": 1.931410714144169e-05, "loss": 0.7065, "step": 974 }, { "epoch": 0.14542471474382876, "grad_norm": 1.9505730867385864, "learning_rate": 1.931234744903765e-05, "loss": 0.6697, "step": 975 }, { "epoch": 0.14557386829741217, "grad_norm": 1.4641551971435547, "learning_rate": 1.9310585582606385e-05, "loss": 0.7042, "step": 976 }, { "epoch": 0.1457230218509956, "grad_norm": 1.568439245223999, "learning_rate": 1.930882154255922e-05, "loss": 0.5913, "step": 977 }, { "epoch": 0.145872175404579, "grad_norm": 1.6481705904006958, "learning_rate": 1.9307055329307975e-05, "loss": 0.6965, "step": 978 }, { "epoch": 0.14602132895816242, "grad_norm": 1.590644359588623, "learning_rate": 1.930528694326499e-05, "loss": 0.743, "step": 979 }, { "epoch": 0.14617048251174583, "grad_norm": 1.4484859704971313, "learning_rate": 1.9303516384843093e-05, "loss": 0.6764, "step": 980 }, { "epoch": 0.14631963606532924, "grad_norm": 0.9182529449462891, "learning_rate": 1.9301743654455652e-05, "loss": 0.6304, "step": 981 }, { "epoch": 0.14646878961891266, "grad_norm": 1.5920664072036743, "learning_rate": 1.9299968752516505e-05, "loss": 0.7105, "step": 982 }, { "epoch": 0.14661794317249607, "grad_norm": 2.0303611755371094, "learning_rate": 1.9298191679440024e-05, "loss": 0.6461, "step": 983 }, { "epoch": 0.1467670967260795, "grad_norm": 1.5056872367858887, "learning_rate": 1.9296412435641073e-05, "loss": 0.6277, "step": 984 }, { "epoch": 0.1469162502796629, "grad_norm": 1.4650496244430542, "learning_rate": 1.929463102153503e-05, "loss": 0.6693, "step": 985 }, { "epoch": 0.14706540383324632, "grad_norm": 1.4713587760925293, "learning_rate": 1.9292847437537784e-05, "loss": 0.5867, "step": 986 }, { "epoch": 0.14721455738682973, "grad_norm": 1.6108994483947754, "learning_rate": 1.929106168406571e-05, "loss": 0.6741, "step": 987 }, { "epoch": 0.14736371094041315, "grad_norm": 1.6348682641983032, "learning_rate": 1.9289273761535713e-05, "loss": 0.6754, "step": 988 }, { "epoch": 0.14751286449399656, "grad_norm": 1.702499270439148, "learning_rate": 1.9287483670365193e-05, "loss": 0.7277, "step": 989 }, { "epoch": 0.14766201804757997, "grad_norm": 1.450744390487671, "learning_rate": 1.928569141097206e-05, "loss": 0.6643, "step": 990 }, { "epoch": 0.1478111716011634, "grad_norm": 1.3206202983856201, "learning_rate": 1.9283896983774727e-05, "loss": 0.5792, "step": 991 }, { "epoch": 0.1479603251547468, "grad_norm": 1.3496859073638916, "learning_rate": 1.9282100389192116e-05, "loss": 0.6398, "step": 992 }, { "epoch": 0.14810947870833022, "grad_norm": 1.5549994707107544, "learning_rate": 1.9280301627643647e-05, "loss": 0.6556, "step": 993 }, { "epoch": 0.14825863226191363, "grad_norm": 1.3517659902572632, "learning_rate": 1.927850069954926e-05, "loss": 0.6432, "step": 994 }, { "epoch": 0.14840778581549705, "grad_norm": 1.3966952562332153, "learning_rate": 1.9276697605329392e-05, "loss": 0.6313, "step": 995 }, { "epoch": 0.14855693936908046, "grad_norm": 1.884355068206787, "learning_rate": 1.9274892345404985e-05, "loss": 0.651, "step": 996 }, { "epoch": 0.14870609292266387, "grad_norm": 1.5374759435653687, "learning_rate": 1.9273084920197488e-05, "loss": 0.7294, "step": 997 }, { "epoch": 0.1488552464762473, "grad_norm": 1.4927349090576172, "learning_rate": 1.9271275330128856e-05, "loss": 0.6664, "step": 998 }, { "epoch": 0.1490044000298307, "grad_norm": 2.117462158203125, "learning_rate": 1.9269463575621552e-05, "loss": 0.6656, "step": 999 }, { "epoch": 0.14915355358341412, "grad_norm": 1.6184810400009155, "learning_rate": 1.926764965709854e-05, "loss": 0.6371, "step": 1000 }, { "epoch": 0.14930270713699753, "grad_norm": 1.7844403982162476, "learning_rate": 1.926583357498329e-05, "loss": 0.5948, "step": 1001 }, { "epoch": 0.14945186069058095, "grad_norm": 1.4174373149871826, "learning_rate": 1.926401532969978e-05, "loss": 0.5841, "step": 1002 }, { "epoch": 0.14960101424416436, "grad_norm": 1.6657294034957886, "learning_rate": 1.926219492167249e-05, "loss": 0.6595, "step": 1003 }, { "epoch": 0.14975016779774777, "grad_norm": 1.6455507278442383, "learning_rate": 1.9260372351326406e-05, "loss": 0.722, "step": 1004 }, { "epoch": 0.1498993213513312, "grad_norm": 1.5831420421600342, "learning_rate": 1.9258547619087017e-05, "loss": 0.6952, "step": 1005 }, { "epoch": 0.1500484749049146, "grad_norm": 1.4525619745254517, "learning_rate": 1.9256720725380323e-05, "loss": 0.6297, "step": 1006 }, { "epoch": 0.15019762845849802, "grad_norm": 1.4124433994293213, "learning_rate": 1.9254891670632823e-05, "loss": 0.5996, "step": 1007 }, { "epoch": 0.15034678201208143, "grad_norm": 1.3310402631759644, "learning_rate": 1.9253060455271516e-05, "loss": 0.6343, "step": 1008 }, { "epoch": 0.15049593556566485, "grad_norm": 1.3750406503677368, "learning_rate": 1.9251227079723917e-05, "loss": 0.5649, "step": 1009 }, { "epoch": 0.15064508911924826, "grad_norm": 1.7676305770874023, "learning_rate": 1.924939154441803e-05, "loss": 0.6413, "step": 1010 }, { "epoch": 0.15079424267283167, "grad_norm": 1.4614896774291992, "learning_rate": 1.924755384978239e-05, "loss": 0.6221, "step": 1011 }, { "epoch": 0.1509433962264151, "grad_norm": 1.514262318611145, "learning_rate": 1.9245713996246e-05, "loss": 0.6044, "step": 1012 }, { "epoch": 0.1510925497799985, "grad_norm": 1.7073415517807007, "learning_rate": 1.92438719842384e-05, "loss": 0.6396, "step": 1013 }, { "epoch": 0.15124170333358192, "grad_norm": 1.922240972518921, "learning_rate": 1.924202781418961e-05, "loss": 0.6703, "step": 1014 }, { "epoch": 0.15139085688716533, "grad_norm": 1.715849757194519, "learning_rate": 1.9240181486530166e-05, "loss": 0.6176, "step": 1015 }, { "epoch": 0.15154001044074875, "grad_norm": 1.4715633392333984, "learning_rate": 1.9238333001691107e-05, "loss": 0.6159, "step": 1016 }, { "epoch": 0.15168916399433216, "grad_norm": 1.595647931098938, "learning_rate": 1.923648236010397e-05, "loss": 0.6151, "step": 1017 }, { "epoch": 0.15183831754791557, "grad_norm": 1.7256382703781128, "learning_rate": 1.9234629562200805e-05, "loss": 0.6109, "step": 1018 }, { "epoch": 0.151987471101499, "grad_norm": 0.9697238802909851, "learning_rate": 1.9232774608414153e-05, "loss": 0.6425, "step": 1019 }, { "epoch": 0.1521366246550824, "grad_norm": 1.363922119140625, "learning_rate": 1.9230917499177067e-05, "loss": 0.5973, "step": 1020 }, { "epoch": 0.15228577820866582, "grad_norm": 1.4988878965377808, "learning_rate": 1.9229058234923104e-05, "loss": 0.6326, "step": 1021 }, { "epoch": 0.15243493176224923, "grad_norm": 1.5369861125946045, "learning_rate": 1.922719681608632e-05, "loss": 0.6824, "step": 1022 }, { "epoch": 0.15258408531583265, "grad_norm": 1.270534873008728, "learning_rate": 1.9225333243101275e-05, "loss": 0.6668, "step": 1023 }, { "epoch": 0.15273323886941606, "grad_norm": 1.48065984249115, "learning_rate": 1.9223467516403028e-05, "loss": 0.6604, "step": 1024 }, { "epoch": 0.15288239242299947, "grad_norm": 1.2985717058181763, "learning_rate": 1.922159963642715e-05, "loss": 0.5918, "step": 1025 }, { "epoch": 0.1530315459765829, "grad_norm": 1.45894193649292, "learning_rate": 1.9219729603609706e-05, "loss": 0.6082, "step": 1026 }, { "epoch": 0.1531806995301663, "grad_norm": 1.2777208089828491, "learning_rate": 1.921785741838727e-05, "loss": 0.5015, "step": 1027 }, { "epoch": 0.15332985308374972, "grad_norm": 1.4816066026687622, "learning_rate": 1.921598308119691e-05, "loss": 0.668, "step": 1028 }, { "epoch": 0.15347900663733313, "grad_norm": 1.8151222467422485, "learning_rate": 1.9214106592476215e-05, "loss": 0.6182, "step": 1029 }, { "epoch": 0.15362816019091655, "grad_norm": 1.664941668510437, "learning_rate": 1.9212227952663247e-05, "loss": 0.6943, "step": 1030 }, { "epoch": 0.15377731374449996, "grad_norm": 1.4634292125701904, "learning_rate": 1.9210347162196598e-05, "loss": 0.7207, "step": 1031 }, { "epoch": 0.15392646729808337, "grad_norm": 1.5645536184310913, "learning_rate": 1.9208464221515347e-05, "loss": 0.6709, "step": 1032 }, { "epoch": 0.1540756208516668, "grad_norm": 1.551578164100647, "learning_rate": 1.9206579131059076e-05, "loss": 0.7015, "step": 1033 }, { "epoch": 0.1542247744052502, "grad_norm": 1.405856728553772, "learning_rate": 1.9204691891267875e-05, "loss": 0.6727, "step": 1034 }, { "epoch": 0.15437392795883362, "grad_norm": 1.7385042905807495, "learning_rate": 1.9202802502582334e-05, "loss": 0.6578, "step": 1035 }, { "epoch": 0.15452308151241703, "grad_norm": 1.4064158201217651, "learning_rate": 1.9200910965443537e-05, "loss": 0.7013, "step": 1036 }, { "epoch": 0.15467223506600045, "grad_norm": 1.5435022115707397, "learning_rate": 1.919901728029308e-05, "loss": 0.709, "step": 1037 }, { "epoch": 0.15482138861958386, "grad_norm": 1.353402018547058, "learning_rate": 1.9197121447573053e-05, "loss": 0.6129, "step": 1038 }, { "epoch": 0.15497054217316727, "grad_norm": 1.4920427799224854, "learning_rate": 1.9195223467726056e-05, "loss": 0.5843, "step": 1039 }, { "epoch": 0.1551196957267507, "grad_norm": 1.7493505477905273, "learning_rate": 1.9193323341195175e-05, "loss": 0.5847, "step": 1040 }, { "epoch": 0.1552688492803341, "grad_norm": 1.4028246402740479, "learning_rate": 1.9191421068424017e-05, "loss": 0.6884, "step": 1041 }, { "epoch": 0.15541800283391752, "grad_norm": 1.4965529441833496, "learning_rate": 1.918951664985667e-05, "loss": 0.5548, "step": 1042 }, { "epoch": 0.15556715638750093, "grad_norm": 1.6072664260864258, "learning_rate": 1.918761008593774e-05, "loss": 0.61, "step": 1043 }, { "epoch": 0.15571630994108435, "grad_norm": 1.5268888473510742, "learning_rate": 1.9185701377112324e-05, "loss": 0.6307, "step": 1044 }, { "epoch": 0.15586546349466776, "grad_norm": 0.9760767817497253, "learning_rate": 1.9183790523826022e-05, "loss": 0.6576, "step": 1045 }, { "epoch": 0.15601461704825118, "grad_norm": 1.5051852464675903, "learning_rate": 1.9181877526524937e-05, "loss": 0.616, "step": 1046 }, { "epoch": 0.1561637706018346, "grad_norm": 1.29994797706604, "learning_rate": 1.9179962385655665e-05, "loss": 0.6081, "step": 1047 }, { "epoch": 0.156312924155418, "grad_norm": 1.6211333274841309, "learning_rate": 1.9178045101665317e-05, "loss": 0.6223, "step": 1048 }, { "epoch": 0.15646207770900142, "grad_norm": 1.5613127946853638, "learning_rate": 1.9176125675001487e-05, "loss": 0.6324, "step": 1049 }, { "epoch": 0.15661123126258483, "grad_norm": 2.069457769393921, "learning_rate": 1.917420410611228e-05, "loss": 0.6975, "step": 1050 }, { "epoch": 0.15676038481616825, "grad_norm": 1.4184070825576782, "learning_rate": 1.91722803954463e-05, "loss": 0.6248, "step": 1051 }, { "epoch": 0.15690953836975166, "grad_norm": 1.3615570068359375, "learning_rate": 1.917035454345265e-05, "loss": 0.5897, "step": 1052 }, { "epoch": 0.15705869192333508, "grad_norm": 1.7934825420379639, "learning_rate": 1.916842655058093e-05, "loss": 0.6576, "step": 1053 }, { "epoch": 0.1572078454769185, "grad_norm": 1.6616452932357788, "learning_rate": 1.9166496417281243e-05, "loss": 0.7032, "step": 1054 }, { "epoch": 0.1573569990305019, "grad_norm": 1.5213255882263184, "learning_rate": 1.916456414400419e-05, "loss": 0.6587, "step": 1055 }, { "epoch": 0.15750615258408532, "grad_norm": 1.7758487462997437, "learning_rate": 1.9162629731200878e-05, "loss": 0.7264, "step": 1056 }, { "epoch": 0.15765530613766873, "grad_norm": 1.4280145168304443, "learning_rate": 1.91606931793229e-05, "loss": 0.7115, "step": 1057 }, { "epoch": 0.15780445969125215, "grad_norm": 1.338557481765747, "learning_rate": 1.9158754488822366e-05, "loss": 0.5684, "step": 1058 }, { "epoch": 0.15795361324483556, "grad_norm": 1.6577867269515991, "learning_rate": 1.9156813660151866e-05, "loss": 0.5807, "step": 1059 }, { "epoch": 0.15810276679841898, "grad_norm": 1.6332929134368896, "learning_rate": 1.9154870693764504e-05, "loss": 0.7125, "step": 1060 }, { "epoch": 0.1582519203520024, "grad_norm": 1.8335760831832886, "learning_rate": 1.9152925590113878e-05, "loss": 0.6589, "step": 1061 }, { "epoch": 0.1584010739055858, "grad_norm": 1.5669751167297363, "learning_rate": 1.9150978349654082e-05, "loss": 0.6337, "step": 1062 }, { "epoch": 0.15855022745916922, "grad_norm": 1.3472189903259277, "learning_rate": 1.9149028972839718e-05, "loss": 0.7003, "step": 1063 }, { "epoch": 0.15869938101275263, "grad_norm": 1.4309176206588745, "learning_rate": 1.9147077460125873e-05, "loss": 0.6302, "step": 1064 }, { "epoch": 0.15884853456633605, "grad_norm": 1.5372934341430664, "learning_rate": 1.9145123811968145e-05, "loss": 0.5647, "step": 1065 }, { "epoch": 0.15899768811991946, "grad_norm": 1.5280526876449585, "learning_rate": 1.9143168028822623e-05, "loss": 0.6211, "step": 1066 }, { "epoch": 0.15914684167350288, "grad_norm": 1.3908239603042603, "learning_rate": 1.9141210111145898e-05, "loss": 0.6189, "step": 1067 }, { "epoch": 0.1592959952270863, "grad_norm": 1.4274159669876099, "learning_rate": 1.913925005939506e-05, "loss": 0.6776, "step": 1068 }, { "epoch": 0.1594451487806697, "grad_norm": 1.411412239074707, "learning_rate": 1.9137287874027696e-05, "loss": 0.6344, "step": 1069 }, { "epoch": 0.15959430233425312, "grad_norm": 1.3883172273635864, "learning_rate": 1.9135323555501885e-05, "loss": 0.6578, "step": 1070 }, { "epoch": 0.15974345588783653, "grad_norm": 1.4515570402145386, "learning_rate": 1.9133357104276218e-05, "loss": 0.7096, "step": 1071 }, { "epoch": 0.15989260944141995, "grad_norm": 1.7453056573867798, "learning_rate": 1.913138852080977e-05, "loss": 0.6371, "step": 1072 }, { "epoch": 0.16004176299500336, "grad_norm": 1.6641805171966553, "learning_rate": 1.912941780556212e-05, "loss": 0.6452, "step": 1073 }, { "epoch": 0.16019091654858678, "grad_norm": 1.3486908674240112, "learning_rate": 1.9127444958993345e-05, "loss": 0.5799, "step": 1074 }, { "epoch": 0.1603400701021702, "grad_norm": 1.4468834400177002, "learning_rate": 1.912546998156402e-05, "loss": 0.6558, "step": 1075 }, { "epoch": 0.1604892236557536, "grad_norm": 1.3956903219223022, "learning_rate": 1.9123492873735216e-05, "loss": 0.5668, "step": 1076 }, { "epoch": 0.16063837720933702, "grad_norm": 3.3424594402313232, "learning_rate": 1.9121513635968497e-05, "loss": 0.6772, "step": 1077 }, { "epoch": 0.16078753076292043, "grad_norm": 1.4657800197601318, "learning_rate": 1.9119532268725935e-05, "loss": 0.5216, "step": 1078 }, { "epoch": 0.16093668431650385, "grad_norm": 1.2940051555633545, "learning_rate": 1.9117548772470093e-05, "loss": 0.6551, "step": 1079 }, { "epoch": 0.16108583787008726, "grad_norm": 1.6287957429885864, "learning_rate": 1.9115563147664022e-05, "loss": 0.6774, "step": 1080 }, { "epoch": 0.16123499142367068, "grad_norm": 1.5144708156585693, "learning_rate": 1.9113575394771287e-05, "loss": 0.6856, "step": 1081 }, { "epoch": 0.1613841449772541, "grad_norm": 1.3280799388885498, "learning_rate": 1.9111585514255943e-05, "loss": 0.594, "step": 1082 }, { "epoch": 0.1615332985308375, "grad_norm": 1.3391056060791016, "learning_rate": 1.910959350658253e-05, "loss": 0.6467, "step": 1083 }, { "epoch": 0.16168245208442092, "grad_norm": 1.3103628158569336, "learning_rate": 1.9107599372216107e-05, "loss": 0.6746, "step": 1084 }, { "epoch": 0.16183160563800433, "grad_norm": 1.703957200050354, "learning_rate": 1.9105603111622212e-05, "loss": 0.6075, "step": 1085 }, { "epoch": 0.16198075919158775, "grad_norm": 1.9383080005645752, "learning_rate": 1.910360472526688e-05, "loss": 0.6783, "step": 1086 }, { "epoch": 0.16212991274517116, "grad_norm": 1.272289752960205, "learning_rate": 1.910160421361666e-05, "loss": 0.6124, "step": 1087 }, { "epoch": 0.16227906629875458, "grad_norm": 1.4213980436325073, "learning_rate": 1.9099601577138572e-05, "loss": 0.6876, "step": 1088 }, { "epoch": 0.162428219852338, "grad_norm": 1.3474937677383423, "learning_rate": 1.909759681630015e-05, "loss": 0.6026, "step": 1089 }, { "epoch": 0.1625773734059214, "grad_norm": 1.7115230560302734, "learning_rate": 1.9095589931569418e-05, "loss": 0.7699, "step": 1090 }, { "epoch": 0.16272652695950482, "grad_norm": 1.4891178607940674, "learning_rate": 1.909358092341489e-05, "loss": 0.7196, "step": 1091 }, { "epoch": 0.16287568051308823, "grad_norm": 1.3673642873764038, "learning_rate": 1.9091569792305593e-05, "loss": 0.6263, "step": 1092 }, { "epoch": 0.16302483406667165, "grad_norm": 1.7842583656311035, "learning_rate": 1.908955653871103e-05, "loss": 0.6139, "step": 1093 }, { "epoch": 0.16317398762025506, "grad_norm": 1.6124930381774902, "learning_rate": 1.908754116310121e-05, "loss": 0.6719, "step": 1094 }, { "epoch": 0.16332314117383848, "grad_norm": 1.5146113634109497, "learning_rate": 1.908552366594664e-05, "loss": 0.6237, "step": 1095 }, { "epoch": 0.1634722947274219, "grad_norm": 1.5498182773590088, "learning_rate": 1.9083504047718308e-05, "loss": 0.6734, "step": 1096 }, { "epoch": 0.1636214482810053, "grad_norm": 1.7749072313308716, "learning_rate": 1.9081482308887716e-05, "loss": 0.5904, "step": 1097 }, { "epoch": 0.16377060183458872, "grad_norm": 1.398216962814331, "learning_rate": 1.9079458449926847e-05, "loss": 0.6512, "step": 1098 }, { "epoch": 0.16391975538817213, "grad_norm": 1.6050751209259033, "learning_rate": 1.9077432471308182e-05, "loss": 0.6497, "step": 1099 }, { "epoch": 0.16406890894175555, "grad_norm": 1.3458592891693115, "learning_rate": 1.9075404373504705e-05, "loss": 0.6247, "step": 1100 }, { "epoch": 0.16421806249533896, "grad_norm": 1.6560689210891724, "learning_rate": 1.9073374156989888e-05, "loss": 0.6069, "step": 1101 }, { "epoch": 0.16436721604892238, "grad_norm": 1.4312423467636108, "learning_rate": 1.907134182223769e-05, "loss": 0.545, "step": 1102 }, { "epoch": 0.1645163696025058, "grad_norm": 1.510786533355713, "learning_rate": 1.906930736972258e-05, "loss": 0.6182, "step": 1103 }, { "epoch": 0.1646655231560892, "grad_norm": 1.7488878965377808, "learning_rate": 1.9067270799919512e-05, "loss": 0.6355, "step": 1104 }, { "epoch": 0.16481467670967262, "grad_norm": 1.5005724430084229, "learning_rate": 1.9065232113303934e-05, "loss": 0.6613, "step": 1105 }, { "epoch": 0.16496383026325603, "grad_norm": 1.589894413948059, "learning_rate": 1.9063191310351797e-05, "loss": 0.7185, "step": 1106 }, { "epoch": 0.16511298381683945, "grad_norm": 0.8852970600128174, "learning_rate": 1.9061148391539534e-05, "loss": 0.6578, "step": 1107 }, { "epoch": 0.16526213737042286, "grad_norm": 1.3562698364257812, "learning_rate": 1.9059103357344075e-05, "loss": 0.5951, "step": 1108 }, { "epoch": 0.16541129092400628, "grad_norm": 1.5450339317321777, "learning_rate": 1.905705620824285e-05, "loss": 0.6675, "step": 1109 }, { "epoch": 0.1655604444775897, "grad_norm": 2.186734676361084, "learning_rate": 1.9055006944713782e-05, "loss": 0.6028, "step": 1110 }, { "epoch": 0.1657095980311731, "grad_norm": 1.3572760820388794, "learning_rate": 1.905295556723528e-05, "loss": 0.6321, "step": 1111 }, { "epoch": 0.16585875158475652, "grad_norm": 1.5582304000854492, "learning_rate": 1.9050902076286253e-05, "loss": 0.6302, "step": 1112 }, { "epoch": 0.16600790513833993, "grad_norm": 1.6895772218704224, "learning_rate": 1.9048846472346102e-05, "loss": 0.6111, "step": 1113 }, { "epoch": 0.16615705869192335, "grad_norm": 1.524172067642212, "learning_rate": 1.9046788755894722e-05, "loss": 0.6331, "step": 1114 }, { "epoch": 0.16630621224550676, "grad_norm": 1.657718539237976, "learning_rate": 1.9044728927412495e-05, "loss": 0.7181, "step": 1115 }, { "epoch": 0.16645536579909018, "grad_norm": 1.4018399715423584, "learning_rate": 1.904266698738031e-05, "loss": 0.6341, "step": 1116 }, { "epoch": 0.1666045193526736, "grad_norm": 1.5382318496704102, "learning_rate": 1.904060293627953e-05, "loss": 0.6503, "step": 1117 }, { "epoch": 0.16675367290625698, "grad_norm": 1.58961021900177, "learning_rate": 1.903853677459203e-05, "loss": 0.6618, "step": 1118 }, { "epoch": 0.1669028264598404, "grad_norm": 1.6111150979995728, "learning_rate": 1.903646850280016e-05, "loss": 0.7222, "step": 1119 }, { "epoch": 0.1670519800134238, "grad_norm": 1.5633646249771118, "learning_rate": 1.903439812138678e-05, "loss": 0.649, "step": 1120 }, { "epoch": 0.16720113356700722, "grad_norm": 1.5280356407165527, "learning_rate": 1.9032325630835227e-05, "loss": 0.5771, "step": 1121 }, { "epoch": 0.16735028712059064, "grad_norm": 1.8484342098236084, "learning_rate": 1.9030251031629338e-05, "loss": 0.678, "step": 1122 }, { "epoch": 0.16749944067417405, "grad_norm": 1.5404763221740723, "learning_rate": 1.902817432425345e-05, "loss": 0.6086, "step": 1123 }, { "epoch": 0.16764859422775746, "grad_norm": 1.3896712064743042, "learning_rate": 1.902609550919237e-05, "loss": 0.5853, "step": 1124 }, { "epoch": 0.16779774778134088, "grad_norm": 1.4394842386245728, "learning_rate": 1.902401458693142e-05, "loss": 0.677, "step": 1125 }, { "epoch": 0.1679469013349243, "grad_norm": 1.500293493270874, "learning_rate": 1.9021931557956404e-05, "loss": 0.7024, "step": 1126 }, { "epoch": 0.1680960548885077, "grad_norm": 1.4812233448028564, "learning_rate": 1.9019846422753615e-05, "loss": 0.7197, "step": 1127 }, { "epoch": 0.16824520844209112, "grad_norm": 1.535314917564392, "learning_rate": 1.9017759181809846e-05, "loss": 0.6618, "step": 1128 }, { "epoch": 0.16839436199567454, "grad_norm": 0.9263057708740234, "learning_rate": 1.9015669835612375e-05, "loss": 0.6661, "step": 1129 }, { "epoch": 0.16854351554925795, "grad_norm": 1.7342206239700317, "learning_rate": 1.9013578384648968e-05, "loss": 0.7329, "step": 1130 }, { "epoch": 0.16869266910284136, "grad_norm": 1.5662215948104858, "learning_rate": 1.901148482940789e-05, "loss": 0.6653, "step": 1131 }, { "epoch": 0.16884182265642478, "grad_norm": 1.5517185926437378, "learning_rate": 1.90093891703779e-05, "loss": 0.6245, "step": 1132 }, { "epoch": 0.1689909762100082, "grad_norm": 1.7379666566848755, "learning_rate": 1.9007291408048238e-05, "loss": 0.621, "step": 1133 }, { "epoch": 0.1691401297635916, "grad_norm": 1.519832968711853, "learning_rate": 1.900519154290864e-05, "loss": 0.625, "step": 1134 }, { "epoch": 0.16928928331717502, "grad_norm": 1.611550211906433, "learning_rate": 1.900308957544934e-05, "loss": 0.6739, "step": 1135 }, { "epoch": 0.16943843687075844, "grad_norm": 2.043959379196167, "learning_rate": 1.9000985506161047e-05, "loss": 0.6965, "step": 1136 }, { "epoch": 0.16958759042434185, "grad_norm": 1.5628546476364136, "learning_rate": 1.8998879335534973e-05, "loss": 0.636, "step": 1137 }, { "epoch": 0.16973674397792526, "grad_norm": 1.3309130668640137, "learning_rate": 1.899677106406282e-05, "loss": 0.6554, "step": 1138 }, { "epoch": 0.16988589753150868, "grad_norm": 1.4749866724014282, "learning_rate": 1.899466069223677e-05, "loss": 0.5647, "step": 1139 }, { "epoch": 0.1700350510850921, "grad_norm": 1.4821521043777466, "learning_rate": 1.899254822054951e-05, "loss": 0.6495, "step": 1140 }, { "epoch": 0.1701842046386755, "grad_norm": 1.5176280736923218, "learning_rate": 1.899043364949421e-05, "loss": 0.6125, "step": 1141 }, { "epoch": 0.17033335819225892, "grad_norm": 1.3631629943847656, "learning_rate": 1.8988316979564523e-05, "loss": 0.6278, "step": 1142 }, { "epoch": 0.17048251174584234, "grad_norm": 1.4002752304077148, "learning_rate": 1.8986198211254604e-05, "loss": 0.6922, "step": 1143 }, { "epoch": 0.17063166529942575, "grad_norm": 1.3879475593566895, "learning_rate": 1.8984077345059092e-05, "loss": 0.6433, "step": 1144 }, { "epoch": 0.17078081885300916, "grad_norm": 1.2906699180603027, "learning_rate": 1.8981954381473122e-05, "loss": 0.4885, "step": 1145 }, { "epoch": 0.17092997240659258, "grad_norm": 1.4701439142227173, "learning_rate": 1.8979829320992307e-05, "loss": 0.7159, "step": 1146 }, { "epoch": 0.171079125960176, "grad_norm": 1.3974473476409912, "learning_rate": 1.8977702164112757e-05, "loss": 0.6375, "step": 1147 }, { "epoch": 0.1712282795137594, "grad_norm": 1.3830673694610596, "learning_rate": 1.897557291133107e-05, "loss": 0.6179, "step": 1148 }, { "epoch": 0.17137743306734282, "grad_norm": 1.5369625091552734, "learning_rate": 1.8973441563144338e-05, "loss": 0.6061, "step": 1149 }, { "epoch": 0.17152658662092624, "grad_norm": 1.3398537635803223, "learning_rate": 1.8971308120050133e-05, "loss": 0.6519, "step": 1150 }, { "epoch": 0.17167574017450965, "grad_norm": 0.9115089774131775, "learning_rate": 1.8969172582546528e-05, "loss": 0.7202, "step": 1151 }, { "epoch": 0.17182489372809306, "grad_norm": 1.3985161781311035, "learning_rate": 1.8967034951132066e-05, "loss": 0.5797, "step": 1152 }, { "epoch": 0.17197404728167648, "grad_norm": 1.5053555965423584, "learning_rate": 1.8964895226305802e-05, "loss": 0.7262, "step": 1153 }, { "epoch": 0.1721232008352599, "grad_norm": 1.519168734550476, "learning_rate": 1.8962753408567263e-05, "loss": 0.66, "step": 1154 }, { "epoch": 0.1722723543888433, "grad_norm": 1.2498778104782104, "learning_rate": 1.896060949841647e-05, "loss": 0.5583, "step": 1155 }, { "epoch": 0.17242150794242672, "grad_norm": 1.3280532360076904, "learning_rate": 1.8958463496353935e-05, "loss": 0.6463, "step": 1156 }, { "epoch": 0.17257066149601014, "grad_norm": 1.4712183475494385, "learning_rate": 1.8956315402880655e-05, "loss": 0.631, "step": 1157 }, { "epoch": 0.17271981504959355, "grad_norm": 1.6997454166412354, "learning_rate": 1.8954165218498115e-05, "loss": 0.719, "step": 1158 }, { "epoch": 0.17286896860317696, "grad_norm": 1.5430630445480347, "learning_rate": 1.895201294370829e-05, "loss": 0.6249, "step": 1159 }, { "epoch": 0.17301812215676038, "grad_norm": 1.5205302238464355, "learning_rate": 1.8949858579013645e-05, "loss": 0.5876, "step": 1160 }, { "epoch": 0.1731672757103438, "grad_norm": 1.3417888879776, "learning_rate": 1.8947702124917126e-05, "loss": 0.6504, "step": 1161 }, { "epoch": 0.1733164292639272, "grad_norm": 1.4637010097503662, "learning_rate": 1.8945543581922173e-05, "loss": 0.6122, "step": 1162 }, { "epoch": 0.17346558281751062, "grad_norm": 1.5515425205230713, "learning_rate": 1.8943382950532713e-05, "loss": 0.6202, "step": 1163 }, { "epoch": 0.17361473637109404, "grad_norm": 1.3739547729492188, "learning_rate": 1.894122023125316e-05, "loss": 0.6216, "step": 1164 }, { "epoch": 0.17376388992467745, "grad_norm": 1.586924433708191, "learning_rate": 1.8939055424588407e-05, "loss": 0.6442, "step": 1165 }, { "epoch": 0.17391304347826086, "grad_norm": 1.7484856843948364, "learning_rate": 1.8936888531043853e-05, "loss": 0.6532, "step": 1166 }, { "epoch": 0.17406219703184428, "grad_norm": 1.6698132753372192, "learning_rate": 1.8934719551125364e-05, "loss": 0.6221, "step": 1167 }, { "epoch": 0.1742113505854277, "grad_norm": 1.8987964391708374, "learning_rate": 1.8932548485339304e-05, "loss": 0.653, "step": 1168 }, { "epoch": 0.1743605041390111, "grad_norm": 1.4403737783432007, "learning_rate": 1.893037533419253e-05, "loss": 0.6741, "step": 1169 }, { "epoch": 0.17450965769259452, "grad_norm": 1.3815851211547852, "learning_rate": 1.8928200098192372e-05, "loss": 0.6735, "step": 1170 }, { "epoch": 0.17465881124617794, "grad_norm": 1.5761692523956299, "learning_rate": 1.8926022777846647e-05, "loss": 0.6851, "step": 1171 }, { "epoch": 0.17480796479976135, "grad_norm": 1.5829963684082031, "learning_rate": 1.8923843373663676e-05, "loss": 0.5978, "step": 1172 }, { "epoch": 0.17495711835334476, "grad_norm": 1.9175711870193481, "learning_rate": 1.8921661886152248e-05, "loss": 0.5947, "step": 1173 }, { "epoch": 0.17510627190692818, "grad_norm": 1.4320920705795288, "learning_rate": 1.8919478315821646e-05, "loss": 0.6839, "step": 1174 }, { "epoch": 0.1752554254605116, "grad_norm": 1.569101333618164, "learning_rate": 1.8917292663181638e-05, "loss": 0.5262, "step": 1175 }, { "epoch": 0.175404579014095, "grad_norm": 1.36593759059906, "learning_rate": 1.8915104928742484e-05, "loss": 0.6562, "step": 1176 }, { "epoch": 0.17555373256767842, "grad_norm": 1.4847958087921143, "learning_rate": 1.8912915113014918e-05, "loss": 0.6878, "step": 1177 }, { "epoch": 0.17570288612126184, "grad_norm": 1.5202105045318604, "learning_rate": 1.8910723216510168e-05, "loss": 0.5718, "step": 1178 }, { "epoch": 0.17585203967484525, "grad_norm": 1.4339072704315186, "learning_rate": 1.8908529239739946e-05, "loss": 0.6111, "step": 1179 }, { "epoch": 0.17600119322842867, "grad_norm": 1.49117112159729, "learning_rate": 1.8906333183216455e-05, "loss": 0.6299, "step": 1180 }, { "epoch": 0.17615034678201208, "grad_norm": 1.3770793676376343, "learning_rate": 1.890413504745237e-05, "loss": 0.6323, "step": 1181 }, { "epoch": 0.1762995003355955, "grad_norm": 1.5681291818618774, "learning_rate": 1.890193483296087e-05, "loss": 0.661, "step": 1182 }, { "epoch": 0.1764486538891789, "grad_norm": 1.4671696424484253, "learning_rate": 1.88997325402556e-05, "loss": 0.5329, "step": 1183 }, { "epoch": 0.17659780744276232, "grad_norm": 1.5009853839874268, "learning_rate": 1.8897528169850706e-05, "loss": 0.6746, "step": 1184 }, { "epoch": 0.17674696099634574, "grad_norm": 1.946761965751648, "learning_rate": 1.8895321722260806e-05, "loss": 0.6197, "step": 1185 }, { "epoch": 0.17689611454992915, "grad_norm": 1.4566432237625122, "learning_rate": 1.8893113198001015e-05, "loss": 0.6443, "step": 1186 }, { "epoch": 0.17704526810351257, "grad_norm": 1.4040168523788452, "learning_rate": 1.8890902597586926e-05, "loss": 0.6558, "step": 1187 }, { "epoch": 0.17719442165709598, "grad_norm": 1.4405826330184937, "learning_rate": 1.8888689921534612e-05, "loss": 0.5828, "step": 1188 }, { "epoch": 0.1773435752106794, "grad_norm": 1.5154199600219727, "learning_rate": 1.8886475170360644e-05, "loss": 0.6355, "step": 1189 }, { "epoch": 0.1774927287642628, "grad_norm": 1.4301873445510864, "learning_rate": 1.888425834458207e-05, "loss": 0.6242, "step": 1190 }, { "epoch": 0.17764188231784622, "grad_norm": 1.282226800918579, "learning_rate": 1.8882039444716417e-05, "loss": 0.6128, "step": 1191 }, { "epoch": 0.17779103587142964, "grad_norm": 1.3548729419708252, "learning_rate": 1.8879818471281703e-05, "loss": 0.5787, "step": 1192 }, { "epoch": 0.17794018942501305, "grad_norm": 1.9704927206039429, "learning_rate": 1.8877595424796425e-05, "loss": 0.649, "step": 1193 }, { "epoch": 0.17808934297859647, "grad_norm": 1.418317437171936, "learning_rate": 1.887537030577958e-05, "loss": 0.5958, "step": 1194 }, { "epoch": 0.17823849653217988, "grad_norm": 1.421128511428833, "learning_rate": 1.887314311475062e-05, "loss": 0.5578, "step": 1195 }, { "epoch": 0.1783876500857633, "grad_norm": 1.4407752752304077, "learning_rate": 1.8870913852229513e-05, "loss": 0.7014, "step": 1196 }, { "epoch": 0.1785368036393467, "grad_norm": 1.3606255054473877, "learning_rate": 1.886868251873668e-05, "loss": 0.6656, "step": 1197 }, { "epoch": 0.17868595719293012, "grad_norm": 1.3811169862747192, "learning_rate": 1.886644911479305e-05, "loss": 0.5323, "step": 1198 }, { "epoch": 0.17883511074651354, "grad_norm": 1.0009609460830688, "learning_rate": 1.8864213640920023e-05, "loss": 0.6675, "step": 1199 }, { "epoch": 0.17898426430009695, "grad_norm": 1.4730992317199707, "learning_rate": 1.886197609763948e-05, "loss": 0.5761, "step": 1200 }, { "epoch": 0.17913341785368037, "grad_norm": 1.4595147371292114, "learning_rate": 1.88597364854738e-05, "loss": 0.6338, "step": 1201 }, { "epoch": 0.17928257140726378, "grad_norm": 1.450883388519287, "learning_rate": 1.8857494804945822e-05, "loss": 0.6257, "step": 1202 }, { "epoch": 0.1794317249608472, "grad_norm": 1.472597360610962, "learning_rate": 1.885525105657889e-05, "loss": 0.7045, "step": 1203 }, { "epoch": 0.1795808785144306, "grad_norm": 1.622525691986084, "learning_rate": 1.8853005240896818e-05, "loss": 0.6158, "step": 1204 }, { "epoch": 0.17973003206801402, "grad_norm": 0.8600418567657471, "learning_rate": 1.8850757358423907e-05, "loss": 0.6295, "step": 1205 }, { "epoch": 0.17987918562159744, "grad_norm": 1.3104486465454102, "learning_rate": 1.884850740968494e-05, "loss": 0.6024, "step": 1206 }, { "epoch": 0.18002833917518085, "grad_norm": 1.7322404384613037, "learning_rate": 1.884625539520518e-05, "loss": 0.6811, "step": 1207 }, { "epoch": 0.18017749272876427, "grad_norm": 2.031306266784668, "learning_rate": 1.8844001315510375e-05, "loss": 0.6888, "step": 1208 }, { "epoch": 0.18032664628234768, "grad_norm": 1.720130205154419, "learning_rate": 1.8841745171126757e-05, "loss": 0.5969, "step": 1209 }, { "epoch": 0.1804757998359311, "grad_norm": 1.532108187675476, "learning_rate": 1.8839486962581035e-05, "loss": 0.6957, "step": 1210 }, { "epoch": 0.1806249533895145, "grad_norm": 0.9644238352775574, "learning_rate": 1.88372266904004e-05, "loss": 0.6456, "step": 1211 }, { "epoch": 0.18077410694309792, "grad_norm": 1.935927391052246, "learning_rate": 1.8834964355112532e-05, "loss": 0.5885, "step": 1212 }, { "epoch": 0.18092326049668134, "grad_norm": 1.5631916522979736, "learning_rate": 1.8832699957245585e-05, "loss": 0.6695, "step": 1213 }, { "epoch": 0.18107241405026475, "grad_norm": 1.4092533588409424, "learning_rate": 1.8830433497328194e-05, "loss": 0.6652, "step": 1214 }, { "epoch": 0.18122156760384817, "grad_norm": 1.6805059909820557, "learning_rate": 1.8828164975889486e-05, "loss": 0.6374, "step": 1215 }, { "epoch": 0.18137072115743158, "grad_norm": 1.61348295211792, "learning_rate": 1.8825894393459058e-05, "loss": 0.6136, "step": 1216 }, { "epoch": 0.181519874711015, "grad_norm": 1.5685430765151978, "learning_rate": 1.882362175056699e-05, "loss": 0.5965, "step": 1217 }, { "epoch": 0.1816690282645984, "grad_norm": 1.4745314121246338, "learning_rate": 1.8821347047743846e-05, "loss": 0.581, "step": 1218 }, { "epoch": 0.18181818181818182, "grad_norm": 0.8973982930183411, "learning_rate": 1.8819070285520673e-05, "loss": 0.6612, "step": 1219 }, { "epoch": 0.18196733537176524, "grad_norm": 1.3091226816177368, "learning_rate": 1.8816791464428993e-05, "loss": 0.6214, "step": 1220 }, { "epoch": 0.18211648892534865, "grad_norm": 1.3595060110092163, "learning_rate": 1.8814510585000813e-05, "loss": 0.6454, "step": 1221 }, { "epoch": 0.18226564247893207, "grad_norm": 1.2475500106811523, "learning_rate": 1.8812227647768616e-05, "loss": 0.5991, "step": 1222 }, { "epoch": 0.18241479603251548, "grad_norm": 1.6719046831130981, "learning_rate": 1.880994265326537e-05, "loss": 0.7109, "step": 1223 }, { "epoch": 0.1825639495860989, "grad_norm": 1.2738361358642578, "learning_rate": 1.8807655602024523e-05, "loss": 0.5798, "step": 1224 }, { "epoch": 0.1827131031396823, "grad_norm": 1.2956708669662476, "learning_rate": 1.8805366494580002e-05, "loss": 0.6329, "step": 1225 }, { "epoch": 0.18286225669326572, "grad_norm": 1.5879621505737305, "learning_rate": 1.880307533146621e-05, "loss": 0.5614, "step": 1226 }, { "epoch": 0.18301141024684914, "grad_norm": 1.8932478427886963, "learning_rate": 1.8800782113218038e-05, "loss": 0.6862, "step": 1227 }, { "epoch": 0.18316056380043255, "grad_norm": 1.6981372833251953, "learning_rate": 1.879848684037085e-05, "loss": 0.6657, "step": 1228 }, { "epoch": 0.18330971735401597, "grad_norm": 1.758528470993042, "learning_rate": 1.8796189513460495e-05, "loss": 0.6097, "step": 1229 }, { "epoch": 0.18345887090759938, "grad_norm": 1.5783731937408447, "learning_rate": 1.8793890133023295e-05, "loss": 0.6391, "step": 1230 }, { "epoch": 0.1836080244611828, "grad_norm": 1.4699631929397583, "learning_rate": 1.8791588699596057e-05, "loss": 0.6569, "step": 1231 }, { "epoch": 0.1837571780147662, "grad_norm": 1.2237045764923096, "learning_rate": 1.878928521371606e-05, "loss": 0.6418, "step": 1232 }, { "epoch": 0.18390633156834962, "grad_norm": 1.5339936017990112, "learning_rate": 1.878697967592108e-05, "loss": 0.6721, "step": 1233 }, { "epoch": 0.18405548512193304, "grad_norm": 1.364813208580017, "learning_rate": 1.878467208674935e-05, "loss": 0.7578, "step": 1234 }, { "epoch": 0.18420463867551645, "grad_norm": 1.4014410972595215, "learning_rate": 1.8782362446739594e-05, "loss": 0.6122, "step": 1235 }, { "epoch": 0.18435379222909987, "grad_norm": 0.8798079490661621, "learning_rate": 1.878005075643101e-05, "loss": 0.6419, "step": 1236 }, { "epoch": 0.18450294578268328, "grad_norm": 1.8743327856063843, "learning_rate": 1.877773701636328e-05, "loss": 0.6757, "step": 1237 }, { "epoch": 0.1846520993362667, "grad_norm": 1.418105125427246, "learning_rate": 1.877542122707656e-05, "loss": 0.5776, "step": 1238 }, { "epoch": 0.1848012528898501, "grad_norm": 1.4556788206100464, "learning_rate": 1.8773103389111486e-05, "loss": 0.5999, "step": 1239 }, { "epoch": 0.18495040644343352, "grad_norm": 1.3604297637939453, "learning_rate": 1.8770783503009174e-05, "loss": 0.6663, "step": 1240 }, { "epoch": 0.18509955999701694, "grad_norm": 1.5168594121932983, "learning_rate": 1.8768461569311215e-05, "loss": 0.6807, "step": 1241 }, { "epoch": 0.18524871355060035, "grad_norm": 1.4219532012939453, "learning_rate": 1.8766137588559676e-05, "loss": 0.5605, "step": 1242 }, { "epoch": 0.18539786710418377, "grad_norm": 1.2603161334991455, "learning_rate": 1.876381156129711e-05, "loss": 0.5746, "step": 1243 }, { "epoch": 0.18554702065776718, "grad_norm": 1.4637203216552734, "learning_rate": 1.876148348806654e-05, "loss": 0.5741, "step": 1244 }, { "epoch": 0.1856961742113506, "grad_norm": 1.4053809642791748, "learning_rate": 1.875915336941147e-05, "loss": 0.6406, "step": 1245 }, { "epoch": 0.185845327764934, "grad_norm": 1.2872384786605835, "learning_rate": 1.8756821205875885e-05, "loss": 0.6558, "step": 1246 }, { "epoch": 0.18599448131851742, "grad_norm": 1.4855068922042847, "learning_rate": 1.875448699800424e-05, "loss": 0.6228, "step": 1247 }, { "epoch": 0.18614363487210084, "grad_norm": 1.4418187141418457, "learning_rate": 1.8752150746341468e-05, "loss": 0.6197, "step": 1248 }, { "epoch": 0.18629278842568425, "grad_norm": 1.4935717582702637, "learning_rate": 1.874981245143299e-05, "loss": 0.6265, "step": 1249 }, { "epoch": 0.18644194197926767, "grad_norm": 1.5026881694793701, "learning_rate": 1.8747472113824687e-05, "loss": 0.7629, "step": 1250 }, { "epoch": 0.18659109553285108, "grad_norm": 1.315131425857544, "learning_rate": 1.8745129734062934e-05, "loss": 0.5708, "step": 1251 }, { "epoch": 0.1867402490864345, "grad_norm": 1.41728937625885, "learning_rate": 1.8742785312694564e-05, "loss": 0.6905, "step": 1252 }, { "epoch": 0.1868894026400179, "grad_norm": 1.2602134943008423, "learning_rate": 1.8740438850266907e-05, "loss": 0.4963, "step": 1253 }, { "epoch": 0.18703855619360132, "grad_norm": 1.3588732481002808, "learning_rate": 1.873809034732776e-05, "loss": 0.6461, "step": 1254 }, { "epoch": 0.18718770974718474, "grad_norm": 1.3598297834396362, "learning_rate": 1.873573980442539e-05, "loss": 0.617, "step": 1255 }, { "epoch": 0.18733686330076815, "grad_norm": 1.509682059288025, "learning_rate": 1.8733387222108546e-05, "loss": 0.7025, "step": 1256 }, { "epoch": 0.18748601685435157, "grad_norm": 1.6145687103271484, "learning_rate": 1.873103260092646e-05, "loss": 0.7007, "step": 1257 }, { "epoch": 0.18763517040793498, "grad_norm": 1.370503306388855, "learning_rate": 1.8728675941428827e-05, "loss": 0.6036, "step": 1258 }, { "epoch": 0.1877843239615184, "grad_norm": 1.2320828437805176, "learning_rate": 1.872631724416583e-05, "loss": 0.6401, "step": 1259 }, { "epoch": 0.1879334775151018, "grad_norm": 1.7329795360565186, "learning_rate": 1.8723956509688115e-05, "loss": 0.6935, "step": 1260 }, { "epoch": 0.18808263106868522, "grad_norm": 1.319450855255127, "learning_rate": 1.8721593738546815e-05, "loss": 0.5994, "step": 1261 }, { "epoch": 0.18823178462226864, "grad_norm": 1.6344311237335205, "learning_rate": 1.8719228931293537e-05, "loss": 0.6979, "step": 1262 }, { "epoch": 0.18838093817585205, "grad_norm": 1.5964792966842651, "learning_rate": 1.8716862088480353e-05, "loss": 0.5742, "step": 1263 }, { "epoch": 0.18853009172943547, "grad_norm": 1.481916904449463, "learning_rate": 1.8714493210659824e-05, "loss": 0.6539, "step": 1264 }, { "epoch": 0.18867924528301888, "grad_norm": 1.3767485618591309, "learning_rate": 1.8712122298384977e-05, "loss": 0.6538, "step": 1265 }, { "epoch": 0.1888283988366023, "grad_norm": 1.428017497062683, "learning_rate": 1.8709749352209315e-05, "loss": 0.6907, "step": 1266 }, { "epoch": 0.18897755239018568, "grad_norm": 0.8932793140411377, "learning_rate": 1.8707374372686825e-05, "loss": 0.6676, "step": 1267 }, { "epoch": 0.1891267059437691, "grad_norm": 1.3861432075500488, "learning_rate": 1.870499736037195e-05, "loss": 0.6978, "step": 1268 }, { "epoch": 0.1892758594973525, "grad_norm": 1.4558639526367188, "learning_rate": 1.8702618315819628e-05, "loss": 0.5808, "step": 1269 }, { "epoch": 0.18942501305093593, "grad_norm": 1.5559412240982056, "learning_rate": 1.8700237239585253e-05, "loss": 0.6192, "step": 1270 }, { "epoch": 0.18957416660451934, "grad_norm": 1.5399868488311768, "learning_rate": 1.8697854132224713e-05, "loss": 0.6231, "step": 1271 }, { "epoch": 0.18972332015810275, "grad_norm": 1.5013521909713745, "learning_rate": 1.8695468994294355e-05, "loss": 0.5669, "step": 1272 }, { "epoch": 0.18987247371168617, "grad_norm": 1.5841975212097168, "learning_rate": 1.8693081826351002e-05, "loss": 0.582, "step": 1273 }, { "epoch": 0.19002162726526958, "grad_norm": 1.3623557090759277, "learning_rate": 1.869069262895196e-05, "loss": 0.5854, "step": 1274 }, { "epoch": 0.190170780818853, "grad_norm": 1.6275941133499146, "learning_rate": 1.8688301402654995e-05, "loss": 0.5605, "step": 1275 }, { "epoch": 0.1903199343724364, "grad_norm": 1.6300413608551025, "learning_rate": 1.8685908148018362e-05, "loss": 0.7547, "step": 1276 }, { "epoch": 0.19046908792601983, "grad_norm": 1.3830575942993164, "learning_rate": 1.868351286560077e-05, "loss": 0.6221, "step": 1277 }, { "epoch": 0.19061824147960324, "grad_norm": 1.9525814056396484, "learning_rate": 1.868111555596143e-05, "loss": 0.7019, "step": 1278 }, { "epoch": 0.19076739503318665, "grad_norm": 1.3397562503814697, "learning_rate": 1.8678716219659992e-05, "loss": 0.5649, "step": 1279 }, { "epoch": 0.19091654858677007, "grad_norm": 1.5691924095153809, "learning_rate": 1.867631485725661e-05, "loss": 0.6107, "step": 1280 }, { "epoch": 0.19106570214035348, "grad_norm": 1.400588870048523, "learning_rate": 1.867391146931189e-05, "loss": 0.6078, "step": 1281 }, { "epoch": 0.1912148556939369, "grad_norm": 1.376910924911499, "learning_rate": 1.8671506056386918e-05, "loss": 0.734, "step": 1282 }, { "epoch": 0.1913640092475203, "grad_norm": 0.8464600443840027, "learning_rate": 1.866909861904326e-05, "loss": 0.6257, "step": 1283 }, { "epoch": 0.19151316280110373, "grad_norm": 1.4386844635009766, "learning_rate": 1.8666689157842935e-05, "loss": 0.6205, "step": 1284 }, { "epoch": 0.19166231635468714, "grad_norm": 1.4984304904937744, "learning_rate": 1.8664277673348463e-05, "loss": 0.6573, "step": 1285 }, { "epoch": 0.19181146990827055, "grad_norm": 1.2374540567398071, "learning_rate": 1.866186416612281e-05, "loss": 0.5978, "step": 1286 }, { "epoch": 0.19196062346185397, "grad_norm": 1.4786046743392944, "learning_rate": 1.8659448636729426e-05, "loss": 0.5656, "step": 1287 }, { "epoch": 0.19210977701543738, "grad_norm": 1.6280573606491089, "learning_rate": 1.865703108573223e-05, "loss": 0.7041, "step": 1288 }, { "epoch": 0.1922589305690208, "grad_norm": 1.563763976097107, "learning_rate": 1.8654611513695622e-05, "loss": 0.6851, "step": 1289 }, { "epoch": 0.1924080841226042, "grad_norm": 1.4373338222503662, "learning_rate": 1.8652189921184462e-05, "loss": 0.6272, "step": 1290 }, { "epoch": 0.19255723767618763, "grad_norm": 1.4515392780303955, "learning_rate": 1.8649766308764085e-05, "loss": 0.5669, "step": 1291 }, { "epoch": 0.19270639122977104, "grad_norm": 0.9444079995155334, "learning_rate": 1.8647340677000302e-05, "loss": 0.6559, "step": 1292 }, { "epoch": 0.19285554478335445, "grad_norm": 1.4180933237075806, "learning_rate": 1.864491302645939e-05, "loss": 0.65, "step": 1293 }, { "epoch": 0.19300469833693787, "grad_norm": 1.4296058416366577, "learning_rate": 1.8642483357708102e-05, "loss": 0.6809, "step": 1294 }, { "epoch": 0.19315385189052128, "grad_norm": 1.7247369289398193, "learning_rate": 1.8640051671313656e-05, "loss": 0.6922, "step": 1295 }, { "epoch": 0.1933030054441047, "grad_norm": 1.259236454963684, "learning_rate": 1.8637617967843748e-05, "loss": 0.6389, "step": 1296 }, { "epoch": 0.1934521589976881, "grad_norm": 1.2297576665878296, "learning_rate": 1.8635182247866545e-05, "loss": 0.587, "step": 1297 }, { "epoch": 0.19360131255127153, "grad_norm": 1.5677320957183838, "learning_rate": 1.863274451195067e-05, "loss": 0.6538, "step": 1298 }, { "epoch": 0.19375046610485494, "grad_norm": 1.3353112936019897, "learning_rate": 1.8630304760665237e-05, "loss": 0.6026, "step": 1299 }, { "epoch": 0.19389961965843835, "grad_norm": 0.8992443680763245, "learning_rate": 1.8627862994579823e-05, "loss": 0.6794, "step": 1300 }, { "epoch": 0.19404877321202177, "grad_norm": 1.4349677562713623, "learning_rate": 1.862541921426447e-05, "loss": 0.6187, "step": 1301 }, { "epoch": 0.19419792676560518, "grad_norm": 1.7024645805358887, "learning_rate": 1.8622973420289692e-05, "loss": 0.7106, "step": 1302 }, { "epoch": 0.1943470803191886, "grad_norm": 1.3487988710403442, "learning_rate": 1.862052561322648e-05, "loss": 0.6287, "step": 1303 }, { "epoch": 0.194496233872772, "grad_norm": 1.4695945978164673, "learning_rate": 1.8618075793646292e-05, "loss": 0.7115, "step": 1304 }, { "epoch": 0.19464538742635543, "grad_norm": 1.3477613925933838, "learning_rate": 1.8615623962121043e-05, "loss": 0.5634, "step": 1305 }, { "epoch": 0.19479454097993884, "grad_norm": 1.4078925848007202, "learning_rate": 1.861317011922314e-05, "loss": 0.6621, "step": 1306 }, { "epoch": 0.19494369453352225, "grad_norm": 1.6086459159851074, "learning_rate": 1.861071426552545e-05, "loss": 0.6296, "step": 1307 }, { "epoch": 0.19509284808710567, "grad_norm": 1.4081411361694336, "learning_rate": 1.8608256401601294e-05, "loss": 0.7347, "step": 1308 }, { "epoch": 0.19524200164068908, "grad_norm": 1.6984888315200806, "learning_rate": 1.860579652802449e-05, "loss": 0.697, "step": 1309 }, { "epoch": 0.1953911551942725, "grad_norm": 1.2147901058197021, "learning_rate": 1.8603334645369302e-05, "loss": 0.5954, "step": 1310 }, { "epoch": 0.1955403087478559, "grad_norm": 1.630525827407837, "learning_rate": 1.8600870754210477e-05, "loss": 0.623, "step": 1311 }, { "epoch": 0.19568946230143933, "grad_norm": 1.3885773420333862, "learning_rate": 1.859840485512323e-05, "loss": 0.6301, "step": 1312 }, { "epoch": 0.19583861585502274, "grad_norm": 1.2969934940338135, "learning_rate": 1.8595936948683234e-05, "loss": 0.6438, "step": 1313 }, { "epoch": 0.19598776940860616, "grad_norm": 1.409196138381958, "learning_rate": 1.8593467035466635e-05, "loss": 0.6414, "step": 1314 }, { "epoch": 0.19613692296218957, "grad_norm": 1.4673084020614624, "learning_rate": 1.859099511605006e-05, "loss": 0.6347, "step": 1315 }, { "epoch": 0.19628607651577298, "grad_norm": 1.485574722290039, "learning_rate": 1.8588521191010586e-05, "loss": 0.682, "step": 1316 }, { "epoch": 0.1964352300693564, "grad_norm": 1.8939121961593628, "learning_rate": 1.8586045260925773e-05, "loss": 0.6315, "step": 1317 }, { "epoch": 0.1965843836229398, "grad_norm": 1.7831989526748657, "learning_rate": 1.858356732637364e-05, "loss": 0.6036, "step": 1318 }, { "epoch": 0.19673353717652323, "grad_norm": 1.3208905458450317, "learning_rate": 1.8581087387932676e-05, "loss": 0.5636, "step": 1319 }, { "epoch": 0.19688269073010664, "grad_norm": 1.5097081661224365, "learning_rate": 1.8578605446181838e-05, "loss": 0.5852, "step": 1320 }, { "epoch": 0.19703184428369006, "grad_norm": 1.3541789054870605, "learning_rate": 1.8576121501700553e-05, "loss": 0.5636, "step": 1321 }, { "epoch": 0.19718099783727347, "grad_norm": 1.5350843667984009, "learning_rate": 1.857363555506871e-05, "loss": 0.6861, "step": 1322 }, { "epoch": 0.19733015139085688, "grad_norm": 1.6044998168945312, "learning_rate": 1.8571147606866677e-05, "loss": 0.6491, "step": 1323 }, { "epoch": 0.1974793049444403, "grad_norm": 1.714700698852539, "learning_rate": 1.8568657657675272e-05, "loss": 0.7012, "step": 1324 }, { "epoch": 0.1976284584980237, "grad_norm": 1.2228955030441284, "learning_rate": 1.85661657080758e-05, "loss": 0.647, "step": 1325 }, { "epoch": 0.19777761205160713, "grad_norm": 1.5632832050323486, "learning_rate": 1.8563671758650013e-05, "loss": 0.6027, "step": 1326 }, { "epoch": 0.19792676560519054, "grad_norm": 0.9359027743339539, "learning_rate": 1.8561175809980144e-05, "loss": 0.6897, "step": 1327 }, { "epoch": 0.19807591915877396, "grad_norm": 1.353830099105835, "learning_rate": 1.8558677862648887e-05, "loss": 0.5934, "step": 1328 }, { "epoch": 0.19822507271235737, "grad_norm": 1.3862870931625366, "learning_rate": 1.8556177917239406e-05, "loss": 0.6454, "step": 1329 }, { "epoch": 0.19837422626594078, "grad_norm": 1.4813116788864136, "learning_rate": 1.8553675974335328e-05, "loss": 0.6556, "step": 1330 }, { "epoch": 0.1985233798195242, "grad_norm": 1.430580496788025, "learning_rate": 1.8551172034520746e-05, "loss": 0.6433, "step": 1331 }, { "epoch": 0.1986725333731076, "grad_norm": 0.9024822115898132, "learning_rate": 1.854866609838022e-05, "loss": 0.6696, "step": 1332 }, { "epoch": 0.19882168692669103, "grad_norm": 1.3125412464141846, "learning_rate": 1.8546158166498783e-05, "loss": 0.6189, "step": 1333 }, { "epoch": 0.19897084048027444, "grad_norm": 1.496769666671753, "learning_rate": 1.854364823946192e-05, "loss": 0.6928, "step": 1334 }, { "epoch": 0.19911999403385786, "grad_norm": 1.8067626953125, "learning_rate": 1.8541136317855598e-05, "loss": 0.7059, "step": 1335 }, { "epoch": 0.19926914758744127, "grad_norm": 1.5083048343658447, "learning_rate": 1.8538622402266232e-05, "loss": 0.6417, "step": 1336 }, { "epoch": 0.19941830114102468, "grad_norm": 1.2663179636001587, "learning_rate": 1.853610649328072e-05, "loss": 0.6802, "step": 1337 }, { "epoch": 0.1995674546946081, "grad_norm": 1.5120121240615845, "learning_rate": 1.853358859148641e-05, "loss": 0.6175, "step": 1338 }, { "epoch": 0.1997166082481915, "grad_norm": 1.3263062238693237, "learning_rate": 1.8531068697471125e-05, "loss": 0.5092, "step": 1339 }, { "epoch": 0.19986576180177493, "grad_norm": 1.334193229675293, "learning_rate": 1.8528546811823156e-05, "loss": 0.6531, "step": 1340 }, { "epoch": 0.20001491535535834, "grad_norm": 1.473410725593567, "learning_rate": 1.8526022935131244e-05, "loss": 0.6762, "step": 1341 }, { "epoch": 0.20016406890894176, "grad_norm": 1.2390917539596558, "learning_rate": 1.852349706798461e-05, "loss": 0.568, "step": 1342 }, { "epoch": 0.20031322246252517, "grad_norm": 1.3791261911392212, "learning_rate": 1.8520969210972932e-05, "loss": 0.6986, "step": 1343 }, { "epoch": 0.20046237601610858, "grad_norm": 1.5221673250198364, "learning_rate": 1.8518439364686358e-05, "loss": 0.618, "step": 1344 }, { "epoch": 0.200611529569692, "grad_norm": 1.2670681476593018, "learning_rate": 1.8515907529715492e-05, "loss": 0.6272, "step": 1345 }, { "epoch": 0.2007606831232754, "grad_norm": 2.5504581928253174, "learning_rate": 1.8513373706651406e-05, "loss": 0.6399, "step": 1346 }, { "epoch": 0.20090983667685883, "grad_norm": 1.614308476448059, "learning_rate": 1.8510837896085642e-05, "loss": 0.6986, "step": 1347 }, { "epoch": 0.20105899023044224, "grad_norm": 1.2548677921295166, "learning_rate": 1.85083000986102e-05, "loss": 0.6741, "step": 1348 }, { "epoch": 0.20120814378402566, "grad_norm": 1.618677020072937, "learning_rate": 1.8505760314817544e-05, "loss": 0.5452, "step": 1349 }, { "epoch": 0.20135729733760907, "grad_norm": 1.5982130765914917, "learning_rate": 1.8503218545300603e-05, "loss": 0.6647, "step": 1350 }, { "epoch": 0.20150645089119248, "grad_norm": 1.5673950910568237, "learning_rate": 1.850067479065277e-05, "loss": 0.684, "step": 1351 }, { "epoch": 0.2016556044447759, "grad_norm": 1.5313522815704346, "learning_rate": 1.84981290514679e-05, "loss": 0.6374, "step": 1352 }, { "epoch": 0.2018047579983593, "grad_norm": 1.2154070138931274, "learning_rate": 1.8495581328340315e-05, "loss": 0.6417, "step": 1353 }, { "epoch": 0.20195391155194273, "grad_norm": 1.6047157049179077, "learning_rate": 1.8493031621864792e-05, "loss": 0.6721, "step": 1354 }, { "epoch": 0.20210306510552614, "grad_norm": 2.581110954284668, "learning_rate": 1.849047993263658e-05, "loss": 0.6447, "step": 1355 }, { "epoch": 0.20225221865910956, "grad_norm": 3.259166955947876, "learning_rate": 1.8487926261251386e-05, "loss": 0.5727, "step": 1356 }, { "epoch": 0.20240137221269297, "grad_norm": 2.0545551776885986, "learning_rate": 1.8485370608305384e-05, "loss": 0.6063, "step": 1357 }, { "epoch": 0.20255052576627638, "grad_norm": 1.4512176513671875, "learning_rate": 1.8482812974395205e-05, "loss": 0.6482, "step": 1358 }, { "epoch": 0.2026996793198598, "grad_norm": 1.7948102951049805, "learning_rate": 1.848025336011794e-05, "loss": 0.6778, "step": 1359 }, { "epoch": 0.2028488328734432, "grad_norm": 2.4941227436065674, "learning_rate": 1.8477691766071156e-05, "loss": 0.6827, "step": 1360 }, { "epoch": 0.20299798642702663, "grad_norm": 1.6176631450653076, "learning_rate": 1.847512819285287e-05, "loss": 0.5786, "step": 1361 }, { "epoch": 0.20314713998061004, "grad_norm": 1.4728142023086548, "learning_rate": 1.8472562641061564e-05, "loss": 0.592, "step": 1362 }, { "epoch": 0.20329629353419346, "grad_norm": 1.3641703128814697, "learning_rate": 1.8469995111296183e-05, "loss": 0.7247, "step": 1363 }, { "epoch": 0.20344544708777687, "grad_norm": 1.8784265518188477, "learning_rate": 1.8467425604156133e-05, "loss": 0.6433, "step": 1364 }, { "epoch": 0.20359460064136028, "grad_norm": 1.8874529600143433, "learning_rate": 1.846485412024128e-05, "loss": 0.6065, "step": 1365 }, { "epoch": 0.2037437541949437, "grad_norm": 1.2595866918563843, "learning_rate": 1.8462280660151963e-05, "loss": 0.6499, "step": 1366 }, { "epoch": 0.2038929077485271, "grad_norm": 1.4554293155670166, "learning_rate": 1.8459705224488958e-05, "loss": 0.5737, "step": 1367 }, { "epoch": 0.20404206130211053, "grad_norm": 1.890343189239502, "learning_rate": 1.845712781385353e-05, "loss": 0.6532, "step": 1368 }, { "epoch": 0.20419121485569394, "grad_norm": 1.3886644840240479, "learning_rate": 1.8454548428847383e-05, "loss": 0.6165, "step": 1369 }, { "epoch": 0.20434036840927736, "grad_norm": 1.5977064371109009, "learning_rate": 1.8451967070072693e-05, "loss": 0.6099, "step": 1370 }, { "epoch": 0.20448952196286077, "grad_norm": 1.787224292755127, "learning_rate": 1.84493837381321e-05, "loss": 0.5566, "step": 1371 }, { "epoch": 0.20463867551644419, "grad_norm": 2.0102477073669434, "learning_rate": 1.844679843362869e-05, "loss": 0.6363, "step": 1372 }, { "epoch": 0.2047878290700276, "grad_norm": 1.4064933061599731, "learning_rate": 1.844421115716603e-05, "loss": 0.6979, "step": 1373 }, { "epoch": 0.204936982623611, "grad_norm": 1.4698415994644165, "learning_rate": 1.8441621909348132e-05, "loss": 0.6834, "step": 1374 }, { "epoch": 0.20508613617719443, "grad_norm": 1.3006391525268555, "learning_rate": 1.8439030690779468e-05, "loss": 0.6197, "step": 1375 }, { "epoch": 0.20523528973077784, "grad_norm": 1.7660263776779175, "learning_rate": 1.8436437502064976e-05, "loss": 0.6258, "step": 1376 }, { "epoch": 0.20538444328436126, "grad_norm": 1.6987355947494507, "learning_rate": 1.8433842343810058e-05, "loss": 0.705, "step": 1377 }, { "epoch": 0.20553359683794467, "grad_norm": 1.8453700542449951, "learning_rate": 1.8431245216620562e-05, "loss": 0.59, "step": 1378 }, { "epoch": 0.20568275039152809, "grad_norm": 1.3456887006759644, "learning_rate": 1.8428646121102815e-05, "loss": 0.5582, "step": 1379 }, { "epoch": 0.2058319039451115, "grad_norm": 1.2971794605255127, "learning_rate": 1.8426045057863585e-05, "loss": 0.609, "step": 1380 }, { "epoch": 0.2059810574986949, "grad_norm": 1.427031397819519, "learning_rate": 1.8423442027510104e-05, "loss": 0.6775, "step": 1381 }, { "epoch": 0.20613021105227833, "grad_norm": 1.524569034576416, "learning_rate": 1.8420837030650073e-05, "loss": 0.5941, "step": 1382 }, { "epoch": 0.20627936460586174, "grad_norm": 1.4452675580978394, "learning_rate": 1.8418230067891644e-05, "loss": 0.6728, "step": 1383 }, { "epoch": 0.20642851815944516, "grad_norm": 1.3720546960830688, "learning_rate": 1.8415621139843426e-05, "loss": 0.6125, "step": 1384 }, { "epoch": 0.20657767171302857, "grad_norm": 1.4367510080337524, "learning_rate": 1.8413010247114492e-05, "loss": 0.5678, "step": 1385 }, { "epoch": 0.20672682526661199, "grad_norm": 1.31992769241333, "learning_rate": 1.841039739031437e-05, "loss": 0.6399, "step": 1386 }, { "epoch": 0.2068759788201954, "grad_norm": 1.3428953886032104, "learning_rate": 1.840778257005305e-05, "loss": 0.7019, "step": 1387 }, { "epoch": 0.20702513237377881, "grad_norm": 1.5204704999923706, "learning_rate": 1.8405165786940976e-05, "loss": 0.6945, "step": 1388 }, { "epoch": 0.20717428592736223, "grad_norm": 1.5271633863449097, "learning_rate": 1.8402547041589057e-05, "loss": 0.6179, "step": 1389 }, { "epoch": 0.20732343948094564, "grad_norm": 1.683884859085083, "learning_rate": 1.8399926334608654e-05, "loss": 0.6788, "step": 1390 }, { "epoch": 0.20747259303452906, "grad_norm": 2.091454267501831, "learning_rate": 1.8397303666611588e-05, "loss": 0.6281, "step": 1391 }, { "epoch": 0.20762174658811247, "grad_norm": 1.7286335229873657, "learning_rate": 1.839467903821014e-05, "loss": 0.6867, "step": 1392 }, { "epoch": 0.20777090014169589, "grad_norm": 2.1683189868927, "learning_rate": 1.8392052450017036e-05, "loss": 0.5951, "step": 1393 }, { "epoch": 0.2079200536952793, "grad_norm": 1.4540793895721436, "learning_rate": 1.838942390264548e-05, "loss": 0.6812, "step": 1394 }, { "epoch": 0.20806920724886271, "grad_norm": 1.3320671319961548, "learning_rate": 1.8386793396709123e-05, "loss": 0.6222, "step": 1395 }, { "epoch": 0.20821836080244613, "grad_norm": 1.5376887321472168, "learning_rate": 1.838416093282207e-05, "loss": 0.6148, "step": 1396 }, { "epoch": 0.20836751435602954, "grad_norm": 1.3959344625473022, "learning_rate": 1.838152651159889e-05, "loss": 0.6419, "step": 1397 }, { "epoch": 0.20851666790961296, "grad_norm": 1.4068557024002075, "learning_rate": 1.83788901336546e-05, "loss": 0.7099, "step": 1398 }, { "epoch": 0.20866582146319637, "grad_norm": 1.166268229484558, "learning_rate": 1.8376251799604684e-05, "loss": 0.5755, "step": 1399 }, { "epoch": 0.20881497501677979, "grad_norm": 1.5081936120986938, "learning_rate": 1.8373611510065077e-05, "loss": 0.6282, "step": 1400 }, { "epoch": 0.2089641285703632, "grad_norm": 1.2991784811019897, "learning_rate": 1.837096926565217e-05, "loss": 0.6784, "step": 1401 }, { "epoch": 0.20911328212394661, "grad_norm": 1.3631840944290161, "learning_rate": 1.8368325066982817e-05, "loss": 0.6657, "step": 1402 }, { "epoch": 0.20926243567753003, "grad_norm": 1.774310827255249, "learning_rate": 1.836567891467431e-05, "loss": 0.6186, "step": 1403 }, { "epoch": 0.20941158923111344, "grad_norm": 1.416585922241211, "learning_rate": 1.8363030809344425e-05, "loss": 0.6862, "step": 1404 }, { "epoch": 0.20956074278469686, "grad_norm": 1.4408421516418457, "learning_rate": 1.8360380751611375e-05, "loss": 0.6191, "step": 1405 }, { "epoch": 0.20970989633828027, "grad_norm": 1.4214866161346436, "learning_rate": 1.8357728742093827e-05, "loss": 0.6715, "step": 1406 }, { "epoch": 0.20985904989186369, "grad_norm": 0.944375216960907, "learning_rate": 1.8355074781410918e-05, "loss": 0.7022, "step": 1407 }, { "epoch": 0.2100082034454471, "grad_norm": 1.3932101726531982, "learning_rate": 1.835241887018223e-05, "loss": 0.5873, "step": 1408 }, { "epoch": 0.21015735699903051, "grad_norm": 1.2770437002182007, "learning_rate": 1.8349761009027794e-05, "loss": 0.5531, "step": 1409 }, { "epoch": 0.21030651055261393, "grad_norm": 1.4588594436645508, "learning_rate": 1.8347101198568116e-05, "loss": 0.6613, "step": 1410 }, { "epoch": 0.21045566410619734, "grad_norm": 1.33334219455719, "learning_rate": 1.8344439439424142e-05, "loss": 0.5888, "step": 1411 }, { "epoch": 0.21060481765978076, "grad_norm": 2.2795000076293945, "learning_rate": 1.8341775732217275e-05, "loss": 0.6363, "step": 1412 }, { "epoch": 0.21075397121336417, "grad_norm": 1.6168673038482666, "learning_rate": 1.8339110077569376e-05, "loss": 0.6607, "step": 1413 }, { "epoch": 0.2109031247669476, "grad_norm": 0.9011684656143188, "learning_rate": 1.8336442476102757e-05, "loss": 0.6262, "step": 1414 }, { "epoch": 0.211052278320531, "grad_norm": 1.5088927745819092, "learning_rate": 1.8333772928440187e-05, "loss": 0.6305, "step": 1415 }, { "epoch": 0.2112014318741144, "grad_norm": 1.5102105140686035, "learning_rate": 1.8331101435204896e-05, "loss": 0.7364, "step": 1416 }, { "epoch": 0.2113505854276978, "grad_norm": 1.3336843252182007, "learning_rate": 1.832842799702055e-05, "loss": 0.7002, "step": 1417 }, { "epoch": 0.21149973898128122, "grad_norm": 1.6744365692138672, "learning_rate": 1.832575261451129e-05, "loss": 0.5711, "step": 1418 }, { "epoch": 0.21164889253486463, "grad_norm": 1.5493055582046509, "learning_rate": 1.8323075288301693e-05, "loss": 0.6326, "step": 1419 }, { "epoch": 0.21179804608844804, "grad_norm": 2.607449531555176, "learning_rate": 1.8320396019016805e-05, "loss": 0.6285, "step": 1420 }, { "epoch": 0.21194719964203146, "grad_norm": 1.3099993467330933, "learning_rate": 1.8317714807282115e-05, "loss": 0.6486, "step": 1421 }, { "epoch": 0.21209635319561487, "grad_norm": 1.4259964227676392, "learning_rate": 1.831503165372357e-05, "loss": 0.6212, "step": 1422 }, { "epoch": 0.2122455067491983, "grad_norm": 2.2357370853424072, "learning_rate": 1.831234655896757e-05, "loss": 0.6588, "step": 1423 }, { "epoch": 0.2123946603027817, "grad_norm": 1.3710728883743286, "learning_rate": 1.8309659523640965e-05, "loss": 0.7115, "step": 1424 }, { "epoch": 0.21254381385636512, "grad_norm": 1.2354657649993896, "learning_rate": 1.8306970548371062e-05, "loss": 0.5508, "step": 1425 }, { "epoch": 0.21269296740994853, "grad_norm": 1.6481537818908691, "learning_rate": 1.830427963378562e-05, "loss": 0.6531, "step": 1426 }, { "epoch": 0.21284212096353194, "grad_norm": 1.3843965530395508, "learning_rate": 1.830158678051285e-05, "loss": 0.624, "step": 1427 }, { "epoch": 0.21299127451711536, "grad_norm": 1.942237138748169, "learning_rate": 1.8298891989181417e-05, "loss": 0.6066, "step": 1428 }, { "epoch": 0.21314042807069877, "grad_norm": 1.4058195352554321, "learning_rate": 1.8296195260420438e-05, "loss": 0.7708, "step": 1429 }, { "epoch": 0.2132895816242822, "grad_norm": 1.4389703273773193, "learning_rate": 1.8293496594859478e-05, "loss": 0.5753, "step": 1430 }, { "epoch": 0.2134387351778656, "grad_norm": 1.9938374757766724, "learning_rate": 1.829079599312856e-05, "loss": 0.6126, "step": 1431 }, { "epoch": 0.21358788873144902, "grad_norm": 1.5858170986175537, "learning_rate": 1.828809345585816e-05, "loss": 0.6804, "step": 1432 }, { "epoch": 0.21373704228503243, "grad_norm": 1.3262016773223877, "learning_rate": 1.8285388983679192e-05, "loss": 0.6593, "step": 1433 }, { "epoch": 0.21388619583861584, "grad_norm": 0.8898391723632812, "learning_rate": 1.8282682577223044e-05, "loss": 0.6515, "step": 1434 }, { "epoch": 0.21403534939219926, "grad_norm": 1.3992209434509277, "learning_rate": 1.8279974237121537e-05, "loss": 0.6478, "step": 1435 }, { "epoch": 0.21418450294578267, "grad_norm": 2.194972515106201, "learning_rate": 1.8277263964006958e-05, "loss": 0.6771, "step": 1436 }, { "epoch": 0.2143336564993661, "grad_norm": 1.314536690711975, "learning_rate": 1.8274551758512026e-05, "loss": 0.6713, "step": 1437 }, { "epoch": 0.2144828100529495, "grad_norm": 1.4887241125106812, "learning_rate": 1.8271837621269933e-05, "loss": 0.4807, "step": 1438 }, { "epoch": 0.21463196360653292, "grad_norm": 1.592965006828308, "learning_rate": 1.8269121552914307e-05, "loss": 0.6222, "step": 1439 }, { "epoch": 0.21478111716011633, "grad_norm": 0.9641322493553162, "learning_rate": 1.8266403554079237e-05, "loss": 0.6999, "step": 1440 }, { "epoch": 0.21493027071369974, "grad_norm": 1.8463094234466553, "learning_rate": 1.8263683625399244e-05, "loss": 0.6837, "step": 1441 }, { "epoch": 0.21507942426728316, "grad_norm": 1.5093715190887451, "learning_rate": 1.826096176750933e-05, "loss": 0.6324, "step": 1442 }, { "epoch": 0.21522857782086657, "grad_norm": 1.5338178873062134, "learning_rate": 1.8258237981044915e-05, "loss": 0.6069, "step": 1443 }, { "epoch": 0.21537773137445, "grad_norm": 1.288299560546875, "learning_rate": 1.8255512266641894e-05, "loss": 0.5858, "step": 1444 }, { "epoch": 0.2155268849280334, "grad_norm": 1.5146247148513794, "learning_rate": 1.82527846249366e-05, "loss": 0.633, "step": 1445 }, { "epoch": 0.21567603848161682, "grad_norm": 1.5307804346084595, "learning_rate": 1.825005505656582e-05, "loss": 0.7647, "step": 1446 }, { "epoch": 0.21582519203520023, "grad_norm": 1.3728532791137695, "learning_rate": 1.8247323562166785e-05, "loss": 0.6452, "step": 1447 }, { "epoch": 0.21597434558878364, "grad_norm": 1.462992787361145, "learning_rate": 1.8244590142377183e-05, "loss": 0.6262, "step": 1448 }, { "epoch": 0.21612349914236706, "grad_norm": 1.460697889328003, "learning_rate": 1.824185479783515e-05, "loss": 0.6926, "step": 1449 }, { "epoch": 0.21627265269595047, "grad_norm": 1.4181333780288696, "learning_rate": 1.8239117529179263e-05, "loss": 0.6288, "step": 1450 }, { "epoch": 0.2164218062495339, "grad_norm": 1.4001593589782715, "learning_rate": 1.8236378337048562e-05, "loss": 0.6093, "step": 1451 }, { "epoch": 0.2165709598031173, "grad_norm": 1.3782023191452026, "learning_rate": 1.8233637222082524e-05, "loss": 0.5659, "step": 1452 }, { "epoch": 0.21672011335670072, "grad_norm": 0.9567224979400635, "learning_rate": 1.823089418492108e-05, "loss": 0.6877, "step": 1453 }, { "epoch": 0.21686926691028413, "grad_norm": 1.5506627559661865, "learning_rate": 1.8228149226204617e-05, "loss": 0.5863, "step": 1454 }, { "epoch": 0.21701842046386755, "grad_norm": 1.4999243021011353, "learning_rate": 1.8225402346573958e-05, "loss": 0.6955, "step": 1455 }, { "epoch": 0.21716757401745096, "grad_norm": 0.8986415863037109, "learning_rate": 1.8222653546670377e-05, "loss": 0.6596, "step": 1456 }, { "epoch": 0.21731672757103437, "grad_norm": 1.5123649835586548, "learning_rate": 1.82199028271356e-05, "loss": 0.7287, "step": 1457 }, { "epoch": 0.2174658811246178, "grad_norm": 1.6540396213531494, "learning_rate": 1.8217150188611807e-05, "loss": 0.5303, "step": 1458 }, { "epoch": 0.2176150346782012, "grad_norm": 1.4568673372268677, "learning_rate": 1.821439563174161e-05, "loss": 0.6291, "step": 1459 }, { "epoch": 0.21776418823178462, "grad_norm": 1.5157722234725952, "learning_rate": 1.8211639157168082e-05, "loss": 0.6056, "step": 1460 }, { "epoch": 0.21791334178536803, "grad_norm": 1.300078272819519, "learning_rate": 1.820888076553474e-05, "loss": 0.6269, "step": 1461 }, { "epoch": 0.21806249533895145, "grad_norm": 1.702947735786438, "learning_rate": 1.820612045748555e-05, "loss": 0.6239, "step": 1462 }, { "epoch": 0.21821164889253486, "grad_norm": 1.4987508058547974, "learning_rate": 1.8203358233664915e-05, "loss": 0.6311, "step": 1463 }, { "epoch": 0.21836080244611827, "grad_norm": 1.493156909942627, "learning_rate": 1.8200594094717708e-05, "loss": 0.7043, "step": 1464 }, { "epoch": 0.2185099559997017, "grad_norm": 1.3240333795547485, "learning_rate": 1.819782804128922e-05, "loss": 0.5916, "step": 1465 }, { "epoch": 0.2186591095532851, "grad_norm": 1.474668264389038, "learning_rate": 1.8195060074025216e-05, "loss": 0.5951, "step": 1466 }, { "epoch": 0.21880826310686852, "grad_norm": 1.584133267402649, "learning_rate": 1.819229019357189e-05, "loss": 0.5662, "step": 1467 }, { "epoch": 0.21895741666045193, "grad_norm": 1.343352198600769, "learning_rate": 1.8189518400575886e-05, "loss": 0.5876, "step": 1468 }, { "epoch": 0.21910657021403535, "grad_norm": 1.4037505388259888, "learning_rate": 1.81867446956843e-05, "loss": 0.5907, "step": 1469 }, { "epoch": 0.21925572376761876, "grad_norm": 2.032794237136841, "learning_rate": 1.8183969079544677e-05, "loss": 0.6362, "step": 1470 }, { "epoch": 0.21940487732120217, "grad_norm": 1.2949451208114624, "learning_rate": 1.818119155280499e-05, "loss": 0.5115, "step": 1471 }, { "epoch": 0.2195540308747856, "grad_norm": 1.3471038341522217, "learning_rate": 1.817841211611368e-05, "loss": 0.5481, "step": 1472 }, { "epoch": 0.219703184428369, "grad_norm": 1.2267916202545166, "learning_rate": 1.817563077011962e-05, "loss": 0.4882, "step": 1473 }, { "epoch": 0.21985233798195242, "grad_norm": 1.4784480333328247, "learning_rate": 1.8172847515472134e-05, "loss": 0.6304, "step": 1474 }, { "epoch": 0.22000149153553583, "grad_norm": 1.7158411741256714, "learning_rate": 1.8170062352820993e-05, "loss": 0.603, "step": 1475 }, { "epoch": 0.22015064508911925, "grad_norm": 1.6355379819869995, "learning_rate": 1.8167275282816406e-05, "loss": 0.6805, "step": 1476 }, { "epoch": 0.22029979864270266, "grad_norm": 1.3527284860610962, "learning_rate": 1.816448630610904e-05, "loss": 0.6507, "step": 1477 }, { "epoch": 0.22044895219628607, "grad_norm": 1.4301058053970337, "learning_rate": 1.816169542334999e-05, "loss": 0.6009, "step": 1478 }, { "epoch": 0.2205981057498695, "grad_norm": 1.374657392501831, "learning_rate": 1.8158902635190812e-05, "loss": 0.6901, "step": 1479 }, { "epoch": 0.2207472593034529, "grad_norm": 0.9802853465080261, "learning_rate": 1.81561079422835e-05, "loss": 0.6644, "step": 1480 }, { "epoch": 0.22089641285703632, "grad_norm": 0.955808162689209, "learning_rate": 1.815331134528049e-05, "loss": 0.6638, "step": 1481 }, { "epoch": 0.22104556641061973, "grad_norm": 1.467132568359375, "learning_rate": 1.8150512844834668e-05, "loss": 0.5415, "step": 1482 }, { "epoch": 0.22119471996420315, "grad_norm": 1.472556471824646, "learning_rate": 1.814771244159936e-05, "loss": 0.6513, "step": 1483 }, { "epoch": 0.22134387351778656, "grad_norm": 1.4036933183670044, "learning_rate": 1.814491013622834e-05, "loss": 0.6306, "step": 1484 }, { "epoch": 0.22149302707136997, "grad_norm": 1.5479012727737427, "learning_rate": 1.8142105929375823e-05, "loss": 0.691, "step": 1485 }, { "epoch": 0.2216421806249534, "grad_norm": 0.9454494118690491, "learning_rate": 1.813929982169647e-05, "loss": 0.6275, "step": 1486 }, { "epoch": 0.2217913341785368, "grad_norm": 1.9143458604812622, "learning_rate": 1.813649181384538e-05, "loss": 0.667, "step": 1487 }, { "epoch": 0.22194048773212022, "grad_norm": 1.2348941564559937, "learning_rate": 1.813368190647811e-05, "loss": 0.5698, "step": 1488 }, { "epoch": 0.22208964128570363, "grad_norm": 1.844443440437317, "learning_rate": 1.8130870100250643e-05, "loss": 0.6188, "step": 1489 }, { "epoch": 0.22223879483928705, "grad_norm": 1.4116038084030151, "learning_rate": 1.8128056395819414e-05, "loss": 0.6462, "step": 1490 }, { "epoch": 0.22238794839287046, "grad_norm": 1.8104190826416016, "learning_rate": 1.8125240793841304e-05, "loss": 0.6508, "step": 1491 }, { "epoch": 0.22253710194645387, "grad_norm": 1.525492787361145, "learning_rate": 1.812242329497363e-05, "loss": 0.6348, "step": 1492 }, { "epoch": 0.2226862555000373, "grad_norm": 1.5770686864852905, "learning_rate": 1.8119603899874163e-05, "loss": 0.6924, "step": 1493 }, { "epoch": 0.2228354090536207, "grad_norm": 1.336362600326538, "learning_rate": 1.8116782609201095e-05, "loss": 0.6575, "step": 1494 }, { "epoch": 0.22298456260720412, "grad_norm": 1.5875811576843262, "learning_rate": 1.8113959423613084e-05, "loss": 0.5835, "step": 1495 }, { "epoch": 0.22313371616078753, "grad_norm": 1.832481026649475, "learning_rate": 1.811113434376922e-05, "loss": 0.5855, "step": 1496 }, { "epoch": 0.22328286971437095, "grad_norm": 1.5573136806488037, "learning_rate": 1.8108307370329032e-05, "loss": 0.6468, "step": 1497 }, { "epoch": 0.22343202326795436, "grad_norm": 1.7159879207611084, "learning_rate": 1.81054785039525e-05, "loss": 0.6115, "step": 1498 }, { "epoch": 0.22358117682153777, "grad_norm": 1.2660387754440308, "learning_rate": 1.810264774530004e-05, "loss": 0.496, "step": 1499 }, { "epoch": 0.2237303303751212, "grad_norm": 1.2481436729431152, "learning_rate": 1.8099815095032502e-05, "loss": 0.5949, "step": 1500 }, { "epoch": 0.2238794839287046, "grad_norm": 1.3859013319015503, "learning_rate": 1.80969805538112e-05, "loss": 0.6475, "step": 1501 }, { "epoch": 0.22402863748228802, "grad_norm": 1.0244033336639404, "learning_rate": 1.8094144122297867e-05, "loss": 0.709, "step": 1502 }, { "epoch": 0.22417779103587143, "grad_norm": 1.380932331085205, "learning_rate": 1.809130580115469e-05, "loss": 0.6209, "step": 1503 }, { "epoch": 0.22432694458945485, "grad_norm": 1.4462984800338745, "learning_rate": 1.8088465591044292e-05, "loss": 0.609, "step": 1504 }, { "epoch": 0.22447609814303826, "grad_norm": 1.8942548036575317, "learning_rate": 1.808562349262974e-05, "loss": 0.6702, "step": 1505 }, { "epoch": 0.22462525169662168, "grad_norm": 1.35386061668396, "learning_rate": 1.8082779506574534e-05, "loss": 0.6299, "step": 1506 }, { "epoch": 0.2247744052502051, "grad_norm": 1.465444564819336, "learning_rate": 1.807993363354263e-05, "loss": 0.609, "step": 1507 }, { "epoch": 0.2249235588037885, "grad_norm": 1.494319200515747, "learning_rate": 1.8077085874198404e-05, "loss": 0.6754, "step": 1508 }, { "epoch": 0.22507271235737192, "grad_norm": 1.4070513248443604, "learning_rate": 1.8074236229206694e-05, "loss": 0.6381, "step": 1509 }, { "epoch": 0.22522186591095533, "grad_norm": 1.3208796977996826, "learning_rate": 1.8071384699232766e-05, "loss": 0.6142, "step": 1510 }, { "epoch": 0.22537101946453875, "grad_norm": 1.4894647598266602, "learning_rate": 1.8068531284942324e-05, "loss": 0.6582, "step": 1511 }, { "epoch": 0.22552017301812216, "grad_norm": 1.279082179069519, "learning_rate": 1.8065675987001517e-05, "loss": 0.6806, "step": 1512 }, { "epoch": 0.22566932657170558, "grad_norm": 1.490735650062561, "learning_rate": 1.8062818806076934e-05, "loss": 0.6483, "step": 1513 }, { "epoch": 0.225818480125289, "grad_norm": 1.5290030241012573, "learning_rate": 1.8059959742835604e-05, "loss": 0.726, "step": 1514 }, { "epoch": 0.2259676336788724, "grad_norm": 1.330775260925293, "learning_rate": 1.8057098797944987e-05, "loss": 0.6447, "step": 1515 }, { "epoch": 0.22611678723245582, "grad_norm": 1.7985119819641113, "learning_rate": 1.8054235972072994e-05, "loss": 0.6277, "step": 1516 }, { "epoch": 0.22626594078603923, "grad_norm": 1.3391904830932617, "learning_rate": 1.805137126588797e-05, "loss": 0.6865, "step": 1517 }, { "epoch": 0.22641509433962265, "grad_norm": 1.3750075101852417, "learning_rate": 1.8048504680058704e-05, "loss": 0.5847, "step": 1518 }, { "epoch": 0.22656424789320606, "grad_norm": 1.4797227382659912, "learning_rate": 1.8045636215254407e-05, "loss": 0.5965, "step": 1519 }, { "epoch": 0.22671340144678948, "grad_norm": 1.347749948501587, "learning_rate": 1.8042765872144747e-05, "loss": 0.6358, "step": 1520 }, { "epoch": 0.2268625550003729, "grad_norm": 1.2999018430709839, "learning_rate": 1.8039893651399823e-05, "loss": 0.6345, "step": 1521 }, { "epoch": 0.2270117085539563, "grad_norm": 1.450956106185913, "learning_rate": 1.8037019553690176e-05, "loss": 0.6073, "step": 1522 }, { "epoch": 0.22716086210753972, "grad_norm": 1.4865448474884033, "learning_rate": 1.803414357968678e-05, "loss": 0.5881, "step": 1523 }, { "epoch": 0.22731001566112313, "grad_norm": 1.4087820053100586, "learning_rate": 1.803126573006105e-05, "loss": 0.6982, "step": 1524 }, { "epoch": 0.22745916921470655, "grad_norm": 1.517796516418457, "learning_rate": 1.8028386005484837e-05, "loss": 0.6521, "step": 1525 }, { "epoch": 0.22760832276828996, "grad_norm": 1.5844897031784058, "learning_rate": 1.8025504406630434e-05, "loss": 0.6601, "step": 1526 }, { "epoch": 0.22775747632187338, "grad_norm": 1.2662209272384644, "learning_rate": 1.8022620934170568e-05, "loss": 0.573, "step": 1527 }, { "epoch": 0.2279066298754568, "grad_norm": 1.4757745265960693, "learning_rate": 1.8019735588778404e-05, "loss": 0.5957, "step": 1528 }, { "epoch": 0.2280557834290402, "grad_norm": 1.4634623527526855, "learning_rate": 1.801684837112754e-05, "loss": 0.6006, "step": 1529 }, { "epoch": 0.22820493698262362, "grad_norm": 1.3479057550430298, "learning_rate": 1.8013959281892025e-05, "loss": 0.6231, "step": 1530 }, { "epoch": 0.22835409053620703, "grad_norm": 1.2675048112869263, "learning_rate": 1.801106832174633e-05, "loss": 0.6298, "step": 1531 }, { "epoch": 0.22850324408979045, "grad_norm": 1.7733403444290161, "learning_rate": 1.8008175491365364e-05, "loss": 0.6115, "step": 1532 }, { "epoch": 0.22865239764337386, "grad_norm": 1.3354272842407227, "learning_rate": 1.8005280791424483e-05, "loss": 0.6015, "step": 1533 }, { "epoch": 0.22880155119695728, "grad_norm": 1.6779979467391968, "learning_rate": 1.800238422259947e-05, "loss": 0.7007, "step": 1534 }, { "epoch": 0.2289507047505407, "grad_norm": 1.3972091674804688, "learning_rate": 1.799948578556655e-05, "loss": 0.666, "step": 1535 }, { "epoch": 0.2290998583041241, "grad_norm": 1.369598388671875, "learning_rate": 1.799658548100238e-05, "loss": 0.5867, "step": 1536 }, { "epoch": 0.22924901185770752, "grad_norm": 1.4746103286743164, "learning_rate": 1.799368330958405e-05, "loss": 0.643, "step": 1537 }, { "epoch": 0.22939816541129093, "grad_norm": 1.4778708219528198, "learning_rate": 1.7990779271989103e-05, "loss": 0.639, "step": 1538 }, { "epoch": 0.22954731896487435, "grad_norm": 1.3347612619400024, "learning_rate": 1.7987873368895494e-05, "loss": 0.6812, "step": 1539 }, { "epoch": 0.22969647251845776, "grad_norm": 1.4590048789978027, "learning_rate": 1.798496560098163e-05, "loss": 0.5485, "step": 1540 }, { "epoch": 0.22984562607204118, "grad_norm": 1.5473507642745972, "learning_rate": 1.7982055968926344e-05, "loss": 0.6126, "step": 1541 }, { "epoch": 0.2299947796256246, "grad_norm": 1.3557721376419067, "learning_rate": 1.7979144473408912e-05, "loss": 0.6291, "step": 1542 }, { "epoch": 0.230143933179208, "grad_norm": 1.550419807434082, "learning_rate": 1.797623111510904e-05, "loss": 0.6022, "step": 1543 }, { "epoch": 0.23029308673279142, "grad_norm": 1.3703124523162842, "learning_rate": 1.7973315894706872e-05, "loss": 0.5296, "step": 1544 }, { "epoch": 0.23044224028637483, "grad_norm": 1.5797070264816284, "learning_rate": 1.7970398812882982e-05, "loss": 0.6319, "step": 1545 }, { "epoch": 0.23059139383995825, "grad_norm": 1.3151891231536865, "learning_rate": 1.7967479870318384e-05, "loss": 0.5515, "step": 1546 }, { "epoch": 0.23074054739354166, "grad_norm": 3.5083694458007812, "learning_rate": 1.796455906769452e-05, "loss": 0.5927, "step": 1547 }, { "epoch": 0.23088970094712508, "grad_norm": 1.6181076765060425, "learning_rate": 1.7961636405693274e-05, "loss": 0.6747, "step": 1548 }, { "epoch": 0.2310388545007085, "grad_norm": 1.5383940935134888, "learning_rate": 1.795871188499696e-05, "loss": 0.742, "step": 1549 }, { "epoch": 0.2311880080542919, "grad_norm": 1.372560739517212, "learning_rate": 1.7955785506288324e-05, "loss": 0.5882, "step": 1550 }, { "epoch": 0.23133716160787532, "grad_norm": 1.5869297981262207, "learning_rate": 1.795285727025055e-05, "loss": 0.6375, "step": 1551 }, { "epoch": 0.23148631516145873, "grad_norm": 1.4803245067596436, "learning_rate": 1.794992717756725e-05, "loss": 0.6297, "step": 1552 }, { "epoch": 0.23163546871504215, "grad_norm": 1.4275753498077393, "learning_rate": 1.7946995228922474e-05, "loss": 0.6278, "step": 1553 }, { "epoch": 0.23178462226862556, "grad_norm": 0.8814929723739624, "learning_rate": 1.794406142500071e-05, "loss": 0.6609, "step": 1554 }, { "epoch": 0.23193377582220898, "grad_norm": 1.3790603876113892, "learning_rate": 1.7941125766486865e-05, "loss": 0.6667, "step": 1555 }, { "epoch": 0.2320829293757924, "grad_norm": 1.6946220397949219, "learning_rate": 1.7938188254066293e-05, "loss": 0.6984, "step": 1556 }, { "epoch": 0.2322320829293758, "grad_norm": 1.4110963344573975, "learning_rate": 1.793524888842477e-05, "loss": 0.6665, "step": 1557 }, { "epoch": 0.23238123648295922, "grad_norm": 1.8897591829299927, "learning_rate": 1.7932307670248518e-05, "loss": 0.6716, "step": 1558 }, { "epoch": 0.23253039003654263, "grad_norm": 1.3196293115615845, "learning_rate": 1.792936460022417e-05, "loss": 0.6101, "step": 1559 }, { "epoch": 0.23267954359012605, "grad_norm": 2.394683361053467, "learning_rate": 1.7926419679038823e-05, "loss": 0.6512, "step": 1560 }, { "epoch": 0.23282869714370946, "grad_norm": 1.2774478197097778, "learning_rate": 1.7923472907379968e-05, "loss": 0.612, "step": 1561 }, { "epoch": 0.23297785069729288, "grad_norm": 1.8455109596252441, "learning_rate": 1.7920524285935563e-05, "loss": 0.7244, "step": 1562 }, { "epoch": 0.2331270042508763, "grad_norm": 1.7498373985290527, "learning_rate": 1.7917573815393975e-05, "loss": 0.6305, "step": 1563 }, { "epoch": 0.2332761578044597, "grad_norm": 1.1938070058822632, "learning_rate": 1.7914621496444015e-05, "loss": 0.5118, "step": 1564 }, { "epoch": 0.2334253113580431, "grad_norm": 0.8908499479293823, "learning_rate": 1.7911667329774914e-05, "loss": 0.6553, "step": 1565 }, { "epoch": 0.2335744649116265, "grad_norm": 1.576061487197876, "learning_rate": 1.7908711316076345e-05, "loss": 0.6012, "step": 1566 }, { "epoch": 0.23372361846520992, "grad_norm": 1.590785264968872, "learning_rate": 1.790575345603841e-05, "loss": 0.7373, "step": 1567 }, { "epoch": 0.23387277201879333, "grad_norm": 1.4674735069274902, "learning_rate": 1.790279375035164e-05, "loss": 0.6671, "step": 1568 }, { "epoch": 0.23402192557237675, "grad_norm": 1.4286231994628906, "learning_rate": 1.7899832199706993e-05, "loss": 0.6899, "step": 1569 }, { "epoch": 0.23417107912596016, "grad_norm": 1.9318158626556396, "learning_rate": 1.7896868804795863e-05, "loss": 0.6423, "step": 1570 }, { "epoch": 0.23432023267954358, "grad_norm": 1.3109651803970337, "learning_rate": 1.789390356631008e-05, "loss": 0.5911, "step": 1571 }, { "epoch": 0.234469386233127, "grad_norm": 1.5626848936080933, "learning_rate": 1.7890936484941894e-05, "loss": 0.5736, "step": 1572 }, { "epoch": 0.2346185397867104, "grad_norm": 1.3764516115188599, "learning_rate": 1.7887967561383986e-05, "loss": 0.5949, "step": 1573 }, { "epoch": 0.23476769334029382, "grad_norm": 1.4394055604934692, "learning_rate": 1.7884996796329472e-05, "loss": 0.63, "step": 1574 }, { "epoch": 0.23491684689387723, "grad_norm": 1.505794882774353, "learning_rate": 1.78820241904719e-05, "loss": 0.6696, "step": 1575 }, { "epoch": 0.23506600044746065, "grad_norm": 1.7175425291061401, "learning_rate": 1.787904974450524e-05, "loss": 0.6811, "step": 1576 }, { "epoch": 0.23521515400104406, "grad_norm": 1.3731330633163452, "learning_rate": 1.7876073459123895e-05, "loss": 0.5155, "step": 1577 }, { "epoch": 0.23536430755462748, "grad_norm": 1.4895727634429932, "learning_rate": 1.78730953350227e-05, "loss": 0.6117, "step": 1578 }, { "epoch": 0.2355134611082109, "grad_norm": 1.6035317182540894, "learning_rate": 1.7870115372896915e-05, "loss": 0.7078, "step": 1579 }, { "epoch": 0.2356626146617943, "grad_norm": 1.2479075193405151, "learning_rate": 1.7867133573442234e-05, "loss": 0.6049, "step": 1580 }, { "epoch": 0.23581176821537772, "grad_norm": 1.5350741147994995, "learning_rate": 1.786414993735478e-05, "loss": 0.6639, "step": 1581 }, { "epoch": 0.23596092176896113, "grad_norm": 1.4818187952041626, "learning_rate": 1.786116446533109e-05, "loss": 0.6601, "step": 1582 }, { "epoch": 0.23611007532254455, "grad_norm": 2.043093681335449, "learning_rate": 1.7858177158068154e-05, "loss": 0.6366, "step": 1583 }, { "epoch": 0.23625922887612796, "grad_norm": 1.6004092693328857, "learning_rate": 1.7855188016263377e-05, "loss": 0.5458, "step": 1584 }, { "epoch": 0.23640838242971138, "grad_norm": 1.4262516498565674, "learning_rate": 1.7852197040614583e-05, "loss": 0.637, "step": 1585 }, { "epoch": 0.2365575359832948, "grad_norm": 1.5311403274536133, "learning_rate": 1.7849204231820042e-05, "loss": 0.6862, "step": 1586 }, { "epoch": 0.2367066895368782, "grad_norm": 0.8927789926528931, "learning_rate": 1.784620959057845e-05, "loss": 0.6547, "step": 1587 }, { "epoch": 0.23685584309046162, "grad_norm": 1.5022419691085815, "learning_rate": 1.7843213117588913e-05, "loss": 0.7179, "step": 1588 }, { "epoch": 0.23700499664404504, "grad_norm": 1.2606054544448853, "learning_rate": 1.7840214813550986e-05, "loss": 0.6444, "step": 1589 }, { "epoch": 0.23715415019762845, "grad_norm": 1.3371379375457764, "learning_rate": 1.7837214679164635e-05, "loss": 0.5978, "step": 1590 }, { "epoch": 0.23730330375121186, "grad_norm": 1.626734733581543, "learning_rate": 1.783421271513027e-05, "loss": 0.676, "step": 1591 }, { "epoch": 0.23745245730479528, "grad_norm": 1.6532362699508667, "learning_rate": 1.7831208922148708e-05, "loss": 0.6869, "step": 1592 }, { "epoch": 0.2376016108583787, "grad_norm": 1.4897336959838867, "learning_rate": 1.7828203300921216e-05, "loss": 0.6737, "step": 1593 }, { "epoch": 0.2377507644119621, "grad_norm": 1.370158076286316, "learning_rate": 1.7825195852149463e-05, "loss": 0.6523, "step": 1594 }, { "epoch": 0.23789991796554552, "grad_norm": 1.1541160345077515, "learning_rate": 1.7822186576535566e-05, "loss": 0.549, "step": 1595 }, { "epoch": 0.23804907151912894, "grad_norm": 1.4921197891235352, "learning_rate": 1.781917547478205e-05, "loss": 0.6976, "step": 1596 }, { "epoch": 0.23819822507271235, "grad_norm": 1.5189990997314453, "learning_rate": 1.781616254759189e-05, "loss": 0.5626, "step": 1597 }, { "epoch": 0.23834737862629576, "grad_norm": 1.5448380708694458, "learning_rate": 1.7813147795668465e-05, "loss": 0.6624, "step": 1598 }, { "epoch": 0.23849653217987918, "grad_norm": 1.60421884059906, "learning_rate": 1.7810131219715585e-05, "loss": 0.6116, "step": 1599 }, { "epoch": 0.2386456857334626, "grad_norm": 1.3567403554916382, "learning_rate": 1.7807112820437496e-05, "loss": 0.6798, "step": 1600 }, { "epoch": 0.238794839287046, "grad_norm": 1.2709243297576904, "learning_rate": 1.7804092598538857e-05, "loss": 0.5568, "step": 1601 }, { "epoch": 0.23894399284062942, "grad_norm": 1.245456337928772, "learning_rate": 1.7801070554724763e-05, "loss": 0.6889, "step": 1602 }, { "epoch": 0.23909314639421284, "grad_norm": 1.5409926176071167, "learning_rate": 1.7798046689700728e-05, "loss": 0.6597, "step": 1603 }, { "epoch": 0.23924229994779625, "grad_norm": 1.5256680250167847, "learning_rate": 1.779502100417269e-05, "loss": 0.5883, "step": 1604 }, { "epoch": 0.23939145350137966, "grad_norm": 1.2196996212005615, "learning_rate": 1.7791993498847016e-05, "loss": 0.6517, "step": 1605 }, { "epoch": 0.23954060705496308, "grad_norm": 1.5101934671401978, "learning_rate": 1.77889641744305e-05, "loss": 0.7224, "step": 1606 }, { "epoch": 0.2396897606085465, "grad_norm": 1.53179132938385, "learning_rate": 1.778593303163035e-05, "loss": 0.7197, "step": 1607 }, { "epoch": 0.2398389141621299, "grad_norm": 1.6579805612564087, "learning_rate": 1.7782900071154215e-05, "loss": 0.6023, "step": 1608 }, { "epoch": 0.23998806771571332, "grad_norm": 1.5123745203018188, "learning_rate": 1.777986529371015e-05, "loss": 0.6254, "step": 1609 }, { "epoch": 0.24013722126929674, "grad_norm": 1.2618039846420288, "learning_rate": 1.777682870000665e-05, "loss": 0.6165, "step": 1610 }, { "epoch": 0.24028637482288015, "grad_norm": 1.4601575136184692, "learning_rate": 1.7773790290752626e-05, "loss": 0.6178, "step": 1611 }, { "epoch": 0.24043552837646356, "grad_norm": 1.487582802772522, "learning_rate": 1.777075006665741e-05, "loss": 0.5829, "step": 1612 }, { "epoch": 0.24058468193004698, "grad_norm": 1.2821811437606812, "learning_rate": 1.7767708028430767e-05, "loss": 0.6203, "step": 1613 }, { "epoch": 0.2407338354836304, "grad_norm": 1.4375407695770264, "learning_rate": 1.7764664176782872e-05, "loss": 0.4976, "step": 1614 }, { "epoch": 0.2408829890372138, "grad_norm": 1.4415748119354248, "learning_rate": 1.7761618512424347e-05, "loss": 0.629, "step": 1615 }, { "epoch": 0.24103214259079722, "grad_norm": 1.3688033819198608, "learning_rate": 1.7758571036066206e-05, "loss": 0.6539, "step": 1616 }, { "epoch": 0.24118129614438064, "grad_norm": 1.2892802953720093, "learning_rate": 1.7755521748419912e-05, "loss": 0.548, "step": 1617 }, { "epoch": 0.24133044969796405, "grad_norm": 1.405456304550171, "learning_rate": 1.775247065019733e-05, "loss": 0.5944, "step": 1618 }, { "epoch": 0.24147960325154746, "grad_norm": 1.2305102348327637, "learning_rate": 1.7749417742110772e-05, "loss": 0.6206, "step": 1619 }, { "epoch": 0.24162875680513088, "grad_norm": 1.5753188133239746, "learning_rate": 1.774636302487295e-05, "loss": 0.5854, "step": 1620 }, { "epoch": 0.2417779103587143, "grad_norm": 1.4000777006149292, "learning_rate": 1.7743306499197014e-05, "loss": 0.5604, "step": 1621 }, { "epoch": 0.2419270639122977, "grad_norm": 1.4551440477371216, "learning_rate": 1.774024816579652e-05, "loss": 0.6267, "step": 1622 }, { "epoch": 0.24207621746588112, "grad_norm": 1.82671058177948, "learning_rate": 1.7737188025385466e-05, "loss": 0.6767, "step": 1623 }, { "epoch": 0.24222537101946454, "grad_norm": 1.255409598350525, "learning_rate": 1.7734126078678252e-05, "loss": 0.5429, "step": 1624 }, { "epoch": 0.24237452457304795, "grad_norm": 1.0308001041412354, "learning_rate": 1.7731062326389716e-05, "loss": 0.709, "step": 1625 }, { "epoch": 0.24252367812663136, "grad_norm": 1.3807954788208008, "learning_rate": 1.772799676923511e-05, "loss": 0.6783, "step": 1626 }, { "epoch": 0.24267283168021478, "grad_norm": 1.3966336250305176, "learning_rate": 1.77249294079301e-05, "loss": 0.5399, "step": 1627 }, { "epoch": 0.2428219852337982, "grad_norm": 1.3915586471557617, "learning_rate": 1.772186024319079e-05, "loss": 0.6276, "step": 1628 }, { "epoch": 0.2429711387873816, "grad_norm": 1.4253753423690796, "learning_rate": 1.7718789275733694e-05, "loss": 0.569, "step": 1629 }, { "epoch": 0.24312029234096502, "grad_norm": 1.3504079580307007, "learning_rate": 1.7715716506275747e-05, "loss": 0.6618, "step": 1630 }, { "epoch": 0.24326944589454844, "grad_norm": 1.3888778686523438, "learning_rate": 1.771264193553431e-05, "loss": 0.6161, "step": 1631 }, { "epoch": 0.24341859944813185, "grad_norm": 1.6484953165054321, "learning_rate": 1.770956556422716e-05, "loss": 0.5768, "step": 1632 }, { "epoch": 0.24356775300171526, "grad_norm": 1.9095500707626343, "learning_rate": 1.7706487393072492e-05, "loss": 0.6467, "step": 1633 }, { "epoch": 0.24371690655529868, "grad_norm": 1.9717929363250732, "learning_rate": 1.7703407422788933e-05, "loss": 0.6686, "step": 1634 }, { "epoch": 0.2438660601088821, "grad_norm": 1.3440768718719482, "learning_rate": 1.770032565409551e-05, "loss": 0.6626, "step": 1635 }, { "epoch": 0.2440152136624655, "grad_norm": 1.511070966720581, "learning_rate": 1.769724208771169e-05, "loss": 0.5651, "step": 1636 }, { "epoch": 0.24416436721604892, "grad_norm": 1.5017033815383911, "learning_rate": 1.7694156724357352e-05, "loss": 0.6894, "step": 1637 }, { "epoch": 0.24431352076963234, "grad_norm": 1.2748154401779175, "learning_rate": 1.7691069564752793e-05, "loss": 0.6007, "step": 1638 }, { "epoch": 0.24446267432321575, "grad_norm": 1.4561686515808105, "learning_rate": 1.7687980609618726e-05, "loss": 0.6198, "step": 1639 }, { "epoch": 0.24461182787679916, "grad_norm": 1.6621625423431396, "learning_rate": 1.768488985967629e-05, "loss": 0.6445, "step": 1640 }, { "epoch": 0.24476098143038258, "grad_norm": 1.4802933931350708, "learning_rate": 1.768179731564704e-05, "loss": 0.6939, "step": 1641 }, { "epoch": 0.244910134983966, "grad_norm": 1.435078740119934, "learning_rate": 1.767870297825295e-05, "loss": 0.6639, "step": 1642 }, { "epoch": 0.2450592885375494, "grad_norm": 1.2784687280654907, "learning_rate": 1.767560684821642e-05, "loss": 0.6047, "step": 1643 }, { "epoch": 0.24520844209113282, "grad_norm": 1.3988903760910034, "learning_rate": 1.7672508926260244e-05, "loss": 0.6601, "step": 1644 }, { "epoch": 0.24535759564471624, "grad_norm": 1.5268919467926025, "learning_rate": 1.7669409213107674e-05, "loss": 0.6361, "step": 1645 }, { "epoch": 0.24550674919829965, "grad_norm": 1.391412615776062, "learning_rate": 1.766630770948234e-05, "loss": 0.7292, "step": 1646 }, { "epoch": 0.24565590275188307, "grad_norm": 1.6500132083892822, "learning_rate": 1.7663204416108315e-05, "loss": 0.5514, "step": 1647 }, { "epoch": 0.24580505630546648, "grad_norm": 1.270914912223816, "learning_rate": 1.7660099333710084e-05, "loss": 0.6076, "step": 1648 }, { "epoch": 0.2459542098590499, "grad_norm": 1.7986265420913696, "learning_rate": 1.7656992463012548e-05, "loss": 0.572, "step": 1649 }, { "epoch": 0.2461033634126333, "grad_norm": 1.317816972732544, "learning_rate": 1.765388380474102e-05, "loss": 0.553, "step": 1650 }, { "epoch": 0.24625251696621672, "grad_norm": 1.2533848285675049, "learning_rate": 1.765077335962124e-05, "loss": 0.5676, "step": 1651 }, { "epoch": 0.24640167051980014, "grad_norm": 1.6275513172149658, "learning_rate": 1.7647661128379373e-05, "loss": 0.579, "step": 1652 }, { "epoch": 0.24655082407338355, "grad_norm": 1.286580204963684, "learning_rate": 1.7644547111741968e-05, "loss": 0.5716, "step": 1653 }, { "epoch": 0.24669997762696697, "grad_norm": 1.377732753753662, "learning_rate": 1.7641431310436025e-05, "loss": 0.7143, "step": 1654 }, { "epoch": 0.24684913118055038, "grad_norm": 1.4112045764923096, "learning_rate": 1.7638313725188948e-05, "loss": 0.7072, "step": 1655 }, { "epoch": 0.2469982847341338, "grad_norm": 1.8585268259048462, "learning_rate": 1.7635194356728553e-05, "loss": 0.6851, "step": 1656 }, { "epoch": 0.2471474382877172, "grad_norm": 1.1544177532196045, "learning_rate": 1.7632073205783076e-05, "loss": 0.6563, "step": 1657 }, { "epoch": 0.24729659184130062, "grad_norm": 1.352359414100647, "learning_rate": 1.7628950273081176e-05, "loss": 0.6204, "step": 1658 }, { "epoch": 0.24744574539488404, "grad_norm": 1.2867547273635864, "learning_rate": 1.7625825559351917e-05, "loss": 0.6974, "step": 1659 }, { "epoch": 0.24759489894846745, "grad_norm": 1.3794203996658325, "learning_rate": 1.762269906532478e-05, "loss": 0.6577, "step": 1660 }, { "epoch": 0.24774405250205087, "grad_norm": 1.6383658647537231, "learning_rate": 1.7619570791729676e-05, "loss": 0.555, "step": 1661 }, { "epoch": 0.24789320605563428, "grad_norm": 1.4562058448791504, "learning_rate": 1.7616440739296908e-05, "loss": 0.5854, "step": 1662 }, { "epoch": 0.2480423596092177, "grad_norm": 1.3918094635009766, "learning_rate": 1.7613308908757215e-05, "loss": 0.6423, "step": 1663 }, { "epoch": 0.2481915131628011, "grad_norm": 1.635695219039917, "learning_rate": 1.761017530084174e-05, "loss": 0.5805, "step": 1664 }, { "epoch": 0.24834066671638452, "grad_norm": 1.4627962112426758, "learning_rate": 1.7607039916282044e-05, "loss": 0.602, "step": 1665 }, { "epoch": 0.24848982026996794, "grad_norm": 1.2962591648101807, "learning_rate": 1.7603902755810102e-05, "loss": 0.4958, "step": 1666 }, { "epoch": 0.24863897382355135, "grad_norm": 1.409719467163086, "learning_rate": 1.7600763820158308e-05, "loss": 0.5616, "step": 1667 }, { "epoch": 0.24878812737713477, "grad_norm": 1.416193962097168, "learning_rate": 1.7597623110059462e-05, "loss": 0.6658, "step": 1668 }, { "epoch": 0.24893728093071818, "grad_norm": 0.9034914374351501, "learning_rate": 1.7594480626246784e-05, "loss": 0.655, "step": 1669 }, { "epoch": 0.2490864344843016, "grad_norm": 1.4019187688827515, "learning_rate": 1.759133636945391e-05, "loss": 0.6327, "step": 1670 }, { "epoch": 0.249235588037885, "grad_norm": 1.4824625253677368, "learning_rate": 1.7588190340414882e-05, "loss": 0.6154, "step": 1671 }, { "epoch": 0.24938474159146842, "grad_norm": 1.385596752166748, "learning_rate": 1.7585042539864164e-05, "loss": 0.6766, "step": 1672 }, { "epoch": 0.24953389514505184, "grad_norm": 0.7624297142028809, "learning_rate": 1.758189296853663e-05, "loss": 0.5535, "step": 1673 }, { "epoch": 0.24968304869863525, "grad_norm": 1.387397050857544, "learning_rate": 1.757874162716757e-05, "loss": 0.6353, "step": 1674 }, { "epoch": 0.24983220225221867, "grad_norm": 1.2834422588348389, "learning_rate": 1.7575588516492677e-05, "loss": 0.6589, "step": 1675 }, { "epoch": 0.24998135580580208, "grad_norm": 1.3642830848693848, "learning_rate": 1.757243363724807e-05, "loss": 0.6535, "step": 1676 }, { "epoch": 0.25013050935938547, "grad_norm": 0.9251272678375244, "learning_rate": 1.7569276990170276e-05, "loss": 0.6926, "step": 1677 }, { "epoch": 0.2502796629129689, "grad_norm": 1.5507023334503174, "learning_rate": 1.7566118575996238e-05, "loss": 0.6169, "step": 1678 }, { "epoch": 0.2504288164665523, "grad_norm": 1.4439655542373657, "learning_rate": 1.75629583954633e-05, "loss": 0.6103, "step": 1679 }, { "epoch": 0.2505779700201357, "grad_norm": 1.4807868003845215, "learning_rate": 1.7559796449309233e-05, "loss": 0.6271, "step": 1680 }, { "epoch": 0.2507271235737191, "grad_norm": 1.476283311843872, "learning_rate": 1.755663273827221e-05, "loss": 0.6602, "step": 1681 }, { "epoch": 0.25087627712730254, "grad_norm": 1.4265711307525635, "learning_rate": 1.7553467263090822e-05, "loss": 0.6914, "step": 1682 }, { "epoch": 0.25102543068088595, "grad_norm": 3.733325242996216, "learning_rate": 1.7550300024504067e-05, "loss": 0.6328, "step": 1683 }, { "epoch": 0.25117458423446937, "grad_norm": 1.3736717700958252, "learning_rate": 1.754713102325136e-05, "loss": 0.681, "step": 1684 }, { "epoch": 0.2513237377880528, "grad_norm": 1.450537085533142, "learning_rate": 1.7543960260072522e-05, "loss": 0.5954, "step": 1685 }, { "epoch": 0.2514728913416362, "grad_norm": 1.314718246459961, "learning_rate": 1.754078773570779e-05, "loss": 0.4998, "step": 1686 }, { "epoch": 0.2516220448952196, "grad_norm": 0.8699203729629517, "learning_rate": 1.753761345089781e-05, "loss": 0.6357, "step": 1687 }, { "epoch": 0.251771198448803, "grad_norm": 1.2230931520462036, "learning_rate": 1.7534437406383637e-05, "loss": 0.6415, "step": 1688 }, { "epoch": 0.25192035200238644, "grad_norm": 1.4305014610290527, "learning_rate": 1.753125960290674e-05, "loss": 0.6526, "step": 1689 }, { "epoch": 0.25206950555596985, "grad_norm": 1.3331302404403687, "learning_rate": 1.7528080041209e-05, "loss": 0.7085, "step": 1690 }, { "epoch": 0.25221865910955327, "grad_norm": 1.3612983226776123, "learning_rate": 1.7524898722032704e-05, "loss": 0.6181, "step": 1691 }, { "epoch": 0.2523678126631367, "grad_norm": 1.4104301929473877, "learning_rate": 1.7521715646120547e-05, "loss": 0.6387, "step": 1692 }, { "epoch": 0.2525169662167201, "grad_norm": 0.8694811463356018, "learning_rate": 1.751853081421565e-05, "loss": 0.6485, "step": 1693 }, { "epoch": 0.2526661197703035, "grad_norm": 1.1490302085876465, "learning_rate": 1.751534422706152e-05, "loss": 0.5353, "step": 1694 }, { "epoch": 0.2528152733238869, "grad_norm": 1.3070634603500366, "learning_rate": 1.7512155885402095e-05, "loss": 0.6493, "step": 1695 }, { "epoch": 0.25296442687747034, "grad_norm": 1.5266324281692505, "learning_rate": 1.7508965789981706e-05, "loss": 0.6121, "step": 1696 }, { "epoch": 0.25311358043105375, "grad_norm": 1.4078775644302368, "learning_rate": 1.7505773941545108e-05, "loss": 0.621, "step": 1697 }, { "epoch": 0.25326273398463717, "grad_norm": 1.3867372274398804, "learning_rate": 1.7502580340837455e-05, "loss": 0.6292, "step": 1698 }, { "epoch": 0.2534118875382206, "grad_norm": 1.3852862119674683, "learning_rate": 1.7499384988604316e-05, "loss": 0.6675, "step": 1699 }, { "epoch": 0.253561041091804, "grad_norm": 1.573508381843567, "learning_rate": 1.7496187885591664e-05, "loss": 0.6638, "step": 1700 }, { "epoch": 0.2537101946453874, "grad_norm": 1.468759298324585, "learning_rate": 1.7492989032545886e-05, "loss": 0.5693, "step": 1701 }, { "epoch": 0.2538593481989708, "grad_norm": 1.3417880535125732, "learning_rate": 1.7489788430213774e-05, "loss": 0.567, "step": 1702 }, { "epoch": 0.25400850175255424, "grad_norm": 1.7432224750518799, "learning_rate": 1.7486586079342523e-05, "loss": 0.5353, "step": 1703 }, { "epoch": 0.25415765530613765, "grad_norm": 1.4428009986877441, "learning_rate": 1.748338198067975e-05, "loss": 0.6173, "step": 1704 }, { "epoch": 0.25430680885972107, "grad_norm": 1.9197745323181152, "learning_rate": 1.7480176134973474e-05, "loss": 0.6617, "step": 1705 }, { "epoch": 0.2544559624133045, "grad_norm": 1.446564793586731, "learning_rate": 1.7476968542972112e-05, "loss": 0.6005, "step": 1706 }, { "epoch": 0.2546051159668879, "grad_norm": 1.4733344316482544, "learning_rate": 1.74737592054245e-05, "loss": 0.5325, "step": 1707 }, { "epoch": 0.2547542695204713, "grad_norm": 1.3704560995101929, "learning_rate": 1.7470548123079884e-05, "loss": 0.6458, "step": 1708 }, { "epoch": 0.2549034230740547, "grad_norm": 1.4132846593856812, "learning_rate": 1.7467335296687903e-05, "loss": 0.6816, "step": 1709 }, { "epoch": 0.25505257662763814, "grad_norm": 1.4155969619750977, "learning_rate": 1.7464120726998616e-05, "loss": 0.7192, "step": 1710 }, { "epoch": 0.25520173018122155, "grad_norm": 1.5089930295944214, "learning_rate": 1.7460904414762488e-05, "loss": 0.5811, "step": 1711 }, { "epoch": 0.25535088373480497, "grad_norm": 1.3479551076889038, "learning_rate": 1.7457686360730382e-05, "loss": 0.7173, "step": 1712 }, { "epoch": 0.2555000372883884, "grad_norm": 1.5239436626434326, "learning_rate": 1.745446656565358e-05, "loss": 0.6554, "step": 1713 }, { "epoch": 0.2556491908419718, "grad_norm": 1.4193402528762817, "learning_rate": 1.7451245030283755e-05, "loss": 0.5999, "step": 1714 }, { "epoch": 0.2557983443955552, "grad_norm": 1.3801301717758179, "learning_rate": 1.7448021755373005e-05, "loss": 0.5877, "step": 1715 }, { "epoch": 0.2559474979491386, "grad_norm": 0.919295608997345, "learning_rate": 1.7444796741673814e-05, "loss": 0.6798, "step": 1716 }, { "epoch": 0.25609665150272204, "grad_norm": 1.4532160758972168, "learning_rate": 1.7441569989939092e-05, "loss": 0.6466, "step": 1717 }, { "epoch": 0.25624580505630545, "grad_norm": 1.3542284965515137, "learning_rate": 1.7438341500922137e-05, "loss": 0.5869, "step": 1718 }, { "epoch": 0.25639495860988887, "grad_norm": 1.5726194381713867, "learning_rate": 1.7435111275376668e-05, "loss": 0.6397, "step": 1719 }, { "epoch": 0.2565441121634723, "grad_norm": 1.4009287357330322, "learning_rate": 1.7431879314056792e-05, "loss": 0.6326, "step": 1720 }, { "epoch": 0.2566932657170557, "grad_norm": 1.2994890213012695, "learning_rate": 1.742864561771704e-05, "loss": 0.6093, "step": 1721 }, { "epoch": 0.2568424192706391, "grad_norm": 1.698498249053955, "learning_rate": 1.7425410187112334e-05, "loss": 0.6912, "step": 1722 }, { "epoch": 0.2569915728242225, "grad_norm": 1.5155774354934692, "learning_rate": 1.742217302299801e-05, "loss": 0.6932, "step": 1723 }, { "epoch": 0.25714072637780594, "grad_norm": 1.8353424072265625, "learning_rate": 1.74189341261298e-05, "loss": 0.5954, "step": 1724 }, { "epoch": 0.25728987993138935, "grad_norm": 1.9098496437072754, "learning_rate": 1.741569349726385e-05, "loss": 0.6022, "step": 1725 }, { "epoch": 0.25743903348497277, "grad_norm": 1.2846547365188599, "learning_rate": 1.74124511371567e-05, "loss": 0.6042, "step": 1726 }, { "epoch": 0.2575881870385562, "grad_norm": 1.5157731771469116, "learning_rate": 1.7409207046565306e-05, "loss": 0.5646, "step": 1727 }, { "epoch": 0.2577373405921396, "grad_norm": 1.5261644124984741, "learning_rate": 1.7405961226247022e-05, "loss": 0.6425, "step": 1728 }, { "epoch": 0.257886494145723, "grad_norm": 1.2660001516342163, "learning_rate": 1.7402713676959598e-05, "loss": 0.5606, "step": 1729 }, { "epoch": 0.2580356476993064, "grad_norm": 1.3647371530532837, "learning_rate": 1.73994643994612e-05, "loss": 0.612, "step": 1730 }, { "epoch": 0.25818480125288984, "grad_norm": 1.3523637056350708, "learning_rate": 1.7396213394510393e-05, "loss": 0.5979, "step": 1731 }, { "epoch": 0.25833395480647325, "grad_norm": 0.8957549333572388, "learning_rate": 1.7392960662866143e-05, "loss": 0.677, "step": 1732 }, { "epoch": 0.25848310836005667, "grad_norm": 1.6817289590835571, "learning_rate": 1.7389706205287824e-05, "loss": 0.6224, "step": 1733 }, { "epoch": 0.2586322619136401, "grad_norm": 1.3004745244979858, "learning_rate": 1.7386450022535207e-05, "loss": 0.6249, "step": 1734 }, { "epoch": 0.2587814154672235, "grad_norm": 1.3966636657714844, "learning_rate": 1.738319211536847e-05, "loss": 0.6234, "step": 1735 }, { "epoch": 0.2589305690208069, "grad_norm": 1.3413000106811523, "learning_rate": 1.7379932484548193e-05, "loss": 0.6183, "step": 1736 }, { "epoch": 0.2590797225743903, "grad_norm": 1.3386712074279785, "learning_rate": 1.7376671130835362e-05, "loss": 0.551, "step": 1737 }, { "epoch": 0.25922887612797374, "grad_norm": 1.3468600511550903, "learning_rate": 1.7373408054991348e-05, "loss": 0.607, "step": 1738 }, { "epoch": 0.25937802968155715, "grad_norm": 1.475676417350769, "learning_rate": 1.737014325777795e-05, "loss": 0.6462, "step": 1739 }, { "epoch": 0.25952718323514057, "grad_norm": 1.3899799585342407, "learning_rate": 1.7366876739957346e-05, "loss": 0.5667, "step": 1740 }, { "epoch": 0.259676336788724, "grad_norm": 1.3055793046951294, "learning_rate": 1.7363608502292136e-05, "loss": 0.6248, "step": 1741 }, { "epoch": 0.2598254903423074, "grad_norm": 1.3645350933074951, "learning_rate": 1.7360338545545303e-05, "loss": 0.6692, "step": 1742 }, { "epoch": 0.2599746438958908, "grad_norm": 1.3463623523712158, "learning_rate": 1.735706687048024e-05, "loss": 0.6906, "step": 1743 }, { "epoch": 0.2601237974494742, "grad_norm": 0.9537790417671204, "learning_rate": 1.7353793477860746e-05, "loss": 0.6633, "step": 1744 }, { "epoch": 0.26027295100305764, "grad_norm": 1.4612796306610107, "learning_rate": 1.735051836845101e-05, "loss": 0.6136, "step": 1745 }, { "epoch": 0.26042210455664105, "grad_norm": 1.3230234384536743, "learning_rate": 1.734724154301563e-05, "loss": 0.5721, "step": 1746 }, { "epoch": 0.26057125811022447, "grad_norm": 1.4637725353240967, "learning_rate": 1.7343963002319597e-05, "loss": 0.6223, "step": 1747 }, { "epoch": 0.2607204116638079, "grad_norm": 1.334269642829895, "learning_rate": 1.7340682747128314e-05, "loss": 0.6004, "step": 1748 }, { "epoch": 0.2608695652173913, "grad_norm": 1.416332483291626, "learning_rate": 1.7337400778207578e-05, "loss": 0.6373, "step": 1749 }, { "epoch": 0.2610187187709747, "grad_norm": 1.424120545387268, "learning_rate": 1.7334117096323578e-05, "loss": 0.6593, "step": 1750 }, { "epoch": 0.2611678723245581, "grad_norm": 1.4437241554260254, "learning_rate": 1.733083170224292e-05, "loss": 0.6963, "step": 1751 }, { "epoch": 0.26131702587814154, "grad_norm": 1.4660828113555908, "learning_rate": 1.732754459673259e-05, "loss": 0.6685, "step": 1752 }, { "epoch": 0.26146617943172495, "grad_norm": 1.4511146545410156, "learning_rate": 1.7324255780559993e-05, "loss": 0.6821, "step": 1753 }, { "epoch": 0.26161533298530837, "grad_norm": 1.2610998153686523, "learning_rate": 1.732096525449292e-05, "loss": 0.6347, "step": 1754 }, { "epoch": 0.2617644865388918, "grad_norm": 1.4641693830490112, "learning_rate": 1.7317673019299566e-05, "loss": 0.6111, "step": 1755 }, { "epoch": 0.2619136400924752, "grad_norm": 1.5883253812789917, "learning_rate": 1.7314379075748524e-05, "loss": 0.5712, "step": 1756 }, { "epoch": 0.2620627936460586, "grad_norm": 1.5469884872436523, "learning_rate": 1.7311083424608785e-05, "loss": 0.6271, "step": 1757 }, { "epoch": 0.262211947199642, "grad_norm": 1.1279354095458984, "learning_rate": 1.7307786066649742e-05, "loss": 0.5757, "step": 1758 }, { "epoch": 0.26236110075322544, "grad_norm": 1.5205494165420532, "learning_rate": 1.730448700264119e-05, "loss": 0.5947, "step": 1759 }, { "epoch": 0.26251025430680885, "grad_norm": 1.3853553533554077, "learning_rate": 1.7301186233353303e-05, "loss": 0.5961, "step": 1760 }, { "epoch": 0.26265940786039227, "grad_norm": 1.5861032009124756, "learning_rate": 1.7297883759556676e-05, "loss": 0.6636, "step": 1761 }, { "epoch": 0.2628085614139757, "grad_norm": 1.6566332578659058, "learning_rate": 1.7294579582022296e-05, "loss": 0.5732, "step": 1762 }, { "epoch": 0.2629577149675591, "grad_norm": 1.3974113464355469, "learning_rate": 1.7291273701521534e-05, "loss": 0.6837, "step": 1763 }, { "epoch": 0.2631068685211425, "grad_norm": 1.3938452005386353, "learning_rate": 1.7287966118826174e-05, "loss": 0.6675, "step": 1764 }, { "epoch": 0.2632560220747259, "grad_norm": 1.5462125539779663, "learning_rate": 1.72846568347084e-05, "loss": 0.5941, "step": 1765 }, { "epoch": 0.26340517562830934, "grad_norm": 1.4804638624191284, "learning_rate": 1.728134584994077e-05, "loss": 0.603, "step": 1766 }, { "epoch": 0.26355432918189275, "grad_norm": 1.4382147789001465, "learning_rate": 1.7278033165296267e-05, "loss": 0.6516, "step": 1767 }, { "epoch": 0.26370348273547617, "grad_norm": 1.409494400024414, "learning_rate": 1.7274718781548256e-05, "loss": 0.5564, "step": 1768 }, { "epoch": 0.2638526362890596, "grad_norm": 1.4384077787399292, "learning_rate": 1.7271402699470498e-05, "loss": 0.6103, "step": 1769 }, { "epoch": 0.264001789842643, "grad_norm": 1.926253080368042, "learning_rate": 1.7268084919837155e-05, "loss": 0.7348, "step": 1770 }, { "epoch": 0.2641509433962264, "grad_norm": 1.2676042318344116, "learning_rate": 1.7264765443422783e-05, "loss": 0.606, "step": 1771 }, { "epoch": 0.2643000969498098, "grad_norm": 0.9487889409065247, "learning_rate": 1.726144427100234e-05, "loss": 0.7157, "step": 1772 }, { "epoch": 0.26444925050339324, "grad_norm": 1.3411369323730469, "learning_rate": 1.7258121403351168e-05, "loss": 0.5298, "step": 1773 }, { "epoch": 0.26459840405697665, "grad_norm": 1.6351044178009033, "learning_rate": 1.7254796841245017e-05, "loss": 0.6355, "step": 1774 }, { "epoch": 0.26474755761056007, "grad_norm": 0.9030153155326843, "learning_rate": 1.7251470585460026e-05, "loss": 0.6856, "step": 1775 }, { "epoch": 0.2648967111641435, "grad_norm": 1.6989761590957642, "learning_rate": 1.724814263677273e-05, "loss": 0.5929, "step": 1776 }, { "epoch": 0.2650458647177269, "grad_norm": 1.345104694366455, "learning_rate": 1.7244812995960056e-05, "loss": 0.6541, "step": 1777 }, { "epoch": 0.2651950182713103, "grad_norm": 1.7011958360671997, "learning_rate": 1.7241481663799337e-05, "loss": 0.6736, "step": 1778 }, { "epoch": 0.2653441718248937, "grad_norm": 1.4988036155700684, "learning_rate": 1.7238148641068292e-05, "loss": 0.7287, "step": 1779 }, { "epoch": 0.26549332537847714, "grad_norm": 1.4901416301727295, "learning_rate": 1.7234813928545034e-05, "loss": 0.6111, "step": 1780 }, { "epoch": 0.26564247893206056, "grad_norm": 1.3500661849975586, "learning_rate": 1.7231477527008074e-05, "loss": 0.6436, "step": 1781 }, { "epoch": 0.26579163248564397, "grad_norm": 1.5107824802398682, "learning_rate": 1.722813943723632e-05, "loss": 0.6937, "step": 1782 }, { "epoch": 0.2659407860392274, "grad_norm": 1.4919524192810059, "learning_rate": 1.7224799660009064e-05, "loss": 0.6242, "step": 1783 }, { "epoch": 0.2660899395928108, "grad_norm": 1.288519263267517, "learning_rate": 1.7221458196106003e-05, "loss": 0.61, "step": 1784 }, { "epoch": 0.2662390931463942, "grad_norm": 1.6195833683013916, "learning_rate": 1.721811504630722e-05, "loss": 0.5888, "step": 1785 }, { "epoch": 0.2663882466999776, "grad_norm": 1.289764642715454, "learning_rate": 1.72147702113932e-05, "loss": 0.613, "step": 1786 }, { "epoch": 0.26653740025356104, "grad_norm": 1.348167896270752, "learning_rate": 1.721142369214481e-05, "loss": 0.6518, "step": 1787 }, { "epoch": 0.26668655380714446, "grad_norm": 1.262546181678772, "learning_rate": 1.7208075489343318e-05, "loss": 0.6942, "step": 1788 }, { "epoch": 0.26683570736072787, "grad_norm": 1.6190863847732544, "learning_rate": 1.7204725603770387e-05, "loss": 0.7161, "step": 1789 }, { "epoch": 0.2669848609143113, "grad_norm": 1.5422422885894775, "learning_rate": 1.7201374036208066e-05, "loss": 0.5137, "step": 1790 }, { "epoch": 0.2671340144678947, "grad_norm": 1.3794503211975098, "learning_rate": 1.71980207874388e-05, "loss": 0.629, "step": 1791 }, { "epoch": 0.2672831680214781, "grad_norm": 1.2889314889907837, "learning_rate": 1.7194665858245428e-05, "loss": 0.5761, "step": 1792 }, { "epoch": 0.2674323215750615, "grad_norm": 1.6477231979370117, "learning_rate": 1.719130924941118e-05, "loss": 0.5748, "step": 1793 }, { "epoch": 0.26758147512864494, "grad_norm": 1.6097553968429565, "learning_rate": 1.718795096171968e-05, "loss": 0.6397, "step": 1794 }, { "epoch": 0.26773062868222836, "grad_norm": 1.3461345434188843, "learning_rate": 1.718459099595493e-05, "loss": 0.6728, "step": 1795 }, { "epoch": 0.26787978223581177, "grad_norm": 1.8099303245544434, "learning_rate": 1.718122935290135e-05, "loss": 0.6909, "step": 1796 }, { "epoch": 0.2680289357893952, "grad_norm": 1.3339370489120483, "learning_rate": 1.717786603334373e-05, "loss": 0.6872, "step": 1797 }, { "epoch": 0.2681780893429786, "grad_norm": 1.4758830070495605, "learning_rate": 1.717450103806726e-05, "loss": 0.5481, "step": 1798 }, { "epoch": 0.268327242896562, "grad_norm": 1.3427356481552124, "learning_rate": 1.717113436785752e-05, "loss": 0.6534, "step": 1799 }, { "epoch": 0.2684763964501454, "grad_norm": 1.4497851133346558, "learning_rate": 1.716776602350048e-05, "loss": 0.5923, "step": 1800 }, { "epoch": 0.26862555000372884, "grad_norm": 1.363694190979004, "learning_rate": 1.71643960057825e-05, "loss": 0.6781, "step": 1801 }, { "epoch": 0.26877470355731226, "grad_norm": 1.3640751838684082, "learning_rate": 1.7161024315490336e-05, "loss": 0.6188, "step": 1802 }, { "epoch": 0.26892385711089567, "grad_norm": 1.273188829421997, "learning_rate": 1.715765095341113e-05, "loss": 0.5789, "step": 1803 }, { "epoch": 0.2690730106644791, "grad_norm": 1.5710242986679077, "learning_rate": 1.715427592033241e-05, "loss": 0.6589, "step": 1804 }, { "epoch": 0.2692221642180625, "grad_norm": 1.3960870504379272, "learning_rate": 1.715089921704211e-05, "loss": 0.6287, "step": 1805 }, { "epoch": 0.2693713177716459, "grad_norm": 1.4035412073135376, "learning_rate": 1.7147520844328526e-05, "loss": 0.6033, "step": 1806 }, { "epoch": 0.2695204713252293, "grad_norm": 1.2797893285751343, "learning_rate": 1.7144140802980377e-05, "loss": 0.5948, "step": 1807 }, { "epoch": 0.26966962487881274, "grad_norm": 1.6085422039031982, "learning_rate": 1.714075909378675e-05, "loss": 0.6818, "step": 1808 }, { "epoch": 0.26981877843239616, "grad_norm": 1.4665457010269165, "learning_rate": 1.7137375717537122e-05, "loss": 0.6566, "step": 1809 }, { "epoch": 0.26996793198597957, "grad_norm": 1.549971342086792, "learning_rate": 1.7133990675021367e-05, "loss": 0.5612, "step": 1810 }, { "epoch": 0.270117085539563, "grad_norm": 1.3530558347702026, "learning_rate": 1.713060396702975e-05, "loss": 0.6002, "step": 1811 }, { "epoch": 0.2702662390931464, "grad_norm": 1.5619463920593262, "learning_rate": 1.7127215594352914e-05, "loss": 0.7175, "step": 1812 }, { "epoch": 0.2704153926467298, "grad_norm": 1.4235050678253174, "learning_rate": 1.7123825557781894e-05, "loss": 0.6656, "step": 1813 }, { "epoch": 0.2705645462003132, "grad_norm": 2.3145558834075928, "learning_rate": 1.7120433858108123e-05, "loss": 0.5523, "step": 1814 }, { "epoch": 0.27071369975389664, "grad_norm": 1.6949762105941772, "learning_rate": 1.7117040496123408e-05, "loss": 0.6119, "step": 1815 }, { "epoch": 0.27086285330748006, "grad_norm": 1.3369582891464233, "learning_rate": 1.711364547261996e-05, "loss": 0.6657, "step": 1816 }, { "epoch": 0.27101200686106347, "grad_norm": 1.4795678853988647, "learning_rate": 1.7110248788390358e-05, "loss": 0.6179, "step": 1817 }, { "epoch": 0.2711611604146469, "grad_norm": 1.4077922105789185, "learning_rate": 1.7106850444227588e-05, "loss": 0.6633, "step": 1818 }, { "epoch": 0.2713103139682303, "grad_norm": 1.4685730934143066, "learning_rate": 1.7103450440925013e-05, "loss": 0.7394, "step": 1819 }, { "epoch": 0.2714594675218137, "grad_norm": 1.2820994853973389, "learning_rate": 1.710004877927638e-05, "loss": 0.6006, "step": 1820 }, { "epoch": 0.2716086210753971, "grad_norm": 1.461163878440857, "learning_rate": 1.7096645460075837e-05, "loss": 0.7035, "step": 1821 }, { "epoch": 0.27175777462898054, "grad_norm": 1.231568694114685, "learning_rate": 1.7093240484117907e-05, "loss": 0.5581, "step": 1822 }, { "epoch": 0.27190692818256396, "grad_norm": 1.2439531087875366, "learning_rate": 1.7089833852197508e-05, "loss": 0.6592, "step": 1823 }, { "epoch": 0.27205608173614737, "grad_norm": 1.4597004652023315, "learning_rate": 1.708642556510993e-05, "loss": 0.6066, "step": 1824 }, { "epoch": 0.2722052352897308, "grad_norm": 1.5293354988098145, "learning_rate": 1.7083015623650867e-05, "loss": 0.621, "step": 1825 }, { "epoch": 0.2723543888433142, "grad_norm": 1.2700155973434448, "learning_rate": 1.707960402861639e-05, "loss": 0.6728, "step": 1826 }, { "epoch": 0.2725035423968976, "grad_norm": 1.5105369091033936, "learning_rate": 1.707619078080296e-05, "loss": 0.6909, "step": 1827 }, { "epoch": 0.272652695950481, "grad_norm": 1.7971665859222412, "learning_rate": 1.707277588100742e-05, "loss": 0.6289, "step": 1828 }, { "epoch": 0.27280184950406444, "grad_norm": 1.366029143333435, "learning_rate": 1.7069359330027e-05, "loss": 0.5708, "step": 1829 }, { "epoch": 0.27295100305764786, "grad_norm": 1.3654471635818481, "learning_rate": 1.706594112865931e-05, "loss": 0.5933, "step": 1830 }, { "epoch": 0.27310015661123127, "grad_norm": 2.229281187057495, "learning_rate": 1.706252127770236e-05, "loss": 0.5575, "step": 1831 }, { "epoch": 0.2732493101648147, "grad_norm": 1.521018147468567, "learning_rate": 1.7059099777954532e-05, "loss": 0.6311, "step": 1832 }, { "epoch": 0.2733984637183981, "grad_norm": 1.5441393852233887, "learning_rate": 1.7055676630214598e-05, "loss": 0.5984, "step": 1833 }, { "epoch": 0.2735476172719815, "grad_norm": 1.3817882537841797, "learning_rate": 1.7052251835281716e-05, "loss": 0.6111, "step": 1834 }, { "epoch": 0.27369677082556493, "grad_norm": 1.3587907552719116, "learning_rate": 1.704882539395542e-05, "loss": 0.7104, "step": 1835 }, { "epoch": 0.27384592437914834, "grad_norm": 1.5518059730529785, "learning_rate": 1.704539730703564e-05, "loss": 0.7027, "step": 1836 }, { "epoch": 0.27399507793273176, "grad_norm": 1.4255080223083496, "learning_rate": 1.704196757532268e-05, "loss": 0.5481, "step": 1837 }, { "epoch": 0.27414423148631517, "grad_norm": 1.4236544370651245, "learning_rate": 1.703853619961724e-05, "loss": 0.7022, "step": 1838 }, { "epoch": 0.2742933850398986, "grad_norm": 1.3875446319580078, "learning_rate": 1.7035103180720392e-05, "loss": 0.6327, "step": 1839 }, { "epoch": 0.274442538593482, "grad_norm": 1.4625823497772217, "learning_rate": 1.70316685194336e-05, "loss": 0.6345, "step": 1840 }, { "epoch": 0.2745916921470654, "grad_norm": 1.2989475727081299, "learning_rate": 1.70282322165587e-05, "loss": 0.6307, "step": 1841 }, { "epoch": 0.27474084570064883, "grad_norm": 1.4507551193237305, "learning_rate": 1.7024794272897926e-05, "loss": 0.6328, "step": 1842 }, { "epoch": 0.27488999925423224, "grad_norm": 1.3311562538146973, "learning_rate": 1.7021354689253888e-05, "loss": 0.5633, "step": 1843 }, { "epoch": 0.27503915280781566, "grad_norm": 1.7244848012924194, "learning_rate": 1.7017913466429572e-05, "loss": 0.6585, "step": 1844 }, { "epoch": 0.27518830636139907, "grad_norm": 1.5256977081298828, "learning_rate": 1.701447060522836e-05, "loss": 0.7445, "step": 1845 }, { "epoch": 0.2753374599149825, "grad_norm": 1.311116337776184, "learning_rate": 1.7011026106454008e-05, "loss": 0.5815, "step": 1846 }, { "epoch": 0.2754866134685659, "grad_norm": 1.3261629343032837, "learning_rate": 1.7007579970910657e-05, "loss": 0.6267, "step": 1847 }, { "epoch": 0.2756357670221493, "grad_norm": 1.522608757019043, "learning_rate": 1.700413219940283e-05, "loss": 0.7239, "step": 1848 }, { "epoch": 0.27578492057573273, "grad_norm": 1.545955777168274, "learning_rate": 1.7000682792735427e-05, "loss": 0.666, "step": 1849 }, { "epoch": 0.27593407412931614, "grad_norm": 1.4692282676696777, "learning_rate": 1.699723175171374e-05, "loss": 0.4915, "step": 1850 }, { "epoch": 0.27608322768289956, "grad_norm": 1.8183945417404175, "learning_rate": 1.6993779077143437e-05, "loss": 0.6215, "step": 1851 }, { "epoch": 0.27623238123648297, "grad_norm": 1.4324359893798828, "learning_rate": 1.6990324769830557e-05, "loss": 0.5509, "step": 1852 }, { "epoch": 0.2763815347900664, "grad_norm": 1.002772331237793, "learning_rate": 1.6986868830581542e-05, "loss": 0.6489, "step": 1853 }, { "epoch": 0.2765306883436498, "grad_norm": 1.2690870761871338, "learning_rate": 1.6983411260203196e-05, "loss": 0.5654, "step": 1854 }, { "epoch": 0.2766798418972332, "grad_norm": 2.0242578983306885, "learning_rate": 1.6979952059502715e-05, "loss": 0.6215, "step": 1855 }, { "epoch": 0.27682899545081663, "grad_norm": 1.5043601989746094, "learning_rate": 1.697649122928767e-05, "loss": 0.6784, "step": 1856 }, { "epoch": 0.27697814900440004, "grad_norm": 1.3444693088531494, "learning_rate": 1.6973028770366015e-05, "loss": 0.6808, "step": 1857 }, { "epoch": 0.27712730255798346, "grad_norm": 1.5234291553497314, "learning_rate": 1.6969564683546077e-05, "loss": 0.5428, "step": 1858 }, { "epoch": 0.27727645611156687, "grad_norm": 1.507757544517517, "learning_rate": 1.6966098969636583e-05, "loss": 0.7259, "step": 1859 }, { "epoch": 0.2774256096651503, "grad_norm": 1.344505786895752, "learning_rate": 1.696263162944661e-05, "loss": 0.6145, "step": 1860 }, { "epoch": 0.2775747632187337, "grad_norm": 1.5853021144866943, "learning_rate": 1.695916266378564e-05, "loss": 0.6671, "step": 1861 }, { "epoch": 0.2777239167723171, "grad_norm": 1.3034197092056274, "learning_rate": 1.695569207346353e-05, "loss": 0.6076, "step": 1862 }, { "epoch": 0.27787307032590053, "grad_norm": 1.616239309310913, "learning_rate": 1.69522198592905e-05, "loss": 0.6275, "step": 1863 }, { "epoch": 0.27802222387948394, "grad_norm": 1.3354512453079224, "learning_rate": 1.6948746022077167e-05, "loss": 0.616, "step": 1864 }, { "epoch": 0.27817137743306736, "grad_norm": 0.9275630712509155, "learning_rate": 1.694527056263452e-05, "loss": 0.6724, "step": 1865 }, { "epoch": 0.27832053098665077, "grad_norm": 1.637774109840393, "learning_rate": 1.6941793481773924e-05, "loss": 0.7171, "step": 1866 }, { "epoch": 0.2784696845402342, "grad_norm": 1.4441298246383667, "learning_rate": 1.693831478030713e-05, "loss": 0.6214, "step": 1867 }, { "epoch": 0.2786188380938176, "grad_norm": 1.544645071029663, "learning_rate": 1.6934834459046262e-05, "loss": 0.6417, "step": 1868 }, { "epoch": 0.278767991647401, "grad_norm": 1.5595685243606567, "learning_rate": 1.6931352518803825e-05, "loss": 0.7038, "step": 1869 }, { "epoch": 0.27891714520098443, "grad_norm": 1.3073158264160156, "learning_rate": 1.6927868960392698e-05, "loss": 0.6018, "step": 1870 }, { "epoch": 0.27906629875456784, "grad_norm": 1.514514684677124, "learning_rate": 1.692438378462614e-05, "loss": 0.6321, "step": 1871 }, { "epoch": 0.27921545230815126, "grad_norm": 1.4666273593902588, "learning_rate": 1.6920896992317785e-05, "loss": 0.6391, "step": 1872 }, { "epoch": 0.27936460586173467, "grad_norm": 1.4737470149993896, "learning_rate": 1.6917408584281654e-05, "loss": 0.6118, "step": 1873 }, { "epoch": 0.2795137594153181, "grad_norm": 1.5596047639846802, "learning_rate": 1.6913918561332132e-05, "loss": 0.589, "step": 1874 }, { "epoch": 0.2796629129689015, "grad_norm": 1.3901772499084473, "learning_rate": 1.6910426924283993e-05, "loss": 0.5947, "step": 1875 }, { "epoch": 0.2798120665224849, "grad_norm": 1.3241841793060303, "learning_rate": 1.6906933673952375e-05, "loss": 0.5718, "step": 1876 }, { "epoch": 0.27996122007606833, "grad_norm": 1.5135271549224854, "learning_rate": 1.6903438811152803e-05, "loss": 0.6836, "step": 1877 }, { "epoch": 0.28011037362965174, "grad_norm": 1.381235122680664, "learning_rate": 1.6899942336701176e-05, "loss": 0.6324, "step": 1878 }, { "epoch": 0.28025952718323516, "grad_norm": 1.4453009366989136, "learning_rate": 1.6896444251413768e-05, "loss": 0.5686, "step": 1879 }, { "epoch": 0.28040868073681857, "grad_norm": 1.611377239227295, "learning_rate": 1.6892944556107233e-05, "loss": 0.596, "step": 1880 }, { "epoch": 0.280557834290402, "grad_norm": 1.4060300588607788, "learning_rate": 1.688944325159859e-05, "loss": 0.5771, "step": 1881 }, { "epoch": 0.2807069878439854, "grad_norm": 1.5931023359298706, "learning_rate": 1.6885940338705243e-05, "loss": 0.5893, "step": 1882 }, { "epoch": 0.2808561413975688, "grad_norm": 0.9531072974205017, "learning_rate": 1.6882435818244976e-05, "loss": 0.6597, "step": 1883 }, { "epoch": 0.28100529495115223, "grad_norm": 1.382480263710022, "learning_rate": 1.687892969103593e-05, "loss": 0.5917, "step": 1884 }, { "epoch": 0.28115444850473564, "grad_norm": 1.6374940872192383, "learning_rate": 1.6875421957896646e-05, "loss": 0.6518, "step": 1885 }, { "epoch": 0.28130360205831906, "grad_norm": 1.4638607501983643, "learning_rate": 1.6871912619646017e-05, "loss": 0.626, "step": 1886 }, { "epoch": 0.28145275561190247, "grad_norm": 1.3857260942459106, "learning_rate": 1.6868401677103324e-05, "loss": 0.6395, "step": 1887 }, { "epoch": 0.2816019091654859, "grad_norm": 1.5483602285385132, "learning_rate": 1.6864889131088223e-05, "loss": 0.585, "step": 1888 }, { "epoch": 0.2817510627190693, "grad_norm": 1.4890350103378296, "learning_rate": 1.686137498242073e-05, "loss": 0.6345, "step": 1889 }, { "epoch": 0.2819002162726527, "grad_norm": 1.4107059240341187, "learning_rate": 1.6857859231921258e-05, "loss": 0.6531, "step": 1890 }, { "epoch": 0.28204936982623613, "grad_norm": 1.3140944242477417, "learning_rate": 1.6854341880410573e-05, "loss": 0.6574, "step": 1891 }, { "epoch": 0.28219852337981954, "grad_norm": 1.2756398916244507, "learning_rate": 1.6850822928709825e-05, "loss": 0.6183, "step": 1892 }, { "epoch": 0.28234767693340296, "grad_norm": 1.353764533996582, "learning_rate": 1.6847302377640538e-05, "loss": 0.6439, "step": 1893 }, { "epoch": 0.28249683048698637, "grad_norm": 1.6111738681793213, "learning_rate": 1.6843780228024605e-05, "loss": 0.6605, "step": 1894 }, { "epoch": 0.2826459840405698, "grad_norm": 1.3807262182235718, "learning_rate": 1.6840256480684294e-05, "loss": 0.6114, "step": 1895 }, { "epoch": 0.2827951375941532, "grad_norm": 1.2712197303771973, "learning_rate": 1.683673113644225e-05, "loss": 0.5711, "step": 1896 }, { "epoch": 0.2829442911477366, "grad_norm": 1.470528483390808, "learning_rate": 1.683320419612148e-05, "loss": 0.5818, "step": 1897 }, { "epoch": 0.28309344470132003, "grad_norm": 1.3026968240737915, "learning_rate": 1.682967566054538e-05, "loss": 0.5269, "step": 1898 }, { "epoch": 0.28324259825490344, "grad_norm": 1.323801875114441, "learning_rate": 1.6826145530537705e-05, "loss": 0.6237, "step": 1899 }, { "epoch": 0.28339175180848686, "grad_norm": 1.314994215965271, "learning_rate": 1.682261380692259e-05, "loss": 0.6099, "step": 1900 }, { "epoch": 0.2835409053620703, "grad_norm": 1.4150397777557373, "learning_rate": 1.6819080490524527e-05, "loss": 0.67, "step": 1901 }, { "epoch": 0.2836900589156537, "grad_norm": 1.282925009727478, "learning_rate": 1.6815545582168403e-05, "loss": 0.5836, "step": 1902 }, { "epoch": 0.2838392124692371, "grad_norm": 1.6622217893600464, "learning_rate": 1.681200908267946e-05, "loss": 0.6885, "step": 1903 }, { "epoch": 0.2839883660228205, "grad_norm": 1.287517786026001, "learning_rate": 1.680847099288332e-05, "loss": 0.6515, "step": 1904 }, { "epoch": 0.28413751957640393, "grad_norm": 1.2610805034637451, "learning_rate": 1.680493131360597e-05, "loss": 0.617, "step": 1905 }, { "epoch": 0.28428667312998734, "grad_norm": 1.4163181781768799, "learning_rate": 1.680139004567377e-05, "loss": 0.6055, "step": 1906 }, { "epoch": 0.28443582668357076, "grad_norm": 1.4730970859527588, "learning_rate": 1.6797847189913456e-05, "loss": 0.5984, "step": 1907 }, { "epoch": 0.2845849802371542, "grad_norm": 1.3224161863327026, "learning_rate": 1.6794302747152125e-05, "loss": 0.575, "step": 1908 }, { "epoch": 0.2847341337907376, "grad_norm": 1.2813093662261963, "learning_rate": 1.6790756718217252e-05, "loss": 0.5485, "step": 1909 }, { "epoch": 0.284883287344321, "grad_norm": 1.4812794923782349, "learning_rate": 1.6787209103936677e-05, "loss": 0.6053, "step": 1910 }, { "epoch": 0.2850324408979044, "grad_norm": 1.5519534349441528, "learning_rate": 1.6783659905138626e-05, "loss": 0.5881, "step": 1911 }, { "epoch": 0.28518159445148783, "grad_norm": 1.3216958045959473, "learning_rate": 1.6780109122651665e-05, "loss": 0.6957, "step": 1912 }, { "epoch": 0.28533074800507124, "grad_norm": 1.3308093547821045, "learning_rate": 1.677655675730476e-05, "loss": 0.5856, "step": 1913 }, { "epoch": 0.28547990155865466, "grad_norm": 1.49772310256958, "learning_rate": 1.6773002809927228e-05, "loss": 0.6794, "step": 1914 }, { "epoch": 0.2856290551122381, "grad_norm": 1.3824000358581543, "learning_rate": 1.6769447281348757e-05, "loss": 0.595, "step": 1915 }, { "epoch": 0.2857782086658215, "grad_norm": 1.3769811391830444, "learning_rate": 1.676589017239942e-05, "loss": 0.6537, "step": 1916 }, { "epoch": 0.2859273622194049, "grad_norm": 1.265631079673767, "learning_rate": 1.676233148390963e-05, "loss": 0.5479, "step": 1917 }, { "epoch": 0.2860765157729883, "grad_norm": 1.5111699104309082, "learning_rate": 1.6758771216710205e-05, "loss": 0.663, "step": 1918 }, { "epoch": 0.28622566932657173, "grad_norm": 1.3961681127548218, "learning_rate": 1.675520937163229e-05, "loss": 0.5909, "step": 1919 }, { "epoch": 0.28637482288015514, "grad_norm": 1.2213572263717651, "learning_rate": 1.675164594950744e-05, "loss": 0.6356, "step": 1920 }, { "epoch": 0.28652397643373856, "grad_norm": 1.362841010093689, "learning_rate": 1.6748080951167552e-05, "loss": 0.6902, "step": 1921 }, { "epoch": 0.286673129987322, "grad_norm": 1.474843978881836, "learning_rate": 1.6744514377444895e-05, "loss": 0.6409, "step": 1922 }, { "epoch": 0.2868222835409054, "grad_norm": 1.3912980556488037, "learning_rate": 1.674094622917211e-05, "loss": 0.6399, "step": 1923 }, { "epoch": 0.2869714370944888, "grad_norm": 2.0231986045837402, "learning_rate": 1.6737376507182205e-05, "loss": 0.6672, "step": 1924 }, { "epoch": 0.2871205906480722, "grad_norm": 1.452856183052063, "learning_rate": 1.6733805212308553e-05, "loss": 0.6183, "step": 1925 }, { "epoch": 0.28726974420165563, "grad_norm": 1.5779762268066406, "learning_rate": 1.67302323453849e-05, "loss": 0.6219, "step": 1926 }, { "epoch": 0.28741889775523904, "grad_norm": 1.4246139526367188, "learning_rate": 1.6726657907245348e-05, "loss": 0.5808, "step": 1927 }, { "epoch": 0.28756805130882246, "grad_norm": 1.4888684749603271, "learning_rate": 1.6723081898724377e-05, "loss": 0.6255, "step": 1928 }, { "epoch": 0.2877172048624059, "grad_norm": 1.3490139245986938, "learning_rate": 1.6719504320656827e-05, "loss": 0.6379, "step": 1929 }, { "epoch": 0.2878663584159893, "grad_norm": 1.5755207538604736, "learning_rate": 1.671592517387791e-05, "loss": 0.7068, "step": 1930 }, { "epoch": 0.2880155119695727, "grad_norm": 1.2833284139633179, "learning_rate": 1.6712344459223198e-05, "loss": 0.5708, "step": 1931 }, { "epoch": 0.2881646655231561, "grad_norm": 1.2733246088027954, "learning_rate": 1.6708762177528634e-05, "loss": 0.6455, "step": 1932 }, { "epoch": 0.28831381907673953, "grad_norm": 1.2076524496078491, "learning_rate": 1.670517832963052e-05, "loss": 0.5095, "step": 1933 }, { "epoch": 0.28846297263032294, "grad_norm": 1.405225157737732, "learning_rate": 1.670159291636553e-05, "loss": 0.6191, "step": 1934 }, { "epoch": 0.28861212618390636, "grad_norm": 1.2973049879074097, "learning_rate": 1.6698005938570702e-05, "loss": 0.6354, "step": 1935 }, { "epoch": 0.2887612797374898, "grad_norm": 1.7898961305618286, "learning_rate": 1.6694417397083446e-05, "loss": 0.6477, "step": 1936 }, { "epoch": 0.28891043329107313, "grad_norm": 2.252800941467285, "learning_rate": 1.669082729274152e-05, "loss": 0.5175, "step": 1937 }, { "epoch": 0.28905958684465655, "grad_norm": 1.3412672281265259, "learning_rate": 1.6687235626383057e-05, "loss": 0.645, "step": 1938 }, { "epoch": 0.28920874039823996, "grad_norm": 1.59958815574646, "learning_rate": 1.6683642398846563e-05, "loss": 0.6221, "step": 1939 }, { "epoch": 0.2893578939518234, "grad_norm": 1.408883810043335, "learning_rate": 1.6680047610970894e-05, "loss": 0.6168, "step": 1940 }, { "epoch": 0.2895070475054068, "grad_norm": 1.4432262182235718, "learning_rate": 1.6676451263595276e-05, "loss": 0.6113, "step": 1941 }, { "epoch": 0.2896562010589902, "grad_norm": 1.3263152837753296, "learning_rate": 1.6672853357559304e-05, "loss": 0.598, "step": 1942 }, { "epoch": 0.2898053546125736, "grad_norm": 1.6088311672210693, "learning_rate": 1.666925389370293e-05, "loss": 0.6526, "step": 1943 }, { "epoch": 0.28995450816615703, "grad_norm": 1.5223469734191895, "learning_rate": 1.666565287286647e-05, "loss": 0.6883, "step": 1944 }, { "epoch": 0.29010366171974045, "grad_norm": 1.4354796409606934, "learning_rate": 1.6662050295890605e-05, "loss": 0.599, "step": 1945 }, { "epoch": 0.29025281527332386, "grad_norm": 1.510246753692627, "learning_rate": 1.6658446163616376e-05, "loss": 0.6255, "step": 1946 }, { "epoch": 0.2904019688269073, "grad_norm": 1.017948865890503, "learning_rate": 1.6654840476885205e-05, "loss": 0.6663, "step": 1947 }, { "epoch": 0.2905511223804907, "grad_norm": 1.2810256481170654, "learning_rate": 1.665123323653885e-05, "loss": 0.6574, "step": 1948 }, { "epoch": 0.2907002759340741, "grad_norm": 1.5433368682861328, "learning_rate": 1.6647624443419446e-05, "loss": 0.5948, "step": 1949 }, { "epoch": 0.2908494294876575, "grad_norm": 1.32457435131073, "learning_rate": 1.664401409836949e-05, "loss": 0.6619, "step": 1950 }, { "epoch": 0.29099858304124093, "grad_norm": 1.3197888135910034, "learning_rate": 1.6640402202231847e-05, "loss": 0.6523, "step": 1951 }, { "epoch": 0.29114773659482435, "grad_norm": 1.2842531204223633, "learning_rate": 1.6636788755849725e-05, "loss": 0.565, "step": 1952 }, { "epoch": 0.29129689014840776, "grad_norm": 1.5106279850006104, "learning_rate": 1.6633173760066717e-05, "loss": 0.6247, "step": 1953 }, { "epoch": 0.2914460437019912, "grad_norm": 1.2928729057312012, "learning_rate": 1.6629557215726762e-05, "loss": 0.6672, "step": 1954 }, { "epoch": 0.2915951972555746, "grad_norm": 1.2685939073562622, "learning_rate": 1.6625939123674165e-05, "loss": 0.6313, "step": 1955 }, { "epoch": 0.291744350809158, "grad_norm": 1.4165551662445068, "learning_rate": 1.6622319484753595e-05, "loss": 0.6129, "step": 1956 }, { "epoch": 0.2918935043627414, "grad_norm": 1.6441643238067627, "learning_rate": 1.6618698299810078e-05, "loss": 0.6757, "step": 1957 }, { "epoch": 0.29204265791632483, "grad_norm": 1.3934208154678345, "learning_rate": 1.6615075569689005e-05, "loss": 0.6382, "step": 1958 }, { "epoch": 0.29219181146990825, "grad_norm": 1.4095122814178467, "learning_rate": 1.661145129523612e-05, "loss": 0.6529, "step": 1959 }, { "epoch": 0.29234096502349166, "grad_norm": 0.9360311031341553, "learning_rate": 1.660782547729754e-05, "loss": 0.7028, "step": 1960 }, { "epoch": 0.2924901185770751, "grad_norm": 1.4579097032546997, "learning_rate": 1.6604198116719735e-05, "loss": 0.6621, "step": 1961 }, { "epoch": 0.2926392721306585, "grad_norm": 1.4067695140838623, "learning_rate": 1.6600569214349528e-05, "loss": 0.653, "step": 1962 }, { "epoch": 0.2927884256842419, "grad_norm": 1.5427539348602295, "learning_rate": 1.6596938771034116e-05, "loss": 0.5437, "step": 1963 }, { "epoch": 0.2929375792378253, "grad_norm": 1.2800698280334473, "learning_rate": 1.6593306787621052e-05, "loss": 0.6348, "step": 1964 }, { "epoch": 0.29308673279140873, "grad_norm": 1.3839848041534424, "learning_rate": 1.658967326495824e-05, "loss": 0.5821, "step": 1965 }, { "epoch": 0.29323588634499215, "grad_norm": 1.3725221157073975, "learning_rate": 1.658603820389395e-05, "loss": 0.5898, "step": 1966 }, { "epoch": 0.29338503989857556, "grad_norm": 1.4022873640060425, "learning_rate": 1.6582401605276813e-05, "loss": 0.6675, "step": 1967 }, { "epoch": 0.293534193452159, "grad_norm": 1.6462911367416382, "learning_rate": 1.657876346995581e-05, "loss": 0.6733, "step": 1968 }, { "epoch": 0.2936833470057424, "grad_norm": 1.4021949768066406, "learning_rate": 1.65751237987803e-05, "loss": 0.7057, "step": 1969 }, { "epoch": 0.2938325005593258, "grad_norm": 1.2436349391937256, "learning_rate": 1.6571482592599974e-05, "loss": 0.555, "step": 1970 }, { "epoch": 0.2939816541129092, "grad_norm": 1.2626678943634033, "learning_rate": 1.6567839852264898e-05, "loss": 0.6237, "step": 1971 }, { "epoch": 0.29413080766649263, "grad_norm": 1.3758355379104614, "learning_rate": 1.65641955786255e-05, "loss": 0.6691, "step": 1972 }, { "epoch": 0.29427996122007605, "grad_norm": 1.309510588645935, "learning_rate": 1.656054977253255e-05, "loss": 0.5863, "step": 1973 }, { "epoch": 0.29442911477365946, "grad_norm": 1.3538167476654053, "learning_rate": 1.655690243483719e-05, "loss": 0.6023, "step": 1974 }, { "epoch": 0.2945782683272429, "grad_norm": 1.1560258865356445, "learning_rate": 1.6553253566390916e-05, "loss": 0.6058, "step": 1975 }, { "epoch": 0.2947274218808263, "grad_norm": 1.4156017303466797, "learning_rate": 1.6549603168045577e-05, "loss": 0.6371, "step": 1976 }, { "epoch": 0.2948765754344097, "grad_norm": 1.3921419382095337, "learning_rate": 1.6545951240653383e-05, "loss": 0.6131, "step": 1977 }, { "epoch": 0.2950257289879931, "grad_norm": 1.5945065021514893, "learning_rate": 1.6542297785066898e-05, "loss": 0.6924, "step": 1978 }, { "epoch": 0.29517488254157653, "grad_norm": 1.4636839628219604, "learning_rate": 1.6538642802139042e-05, "loss": 0.6068, "step": 1979 }, { "epoch": 0.29532403609515995, "grad_norm": 1.5501056909561157, "learning_rate": 1.65349862927231e-05, "loss": 0.5934, "step": 1980 }, { "epoch": 0.29547318964874336, "grad_norm": 1.5504966974258423, "learning_rate": 1.6531328257672707e-05, "loss": 0.6536, "step": 1981 }, { "epoch": 0.2956223432023268, "grad_norm": 1.3990472555160522, "learning_rate": 1.6527668697841853e-05, "loss": 0.5381, "step": 1982 }, { "epoch": 0.2957714967559102, "grad_norm": 1.5491306781768799, "learning_rate": 1.6524007614084886e-05, "loss": 0.607, "step": 1983 }, { "epoch": 0.2959206503094936, "grad_norm": 2.5545480251312256, "learning_rate": 1.652034500725651e-05, "loss": 0.6559, "step": 1984 }, { "epoch": 0.296069803863077, "grad_norm": 1.260956883430481, "learning_rate": 1.651668087821178e-05, "loss": 0.5556, "step": 1985 }, { "epoch": 0.29621895741666043, "grad_norm": 1.6497353315353394, "learning_rate": 1.6513015227806117e-05, "loss": 0.6037, "step": 1986 }, { "epoch": 0.29636811097024385, "grad_norm": 1.4230433702468872, "learning_rate": 1.6509348056895284e-05, "loss": 0.6203, "step": 1987 }, { "epoch": 0.29651726452382726, "grad_norm": 1.4473177194595337, "learning_rate": 1.650567936633541e-05, "loss": 0.5585, "step": 1988 }, { "epoch": 0.2966664180774107, "grad_norm": 1.5561856031417847, "learning_rate": 1.6502009156982974e-05, "loss": 0.7037, "step": 1989 }, { "epoch": 0.2968155716309941, "grad_norm": 1.5350993871688843, "learning_rate": 1.649833742969481e-05, "loss": 0.6303, "step": 1990 }, { "epoch": 0.2969647251845775, "grad_norm": 1.2315847873687744, "learning_rate": 1.6494664185328103e-05, "loss": 0.5974, "step": 1991 }, { "epoch": 0.2971138787381609, "grad_norm": 1.5069975852966309, "learning_rate": 1.64909894247404e-05, "loss": 0.6472, "step": 1992 }, { "epoch": 0.29726303229174433, "grad_norm": 1.605261206626892, "learning_rate": 1.6487313148789597e-05, "loss": 0.7017, "step": 1993 }, { "epoch": 0.29741218584532775, "grad_norm": 1.766030192375183, "learning_rate": 1.648363535833394e-05, "loss": 0.5656, "step": 1994 }, { "epoch": 0.29756133939891116, "grad_norm": 1.415791630744934, "learning_rate": 1.6479956054232034e-05, "loss": 0.5549, "step": 1995 }, { "epoch": 0.2977104929524946, "grad_norm": 1.9180561304092407, "learning_rate": 1.647627523734284e-05, "loss": 0.6982, "step": 1996 }, { "epoch": 0.297859646506078, "grad_norm": 1.6013078689575195, "learning_rate": 1.6472592908525666e-05, "loss": 0.6662, "step": 1997 }, { "epoch": 0.2980088000596614, "grad_norm": 1.4039103984832764, "learning_rate": 1.6468909068640174e-05, "loss": 0.6132, "step": 1998 }, { "epoch": 0.2981579536132448, "grad_norm": 1.3655917644500732, "learning_rate": 1.6465223718546383e-05, "loss": 0.6657, "step": 1999 }, { "epoch": 0.29830710716682823, "grad_norm": 1.356916904449463, "learning_rate": 1.6461536859104658e-05, "loss": 0.6576, "step": 2000 }, { "epoch": 0.29845626072041165, "grad_norm": 1.275683045387268, "learning_rate": 1.645784849117572e-05, "loss": 0.6598, "step": 2001 }, { "epoch": 0.29860541427399506, "grad_norm": 1.40847647190094, "learning_rate": 1.6454158615620643e-05, "loss": 0.6041, "step": 2002 }, { "epoch": 0.2987545678275785, "grad_norm": 1.5808830261230469, "learning_rate": 1.6450467233300854e-05, "loss": 0.6052, "step": 2003 }, { "epoch": 0.2989037213811619, "grad_norm": 1.2972066402435303, "learning_rate": 1.644677434507813e-05, "loss": 0.6512, "step": 2004 }, { "epoch": 0.2990528749347453, "grad_norm": 1.4889371395111084, "learning_rate": 1.64430799518146e-05, "loss": 0.6868, "step": 2005 }, { "epoch": 0.2992020284883287, "grad_norm": 1.2646896839141846, "learning_rate": 1.643938405437274e-05, "loss": 0.5573, "step": 2006 }, { "epoch": 0.29935118204191213, "grad_norm": 1.277628779411316, "learning_rate": 1.643568665361538e-05, "loss": 0.6353, "step": 2007 }, { "epoch": 0.29950033559549555, "grad_norm": 1.1758439540863037, "learning_rate": 1.6431987750405708e-05, "loss": 0.6307, "step": 2008 }, { "epoch": 0.29964948914907896, "grad_norm": 1.794702410697937, "learning_rate": 1.6428287345607255e-05, "loss": 0.6194, "step": 2009 }, { "epoch": 0.2997986427026624, "grad_norm": 1.3829922676086426, "learning_rate": 1.64245854400839e-05, "loss": 0.6806, "step": 2010 }, { "epoch": 0.2999477962562458, "grad_norm": 1.5591367483139038, "learning_rate": 1.6420882034699882e-05, "loss": 0.6385, "step": 2011 }, { "epoch": 0.3000969498098292, "grad_norm": 1.0107914209365845, "learning_rate": 1.641717713031978e-05, "loss": 0.7008, "step": 2012 }, { "epoch": 0.3002461033634126, "grad_norm": 1.5598164796829224, "learning_rate": 1.6413470727808533e-05, "loss": 0.5335, "step": 2013 }, { "epoch": 0.30039525691699603, "grad_norm": 1.3145413398742676, "learning_rate": 1.6409762828031416e-05, "loss": 0.6217, "step": 2014 }, { "epoch": 0.30054441047057945, "grad_norm": 1.3839796781539917, "learning_rate": 1.6406053431854066e-05, "loss": 0.628, "step": 2015 }, { "epoch": 0.30069356402416286, "grad_norm": 1.3420144319534302, "learning_rate": 1.6402342540142474e-05, "loss": 0.6078, "step": 2016 }, { "epoch": 0.3008427175777463, "grad_norm": 1.3530988693237305, "learning_rate": 1.639863015376296e-05, "loss": 0.6216, "step": 2017 }, { "epoch": 0.3009918711313297, "grad_norm": 1.9502394199371338, "learning_rate": 1.6394916273582208e-05, "loss": 0.5263, "step": 2018 }, { "epoch": 0.3011410246849131, "grad_norm": 1.3343700170516968, "learning_rate": 1.6391200900467245e-05, "loss": 0.6484, "step": 2019 }, { "epoch": 0.3012901782384965, "grad_norm": 1.3432685136795044, "learning_rate": 1.6387484035285456e-05, "loss": 0.5797, "step": 2020 }, { "epoch": 0.30143933179207993, "grad_norm": 1.5705273151397705, "learning_rate": 1.6383765678904563e-05, "loss": 0.6731, "step": 2021 }, { "epoch": 0.30158848534566335, "grad_norm": 1.5122575759887695, "learning_rate": 1.6380045832192634e-05, "loss": 0.5977, "step": 2022 }, { "epoch": 0.30173763889924676, "grad_norm": 1.4025397300720215, "learning_rate": 1.6376324496018096e-05, "loss": 0.6, "step": 2023 }, { "epoch": 0.3018867924528302, "grad_norm": 1.6973533630371094, "learning_rate": 1.6372601671249724e-05, "loss": 0.6483, "step": 2024 }, { "epoch": 0.3020359460064136, "grad_norm": 0.9038329124450684, "learning_rate": 1.636887735875663e-05, "loss": 0.6599, "step": 2025 }, { "epoch": 0.302185099559997, "grad_norm": 1.4274970293045044, "learning_rate": 1.6365151559408276e-05, "loss": 0.6125, "step": 2026 }, { "epoch": 0.3023342531135804, "grad_norm": 1.4704160690307617, "learning_rate": 1.636142427407448e-05, "loss": 0.5466, "step": 2027 }, { "epoch": 0.30248340666716383, "grad_norm": 1.4417363405227661, "learning_rate": 1.6357695503625394e-05, "loss": 0.6, "step": 2028 }, { "epoch": 0.30263256022074725, "grad_norm": 1.3108099699020386, "learning_rate": 1.635396524893153e-05, "loss": 0.6174, "step": 2029 }, { "epoch": 0.30278171377433066, "grad_norm": 2.8962149620056152, "learning_rate": 1.6350233510863736e-05, "loss": 0.7026, "step": 2030 }, { "epoch": 0.3029308673279141, "grad_norm": 1.6879838705062866, "learning_rate": 1.634650029029321e-05, "loss": 0.661, "step": 2031 }, { "epoch": 0.3030800208814975, "grad_norm": 0.8573647141456604, "learning_rate": 1.63427655880915e-05, "loss": 0.645, "step": 2032 }, { "epoch": 0.3032291744350809, "grad_norm": 1.3501299619674683, "learning_rate": 1.633902940513049e-05, "loss": 0.6703, "step": 2033 }, { "epoch": 0.3033783279886643, "grad_norm": 1.5503976345062256, "learning_rate": 1.633529174228242e-05, "loss": 0.6678, "step": 2034 }, { "epoch": 0.30352748154224773, "grad_norm": 1.3818004131317139, "learning_rate": 1.633155260041987e-05, "loss": 0.638, "step": 2035 }, { "epoch": 0.30367663509583115, "grad_norm": 1.6037509441375732, "learning_rate": 1.632781198041577e-05, "loss": 0.6646, "step": 2036 }, { "epoch": 0.30382578864941456, "grad_norm": 1.4805375337600708, "learning_rate": 1.632406988314339e-05, "loss": 0.6178, "step": 2037 }, { "epoch": 0.303974942202998, "grad_norm": 1.21210777759552, "learning_rate": 1.632032630947634e-05, "loss": 0.589, "step": 2038 }, { "epoch": 0.3041240957565814, "grad_norm": 1.3575013875961304, "learning_rate": 1.631658126028859e-05, "loss": 0.6635, "step": 2039 }, { "epoch": 0.3042732493101648, "grad_norm": 1.6390568017959595, "learning_rate": 1.6312834736454446e-05, "loss": 0.6093, "step": 2040 }, { "epoch": 0.3044224028637482, "grad_norm": 0.8544076085090637, "learning_rate": 1.630908673884855e-05, "loss": 0.6876, "step": 2041 }, { "epoch": 0.30457155641733163, "grad_norm": 1.4908703565597534, "learning_rate": 1.63053372683459e-05, "loss": 0.6271, "step": 2042 }, { "epoch": 0.30472070997091505, "grad_norm": 1.3345221281051636, "learning_rate": 1.630158632582184e-05, "loss": 0.5953, "step": 2043 }, { "epoch": 0.30486986352449846, "grad_norm": 0.8308353424072266, "learning_rate": 1.6297833912152043e-05, "loss": 0.6726, "step": 2044 }, { "epoch": 0.3050190170780819, "grad_norm": 1.5146783590316772, "learning_rate": 1.6294080028212532e-05, "loss": 0.6084, "step": 2045 }, { "epoch": 0.3051681706316653, "grad_norm": 1.598583459854126, "learning_rate": 1.629032467487969e-05, "loss": 0.6433, "step": 2046 }, { "epoch": 0.3053173241852487, "grad_norm": 1.4705984592437744, "learning_rate": 1.6286567853030212e-05, "loss": 0.6102, "step": 2047 }, { "epoch": 0.3054664777388321, "grad_norm": 1.192093014717102, "learning_rate": 1.628280956354116e-05, "loss": 0.5604, "step": 2048 }, { "epoch": 0.30561563129241553, "grad_norm": 0.8658578395843506, "learning_rate": 1.6279049807289936e-05, "loss": 0.6531, "step": 2049 }, { "epoch": 0.30576478484599895, "grad_norm": 1.4998868703842163, "learning_rate": 1.6275288585154267e-05, "loss": 0.6389, "step": 2050 }, { "epoch": 0.30591393839958236, "grad_norm": 1.5091336965560913, "learning_rate": 1.6271525898012242e-05, "loss": 0.6114, "step": 2051 }, { "epoch": 0.3060630919531658, "grad_norm": 1.4198334217071533, "learning_rate": 1.626776174674228e-05, "loss": 0.5642, "step": 2052 }, { "epoch": 0.3062122455067492, "grad_norm": 1.7666246891021729, "learning_rate": 1.6263996132223155e-05, "loss": 0.6345, "step": 2053 }, { "epoch": 0.3063613990603326, "grad_norm": 1.66013503074646, "learning_rate": 1.6260229055333962e-05, "loss": 0.5559, "step": 2054 }, { "epoch": 0.306510552613916, "grad_norm": 1.5032237768173218, "learning_rate": 1.625646051695416e-05, "loss": 0.6495, "step": 2055 }, { "epoch": 0.30665970616749944, "grad_norm": 1.4095394611358643, "learning_rate": 1.625269051796353e-05, "loss": 0.6598, "step": 2056 }, { "epoch": 0.30680885972108285, "grad_norm": 1.3262598514556885, "learning_rate": 1.624891905924221e-05, "loss": 0.6901, "step": 2057 }, { "epoch": 0.30695801327466626, "grad_norm": 1.4294100999832153, "learning_rate": 1.6245146141670662e-05, "loss": 0.619, "step": 2058 }, { "epoch": 0.3071071668282497, "grad_norm": 1.382291316986084, "learning_rate": 1.6241371766129707e-05, "loss": 0.6344, "step": 2059 }, { "epoch": 0.3072563203818331, "grad_norm": 1.6078826189041138, "learning_rate": 1.6237595933500495e-05, "loss": 0.6645, "step": 2060 }, { "epoch": 0.3074054739354165, "grad_norm": 1.685223937034607, "learning_rate": 1.6233818644664514e-05, "loss": 0.5813, "step": 2061 }, { "epoch": 0.3075546274889999, "grad_norm": 1.4414054155349731, "learning_rate": 1.6230039900503598e-05, "loss": 0.6487, "step": 2062 }, { "epoch": 0.30770378104258334, "grad_norm": 1.4448586702346802, "learning_rate": 1.6226259701899922e-05, "loss": 0.6313, "step": 2063 }, { "epoch": 0.30785293459616675, "grad_norm": 1.1726714372634888, "learning_rate": 1.622247804973599e-05, "loss": 0.4939, "step": 2064 }, { "epoch": 0.30800208814975016, "grad_norm": 1.4375495910644531, "learning_rate": 1.6218694944894666e-05, "loss": 0.6028, "step": 2065 }, { "epoch": 0.3081512417033336, "grad_norm": 1.2552495002746582, "learning_rate": 1.621491038825913e-05, "loss": 0.6101, "step": 2066 }, { "epoch": 0.308300395256917, "grad_norm": 1.203904628753662, "learning_rate": 1.6211124380712914e-05, "loss": 0.5524, "step": 2067 }, { "epoch": 0.3084495488105004, "grad_norm": 1.4036675691604614, "learning_rate": 1.6207336923139886e-05, "loss": 0.5507, "step": 2068 }, { "epoch": 0.3085987023640838, "grad_norm": 1.6227880716323853, "learning_rate": 1.620354801642425e-05, "loss": 0.5883, "step": 2069 }, { "epoch": 0.30874785591766724, "grad_norm": 1.32687246799469, "learning_rate": 1.6199757661450552e-05, "loss": 0.6012, "step": 2070 }, { "epoch": 0.30889700947125065, "grad_norm": 1.6462887525558472, "learning_rate": 1.6195965859103675e-05, "loss": 0.6961, "step": 2071 }, { "epoch": 0.30904616302483406, "grad_norm": 1.4199628829956055, "learning_rate": 1.6192172610268838e-05, "loss": 0.602, "step": 2072 }, { "epoch": 0.3091953165784175, "grad_norm": 1.6709076166152954, "learning_rate": 1.6188377915831605e-05, "loss": 0.7364, "step": 2073 }, { "epoch": 0.3093444701320009, "grad_norm": 1.377123236656189, "learning_rate": 1.6184581776677864e-05, "loss": 0.5302, "step": 2074 }, { "epoch": 0.3094936236855843, "grad_norm": 1.4669947624206543, "learning_rate": 1.6180784193693852e-05, "loss": 0.6874, "step": 2075 }, { "epoch": 0.3096427772391677, "grad_norm": 1.535884141921997, "learning_rate": 1.617698516776614e-05, "loss": 0.7387, "step": 2076 }, { "epoch": 0.30979193079275114, "grad_norm": 1.2540698051452637, "learning_rate": 1.6173184699781632e-05, "loss": 0.6268, "step": 2077 }, { "epoch": 0.30994108434633455, "grad_norm": 1.4611767530441284, "learning_rate": 1.6169382790627575e-05, "loss": 0.5945, "step": 2078 }, { "epoch": 0.31009023789991796, "grad_norm": 1.4018155336380005, "learning_rate": 1.6165579441191546e-05, "loss": 0.602, "step": 2079 }, { "epoch": 0.3102393914535014, "grad_norm": 1.7503902912139893, "learning_rate": 1.6161774652361463e-05, "loss": 0.6346, "step": 2080 }, { "epoch": 0.3103885450070848, "grad_norm": 1.4635274410247803, "learning_rate": 1.6157968425025577e-05, "loss": 0.5145, "step": 2081 }, { "epoch": 0.3105376985606682, "grad_norm": 1.3850113153457642, "learning_rate": 1.6154160760072478e-05, "loss": 0.6201, "step": 2082 }, { "epoch": 0.3106868521142516, "grad_norm": 1.3110090494155884, "learning_rate": 1.6150351658391086e-05, "loss": 0.5162, "step": 2083 }, { "epoch": 0.31083600566783504, "grad_norm": 1.366493582725525, "learning_rate": 1.6146541120870667e-05, "loss": 0.5726, "step": 2084 }, { "epoch": 0.31098515922141845, "grad_norm": 1.349230408668518, "learning_rate": 1.614272914840081e-05, "loss": 0.6062, "step": 2085 }, { "epoch": 0.31113431277500186, "grad_norm": 1.319385051727295, "learning_rate": 1.6138915741871445e-05, "loss": 0.6037, "step": 2086 }, { "epoch": 0.3112834663285853, "grad_norm": 1.2546027898788452, "learning_rate": 1.6135100902172838e-05, "loss": 0.5681, "step": 2087 }, { "epoch": 0.3114326198821687, "grad_norm": 1.3213942050933838, "learning_rate": 1.6131284630195588e-05, "loss": 0.6359, "step": 2088 }, { "epoch": 0.3115817734357521, "grad_norm": 1.1647599935531616, "learning_rate": 1.6127466926830625e-05, "loss": 0.5719, "step": 2089 }, { "epoch": 0.3117309269893355, "grad_norm": 1.3150192499160767, "learning_rate": 1.6123647792969217e-05, "loss": 0.6033, "step": 2090 }, { "epoch": 0.31188008054291894, "grad_norm": 1.5017974376678467, "learning_rate": 1.6119827229502972e-05, "loss": 0.6969, "step": 2091 }, { "epoch": 0.31202923409650235, "grad_norm": 1.3218584060668945, "learning_rate": 1.611600523732382e-05, "loss": 0.7236, "step": 2092 }, { "epoch": 0.31217838765008576, "grad_norm": 1.430372953414917, "learning_rate": 1.611218181732402e-05, "loss": 0.6343, "step": 2093 }, { "epoch": 0.3123275412036692, "grad_norm": 1.396397590637207, "learning_rate": 1.6108356970396187e-05, "loss": 0.591, "step": 2094 }, { "epoch": 0.3124766947572526, "grad_norm": 1.3597290515899658, "learning_rate": 1.6104530697433258e-05, "loss": 0.6161, "step": 2095 }, { "epoch": 0.312625848310836, "grad_norm": 1.738528847694397, "learning_rate": 1.6100702999328494e-05, "loss": 0.6115, "step": 2096 }, { "epoch": 0.3127750018644194, "grad_norm": 1.5409936904907227, "learning_rate": 1.6096873876975492e-05, "loss": 0.6388, "step": 2097 }, { "epoch": 0.31292415541800284, "grad_norm": 1.4397642612457275, "learning_rate": 1.6093043331268193e-05, "loss": 0.6193, "step": 2098 }, { "epoch": 0.31307330897158625, "grad_norm": 1.3779733180999756, "learning_rate": 1.6089211363100858e-05, "loss": 0.5817, "step": 2099 }, { "epoch": 0.31322246252516966, "grad_norm": 1.5320181846618652, "learning_rate": 1.6085377973368088e-05, "loss": 0.6524, "step": 2100 }, { "epoch": 0.3133716160787531, "grad_norm": 1.50092351436615, "learning_rate": 1.608154316296481e-05, "loss": 0.6065, "step": 2101 }, { "epoch": 0.3135207696323365, "grad_norm": 1.4289485216140747, "learning_rate": 1.6077706932786285e-05, "loss": 0.6457, "step": 2102 }, { "epoch": 0.3136699231859199, "grad_norm": 1.2892022132873535, "learning_rate": 1.6073869283728103e-05, "loss": 0.5822, "step": 2103 }, { "epoch": 0.3138190767395033, "grad_norm": 1.454527735710144, "learning_rate": 1.6070030216686196e-05, "loss": 0.6069, "step": 2104 }, { "epoch": 0.31396823029308674, "grad_norm": 1.2204837799072266, "learning_rate": 1.6066189732556812e-05, "loss": 0.6405, "step": 2105 }, { "epoch": 0.31411738384667015, "grad_norm": 2.0215678215026855, "learning_rate": 1.6062347832236538e-05, "loss": 0.6148, "step": 2106 }, { "epoch": 0.31426653740025356, "grad_norm": 1.5818294286727905, "learning_rate": 1.6058504516622288e-05, "loss": 0.6242, "step": 2107 }, { "epoch": 0.314415690953837, "grad_norm": 1.5675928592681885, "learning_rate": 1.6054659786611314e-05, "loss": 0.6308, "step": 2108 }, { "epoch": 0.3145648445074204, "grad_norm": 1.384768009185791, "learning_rate": 1.6050813643101194e-05, "loss": 0.5996, "step": 2109 }, { "epoch": 0.3147139980610038, "grad_norm": 1.4395204782485962, "learning_rate": 1.6046966086989827e-05, "loss": 0.546, "step": 2110 }, { "epoch": 0.3148631516145872, "grad_norm": 1.538133978843689, "learning_rate": 1.604311711917545e-05, "loss": 0.5318, "step": 2111 }, { "epoch": 0.31501230516817064, "grad_norm": 2.1387391090393066, "learning_rate": 1.6039266740556638e-05, "loss": 0.7279, "step": 2112 }, { "epoch": 0.31516145872175405, "grad_norm": 1.6765025854110718, "learning_rate": 1.6035414952032277e-05, "loss": 0.6389, "step": 2113 }, { "epoch": 0.31531061227533747, "grad_norm": 1.2807096242904663, "learning_rate": 1.6031561754501602e-05, "loss": 0.6129, "step": 2114 }, { "epoch": 0.3154597658289209, "grad_norm": 1.6594388484954834, "learning_rate": 1.6027707148864155e-05, "loss": 0.5594, "step": 2115 }, { "epoch": 0.3156089193825043, "grad_norm": 1.2925529479980469, "learning_rate": 1.6023851136019827e-05, "loss": 0.7021, "step": 2116 }, { "epoch": 0.3157580729360877, "grad_norm": 1.6234654188156128, "learning_rate": 1.601999371686883e-05, "loss": 0.6065, "step": 2117 }, { "epoch": 0.3159072264896711, "grad_norm": 1.3870835304260254, "learning_rate": 1.6016134892311694e-05, "loss": 0.5799, "step": 2118 }, { "epoch": 0.31605638004325454, "grad_norm": 1.5893462896347046, "learning_rate": 1.6012274663249293e-05, "loss": 0.7098, "step": 2119 }, { "epoch": 0.31620553359683795, "grad_norm": 1.8062630891799927, "learning_rate": 1.600841303058282e-05, "loss": 0.6144, "step": 2120 }, { "epoch": 0.31635468715042137, "grad_norm": 1.3190611600875854, "learning_rate": 1.60045499952138e-05, "loss": 0.5385, "step": 2121 }, { "epoch": 0.3165038407040048, "grad_norm": 0.9327918887138367, "learning_rate": 1.6000685558044082e-05, "loss": 0.656, "step": 2122 }, { "epoch": 0.3166529942575882, "grad_norm": 1.3045293092727661, "learning_rate": 1.599681971997584e-05, "loss": 0.47, "step": 2123 }, { "epoch": 0.3168021478111716, "grad_norm": 1.2198277711868286, "learning_rate": 1.599295248191159e-05, "loss": 0.4953, "step": 2124 }, { "epoch": 0.316951301364755, "grad_norm": 1.473901391029358, "learning_rate": 1.5989083844754153e-05, "loss": 0.5927, "step": 2125 }, { "epoch": 0.31710045491833844, "grad_norm": 1.8126953840255737, "learning_rate": 1.5985213809406686e-05, "loss": 0.5857, "step": 2126 }, { "epoch": 0.31724960847192185, "grad_norm": 1.6870522499084473, "learning_rate": 1.5981342376772687e-05, "loss": 0.6602, "step": 2127 }, { "epoch": 0.31739876202550527, "grad_norm": 1.4805234670639038, "learning_rate": 1.597746954775595e-05, "loss": 0.6228, "step": 2128 }, { "epoch": 0.3175479155790887, "grad_norm": 1.3196616172790527, "learning_rate": 1.597359532326062e-05, "loss": 0.626, "step": 2129 }, { "epoch": 0.3176970691326721, "grad_norm": 1.8678332567214966, "learning_rate": 1.5969719704191164e-05, "loss": 0.5887, "step": 2130 }, { "epoch": 0.3178462226862555, "grad_norm": 0.9233201146125793, "learning_rate": 1.596584269145236e-05, "loss": 0.6694, "step": 2131 }, { "epoch": 0.3179953762398389, "grad_norm": 1.6183472871780396, "learning_rate": 1.5961964285949326e-05, "loss": 0.6816, "step": 2132 }, { "epoch": 0.31814452979342234, "grad_norm": 1.651528000831604, "learning_rate": 1.59580844885875e-05, "loss": 0.5808, "step": 2133 }, { "epoch": 0.31829368334700575, "grad_norm": 1.1690278053283691, "learning_rate": 1.5954203300272653e-05, "loss": 0.5855, "step": 2134 }, { "epoch": 0.31844283690058917, "grad_norm": 1.5959882736206055, "learning_rate": 1.5950320721910863e-05, "loss": 0.5433, "step": 2135 }, { "epoch": 0.3185919904541726, "grad_norm": 1.4447853565216064, "learning_rate": 1.5946436754408548e-05, "loss": 0.6089, "step": 2136 }, { "epoch": 0.318741144007756, "grad_norm": 1.4597251415252686, "learning_rate": 1.5942551398672443e-05, "loss": 0.5608, "step": 2137 }, { "epoch": 0.3188902975613394, "grad_norm": 1.572150468826294, "learning_rate": 1.5938664655609612e-05, "loss": 0.5561, "step": 2138 }, { "epoch": 0.3190394511149228, "grad_norm": 1.3787873983383179, "learning_rate": 1.5934776526127437e-05, "loss": 0.6652, "step": 2139 }, { "epoch": 0.31918860466850624, "grad_norm": 1.519724726676941, "learning_rate": 1.5930887011133626e-05, "loss": 0.7341, "step": 2140 }, { "epoch": 0.31933775822208965, "grad_norm": 1.306077003479004, "learning_rate": 1.5926996111536212e-05, "loss": 0.5929, "step": 2141 }, { "epoch": 0.31948691177567307, "grad_norm": 1.1563773155212402, "learning_rate": 1.592310382824356e-05, "loss": 0.6465, "step": 2142 }, { "epoch": 0.3196360653292565, "grad_norm": 1.8401292562484741, "learning_rate": 1.591921016216433e-05, "loss": 0.5994, "step": 2143 }, { "epoch": 0.3197852188828399, "grad_norm": 1.5912971496582031, "learning_rate": 1.591531511420754e-05, "loss": 0.6945, "step": 2144 }, { "epoch": 0.3199343724364233, "grad_norm": 1.3358731269836426, "learning_rate": 1.5911418685282506e-05, "loss": 0.6326, "step": 2145 }, { "epoch": 0.3200835259900067, "grad_norm": 1.313046932220459, "learning_rate": 1.5907520876298872e-05, "loss": 0.6587, "step": 2146 }, { "epoch": 0.32023267954359014, "grad_norm": 1.2921274900436401, "learning_rate": 1.5903621688166614e-05, "loss": 0.6403, "step": 2147 }, { "epoch": 0.32038183309717355, "grad_norm": 0.9021490216255188, "learning_rate": 1.589972112179602e-05, "loss": 0.6782, "step": 2148 }, { "epoch": 0.32053098665075697, "grad_norm": 1.687164545059204, "learning_rate": 1.58958191780977e-05, "loss": 0.6049, "step": 2149 }, { "epoch": 0.3206801402043404, "grad_norm": 1.2924014329910278, "learning_rate": 1.5891915857982583e-05, "loss": 0.691, "step": 2150 }, { "epoch": 0.3208292937579238, "grad_norm": 1.3321958780288696, "learning_rate": 1.588801116236194e-05, "loss": 0.6905, "step": 2151 }, { "epoch": 0.3209784473115072, "grad_norm": 1.365463137626648, "learning_rate": 1.5884105092147328e-05, "loss": 0.5929, "step": 2152 }, { "epoch": 0.3211276008650906, "grad_norm": 1.190185785293579, "learning_rate": 1.5880197648250658e-05, "loss": 0.669, "step": 2153 }, { "epoch": 0.32127675441867404, "grad_norm": 0.9288480281829834, "learning_rate": 1.587628883158414e-05, "loss": 0.6639, "step": 2154 }, { "epoch": 0.32142590797225745, "grad_norm": 1.9520463943481445, "learning_rate": 1.587237864306032e-05, "loss": 0.6603, "step": 2155 }, { "epoch": 0.32157506152584087, "grad_norm": 1.4933278560638428, "learning_rate": 1.5868467083592044e-05, "loss": 0.5773, "step": 2156 }, { "epoch": 0.3217242150794243, "grad_norm": 1.389356017112732, "learning_rate": 1.5864554154092503e-05, "loss": 0.6154, "step": 2157 }, { "epoch": 0.3218733686330077, "grad_norm": 1.5184823274612427, "learning_rate": 1.5860639855475194e-05, "loss": 0.6538, "step": 2158 }, { "epoch": 0.3220225221865911, "grad_norm": 1.3634299039840698, "learning_rate": 1.5856724188653928e-05, "loss": 0.6647, "step": 2159 }, { "epoch": 0.3221716757401745, "grad_norm": 1.3298825025558472, "learning_rate": 1.585280715454285e-05, "loss": 0.5747, "step": 2160 }, { "epoch": 0.32232082929375794, "grad_norm": 1.618520975112915, "learning_rate": 1.5848888754056408e-05, "loss": 0.6136, "step": 2161 }, { "epoch": 0.32246998284734135, "grad_norm": 1.5712827444076538, "learning_rate": 1.584496898810939e-05, "loss": 0.6293, "step": 2162 }, { "epoch": 0.32261913640092477, "grad_norm": 1.2945003509521484, "learning_rate": 1.5841047857616876e-05, "loss": 0.6632, "step": 2163 }, { "epoch": 0.3227682899545082, "grad_norm": 1.4081289768218994, "learning_rate": 1.583712536349429e-05, "loss": 0.5903, "step": 2164 }, { "epoch": 0.3229174435080916, "grad_norm": 1.2859951257705688, "learning_rate": 1.583320150665736e-05, "loss": 0.5804, "step": 2165 }, { "epoch": 0.323066597061675, "grad_norm": 1.5365771055221558, "learning_rate": 1.5829276288022138e-05, "loss": 0.5772, "step": 2166 }, { "epoch": 0.3232157506152584, "grad_norm": 1.4201042652130127, "learning_rate": 1.5825349708504988e-05, "loss": 0.6414, "step": 2167 }, { "epoch": 0.32336490416884184, "grad_norm": 1.3273671865463257, "learning_rate": 1.5821421769022593e-05, "loss": 0.6656, "step": 2168 }, { "epoch": 0.32351405772242525, "grad_norm": 1.2589359283447266, "learning_rate": 1.5817492470491962e-05, "loss": 0.6288, "step": 2169 }, { "epoch": 0.32366321127600867, "grad_norm": 1.531293272972107, "learning_rate": 1.581356181383041e-05, "loss": 0.6683, "step": 2170 }, { "epoch": 0.3238123648295921, "grad_norm": 1.465621829032898, "learning_rate": 1.5809629799955576e-05, "loss": 0.5838, "step": 2171 }, { "epoch": 0.3239615183831755, "grad_norm": 1.4013937711715698, "learning_rate": 1.5805696429785414e-05, "loss": 0.5927, "step": 2172 }, { "epoch": 0.3241106719367589, "grad_norm": 1.5326961278915405, "learning_rate": 1.5801761704238197e-05, "loss": 0.5699, "step": 2173 }, { "epoch": 0.3242598254903423, "grad_norm": 1.4705685377120972, "learning_rate": 1.5797825624232506e-05, "loss": 0.6719, "step": 2174 }, { "epoch": 0.32440897904392574, "grad_norm": 1.5924772024154663, "learning_rate": 1.5793888190687247e-05, "loss": 0.6348, "step": 2175 }, { "epoch": 0.32455813259750915, "grad_norm": 1.8827167749404907, "learning_rate": 1.578994940452164e-05, "loss": 0.6709, "step": 2176 }, { "epoch": 0.32470728615109257, "grad_norm": 1.3628208637237549, "learning_rate": 1.578600926665522e-05, "loss": 0.5786, "step": 2177 }, { "epoch": 0.324856439704676, "grad_norm": 0.9932774901390076, "learning_rate": 1.5782067778007835e-05, "loss": 0.6641, "step": 2178 }, { "epoch": 0.3250055932582594, "grad_norm": 1.3363542556762695, "learning_rate": 1.5778124939499654e-05, "loss": 0.5732, "step": 2179 }, { "epoch": 0.3251547468118428, "grad_norm": 1.4842033386230469, "learning_rate": 1.5774180752051152e-05, "loss": 0.6802, "step": 2180 }, { "epoch": 0.3253039003654262, "grad_norm": 1.5396174192428589, "learning_rate": 1.5770235216583136e-05, "loss": 0.7001, "step": 2181 }, { "epoch": 0.32545305391900964, "grad_norm": 1.5729281902313232, "learning_rate": 1.5766288334016705e-05, "loss": 0.5413, "step": 2182 }, { "epoch": 0.32560220747259305, "grad_norm": 1.4223219156265259, "learning_rate": 1.576234010527329e-05, "loss": 0.6069, "step": 2183 }, { "epoch": 0.32575136102617647, "grad_norm": 1.4502803087234497, "learning_rate": 1.575839053127463e-05, "loss": 0.5689, "step": 2184 }, { "epoch": 0.3259005145797599, "grad_norm": 1.4351332187652588, "learning_rate": 1.5754439612942774e-05, "loss": 0.5729, "step": 2185 }, { "epoch": 0.3260496681333433, "grad_norm": 0.9932023286819458, "learning_rate": 1.5750487351200096e-05, "loss": 0.668, "step": 2186 }, { "epoch": 0.3261988216869267, "grad_norm": 1.748258113861084, "learning_rate": 1.5746533746969275e-05, "loss": 0.6269, "step": 2187 }, { "epoch": 0.3263479752405101, "grad_norm": 1.3141412734985352, "learning_rate": 1.57425788011733e-05, "loss": 0.5916, "step": 2188 }, { "epoch": 0.32649712879409354, "grad_norm": 1.3008859157562256, "learning_rate": 1.5738622514735483e-05, "loss": 0.5999, "step": 2189 }, { "epoch": 0.32664628234767695, "grad_norm": 1.4347164630889893, "learning_rate": 1.5734664888579448e-05, "loss": 0.6482, "step": 2190 }, { "epoch": 0.32679543590126037, "grad_norm": 1.2656762599945068, "learning_rate": 1.5730705923629116e-05, "loss": 0.5698, "step": 2191 }, { "epoch": 0.3269445894548438, "grad_norm": 1.3130953311920166, "learning_rate": 1.572674562080875e-05, "loss": 0.5738, "step": 2192 }, { "epoch": 0.3270937430084272, "grad_norm": 1.2200802564620972, "learning_rate": 1.5722783981042892e-05, "loss": 0.6393, "step": 2193 }, { "epoch": 0.3272428965620106, "grad_norm": 1.3627034425735474, "learning_rate": 1.571882100525642e-05, "loss": 0.5521, "step": 2194 }, { "epoch": 0.327392050115594, "grad_norm": 1.6698126792907715, "learning_rate": 1.5714856694374514e-05, "loss": 0.6071, "step": 2195 }, { "epoch": 0.32754120366917744, "grad_norm": 1.317511796951294, "learning_rate": 1.5710891049322672e-05, "loss": 0.6864, "step": 2196 }, { "epoch": 0.32769035722276085, "grad_norm": 1.517012596130371, "learning_rate": 1.5706924071026693e-05, "loss": 0.637, "step": 2197 }, { "epoch": 0.32783951077634427, "grad_norm": 1.3912756443023682, "learning_rate": 1.57029557604127e-05, "loss": 0.6567, "step": 2198 }, { "epoch": 0.3279886643299277, "grad_norm": 1.3597909212112427, "learning_rate": 1.5698986118407113e-05, "loss": 0.6462, "step": 2199 }, { "epoch": 0.3281378178835111, "grad_norm": 5.05464506149292, "learning_rate": 1.569501514593668e-05, "loss": 0.6522, "step": 2200 }, { "epoch": 0.3282869714370945, "grad_norm": 1.3558425903320312, "learning_rate": 1.569104284392844e-05, "loss": 0.6204, "step": 2201 }, { "epoch": 0.3284361249906779, "grad_norm": 1.2119511365890503, "learning_rate": 1.568706921330976e-05, "loss": 0.5646, "step": 2202 }, { "epoch": 0.32858527854426134, "grad_norm": 1.3483299016952515, "learning_rate": 1.5683094255008304e-05, "loss": 0.6148, "step": 2203 }, { "epoch": 0.32873443209784475, "grad_norm": 1.291448712348938, "learning_rate": 1.5679117969952055e-05, "loss": 0.5512, "step": 2204 }, { "epoch": 0.32888358565142817, "grad_norm": 1.3024290800094604, "learning_rate": 1.5675140359069302e-05, "loss": 0.6591, "step": 2205 }, { "epoch": 0.3290327392050116, "grad_norm": 1.4912421703338623, "learning_rate": 1.5671161423288642e-05, "loss": 0.6027, "step": 2206 }, { "epoch": 0.329181892758595, "grad_norm": 1.3645119667053223, "learning_rate": 1.566718116353898e-05, "loss": 0.6307, "step": 2207 }, { "epoch": 0.3293310463121784, "grad_norm": 1.2648485898971558, "learning_rate": 1.5663199580749543e-05, "loss": 0.5582, "step": 2208 }, { "epoch": 0.3294801998657618, "grad_norm": 1.1844173669815063, "learning_rate": 1.565921667584985e-05, "loss": 0.6139, "step": 2209 }, { "epoch": 0.32962935341934524, "grad_norm": 1.2767449617385864, "learning_rate": 1.5655232449769738e-05, "loss": 0.5888, "step": 2210 }, { "epoch": 0.32977850697292865, "grad_norm": 1.3845880031585693, "learning_rate": 1.5651246903439344e-05, "loss": 0.5942, "step": 2211 }, { "epoch": 0.32992766052651207, "grad_norm": 1.4451779127120972, "learning_rate": 1.564726003778913e-05, "loss": 0.6096, "step": 2212 }, { "epoch": 0.3300768140800955, "grad_norm": 1.331573247909546, "learning_rate": 1.5643271853749848e-05, "loss": 0.6242, "step": 2213 }, { "epoch": 0.3302259676336789, "grad_norm": 1.5943613052368164, "learning_rate": 1.5639282352252568e-05, "loss": 0.6919, "step": 2214 }, { "epoch": 0.3303751211872623, "grad_norm": 2.258574962615967, "learning_rate": 1.563529153422866e-05, "loss": 0.6474, "step": 2215 }, { "epoch": 0.3305242747408457, "grad_norm": 1.591476559638977, "learning_rate": 1.563129940060981e-05, "loss": 0.5128, "step": 2216 }, { "epoch": 0.33067342829442914, "grad_norm": 1.488922119140625, "learning_rate": 1.562730595232801e-05, "loss": 0.5434, "step": 2217 }, { "epoch": 0.33082258184801255, "grad_norm": 1.4708597660064697, "learning_rate": 1.5623311190315554e-05, "loss": 0.6527, "step": 2218 }, { "epoch": 0.33097173540159597, "grad_norm": 1.9927445650100708, "learning_rate": 1.5619315115505037e-05, "loss": 0.6703, "step": 2219 }, { "epoch": 0.3311208889551794, "grad_norm": 2.546950101852417, "learning_rate": 1.5615317728829383e-05, "loss": 0.5799, "step": 2220 }, { "epoch": 0.3312700425087628, "grad_norm": 1.4078491926193237, "learning_rate": 1.5611319031221793e-05, "loss": 0.598, "step": 2221 }, { "epoch": 0.3314191960623462, "grad_norm": 1.3758666515350342, "learning_rate": 1.5607319023615798e-05, "loss": 0.5901, "step": 2222 }, { "epoch": 0.3315683496159296, "grad_norm": 1.4458343982696533, "learning_rate": 1.5603317706945224e-05, "loss": 0.6948, "step": 2223 }, { "epoch": 0.33171750316951304, "grad_norm": 1.4096457958221436, "learning_rate": 1.55993150821442e-05, "loss": 0.6723, "step": 2224 }, { "epoch": 0.33186665672309645, "grad_norm": 1.3177330493927002, "learning_rate": 1.5595311150147167e-05, "loss": 0.5741, "step": 2225 }, { "epoch": 0.33201581027667987, "grad_norm": 1.72636878490448, "learning_rate": 1.5591305911888876e-05, "loss": 0.5753, "step": 2226 }, { "epoch": 0.3321649638302633, "grad_norm": 1.6341135501861572, "learning_rate": 1.5587299368304362e-05, "loss": 0.7051, "step": 2227 }, { "epoch": 0.3323141173838467, "grad_norm": 1.425805687904358, "learning_rate": 1.558329152032898e-05, "loss": 0.6321, "step": 2228 }, { "epoch": 0.3324632709374301, "grad_norm": 1.2926446199417114, "learning_rate": 1.55792823688984e-05, "loss": 0.6562, "step": 2229 }, { "epoch": 0.3326124244910135, "grad_norm": 1.5072052478790283, "learning_rate": 1.5575271914948575e-05, "loss": 0.5614, "step": 2230 }, { "epoch": 0.33276157804459694, "grad_norm": 1.2701880931854248, "learning_rate": 1.557126015941577e-05, "loss": 0.5539, "step": 2231 }, { "epoch": 0.33291073159818035, "grad_norm": 1.344932198524475, "learning_rate": 1.5567247103236556e-05, "loss": 0.6135, "step": 2232 }, { "epoch": 0.33305988515176377, "grad_norm": 1.3560121059417725, "learning_rate": 1.5563232747347813e-05, "loss": 0.6147, "step": 2233 }, { "epoch": 0.3332090387053472, "grad_norm": 1.449654459953308, "learning_rate": 1.555921709268671e-05, "loss": 0.5157, "step": 2234 }, { "epoch": 0.33335819225893054, "grad_norm": 1.3567866086959839, "learning_rate": 1.5555200140190732e-05, "loss": 0.513, "step": 2235 }, { "epoch": 0.33350734581251396, "grad_norm": 1.9360895156860352, "learning_rate": 1.555118189079766e-05, "loss": 0.6039, "step": 2236 }, { "epoch": 0.33365649936609737, "grad_norm": 1.2896267175674438, "learning_rate": 1.5547162345445584e-05, "loss": 0.5703, "step": 2237 }, { "epoch": 0.3338056529196808, "grad_norm": 1.585139274597168, "learning_rate": 1.5543141505072888e-05, "loss": 0.6176, "step": 2238 }, { "epoch": 0.3339548064732642, "grad_norm": 1.3787919282913208, "learning_rate": 1.5539119370618267e-05, "loss": 0.6815, "step": 2239 }, { "epoch": 0.3341039600268476, "grad_norm": 1.2322299480438232, "learning_rate": 1.553509594302071e-05, "loss": 0.6212, "step": 2240 }, { "epoch": 0.334253113580431, "grad_norm": 1.3813562393188477, "learning_rate": 1.5531071223219513e-05, "loss": 0.602, "step": 2241 }, { "epoch": 0.33440226713401444, "grad_norm": 1.3897764682769775, "learning_rate": 1.5527045212154274e-05, "loss": 0.5871, "step": 2242 }, { "epoch": 0.33455142068759786, "grad_norm": 1.380163550376892, "learning_rate": 1.5523017910764892e-05, "loss": 0.7093, "step": 2243 }, { "epoch": 0.33470057424118127, "grad_norm": 1.698799729347229, "learning_rate": 1.5518989319991563e-05, "loss": 0.5194, "step": 2244 }, { "epoch": 0.3348497277947647, "grad_norm": 1.441102147102356, "learning_rate": 1.551495944077479e-05, "loss": 0.6314, "step": 2245 }, { "epoch": 0.3349988813483481, "grad_norm": 1.2942289113998413, "learning_rate": 1.5510928274055373e-05, "loss": 0.6371, "step": 2246 }, { "epoch": 0.3351480349019315, "grad_norm": 1.291725516319275, "learning_rate": 1.5506895820774416e-05, "loss": 0.6684, "step": 2247 }, { "epoch": 0.3352971884555149, "grad_norm": 1.4306926727294922, "learning_rate": 1.550286208187332e-05, "loss": 0.6492, "step": 2248 }, { "epoch": 0.33544634200909834, "grad_norm": 1.607772946357727, "learning_rate": 1.5498827058293785e-05, "loss": 0.5818, "step": 2249 }, { "epoch": 0.33559549556268176, "grad_norm": 1.5156835317611694, "learning_rate": 1.5494790750977814e-05, "loss": 0.5802, "step": 2250 }, { "epoch": 0.33574464911626517, "grad_norm": 1.2292872667312622, "learning_rate": 1.549075316086771e-05, "loss": 0.5559, "step": 2251 }, { "epoch": 0.3358938026698486, "grad_norm": 1.494850993156433, "learning_rate": 1.5486714288906072e-05, "loss": 0.6231, "step": 2252 }, { "epoch": 0.336042956223432, "grad_norm": 1.57041335105896, "learning_rate": 1.5482674136035804e-05, "loss": 0.6219, "step": 2253 }, { "epoch": 0.3361921097770154, "grad_norm": 1.5680615901947021, "learning_rate": 1.5478632703200104e-05, "loss": 0.638, "step": 2254 }, { "epoch": 0.3363412633305988, "grad_norm": 1.3825570344924927, "learning_rate": 1.5474589991342468e-05, "loss": 0.6436, "step": 2255 }, { "epoch": 0.33649041688418224, "grad_norm": 1.3928557634353638, "learning_rate": 1.5470546001406698e-05, "loss": 0.6353, "step": 2256 }, { "epoch": 0.33663957043776566, "grad_norm": 1.5789309740066528, "learning_rate": 1.5466500734336886e-05, "loss": 0.7337, "step": 2257 }, { "epoch": 0.33678872399134907, "grad_norm": 1.5988726615905762, "learning_rate": 1.5462454191077427e-05, "loss": 0.5976, "step": 2258 }, { "epoch": 0.3369378775449325, "grad_norm": 1.5231906175613403, "learning_rate": 1.5458406372573006e-05, "loss": 0.6627, "step": 2259 }, { "epoch": 0.3370870310985159, "grad_norm": 1.284934163093567, "learning_rate": 1.5454357279768624e-05, "loss": 0.5818, "step": 2260 }, { "epoch": 0.3372361846520993, "grad_norm": 2.205129861831665, "learning_rate": 1.5450306913609557e-05, "loss": 0.5917, "step": 2261 }, { "epoch": 0.3373853382056827, "grad_norm": 1.7805219888687134, "learning_rate": 1.5446255275041398e-05, "loss": 0.6017, "step": 2262 }, { "epoch": 0.33753449175926614, "grad_norm": 1.317962646484375, "learning_rate": 1.5442202365010022e-05, "loss": 0.6204, "step": 2263 }, { "epoch": 0.33768364531284956, "grad_norm": 1.639054536819458, "learning_rate": 1.5438148184461606e-05, "loss": 0.6062, "step": 2264 }, { "epoch": 0.33783279886643297, "grad_norm": 1.429862380027771, "learning_rate": 1.543409273434263e-05, "loss": 0.684, "step": 2265 }, { "epoch": 0.3379819524200164, "grad_norm": 1.239419937133789, "learning_rate": 1.543003601559986e-05, "loss": 0.6536, "step": 2266 }, { "epoch": 0.3381311059735998, "grad_norm": 1.6145957708358765, "learning_rate": 1.5425978029180367e-05, "loss": 0.5773, "step": 2267 }, { "epoch": 0.3382802595271832, "grad_norm": 1.2152186632156372, "learning_rate": 1.5421918776031506e-05, "loss": 0.644, "step": 2268 }, { "epoch": 0.33842941308076663, "grad_norm": 4.232125282287598, "learning_rate": 1.5417858257100946e-05, "loss": 0.5612, "step": 2269 }, { "epoch": 0.33857856663435004, "grad_norm": 1.4625729322433472, "learning_rate": 1.5413796473336635e-05, "loss": 0.6273, "step": 2270 }, { "epoch": 0.33872772018793346, "grad_norm": 1.4437870979309082, "learning_rate": 1.5409733425686822e-05, "loss": 0.631, "step": 2271 }, { "epoch": 0.33887687374151687, "grad_norm": 1.426066279411316, "learning_rate": 1.5405669115100057e-05, "loss": 0.6296, "step": 2272 }, { "epoch": 0.3390260272951003, "grad_norm": 1.462817668914795, "learning_rate": 1.5401603542525172e-05, "loss": 0.7085, "step": 2273 }, { "epoch": 0.3391751808486837, "grad_norm": 1.6982699632644653, "learning_rate": 1.5397536708911308e-05, "loss": 0.6777, "step": 2274 }, { "epoch": 0.3393243344022671, "grad_norm": 1.3241850137710571, "learning_rate": 1.5393468615207887e-05, "loss": 0.5796, "step": 2275 }, { "epoch": 0.33947348795585053, "grad_norm": 1.2258719205856323, "learning_rate": 1.5389399262364636e-05, "loss": 0.7374, "step": 2276 }, { "epoch": 0.33962264150943394, "grad_norm": 1.1302882432937622, "learning_rate": 1.538532865133157e-05, "loss": 0.6047, "step": 2277 }, { "epoch": 0.33977179506301736, "grad_norm": 1.277732014656067, "learning_rate": 1.5381256783059e-05, "loss": 0.6926, "step": 2278 }, { "epoch": 0.33992094861660077, "grad_norm": 1.2619059085845947, "learning_rate": 1.537718365849753e-05, "loss": 0.6235, "step": 2279 }, { "epoch": 0.3400701021701842, "grad_norm": 1.250474452972412, "learning_rate": 1.5373109278598055e-05, "loss": 0.5613, "step": 2280 }, { "epoch": 0.3402192557237676, "grad_norm": 1.641465425491333, "learning_rate": 1.5369033644311768e-05, "loss": 0.4994, "step": 2281 }, { "epoch": 0.340368409277351, "grad_norm": 1.3159593343734741, "learning_rate": 1.536495675659015e-05, "loss": 0.5908, "step": 2282 }, { "epoch": 0.34051756283093443, "grad_norm": 1.3462785482406616, "learning_rate": 1.5360878616384975e-05, "loss": 0.6461, "step": 2283 }, { "epoch": 0.34066671638451784, "grad_norm": 1.4956661462783813, "learning_rate": 1.5356799224648312e-05, "loss": 0.6273, "step": 2284 }, { "epoch": 0.34081586993810126, "grad_norm": 1.4044474363327026, "learning_rate": 1.5352718582332524e-05, "loss": 0.6004, "step": 2285 }, { "epoch": 0.34096502349168467, "grad_norm": 1.2347677946090698, "learning_rate": 1.534863669039026e-05, "loss": 0.5643, "step": 2286 }, { "epoch": 0.3411141770452681, "grad_norm": 1.305984377861023, "learning_rate": 1.5344553549774466e-05, "loss": 0.6412, "step": 2287 }, { "epoch": 0.3412633305988515, "grad_norm": 1.2377548217773438, "learning_rate": 1.534046916143838e-05, "loss": 0.5389, "step": 2288 }, { "epoch": 0.3414124841524349, "grad_norm": 1.4191210269927979, "learning_rate": 1.5336383526335517e-05, "loss": 0.6434, "step": 2289 }, { "epoch": 0.34156163770601833, "grad_norm": 1.663354754447937, "learning_rate": 1.5332296645419707e-05, "loss": 0.6924, "step": 2290 }, { "epoch": 0.34171079125960174, "grad_norm": 1.421285629272461, "learning_rate": 1.5328208519645052e-05, "loss": 0.6121, "step": 2291 }, { "epoch": 0.34185994481318516, "grad_norm": 1.334591269493103, "learning_rate": 1.5324119149965957e-05, "loss": 0.6269, "step": 2292 }, { "epoch": 0.34200909836676857, "grad_norm": 1.9982657432556152, "learning_rate": 1.5320028537337108e-05, "loss": 0.6413, "step": 2293 }, { "epoch": 0.342158251920352, "grad_norm": 1.386319875717163, "learning_rate": 1.531593668271348e-05, "loss": 0.5723, "step": 2294 }, { "epoch": 0.3423074054739354, "grad_norm": 1.3551024198532104, "learning_rate": 1.5311843587050352e-05, "loss": 0.6379, "step": 2295 }, { "epoch": 0.3424565590275188, "grad_norm": 1.293383240699768, "learning_rate": 1.5307749251303278e-05, "loss": 0.5231, "step": 2296 }, { "epoch": 0.34260571258110223, "grad_norm": 1.3882404565811157, "learning_rate": 1.5303653676428106e-05, "loss": 0.6068, "step": 2297 }, { "epoch": 0.34275486613468564, "grad_norm": 1.5707648992538452, "learning_rate": 1.529955686338098e-05, "loss": 0.5235, "step": 2298 }, { "epoch": 0.34290401968826906, "grad_norm": 1.3145161867141724, "learning_rate": 1.529545881311832e-05, "loss": 0.5351, "step": 2299 }, { "epoch": 0.34305317324185247, "grad_norm": 1.4600378274917603, "learning_rate": 1.529135952659684e-05, "loss": 0.5835, "step": 2300 }, { "epoch": 0.3432023267954359, "grad_norm": 1.3371648788452148, "learning_rate": 1.528725900477356e-05, "loss": 0.5854, "step": 2301 }, { "epoch": 0.3433514803490193, "grad_norm": 1.4696460962295532, "learning_rate": 1.5283157248605758e-05, "loss": 0.6155, "step": 2302 }, { "epoch": 0.3435006339026027, "grad_norm": 1.4553134441375732, "learning_rate": 1.5279054259051022e-05, "loss": 0.5996, "step": 2303 }, { "epoch": 0.34364978745618613, "grad_norm": 1.6291544437408447, "learning_rate": 1.527495003706722e-05, "loss": 0.5951, "step": 2304 }, { "epoch": 0.34379894100976954, "grad_norm": 1.4386537075042725, "learning_rate": 1.5270844583612507e-05, "loss": 0.7008, "step": 2305 }, { "epoch": 0.34394809456335296, "grad_norm": 1.3455911874771118, "learning_rate": 1.5266737899645333e-05, "loss": 0.5523, "step": 2306 }, { "epoch": 0.34409724811693637, "grad_norm": 1.4461076259613037, "learning_rate": 1.5262629986124422e-05, "loss": 0.5824, "step": 2307 }, { "epoch": 0.3442464016705198, "grad_norm": 1.5062274932861328, "learning_rate": 1.5258520844008797e-05, "loss": 0.6453, "step": 2308 }, { "epoch": 0.3443955552241032, "grad_norm": 1.2667440176010132, "learning_rate": 1.5254410474257765e-05, "loss": 0.5628, "step": 2309 }, { "epoch": 0.3445447087776866, "grad_norm": 1.4169492721557617, "learning_rate": 1.5250298877830916e-05, "loss": 0.594, "step": 2310 }, { "epoch": 0.34469386233127003, "grad_norm": 0.8783372044563293, "learning_rate": 1.5246186055688128e-05, "loss": 0.6577, "step": 2311 }, { "epoch": 0.34484301588485344, "grad_norm": 1.4177353382110596, "learning_rate": 1.5242072008789564e-05, "loss": 0.74, "step": 2312 }, { "epoch": 0.34499216943843686, "grad_norm": 1.166412115097046, "learning_rate": 1.5237956738095681e-05, "loss": 0.5465, "step": 2313 }, { "epoch": 0.34514132299202027, "grad_norm": 1.5208103656768799, "learning_rate": 1.5233840244567208e-05, "loss": 0.6723, "step": 2314 }, { "epoch": 0.3452904765456037, "grad_norm": 1.4723765850067139, "learning_rate": 1.5229722529165175e-05, "loss": 0.6852, "step": 2315 }, { "epoch": 0.3454396300991871, "grad_norm": 1.60991632938385, "learning_rate": 1.5225603592850881e-05, "loss": 0.7271, "step": 2316 }, { "epoch": 0.3455887836527705, "grad_norm": 1.3115088939666748, "learning_rate": 1.5221483436585923e-05, "loss": 0.6087, "step": 2317 }, { "epoch": 0.34573793720635393, "grad_norm": 1.4922658205032349, "learning_rate": 1.5217362061332176e-05, "loss": 0.5932, "step": 2318 }, { "epoch": 0.34588709075993734, "grad_norm": 1.4017760753631592, "learning_rate": 1.5213239468051801e-05, "loss": 0.6248, "step": 2319 }, { "epoch": 0.34603624431352076, "grad_norm": 1.512686014175415, "learning_rate": 1.5209115657707247e-05, "loss": 0.5529, "step": 2320 }, { "epoch": 0.34618539786710417, "grad_norm": 1.341679334640503, "learning_rate": 1.520499063126124e-05, "loss": 0.6472, "step": 2321 }, { "epoch": 0.3463345514206876, "grad_norm": 1.353592038154602, "learning_rate": 1.5200864389676793e-05, "loss": 0.658, "step": 2322 }, { "epoch": 0.346483704974271, "grad_norm": 1.3081461191177368, "learning_rate": 1.5196736933917211e-05, "loss": 0.6106, "step": 2323 }, { "epoch": 0.3466328585278544, "grad_norm": 1.4033993482589722, "learning_rate": 1.519260826494607e-05, "loss": 0.6428, "step": 2324 }, { "epoch": 0.34678201208143783, "grad_norm": 1.7482832670211792, "learning_rate": 1.518847838372723e-05, "loss": 0.652, "step": 2325 }, { "epoch": 0.34693116563502124, "grad_norm": 1.3051584959030151, "learning_rate": 1.5184347291224843e-05, "loss": 0.6023, "step": 2326 }, { "epoch": 0.34708031918860466, "grad_norm": 2.006012439727783, "learning_rate": 1.5180214988403343e-05, "loss": 0.4882, "step": 2327 }, { "epoch": 0.34722947274218807, "grad_norm": 1.334136962890625, "learning_rate": 1.5176081476227436e-05, "loss": 0.6064, "step": 2328 }, { "epoch": 0.3473786262957715, "grad_norm": 1.3082942962646484, "learning_rate": 1.5171946755662116e-05, "loss": 0.4869, "step": 2329 }, { "epoch": 0.3475277798493549, "grad_norm": 1.342846155166626, "learning_rate": 1.5167810827672669e-05, "loss": 0.5855, "step": 2330 }, { "epoch": 0.3476769334029383, "grad_norm": 1.215337872505188, "learning_rate": 1.5163673693224644e-05, "loss": 0.5425, "step": 2331 }, { "epoch": 0.34782608695652173, "grad_norm": 1.313134789466858, "learning_rate": 1.5159535353283887e-05, "loss": 0.6287, "step": 2332 }, { "epoch": 0.34797524051010514, "grad_norm": 1.3598800897598267, "learning_rate": 1.5155395808816518e-05, "loss": 0.6602, "step": 2333 }, { "epoch": 0.34812439406368856, "grad_norm": 1.5535180568695068, "learning_rate": 1.5151255060788941e-05, "loss": 0.5943, "step": 2334 }, { "epoch": 0.348273547617272, "grad_norm": 1.1467945575714111, "learning_rate": 1.5147113110167841e-05, "loss": 0.5772, "step": 2335 }, { "epoch": 0.3484227011708554, "grad_norm": 1.3799134492874146, "learning_rate": 1.5142969957920181e-05, "loss": 0.5995, "step": 2336 }, { "epoch": 0.3485718547244388, "grad_norm": 1.195359230041504, "learning_rate": 1.5138825605013208e-05, "loss": 0.5262, "step": 2337 }, { "epoch": 0.3487210082780222, "grad_norm": 1.551190733909607, "learning_rate": 1.5134680052414446e-05, "loss": 0.6428, "step": 2338 }, { "epoch": 0.34887016183160563, "grad_norm": 1.418825626373291, "learning_rate": 1.51305333010917e-05, "loss": 0.6498, "step": 2339 }, { "epoch": 0.34901931538518904, "grad_norm": 1.5663390159606934, "learning_rate": 1.5126385352013065e-05, "loss": 0.66, "step": 2340 }, { "epoch": 0.34916846893877246, "grad_norm": 1.5547071695327759, "learning_rate": 1.5122236206146892e-05, "loss": 0.6433, "step": 2341 }, { "epoch": 0.3493176224923559, "grad_norm": 1.1972657442092896, "learning_rate": 1.5118085864461835e-05, "loss": 0.5055, "step": 2342 }, { "epoch": 0.3494667760459393, "grad_norm": 1.4089006185531616, "learning_rate": 1.5113934327926817e-05, "loss": 0.593, "step": 2343 }, { "epoch": 0.3496159295995227, "grad_norm": 1.5217297077178955, "learning_rate": 1.5109781597511038e-05, "loss": 0.6058, "step": 2344 }, { "epoch": 0.3497650831531061, "grad_norm": 1.8521603345870972, "learning_rate": 1.510562767418398e-05, "loss": 0.68, "step": 2345 }, { "epoch": 0.34991423670668953, "grad_norm": 1.3350898027420044, "learning_rate": 1.5101472558915408e-05, "loss": 0.681, "step": 2346 }, { "epoch": 0.35006339026027294, "grad_norm": 1.4647161960601807, "learning_rate": 1.5097316252675352e-05, "loss": 0.6037, "step": 2347 }, { "epoch": 0.35021254381385636, "grad_norm": 1.2833197116851807, "learning_rate": 1.5093158756434134e-05, "loss": 0.6288, "step": 2348 }, { "epoch": 0.3503616973674398, "grad_norm": 1.336604118347168, "learning_rate": 1.5089000071162347e-05, "loss": 0.6155, "step": 2349 }, { "epoch": 0.3505108509210232, "grad_norm": 1.3559132814407349, "learning_rate": 1.5084840197830861e-05, "loss": 0.5602, "step": 2350 }, { "epoch": 0.3506600044746066, "grad_norm": 1.407055139541626, "learning_rate": 1.508067913741083e-05, "loss": 0.6572, "step": 2351 }, { "epoch": 0.35080915802819, "grad_norm": 1.2623789310455322, "learning_rate": 1.5076516890873674e-05, "loss": 0.5031, "step": 2352 }, { "epoch": 0.35095831158177343, "grad_norm": 1.6573220491409302, "learning_rate": 1.50723534591911e-05, "loss": 0.5631, "step": 2353 }, { "epoch": 0.35110746513535684, "grad_norm": 1.3112711906433105, "learning_rate": 1.5068188843335087e-05, "loss": 0.6122, "step": 2354 }, { "epoch": 0.35125661868894026, "grad_norm": 1.3095752000808716, "learning_rate": 1.5064023044277891e-05, "loss": 0.6785, "step": 2355 }, { "epoch": 0.3514057722425237, "grad_norm": 1.2755002975463867, "learning_rate": 1.5059856062992042e-05, "loss": 0.5248, "step": 2356 }, { "epoch": 0.3515549257961071, "grad_norm": 1.5105929374694824, "learning_rate": 1.5055687900450355e-05, "loss": 0.623, "step": 2357 }, { "epoch": 0.3517040793496905, "grad_norm": 1.4682127237319946, "learning_rate": 1.5051518557625908e-05, "loss": 0.5925, "step": 2358 }, { "epoch": 0.3518532329032739, "grad_norm": 1.2904258966445923, "learning_rate": 1.5047348035492067e-05, "loss": 0.5563, "step": 2359 }, { "epoch": 0.35200238645685733, "grad_norm": 1.4431731700897217, "learning_rate": 1.504317633502246e-05, "loss": 0.6198, "step": 2360 }, { "epoch": 0.35215154001044074, "grad_norm": 1.363305687904358, "learning_rate": 1.5039003457191e-05, "loss": 0.5202, "step": 2361 }, { "epoch": 0.35230069356402416, "grad_norm": 1.2571818828582764, "learning_rate": 1.5034829402971874e-05, "loss": 0.6313, "step": 2362 }, { "epoch": 0.3524498471176076, "grad_norm": 1.5536329746246338, "learning_rate": 1.503065417333954e-05, "loss": 0.643, "step": 2363 }, { "epoch": 0.352599000671191, "grad_norm": 1.329214334487915, "learning_rate": 1.5026477769268732e-05, "loss": 0.5716, "step": 2364 }, { "epoch": 0.3527481542247744, "grad_norm": 1.436128854751587, "learning_rate": 1.502230019173446e-05, "loss": 0.685, "step": 2365 }, { "epoch": 0.3528973077783578, "grad_norm": 1.523117184638977, "learning_rate": 1.5018121441712005e-05, "loss": 0.6551, "step": 2366 }, { "epoch": 0.35304646133194123, "grad_norm": 1.3938050270080566, "learning_rate": 1.5013941520176922e-05, "loss": 0.5317, "step": 2367 }, { "epoch": 0.35319561488552464, "grad_norm": 1.5511327981948853, "learning_rate": 1.5009760428105045e-05, "loss": 0.5046, "step": 2368 }, { "epoch": 0.35334476843910806, "grad_norm": 2.186826705932617, "learning_rate": 1.500557816647247e-05, "loss": 0.6439, "step": 2369 }, { "epoch": 0.3534939219926915, "grad_norm": 1.433489441871643, "learning_rate": 1.5001394736255575e-05, "loss": 0.623, "step": 2370 }, { "epoch": 0.3536430755462749, "grad_norm": 1.2371318340301514, "learning_rate": 1.4997210138431011e-05, "loss": 0.6164, "step": 2371 }, { "epoch": 0.3537922290998583, "grad_norm": 1.5109866857528687, "learning_rate": 1.4993024373975698e-05, "loss": 0.7151, "step": 2372 }, { "epoch": 0.3539413826534417, "grad_norm": 1.2454653978347778, "learning_rate": 1.4988837443866829e-05, "loss": 0.5569, "step": 2373 }, { "epoch": 0.35409053620702513, "grad_norm": 1.5612750053405762, "learning_rate": 1.4984649349081872e-05, "loss": 0.6965, "step": 2374 }, { "epoch": 0.35423968976060854, "grad_norm": 1.336452603340149, "learning_rate": 1.4980460090598562e-05, "loss": 0.6114, "step": 2375 }, { "epoch": 0.35438884331419196, "grad_norm": 1.446066975593567, "learning_rate": 1.4976269669394908e-05, "loss": 0.6689, "step": 2376 }, { "epoch": 0.3545379968677754, "grad_norm": 0.8815528750419617, "learning_rate": 1.497207808644919e-05, "loss": 0.6448, "step": 2377 }, { "epoch": 0.3546871504213588, "grad_norm": 1.3985427618026733, "learning_rate": 1.4967885342739963e-05, "loss": 0.6491, "step": 2378 }, { "epoch": 0.3548363039749422, "grad_norm": 1.5335149765014648, "learning_rate": 1.496369143924605e-05, "loss": 0.5921, "step": 2379 }, { "epoch": 0.3549854575285256, "grad_norm": 1.4567806720733643, "learning_rate": 1.495949637694654e-05, "loss": 0.6192, "step": 2380 }, { "epoch": 0.35513461108210903, "grad_norm": 0.8384304642677307, "learning_rate": 1.4955300156820805e-05, "loss": 0.6424, "step": 2381 }, { "epoch": 0.35528376463569245, "grad_norm": 1.3538787364959717, "learning_rate": 1.4951102779848473e-05, "loss": 0.5944, "step": 2382 }, { "epoch": 0.35543291818927586, "grad_norm": 1.3149421215057373, "learning_rate": 1.4946904247009446e-05, "loss": 0.4938, "step": 2383 }, { "epoch": 0.3555820717428593, "grad_norm": 1.228684425354004, "learning_rate": 1.494270455928391e-05, "loss": 0.6625, "step": 2384 }, { "epoch": 0.3557312252964427, "grad_norm": 1.2348965406417847, "learning_rate": 1.49385037176523e-05, "loss": 0.5364, "step": 2385 }, { "epoch": 0.3558803788500261, "grad_norm": 1.4604551792144775, "learning_rate": 1.4934301723095325e-05, "loss": 0.5993, "step": 2386 }, { "epoch": 0.3560295324036095, "grad_norm": 0.852407693862915, "learning_rate": 1.4930098576593978e-05, "loss": 0.6368, "step": 2387 }, { "epoch": 0.35617868595719293, "grad_norm": 1.398706078529358, "learning_rate": 1.4925894279129509e-05, "loss": 0.4867, "step": 2388 }, { "epoch": 0.35632783951077635, "grad_norm": 1.2902576923370361, "learning_rate": 1.4921688831683433e-05, "loss": 0.657, "step": 2389 }, { "epoch": 0.35647699306435976, "grad_norm": 1.6409502029418945, "learning_rate": 1.4917482235237541e-05, "loss": 0.5756, "step": 2390 }, { "epoch": 0.3566261466179432, "grad_norm": 1.458490252494812, "learning_rate": 1.491327449077389e-05, "loss": 0.5771, "step": 2391 }, { "epoch": 0.3567753001715266, "grad_norm": 1.5514377355575562, "learning_rate": 1.4909065599274806e-05, "loss": 0.5963, "step": 2392 }, { "epoch": 0.35692445372511, "grad_norm": 1.7252800464630127, "learning_rate": 1.4904855561722881e-05, "loss": 0.5526, "step": 2393 }, { "epoch": 0.3570736072786934, "grad_norm": 1.3536803722381592, "learning_rate": 1.4900644379100974e-05, "loss": 0.642, "step": 2394 }, { "epoch": 0.35722276083227683, "grad_norm": 1.8865491151809692, "learning_rate": 1.4896432052392213e-05, "loss": 0.5916, "step": 2395 }, { "epoch": 0.35737191438586025, "grad_norm": 1.2840046882629395, "learning_rate": 1.4892218582579997e-05, "loss": 0.5718, "step": 2396 }, { "epoch": 0.35752106793944366, "grad_norm": 1.2745556831359863, "learning_rate": 1.4888003970647979e-05, "loss": 0.625, "step": 2397 }, { "epoch": 0.3576702214930271, "grad_norm": 1.4107623100280762, "learning_rate": 1.4883788217580093e-05, "loss": 0.5295, "step": 2398 }, { "epoch": 0.3578193750466105, "grad_norm": 1.3600133657455444, "learning_rate": 1.4879571324360533e-05, "loss": 0.6557, "step": 2399 }, { "epoch": 0.3579685286001939, "grad_norm": 1.4187524318695068, "learning_rate": 1.487535329197376e-05, "loss": 0.6667, "step": 2400 }, { "epoch": 0.3581176821537773, "grad_norm": 1.5320662260055542, "learning_rate": 1.4871134121404503e-05, "loss": 0.6435, "step": 2401 }, { "epoch": 0.35826683570736073, "grad_norm": 1.781565546989441, "learning_rate": 1.4866913813637749e-05, "loss": 0.6059, "step": 2402 }, { "epoch": 0.35841598926094415, "grad_norm": 1.281266689300537, "learning_rate": 1.4862692369658755e-05, "loss": 0.5961, "step": 2403 }, { "epoch": 0.35856514281452756, "grad_norm": 0.9345998167991638, "learning_rate": 1.4858469790453049e-05, "loss": 0.6848, "step": 2404 }, { "epoch": 0.358714296368111, "grad_norm": 1.4544305801391602, "learning_rate": 1.485424607700642e-05, "loss": 0.6123, "step": 2405 }, { "epoch": 0.3588634499216944, "grad_norm": 2.572352409362793, "learning_rate": 1.4850021230304919e-05, "loss": 0.6462, "step": 2406 }, { "epoch": 0.3590126034752778, "grad_norm": 1.5287284851074219, "learning_rate": 1.4845795251334863e-05, "loss": 0.5515, "step": 2407 }, { "epoch": 0.3591617570288612, "grad_norm": 1.316375970840454, "learning_rate": 1.4841568141082832e-05, "loss": 0.6124, "step": 2408 }, { "epoch": 0.35931091058244463, "grad_norm": 1.246339201927185, "learning_rate": 1.4837339900535674e-05, "loss": 0.5032, "step": 2409 }, { "epoch": 0.35946006413602805, "grad_norm": 2.1428706645965576, "learning_rate": 1.4833110530680501e-05, "loss": 0.66, "step": 2410 }, { "epoch": 0.35960921768961146, "grad_norm": 1.506111741065979, "learning_rate": 1.4828880032504684e-05, "loss": 0.6244, "step": 2411 }, { "epoch": 0.3597583712431949, "grad_norm": 1.3143367767333984, "learning_rate": 1.4824648406995858e-05, "loss": 0.6198, "step": 2412 }, { "epoch": 0.3599075247967783, "grad_norm": 1.4582536220550537, "learning_rate": 1.4820415655141932e-05, "loss": 0.597, "step": 2413 }, { "epoch": 0.3600566783503617, "grad_norm": 1.421584963798523, "learning_rate": 1.4816181777931056e-05, "loss": 0.7061, "step": 2414 }, { "epoch": 0.3602058319039451, "grad_norm": 1.2499207258224487, "learning_rate": 1.4811946776351667e-05, "loss": 0.6289, "step": 2415 }, { "epoch": 0.36035498545752853, "grad_norm": 1.3427486419677734, "learning_rate": 1.4807710651392446e-05, "loss": 0.6014, "step": 2416 }, { "epoch": 0.36050413901111195, "grad_norm": 1.5593451261520386, "learning_rate": 1.4803473404042345e-05, "loss": 0.6638, "step": 2417 }, { "epoch": 0.36065329256469536, "grad_norm": 1.5563275814056396, "learning_rate": 1.479923503529058e-05, "loss": 0.5948, "step": 2418 }, { "epoch": 0.3608024461182788, "grad_norm": 0.9175319671630859, "learning_rate": 1.4794995546126625e-05, "loss": 0.6813, "step": 2419 }, { "epoch": 0.3609515996718622, "grad_norm": 1.3622435331344604, "learning_rate": 1.479075493754021e-05, "loss": 0.603, "step": 2420 }, { "epoch": 0.3611007532254456, "grad_norm": 1.655418872833252, "learning_rate": 1.4786513210521339e-05, "loss": 0.6634, "step": 2421 }, { "epoch": 0.361249906779029, "grad_norm": 1.6304911375045776, "learning_rate": 1.4782270366060266e-05, "loss": 0.6163, "step": 2422 }, { "epoch": 0.36139906033261243, "grad_norm": 1.4547383785247803, "learning_rate": 1.4778026405147515e-05, "loss": 0.6439, "step": 2423 }, { "epoch": 0.36154821388619585, "grad_norm": 1.3083680868148804, "learning_rate": 1.477378132877386e-05, "loss": 0.5454, "step": 2424 }, { "epoch": 0.36169736743977926, "grad_norm": 1.1809180974960327, "learning_rate": 1.4769535137930343e-05, "loss": 0.6072, "step": 2425 }, { "epoch": 0.3618465209933627, "grad_norm": 1.2507154941558838, "learning_rate": 1.4765287833608268e-05, "loss": 0.6345, "step": 2426 }, { "epoch": 0.3619956745469461, "grad_norm": 1.3932803869247437, "learning_rate": 1.4761039416799192e-05, "loss": 0.5529, "step": 2427 }, { "epoch": 0.3621448281005295, "grad_norm": 1.1919039487838745, "learning_rate": 1.4756789888494938e-05, "loss": 0.6086, "step": 2428 }, { "epoch": 0.3622939816541129, "grad_norm": 1.4677561521530151, "learning_rate": 1.4752539249687583e-05, "loss": 0.5661, "step": 2429 }, { "epoch": 0.36244313520769633, "grad_norm": 1.1900148391723633, "learning_rate": 1.4748287501369464e-05, "loss": 0.6713, "step": 2430 }, { "epoch": 0.36259228876127975, "grad_norm": 1.2935681343078613, "learning_rate": 1.4744034644533185e-05, "loss": 0.6405, "step": 2431 }, { "epoch": 0.36274144231486316, "grad_norm": 1.5296745300292969, "learning_rate": 1.4739780680171598e-05, "loss": 0.54, "step": 2432 }, { "epoch": 0.3628905958684466, "grad_norm": 1.6082706451416016, "learning_rate": 1.4735525609277819e-05, "loss": 0.6302, "step": 2433 }, { "epoch": 0.36303974942203, "grad_norm": 1.1798797845840454, "learning_rate": 1.4731269432845223e-05, "loss": 0.5915, "step": 2434 }, { "epoch": 0.3631889029756134, "grad_norm": 1.3431527614593506, "learning_rate": 1.4727012151867442e-05, "loss": 0.6262, "step": 2435 }, { "epoch": 0.3633380565291968, "grad_norm": 1.3926432132720947, "learning_rate": 1.472275376733836e-05, "loss": 0.6485, "step": 2436 }, { "epoch": 0.36348721008278023, "grad_norm": 1.2594666481018066, "learning_rate": 1.4718494280252133e-05, "loss": 0.6102, "step": 2437 }, { "epoch": 0.36363636363636365, "grad_norm": 1.4267654418945312, "learning_rate": 1.4714233691603161e-05, "loss": 0.6147, "step": 2438 }, { "epoch": 0.36378551718994706, "grad_norm": 1.1769641637802124, "learning_rate": 1.4709972002386104e-05, "loss": 0.507, "step": 2439 }, { "epoch": 0.3639346707435305, "grad_norm": 2.334836721420288, "learning_rate": 1.4705709213595882e-05, "loss": 0.6038, "step": 2440 }, { "epoch": 0.3640838242971139, "grad_norm": 1.5697169303894043, "learning_rate": 1.4701445326227675e-05, "loss": 0.6682, "step": 2441 }, { "epoch": 0.3642329778506973, "grad_norm": 1.2909228801727295, "learning_rate": 1.4697180341276907e-05, "loss": 0.6368, "step": 2442 }, { "epoch": 0.3643821314042807, "grad_norm": 1.6492975950241089, "learning_rate": 1.4692914259739268e-05, "loss": 0.6448, "step": 2443 }, { "epoch": 0.36453128495786413, "grad_norm": 1.3983752727508545, "learning_rate": 1.4688647082610707e-05, "loss": 0.552, "step": 2444 }, { "epoch": 0.36468043851144755, "grad_norm": 1.4040621519088745, "learning_rate": 1.4684378810887422e-05, "loss": 0.6562, "step": 2445 }, { "epoch": 0.36482959206503096, "grad_norm": 1.6544519662857056, "learning_rate": 1.4680109445565864e-05, "loss": 0.6289, "step": 2446 }, { "epoch": 0.3649787456186144, "grad_norm": 1.1667836904525757, "learning_rate": 1.467583898764275e-05, "loss": 0.5719, "step": 2447 }, { "epoch": 0.3651278991721978, "grad_norm": 1.5307092666625977, "learning_rate": 1.4671567438115039e-05, "loss": 0.6531, "step": 2448 }, { "epoch": 0.3652770527257812, "grad_norm": 1.1937378644943237, "learning_rate": 1.4667294797979958e-05, "loss": 0.5936, "step": 2449 }, { "epoch": 0.3654262062793646, "grad_norm": 1.2995668649673462, "learning_rate": 1.466302106823498e-05, "loss": 0.6368, "step": 2450 }, { "epoch": 0.36557535983294803, "grad_norm": 1.3037502765655518, "learning_rate": 1.4658746249877833e-05, "loss": 0.5762, "step": 2451 }, { "epoch": 0.36572451338653145, "grad_norm": 1.2605153322219849, "learning_rate": 1.4654470343906501e-05, "loss": 0.6521, "step": 2452 }, { "epoch": 0.36587366694011486, "grad_norm": 1.5131503343582153, "learning_rate": 1.4650193351319224e-05, "loss": 0.5997, "step": 2453 }, { "epoch": 0.3660228204936983, "grad_norm": 1.7671395540237427, "learning_rate": 1.4645915273114492e-05, "loss": 0.6335, "step": 2454 }, { "epoch": 0.3661719740472817, "grad_norm": 1.9021618366241455, "learning_rate": 1.4641636110291051e-05, "loss": 0.6817, "step": 2455 }, { "epoch": 0.3663211276008651, "grad_norm": 1.426406979560852, "learning_rate": 1.4637355863847893e-05, "loss": 0.6666, "step": 2456 }, { "epoch": 0.3664702811544485, "grad_norm": 1.516305685043335, "learning_rate": 1.4633074534784278e-05, "loss": 0.6163, "step": 2457 }, { "epoch": 0.36661943470803193, "grad_norm": 2.7177839279174805, "learning_rate": 1.4628792124099704e-05, "loss": 0.5928, "step": 2458 }, { "epoch": 0.36676858826161535, "grad_norm": 1.2819218635559082, "learning_rate": 1.4624508632793928e-05, "loss": 0.6287, "step": 2459 }, { "epoch": 0.36691774181519876, "grad_norm": 1.4549734592437744, "learning_rate": 1.462022406186696e-05, "loss": 0.6631, "step": 2460 }, { "epoch": 0.3670668953687822, "grad_norm": 1.2421208620071411, "learning_rate": 1.461593841231906e-05, "loss": 0.535, "step": 2461 }, { "epoch": 0.3672160489223656, "grad_norm": 1.6216588020324707, "learning_rate": 1.4611651685150738e-05, "loss": 0.6664, "step": 2462 }, { "epoch": 0.367365202475949, "grad_norm": 1.364933729171753, "learning_rate": 1.4607363881362765e-05, "loss": 0.6325, "step": 2463 }, { "epoch": 0.3675143560295324, "grad_norm": 1.1427947282791138, "learning_rate": 1.4603075001956145e-05, "loss": 0.5248, "step": 2464 }, { "epoch": 0.36766350958311583, "grad_norm": 1.5346816778182983, "learning_rate": 1.4598785047932153e-05, "loss": 0.6438, "step": 2465 }, { "epoch": 0.36781266313669925, "grad_norm": 1.5155739784240723, "learning_rate": 1.4594494020292307e-05, "loss": 0.6134, "step": 2466 }, { "epoch": 0.36796181669028266, "grad_norm": 1.484546184539795, "learning_rate": 1.4590201920038367e-05, "loss": 0.6807, "step": 2467 }, { "epoch": 0.3681109702438661, "grad_norm": 1.607858657836914, "learning_rate": 1.4585908748172361e-05, "loss": 0.6756, "step": 2468 }, { "epoch": 0.3682601237974495, "grad_norm": 1.438592791557312, "learning_rate": 1.4581614505696551e-05, "loss": 0.5892, "step": 2469 }, { "epoch": 0.3684092773510329, "grad_norm": 1.3654567003250122, "learning_rate": 1.4577319193613455e-05, "loss": 0.6251, "step": 2470 }, { "epoch": 0.3685584309046163, "grad_norm": 0.8926857709884644, "learning_rate": 1.4573022812925845e-05, "loss": 0.6927, "step": 2471 }, { "epoch": 0.36870758445819973, "grad_norm": 1.356096625328064, "learning_rate": 1.4568725364636738e-05, "loss": 0.6165, "step": 2472 }, { "epoch": 0.36885673801178315, "grad_norm": 1.9932430982589722, "learning_rate": 1.45644268497494e-05, "loss": 0.5758, "step": 2473 }, { "epoch": 0.36900589156536656, "grad_norm": 1.4791566133499146, "learning_rate": 1.4560127269267344e-05, "loss": 0.6094, "step": 2474 }, { "epoch": 0.36915504511895, "grad_norm": 1.4988890886306763, "learning_rate": 1.4555826624194339e-05, "loss": 0.631, "step": 2475 }, { "epoch": 0.3693041986725334, "grad_norm": 1.2625867128372192, "learning_rate": 1.4551524915534396e-05, "loss": 0.5962, "step": 2476 }, { "epoch": 0.3694533522261168, "grad_norm": 1.4073843955993652, "learning_rate": 1.4547222144291777e-05, "loss": 0.6428, "step": 2477 }, { "epoch": 0.3696025057797002, "grad_norm": 1.5808062553405762, "learning_rate": 1.4542918311470988e-05, "loss": 0.5812, "step": 2478 }, { "epoch": 0.36975165933328363, "grad_norm": 1.199461579322815, "learning_rate": 1.4538613418076795e-05, "loss": 0.491, "step": 2479 }, { "epoch": 0.36990081288686705, "grad_norm": 1.6667282581329346, "learning_rate": 1.4534307465114199e-05, "loss": 0.6674, "step": 2480 }, { "epoch": 0.37004996644045046, "grad_norm": 1.3550142049789429, "learning_rate": 1.4530000453588447e-05, "loss": 0.6002, "step": 2481 }, { "epoch": 0.3701991199940339, "grad_norm": 1.4156137704849243, "learning_rate": 1.4525692384505043e-05, "loss": 0.6229, "step": 2482 }, { "epoch": 0.3703482735476173, "grad_norm": 1.3793970346450806, "learning_rate": 1.4521383258869735e-05, "loss": 0.6266, "step": 2483 }, { "epoch": 0.3704974271012007, "grad_norm": 1.4924191236495972, "learning_rate": 1.4517073077688513e-05, "loss": 0.5997, "step": 2484 }, { "epoch": 0.3706465806547841, "grad_norm": 1.5809972286224365, "learning_rate": 1.4512761841967615e-05, "loss": 0.6371, "step": 2485 }, { "epoch": 0.37079573420836753, "grad_norm": 1.390340805053711, "learning_rate": 1.4508449552713532e-05, "loss": 0.5708, "step": 2486 }, { "epoch": 0.37094488776195095, "grad_norm": 1.4805101156234741, "learning_rate": 1.450413621093299e-05, "loss": 0.5255, "step": 2487 }, { "epoch": 0.37109404131553436, "grad_norm": 1.43162202835083, "learning_rate": 1.4499821817632973e-05, "loss": 0.6293, "step": 2488 }, { "epoch": 0.3712431948691178, "grad_norm": 1.5199288129806519, "learning_rate": 1.4495506373820695e-05, "loss": 0.6635, "step": 2489 }, { "epoch": 0.3713923484227012, "grad_norm": 1.4294666051864624, "learning_rate": 1.4491189880503633e-05, "loss": 0.6142, "step": 2490 }, { "epoch": 0.3715415019762846, "grad_norm": 1.7597689628601074, "learning_rate": 1.4486872338689492e-05, "loss": 0.6345, "step": 2491 }, { "epoch": 0.371690655529868, "grad_norm": 1.370326042175293, "learning_rate": 1.4482553749386234e-05, "loss": 0.5672, "step": 2492 }, { "epoch": 0.37183980908345143, "grad_norm": 1.3362101316452026, "learning_rate": 1.4478234113602063e-05, "loss": 0.5563, "step": 2493 }, { "epoch": 0.37198896263703485, "grad_norm": 1.3943010568618774, "learning_rate": 1.4473913432345426e-05, "loss": 0.5921, "step": 2494 }, { "epoch": 0.37213811619061826, "grad_norm": 1.3489060401916504, "learning_rate": 1.4469591706625003e-05, "loss": 0.6416, "step": 2495 }, { "epoch": 0.3722872697442017, "grad_norm": 1.4745732545852661, "learning_rate": 1.4465268937449743e-05, "loss": 0.5908, "step": 2496 }, { "epoch": 0.3724364232977851, "grad_norm": 1.3166165351867676, "learning_rate": 1.446094512582882e-05, "loss": 0.5967, "step": 2497 }, { "epoch": 0.3725855768513685, "grad_norm": 1.364662528038025, "learning_rate": 1.445662027277165e-05, "loss": 0.6135, "step": 2498 }, { "epoch": 0.3727347304049519, "grad_norm": 1.3202462196350098, "learning_rate": 1.44522943792879e-05, "loss": 0.6086, "step": 2499 }, { "epoch": 0.37288388395853533, "grad_norm": 1.6374355554580688, "learning_rate": 1.4447967446387482e-05, "loss": 0.6219, "step": 2500 }, { "epoch": 0.37303303751211875, "grad_norm": 1.2087225914001465, "learning_rate": 1.444363947508054e-05, "loss": 0.6227, "step": 2501 }, { "epoch": 0.37318219106570216, "grad_norm": 1.5642573833465576, "learning_rate": 1.4439310466377474e-05, "loss": 0.6706, "step": 2502 }, { "epoch": 0.3733313446192856, "grad_norm": 2.710829734802246, "learning_rate": 1.4434980421288911e-05, "loss": 0.5668, "step": 2503 }, { "epoch": 0.373480498172869, "grad_norm": 1.3453031778335571, "learning_rate": 1.443064934082573e-05, "loss": 0.7264, "step": 2504 }, { "epoch": 0.3736296517264524, "grad_norm": 1.441496729850769, "learning_rate": 1.4426317225999055e-05, "loss": 0.6445, "step": 2505 }, { "epoch": 0.3737788052800358, "grad_norm": 1.3765355348587036, "learning_rate": 1.4421984077820242e-05, "loss": 0.6927, "step": 2506 }, { "epoch": 0.37392795883361923, "grad_norm": 1.290779948234558, "learning_rate": 1.4417649897300891e-05, "loss": 0.6353, "step": 2507 }, { "epoch": 0.37407711238720265, "grad_norm": 1.5200486183166504, "learning_rate": 1.4413314685452844e-05, "loss": 0.7079, "step": 2508 }, { "epoch": 0.37422626594078606, "grad_norm": 1.2445107698440552, "learning_rate": 1.4408978443288186e-05, "loss": 0.6543, "step": 2509 }, { "epoch": 0.3743754194943695, "grad_norm": 1.263728380203247, "learning_rate": 1.440464117181924e-05, "loss": 0.6198, "step": 2510 }, { "epoch": 0.3745245730479529, "grad_norm": 1.6051058769226074, "learning_rate": 1.4400302872058568e-05, "loss": 0.6459, "step": 2511 }, { "epoch": 0.3746737266015363, "grad_norm": 1.6305835247039795, "learning_rate": 1.439596354501898e-05, "loss": 0.6219, "step": 2512 }, { "epoch": 0.3748228801551197, "grad_norm": 1.463494896888733, "learning_rate": 1.4391623191713513e-05, "loss": 0.5814, "step": 2513 }, { "epoch": 0.37497203370870313, "grad_norm": 1.4058635234832764, "learning_rate": 1.4387281813155451e-05, "loss": 0.6546, "step": 2514 }, { "epoch": 0.37512118726228655, "grad_norm": 1.5597623586654663, "learning_rate": 1.438293941035832e-05, "loss": 0.6581, "step": 2515 }, { "epoch": 0.37527034081586996, "grad_norm": 1.4559625387191772, "learning_rate": 1.4378595984335881e-05, "loss": 0.6266, "step": 2516 }, { "epoch": 0.3754194943694534, "grad_norm": 1.5035407543182373, "learning_rate": 1.4374251536102131e-05, "loss": 0.5379, "step": 2517 }, { "epoch": 0.3755686479230368, "grad_norm": 1.535958170890808, "learning_rate": 1.4369906066671313e-05, "loss": 0.6349, "step": 2518 }, { "epoch": 0.3757178014766202, "grad_norm": 1.4486464262008667, "learning_rate": 1.4365559577057905e-05, "loss": 0.7304, "step": 2519 }, { "epoch": 0.3758669550302036, "grad_norm": 1.5651185512542725, "learning_rate": 1.4361212068276622e-05, "loss": 0.5827, "step": 2520 }, { "epoch": 0.37601610858378703, "grad_norm": 1.331693410873413, "learning_rate": 1.4356863541342416e-05, "loss": 0.695, "step": 2521 }, { "epoch": 0.37616526213737045, "grad_norm": 1.3785667419433594, "learning_rate": 1.435251399727048e-05, "loss": 0.5452, "step": 2522 }, { "epoch": 0.37631441569095386, "grad_norm": 0.9011234045028687, "learning_rate": 1.4348163437076243e-05, "loss": 0.6856, "step": 2523 }, { "epoch": 0.3764635692445373, "grad_norm": 1.3273377418518066, "learning_rate": 1.4343811861775373e-05, "loss": 0.5006, "step": 2524 }, { "epoch": 0.3766127227981207, "grad_norm": 4.082608699798584, "learning_rate": 1.4339459272383766e-05, "loss": 0.6007, "step": 2525 }, { "epoch": 0.3767618763517041, "grad_norm": 1.263804316520691, "learning_rate": 1.433510566991757e-05, "loss": 0.5882, "step": 2526 }, { "epoch": 0.3769110299052875, "grad_norm": 1.2819451093673706, "learning_rate": 1.4330751055393162e-05, "loss": 0.5857, "step": 2527 }, { "epoch": 0.37706018345887093, "grad_norm": 1.3603779077529907, "learning_rate": 1.4326395429827147e-05, "loss": 0.6564, "step": 2528 }, { "epoch": 0.37720933701245435, "grad_norm": 1.3779441118240356, "learning_rate": 1.4322038794236379e-05, "loss": 0.5281, "step": 2529 }, { "epoch": 0.37735849056603776, "grad_norm": 1.2768834829330444, "learning_rate": 1.4317681149637941e-05, "loss": 0.5571, "step": 2530 }, { "epoch": 0.3775076441196212, "grad_norm": 1.3357239961624146, "learning_rate": 1.4313322497049153e-05, "loss": 0.5318, "step": 2531 }, { "epoch": 0.3776567976732046, "grad_norm": 1.3735703229904175, "learning_rate": 1.4308962837487573e-05, "loss": 0.6555, "step": 2532 }, { "epoch": 0.37780595122678795, "grad_norm": 1.5462576150894165, "learning_rate": 1.430460217197099e-05, "loss": 0.6096, "step": 2533 }, { "epoch": 0.37795510478037136, "grad_norm": 1.2982357740402222, "learning_rate": 1.4300240501517424e-05, "loss": 0.5944, "step": 2534 }, { "epoch": 0.3781042583339548, "grad_norm": 1.3661144971847534, "learning_rate": 1.4295877827145144e-05, "loss": 0.5851, "step": 2535 }, { "epoch": 0.3782534118875382, "grad_norm": 1.2553074359893799, "learning_rate": 1.4291514149872638e-05, "loss": 0.5664, "step": 2536 }, { "epoch": 0.3784025654411216, "grad_norm": 1.2275813817977905, "learning_rate": 1.4287149470718635e-05, "loss": 0.608, "step": 2537 }, { "epoch": 0.378551718994705, "grad_norm": 1.2240493297576904, "learning_rate": 1.4282783790702102e-05, "loss": 0.591, "step": 2538 }, { "epoch": 0.37870087254828844, "grad_norm": 1.1928024291992188, "learning_rate": 1.427841711084223e-05, "loss": 0.5428, "step": 2539 }, { "epoch": 0.37885002610187185, "grad_norm": 0.9052636027336121, "learning_rate": 1.427404943215845e-05, "loss": 0.6085, "step": 2540 }, { "epoch": 0.37899917965545526, "grad_norm": 2.617525577545166, "learning_rate": 1.4269680755670425e-05, "loss": 0.5548, "step": 2541 }, { "epoch": 0.3791483332090387, "grad_norm": 1.277124285697937, "learning_rate": 1.426531108239805e-05, "loss": 0.6063, "step": 2542 }, { "epoch": 0.3792974867626221, "grad_norm": 1.8338987827301025, "learning_rate": 1.4260940413361452e-05, "loss": 0.638, "step": 2543 }, { "epoch": 0.3794466403162055, "grad_norm": 1.4088326692581177, "learning_rate": 1.4256568749580996e-05, "loss": 0.6573, "step": 2544 }, { "epoch": 0.3795957938697889, "grad_norm": 1.4297958612442017, "learning_rate": 1.425219609207727e-05, "loss": 0.6201, "step": 2545 }, { "epoch": 0.37974494742337234, "grad_norm": 1.306304931640625, "learning_rate": 1.4247822441871105e-05, "loss": 0.5499, "step": 2546 }, { "epoch": 0.37989410097695575, "grad_norm": 1.606823444366455, "learning_rate": 1.424344779998355e-05, "loss": 0.6149, "step": 2547 }, { "epoch": 0.38004325453053917, "grad_norm": 1.4545131921768188, "learning_rate": 1.4239072167435897e-05, "loss": 0.5346, "step": 2548 }, { "epoch": 0.3801924080841226, "grad_norm": 1.6641201972961426, "learning_rate": 1.4234695545249666e-05, "loss": 0.5791, "step": 2549 }, { "epoch": 0.380341561637706, "grad_norm": 1.3364194631576538, "learning_rate": 1.4230317934446607e-05, "loss": 0.654, "step": 2550 }, { "epoch": 0.3804907151912894, "grad_norm": 1.2916626930236816, "learning_rate": 1.4225939336048703e-05, "loss": 0.5795, "step": 2551 }, { "epoch": 0.3806398687448728, "grad_norm": 1.2702902555465698, "learning_rate": 1.422155975107816e-05, "loss": 0.6577, "step": 2552 }, { "epoch": 0.38078902229845624, "grad_norm": 1.5744303464889526, "learning_rate": 1.4217179180557428e-05, "loss": 0.5011, "step": 2553 }, { "epoch": 0.38093817585203965, "grad_norm": 1.4325357675552368, "learning_rate": 1.4212797625509173e-05, "loss": 0.6122, "step": 2554 }, { "epoch": 0.38108732940562307, "grad_norm": 1.3460679054260254, "learning_rate": 1.4208415086956305e-05, "loss": 0.6173, "step": 2555 }, { "epoch": 0.3812364829592065, "grad_norm": 1.279718279838562, "learning_rate": 1.4204031565921944e-05, "loss": 0.6074, "step": 2556 }, { "epoch": 0.3813856365127899, "grad_norm": 1.8365299701690674, "learning_rate": 1.419964706342946e-05, "loss": 0.6689, "step": 2557 }, { "epoch": 0.3815347900663733, "grad_norm": 1.315317988395691, "learning_rate": 1.4195261580502442e-05, "loss": 0.6857, "step": 2558 }, { "epoch": 0.3816839436199567, "grad_norm": 1.5159540176391602, "learning_rate": 1.4190875118164706e-05, "loss": 0.662, "step": 2559 }, { "epoch": 0.38183309717354014, "grad_norm": 1.3023651838302612, "learning_rate": 1.4186487677440304e-05, "loss": 0.5975, "step": 2560 }, { "epoch": 0.38198225072712355, "grad_norm": 1.238080382347107, "learning_rate": 1.4182099259353508e-05, "loss": 0.6444, "step": 2561 }, { "epoch": 0.38213140428070697, "grad_norm": 1.2815024852752686, "learning_rate": 1.4177709864928822e-05, "loss": 0.593, "step": 2562 }, { "epoch": 0.3822805578342904, "grad_norm": 1.1958640813827515, "learning_rate": 1.4173319495190984e-05, "loss": 0.5584, "step": 2563 }, { "epoch": 0.3824297113878738, "grad_norm": 1.4226444959640503, "learning_rate": 1.416892815116495e-05, "loss": 0.6006, "step": 2564 }, { "epoch": 0.3825788649414572, "grad_norm": 1.30903959274292, "learning_rate": 1.4164535833875905e-05, "loss": 0.4863, "step": 2565 }, { "epoch": 0.3827280184950406, "grad_norm": 1.55120849609375, "learning_rate": 1.416014254434927e-05, "loss": 0.6894, "step": 2566 }, { "epoch": 0.38287717204862404, "grad_norm": 1.279475450515747, "learning_rate": 1.415574828361068e-05, "loss": 0.6459, "step": 2567 }, { "epoch": 0.38302632560220745, "grad_norm": 1.2893919944763184, "learning_rate": 1.4151353052686008e-05, "loss": 0.6626, "step": 2568 }, { "epoch": 0.38317547915579087, "grad_norm": 1.334452509880066, "learning_rate": 1.4146956852601349e-05, "loss": 0.636, "step": 2569 }, { "epoch": 0.3833246327093743, "grad_norm": 1.2965819835662842, "learning_rate": 1.4142559684383018e-05, "loss": 0.5921, "step": 2570 }, { "epoch": 0.3834737862629577, "grad_norm": 1.6774803400039673, "learning_rate": 1.413816154905757e-05, "loss": 0.594, "step": 2571 }, { "epoch": 0.3836229398165411, "grad_norm": 1.5450586080551147, "learning_rate": 1.4133762447651774e-05, "loss": 0.5431, "step": 2572 }, { "epoch": 0.3837720933701245, "grad_norm": 1.5749175548553467, "learning_rate": 1.4129362381192626e-05, "loss": 0.5607, "step": 2573 }, { "epoch": 0.38392124692370794, "grad_norm": 1.8432332277297974, "learning_rate": 1.4124961350707354e-05, "loss": 0.6137, "step": 2574 }, { "epoch": 0.38407040047729135, "grad_norm": 1.7091487646102905, "learning_rate": 1.4120559357223407e-05, "loss": 0.6369, "step": 2575 }, { "epoch": 0.38421955403087477, "grad_norm": 1.904576063156128, "learning_rate": 1.4116156401768452e-05, "loss": 0.6327, "step": 2576 }, { "epoch": 0.3843687075844582, "grad_norm": 1.4804645776748657, "learning_rate": 1.4111752485370399e-05, "loss": 0.6298, "step": 2577 }, { "epoch": 0.3845178611380416, "grad_norm": 1.3715661764144897, "learning_rate": 1.4107347609057358e-05, "loss": 0.5447, "step": 2578 }, { "epoch": 0.384667014691625, "grad_norm": 1.6905465126037598, "learning_rate": 1.4102941773857683e-05, "loss": 0.5678, "step": 2579 }, { "epoch": 0.3848161682452084, "grad_norm": 1.053989052772522, "learning_rate": 1.4098534980799943e-05, "loss": 0.6657, "step": 2580 }, { "epoch": 0.38496532179879184, "grad_norm": 1.2402158975601196, "learning_rate": 1.4094127230912931e-05, "loss": 0.5827, "step": 2581 }, { "epoch": 0.38511447535237525, "grad_norm": 1.2327030897140503, "learning_rate": 1.4089718525225667e-05, "loss": 0.5566, "step": 2582 }, { "epoch": 0.38526362890595867, "grad_norm": 1.791276216506958, "learning_rate": 1.4085308864767389e-05, "loss": 0.5927, "step": 2583 }, { "epoch": 0.3854127824595421, "grad_norm": 1.4473415613174438, "learning_rate": 1.4080898250567559e-05, "loss": 0.6568, "step": 2584 }, { "epoch": 0.3855619360131255, "grad_norm": 1.2887232303619385, "learning_rate": 1.407648668365587e-05, "loss": 0.5546, "step": 2585 }, { "epoch": 0.3857110895667089, "grad_norm": 2.122526168823242, "learning_rate": 1.4072074165062224e-05, "loss": 0.5991, "step": 2586 }, { "epoch": 0.3858602431202923, "grad_norm": 1.6572256088256836, "learning_rate": 1.4067660695816751e-05, "loss": 0.6633, "step": 2587 }, { "epoch": 0.38600939667387574, "grad_norm": 1.2745429277420044, "learning_rate": 1.4063246276949811e-05, "loss": 0.4804, "step": 2588 }, { "epoch": 0.38615855022745915, "grad_norm": 1.4361491203308105, "learning_rate": 1.4058830909491971e-05, "loss": 0.6685, "step": 2589 }, { "epoch": 0.38630770378104257, "grad_norm": 1.314062237739563, "learning_rate": 1.4054414594474033e-05, "loss": 0.5917, "step": 2590 }, { "epoch": 0.386456857334626, "grad_norm": 1.3138309717178345, "learning_rate": 1.4049997332927007e-05, "loss": 0.6499, "step": 2591 }, { "epoch": 0.3866060108882094, "grad_norm": 1.368401050567627, "learning_rate": 1.4045579125882136e-05, "loss": 0.6331, "step": 2592 }, { "epoch": 0.3867551644417928, "grad_norm": 1.4533637762069702, "learning_rate": 1.4041159974370881e-05, "loss": 0.6676, "step": 2593 }, { "epoch": 0.3869043179953762, "grad_norm": 0.9553125500679016, "learning_rate": 1.4036739879424916e-05, "loss": 0.6808, "step": 2594 }, { "epoch": 0.38705347154895964, "grad_norm": 1.3945581912994385, "learning_rate": 1.403231884207614e-05, "loss": 0.6633, "step": 2595 }, { "epoch": 0.38720262510254305, "grad_norm": 1.6634315252304077, "learning_rate": 1.4027896863356679e-05, "loss": 0.6233, "step": 2596 }, { "epoch": 0.38735177865612647, "grad_norm": 1.378443717956543, "learning_rate": 1.4023473944298864e-05, "loss": 0.612, "step": 2597 }, { "epoch": 0.3875009322097099, "grad_norm": 0.9324147701263428, "learning_rate": 1.401905008593526e-05, "loss": 0.7098, "step": 2598 }, { "epoch": 0.3876500857632933, "grad_norm": 1.9350593090057373, "learning_rate": 1.4014625289298645e-05, "loss": 0.6008, "step": 2599 }, { "epoch": 0.3877992393168767, "grad_norm": 0.8624157905578613, "learning_rate": 1.401019955542201e-05, "loss": 0.6237, "step": 2600 }, { "epoch": 0.3879483928704601, "grad_norm": 1.3732593059539795, "learning_rate": 1.4005772885338578e-05, "loss": 0.6146, "step": 2601 }, { "epoch": 0.38809754642404354, "grad_norm": 1.48618745803833, "learning_rate": 1.4001345280081782e-05, "loss": 0.5824, "step": 2602 }, { "epoch": 0.38824669997762695, "grad_norm": 1.691602349281311, "learning_rate": 1.399691674068527e-05, "loss": 0.5903, "step": 2603 }, { "epoch": 0.38839585353121037, "grad_norm": 1.4706984758377075, "learning_rate": 1.399248726818292e-05, "loss": 0.6442, "step": 2604 }, { "epoch": 0.3885450070847938, "grad_norm": 1.5043466091156006, "learning_rate": 1.3988056863608815e-05, "loss": 0.6893, "step": 2605 }, { "epoch": 0.3886941606383772, "grad_norm": 1.483999252319336, "learning_rate": 1.3983625527997264e-05, "loss": 0.6259, "step": 2606 }, { "epoch": 0.3888433141919606, "grad_norm": 1.2685719728469849, "learning_rate": 1.3979193262382791e-05, "loss": 0.6179, "step": 2607 }, { "epoch": 0.388992467745544, "grad_norm": 1.3183403015136719, "learning_rate": 1.3974760067800137e-05, "loss": 0.6291, "step": 2608 }, { "epoch": 0.38914162129912744, "grad_norm": 1.5757942199707031, "learning_rate": 1.3970325945284255e-05, "loss": 0.6674, "step": 2609 }, { "epoch": 0.38929077485271085, "grad_norm": 1.2935892343521118, "learning_rate": 1.3965890895870328e-05, "loss": 0.6586, "step": 2610 }, { "epoch": 0.38943992840629427, "grad_norm": 1.2838190793991089, "learning_rate": 1.3961454920593743e-05, "loss": 0.6459, "step": 2611 }, { "epoch": 0.3895890819598777, "grad_norm": 1.380974292755127, "learning_rate": 1.3957018020490101e-05, "loss": 0.7694, "step": 2612 }, { "epoch": 0.3897382355134611, "grad_norm": 1.489141821861267, "learning_rate": 1.3952580196595232e-05, "loss": 0.6621, "step": 2613 }, { "epoch": 0.3898873890670445, "grad_norm": 1.5885937213897705, "learning_rate": 1.3948141449945172e-05, "loss": 0.5874, "step": 2614 }, { "epoch": 0.3900365426206279, "grad_norm": 1.3811534643173218, "learning_rate": 1.3943701781576172e-05, "loss": 0.7149, "step": 2615 }, { "epoch": 0.39018569617421134, "grad_norm": 1.9110087156295776, "learning_rate": 1.3939261192524708e-05, "loss": 0.5939, "step": 2616 }, { "epoch": 0.39033484972779475, "grad_norm": 1.4104758501052856, "learning_rate": 1.3934819683827457e-05, "loss": 0.6672, "step": 2617 }, { "epoch": 0.39048400328137817, "grad_norm": 1.3371906280517578, "learning_rate": 1.393037725652132e-05, "loss": 0.6242, "step": 2618 }, { "epoch": 0.3906331568349616, "grad_norm": 1.4720473289489746, "learning_rate": 1.3925933911643415e-05, "loss": 0.6279, "step": 2619 }, { "epoch": 0.390782310388545, "grad_norm": 1.3025965690612793, "learning_rate": 1.3921489650231061e-05, "loss": 0.5466, "step": 2620 }, { "epoch": 0.3909314639421284, "grad_norm": 1.6404949426651, "learning_rate": 1.3917044473321805e-05, "loss": 0.604, "step": 2621 }, { "epoch": 0.3910806174957118, "grad_norm": 1.3354896306991577, "learning_rate": 1.39125983819534e-05, "loss": 0.5616, "step": 2622 }, { "epoch": 0.39122977104929524, "grad_norm": 1.2983475923538208, "learning_rate": 1.3908151377163815e-05, "loss": 0.541, "step": 2623 }, { "epoch": 0.39137892460287865, "grad_norm": 1.4688913822174072, "learning_rate": 1.3903703459991234e-05, "loss": 0.6203, "step": 2624 }, { "epoch": 0.39152807815646207, "grad_norm": 1.3325077295303345, "learning_rate": 1.3899254631474048e-05, "loss": 0.641, "step": 2625 }, { "epoch": 0.3916772317100455, "grad_norm": 1.4229092597961426, "learning_rate": 1.3894804892650864e-05, "loss": 0.6303, "step": 2626 }, { "epoch": 0.3918263852636289, "grad_norm": 1.3600205183029175, "learning_rate": 1.3890354244560507e-05, "loss": 0.6209, "step": 2627 }, { "epoch": 0.3919755388172123, "grad_norm": 2.2685117721557617, "learning_rate": 1.3885902688242006e-05, "loss": 0.5351, "step": 2628 }, { "epoch": 0.3921246923707957, "grad_norm": 1.3890990018844604, "learning_rate": 1.3881450224734604e-05, "loss": 0.6661, "step": 2629 }, { "epoch": 0.39227384592437914, "grad_norm": 1.3795175552368164, "learning_rate": 1.3876996855077763e-05, "loss": 0.5613, "step": 2630 }, { "epoch": 0.39242299947796255, "grad_norm": 1.6138033866882324, "learning_rate": 1.3872542580311144e-05, "loss": 0.668, "step": 2631 }, { "epoch": 0.39257215303154597, "grad_norm": 1.0314300060272217, "learning_rate": 1.3868087401474628e-05, "loss": 0.6631, "step": 2632 }, { "epoch": 0.3927213065851294, "grad_norm": 1.2766019105911255, "learning_rate": 1.3863631319608306e-05, "loss": 0.6142, "step": 2633 }, { "epoch": 0.3928704601387128, "grad_norm": 1.337773323059082, "learning_rate": 1.385917433575248e-05, "loss": 0.5428, "step": 2634 }, { "epoch": 0.3930196136922962, "grad_norm": 2.1536049842834473, "learning_rate": 1.3854716450947658e-05, "loss": 0.6996, "step": 2635 }, { "epoch": 0.3931687672458796, "grad_norm": 1.4842548370361328, "learning_rate": 1.3850257666234569e-05, "loss": 0.6126, "step": 2636 }, { "epoch": 0.39331792079946304, "grad_norm": 1.5764963626861572, "learning_rate": 1.3845797982654134e-05, "loss": 0.6281, "step": 2637 }, { "epoch": 0.39346707435304645, "grad_norm": 1.2192084789276123, "learning_rate": 1.3841337401247503e-05, "loss": 0.5496, "step": 2638 }, { "epoch": 0.39361622790662987, "grad_norm": 2.33760666847229, "learning_rate": 1.3836875923056026e-05, "loss": 0.6265, "step": 2639 }, { "epoch": 0.3937653814602133, "grad_norm": 0.9894026517868042, "learning_rate": 1.383241354912126e-05, "loss": 0.7006, "step": 2640 }, { "epoch": 0.3939145350137967, "grad_norm": 1.4342260360717773, "learning_rate": 1.3827950280484981e-05, "loss": 0.6678, "step": 2641 }, { "epoch": 0.3940636885673801, "grad_norm": 1.3512144088745117, "learning_rate": 1.382348611818916e-05, "loss": 0.5963, "step": 2642 }, { "epoch": 0.3942128421209635, "grad_norm": 1.4554380178451538, "learning_rate": 1.381902106327599e-05, "loss": 0.6169, "step": 2643 }, { "epoch": 0.39436199567454694, "grad_norm": 1.3002437353134155, "learning_rate": 1.3814555116787864e-05, "loss": 0.5741, "step": 2644 }, { "epoch": 0.39451114922813035, "grad_norm": 1.2582083940505981, "learning_rate": 1.3810088279767389e-05, "loss": 0.5929, "step": 2645 }, { "epoch": 0.39466030278171377, "grad_norm": 1.4374549388885498, "learning_rate": 1.3805620553257374e-05, "loss": 0.5159, "step": 2646 }, { "epoch": 0.3948094563352972, "grad_norm": 1.477744698524475, "learning_rate": 1.380115193830084e-05, "loss": 0.6169, "step": 2647 }, { "epoch": 0.3949586098888806, "grad_norm": 1.2359845638275146, "learning_rate": 1.379668243594101e-05, "loss": 0.5181, "step": 2648 }, { "epoch": 0.395107763442464, "grad_norm": 1.4396018981933594, "learning_rate": 1.3792212047221326e-05, "loss": 0.6568, "step": 2649 }, { "epoch": 0.3952569169960474, "grad_norm": 1.3883228302001953, "learning_rate": 1.3787740773185418e-05, "loss": 0.6157, "step": 2650 }, { "epoch": 0.39540607054963084, "grad_norm": 1.554826021194458, "learning_rate": 1.3783268614877144e-05, "loss": 0.5331, "step": 2651 }, { "epoch": 0.39555522410321425, "grad_norm": 1.2838960886001587, "learning_rate": 1.3778795573340551e-05, "loss": 0.6439, "step": 2652 }, { "epoch": 0.39570437765679767, "grad_norm": 1.403650164604187, "learning_rate": 1.3774321649619902e-05, "loss": 0.5924, "step": 2653 }, { "epoch": 0.3958535312103811, "grad_norm": 1.7345750331878662, "learning_rate": 1.376984684475966e-05, "loss": 0.5545, "step": 2654 }, { "epoch": 0.3960026847639645, "grad_norm": 1.256150245666504, "learning_rate": 1.3765371159804503e-05, "loss": 0.613, "step": 2655 }, { "epoch": 0.3961518383175479, "grad_norm": 1.6497026681900024, "learning_rate": 1.3760894595799305e-05, "loss": 0.6703, "step": 2656 }, { "epoch": 0.3963009918711313, "grad_norm": 1.470178246498108, "learning_rate": 1.3756417153789148e-05, "loss": 0.5983, "step": 2657 }, { "epoch": 0.39645014542471474, "grad_norm": 1.412251353263855, "learning_rate": 1.375193883481932e-05, "loss": 0.644, "step": 2658 }, { "epoch": 0.39659929897829815, "grad_norm": 1.4804946184158325, "learning_rate": 1.3747459639935312e-05, "loss": 0.5414, "step": 2659 }, { "epoch": 0.39674845253188157, "grad_norm": 1.2949097156524658, "learning_rate": 1.3742979570182827e-05, "loss": 0.5368, "step": 2660 }, { "epoch": 0.396897606085465, "grad_norm": 1.2574301958084106, "learning_rate": 1.3738498626607758e-05, "loss": 0.5985, "step": 2661 }, { "epoch": 0.3970467596390484, "grad_norm": 1.4499951601028442, "learning_rate": 1.3734016810256213e-05, "loss": 0.554, "step": 2662 }, { "epoch": 0.3971959131926318, "grad_norm": 1.2230170965194702, "learning_rate": 1.37295341221745e-05, "loss": 0.6357, "step": 2663 }, { "epoch": 0.3973450667462152, "grad_norm": 1.361437201499939, "learning_rate": 1.3725050563409135e-05, "loss": 0.5908, "step": 2664 }, { "epoch": 0.39749422029979864, "grad_norm": 1.8221193552017212, "learning_rate": 1.372056613500683e-05, "loss": 0.5486, "step": 2665 }, { "epoch": 0.39764337385338205, "grad_norm": 1.47232186794281, "learning_rate": 1.37160808380145e-05, "loss": 0.6234, "step": 2666 }, { "epoch": 0.39779252740696547, "grad_norm": 1.463076114654541, "learning_rate": 1.3711594673479279e-05, "loss": 0.6031, "step": 2667 }, { "epoch": 0.3979416809605489, "grad_norm": 1.3208192586898804, "learning_rate": 1.3707107642448477e-05, "loss": 0.6324, "step": 2668 }, { "epoch": 0.3980908345141323, "grad_norm": 1.5105957984924316, "learning_rate": 1.3702619745969628e-05, "loss": 0.6509, "step": 2669 }, { "epoch": 0.3982399880677157, "grad_norm": 1.5486592054367065, "learning_rate": 1.3698130985090455e-05, "loss": 0.6952, "step": 2670 }, { "epoch": 0.3983891416212991, "grad_norm": 1.165446400642395, "learning_rate": 1.3693641360858891e-05, "loss": 0.5551, "step": 2671 }, { "epoch": 0.39853829517488254, "grad_norm": 1.4531687498092651, "learning_rate": 1.3689150874323072e-05, "loss": 0.5642, "step": 2672 }, { "epoch": 0.39868744872846595, "grad_norm": 1.3231862783432007, "learning_rate": 1.368465952653132e-05, "loss": 0.6617, "step": 2673 }, { "epoch": 0.39883660228204937, "grad_norm": 1.4776737689971924, "learning_rate": 1.3680167318532182e-05, "loss": 0.6368, "step": 2674 }, { "epoch": 0.3989857558356328, "grad_norm": 2.1385622024536133, "learning_rate": 1.3675674251374382e-05, "loss": 0.561, "step": 2675 }, { "epoch": 0.3991349093892162, "grad_norm": 0.966179609298706, "learning_rate": 1.367118032610686e-05, "loss": 0.6967, "step": 2676 }, { "epoch": 0.3992840629427996, "grad_norm": 1.8057880401611328, "learning_rate": 1.3666685543778755e-05, "loss": 0.6272, "step": 2677 }, { "epoch": 0.399433216496383, "grad_norm": 1.3206974267959595, "learning_rate": 1.3662189905439394e-05, "loss": 0.6346, "step": 2678 }, { "epoch": 0.39958237004996644, "grad_norm": 1.3336271047592163, "learning_rate": 1.3657693412138318e-05, "loss": 0.6781, "step": 2679 }, { "epoch": 0.39973152360354985, "grad_norm": 1.3197405338287354, "learning_rate": 1.3653196064925264e-05, "loss": 0.5748, "step": 2680 }, { "epoch": 0.39988067715713327, "grad_norm": 1.6003574132919312, "learning_rate": 1.3648697864850162e-05, "loss": 0.6503, "step": 2681 }, { "epoch": 0.4000298307107167, "grad_norm": 1.7071232795715332, "learning_rate": 1.364419881296315e-05, "loss": 0.5737, "step": 2682 }, { "epoch": 0.4001789842643001, "grad_norm": 1.2804206609725952, "learning_rate": 1.3639698910314556e-05, "loss": 0.6406, "step": 2683 }, { "epoch": 0.4003281378178835, "grad_norm": 1.5004640817642212, "learning_rate": 1.3635198157954915e-05, "loss": 0.6277, "step": 2684 }, { "epoch": 0.4004772913714669, "grad_norm": 1.1806477308273315, "learning_rate": 1.3630696556934955e-05, "loss": 0.5593, "step": 2685 }, { "epoch": 0.40062644492505034, "grad_norm": 1.5256479978561401, "learning_rate": 1.3626194108305606e-05, "loss": 0.6303, "step": 2686 }, { "epoch": 0.40077559847863375, "grad_norm": 1.806844711303711, "learning_rate": 1.3621690813117987e-05, "loss": 0.6088, "step": 2687 }, { "epoch": 0.40092475203221717, "grad_norm": 1.3573462963104248, "learning_rate": 1.3617186672423426e-05, "loss": 0.5822, "step": 2688 }, { "epoch": 0.4010739055858006, "grad_norm": 1.2354780435562134, "learning_rate": 1.3612681687273445e-05, "loss": 0.6436, "step": 2689 }, { "epoch": 0.401223059139384, "grad_norm": 1.460715889930725, "learning_rate": 1.3608175858719757e-05, "loss": 0.6085, "step": 2690 }, { "epoch": 0.4013722126929674, "grad_norm": 1.5280369520187378, "learning_rate": 1.360366918781428e-05, "loss": 0.4766, "step": 2691 }, { "epoch": 0.4015213662465508, "grad_norm": 1.504925012588501, "learning_rate": 1.3599161675609125e-05, "loss": 0.6927, "step": 2692 }, { "epoch": 0.40167051980013424, "grad_norm": 1.3781099319458008, "learning_rate": 1.3594653323156597e-05, "loss": 0.6041, "step": 2693 }, { "epoch": 0.40181967335371765, "grad_norm": 1.546555757522583, "learning_rate": 1.3590144131509205e-05, "loss": 0.5739, "step": 2694 }, { "epoch": 0.40196882690730107, "grad_norm": 1.5760716199874878, "learning_rate": 1.3585634101719642e-05, "loss": 0.6679, "step": 2695 }, { "epoch": 0.4021179804608845, "grad_norm": 1.3745776414871216, "learning_rate": 1.3581123234840807e-05, "loss": 0.6366, "step": 2696 }, { "epoch": 0.4022671340144679, "grad_norm": 0.8694431781768799, "learning_rate": 1.3576611531925791e-05, "loss": 0.6592, "step": 2697 }, { "epoch": 0.4024162875680513, "grad_norm": 1.3872408866882324, "learning_rate": 1.357209899402788e-05, "loss": 0.6415, "step": 2698 }, { "epoch": 0.4025654411216347, "grad_norm": 1.539610505104065, "learning_rate": 1.3567585622200556e-05, "loss": 0.6898, "step": 2699 }, { "epoch": 0.40271459467521814, "grad_norm": 1.7593547105789185, "learning_rate": 1.3563071417497493e-05, "loss": 0.5533, "step": 2700 }, { "epoch": 0.40286374822880155, "grad_norm": 1.3431875705718994, "learning_rate": 1.3558556380972555e-05, "loss": 0.5648, "step": 2701 }, { "epoch": 0.40301290178238497, "grad_norm": 1.349142074584961, "learning_rate": 1.3554040513679821e-05, "loss": 0.6113, "step": 2702 }, { "epoch": 0.4031620553359684, "grad_norm": 1.343367099761963, "learning_rate": 1.3549523816673536e-05, "loss": 0.5916, "step": 2703 }, { "epoch": 0.4033112088895518, "grad_norm": 1.4670281410217285, "learning_rate": 1.3545006291008155e-05, "loss": 0.6778, "step": 2704 }, { "epoch": 0.4034603624431352, "grad_norm": 1.5022770166397095, "learning_rate": 1.3540487937738327e-05, "loss": 0.658, "step": 2705 }, { "epoch": 0.4036095159967186, "grad_norm": 0.8477670550346375, "learning_rate": 1.3535968757918887e-05, "loss": 0.6703, "step": 2706 }, { "epoch": 0.40375866955030204, "grad_norm": 1.3171154260635376, "learning_rate": 1.3531448752604867e-05, "loss": 0.6321, "step": 2707 }, { "epoch": 0.40390782310388545, "grad_norm": 1.3761805295944214, "learning_rate": 1.3526927922851495e-05, "loss": 0.5536, "step": 2708 }, { "epoch": 0.40405697665746887, "grad_norm": 1.3516720533370972, "learning_rate": 1.3522406269714182e-05, "loss": 0.6237, "step": 2709 }, { "epoch": 0.4042061302110523, "grad_norm": 2.2637505531311035, "learning_rate": 1.3517883794248539e-05, "loss": 0.6651, "step": 2710 }, { "epoch": 0.4043552837646357, "grad_norm": 1.3516602516174316, "learning_rate": 1.351336049751037e-05, "loss": 0.6227, "step": 2711 }, { "epoch": 0.4045044373182191, "grad_norm": 1.4307539463043213, "learning_rate": 1.3508836380555662e-05, "loss": 0.6422, "step": 2712 }, { "epoch": 0.4046535908718025, "grad_norm": 1.274104356765747, "learning_rate": 1.3504311444440605e-05, "loss": 0.589, "step": 2713 }, { "epoch": 0.40480274442538594, "grad_norm": 1.2545112371444702, "learning_rate": 1.3499785690221571e-05, "loss": 0.5294, "step": 2714 }, { "epoch": 0.40495189797896936, "grad_norm": 1.2452549934387207, "learning_rate": 1.3495259118955124e-05, "loss": 0.5371, "step": 2715 }, { "epoch": 0.40510105153255277, "grad_norm": 1.5214576721191406, "learning_rate": 1.3490731731698028e-05, "loss": 0.5916, "step": 2716 }, { "epoch": 0.4052502050861362, "grad_norm": 1.6115037202835083, "learning_rate": 1.3486203529507225e-05, "loss": 0.6266, "step": 2717 }, { "epoch": 0.4053993586397196, "grad_norm": 1.325646162033081, "learning_rate": 1.3481674513439853e-05, "loss": 0.6597, "step": 2718 }, { "epoch": 0.405548512193303, "grad_norm": 1.3816797733306885, "learning_rate": 1.3477144684553243e-05, "loss": 0.6524, "step": 2719 }, { "epoch": 0.4056976657468864, "grad_norm": 1.330805778503418, "learning_rate": 1.347261404390491e-05, "loss": 0.557, "step": 2720 }, { "epoch": 0.40584681930046984, "grad_norm": 1.2392902374267578, "learning_rate": 1.3468082592552562e-05, "loss": 0.5937, "step": 2721 }, { "epoch": 0.40599597285405326, "grad_norm": 1.3355604410171509, "learning_rate": 1.3463550331554096e-05, "loss": 0.6466, "step": 2722 }, { "epoch": 0.40614512640763667, "grad_norm": 1.2675881385803223, "learning_rate": 1.3459017261967593e-05, "loss": 0.5976, "step": 2723 }, { "epoch": 0.4062942799612201, "grad_norm": 1.3453642129898071, "learning_rate": 1.3454483384851335e-05, "loss": 0.5438, "step": 2724 }, { "epoch": 0.4064434335148035, "grad_norm": 1.4643280506134033, "learning_rate": 1.3449948701263782e-05, "loss": 0.6101, "step": 2725 }, { "epoch": 0.4065925870683869, "grad_norm": 1.5467309951782227, "learning_rate": 1.344541321226358e-05, "loss": 0.6117, "step": 2726 }, { "epoch": 0.4067417406219703, "grad_norm": 4.094663143157959, "learning_rate": 1.3440876918909571e-05, "loss": 0.6391, "step": 2727 }, { "epoch": 0.40689089417555374, "grad_norm": 1.3700027465820312, "learning_rate": 1.3436339822260785e-05, "loss": 0.6121, "step": 2728 }, { "epoch": 0.40704004772913716, "grad_norm": 1.1934555768966675, "learning_rate": 1.343180192337643e-05, "loss": 0.5531, "step": 2729 }, { "epoch": 0.40718920128272057, "grad_norm": 1.435781478881836, "learning_rate": 1.3427263223315916e-05, "loss": 0.6227, "step": 2730 }, { "epoch": 0.407338354836304, "grad_norm": 1.4622364044189453, "learning_rate": 1.3422723723138824e-05, "loss": 0.5498, "step": 2731 }, { "epoch": 0.4074875083898874, "grad_norm": 1.4165250062942505, "learning_rate": 1.3418183423904931e-05, "loss": 0.5602, "step": 2732 }, { "epoch": 0.4076366619434708, "grad_norm": 0.9462141394615173, "learning_rate": 1.34136423266742e-05, "loss": 0.6909, "step": 2733 }, { "epoch": 0.4077858154970542, "grad_norm": 1.3482221364974976, "learning_rate": 1.3409100432506783e-05, "loss": 0.6333, "step": 2734 }, { "epoch": 0.40793496905063764, "grad_norm": 1.5598130226135254, "learning_rate": 1.3404557742463009e-05, "loss": 0.565, "step": 2735 }, { "epoch": 0.40808412260422106, "grad_norm": 1.7951252460479736, "learning_rate": 1.3400014257603399e-05, "loss": 0.4953, "step": 2736 }, { "epoch": 0.40823327615780447, "grad_norm": 1.431670904159546, "learning_rate": 1.339546997898866e-05, "loss": 0.5831, "step": 2737 }, { "epoch": 0.4083824297113879, "grad_norm": 1.4956104755401611, "learning_rate": 1.3390924907679683e-05, "loss": 0.6287, "step": 2738 }, { "epoch": 0.4085315832649713, "grad_norm": 1.2598068714141846, "learning_rate": 1.3386379044737545e-05, "loss": 0.6379, "step": 2739 }, { "epoch": 0.4086807368185547, "grad_norm": 1.609480381011963, "learning_rate": 1.3381832391223499e-05, "loss": 0.6348, "step": 2740 }, { "epoch": 0.4088298903721381, "grad_norm": 1.2810381650924683, "learning_rate": 1.3377284948199006e-05, "loss": 0.6231, "step": 2741 }, { "epoch": 0.40897904392572154, "grad_norm": 1.5874565839767456, "learning_rate": 1.337273671672568e-05, "loss": 0.551, "step": 2742 }, { "epoch": 0.40912819747930496, "grad_norm": 1.2737326622009277, "learning_rate": 1.3368187697865342e-05, "loss": 0.5811, "step": 2743 }, { "epoch": 0.40927735103288837, "grad_norm": 1.919179081916809, "learning_rate": 1.336363789267999e-05, "loss": 0.6371, "step": 2744 }, { "epoch": 0.4094265045864718, "grad_norm": 1.398988962173462, "learning_rate": 1.3359087302231806e-05, "loss": 0.6401, "step": 2745 }, { "epoch": 0.4095756581400552, "grad_norm": 1.9539378881454468, "learning_rate": 1.3354535927583153e-05, "loss": 0.6042, "step": 2746 }, { "epoch": 0.4097248116936386, "grad_norm": 1.3294740915298462, "learning_rate": 1.3349983769796574e-05, "loss": 0.6158, "step": 2747 }, { "epoch": 0.409873965247222, "grad_norm": 1.6293931007385254, "learning_rate": 1.3345430829934806e-05, "loss": 0.5177, "step": 2748 }, { "epoch": 0.41002311880080544, "grad_norm": 1.3363049030303955, "learning_rate": 1.3340877109060762e-05, "loss": 0.5965, "step": 2749 }, { "epoch": 0.41017227235438886, "grad_norm": 1.4314886331558228, "learning_rate": 1.3336322608237534e-05, "loss": 0.6099, "step": 2750 }, { "epoch": 0.41032142590797227, "grad_norm": 1.304391622543335, "learning_rate": 1.3331767328528398e-05, "loss": 0.5853, "step": 2751 }, { "epoch": 0.4104705794615557, "grad_norm": 1.2416895627975464, "learning_rate": 1.3327211270996818e-05, "loss": 0.6388, "step": 2752 }, { "epoch": 0.4106197330151391, "grad_norm": 1.370160698890686, "learning_rate": 1.332265443670643e-05, "loss": 0.6262, "step": 2753 }, { "epoch": 0.4107688865687225, "grad_norm": 1.2026695013046265, "learning_rate": 1.3318096826721061e-05, "loss": 0.5891, "step": 2754 }, { "epoch": 0.4109180401223059, "grad_norm": 1.5032657384872437, "learning_rate": 1.3313538442104714e-05, "loss": 0.5749, "step": 2755 }, { "epoch": 0.41106719367588934, "grad_norm": 1.2721303701400757, "learning_rate": 1.3308979283921568e-05, "loss": 0.6182, "step": 2756 }, { "epoch": 0.41121634722947276, "grad_norm": 1.4179010391235352, "learning_rate": 1.3304419353235991e-05, "loss": 0.5877, "step": 2757 }, { "epoch": 0.41136550078305617, "grad_norm": 1.452286720275879, "learning_rate": 1.3299858651112529e-05, "loss": 0.5615, "step": 2758 }, { "epoch": 0.4115146543366396, "grad_norm": 1.4062329530715942, "learning_rate": 1.3295297178615904e-05, "loss": 0.5066, "step": 2759 }, { "epoch": 0.411663807890223, "grad_norm": 1.3202285766601562, "learning_rate": 1.3290734936811027e-05, "loss": 0.6641, "step": 2760 }, { "epoch": 0.4118129614438064, "grad_norm": 1.3979346752166748, "learning_rate": 1.3286171926762977e-05, "loss": 0.6016, "step": 2761 }, { "epoch": 0.4119621149973898, "grad_norm": 1.7437498569488525, "learning_rate": 1.3281608149537018e-05, "loss": 0.6705, "step": 2762 }, { "epoch": 0.41211126855097324, "grad_norm": 1.4110934734344482, "learning_rate": 1.3277043606198596e-05, "loss": 0.6843, "step": 2763 }, { "epoch": 0.41226042210455666, "grad_norm": 2.0222909450531006, "learning_rate": 1.3272478297813334e-05, "loss": 0.7121, "step": 2764 }, { "epoch": 0.41240957565814007, "grad_norm": 1.6030899286270142, "learning_rate": 1.3267912225447026e-05, "loss": 0.6328, "step": 2765 }, { "epoch": 0.4125587292117235, "grad_norm": 3.5489392280578613, "learning_rate": 1.3263345390165654e-05, "loss": 0.6248, "step": 2766 }, { "epoch": 0.4127078827653069, "grad_norm": 1.511398196220398, "learning_rate": 1.325877779303538e-05, "loss": 0.6231, "step": 2767 }, { "epoch": 0.4128570363188903, "grad_norm": 1.4233312606811523, "learning_rate": 1.3254209435122533e-05, "loss": 0.636, "step": 2768 }, { "epoch": 0.41300618987247373, "grad_norm": 1.235385537147522, "learning_rate": 1.3249640317493628e-05, "loss": 0.5282, "step": 2769 }, { "epoch": 0.41315534342605714, "grad_norm": 1.3324297666549683, "learning_rate": 1.3245070441215355e-05, "loss": 0.6178, "step": 2770 }, { "epoch": 0.41330449697964056, "grad_norm": 1.2130330801010132, "learning_rate": 1.3240499807354577e-05, "loss": 0.5933, "step": 2771 }, { "epoch": 0.41345365053322397, "grad_norm": 1.755144476890564, "learning_rate": 1.3235928416978343e-05, "loss": 0.7189, "step": 2772 }, { "epoch": 0.4136028040868074, "grad_norm": 1.4428461790084839, "learning_rate": 1.323135627115387e-05, "loss": 0.6426, "step": 2773 }, { "epoch": 0.4137519576403908, "grad_norm": 1.4827487468719482, "learning_rate": 1.3226783370948559e-05, "loss": 0.6068, "step": 2774 }, { "epoch": 0.4139011111939742, "grad_norm": 1.4808746576309204, "learning_rate": 1.3222209717429974e-05, "loss": 0.6247, "step": 2775 }, { "epoch": 0.41405026474755763, "grad_norm": 1.587462067604065, "learning_rate": 1.3217635311665876e-05, "loss": 0.6682, "step": 2776 }, { "epoch": 0.41419941830114104, "grad_norm": 1.7389107942581177, "learning_rate": 1.3213060154724179e-05, "loss": 0.6504, "step": 2777 }, { "epoch": 0.41434857185472446, "grad_norm": 1.4747099876403809, "learning_rate": 1.3208484247672988e-05, "loss": 0.6318, "step": 2778 }, { "epoch": 0.41449772540830787, "grad_norm": 0.873177170753479, "learning_rate": 1.3203907591580573e-05, "loss": 0.6868, "step": 2779 }, { "epoch": 0.4146468789618913, "grad_norm": 1.4342392683029175, "learning_rate": 1.3199330187515391e-05, "loss": 0.585, "step": 2780 }, { "epoch": 0.4147960325154747, "grad_norm": 2.7979955673217773, "learning_rate": 1.3194752036546063e-05, "loss": 0.6077, "step": 2781 }, { "epoch": 0.4149451860690581, "grad_norm": 1.6348875761032104, "learning_rate": 1.3190173139741384e-05, "loss": 0.5689, "step": 2782 }, { "epoch": 0.41509433962264153, "grad_norm": 1.4440720081329346, "learning_rate": 1.3185593498170334e-05, "loss": 0.5552, "step": 2783 }, { "epoch": 0.41524349317622494, "grad_norm": 1.1966913938522339, "learning_rate": 1.3181013112902052e-05, "loss": 0.5538, "step": 2784 }, { "epoch": 0.41539264672980836, "grad_norm": 1.3341851234436035, "learning_rate": 1.3176431985005864e-05, "loss": 0.6087, "step": 2785 }, { "epoch": 0.41554180028339177, "grad_norm": 1.286266803741455, "learning_rate": 1.317185011555126e-05, "loss": 0.6196, "step": 2786 }, { "epoch": 0.4156909538369752, "grad_norm": 1.652770757675171, "learning_rate": 1.316726750560791e-05, "loss": 0.6234, "step": 2787 }, { "epoch": 0.4158401073905586, "grad_norm": 1.254477620124817, "learning_rate": 1.3162684156245654e-05, "loss": 0.5359, "step": 2788 }, { "epoch": 0.415989260944142, "grad_norm": 1.5677679777145386, "learning_rate": 1.31581000685345e-05, "loss": 0.6611, "step": 2789 }, { "epoch": 0.41613841449772543, "grad_norm": 1.765747308731079, "learning_rate": 1.3153515243544635e-05, "loss": 0.6023, "step": 2790 }, { "epoch": 0.41628756805130884, "grad_norm": 1.2475675344467163, "learning_rate": 1.3148929682346418e-05, "loss": 0.5394, "step": 2791 }, { "epoch": 0.41643672160489226, "grad_norm": 1.3167380094528198, "learning_rate": 1.3144343386010375e-05, "loss": 0.6225, "step": 2792 }, { "epoch": 0.41658587515847567, "grad_norm": 1.3290250301361084, "learning_rate": 1.3139756355607203e-05, "loss": 0.6077, "step": 2793 }, { "epoch": 0.4167350287120591, "grad_norm": 1.312089443206787, "learning_rate": 1.3135168592207781e-05, "loss": 0.561, "step": 2794 }, { "epoch": 0.4168841822656425, "grad_norm": 1.3227812051773071, "learning_rate": 1.313058009688315e-05, "loss": 0.5464, "step": 2795 }, { "epoch": 0.4170333358192259, "grad_norm": 1.4278802871704102, "learning_rate": 1.312599087070452e-05, "loss": 0.5671, "step": 2796 }, { "epoch": 0.41718248937280933, "grad_norm": 1.9599859714508057, "learning_rate": 1.3121400914743275e-05, "loss": 0.5337, "step": 2797 }, { "epoch": 0.41733164292639274, "grad_norm": 0.8422990441322327, "learning_rate": 1.3116810230070976e-05, "loss": 0.6499, "step": 2798 }, { "epoch": 0.41748079647997616, "grad_norm": 0.9003839492797852, "learning_rate": 1.3112218817759338e-05, "loss": 0.6515, "step": 2799 }, { "epoch": 0.41762995003355957, "grad_norm": 1.4549075365066528, "learning_rate": 1.3107626678880267e-05, "loss": 0.6125, "step": 2800 }, { "epoch": 0.417779103587143, "grad_norm": 0.9237825274467468, "learning_rate": 1.3103033814505817e-05, "loss": 0.7053, "step": 2801 }, { "epoch": 0.4179282571407264, "grad_norm": 1.3549556732177734, "learning_rate": 1.3098440225708232e-05, "loss": 0.5639, "step": 2802 }, { "epoch": 0.4180774106943098, "grad_norm": 1.5905383825302124, "learning_rate": 1.3093845913559906e-05, "loss": 0.5679, "step": 2803 }, { "epoch": 0.41822656424789323, "grad_norm": 1.2860442399978638, "learning_rate": 1.3089250879133412e-05, "loss": 0.5814, "step": 2804 }, { "epoch": 0.41837571780147664, "grad_norm": 1.541646122932434, "learning_rate": 1.3084655123501495e-05, "loss": 0.6174, "step": 2805 }, { "epoch": 0.41852487135506006, "grad_norm": 0.9077971577644348, "learning_rate": 1.3080058647737058e-05, "loss": 0.7115, "step": 2806 }, { "epoch": 0.41867402490864347, "grad_norm": 1.2618132829666138, "learning_rate": 1.3075461452913181e-05, "loss": 0.5653, "step": 2807 }, { "epoch": 0.4188231784622269, "grad_norm": 1.2992109060287476, "learning_rate": 1.307086354010311e-05, "loss": 0.5624, "step": 2808 }, { "epoch": 0.4189723320158103, "grad_norm": 1.3207539319992065, "learning_rate": 1.3066264910380251e-05, "loss": 0.5496, "step": 2809 }, { "epoch": 0.4191214855693937, "grad_norm": 1.465536117553711, "learning_rate": 1.306166556481819e-05, "loss": 0.6101, "step": 2810 }, { "epoch": 0.41927063912297713, "grad_norm": 1.405893087387085, "learning_rate": 1.3057065504490672e-05, "loss": 0.6381, "step": 2811 }, { "epoch": 0.41941979267656054, "grad_norm": 1.3882068395614624, "learning_rate": 1.3052464730471607e-05, "loss": 0.7081, "step": 2812 }, { "epoch": 0.41956894623014396, "grad_norm": 1.5246784687042236, "learning_rate": 1.3047863243835081e-05, "loss": 0.5786, "step": 2813 }, { "epoch": 0.41971809978372737, "grad_norm": 1.4612220525741577, "learning_rate": 1.3043261045655338e-05, "loss": 0.6162, "step": 2814 }, { "epoch": 0.4198672533373108, "grad_norm": 1.4033594131469727, "learning_rate": 1.3038658137006788e-05, "loss": 0.6911, "step": 2815 }, { "epoch": 0.4200164068908942, "grad_norm": 1.4004828929901123, "learning_rate": 1.3034054518964014e-05, "loss": 0.5759, "step": 2816 }, { "epoch": 0.4201655604444776, "grad_norm": 1.143629789352417, "learning_rate": 1.3029450192601758e-05, "loss": 0.5846, "step": 2817 }, { "epoch": 0.42031471399806103, "grad_norm": 1.3830417394638062, "learning_rate": 1.3024845158994927e-05, "loss": 0.615, "step": 2818 }, { "epoch": 0.42046386755164444, "grad_norm": 1.320692539215088, "learning_rate": 1.30202394192186e-05, "loss": 0.602, "step": 2819 }, { "epoch": 0.42061302110522786, "grad_norm": 1.3892267942428589, "learning_rate": 1.3015632974348015e-05, "loss": 0.6357, "step": 2820 }, { "epoch": 0.42076217465881127, "grad_norm": 1.3793569803237915, "learning_rate": 1.3011025825458576e-05, "loss": 0.6064, "step": 2821 }, { "epoch": 0.4209113282123947, "grad_norm": 1.7797636985778809, "learning_rate": 1.3006417973625853e-05, "loss": 0.6232, "step": 2822 }, { "epoch": 0.4210604817659781, "grad_norm": 1.192082405090332, "learning_rate": 1.3001809419925575e-05, "loss": 0.5098, "step": 2823 }, { "epoch": 0.4212096353195615, "grad_norm": 1.4696162939071655, "learning_rate": 1.2997200165433639e-05, "loss": 0.6522, "step": 2824 }, { "epoch": 0.42135878887314493, "grad_norm": 1.8172860145568848, "learning_rate": 1.2992590211226106e-05, "loss": 0.6404, "step": 2825 }, { "epoch": 0.42150794242672834, "grad_norm": 1.4928392171859741, "learning_rate": 1.29879795583792e-05, "loss": 0.6105, "step": 2826 }, { "epoch": 0.42165709598031176, "grad_norm": 1.3091026544570923, "learning_rate": 1.2983368207969309e-05, "loss": 0.5389, "step": 2827 }, { "epoch": 0.4218062495338952, "grad_norm": 1.342060923576355, "learning_rate": 1.2978756161072978e-05, "loss": 0.6079, "step": 2828 }, { "epoch": 0.4219554030874786, "grad_norm": 1.4563429355621338, "learning_rate": 1.2974143418766922e-05, "loss": 0.6102, "step": 2829 }, { "epoch": 0.422104556641062, "grad_norm": 0.9329102039337158, "learning_rate": 1.2969529982128017e-05, "loss": 0.7333, "step": 2830 }, { "epoch": 0.42225371019464536, "grad_norm": 1.4475524425506592, "learning_rate": 1.2964915852233295e-05, "loss": 0.6584, "step": 2831 }, { "epoch": 0.4224028637482288, "grad_norm": 2.129635810852051, "learning_rate": 1.2960301030159955e-05, "loss": 0.5962, "step": 2832 }, { "epoch": 0.4225520173018122, "grad_norm": 1.4403536319732666, "learning_rate": 1.295568551698536e-05, "loss": 0.5234, "step": 2833 }, { "epoch": 0.4227011708553956, "grad_norm": 1.4860936403274536, "learning_rate": 1.2951069313787029e-05, "loss": 0.6096, "step": 2834 }, { "epoch": 0.422850324408979, "grad_norm": 1.45269775390625, "learning_rate": 1.2946452421642643e-05, "loss": 0.6121, "step": 2835 }, { "epoch": 0.42299947796256243, "grad_norm": 1.304847240447998, "learning_rate": 1.2941834841630046e-05, "loss": 0.6278, "step": 2836 }, { "epoch": 0.42314863151614585, "grad_norm": 1.6705883741378784, "learning_rate": 1.2937216574827245e-05, "loss": 0.6483, "step": 2837 }, { "epoch": 0.42329778506972926, "grad_norm": 1.2926887273788452, "learning_rate": 1.2932597622312396e-05, "loss": 0.6135, "step": 2838 }, { "epoch": 0.4234469386233127, "grad_norm": 1.3682975769042969, "learning_rate": 1.2927977985163834e-05, "loss": 0.6241, "step": 2839 }, { "epoch": 0.4235960921768961, "grad_norm": 1.331040859222412, "learning_rate": 1.2923357664460032e-05, "loss": 0.61, "step": 2840 }, { "epoch": 0.4237452457304795, "grad_norm": 1.2633788585662842, "learning_rate": 1.291873666127964e-05, "loss": 0.6458, "step": 2841 }, { "epoch": 0.4238943992840629, "grad_norm": 1.5853201150894165, "learning_rate": 1.2914114976701463e-05, "loss": 0.5437, "step": 2842 }, { "epoch": 0.42404355283764633, "grad_norm": 1.3664401769638062, "learning_rate": 1.2909492611804455e-05, "loss": 0.6536, "step": 2843 }, { "epoch": 0.42419270639122975, "grad_norm": 1.3735769987106323, "learning_rate": 1.2904869567667743e-05, "loss": 0.5913, "step": 2844 }, { "epoch": 0.42434185994481316, "grad_norm": 1.8482838869094849, "learning_rate": 1.2900245845370603e-05, "loss": 0.7637, "step": 2845 }, { "epoch": 0.4244910134983966, "grad_norm": 1.8315893411636353, "learning_rate": 1.2895621445992474e-05, "loss": 0.6195, "step": 2846 }, { "epoch": 0.42464016705198, "grad_norm": 1.3825931549072266, "learning_rate": 1.2890996370612954e-05, "loss": 0.6748, "step": 2847 }, { "epoch": 0.4247893206055634, "grad_norm": 1.420507550239563, "learning_rate": 1.2886370620311789e-05, "loss": 0.6042, "step": 2848 }, { "epoch": 0.4249384741591468, "grad_norm": 1.3571494817733765, "learning_rate": 1.28817441961689e-05, "loss": 0.6021, "step": 2849 }, { "epoch": 0.42508762771273023, "grad_norm": 1.9566062688827515, "learning_rate": 1.2877117099264349e-05, "loss": 0.5652, "step": 2850 }, { "epoch": 0.42523678126631365, "grad_norm": 1.4763141870498657, "learning_rate": 1.2872489330678363e-05, "loss": 0.5515, "step": 2851 }, { "epoch": 0.42538593481989706, "grad_norm": 1.284827709197998, "learning_rate": 1.2867860891491326e-05, "loss": 0.4905, "step": 2852 }, { "epoch": 0.4255350883734805, "grad_norm": 0.8963513374328613, "learning_rate": 1.2863231782783774e-05, "loss": 0.6294, "step": 2853 }, { "epoch": 0.4256842419270639, "grad_norm": 1.2861192226409912, "learning_rate": 1.28586020056364e-05, "loss": 0.6556, "step": 2854 }, { "epoch": 0.4258333954806473, "grad_norm": 1.5662394762039185, "learning_rate": 1.2853971561130062e-05, "loss": 0.5892, "step": 2855 }, { "epoch": 0.4259825490342307, "grad_norm": 1.2389341592788696, "learning_rate": 1.2849340450345765e-05, "loss": 0.6545, "step": 2856 }, { "epoch": 0.42613170258781413, "grad_norm": 1.5090856552124023, "learning_rate": 1.2844708674364665e-05, "loss": 0.6701, "step": 2857 }, { "epoch": 0.42628085614139755, "grad_norm": 1.398473858833313, "learning_rate": 1.2840076234268083e-05, "loss": 0.597, "step": 2858 }, { "epoch": 0.42643000969498096, "grad_norm": 1.4565997123718262, "learning_rate": 1.2835443131137502e-05, "loss": 0.6832, "step": 2859 }, { "epoch": 0.4265791632485644, "grad_norm": 1.4922658205032349, "learning_rate": 1.2830809366054533e-05, "loss": 0.6096, "step": 2860 }, { "epoch": 0.4267283168021478, "grad_norm": 1.4416062831878662, "learning_rate": 1.282617494010097e-05, "loss": 0.6884, "step": 2861 }, { "epoch": 0.4268774703557312, "grad_norm": 1.161832332611084, "learning_rate": 1.2821539854358745e-05, "loss": 0.5839, "step": 2862 }, { "epoch": 0.4270266239093146, "grad_norm": 1.550451636314392, "learning_rate": 1.2816904109909948e-05, "loss": 0.7117, "step": 2863 }, { "epoch": 0.42717577746289803, "grad_norm": 1.5259424448013306, "learning_rate": 1.2812267707836827e-05, "loss": 0.5494, "step": 2864 }, { "epoch": 0.42732493101648145, "grad_norm": 1.6482173204421997, "learning_rate": 1.2807630649221777e-05, "loss": 0.6038, "step": 2865 }, { "epoch": 0.42747408457006486, "grad_norm": 1.2822569608688354, "learning_rate": 1.2802992935147348e-05, "loss": 0.6127, "step": 2866 }, { "epoch": 0.4276232381236483, "grad_norm": 1.426236629486084, "learning_rate": 1.2798354566696245e-05, "loss": 0.6599, "step": 2867 }, { "epoch": 0.4277723916772317, "grad_norm": 1.4473243951797485, "learning_rate": 1.2793715544951324e-05, "loss": 0.6488, "step": 2868 }, { "epoch": 0.4279215452308151, "grad_norm": 1.338742971420288, "learning_rate": 1.27890758709956e-05, "loss": 0.5095, "step": 2869 }, { "epoch": 0.4280706987843985, "grad_norm": 1.3553467988967896, "learning_rate": 1.2784435545912228e-05, "loss": 0.6285, "step": 2870 }, { "epoch": 0.42821985233798193, "grad_norm": 1.4080358743667603, "learning_rate": 1.277979457078452e-05, "loss": 0.6477, "step": 2871 }, { "epoch": 0.42836900589156535, "grad_norm": 1.885533332824707, "learning_rate": 1.2775152946695953e-05, "loss": 0.5942, "step": 2872 }, { "epoch": 0.42851815944514876, "grad_norm": 1.3345555067062378, "learning_rate": 1.2770510674730132e-05, "loss": 0.5513, "step": 2873 }, { "epoch": 0.4286673129987322, "grad_norm": 1.7714983224868774, "learning_rate": 1.276586775597083e-05, "loss": 0.5587, "step": 2874 }, { "epoch": 0.4288164665523156, "grad_norm": 1.1543136835098267, "learning_rate": 1.2761224191501964e-05, "loss": 0.539, "step": 2875 }, { "epoch": 0.428965620105899, "grad_norm": 0.8494943976402283, "learning_rate": 1.2756579982407606e-05, "loss": 0.6471, "step": 2876 }, { "epoch": 0.4291147736594824, "grad_norm": 2.131624221801758, "learning_rate": 1.2751935129771974e-05, "loss": 0.7216, "step": 2877 }, { "epoch": 0.42926392721306583, "grad_norm": 0.8045415282249451, "learning_rate": 1.2747289634679445e-05, "loss": 0.643, "step": 2878 }, { "epoch": 0.42941308076664925, "grad_norm": 1.3642933368682861, "learning_rate": 1.2742643498214534e-05, "loss": 0.5164, "step": 2879 }, { "epoch": 0.42956223432023266, "grad_norm": 1.3953828811645508, "learning_rate": 1.2737996721461907e-05, "loss": 0.6133, "step": 2880 }, { "epoch": 0.4297113878738161, "grad_norm": 1.1389135122299194, "learning_rate": 1.2733349305506395e-05, "loss": 0.5267, "step": 2881 }, { "epoch": 0.4298605414273995, "grad_norm": 1.3574358224868774, "learning_rate": 1.272870125143296e-05, "loss": 0.5366, "step": 2882 }, { "epoch": 0.4300096949809829, "grad_norm": 1.3144354820251465, "learning_rate": 1.2724052560326722e-05, "loss": 0.6634, "step": 2883 }, { "epoch": 0.4301588485345663, "grad_norm": 1.4774386882781982, "learning_rate": 1.2719403233272947e-05, "loss": 0.5442, "step": 2884 }, { "epoch": 0.43030800208814973, "grad_norm": 1.2934259176254272, "learning_rate": 1.2714753271357047e-05, "loss": 0.596, "step": 2885 }, { "epoch": 0.43045715564173315, "grad_norm": 1.2004774808883667, "learning_rate": 1.2710102675664593e-05, "loss": 0.6745, "step": 2886 }, { "epoch": 0.43060630919531656, "grad_norm": 1.4383896589279175, "learning_rate": 1.2705451447281289e-05, "loss": 0.6621, "step": 2887 }, { "epoch": 0.4307554627489, "grad_norm": 1.4333405494689941, "learning_rate": 1.2700799587293e-05, "loss": 0.6099, "step": 2888 }, { "epoch": 0.4309046163024834, "grad_norm": 1.320076584815979, "learning_rate": 1.2696147096785727e-05, "loss": 0.6118, "step": 2889 }, { "epoch": 0.4310537698560668, "grad_norm": 1.2157047986984253, "learning_rate": 1.2691493976845627e-05, "loss": 0.5031, "step": 2890 }, { "epoch": 0.4312029234096502, "grad_norm": 1.7147903442382812, "learning_rate": 1.2686840228559001e-05, "loss": 0.5412, "step": 2891 }, { "epoch": 0.43135207696323363, "grad_norm": 1.3250778913497925, "learning_rate": 1.2682185853012296e-05, "loss": 0.6192, "step": 2892 }, { "epoch": 0.43150123051681705, "grad_norm": 1.312738060951233, "learning_rate": 1.26775308512921e-05, "loss": 0.5612, "step": 2893 }, { "epoch": 0.43165038407040046, "grad_norm": 1.1319564580917358, "learning_rate": 1.2672875224485166e-05, "loss": 0.5307, "step": 2894 }, { "epoch": 0.4317995376239839, "grad_norm": 1.309796929359436, "learning_rate": 1.266821897367837e-05, "loss": 0.6952, "step": 2895 }, { "epoch": 0.4319486911775673, "grad_norm": 1.4658277034759521, "learning_rate": 1.2663562099958746e-05, "loss": 0.615, "step": 2896 }, { "epoch": 0.4320978447311507, "grad_norm": 1.6223219633102417, "learning_rate": 1.2658904604413468e-05, "loss": 0.5937, "step": 2897 }, { "epoch": 0.4322469982847341, "grad_norm": 1.4645618200302124, "learning_rate": 1.2654246488129864e-05, "loss": 0.5328, "step": 2898 }, { "epoch": 0.43239615183831753, "grad_norm": 1.3584039211273193, "learning_rate": 1.2649587752195397e-05, "loss": 0.7078, "step": 2899 }, { "epoch": 0.43254530539190095, "grad_norm": 1.6275959014892578, "learning_rate": 1.2644928397697683e-05, "loss": 0.5525, "step": 2900 }, { "epoch": 0.43269445894548436, "grad_norm": 1.5476053953170776, "learning_rate": 1.2640268425724469e-05, "loss": 0.6046, "step": 2901 }, { "epoch": 0.4328436124990678, "grad_norm": 1.5181632041931152, "learning_rate": 1.2635607837363665e-05, "loss": 0.6974, "step": 2902 }, { "epoch": 0.4329927660526512, "grad_norm": 1.1595547199249268, "learning_rate": 1.2630946633703314e-05, "loss": 0.5576, "step": 2903 }, { "epoch": 0.4331419196062346, "grad_norm": 1.2720162868499756, "learning_rate": 1.2626284815831597e-05, "loss": 0.579, "step": 2904 }, { "epoch": 0.433291073159818, "grad_norm": 1.5680865049362183, "learning_rate": 1.2621622384836853e-05, "loss": 0.685, "step": 2905 }, { "epoch": 0.43344022671340143, "grad_norm": 0.9926380515098572, "learning_rate": 1.2616959341807553e-05, "loss": 0.6648, "step": 2906 }, { "epoch": 0.43358938026698485, "grad_norm": 1.4296263456344604, "learning_rate": 1.2612295687832315e-05, "loss": 0.6827, "step": 2907 }, { "epoch": 0.43373853382056826, "grad_norm": 1.574023962020874, "learning_rate": 1.2607631423999898e-05, "loss": 0.4894, "step": 2908 }, { "epoch": 0.4338876873741517, "grad_norm": 1.3784430027008057, "learning_rate": 1.2602966551399206e-05, "loss": 0.5691, "step": 2909 }, { "epoch": 0.4340368409277351, "grad_norm": 1.695260763168335, "learning_rate": 1.2598301071119277e-05, "loss": 0.6281, "step": 2910 }, { "epoch": 0.4341859944813185, "grad_norm": 1.26972496509552, "learning_rate": 1.2593634984249307e-05, "loss": 0.5962, "step": 2911 }, { "epoch": 0.4343351480349019, "grad_norm": 1.396884799003601, "learning_rate": 1.2588968291878621e-05, "loss": 0.5712, "step": 2912 }, { "epoch": 0.43448430158848533, "grad_norm": 1.3649811744689941, "learning_rate": 1.2584300995096684e-05, "loss": 0.5971, "step": 2913 }, { "epoch": 0.43463345514206875, "grad_norm": 1.3364648818969727, "learning_rate": 1.257963309499311e-05, "loss": 0.5823, "step": 2914 }, { "epoch": 0.43478260869565216, "grad_norm": 1.6622114181518555, "learning_rate": 1.2574964592657648e-05, "loss": 0.5957, "step": 2915 }, { "epoch": 0.4349317622492356, "grad_norm": 1.679434061050415, "learning_rate": 1.257029548918019e-05, "loss": 0.5748, "step": 2916 }, { "epoch": 0.435080915802819, "grad_norm": 1.4607447385787964, "learning_rate": 1.2565625785650774e-05, "loss": 0.6255, "step": 2917 }, { "epoch": 0.4352300693564024, "grad_norm": 1.2890745401382446, "learning_rate": 1.2560955483159562e-05, "loss": 0.6238, "step": 2918 }, { "epoch": 0.4353792229099858, "grad_norm": 1.182826280593872, "learning_rate": 1.2556284582796874e-05, "loss": 0.5016, "step": 2919 }, { "epoch": 0.43552837646356923, "grad_norm": 1.1404504776000977, "learning_rate": 1.255161308565316e-05, "loss": 0.6312, "step": 2920 }, { "epoch": 0.43567753001715265, "grad_norm": 1.2888678312301636, "learning_rate": 1.254694099281901e-05, "loss": 0.547, "step": 2921 }, { "epoch": 0.43582668357073606, "grad_norm": 1.4660462141036987, "learning_rate": 1.2542268305385155e-05, "loss": 0.5931, "step": 2922 }, { "epoch": 0.4359758371243195, "grad_norm": 1.2698676586151123, "learning_rate": 1.2537595024442462e-05, "loss": 0.6285, "step": 2923 }, { "epoch": 0.4361249906779029, "grad_norm": 0.9506227970123291, "learning_rate": 1.2532921151081935e-05, "loss": 0.6729, "step": 2924 }, { "epoch": 0.4362741442314863, "grad_norm": 1.2974222898483276, "learning_rate": 1.2528246686394732e-05, "loss": 0.5102, "step": 2925 }, { "epoch": 0.4364232977850697, "grad_norm": 1.3783997297286987, "learning_rate": 1.2523571631472123e-05, "loss": 0.5839, "step": 2926 }, { "epoch": 0.43657245133865313, "grad_norm": 1.2726730108261108, "learning_rate": 1.2518895987405539e-05, "loss": 0.6177, "step": 2927 }, { "epoch": 0.43672160489223655, "grad_norm": 1.3347272872924805, "learning_rate": 1.2514219755286531e-05, "loss": 0.6314, "step": 2928 }, { "epoch": 0.43687075844581996, "grad_norm": 1.2808862924575806, "learning_rate": 1.2509542936206802e-05, "loss": 0.5751, "step": 2929 }, { "epoch": 0.4370199119994034, "grad_norm": 1.4561465978622437, "learning_rate": 1.2504865531258186e-05, "loss": 0.6311, "step": 2930 }, { "epoch": 0.4371690655529868, "grad_norm": 1.3744219541549683, "learning_rate": 1.250018754153265e-05, "loss": 0.6116, "step": 2931 }, { "epoch": 0.4373182191065702, "grad_norm": 1.508948802947998, "learning_rate": 1.2495508968122297e-05, "loss": 0.6018, "step": 2932 }, { "epoch": 0.4374673726601536, "grad_norm": 1.2443615198135376, "learning_rate": 1.2490829812119376e-05, "loss": 0.6047, "step": 2933 }, { "epoch": 0.43761652621373703, "grad_norm": 1.680609107017517, "learning_rate": 1.2486150074616268e-05, "loss": 0.6131, "step": 2934 }, { "epoch": 0.43776567976732045, "grad_norm": 1.4828801155090332, "learning_rate": 1.2481469756705478e-05, "loss": 0.6599, "step": 2935 }, { "epoch": 0.43791483332090386, "grad_norm": 1.5460773706436157, "learning_rate": 1.2476788859479667e-05, "loss": 0.5622, "step": 2936 }, { "epoch": 0.4380639868744873, "grad_norm": 1.657008171081543, "learning_rate": 1.247210738403161e-05, "loss": 0.6666, "step": 2937 }, { "epoch": 0.4382131404280707, "grad_norm": 1.48369562625885, "learning_rate": 1.2467425331454237e-05, "loss": 0.6416, "step": 2938 }, { "epoch": 0.4383622939816541, "grad_norm": 1.2172691822052002, "learning_rate": 1.2462742702840597e-05, "loss": 0.6246, "step": 2939 }, { "epoch": 0.4385114475352375, "grad_norm": 1.5488229990005493, "learning_rate": 1.2458059499283884e-05, "loss": 0.6078, "step": 2940 }, { "epoch": 0.43866060108882093, "grad_norm": 1.334357500076294, "learning_rate": 1.2453375721877417e-05, "loss": 0.6122, "step": 2941 }, { "epoch": 0.43880975464240435, "grad_norm": 0.8202334046363831, "learning_rate": 1.2448691371714661e-05, "loss": 0.6485, "step": 2942 }, { "epoch": 0.43895890819598776, "grad_norm": 1.4191128015518188, "learning_rate": 1.2444006449889198e-05, "loss": 0.629, "step": 2943 }, { "epoch": 0.4391080617495712, "grad_norm": 1.8242844343185425, "learning_rate": 1.2439320957494762e-05, "loss": 0.5767, "step": 2944 }, { "epoch": 0.4392572153031546, "grad_norm": 1.3301384449005127, "learning_rate": 1.2434634895625206e-05, "loss": 0.6648, "step": 2945 }, { "epoch": 0.439406368856738, "grad_norm": 1.3720117807388306, "learning_rate": 1.242994826537452e-05, "loss": 0.6657, "step": 2946 }, { "epoch": 0.4395555224103214, "grad_norm": 1.2758325338363647, "learning_rate": 1.2425261067836835e-05, "loss": 0.5405, "step": 2947 }, { "epoch": 0.43970467596390483, "grad_norm": 1.4382389783859253, "learning_rate": 1.2420573304106402e-05, "loss": 0.5776, "step": 2948 }, { "epoch": 0.43985382951748825, "grad_norm": 1.4210561513900757, "learning_rate": 1.241588497527761e-05, "loss": 0.6444, "step": 2949 }, { "epoch": 0.44000298307107166, "grad_norm": 1.3147252798080444, "learning_rate": 1.2411196082444978e-05, "loss": 0.5887, "step": 2950 }, { "epoch": 0.4401521366246551, "grad_norm": 0.9875873327255249, "learning_rate": 1.2406506626703163e-05, "loss": 0.7217, "step": 2951 }, { "epoch": 0.4403012901782385, "grad_norm": 1.4131087064743042, "learning_rate": 1.2401816609146942e-05, "loss": 0.5856, "step": 2952 }, { "epoch": 0.4404504437318219, "grad_norm": 1.3667243719100952, "learning_rate": 1.2397126030871235e-05, "loss": 0.6196, "step": 2953 }, { "epoch": 0.4405995972854053, "grad_norm": 2.870513439178467, "learning_rate": 1.2392434892971086e-05, "loss": 0.5495, "step": 2954 }, { "epoch": 0.44074875083898873, "grad_norm": 1.4485896825790405, "learning_rate": 1.2387743196541669e-05, "loss": 0.7087, "step": 2955 }, { "epoch": 0.44089790439257215, "grad_norm": 1.8617008924484253, "learning_rate": 1.2383050942678295e-05, "loss": 0.5627, "step": 2956 }, { "epoch": 0.44104705794615556, "grad_norm": 1.373354434967041, "learning_rate": 1.2378358132476395e-05, "loss": 0.6075, "step": 2957 }, { "epoch": 0.441196211499739, "grad_norm": 1.2668018341064453, "learning_rate": 1.237366476703154e-05, "loss": 0.6337, "step": 2958 }, { "epoch": 0.4413453650533224, "grad_norm": 1.3831751346588135, "learning_rate": 1.2368970847439426e-05, "loss": 0.6618, "step": 2959 }, { "epoch": 0.4414945186069058, "grad_norm": 1.2273398637771606, "learning_rate": 1.2364276374795878e-05, "loss": 0.6029, "step": 2960 }, { "epoch": 0.4416436721604892, "grad_norm": 1.4284721612930298, "learning_rate": 1.235958135019685e-05, "loss": 0.641, "step": 2961 }, { "epoch": 0.44179282571407263, "grad_norm": 0.8612790703773499, "learning_rate": 1.2354885774738428e-05, "loss": 0.6375, "step": 2962 }, { "epoch": 0.44194197926765605, "grad_norm": 1.5293192863464355, "learning_rate": 1.2350189649516818e-05, "loss": 0.6183, "step": 2963 }, { "epoch": 0.44209113282123946, "grad_norm": 1.3418625593185425, "learning_rate": 1.2345492975628368e-05, "loss": 0.5773, "step": 2964 }, { "epoch": 0.4422402863748229, "grad_norm": 1.656998634338379, "learning_rate": 1.2340795754169544e-05, "loss": 0.5657, "step": 2965 }, { "epoch": 0.4423894399284063, "grad_norm": 1.267690896987915, "learning_rate": 1.233609798623694e-05, "loss": 0.6746, "step": 2966 }, { "epoch": 0.4425385934819897, "grad_norm": 1.3446617126464844, "learning_rate": 1.233139967292728e-05, "loss": 0.6884, "step": 2967 }, { "epoch": 0.4426877470355731, "grad_norm": 1.4666199684143066, "learning_rate": 1.2326700815337422e-05, "loss": 0.6473, "step": 2968 }, { "epoch": 0.44283690058915653, "grad_norm": 1.4590274095535278, "learning_rate": 1.2322001414564336e-05, "loss": 0.5826, "step": 2969 }, { "epoch": 0.44298605414273995, "grad_norm": 1.3758817911148071, "learning_rate": 1.2317301471705134e-05, "loss": 0.578, "step": 2970 }, { "epoch": 0.44313520769632336, "grad_norm": 1.4298042058944702, "learning_rate": 1.2312600987857041e-05, "loss": 0.6524, "step": 2971 }, { "epoch": 0.4432843612499068, "grad_norm": 1.3538893461227417, "learning_rate": 1.2307899964117422e-05, "loss": 0.5927, "step": 2972 }, { "epoch": 0.4434335148034902, "grad_norm": 1.4276371002197266, "learning_rate": 1.2303198401583759e-05, "loss": 0.607, "step": 2973 }, { "epoch": 0.4435826683570736, "grad_norm": 1.3324967622756958, "learning_rate": 1.2298496301353657e-05, "loss": 0.5068, "step": 2974 }, { "epoch": 0.443731821910657, "grad_norm": 1.2273110151290894, "learning_rate": 1.229379366452486e-05, "loss": 0.54, "step": 2975 }, { "epoch": 0.44388097546424043, "grad_norm": 1.3016968965530396, "learning_rate": 1.228909049219522e-05, "loss": 0.5389, "step": 2976 }, { "epoch": 0.44403012901782385, "grad_norm": 1.4224015474319458, "learning_rate": 1.2284386785462728e-05, "loss": 0.563, "step": 2977 }, { "epoch": 0.44417928257140726, "grad_norm": 1.4534032344818115, "learning_rate": 1.2279682545425495e-05, "loss": 0.5669, "step": 2978 }, { "epoch": 0.4443284361249907, "grad_norm": 1.7046502828598022, "learning_rate": 1.2274977773181753e-05, "loss": 0.6534, "step": 2979 }, { "epoch": 0.4444775896785741, "grad_norm": 1.2831318378448486, "learning_rate": 1.2270272469829862e-05, "loss": 0.5316, "step": 2980 }, { "epoch": 0.4446267432321575, "grad_norm": 1.377819538116455, "learning_rate": 1.2265566636468309e-05, "loss": 0.7118, "step": 2981 }, { "epoch": 0.4447758967857409, "grad_norm": 1.7639020681381226, "learning_rate": 1.2260860274195694e-05, "loss": 0.5508, "step": 2982 }, { "epoch": 0.44492505033932434, "grad_norm": 1.2251850366592407, "learning_rate": 1.2256153384110754e-05, "loss": 0.5915, "step": 2983 }, { "epoch": 0.44507420389290775, "grad_norm": 1.2388451099395752, "learning_rate": 1.2251445967312341e-05, "loss": 0.6353, "step": 2984 }, { "epoch": 0.44522335744649116, "grad_norm": 1.4759005308151245, "learning_rate": 1.2246738024899424e-05, "loss": 0.6588, "step": 2985 }, { "epoch": 0.4453725110000746, "grad_norm": 1.3825047016143799, "learning_rate": 1.2242029557971116e-05, "loss": 0.624, "step": 2986 }, { "epoch": 0.445521664553658, "grad_norm": 1.190752387046814, "learning_rate": 1.223732056762663e-05, "loss": 0.5715, "step": 2987 }, { "epoch": 0.4456708181072414, "grad_norm": 1.384837031364441, "learning_rate": 1.2232611054965308e-05, "loss": 0.5916, "step": 2988 }, { "epoch": 0.4458199716608248, "grad_norm": 1.7721785306930542, "learning_rate": 1.2227901021086624e-05, "loss": 0.5791, "step": 2989 }, { "epoch": 0.44596912521440824, "grad_norm": 1.4821721315383911, "learning_rate": 1.222319046709016e-05, "loss": 0.5887, "step": 2990 }, { "epoch": 0.44611827876799165, "grad_norm": 1.302541732788086, "learning_rate": 1.2218479394075624e-05, "loss": 0.626, "step": 2991 }, { "epoch": 0.44626743232157506, "grad_norm": 1.2485913038253784, "learning_rate": 1.2213767803142854e-05, "loss": 0.6068, "step": 2992 }, { "epoch": 0.4464165858751585, "grad_norm": 1.5762100219726562, "learning_rate": 1.220905569539179e-05, "loss": 0.6612, "step": 2993 }, { "epoch": 0.4465657394287419, "grad_norm": 1.3548043966293335, "learning_rate": 1.2204343071922511e-05, "loss": 0.6375, "step": 2994 }, { "epoch": 0.4467148929823253, "grad_norm": 1.6688846349716187, "learning_rate": 1.2199629933835208e-05, "loss": 0.6762, "step": 2995 }, { "epoch": 0.4468640465359087, "grad_norm": 1.2327684164047241, "learning_rate": 1.2194916282230192e-05, "loss": 0.6044, "step": 2996 }, { "epoch": 0.44701320008949214, "grad_norm": 1.3564848899841309, "learning_rate": 1.21902021182079e-05, "loss": 0.6383, "step": 2997 }, { "epoch": 0.44716235364307555, "grad_norm": 3.6814780235290527, "learning_rate": 1.2185487442868876e-05, "loss": 0.6072, "step": 2998 }, { "epoch": 0.44731150719665896, "grad_norm": 1.4640330076217651, "learning_rate": 1.2180772257313793e-05, "loss": 0.6715, "step": 2999 }, { "epoch": 0.4474606607502424, "grad_norm": 1.5582736730575562, "learning_rate": 1.2176056562643448e-05, "loss": 0.6692, "step": 3000 }, { "epoch": 0.4476098143038258, "grad_norm": 1.510858178138733, "learning_rate": 1.2171340359958742e-05, "loss": 0.5952, "step": 3001 }, { "epoch": 0.4477589678574092, "grad_norm": 1.4165573120117188, "learning_rate": 1.2166623650360707e-05, "loss": 0.6333, "step": 3002 }, { "epoch": 0.4479081214109926, "grad_norm": 1.4301799535751343, "learning_rate": 1.216190643495049e-05, "loss": 0.606, "step": 3003 }, { "epoch": 0.44805727496457604, "grad_norm": 1.4153624773025513, "learning_rate": 1.2157188714829353e-05, "loss": 0.5891, "step": 3004 }, { "epoch": 0.44820642851815945, "grad_norm": 1.4282846450805664, "learning_rate": 1.2152470491098678e-05, "loss": 0.5763, "step": 3005 }, { "epoch": 0.44835558207174286, "grad_norm": 1.6309911012649536, "learning_rate": 1.2147751764859967e-05, "loss": 0.5881, "step": 3006 }, { "epoch": 0.4485047356253263, "grad_norm": 1.3341093063354492, "learning_rate": 1.2143032537214832e-05, "loss": 0.7106, "step": 3007 }, { "epoch": 0.4486538891789097, "grad_norm": 1.2838845252990723, "learning_rate": 1.2138312809265012e-05, "loss": 0.5502, "step": 3008 }, { "epoch": 0.4488030427324931, "grad_norm": 1.640278697013855, "learning_rate": 1.2133592582112354e-05, "loss": 0.679, "step": 3009 }, { "epoch": 0.4489521962860765, "grad_norm": 1.4003946781158447, "learning_rate": 1.2128871856858828e-05, "loss": 0.5989, "step": 3010 }, { "epoch": 0.44910134983965994, "grad_norm": 1.4649884700775146, "learning_rate": 1.2124150634606515e-05, "loss": 0.6398, "step": 3011 }, { "epoch": 0.44925050339324335, "grad_norm": 1.3663169145584106, "learning_rate": 1.211942891645762e-05, "loss": 0.5425, "step": 3012 }, { "epoch": 0.44939965694682676, "grad_norm": 1.3811053037643433, "learning_rate": 1.2114706703514452e-05, "loss": 0.5646, "step": 3013 }, { "epoch": 0.4495488105004102, "grad_norm": 1.3072563409805298, "learning_rate": 1.2109983996879446e-05, "loss": 0.642, "step": 3014 }, { "epoch": 0.4496979640539936, "grad_norm": 1.2295228242874146, "learning_rate": 1.2105260797655144e-05, "loss": 0.5731, "step": 3015 }, { "epoch": 0.449847117607577, "grad_norm": 1.5919591188430786, "learning_rate": 1.2100537106944213e-05, "loss": 0.657, "step": 3016 }, { "epoch": 0.4499962711611604, "grad_norm": 1.509827971458435, "learning_rate": 1.2095812925849424e-05, "loss": 0.7284, "step": 3017 }, { "epoch": 0.45014542471474384, "grad_norm": 1.7247333526611328, "learning_rate": 1.2091088255473669e-05, "loss": 0.563, "step": 3018 }, { "epoch": 0.45029457826832725, "grad_norm": 1.3005133867263794, "learning_rate": 1.2086363096919953e-05, "loss": 0.6154, "step": 3019 }, { "epoch": 0.45044373182191066, "grad_norm": 1.2678179740905762, "learning_rate": 1.2081637451291393e-05, "loss": 0.5775, "step": 3020 }, { "epoch": 0.4505928853754941, "grad_norm": 1.5079742670059204, "learning_rate": 1.2076911319691222e-05, "loss": 0.6803, "step": 3021 }, { "epoch": 0.4507420389290775, "grad_norm": 0.7661880254745483, "learning_rate": 1.2072184703222791e-05, "loss": 0.5758, "step": 3022 }, { "epoch": 0.4508911924826609, "grad_norm": 2.3838038444519043, "learning_rate": 1.2067457602989552e-05, "loss": 0.5979, "step": 3023 }, { "epoch": 0.4510403460362443, "grad_norm": 1.7023390531539917, "learning_rate": 1.2062730020095073e-05, "loss": 0.574, "step": 3024 }, { "epoch": 0.45118949958982774, "grad_norm": 1.295325517654419, "learning_rate": 1.205800195564305e-05, "loss": 0.6376, "step": 3025 }, { "epoch": 0.45133865314341115, "grad_norm": 1.2944139242172241, "learning_rate": 1.2053273410737275e-05, "loss": 0.6159, "step": 3026 }, { "epoch": 0.45148780669699456, "grad_norm": 1.3055732250213623, "learning_rate": 1.2048544386481656e-05, "loss": 0.6679, "step": 3027 }, { "epoch": 0.451636960250578, "grad_norm": 1.4171578884124756, "learning_rate": 1.204381488398021e-05, "loss": 0.5638, "step": 3028 }, { "epoch": 0.4517861138041614, "grad_norm": 1.405145525932312, "learning_rate": 1.2039084904337082e-05, "loss": 0.6025, "step": 3029 }, { "epoch": 0.4519352673577448, "grad_norm": 2.2469794750213623, "learning_rate": 1.2034354448656505e-05, "loss": 0.6555, "step": 3030 }, { "epoch": 0.4520844209113282, "grad_norm": 1.3778496980667114, "learning_rate": 1.2029623518042837e-05, "loss": 0.5823, "step": 3031 }, { "epoch": 0.45223357446491164, "grad_norm": 1.4256372451782227, "learning_rate": 1.2024892113600544e-05, "loss": 0.6477, "step": 3032 }, { "epoch": 0.45238272801849505, "grad_norm": 1.3926936388015747, "learning_rate": 1.2020160236434203e-05, "loss": 0.629, "step": 3033 }, { "epoch": 0.45253188157207846, "grad_norm": 1.3530102968215942, "learning_rate": 1.2015427887648505e-05, "loss": 0.6458, "step": 3034 }, { "epoch": 0.4526810351256619, "grad_norm": 1.2881691455841064, "learning_rate": 1.2010695068348238e-05, "loss": 0.708, "step": 3035 }, { "epoch": 0.4528301886792453, "grad_norm": 1.5435696840286255, "learning_rate": 1.2005961779638322e-05, "loss": 0.5724, "step": 3036 }, { "epoch": 0.4529793422328287, "grad_norm": 1.9229711294174194, "learning_rate": 1.2001228022623762e-05, "loss": 0.5622, "step": 3037 }, { "epoch": 0.4531284957864121, "grad_norm": 1.2454609870910645, "learning_rate": 1.1996493798409687e-05, "loss": 0.5827, "step": 3038 }, { "epoch": 0.45327764933999554, "grad_norm": 1.8612465858459473, "learning_rate": 1.1991759108101335e-05, "loss": 0.5344, "step": 3039 }, { "epoch": 0.45342680289357895, "grad_norm": 1.3347642421722412, "learning_rate": 1.1987023952804049e-05, "loss": 0.5315, "step": 3040 }, { "epoch": 0.45357595644716237, "grad_norm": 2.4795494079589844, "learning_rate": 1.1982288333623277e-05, "loss": 0.6235, "step": 3041 }, { "epoch": 0.4537251100007458, "grad_norm": 1.726442813873291, "learning_rate": 1.1977552251664585e-05, "loss": 0.5639, "step": 3042 }, { "epoch": 0.4538742635543292, "grad_norm": 1.221500039100647, "learning_rate": 1.197281570803364e-05, "loss": 0.5878, "step": 3043 }, { "epoch": 0.4540234171079126, "grad_norm": 1.3631987571716309, "learning_rate": 1.1968078703836218e-05, "loss": 0.5603, "step": 3044 }, { "epoch": 0.454172570661496, "grad_norm": 1.3440566062927246, "learning_rate": 1.1963341240178206e-05, "loss": 0.5548, "step": 3045 }, { "epoch": 0.45432172421507944, "grad_norm": 1.4926748275756836, "learning_rate": 1.1958603318165586e-05, "loss": 0.6778, "step": 3046 }, { "epoch": 0.45447087776866285, "grad_norm": 1.686561942100525, "learning_rate": 1.1953864938904467e-05, "loss": 0.5985, "step": 3047 }, { "epoch": 0.45462003132224627, "grad_norm": 1.4514939785003662, "learning_rate": 1.194912610350105e-05, "loss": 0.5778, "step": 3048 }, { "epoch": 0.4547691848758297, "grad_norm": 1.2954976558685303, "learning_rate": 1.1944386813061644e-05, "loss": 0.6075, "step": 3049 }, { "epoch": 0.4549183384294131, "grad_norm": 1.5054324865341187, "learning_rate": 1.193964706869267e-05, "loss": 0.6339, "step": 3050 }, { "epoch": 0.4550674919829965, "grad_norm": 1.7090013027191162, "learning_rate": 1.1934906871500654e-05, "loss": 0.6121, "step": 3051 }, { "epoch": 0.4552166455365799, "grad_norm": 1.1749073266983032, "learning_rate": 1.1930166222592217e-05, "loss": 0.6288, "step": 3052 }, { "epoch": 0.45536579909016334, "grad_norm": 1.304486870765686, "learning_rate": 1.1925425123074102e-05, "loss": 0.5473, "step": 3053 }, { "epoch": 0.45551495264374675, "grad_norm": 1.3589576482772827, "learning_rate": 1.1920683574053145e-05, "loss": 0.642, "step": 3054 }, { "epoch": 0.45566410619733017, "grad_norm": 1.458654761314392, "learning_rate": 1.1915941576636293e-05, "loss": 0.6665, "step": 3055 }, { "epoch": 0.4558132597509136, "grad_norm": 1.2676576375961304, "learning_rate": 1.1911199131930593e-05, "loss": 0.6306, "step": 3056 }, { "epoch": 0.455962413304497, "grad_norm": 0.828477680683136, "learning_rate": 1.1906456241043203e-05, "loss": 0.6728, "step": 3057 }, { "epoch": 0.4561115668580804, "grad_norm": 1.2985824346542358, "learning_rate": 1.190171290508138e-05, "loss": 0.6822, "step": 3058 }, { "epoch": 0.4562607204116638, "grad_norm": 1.2757413387298584, "learning_rate": 1.1896969125152482e-05, "loss": 0.5704, "step": 3059 }, { "epoch": 0.45640987396524724, "grad_norm": 1.2067838907241821, "learning_rate": 1.189222490236398e-05, "loss": 0.524, "step": 3060 }, { "epoch": 0.45655902751883065, "grad_norm": 1.2220978736877441, "learning_rate": 1.1887480237823443e-05, "loss": 0.585, "step": 3061 }, { "epoch": 0.45670818107241407, "grad_norm": 1.3874657154083252, "learning_rate": 1.1882735132638544e-05, "loss": 0.6262, "step": 3062 }, { "epoch": 0.4568573346259975, "grad_norm": 1.2357858419418335, "learning_rate": 1.1877989587917046e-05, "loss": 0.6557, "step": 3063 }, { "epoch": 0.4570064881795809, "grad_norm": 1.8322066068649292, "learning_rate": 1.1873243604766846e-05, "loss": 0.6306, "step": 3064 }, { "epoch": 0.4571556417331643, "grad_norm": 1.2957563400268555, "learning_rate": 1.1868497184295916e-05, "loss": 0.5634, "step": 3065 }, { "epoch": 0.4573047952867477, "grad_norm": 1.2058967351913452, "learning_rate": 1.1863750327612333e-05, "loss": 0.6311, "step": 3066 }, { "epoch": 0.45745394884033114, "grad_norm": 1.3018174171447754, "learning_rate": 1.1859003035824289e-05, "loss": 0.5161, "step": 3067 }, { "epoch": 0.45760310239391455, "grad_norm": 1.614906907081604, "learning_rate": 1.1854255310040062e-05, "loss": 0.5679, "step": 3068 }, { "epoch": 0.45775225594749797, "grad_norm": 1.4113761186599731, "learning_rate": 1.1849507151368045e-05, "loss": 0.5921, "step": 3069 }, { "epoch": 0.4579014095010814, "grad_norm": 2.34480619430542, "learning_rate": 1.1844758560916728e-05, "loss": 0.6097, "step": 3070 }, { "epoch": 0.4580505630546648, "grad_norm": 1.3503657579421997, "learning_rate": 1.184000953979469e-05, "loss": 0.5923, "step": 3071 }, { "epoch": 0.4581997166082482, "grad_norm": 1.297326683998108, "learning_rate": 1.183526008911063e-05, "loss": 0.6, "step": 3072 }, { "epoch": 0.4583488701618316, "grad_norm": 1.3796523809432983, "learning_rate": 1.1830510209973335e-05, "loss": 0.6116, "step": 3073 }, { "epoch": 0.45849802371541504, "grad_norm": 1.205617904663086, "learning_rate": 1.1825759903491694e-05, "loss": 0.5384, "step": 3074 }, { "epoch": 0.45864717726899845, "grad_norm": 1.4181894063949585, "learning_rate": 1.1821009170774697e-05, "loss": 0.6682, "step": 3075 }, { "epoch": 0.45879633082258187, "grad_norm": 1.4070316553115845, "learning_rate": 1.1816258012931434e-05, "loss": 0.5626, "step": 3076 }, { "epoch": 0.4589454843761653, "grad_norm": 0.850975513458252, "learning_rate": 1.1811506431071088e-05, "loss": 0.6391, "step": 3077 }, { "epoch": 0.4590946379297487, "grad_norm": 1.6081602573394775, "learning_rate": 1.1806754426302954e-05, "loss": 0.6039, "step": 3078 }, { "epoch": 0.4592437914833321, "grad_norm": 1.3498306274414062, "learning_rate": 1.1802001999736412e-05, "loss": 0.5859, "step": 3079 }, { "epoch": 0.4593929450369155, "grad_norm": 1.3134981393814087, "learning_rate": 1.179724915248095e-05, "loss": 0.5955, "step": 3080 }, { "epoch": 0.45954209859049894, "grad_norm": 0.8272199034690857, "learning_rate": 1.1792495885646148e-05, "loss": 0.6349, "step": 3081 }, { "epoch": 0.45969125214408235, "grad_norm": 1.3574316501617432, "learning_rate": 1.1787742200341687e-05, "loss": 0.5819, "step": 3082 }, { "epoch": 0.45984040569766577, "grad_norm": 1.6638301610946655, "learning_rate": 1.1782988097677349e-05, "loss": 0.598, "step": 3083 }, { "epoch": 0.4599895592512492, "grad_norm": 1.28603994846344, "learning_rate": 1.1778233578763005e-05, "loss": 0.6249, "step": 3084 }, { "epoch": 0.4601387128048326, "grad_norm": 1.411498785018921, "learning_rate": 1.1773478644708631e-05, "loss": 0.635, "step": 3085 }, { "epoch": 0.460287866358416, "grad_norm": 1.2276073694229126, "learning_rate": 1.1768723296624293e-05, "loss": 0.5824, "step": 3086 }, { "epoch": 0.4604370199119994, "grad_norm": 1.115873098373413, "learning_rate": 1.1763967535620164e-05, "loss": 0.5052, "step": 3087 }, { "epoch": 0.46058617346558284, "grad_norm": 1.305465817451477, "learning_rate": 1.1759211362806501e-05, "loss": 0.5752, "step": 3088 }, { "epoch": 0.46073532701916625, "grad_norm": 1.306601643562317, "learning_rate": 1.1754454779293669e-05, "loss": 0.5562, "step": 3089 }, { "epoch": 0.46088448057274967, "grad_norm": 1.4293071031570435, "learning_rate": 1.1749697786192113e-05, "loss": 0.5647, "step": 3090 }, { "epoch": 0.4610336341263331, "grad_norm": 1.503427505493164, "learning_rate": 1.1744940384612394e-05, "loss": 0.6436, "step": 3091 }, { "epoch": 0.4611827876799165, "grad_norm": 1.4617866277694702, "learning_rate": 1.1740182575665154e-05, "loss": 0.6294, "step": 3092 }, { "epoch": 0.4613319412334999, "grad_norm": 1.156212329864502, "learning_rate": 1.1735424360461134e-05, "loss": 0.5137, "step": 3093 }, { "epoch": 0.4614810947870833, "grad_norm": 1.5833170413970947, "learning_rate": 1.1730665740111164e-05, "loss": 0.5702, "step": 3094 }, { "epoch": 0.46163024834066674, "grad_norm": 1.5328749418258667, "learning_rate": 1.1725906715726185e-05, "loss": 0.5946, "step": 3095 }, { "epoch": 0.46177940189425015, "grad_norm": 1.34681236743927, "learning_rate": 1.1721147288417214e-05, "loss": 0.6228, "step": 3096 }, { "epoch": 0.46192855544783357, "grad_norm": 1.2463065385818481, "learning_rate": 1.1716387459295375e-05, "loss": 0.5945, "step": 3097 }, { "epoch": 0.462077709001417, "grad_norm": 1.193031668663025, "learning_rate": 1.1711627229471876e-05, "loss": 0.5531, "step": 3098 }, { "epoch": 0.4622268625550004, "grad_norm": 1.1892788410186768, "learning_rate": 1.1706866600058025e-05, "loss": 0.6167, "step": 3099 }, { "epoch": 0.4623760161085838, "grad_norm": 1.5464938879013062, "learning_rate": 1.1702105572165223e-05, "loss": 0.6926, "step": 3100 }, { "epoch": 0.4625251696621672, "grad_norm": 1.3795039653778076, "learning_rate": 1.1697344146904964e-05, "loss": 0.6044, "step": 3101 }, { "epoch": 0.46267432321575064, "grad_norm": 1.1625953912734985, "learning_rate": 1.1692582325388824e-05, "loss": 0.4907, "step": 3102 }, { "epoch": 0.46282347676933405, "grad_norm": 1.5473356246948242, "learning_rate": 1.1687820108728491e-05, "loss": 0.6061, "step": 3103 }, { "epoch": 0.46297263032291747, "grad_norm": 1.264533281326294, "learning_rate": 1.1683057498035733e-05, "loss": 0.6391, "step": 3104 }, { "epoch": 0.4631217838765009, "grad_norm": 1.3295261859893799, "learning_rate": 1.1678294494422406e-05, "loss": 0.6286, "step": 3105 }, { "epoch": 0.4632709374300843, "grad_norm": 1.3927392959594727, "learning_rate": 1.167353109900047e-05, "loss": 0.6514, "step": 3106 }, { "epoch": 0.4634200909836677, "grad_norm": 1.4175844192504883, "learning_rate": 1.1668767312881967e-05, "loss": 0.5965, "step": 3107 }, { "epoch": 0.4635692445372511, "grad_norm": 1.2978308200836182, "learning_rate": 1.1664003137179036e-05, "loss": 0.6257, "step": 3108 }, { "epoch": 0.46371839809083454, "grad_norm": 1.527985692024231, "learning_rate": 1.1659238573003903e-05, "loss": 0.5559, "step": 3109 }, { "epoch": 0.46386755164441795, "grad_norm": 1.4274119138717651, "learning_rate": 1.1654473621468888e-05, "loss": 0.7213, "step": 3110 }, { "epoch": 0.46401670519800137, "grad_norm": 1.3694255352020264, "learning_rate": 1.1649708283686394e-05, "loss": 0.6463, "step": 3111 }, { "epoch": 0.4641658587515848, "grad_norm": 1.3245636224746704, "learning_rate": 1.1644942560768926e-05, "loss": 0.6124, "step": 3112 }, { "epoch": 0.4643150123051682, "grad_norm": 1.5947943925857544, "learning_rate": 1.1640176453829066e-05, "loss": 0.6655, "step": 3113 }, { "epoch": 0.4644641658587516, "grad_norm": 1.2968270778656006, "learning_rate": 1.16354099639795e-05, "loss": 0.6118, "step": 3114 }, { "epoch": 0.464613319412335, "grad_norm": 1.3235737085342407, "learning_rate": 1.163064309233299e-05, "loss": 0.4702, "step": 3115 }, { "epoch": 0.46476247296591844, "grad_norm": 1.1446477174758911, "learning_rate": 1.1625875840002392e-05, "loss": 0.6072, "step": 3116 }, { "epoch": 0.46491162651950185, "grad_norm": 1.2059239149093628, "learning_rate": 1.1621108208100657e-05, "loss": 0.6167, "step": 3117 }, { "epoch": 0.46506078007308527, "grad_norm": 1.4052633047103882, "learning_rate": 1.161634019774082e-05, "loss": 0.6706, "step": 3118 }, { "epoch": 0.4652099336266687, "grad_norm": 1.2738192081451416, "learning_rate": 1.1611571810035991e-05, "loss": 0.5064, "step": 3119 }, { "epoch": 0.4653590871802521, "grad_norm": 1.378536581993103, "learning_rate": 1.1606803046099392e-05, "loss": 0.6034, "step": 3120 }, { "epoch": 0.4655082407338355, "grad_norm": 1.608900547027588, "learning_rate": 1.1602033907044324e-05, "loss": 0.5327, "step": 3121 }, { "epoch": 0.4656573942874189, "grad_norm": 1.2364070415496826, "learning_rate": 1.1597264393984165e-05, "loss": 0.5725, "step": 3122 }, { "epoch": 0.46580654784100234, "grad_norm": 1.236802101135254, "learning_rate": 1.1592494508032393e-05, "loss": 0.5258, "step": 3123 }, { "epoch": 0.46595570139458575, "grad_norm": 1.325352430343628, "learning_rate": 1.1587724250302564e-05, "loss": 0.6803, "step": 3124 }, { "epoch": 0.46610485494816917, "grad_norm": 1.4221678972244263, "learning_rate": 1.1582953621908328e-05, "loss": 0.6434, "step": 3125 }, { "epoch": 0.4662540085017526, "grad_norm": 1.4304556846618652, "learning_rate": 1.1578182623963422e-05, "loss": 0.5886, "step": 3126 }, { "epoch": 0.466403162055336, "grad_norm": 1.3152570724487305, "learning_rate": 1.1573411257581659e-05, "loss": 0.6008, "step": 3127 }, { "epoch": 0.4665523156089194, "grad_norm": 1.3201889991760254, "learning_rate": 1.1568639523876955e-05, "loss": 0.6388, "step": 3128 }, { "epoch": 0.46670146916250277, "grad_norm": 1.2944974899291992, "learning_rate": 1.1563867423963291e-05, "loss": 0.5942, "step": 3129 }, { "epoch": 0.4668506227160862, "grad_norm": 1.4368386268615723, "learning_rate": 1.155909495895475e-05, "loss": 0.5763, "step": 3130 }, { "epoch": 0.4669997762696696, "grad_norm": 1.342576265335083, "learning_rate": 1.1554322129965495e-05, "loss": 0.588, "step": 3131 }, { "epoch": 0.467148929823253, "grad_norm": 1.3105968236923218, "learning_rate": 1.1549548938109775e-05, "loss": 0.614, "step": 3132 }, { "epoch": 0.4672980833768364, "grad_norm": 1.49301278591156, "learning_rate": 1.1544775384501914e-05, "loss": 0.7238, "step": 3133 }, { "epoch": 0.46744723693041984, "grad_norm": 0.9436220526695251, "learning_rate": 1.1540001470256339e-05, "loss": 0.6793, "step": 3134 }, { "epoch": 0.46759639048400325, "grad_norm": 1.4037415981292725, "learning_rate": 1.1535227196487545e-05, "loss": 0.5622, "step": 3135 }, { "epoch": 0.46774554403758667, "grad_norm": 1.4555433988571167, "learning_rate": 1.1530452564310117e-05, "loss": 0.6302, "step": 3136 }, { "epoch": 0.4678946975911701, "grad_norm": 1.5780012607574463, "learning_rate": 1.1525677574838728e-05, "loss": 0.5841, "step": 3137 }, { "epoch": 0.4680438511447535, "grad_norm": 1.4990370273590088, "learning_rate": 1.1520902229188122e-05, "loss": 0.6539, "step": 3138 }, { "epoch": 0.4681930046983369, "grad_norm": 1.1819881200790405, "learning_rate": 1.151612652847314e-05, "loss": 0.5535, "step": 3139 }, { "epoch": 0.4683421582519203, "grad_norm": 1.4944770336151123, "learning_rate": 1.1511350473808699e-05, "loss": 0.5938, "step": 3140 }, { "epoch": 0.46849131180550374, "grad_norm": 1.683229684829712, "learning_rate": 1.1506574066309796e-05, "loss": 0.6239, "step": 3141 }, { "epoch": 0.46864046535908715, "grad_norm": 1.4048620462417603, "learning_rate": 1.150179730709152e-05, "loss": 0.6324, "step": 3142 }, { "epoch": 0.46878961891267057, "grad_norm": 1.3313919305801392, "learning_rate": 1.1497020197269033e-05, "loss": 0.5865, "step": 3143 }, { "epoch": 0.468938772466254, "grad_norm": 1.614654779434204, "learning_rate": 1.1492242737957582e-05, "loss": 0.5695, "step": 3144 }, { "epoch": 0.4690879260198374, "grad_norm": 1.7979727983474731, "learning_rate": 1.1487464930272496e-05, "loss": 0.5279, "step": 3145 }, { "epoch": 0.4692370795734208, "grad_norm": 1.1714813709259033, "learning_rate": 1.1482686775329183e-05, "loss": 0.624, "step": 3146 }, { "epoch": 0.4693862331270042, "grad_norm": 1.319219708442688, "learning_rate": 1.1477908274243135e-05, "loss": 0.6063, "step": 3147 }, { "epoch": 0.46953538668058764, "grad_norm": 1.4279792308807373, "learning_rate": 1.1473129428129926e-05, "loss": 0.6158, "step": 3148 }, { "epoch": 0.46968454023417106, "grad_norm": 1.3327264785766602, "learning_rate": 1.1468350238105203e-05, "loss": 0.6293, "step": 3149 }, { "epoch": 0.46983369378775447, "grad_norm": 1.2260433435440063, "learning_rate": 1.1463570705284705e-05, "loss": 0.4848, "step": 3150 }, { "epoch": 0.4699828473413379, "grad_norm": 1.6274962425231934, "learning_rate": 1.1458790830784242e-05, "loss": 0.6465, "step": 3151 }, { "epoch": 0.4701320008949213, "grad_norm": 1.460648536682129, "learning_rate": 1.1454010615719701e-05, "loss": 0.5753, "step": 3152 }, { "epoch": 0.4702811544485047, "grad_norm": 1.48252272605896, "learning_rate": 1.1449230061207064e-05, "loss": 0.619, "step": 3153 }, { "epoch": 0.4704303080020881, "grad_norm": 1.3861128091812134, "learning_rate": 1.1444449168362375e-05, "loss": 0.632, "step": 3154 }, { "epoch": 0.47057946155567154, "grad_norm": 1.3202292919158936, "learning_rate": 1.1439667938301762e-05, "loss": 0.5999, "step": 3155 }, { "epoch": 0.47072861510925496, "grad_norm": 1.1904553174972534, "learning_rate": 1.1434886372141443e-05, "loss": 0.6351, "step": 3156 }, { "epoch": 0.47087776866283837, "grad_norm": 1.2496788501739502, "learning_rate": 1.1430104470997698e-05, "loss": 0.6138, "step": 3157 }, { "epoch": 0.4710269222164218, "grad_norm": 0.9090105295181274, "learning_rate": 1.1425322235986893e-05, "loss": 0.674, "step": 3158 }, { "epoch": 0.4711760757700052, "grad_norm": 1.5114023685455322, "learning_rate": 1.1420539668225475e-05, "loss": 0.5261, "step": 3159 }, { "epoch": 0.4713252293235886, "grad_norm": 1.5947058200836182, "learning_rate": 1.141575676882996e-05, "loss": 0.5794, "step": 3160 }, { "epoch": 0.471474382877172, "grad_norm": 1.2649372816085815, "learning_rate": 1.141097353891695e-05, "loss": 0.6496, "step": 3161 }, { "epoch": 0.47162353643075544, "grad_norm": 1.3863120079040527, "learning_rate": 1.1406189979603122e-05, "loss": 0.6089, "step": 3162 }, { "epoch": 0.47177268998433886, "grad_norm": 1.244263768196106, "learning_rate": 1.1401406092005226e-05, "loss": 0.6015, "step": 3163 }, { "epoch": 0.47192184353792227, "grad_norm": 2.11189866065979, "learning_rate": 1.139662187724009e-05, "loss": 0.612, "step": 3164 }, { "epoch": 0.4720709970915057, "grad_norm": 1.3479894399642944, "learning_rate": 1.1391837336424625e-05, "loss": 0.644, "step": 3165 }, { "epoch": 0.4722201506450891, "grad_norm": 1.3285026550292969, "learning_rate": 1.1387052470675806e-05, "loss": 0.5944, "step": 3166 }, { "epoch": 0.4723693041986725, "grad_norm": 1.2383962869644165, "learning_rate": 1.1382267281110697e-05, "loss": 0.6386, "step": 3167 }, { "epoch": 0.4725184577522559, "grad_norm": 1.3094176054000854, "learning_rate": 1.1377481768846426e-05, "loss": 0.6034, "step": 3168 }, { "epoch": 0.47266761130583934, "grad_norm": 1.2780113220214844, "learning_rate": 1.1372695935000204e-05, "loss": 0.4657, "step": 3169 }, { "epoch": 0.47281676485942276, "grad_norm": 1.2465600967407227, "learning_rate": 1.1367909780689315e-05, "loss": 0.6194, "step": 3170 }, { "epoch": 0.47296591841300617, "grad_norm": 1.34720778465271, "learning_rate": 1.1363123307031118e-05, "loss": 0.5392, "step": 3171 }, { "epoch": 0.4731150719665896, "grad_norm": 1.4701436758041382, "learning_rate": 1.135833651514304e-05, "loss": 0.6791, "step": 3172 }, { "epoch": 0.473264225520173, "grad_norm": 1.2824476957321167, "learning_rate": 1.1353549406142596e-05, "loss": 0.5677, "step": 3173 }, { "epoch": 0.4734133790737564, "grad_norm": 1.5104235410690308, "learning_rate": 1.1348761981147366e-05, "loss": 0.6242, "step": 3174 }, { "epoch": 0.4735625326273398, "grad_norm": 1.9762814044952393, "learning_rate": 1.1343974241274998e-05, "loss": 0.6107, "step": 3175 }, { "epoch": 0.47371168618092324, "grad_norm": 1.841521143913269, "learning_rate": 1.1339186187643229e-05, "loss": 0.6812, "step": 3176 }, { "epoch": 0.47386083973450666, "grad_norm": 1.4956399202346802, "learning_rate": 1.1334397821369858e-05, "loss": 0.5483, "step": 3177 }, { "epoch": 0.47400999328809007, "grad_norm": 1.2552597522735596, "learning_rate": 1.1329609143572757e-05, "loss": 0.5248, "step": 3178 }, { "epoch": 0.4741591468416735, "grad_norm": 1.4801335334777832, "learning_rate": 1.1324820155369878e-05, "loss": 0.6684, "step": 3179 }, { "epoch": 0.4743083003952569, "grad_norm": 1.3940550088882446, "learning_rate": 1.1320030857879238e-05, "loss": 0.6526, "step": 3180 }, { "epoch": 0.4744574539488403, "grad_norm": 1.3824905157089233, "learning_rate": 1.1315241252218929e-05, "loss": 0.5861, "step": 3181 }, { "epoch": 0.4746066075024237, "grad_norm": 1.4952727556228638, "learning_rate": 1.131045133950712e-05, "loss": 0.6972, "step": 3182 }, { "epoch": 0.47475576105600714, "grad_norm": 1.4284406900405884, "learning_rate": 1.130566112086204e-05, "loss": 0.5719, "step": 3183 }, { "epoch": 0.47490491460959056, "grad_norm": 1.2481786012649536, "learning_rate": 1.1300870597402e-05, "loss": 0.6697, "step": 3184 }, { "epoch": 0.47505406816317397, "grad_norm": 1.3727505207061768, "learning_rate": 1.1296079770245378e-05, "loss": 0.6341, "step": 3185 }, { "epoch": 0.4752032217167574, "grad_norm": 1.4281083345413208, "learning_rate": 1.1291288640510623e-05, "loss": 0.5614, "step": 3186 }, { "epoch": 0.4753523752703408, "grad_norm": 1.3504784107208252, "learning_rate": 1.1286497209316256e-05, "loss": 0.626, "step": 3187 }, { "epoch": 0.4755015288239242, "grad_norm": 1.1984691619873047, "learning_rate": 1.1281705477780866e-05, "loss": 0.5993, "step": 3188 }, { "epoch": 0.4756506823775076, "grad_norm": 1.528154969215393, "learning_rate": 1.1276913447023114e-05, "loss": 0.5975, "step": 3189 }, { "epoch": 0.47579983593109104, "grad_norm": 1.415468692779541, "learning_rate": 1.1272121118161729e-05, "loss": 0.5476, "step": 3190 }, { "epoch": 0.47594898948467446, "grad_norm": 1.2418889999389648, "learning_rate": 1.1267328492315513e-05, "loss": 0.4725, "step": 3191 }, { "epoch": 0.47609814303825787, "grad_norm": 1.3830853700637817, "learning_rate": 1.1262535570603335e-05, "loss": 0.6202, "step": 3192 }, { "epoch": 0.4762472965918413, "grad_norm": 1.3159596920013428, "learning_rate": 1.1257742354144132e-05, "loss": 0.5621, "step": 3193 }, { "epoch": 0.4763964501454247, "grad_norm": 1.4454196691513062, "learning_rate": 1.1252948844056912e-05, "loss": 0.625, "step": 3194 }, { "epoch": 0.4765456036990081, "grad_norm": 1.3325862884521484, "learning_rate": 1.1248155041460749e-05, "loss": 0.6717, "step": 3195 }, { "epoch": 0.47669475725259153, "grad_norm": 0.9188811779022217, "learning_rate": 1.124336094747479e-05, "loss": 0.6689, "step": 3196 }, { "epoch": 0.47684391080617494, "grad_norm": 1.375221848487854, "learning_rate": 1.1238566563218244e-05, "loss": 0.6547, "step": 3197 }, { "epoch": 0.47699306435975836, "grad_norm": 1.3961973190307617, "learning_rate": 1.1233771889810394e-05, "loss": 0.5906, "step": 3198 }, { "epoch": 0.47714221791334177, "grad_norm": 1.333466649055481, "learning_rate": 1.1228976928370583e-05, "loss": 0.5397, "step": 3199 }, { "epoch": 0.4772913714669252, "grad_norm": 1.1809340715408325, "learning_rate": 1.122418168001823e-05, "loss": 0.5142, "step": 3200 }, { "epoch": 0.4774405250205086, "grad_norm": 1.4984462261199951, "learning_rate": 1.1219386145872812e-05, "loss": 0.6161, "step": 3201 }, { "epoch": 0.477589678574092, "grad_norm": 1.2841031551361084, "learning_rate": 1.121459032705388e-05, "loss": 0.5971, "step": 3202 }, { "epoch": 0.47773883212767543, "grad_norm": 1.2941426038742065, "learning_rate": 1.1209794224681048e-05, "loss": 0.4991, "step": 3203 }, { "epoch": 0.47788798568125884, "grad_norm": 1.5893994569778442, "learning_rate": 1.1204997839874e-05, "loss": 0.5323, "step": 3204 }, { "epoch": 0.47803713923484226, "grad_norm": 1.298120141029358, "learning_rate": 1.1200201173752476e-05, "loss": 0.5666, "step": 3205 }, { "epoch": 0.47818629278842567, "grad_norm": 1.4018875360488892, "learning_rate": 1.1195404227436295e-05, "loss": 0.5425, "step": 3206 }, { "epoch": 0.4783354463420091, "grad_norm": 1.6044450998306274, "learning_rate": 1.1190607002045332e-05, "loss": 0.5462, "step": 3207 }, { "epoch": 0.4784845998955925, "grad_norm": 1.3809698820114136, "learning_rate": 1.1185809498699526e-05, "loss": 0.6062, "step": 3208 }, { "epoch": 0.4786337534491759, "grad_norm": 1.3422496318817139, "learning_rate": 1.1181011718518895e-05, "loss": 0.6344, "step": 3209 }, { "epoch": 0.47878290700275933, "grad_norm": 1.470626711845398, "learning_rate": 1.1176213662623502e-05, "loss": 0.6548, "step": 3210 }, { "epoch": 0.47893206055634274, "grad_norm": 1.378137469291687, "learning_rate": 1.1171415332133488e-05, "loss": 0.5651, "step": 3211 }, { "epoch": 0.47908121410992616, "grad_norm": 1.3331114053726196, "learning_rate": 1.1166616728169052e-05, "loss": 0.6302, "step": 3212 }, { "epoch": 0.47923036766350957, "grad_norm": 1.3955473899841309, "learning_rate": 1.1161817851850464e-05, "loss": 0.6121, "step": 3213 }, { "epoch": 0.479379521217093, "grad_norm": 1.3747190237045288, "learning_rate": 1.1157018704298049e-05, "loss": 0.6268, "step": 3214 }, { "epoch": 0.4795286747706764, "grad_norm": 1.5777395963668823, "learning_rate": 1.1152219286632197e-05, "loss": 0.6903, "step": 3215 }, { "epoch": 0.4796778283242598, "grad_norm": 1.3115524053573608, "learning_rate": 1.1147419599973364e-05, "loss": 0.597, "step": 3216 }, { "epoch": 0.47982698187784323, "grad_norm": 1.4960838556289673, "learning_rate": 1.1142619645442068e-05, "loss": 0.5533, "step": 3217 }, { "epoch": 0.47997613543142664, "grad_norm": 1.3186331987380981, "learning_rate": 1.1137819424158891e-05, "loss": 0.5986, "step": 3218 }, { "epoch": 0.48012528898501006, "grad_norm": 1.4009501934051514, "learning_rate": 1.1133018937244471e-05, "loss": 0.5751, "step": 3219 }, { "epoch": 0.48027444253859347, "grad_norm": 1.33328115940094, "learning_rate": 1.1128218185819517e-05, "loss": 0.6637, "step": 3220 }, { "epoch": 0.4804235960921769, "grad_norm": 1.4328620433807373, "learning_rate": 1.1123417171004794e-05, "loss": 0.5982, "step": 3221 }, { "epoch": 0.4805727496457603, "grad_norm": 1.3958890438079834, "learning_rate": 1.1118615893921125e-05, "loss": 0.5509, "step": 3222 }, { "epoch": 0.4807219031993437, "grad_norm": 1.2960511445999146, "learning_rate": 1.1113814355689408e-05, "loss": 0.563, "step": 3223 }, { "epoch": 0.48087105675292713, "grad_norm": 1.4578129053115845, "learning_rate": 1.1109012557430585e-05, "loss": 0.5879, "step": 3224 }, { "epoch": 0.48102021030651054, "grad_norm": 1.8438864946365356, "learning_rate": 1.1104210500265668e-05, "loss": 0.5633, "step": 3225 }, { "epoch": 0.48116936386009396, "grad_norm": 1.3074839115142822, "learning_rate": 1.1099408185315734e-05, "loss": 0.5195, "step": 3226 }, { "epoch": 0.48131851741367737, "grad_norm": 1.9422056674957275, "learning_rate": 1.1094605613701905e-05, "loss": 0.5484, "step": 3227 }, { "epoch": 0.4814676709672608, "grad_norm": 1.5386478900909424, "learning_rate": 1.108980278654538e-05, "loss": 0.6027, "step": 3228 }, { "epoch": 0.4816168245208442, "grad_norm": 1.384503722190857, "learning_rate": 1.1084999704967406e-05, "loss": 0.5964, "step": 3229 }, { "epoch": 0.4817659780744276, "grad_norm": 1.338595986366272, "learning_rate": 1.1080196370089293e-05, "loss": 0.5605, "step": 3230 }, { "epoch": 0.48191513162801103, "grad_norm": 1.5428544282913208, "learning_rate": 1.1075392783032412e-05, "loss": 0.6315, "step": 3231 }, { "epoch": 0.48206428518159444, "grad_norm": 1.3240290880203247, "learning_rate": 1.1070588944918193e-05, "loss": 0.6213, "step": 3232 }, { "epoch": 0.48221343873517786, "grad_norm": 1.7280809879302979, "learning_rate": 1.1065784856868116e-05, "loss": 0.5833, "step": 3233 }, { "epoch": 0.48236259228876127, "grad_norm": 1.452459692955017, "learning_rate": 1.106098052000373e-05, "loss": 0.6043, "step": 3234 }, { "epoch": 0.4825117458423447, "grad_norm": 1.3524953126907349, "learning_rate": 1.1056175935446642e-05, "loss": 0.5758, "step": 3235 }, { "epoch": 0.4826608993959281, "grad_norm": 1.3446011543273926, "learning_rate": 1.1051371104318507e-05, "loss": 0.6537, "step": 3236 }, { "epoch": 0.4828100529495115, "grad_norm": 1.377507209777832, "learning_rate": 1.1046566027741048e-05, "loss": 0.5292, "step": 3237 }, { "epoch": 0.48295920650309493, "grad_norm": 1.5730911493301392, "learning_rate": 1.1041760706836037e-05, "loss": 0.6902, "step": 3238 }, { "epoch": 0.48310836005667834, "grad_norm": 1.5443447828292847, "learning_rate": 1.1036955142725309e-05, "loss": 0.5624, "step": 3239 }, { "epoch": 0.48325751361026176, "grad_norm": 1.1534013748168945, "learning_rate": 1.1032149336530757e-05, "loss": 0.6349, "step": 3240 }, { "epoch": 0.48340666716384517, "grad_norm": 1.481441617012024, "learning_rate": 1.1027343289374322e-05, "loss": 0.6082, "step": 3241 }, { "epoch": 0.4835558207174286, "grad_norm": 1.2268853187561035, "learning_rate": 1.102253700237801e-05, "loss": 0.6216, "step": 3242 }, { "epoch": 0.483704974271012, "grad_norm": 0.9500306844711304, "learning_rate": 1.1017730476663878e-05, "loss": 0.6537, "step": 3243 }, { "epoch": 0.4838541278245954, "grad_norm": 1.6447943449020386, "learning_rate": 1.1012923713354039e-05, "loss": 0.5143, "step": 3244 }, { "epoch": 0.48400328137817883, "grad_norm": 1.5148676633834839, "learning_rate": 1.1008116713570664e-05, "loss": 0.6005, "step": 3245 }, { "epoch": 0.48415243493176224, "grad_norm": 1.3915389776229858, "learning_rate": 1.1003309478435982e-05, "loss": 0.6069, "step": 3246 }, { "epoch": 0.48430158848534566, "grad_norm": 1.341483473777771, "learning_rate": 1.0998502009072264e-05, "loss": 0.6115, "step": 3247 }, { "epoch": 0.48445074203892907, "grad_norm": 1.3595848083496094, "learning_rate": 1.0993694306601852e-05, "loss": 0.6126, "step": 3248 }, { "epoch": 0.4845998955925125, "grad_norm": 1.3356845378875732, "learning_rate": 1.0988886372147135e-05, "loss": 0.6471, "step": 3249 }, { "epoch": 0.4847490491460959, "grad_norm": 1.363598108291626, "learning_rate": 1.0984078206830548e-05, "loss": 0.5862, "step": 3250 }, { "epoch": 0.4848982026996793, "grad_norm": 1.619311809539795, "learning_rate": 1.0979269811774598e-05, "loss": 0.6874, "step": 3251 }, { "epoch": 0.48504735625326273, "grad_norm": 1.528523564338684, "learning_rate": 1.0974461188101831e-05, "loss": 0.6183, "step": 3252 }, { "epoch": 0.48519650980684614, "grad_norm": 1.4251080751419067, "learning_rate": 1.096965233693485e-05, "loss": 0.5662, "step": 3253 }, { "epoch": 0.48534566336042956, "grad_norm": 0.8586249351501465, "learning_rate": 1.0964843259396313e-05, "loss": 0.6872, "step": 3254 }, { "epoch": 0.48549481691401297, "grad_norm": 1.2609446048736572, "learning_rate": 1.0960033956608931e-05, "loss": 0.6342, "step": 3255 }, { "epoch": 0.4856439704675964, "grad_norm": 1.443968653678894, "learning_rate": 1.0955224429695466e-05, "loss": 0.546, "step": 3256 }, { "epoch": 0.4857931240211798, "grad_norm": 1.298537254333496, "learning_rate": 1.0950414679778736e-05, "loss": 0.5534, "step": 3257 }, { "epoch": 0.4859422775747632, "grad_norm": 1.3945790529251099, "learning_rate": 1.0945604707981601e-05, "loss": 0.5955, "step": 3258 }, { "epoch": 0.48609143112834663, "grad_norm": 1.572113037109375, "learning_rate": 1.0940794515426986e-05, "loss": 0.6264, "step": 3259 }, { "epoch": 0.48624058468193004, "grad_norm": 1.639184832572937, "learning_rate": 1.0935984103237857e-05, "loss": 0.6731, "step": 3260 }, { "epoch": 0.48638973823551346, "grad_norm": 1.2427130937576294, "learning_rate": 1.0931173472537237e-05, "loss": 0.6323, "step": 3261 }, { "epoch": 0.4865388917890969, "grad_norm": 1.3503055572509766, "learning_rate": 1.0926362624448202e-05, "loss": 0.512, "step": 3262 }, { "epoch": 0.4866880453426803, "grad_norm": 1.3327441215515137, "learning_rate": 1.0921551560093872e-05, "loss": 0.6337, "step": 3263 }, { "epoch": 0.4868371988962637, "grad_norm": 1.3514659404754639, "learning_rate": 1.0916740280597417e-05, "loss": 0.6266, "step": 3264 }, { "epoch": 0.4869863524498471, "grad_norm": 1.3396317958831787, "learning_rate": 1.091192878708207e-05, "loss": 0.6221, "step": 3265 }, { "epoch": 0.48713550600343053, "grad_norm": 1.4263402223587036, "learning_rate": 1.0907117080671099e-05, "loss": 0.5619, "step": 3266 }, { "epoch": 0.48728465955701394, "grad_norm": 1.2875627279281616, "learning_rate": 1.0902305162487829e-05, "loss": 0.5848, "step": 3267 }, { "epoch": 0.48743381311059736, "grad_norm": 1.811907410621643, "learning_rate": 1.0897493033655636e-05, "loss": 0.5652, "step": 3268 }, { "epoch": 0.4875829666641808, "grad_norm": 1.4558477401733398, "learning_rate": 1.0892680695297932e-05, "loss": 0.5954, "step": 3269 }, { "epoch": 0.4877321202177642, "grad_norm": 1.2578856945037842, "learning_rate": 1.0887868148538204e-05, "loss": 0.5702, "step": 3270 }, { "epoch": 0.4878812737713476, "grad_norm": 1.3935718536376953, "learning_rate": 1.0883055394499962e-05, "loss": 0.7071, "step": 3271 }, { "epoch": 0.488030427324931, "grad_norm": 1.6278897523880005, "learning_rate": 1.0878242434306772e-05, "loss": 0.6982, "step": 3272 }, { "epoch": 0.48817958087851443, "grad_norm": 1.2587488889694214, "learning_rate": 1.0873429269082256e-05, "loss": 0.5644, "step": 3273 }, { "epoch": 0.48832873443209784, "grad_norm": 1.3285843133926392, "learning_rate": 1.086861589995008e-05, "loss": 0.5973, "step": 3274 }, { "epoch": 0.48847788798568126, "grad_norm": 1.273674488067627, "learning_rate": 1.0863802328033947e-05, "loss": 0.5568, "step": 3275 }, { "epoch": 0.4886270415392647, "grad_norm": 1.4555160999298096, "learning_rate": 1.0858988554457626e-05, "loss": 0.5926, "step": 3276 }, { "epoch": 0.4887761950928481, "grad_norm": 1.426594614982605, "learning_rate": 1.0854174580344918e-05, "loss": 0.6177, "step": 3277 }, { "epoch": 0.4889253486464315, "grad_norm": 1.3970787525177002, "learning_rate": 1.0849360406819676e-05, "loss": 0.5907, "step": 3278 }, { "epoch": 0.4890745022000149, "grad_norm": 1.4239784479141235, "learning_rate": 1.0844546035005803e-05, "loss": 0.5693, "step": 3279 }, { "epoch": 0.48922365575359833, "grad_norm": 1.2229193449020386, "learning_rate": 1.0839731466027242e-05, "loss": 0.6552, "step": 3280 }, { "epoch": 0.48937280930718174, "grad_norm": 1.4714407920837402, "learning_rate": 1.0834916701007985e-05, "loss": 0.6075, "step": 3281 }, { "epoch": 0.48952196286076516, "grad_norm": 1.6459972858428955, "learning_rate": 1.0830101741072069e-05, "loss": 0.6076, "step": 3282 }, { "epoch": 0.4896711164143486, "grad_norm": 1.2642930746078491, "learning_rate": 1.0825286587343582e-05, "loss": 0.5755, "step": 3283 }, { "epoch": 0.489820269967932, "grad_norm": 1.6143985986709595, "learning_rate": 1.082047124094665e-05, "loss": 0.6653, "step": 3284 }, { "epoch": 0.4899694235215154, "grad_norm": 1.3599377870559692, "learning_rate": 1.0815655703005446e-05, "loss": 0.583, "step": 3285 }, { "epoch": 0.4901185770750988, "grad_norm": 1.338110327720642, "learning_rate": 1.0810839974644183e-05, "loss": 0.6061, "step": 3286 }, { "epoch": 0.49026773062868223, "grad_norm": 1.3525364398956299, "learning_rate": 1.0806024056987132e-05, "loss": 0.5272, "step": 3287 }, { "epoch": 0.49041688418226564, "grad_norm": 1.2426683902740479, "learning_rate": 1.0801207951158599e-05, "loss": 0.5748, "step": 3288 }, { "epoch": 0.49056603773584906, "grad_norm": 1.3670449256896973, "learning_rate": 1.079639165828293e-05, "loss": 0.5881, "step": 3289 }, { "epoch": 0.4907151912894325, "grad_norm": 1.3419731855392456, "learning_rate": 1.0791575179484523e-05, "loss": 0.5569, "step": 3290 }, { "epoch": 0.4908643448430159, "grad_norm": 1.199697494506836, "learning_rate": 1.0786758515887814e-05, "loss": 0.5474, "step": 3291 }, { "epoch": 0.4910134983965993, "grad_norm": 1.9213967323303223, "learning_rate": 1.0781941668617285e-05, "loss": 0.6154, "step": 3292 }, { "epoch": 0.4911626519501827, "grad_norm": 1.293725609779358, "learning_rate": 1.077712463879746e-05, "loss": 0.5057, "step": 3293 }, { "epoch": 0.49131180550376613, "grad_norm": 1.1783353090286255, "learning_rate": 1.0772307427552903e-05, "loss": 0.5391, "step": 3294 }, { "epoch": 0.49146095905734954, "grad_norm": 1.3873546123504639, "learning_rate": 1.0767490036008225e-05, "loss": 0.6015, "step": 3295 }, { "epoch": 0.49161011261093296, "grad_norm": 1.7648768424987793, "learning_rate": 1.0762672465288079e-05, "loss": 0.6613, "step": 3296 }, { "epoch": 0.4917592661645164, "grad_norm": 1.6565651893615723, "learning_rate": 1.0757854716517156e-05, "loss": 0.5519, "step": 3297 }, { "epoch": 0.4919084197180998, "grad_norm": 1.6313934326171875, "learning_rate": 1.075303679082019e-05, "loss": 0.4918, "step": 3298 }, { "epoch": 0.4920575732716832, "grad_norm": 1.6606498956680298, "learning_rate": 1.0748218689321954e-05, "loss": 0.6626, "step": 3299 }, { "epoch": 0.4922067268252666, "grad_norm": 1.1442662477493286, "learning_rate": 1.0743400413147269e-05, "loss": 0.5903, "step": 3300 }, { "epoch": 0.49235588037885003, "grad_norm": 1.3834233283996582, "learning_rate": 1.0738581963420994e-05, "loss": 0.5587, "step": 3301 }, { "epoch": 0.49250503393243344, "grad_norm": 1.5858856439590454, "learning_rate": 1.073376334126802e-05, "loss": 0.5835, "step": 3302 }, { "epoch": 0.49265418748601686, "grad_norm": 1.5974929332733154, "learning_rate": 1.0728944547813289e-05, "loss": 0.552, "step": 3303 }, { "epoch": 0.4928033410396003, "grad_norm": 1.524249792098999, "learning_rate": 1.072412558418178e-05, "loss": 0.6473, "step": 3304 }, { "epoch": 0.4929524945931837, "grad_norm": 1.3901283740997314, "learning_rate": 1.0719306451498513e-05, "loss": 0.5345, "step": 3305 }, { "epoch": 0.4931016481467671, "grad_norm": 1.5815790891647339, "learning_rate": 1.0714487150888537e-05, "loss": 0.6104, "step": 3306 }, { "epoch": 0.4932508017003505, "grad_norm": 1.1780887842178345, "learning_rate": 1.0709667683476962e-05, "loss": 0.522, "step": 3307 }, { "epoch": 0.49339995525393393, "grad_norm": 1.323871374130249, "learning_rate": 1.0704848050388905e-05, "loss": 0.6083, "step": 3308 }, { "epoch": 0.49354910880751734, "grad_norm": 1.1945488452911377, "learning_rate": 1.0700028252749559e-05, "loss": 0.4992, "step": 3309 }, { "epoch": 0.49369826236110076, "grad_norm": 1.2799359560012817, "learning_rate": 1.069520829168413e-05, "loss": 0.636, "step": 3310 }, { "epoch": 0.4938474159146842, "grad_norm": 1.5034830570220947, "learning_rate": 1.0690388168317863e-05, "loss": 0.6119, "step": 3311 }, { "epoch": 0.4939965694682676, "grad_norm": 1.549656629562378, "learning_rate": 1.0685567883776054e-05, "loss": 0.6191, "step": 3312 }, { "epoch": 0.494145723021851, "grad_norm": 1.379866600036621, "learning_rate": 1.0680747439184025e-05, "loss": 0.5627, "step": 3313 }, { "epoch": 0.4942948765754344, "grad_norm": 1.5112868547439575, "learning_rate": 1.0675926835667142e-05, "loss": 0.5638, "step": 3314 }, { "epoch": 0.49444403012901783, "grad_norm": 1.4076337814331055, "learning_rate": 1.0671106074350805e-05, "loss": 0.6114, "step": 3315 }, { "epoch": 0.49459318368260125, "grad_norm": 1.2856630086898804, "learning_rate": 1.0666285156360451e-05, "loss": 0.6313, "step": 3316 }, { "epoch": 0.49474233723618466, "grad_norm": 1.258575439453125, "learning_rate": 1.0661464082821558e-05, "loss": 0.5566, "step": 3317 }, { "epoch": 0.4948914907897681, "grad_norm": 1.4207590818405151, "learning_rate": 1.065664285485963e-05, "loss": 0.5407, "step": 3318 }, { "epoch": 0.4950406443433515, "grad_norm": 1.2912606000900269, "learning_rate": 1.0651821473600218e-05, "loss": 0.5666, "step": 3319 }, { "epoch": 0.4951897978969349, "grad_norm": 1.3246463537216187, "learning_rate": 1.0646999940168908e-05, "loss": 0.6005, "step": 3320 }, { "epoch": 0.4953389514505183, "grad_norm": 1.4577873945236206, "learning_rate": 1.064217825569131e-05, "loss": 0.5527, "step": 3321 }, { "epoch": 0.49548810500410173, "grad_norm": 1.3280727863311768, "learning_rate": 1.0637356421293077e-05, "loss": 0.5527, "step": 3322 }, { "epoch": 0.49563725855768515, "grad_norm": 1.2323260307312012, "learning_rate": 1.0632534438099906e-05, "loss": 0.5822, "step": 3323 }, { "epoch": 0.49578641211126856, "grad_norm": 0.8816760778427124, "learning_rate": 1.0627712307237513e-05, "loss": 0.6948, "step": 3324 }, { "epoch": 0.495935565664852, "grad_norm": 1.3437100648880005, "learning_rate": 1.0622890029831656e-05, "loss": 0.6415, "step": 3325 }, { "epoch": 0.4960847192184354, "grad_norm": 1.359902024269104, "learning_rate": 1.0618067607008127e-05, "loss": 0.519, "step": 3326 }, { "epoch": 0.4962338727720188, "grad_norm": 1.4755587577819824, "learning_rate": 1.0613245039892755e-05, "loss": 0.5759, "step": 3327 }, { "epoch": 0.4963830263256022, "grad_norm": 1.232862114906311, "learning_rate": 1.0608422329611393e-05, "loss": 0.5814, "step": 3328 }, { "epoch": 0.49653217987918563, "grad_norm": 1.193577527999878, "learning_rate": 1.0603599477289939e-05, "loss": 0.5653, "step": 3329 }, { "epoch": 0.49668133343276905, "grad_norm": 1.5815953016281128, "learning_rate": 1.0598776484054313e-05, "loss": 0.6457, "step": 3330 }, { "epoch": 0.49683048698635246, "grad_norm": 1.4956341981887817, "learning_rate": 1.0593953351030481e-05, "loss": 0.6807, "step": 3331 }, { "epoch": 0.4969796405399359, "grad_norm": 1.3336454629898071, "learning_rate": 1.0589130079344431e-05, "loss": 0.648, "step": 3332 }, { "epoch": 0.4971287940935193, "grad_norm": 1.2526781558990479, "learning_rate": 1.0584306670122186e-05, "loss": 0.5926, "step": 3333 }, { "epoch": 0.4972779476471027, "grad_norm": 1.2451351881027222, "learning_rate": 1.05794831244898e-05, "loss": 0.6635, "step": 3334 }, { "epoch": 0.4974271012006861, "grad_norm": 0.8629302382469177, "learning_rate": 1.0574659443573367e-05, "loss": 0.6511, "step": 3335 }, { "epoch": 0.49757625475426953, "grad_norm": 1.254061222076416, "learning_rate": 1.0569835628498998e-05, "loss": 0.5753, "step": 3336 }, { "epoch": 0.49772540830785295, "grad_norm": 1.3356281518936157, "learning_rate": 1.0565011680392852e-05, "loss": 0.5671, "step": 3337 }, { "epoch": 0.49787456186143636, "grad_norm": 1.3190007209777832, "learning_rate": 1.0560187600381104e-05, "loss": 0.6544, "step": 3338 }, { "epoch": 0.4980237154150198, "grad_norm": 1.2324984073638916, "learning_rate": 1.0555363389589966e-05, "loss": 0.6194, "step": 3339 }, { "epoch": 0.4981728689686032, "grad_norm": 1.930246114730835, "learning_rate": 1.0550539049145687e-05, "loss": 0.5764, "step": 3340 }, { "epoch": 0.4983220225221866, "grad_norm": 1.235938549041748, "learning_rate": 1.054571458017454e-05, "loss": 0.5372, "step": 3341 }, { "epoch": 0.49847117607577, "grad_norm": 0.939266562461853, "learning_rate": 1.054088998380282e-05, "loss": 0.7002, "step": 3342 }, { "epoch": 0.49862032962935343, "grad_norm": 1.2602063417434692, "learning_rate": 1.0536065261156864e-05, "loss": 0.6399, "step": 3343 }, { "epoch": 0.49876948318293685, "grad_norm": 1.3316775560379028, "learning_rate": 1.053124041336304e-05, "loss": 0.5163, "step": 3344 }, { "epoch": 0.49891863673652026, "grad_norm": 1.330432653427124, "learning_rate": 1.0526415441547732e-05, "loss": 0.6677, "step": 3345 }, { "epoch": 0.4990677902901037, "grad_norm": 1.2071478366851807, "learning_rate": 1.0521590346837366e-05, "loss": 0.5021, "step": 3346 }, { "epoch": 0.4992169438436871, "grad_norm": 1.468559980392456, "learning_rate": 1.0516765130358389e-05, "loss": 0.6203, "step": 3347 }, { "epoch": 0.4993660973972705, "grad_norm": 1.2231239080429077, "learning_rate": 1.0511939793237275e-05, "loss": 0.5314, "step": 3348 }, { "epoch": 0.4995152509508539, "grad_norm": 1.3105889558792114, "learning_rate": 1.0507114336600539e-05, "loss": 0.6718, "step": 3349 }, { "epoch": 0.49966440450443733, "grad_norm": 1.2636955976486206, "learning_rate": 1.0502288761574706e-05, "loss": 0.6575, "step": 3350 }, { "epoch": 0.49981355805802075, "grad_norm": 2.102067708969116, "learning_rate": 1.0497463069286343e-05, "loss": 0.6144, "step": 3351 }, { "epoch": 0.49996271161160416, "grad_norm": 1.2931180000305176, "learning_rate": 1.0492637260862036e-05, "loss": 0.542, "step": 3352 }, { "epoch": 0.5001118651651876, "grad_norm": 1.7535372972488403, "learning_rate": 1.04878113374284e-05, "loss": 0.5425, "step": 3353 }, { "epoch": 0.5002610187187709, "grad_norm": 1.2018756866455078, "learning_rate": 1.0482985300112081e-05, "loss": 0.6024, "step": 3354 }, { "epoch": 0.5004101722723544, "grad_norm": 1.309267520904541, "learning_rate": 1.0478159150039745e-05, "loss": 0.4502, "step": 3355 }, { "epoch": 0.5005593258259378, "grad_norm": 1.3748480081558228, "learning_rate": 1.047333288833809e-05, "loss": 0.5801, "step": 3356 }, { "epoch": 0.5007084793795212, "grad_norm": 1.2613935470581055, "learning_rate": 1.046850651613384e-05, "loss": 0.5305, "step": 3357 }, { "epoch": 0.5008576329331046, "grad_norm": 1.5014898777008057, "learning_rate": 1.0463680034553738e-05, "loss": 0.6084, "step": 3358 }, { "epoch": 0.5010067864866881, "grad_norm": 1.5629446506500244, "learning_rate": 1.045885344472456e-05, "loss": 0.5792, "step": 3359 }, { "epoch": 0.5011559400402714, "grad_norm": 1.3211073875427246, "learning_rate": 1.0454026747773103e-05, "loss": 0.6184, "step": 3360 }, { "epoch": 0.5013050935938549, "grad_norm": 1.3999401330947876, "learning_rate": 1.0449199944826185e-05, "loss": 0.6449, "step": 3361 }, { "epoch": 0.5014542471474382, "grad_norm": 1.3820087909698486, "learning_rate": 1.0444373037010667e-05, "loss": 0.6193, "step": 3362 }, { "epoch": 0.5016034007010217, "grad_norm": 1.5000883340835571, "learning_rate": 1.0439546025453411e-05, "loss": 0.6867, "step": 3363 }, { "epoch": 0.5017525542546051, "grad_norm": 1.6316825151443481, "learning_rate": 1.0434718911281316e-05, "loss": 0.6974, "step": 3364 }, { "epoch": 0.5019017078081885, "grad_norm": 1.2203965187072754, "learning_rate": 1.0429891695621304e-05, "loss": 0.5237, "step": 3365 }, { "epoch": 0.5020508613617719, "grad_norm": 1.361584186553955, "learning_rate": 1.042506437960032e-05, "loss": 0.5277, "step": 3366 }, { "epoch": 0.5022000149153554, "grad_norm": 1.3048430681228638, "learning_rate": 1.0420236964345332e-05, "loss": 0.5324, "step": 3367 }, { "epoch": 0.5023491684689387, "grad_norm": 1.259044885635376, "learning_rate": 1.041540945098333e-05, "loss": 0.5671, "step": 3368 }, { "epoch": 0.5024983220225222, "grad_norm": 1.3413528203964233, "learning_rate": 1.0410581840641324e-05, "loss": 0.6307, "step": 3369 }, { "epoch": 0.5026474755761056, "grad_norm": 1.2264540195465088, "learning_rate": 1.040575413444636e-05, "loss": 0.5277, "step": 3370 }, { "epoch": 0.502796629129689, "grad_norm": 1.5332016944885254, "learning_rate": 1.040092633352549e-05, "loss": 0.6511, "step": 3371 }, { "epoch": 0.5029457826832724, "grad_norm": 1.3333910703659058, "learning_rate": 1.0396098439005796e-05, "loss": 0.5645, "step": 3372 }, { "epoch": 0.5030949362368559, "grad_norm": 1.3020069599151611, "learning_rate": 1.0391270452014382e-05, "loss": 0.655, "step": 3373 }, { "epoch": 0.5032440897904392, "grad_norm": 1.4358298778533936, "learning_rate": 1.0386442373678372e-05, "loss": 0.6066, "step": 3374 }, { "epoch": 0.5033932433440227, "grad_norm": 1.52225923538208, "learning_rate": 1.038161420512491e-05, "loss": 0.6, "step": 3375 }, { "epoch": 0.503542396897606, "grad_norm": 1.3636285066604614, "learning_rate": 1.0376785947481168e-05, "loss": 0.6333, "step": 3376 }, { "epoch": 0.5036915504511895, "grad_norm": 1.214476466178894, "learning_rate": 1.037195760187433e-05, "loss": 0.5911, "step": 3377 }, { "epoch": 0.5038407040047729, "grad_norm": 1.3264460563659668, "learning_rate": 1.03671291694316e-05, "loss": 0.593, "step": 3378 }, { "epoch": 0.5039898575583563, "grad_norm": 1.3323793411254883, "learning_rate": 1.0362300651280217e-05, "loss": 0.582, "step": 3379 }, { "epoch": 0.5041390111119397, "grad_norm": 1.2079086303710938, "learning_rate": 1.0357472048547423e-05, "loss": 0.5197, "step": 3380 }, { "epoch": 0.5042881646655232, "grad_norm": 1.368369460105896, "learning_rate": 1.0352643362360486e-05, "loss": 0.596, "step": 3381 }, { "epoch": 0.5044373182191065, "grad_norm": 1.3156956434249878, "learning_rate": 1.0347814593846694e-05, "loss": 0.529, "step": 3382 }, { "epoch": 0.50458647177269, "grad_norm": 1.1474183797836304, "learning_rate": 1.0342985744133358e-05, "loss": 0.4669, "step": 3383 }, { "epoch": 0.5047356253262734, "grad_norm": 1.5670992136001587, "learning_rate": 1.0338156814347799e-05, "loss": 0.5476, "step": 3384 }, { "epoch": 0.5048847788798568, "grad_norm": 1.2621113061904907, "learning_rate": 1.0333327805617367e-05, "loss": 0.5856, "step": 3385 }, { "epoch": 0.5050339324334402, "grad_norm": 1.383210301399231, "learning_rate": 1.0328498719069416e-05, "loss": 0.6284, "step": 3386 }, { "epoch": 0.5051830859870237, "grad_norm": 1.4607704877853394, "learning_rate": 1.0323669555831332e-05, "loss": 0.579, "step": 3387 }, { "epoch": 0.505332239540607, "grad_norm": 2.3485076427459717, "learning_rate": 1.0318840317030518e-05, "loss": 0.6496, "step": 3388 }, { "epoch": 0.5054813930941905, "grad_norm": 1.4320592880249023, "learning_rate": 1.0314011003794386e-05, "loss": 0.6122, "step": 3389 }, { "epoch": 0.5056305466477738, "grad_norm": 1.856672763824463, "learning_rate": 1.0309181617250374e-05, "loss": 0.6494, "step": 3390 }, { "epoch": 0.5057797002013573, "grad_norm": 1.574416995048523, "learning_rate": 1.030435215852593e-05, "loss": 0.5795, "step": 3391 }, { "epoch": 0.5059288537549407, "grad_norm": 1.3718221187591553, "learning_rate": 1.0299522628748522e-05, "loss": 0.5786, "step": 3392 }, { "epoch": 0.5060780073085241, "grad_norm": 1.2431868314743042, "learning_rate": 1.0294693029045636e-05, "loss": 0.5549, "step": 3393 }, { "epoch": 0.5062271608621075, "grad_norm": 1.2278034687042236, "learning_rate": 1.0289863360544775e-05, "loss": 0.6101, "step": 3394 }, { "epoch": 0.506376314415691, "grad_norm": 1.416882872581482, "learning_rate": 1.0285033624373453e-05, "loss": 0.6203, "step": 3395 }, { "epoch": 0.5065254679692743, "grad_norm": 1.4986239671707153, "learning_rate": 1.0280203821659203e-05, "loss": 0.6662, "step": 3396 }, { "epoch": 0.5066746215228578, "grad_norm": 1.2734578847885132, "learning_rate": 1.0275373953529572e-05, "loss": 0.6291, "step": 3397 }, { "epoch": 0.5068237750764412, "grad_norm": 1.304229974746704, "learning_rate": 1.027054402111213e-05, "loss": 0.5596, "step": 3398 }, { "epoch": 0.5069729286300246, "grad_norm": 1.390791416168213, "learning_rate": 1.0265714025534451e-05, "loss": 0.6089, "step": 3399 }, { "epoch": 0.507122082183608, "grad_norm": 1.28506600856781, "learning_rate": 1.0260883967924123e-05, "loss": 0.5958, "step": 3400 }, { "epoch": 0.5072712357371915, "grad_norm": 1.3349751234054565, "learning_rate": 1.0256053849408768e-05, "loss": 0.5568, "step": 3401 }, { "epoch": 0.5074203892907748, "grad_norm": 1.493990421295166, "learning_rate": 1.0251223671115995e-05, "loss": 0.5298, "step": 3402 }, { "epoch": 0.5075695428443583, "grad_norm": 1.6013855934143066, "learning_rate": 1.0246393434173446e-05, "loss": 0.6186, "step": 3403 }, { "epoch": 0.5077186963979416, "grad_norm": 1.54166579246521, "learning_rate": 1.024156313970877e-05, "loss": 0.6659, "step": 3404 }, { "epoch": 0.5078678499515251, "grad_norm": 1.2343562841415405, "learning_rate": 1.023673278884963e-05, "loss": 0.5529, "step": 3405 }, { "epoch": 0.5080170035051085, "grad_norm": 1.3538986444473267, "learning_rate": 1.0231902382723704e-05, "loss": 0.5646, "step": 3406 }, { "epoch": 0.508166157058692, "grad_norm": 1.2611132860183716, "learning_rate": 1.022707192245868e-05, "loss": 0.5669, "step": 3407 }, { "epoch": 0.5083153106122753, "grad_norm": 1.4359564781188965, "learning_rate": 1.0222241409182256e-05, "loss": 0.5739, "step": 3408 }, { "epoch": 0.5084644641658588, "grad_norm": 1.218382716178894, "learning_rate": 1.0217410844022154e-05, "loss": 0.5618, "step": 3409 }, { "epoch": 0.5086136177194421, "grad_norm": 1.2193211317062378, "learning_rate": 1.0212580228106094e-05, "loss": 0.5868, "step": 3410 }, { "epoch": 0.5087627712730256, "grad_norm": 1.7714143991470337, "learning_rate": 1.0207749562561817e-05, "loss": 0.5954, "step": 3411 }, { "epoch": 0.508911924826609, "grad_norm": 1.2643669843673706, "learning_rate": 1.0202918848517075e-05, "loss": 0.4619, "step": 3412 }, { "epoch": 0.5090610783801924, "grad_norm": 1.3101304769515991, "learning_rate": 1.0198088087099624e-05, "loss": 0.6468, "step": 3413 }, { "epoch": 0.5092102319337758, "grad_norm": 1.1916309595108032, "learning_rate": 1.0193257279437238e-05, "loss": 0.5542, "step": 3414 }, { "epoch": 0.5093593854873593, "grad_norm": 1.4466485977172852, "learning_rate": 1.0188426426657705e-05, "loss": 0.5764, "step": 3415 }, { "epoch": 0.5095085390409426, "grad_norm": 1.2500336170196533, "learning_rate": 1.0183595529888812e-05, "loss": 0.5875, "step": 3416 }, { "epoch": 0.5096576925945261, "grad_norm": 1.3378709554672241, "learning_rate": 1.0178764590258363e-05, "loss": 0.5789, "step": 3417 }, { "epoch": 0.5098068461481094, "grad_norm": 1.5174978971481323, "learning_rate": 1.0173933608894177e-05, "loss": 0.5786, "step": 3418 }, { "epoch": 0.5099559997016929, "grad_norm": 1.3273062705993652, "learning_rate": 1.0169102586924077e-05, "loss": 0.5961, "step": 3419 }, { "epoch": 0.5101051532552763, "grad_norm": 1.2640342712402344, "learning_rate": 1.016427152547589e-05, "loss": 0.6372, "step": 3420 }, { "epoch": 0.5102543068088597, "grad_norm": 1.5463588237762451, "learning_rate": 1.0159440425677466e-05, "loss": 0.5855, "step": 3421 }, { "epoch": 0.5104034603624431, "grad_norm": 1.418470859527588, "learning_rate": 1.0154609288656647e-05, "loss": 0.5921, "step": 3422 }, { "epoch": 0.5105526139160266, "grad_norm": 1.5056943893432617, "learning_rate": 1.01497781155413e-05, "loss": 0.5642, "step": 3423 }, { "epoch": 0.5107017674696099, "grad_norm": 1.3350257873535156, "learning_rate": 1.0144946907459294e-05, "loss": 0.6431, "step": 3424 }, { "epoch": 0.5108509210231934, "grad_norm": 1.3274770975112915, "learning_rate": 1.0140115665538502e-05, "loss": 0.6008, "step": 3425 }, { "epoch": 0.5110000745767768, "grad_norm": 1.2915635108947754, "learning_rate": 1.0135284390906805e-05, "loss": 0.6508, "step": 3426 }, { "epoch": 0.5111492281303602, "grad_norm": 1.4518226385116577, "learning_rate": 1.0130453084692108e-05, "loss": 0.618, "step": 3427 }, { "epoch": 0.5112983816839436, "grad_norm": 1.1795285940170288, "learning_rate": 1.0125621748022295e-05, "loss": 0.6269, "step": 3428 }, { "epoch": 0.5114475352375271, "grad_norm": 1.6954988241195679, "learning_rate": 1.0120790382025282e-05, "loss": 0.6445, "step": 3429 }, { "epoch": 0.5115966887911104, "grad_norm": 1.5290448665618896, "learning_rate": 1.0115958987828977e-05, "loss": 0.5434, "step": 3430 }, { "epoch": 0.5117458423446939, "grad_norm": 1.4130157232284546, "learning_rate": 1.0111127566561305e-05, "loss": 0.6642, "step": 3431 }, { "epoch": 0.5118949958982772, "grad_norm": 1.4596461057662964, "learning_rate": 1.0106296119350192e-05, "loss": 0.5509, "step": 3432 }, { "epoch": 0.5120441494518607, "grad_norm": 1.3677605390548706, "learning_rate": 1.0101464647323567e-05, "loss": 0.6224, "step": 3433 }, { "epoch": 0.5121933030054441, "grad_norm": 1.3757985830307007, "learning_rate": 1.009663315160937e-05, "loss": 0.6025, "step": 3434 }, { "epoch": 0.5123424565590275, "grad_norm": 1.3844548463821411, "learning_rate": 1.0091801633335544e-05, "loss": 0.5785, "step": 3435 }, { "epoch": 0.5124916101126109, "grad_norm": 0.8960702419281006, "learning_rate": 1.0086970093630036e-05, "loss": 0.683, "step": 3436 }, { "epoch": 0.5126407636661944, "grad_norm": 1.3172160387039185, "learning_rate": 1.0082138533620803e-05, "loss": 0.5627, "step": 3437 }, { "epoch": 0.5127899172197777, "grad_norm": 1.3176970481872559, "learning_rate": 1.0077306954435804e-05, "loss": 0.5988, "step": 3438 }, { "epoch": 0.5129390707733612, "grad_norm": 1.181814193725586, "learning_rate": 1.0072475357202998e-05, "loss": 0.5102, "step": 3439 }, { "epoch": 0.5130882243269446, "grad_norm": 1.7280603647232056, "learning_rate": 1.0067643743050357e-05, "loss": 0.6828, "step": 3440 }, { "epoch": 0.513237377880528, "grad_norm": 1.3502470254898071, "learning_rate": 1.006281211310585e-05, "loss": 0.5203, "step": 3441 }, { "epoch": 0.5133865314341114, "grad_norm": 1.190325140953064, "learning_rate": 1.0057980468497453e-05, "loss": 0.4573, "step": 3442 }, { "epoch": 0.5135356849876949, "grad_norm": 1.4335753917694092, "learning_rate": 1.0053148810353146e-05, "loss": 0.6729, "step": 3443 }, { "epoch": 0.5136848385412782, "grad_norm": 1.325122594833374, "learning_rate": 1.0048317139800906e-05, "loss": 0.6073, "step": 3444 }, { "epoch": 0.5138339920948617, "grad_norm": 1.2154244184494019, "learning_rate": 1.0043485457968717e-05, "loss": 0.4913, "step": 3445 }, { "epoch": 0.513983145648445, "grad_norm": 1.2726032733917236, "learning_rate": 1.0038653765984573e-05, "loss": 0.615, "step": 3446 }, { "epoch": 0.5141322992020285, "grad_norm": 1.4359837770462036, "learning_rate": 1.0033822064976457e-05, "loss": 0.6001, "step": 3447 }, { "epoch": 0.5142814527556119, "grad_norm": 1.4312962293624878, "learning_rate": 1.0028990356072364e-05, "loss": 0.5993, "step": 3448 }, { "epoch": 0.5144306063091953, "grad_norm": 1.4384535551071167, "learning_rate": 1.0024158640400288e-05, "loss": 0.5538, "step": 3449 }, { "epoch": 0.5145797598627787, "grad_norm": 1.3327771425247192, "learning_rate": 1.0019326919088221e-05, "loss": 0.5908, "step": 3450 }, { "epoch": 0.5147289134163622, "grad_norm": 1.2773202657699585, "learning_rate": 1.0014495193264162e-05, "loss": 0.5647, "step": 3451 }, { "epoch": 0.5148780669699455, "grad_norm": 1.902603030204773, "learning_rate": 1.0009663464056108e-05, "loss": 0.6351, "step": 3452 }, { "epoch": 0.515027220523529, "grad_norm": 1.3631012439727783, "learning_rate": 1.0004831732592053e-05, "loss": 0.6001, "step": 3453 }, { "epoch": 0.5151763740771124, "grad_norm": 1.6833815574645996, "learning_rate": 1e-05, "loss": 0.6245, "step": 3454 }, { "epoch": 0.5153255276306958, "grad_norm": 1.3775784969329834, "learning_rate": 9.99516826740795e-06, "loss": 0.5977, "step": 3455 }, { "epoch": 0.5154746811842792, "grad_norm": 1.3126366138458252, "learning_rate": 9.990336535943897e-06, "loss": 0.5782, "step": 3456 }, { "epoch": 0.5156238347378627, "grad_norm": 1.2373501062393188, "learning_rate": 9.985504806735841e-06, "loss": 0.5627, "step": 3457 }, { "epoch": 0.515772988291446, "grad_norm": 1.4882410764694214, "learning_rate": 9.98067308091178e-06, "loss": 0.6547, "step": 3458 }, { "epoch": 0.5159221418450295, "grad_norm": 1.395204782485962, "learning_rate": 9.975841359599712e-06, "loss": 0.4818, "step": 3459 }, { "epoch": 0.5160712953986129, "grad_norm": 1.160656452178955, "learning_rate": 9.971009643927636e-06, "loss": 0.6164, "step": 3460 }, { "epoch": 0.5162204489521963, "grad_norm": 1.4795291423797607, "learning_rate": 9.966177935023545e-06, "loss": 0.6017, "step": 3461 }, { "epoch": 0.5163696025057797, "grad_norm": 1.3300648927688599, "learning_rate": 9.96134623401543e-06, "loss": 0.5828, "step": 3462 }, { "epoch": 0.5165187560593631, "grad_norm": 1.3185757398605347, "learning_rate": 9.956514542031286e-06, "loss": 0.5789, "step": 3463 }, { "epoch": 0.5166679096129465, "grad_norm": 1.326230764389038, "learning_rate": 9.9516828601991e-06, "loss": 0.6571, "step": 3464 }, { "epoch": 0.51681706316653, "grad_norm": 1.2773761749267578, "learning_rate": 9.94685118964686e-06, "loss": 0.6218, "step": 3465 }, { "epoch": 0.5169662167201133, "grad_norm": 1.4228438138961792, "learning_rate": 9.942019531502552e-06, "loss": 0.5517, "step": 3466 }, { "epoch": 0.5171153702736968, "grad_norm": 1.328234314918518, "learning_rate": 9.937187886894153e-06, "loss": 0.6764, "step": 3467 }, { "epoch": 0.5172645238272802, "grad_norm": 1.5172969102859497, "learning_rate": 9.932356256949643e-06, "loss": 0.6217, "step": 3468 }, { "epoch": 0.5174136773808636, "grad_norm": 1.5112603902816772, "learning_rate": 9.927524642797003e-06, "loss": 0.6228, "step": 3469 }, { "epoch": 0.517562830934447, "grad_norm": 1.5774394273757935, "learning_rate": 9.9226930455642e-06, "loss": 0.6529, "step": 3470 }, { "epoch": 0.5177119844880305, "grad_norm": 1.3873803615570068, "learning_rate": 9.9178614663792e-06, "loss": 0.5536, "step": 3471 }, { "epoch": 0.5178611380416138, "grad_norm": 0.8530063629150391, "learning_rate": 9.91302990636997e-06, "loss": 0.6808, "step": 3472 }, { "epoch": 0.5180102915951973, "grad_norm": 1.2443541288375854, "learning_rate": 9.908198366664461e-06, "loss": 0.5454, "step": 3473 }, { "epoch": 0.5181594451487807, "grad_norm": 1.3757593631744385, "learning_rate": 9.903366848390635e-06, "loss": 0.5912, "step": 3474 }, { "epoch": 0.5183085987023641, "grad_norm": 1.3610268831253052, "learning_rate": 9.898535352676438e-06, "loss": 0.5472, "step": 3475 }, { "epoch": 0.5184577522559475, "grad_norm": 1.5267423391342163, "learning_rate": 9.893703880649808e-06, "loss": 0.5732, "step": 3476 }, { "epoch": 0.518606905809531, "grad_norm": 1.2385008335113525, "learning_rate": 9.888872433438695e-06, "loss": 0.6207, "step": 3477 }, { "epoch": 0.5187560593631143, "grad_norm": 1.43735671043396, "learning_rate": 9.884041012171023e-06, "loss": 0.5674, "step": 3478 }, { "epoch": 0.5189052129166978, "grad_norm": 1.4006319046020508, "learning_rate": 9.879209617974721e-06, "loss": 0.6941, "step": 3479 }, { "epoch": 0.5190543664702811, "grad_norm": 1.3954801559448242, "learning_rate": 9.874378251977709e-06, "loss": 0.6122, "step": 3480 }, { "epoch": 0.5192035200238646, "grad_norm": 1.3190706968307495, "learning_rate": 9.869546915307897e-06, "loss": 0.6186, "step": 3481 }, { "epoch": 0.519352673577448, "grad_norm": 1.2867462635040283, "learning_rate": 9.864715609093196e-06, "loss": 0.6637, "step": 3482 }, { "epoch": 0.5195018271310314, "grad_norm": 1.265716791152954, "learning_rate": 9.859884334461503e-06, "loss": 0.5841, "step": 3483 }, { "epoch": 0.5196509806846148, "grad_norm": 1.1313331127166748, "learning_rate": 9.85505309254071e-06, "loss": 0.4671, "step": 3484 }, { "epoch": 0.5198001342381983, "grad_norm": 1.201914668083191, "learning_rate": 9.8502218844587e-06, "loss": 0.5474, "step": 3485 }, { "epoch": 0.5199492877917816, "grad_norm": 1.3420534133911133, "learning_rate": 9.845390711343356e-06, "loss": 0.5572, "step": 3486 }, { "epoch": 0.5200984413453651, "grad_norm": 1.4330081939697266, "learning_rate": 9.840559574322538e-06, "loss": 0.5315, "step": 3487 }, { "epoch": 0.5202475948989485, "grad_norm": 1.3807034492492676, "learning_rate": 9.835728474524113e-06, "loss": 0.6875, "step": 3488 }, { "epoch": 0.5203967484525319, "grad_norm": 1.416406512260437, "learning_rate": 9.830897413075926e-06, "loss": 0.5893, "step": 3489 }, { "epoch": 0.5205459020061153, "grad_norm": 1.506115436553955, "learning_rate": 9.826066391105824e-06, "loss": 0.6413, "step": 3490 }, { "epoch": 0.5206950555596987, "grad_norm": 1.2433364391326904, "learning_rate": 9.82123540974164e-06, "loss": 0.6323, "step": 3491 }, { "epoch": 0.5208442091132821, "grad_norm": 1.4753998517990112, "learning_rate": 9.816404470111191e-06, "loss": 0.6186, "step": 3492 }, { "epoch": 0.5209933626668656, "grad_norm": 1.3348984718322754, "learning_rate": 9.8115735733423e-06, "loss": 0.6161, "step": 3493 }, { "epoch": 0.5211425162204489, "grad_norm": 1.449156641960144, "learning_rate": 9.806742720562762e-06, "loss": 0.6272, "step": 3494 }, { "epoch": 0.5212916697740324, "grad_norm": 1.1644972562789917, "learning_rate": 9.801911912900378e-06, "loss": 0.5214, "step": 3495 }, { "epoch": 0.5214408233276158, "grad_norm": 1.5615466833114624, "learning_rate": 9.797081151482928e-06, "loss": 0.5659, "step": 3496 }, { "epoch": 0.5215899768811992, "grad_norm": 1.2919455766677856, "learning_rate": 9.792250437438186e-06, "loss": 0.5362, "step": 3497 }, { "epoch": 0.5217391304347826, "grad_norm": 1.3259048461914062, "learning_rate": 9.787419771893907e-06, "loss": 0.5962, "step": 3498 }, { "epoch": 0.5218882839883661, "grad_norm": 1.376833200454712, "learning_rate": 9.78258915597785e-06, "loss": 0.6368, "step": 3499 }, { "epoch": 0.5220374375419494, "grad_norm": 4.408949851989746, "learning_rate": 9.777758590817746e-06, "loss": 0.608, "step": 3500 }, { "epoch": 0.5221865910955329, "grad_norm": 1.6882200241088867, "learning_rate": 9.772928077541325e-06, "loss": 0.6485, "step": 3501 }, { "epoch": 0.5223357446491163, "grad_norm": 1.2293647527694702, "learning_rate": 9.768097617276303e-06, "loss": 0.5214, "step": 3502 }, { "epoch": 0.5224848982026997, "grad_norm": 1.2127455472946167, "learning_rate": 9.763267211150372e-06, "loss": 0.546, "step": 3503 }, { "epoch": 0.5226340517562831, "grad_norm": 1.2467243671417236, "learning_rate": 9.758436860291232e-06, "loss": 0.5335, "step": 3504 }, { "epoch": 0.5227832053098665, "grad_norm": 1.4504010677337646, "learning_rate": 9.753606565826556e-06, "loss": 0.6361, "step": 3505 }, { "epoch": 0.5229323588634499, "grad_norm": 1.2796071767807007, "learning_rate": 9.748776328884008e-06, "loss": 0.5695, "step": 3506 }, { "epoch": 0.5230815124170334, "grad_norm": 1.3172944784164429, "learning_rate": 9.743946150591237e-06, "loss": 0.5651, "step": 3507 }, { "epoch": 0.5232306659706167, "grad_norm": 1.315944790840149, "learning_rate": 9.739116032075879e-06, "loss": 0.5845, "step": 3508 }, { "epoch": 0.5233798195242002, "grad_norm": 1.8187698125839233, "learning_rate": 9.734285974465554e-06, "loss": 0.6313, "step": 3509 }, { "epoch": 0.5235289730777836, "grad_norm": 1.3729525804519653, "learning_rate": 9.729455978887877e-06, "loss": 0.6584, "step": 3510 }, { "epoch": 0.523678126631367, "grad_norm": 1.7876579761505127, "learning_rate": 9.72462604647043e-06, "loss": 0.6642, "step": 3511 }, { "epoch": 0.5238272801849504, "grad_norm": 1.1108829975128174, "learning_rate": 9.719796178340799e-06, "loss": 0.4987, "step": 3512 }, { "epoch": 0.5239764337385339, "grad_norm": 1.283148169517517, "learning_rate": 9.71496637562655e-06, "loss": 0.5519, "step": 3513 }, { "epoch": 0.5241255872921172, "grad_norm": 1.349249243736267, "learning_rate": 9.710136639455229e-06, "loss": 0.628, "step": 3514 }, { "epoch": 0.5242747408457007, "grad_norm": 1.4943630695343018, "learning_rate": 9.705306970954365e-06, "loss": 0.6605, "step": 3515 }, { "epoch": 0.524423894399284, "grad_norm": 1.4258447885513306, "learning_rate": 9.700477371251481e-06, "loss": 0.568, "step": 3516 }, { "epoch": 0.5245730479528675, "grad_norm": 1.4392647743225098, "learning_rate": 9.695647841474073e-06, "loss": 0.6406, "step": 3517 }, { "epoch": 0.5247222015064509, "grad_norm": 1.5908141136169434, "learning_rate": 9.69081838274963e-06, "loss": 0.5842, "step": 3518 }, { "epoch": 0.5248713550600344, "grad_norm": 1.6949490308761597, "learning_rate": 9.685988996205616e-06, "loss": 0.5527, "step": 3519 }, { "epoch": 0.5250205086136177, "grad_norm": 1.53755784034729, "learning_rate": 9.681159682969483e-06, "loss": 0.5211, "step": 3520 }, { "epoch": 0.5251696621672012, "grad_norm": 1.5737504959106445, "learning_rate": 9.676330444168668e-06, "loss": 0.6023, "step": 3521 }, { "epoch": 0.5253188157207845, "grad_norm": 1.3178600072860718, "learning_rate": 9.671501280930588e-06, "loss": 0.6021, "step": 3522 }, { "epoch": 0.525467969274368, "grad_norm": 1.4055143594741821, "learning_rate": 9.666672194382639e-06, "loss": 0.6073, "step": 3523 }, { "epoch": 0.5256171228279514, "grad_norm": 1.3621550798416138, "learning_rate": 9.661843185652202e-06, "loss": 0.6478, "step": 3524 }, { "epoch": 0.5257662763815348, "grad_norm": 1.9902141094207764, "learning_rate": 9.657014255866643e-06, "loss": 0.6538, "step": 3525 }, { "epoch": 0.5259154299351182, "grad_norm": 1.5314973592758179, "learning_rate": 9.652185406153307e-06, "loss": 0.5312, "step": 3526 }, { "epoch": 0.5260645834887017, "grad_norm": 1.3614912033081055, "learning_rate": 9.647356637639518e-06, "loss": 0.6569, "step": 3527 }, { "epoch": 0.526213737042285, "grad_norm": 1.3529804944992065, "learning_rate": 9.64252795145258e-06, "loss": 0.4673, "step": 3528 }, { "epoch": 0.5263628905958685, "grad_norm": 1.4371600151062012, "learning_rate": 9.637699348719783e-06, "loss": 0.693, "step": 3529 }, { "epoch": 0.5265120441494519, "grad_norm": 1.2703758478164673, "learning_rate": 9.632870830568399e-06, "loss": 0.5942, "step": 3530 }, { "epoch": 0.5266611977030353, "grad_norm": 1.2906749248504639, "learning_rate": 9.628042398125673e-06, "loss": 0.6006, "step": 3531 }, { "epoch": 0.5268103512566187, "grad_norm": 1.2508511543273926, "learning_rate": 9.623214052518836e-06, "loss": 0.5584, "step": 3532 }, { "epoch": 0.5269595048102022, "grad_norm": 1.326625108718872, "learning_rate": 9.618385794875094e-06, "loss": 0.6145, "step": 3533 }, { "epoch": 0.5271086583637855, "grad_norm": 1.3702932596206665, "learning_rate": 9.613557626321633e-06, "loss": 0.6912, "step": 3534 }, { "epoch": 0.527257811917369, "grad_norm": 1.3065927028656006, "learning_rate": 9.608729547985623e-06, "loss": 0.6192, "step": 3535 }, { "epoch": 0.5274069654709523, "grad_norm": 1.4740855693817139, "learning_rate": 9.60390156099421e-06, "loss": 0.6002, "step": 3536 }, { "epoch": 0.5275561190245358, "grad_norm": 1.2197153568267822, "learning_rate": 9.599073666474516e-06, "loss": 0.5816, "step": 3537 }, { "epoch": 0.5277052725781192, "grad_norm": 1.5692273378372192, "learning_rate": 9.594245865553641e-06, "loss": 0.5828, "step": 3538 }, { "epoch": 0.5278544261317026, "grad_norm": 1.1801440715789795, "learning_rate": 9.589418159358677e-06, "loss": 0.6057, "step": 3539 }, { "epoch": 0.528003579685286, "grad_norm": 1.4896751642227173, "learning_rate": 9.584590549016674e-06, "loss": 0.606, "step": 3540 }, { "epoch": 0.5281527332388695, "grad_norm": 1.5981919765472412, "learning_rate": 9.579763035654671e-06, "loss": 0.5723, "step": 3541 }, { "epoch": 0.5283018867924528, "grad_norm": 1.2379094362258911, "learning_rate": 9.574935620399681e-06, "loss": 0.5602, "step": 3542 }, { "epoch": 0.5284510403460363, "grad_norm": 1.482500433921814, "learning_rate": 9.5701083043787e-06, "loss": 0.5824, "step": 3543 }, { "epoch": 0.5286001938996197, "grad_norm": 0.8998609185218811, "learning_rate": 9.56528108871869e-06, "loss": 0.6609, "step": 3544 }, { "epoch": 0.5287493474532031, "grad_norm": 1.5257513523101807, "learning_rate": 9.560453974546594e-06, "loss": 0.5736, "step": 3545 }, { "epoch": 0.5288985010067865, "grad_norm": 1.2318733930587769, "learning_rate": 9.555626962989335e-06, "loss": 0.5671, "step": 3546 }, { "epoch": 0.52904765456037, "grad_norm": 1.224536657333374, "learning_rate": 9.550800055173815e-06, "loss": 0.5508, "step": 3547 }, { "epoch": 0.5291968081139533, "grad_norm": 1.3572498559951782, "learning_rate": 9.5459732522269e-06, "loss": 0.5626, "step": 3548 }, { "epoch": 0.5293459616675368, "grad_norm": 1.445861577987671, "learning_rate": 9.541146555275444e-06, "loss": 0.65, "step": 3549 }, { "epoch": 0.5294951152211201, "grad_norm": 1.554071307182312, "learning_rate": 9.536319965446265e-06, "loss": 0.6649, "step": 3550 }, { "epoch": 0.5296442687747036, "grad_norm": 0.8379166722297668, "learning_rate": 9.531493483866163e-06, "loss": 0.6469, "step": 3551 }, { "epoch": 0.529793422328287, "grad_norm": 1.285592794418335, "learning_rate": 9.526667111661912e-06, "loss": 0.5814, "step": 3552 }, { "epoch": 0.5299425758818704, "grad_norm": 1.3333189487457275, "learning_rate": 9.521840849960256e-06, "loss": 0.5546, "step": 3553 }, { "epoch": 0.5300917294354538, "grad_norm": 1.2744275331497192, "learning_rate": 9.517014699887924e-06, "loss": 0.5765, "step": 3554 }, { "epoch": 0.5302408829890373, "grad_norm": 1.4556970596313477, "learning_rate": 9.512188662571601e-06, "loss": 0.6291, "step": 3555 }, { "epoch": 0.5303900365426206, "grad_norm": 1.5210309028625488, "learning_rate": 9.50736273913797e-06, "loss": 0.6971, "step": 3556 }, { "epoch": 0.5305391900962041, "grad_norm": 1.3034366369247437, "learning_rate": 9.502536930713659e-06, "loss": 0.5474, "step": 3557 }, { "epoch": 0.5306883436497875, "grad_norm": 1.4473110437393188, "learning_rate": 9.497711238425296e-06, "loss": 0.5881, "step": 3558 }, { "epoch": 0.5308374972033709, "grad_norm": 1.3380916118621826, "learning_rate": 9.492885663399465e-06, "loss": 0.6452, "step": 3559 }, { "epoch": 0.5309866507569543, "grad_norm": 1.2923154830932617, "learning_rate": 9.488060206762727e-06, "loss": 0.5431, "step": 3560 }, { "epoch": 0.5311358043105378, "grad_norm": 0.8274335861206055, "learning_rate": 9.483234869641616e-06, "loss": 0.6628, "step": 3561 }, { "epoch": 0.5312849578641211, "grad_norm": 1.6766492128372192, "learning_rate": 9.478409653162639e-06, "loss": 0.6447, "step": 3562 }, { "epoch": 0.5314341114177046, "grad_norm": 1.3955528736114502, "learning_rate": 9.473584558452273e-06, "loss": 0.661, "step": 3563 }, { "epoch": 0.5315832649712879, "grad_norm": 1.862745761871338, "learning_rate": 9.468759586636963e-06, "loss": 0.6266, "step": 3564 }, { "epoch": 0.5317324185248714, "grad_norm": 1.4965338706970215, "learning_rate": 9.463934738843135e-06, "loss": 0.6222, "step": 3565 }, { "epoch": 0.5318815720784548, "grad_norm": 1.3399966955184937, "learning_rate": 9.459110016197184e-06, "loss": 0.6296, "step": 3566 }, { "epoch": 0.5320307256320382, "grad_norm": 1.2317312955856323, "learning_rate": 9.454285419825464e-06, "loss": 0.6534, "step": 3567 }, { "epoch": 0.5321798791856216, "grad_norm": 1.4715334177017212, "learning_rate": 9.449460950854315e-06, "loss": 0.6546, "step": 3568 }, { "epoch": 0.5323290327392051, "grad_norm": 1.2670706510543823, "learning_rate": 9.444636610410036e-06, "loss": 0.602, "step": 3569 }, { "epoch": 0.5324781862927884, "grad_norm": 1.3485397100448608, "learning_rate": 9.439812399618901e-06, "loss": 0.5936, "step": 3570 }, { "epoch": 0.5326273398463719, "grad_norm": 1.8327305316925049, "learning_rate": 9.434988319607153e-06, "loss": 0.6044, "step": 3571 }, { "epoch": 0.5327764933999553, "grad_norm": 1.184992790222168, "learning_rate": 9.430164371501002e-06, "loss": 0.5434, "step": 3572 }, { "epoch": 0.5329256469535387, "grad_norm": 1.4170277118682861, "learning_rate": 9.425340556426635e-06, "loss": 0.7031, "step": 3573 }, { "epoch": 0.5330748005071221, "grad_norm": 1.5247188806533813, "learning_rate": 9.420516875510201e-06, "loss": 0.6163, "step": 3574 }, { "epoch": 0.5332239540607056, "grad_norm": 1.4020105600357056, "learning_rate": 9.415693329877818e-06, "loss": 0.592, "step": 3575 }, { "epoch": 0.5333731076142889, "grad_norm": 1.4091089963912964, "learning_rate": 9.41086992065557e-06, "loss": 0.57, "step": 3576 }, { "epoch": 0.5335222611678723, "grad_norm": 1.1592916250228882, "learning_rate": 9.40604664896952e-06, "loss": 0.6008, "step": 3577 }, { "epoch": 0.5336714147214557, "grad_norm": 0.9097036719322205, "learning_rate": 9.401223515945688e-06, "loss": 0.7004, "step": 3578 }, { "epoch": 0.5338205682750391, "grad_norm": 1.2064710855484009, "learning_rate": 9.396400522710066e-06, "loss": 0.5172, "step": 3579 }, { "epoch": 0.5339697218286226, "grad_norm": 1.7449960708618164, "learning_rate": 9.391577670388612e-06, "loss": 0.567, "step": 3580 }, { "epoch": 0.5341188753822059, "grad_norm": 1.1686441898345947, "learning_rate": 9.38675496010725e-06, "loss": 0.5743, "step": 3581 }, { "epoch": 0.5342680289357894, "grad_norm": 1.467468023300171, "learning_rate": 9.381932392991874e-06, "loss": 0.6191, "step": 3582 }, { "epoch": 0.5344171824893728, "grad_norm": 1.3584939241409302, "learning_rate": 9.377109970168348e-06, "loss": 0.6656, "step": 3583 }, { "epoch": 0.5345663360429562, "grad_norm": 1.2978435754776, "learning_rate": 9.372287692762489e-06, "loss": 0.6032, "step": 3584 }, { "epoch": 0.5347154895965396, "grad_norm": 1.3968000411987305, "learning_rate": 9.367465561900097e-06, "loss": 0.5178, "step": 3585 }, { "epoch": 0.534864643150123, "grad_norm": 1.4848228693008423, "learning_rate": 9.362643578706926e-06, "loss": 0.6266, "step": 3586 }, { "epoch": 0.5350137967037064, "grad_norm": 1.3024628162384033, "learning_rate": 9.357821744308696e-06, "loss": 0.5383, "step": 3587 }, { "epoch": 0.5351629502572899, "grad_norm": 1.2944105863571167, "learning_rate": 9.353000059831097e-06, "loss": 0.5852, "step": 3588 }, { "epoch": 0.5353121038108732, "grad_norm": 0.8473522067070007, "learning_rate": 9.348178526399783e-06, "loss": 0.6347, "step": 3589 }, { "epoch": 0.5354612573644567, "grad_norm": 1.3820356130599976, "learning_rate": 9.343357145140368e-06, "loss": 0.5481, "step": 3590 }, { "epoch": 0.5356104109180401, "grad_norm": 1.3442339897155762, "learning_rate": 9.338535917178444e-06, "loss": 0.6181, "step": 3591 }, { "epoch": 0.5357595644716235, "grad_norm": 1.243635654449463, "learning_rate": 9.33371484363955e-06, "loss": 0.5898, "step": 3592 }, { "epoch": 0.5359087180252069, "grad_norm": 1.2292420864105225, "learning_rate": 9.328893925649196e-06, "loss": 0.5368, "step": 3593 }, { "epoch": 0.5360578715787904, "grad_norm": 1.386193037033081, "learning_rate": 9.324073164332861e-06, "loss": 0.6409, "step": 3594 }, { "epoch": 0.5362070251323737, "grad_norm": 0.8656385540962219, "learning_rate": 9.319252560815977e-06, "loss": 0.6241, "step": 3595 }, { "epoch": 0.5363561786859572, "grad_norm": 1.3668031692504883, "learning_rate": 9.31443211622395e-06, "loss": 0.5139, "step": 3596 }, { "epoch": 0.5365053322395406, "grad_norm": 1.512447476387024, "learning_rate": 9.309611831682142e-06, "loss": 0.6388, "step": 3597 }, { "epoch": 0.536654485793124, "grad_norm": 1.5393844842910767, "learning_rate": 9.304791708315876e-06, "loss": 0.6468, "step": 3598 }, { "epoch": 0.5368036393467074, "grad_norm": 1.6049600839614868, "learning_rate": 9.29997174725044e-06, "loss": 0.5465, "step": 3599 }, { "epoch": 0.5369527929002909, "grad_norm": 1.1970694065093994, "learning_rate": 9.295151949611095e-06, "loss": 0.4675, "step": 3600 }, { "epoch": 0.5371019464538742, "grad_norm": 1.3572754859924316, "learning_rate": 9.290332316523043e-06, "loss": 0.6154, "step": 3601 }, { "epoch": 0.5372511000074577, "grad_norm": 1.47848641872406, "learning_rate": 9.285512849111465e-06, "loss": 0.6316, "step": 3602 }, { "epoch": 0.537400253561041, "grad_norm": 0.8645781874656677, "learning_rate": 9.28069354850149e-06, "loss": 0.7037, "step": 3603 }, { "epoch": 0.5375494071146245, "grad_norm": 1.430933952331543, "learning_rate": 9.275874415818222e-06, "loss": 0.6439, "step": 3604 }, { "epoch": 0.5376985606682079, "grad_norm": 2.1390576362609863, "learning_rate": 9.271055452186716e-06, "loss": 0.6169, "step": 3605 }, { "epoch": 0.5378477142217913, "grad_norm": 1.3742624521255493, "learning_rate": 9.266236658731985e-06, "loss": 0.608, "step": 3606 }, { "epoch": 0.5379968677753747, "grad_norm": 2.218332290649414, "learning_rate": 9.261418036579008e-06, "loss": 0.6355, "step": 3607 }, { "epoch": 0.5381460213289582, "grad_norm": 1.1485450267791748, "learning_rate": 9.256599586852731e-06, "loss": 0.5905, "step": 3608 }, { "epoch": 0.5382951748825415, "grad_norm": 1.8525573015213013, "learning_rate": 9.251781310678046e-06, "loss": 0.5682, "step": 3609 }, { "epoch": 0.538444328436125, "grad_norm": 1.2708206176757812, "learning_rate": 9.246963209179813e-06, "loss": 0.6052, "step": 3610 }, { "epoch": 0.5385934819897084, "grad_norm": 1.3820112943649292, "learning_rate": 9.242145283482848e-06, "loss": 0.6554, "step": 3611 }, { "epoch": 0.5387426355432918, "grad_norm": 1.3133370876312256, "learning_rate": 9.237327534711922e-06, "loss": 0.6364, "step": 3612 }, { "epoch": 0.5388917890968752, "grad_norm": 1.4056167602539062, "learning_rate": 9.232509963991776e-06, "loss": 0.6738, "step": 3613 }, { "epoch": 0.5390409426504587, "grad_norm": 1.292124629020691, "learning_rate": 9.2276925724471e-06, "loss": 0.5308, "step": 3614 }, { "epoch": 0.539190096204042, "grad_norm": 1.1730949878692627, "learning_rate": 9.222875361202546e-06, "loss": 0.6167, "step": 3615 }, { "epoch": 0.5393392497576255, "grad_norm": 0.8063609600067139, "learning_rate": 9.218058331382717e-06, "loss": 0.6219, "step": 3616 }, { "epoch": 0.5394884033112088, "grad_norm": 1.2703139781951904, "learning_rate": 9.213241484112188e-06, "loss": 0.61, "step": 3617 }, { "epoch": 0.5396375568647923, "grad_norm": 1.4455251693725586, "learning_rate": 9.208424820515478e-06, "loss": 0.6505, "step": 3618 }, { "epoch": 0.5397867104183757, "grad_norm": 1.1628228425979614, "learning_rate": 9.203608341717073e-06, "loss": 0.5563, "step": 3619 }, { "epoch": 0.5399358639719591, "grad_norm": 1.2497256994247437, "learning_rate": 9.198792048841403e-06, "loss": 0.521, "step": 3620 }, { "epoch": 0.5400850175255425, "grad_norm": 1.1144877672195435, "learning_rate": 9.19397594301287e-06, "loss": 0.5846, "step": 3621 }, { "epoch": 0.540234171079126, "grad_norm": 1.5630241632461548, "learning_rate": 9.18916002535582e-06, "loss": 0.6008, "step": 3622 }, { "epoch": 0.5403833246327093, "grad_norm": 1.7209970951080322, "learning_rate": 9.184344296994559e-06, "loss": 0.6056, "step": 3623 }, { "epoch": 0.5405324781862928, "grad_norm": 1.6262564659118652, "learning_rate": 9.179528759053355e-06, "loss": 0.6406, "step": 3624 }, { "epoch": 0.5406816317398762, "grad_norm": 1.1379350423812866, "learning_rate": 9.174713412656418e-06, "loss": 0.5714, "step": 3625 }, { "epoch": 0.5408307852934596, "grad_norm": 1.329461932182312, "learning_rate": 9.16989825892793e-06, "loss": 0.6656, "step": 3626 }, { "epoch": 0.540979938847043, "grad_norm": 1.5895103216171265, "learning_rate": 9.165083298992019e-06, "loss": 0.6803, "step": 3627 }, { "epoch": 0.5411290924006265, "grad_norm": 1.3114371299743652, "learning_rate": 9.160268533972763e-06, "loss": 0.6241, "step": 3628 }, { "epoch": 0.5412782459542098, "grad_norm": 1.1522798538208008, "learning_rate": 9.155453964994202e-06, "loss": 0.5368, "step": 3629 }, { "epoch": 0.5414273995077933, "grad_norm": 1.3370405435562134, "learning_rate": 9.150639593180327e-06, "loss": 0.5783, "step": 3630 }, { "epoch": 0.5415765530613766, "grad_norm": 1.2296408414840698, "learning_rate": 9.145825419655086e-06, "loss": 0.5501, "step": 3631 }, { "epoch": 0.5417257066149601, "grad_norm": 1.367531180381775, "learning_rate": 9.141011445542377e-06, "loss": 0.6328, "step": 3632 }, { "epoch": 0.5418748601685435, "grad_norm": 1.3412874937057495, "learning_rate": 9.136197671966058e-06, "loss": 0.6114, "step": 3633 }, { "epoch": 0.5420240137221269, "grad_norm": 1.4513336420059204, "learning_rate": 9.131384100049924e-06, "loss": 0.7096, "step": 3634 }, { "epoch": 0.5421731672757103, "grad_norm": 1.2714160680770874, "learning_rate": 9.126570730917744e-06, "loss": 0.6229, "step": 3635 }, { "epoch": 0.5423223208292938, "grad_norm": 1.2595086097717285, "learning_rate": 9.12175756569323e-06, "loss": 0.5908, "step": 3636 }, { "epoch": 0.5424714743828771, "grad_norm": 1.3313097953796387, "learning_rate": 9.116944605500041e-06, "loss": 0.617, "step": 3637 }, { "epoch": 0.5426206279364606, "grad_norm": 1.448720097541809, "learning_rate": 9.1121318514618e-06, "loss": 0.6457, "step": 3638 }, { "epoch": 0.542769781490044, "grad_norm": 1.173765778541565, "learning_rate": 9.10731930470207e-06, "loss": 0.4977, "step": 3639 }, { "epoch": 0.5429189350436274, "grad_norm": 1.4197989702224731, "learning_rate": 9.10250696634437e-06, "loss": 0.5766, "step": 3640 }, { "epoch": 0.5430680885972108, "grad_norm": 1.230093002319336, "learning_rate": 9.097694837512175e-06, "loss": 0.5377, "step": 3641 }, { "epoch": 0.5432172421507943, "grad_norm": 1.1761765480041504, "learning_rate": 9.092882919328901e-06, "loss": 0.5873, "step": 3642 }, { "epoch": 0.5433663957043776, "grad_norm": 1.4173728227615356, "learning_rate": 9.08807121291793e-06, "loss": 0.606, "step": 3643 }, { "epoch": 0.5435155492579611, "grad_norm": 0.8932705521583557, "learning_rate": 9.083259719402583e-06, "loss": 0.6598, "step": 3644 }, { "epoch": 0.5436647028115444, "grad_norm": 1.441568374633789, "learning_rate": 9.07844843990613e-06, "loss": 0.5736, "step": 3645 }, { "epoch": 0.5438138563651279, "grad_norm": 1.251240611076355, "learning_rate": 9.0736373755518e-06, "loss": 0.5059, "step": 3646 }, { "epoch": 0.5439630099187113, "grad_norm": 1.2431440353393555, "learning_rate": 9.068826527462766e-06, "loss": 0.6091, "step": 3647 }, { "epoch": 0.5441121634722947, "grad_norm": 1.5650755167007446, "learning_rate": 9.064015896762146e-06, "loss": 0.6147, "step": 3648 }, { "epoch": 0.5442613170258781, "grad_norm": 1.2987229824066162, "learning_rate": 9.059205484573019e-06, "loss": 0.6295, "step": 3649 }, { "epoch": 0.5444104705794616, "grad_norm": 1.1900094747543335, "learning_rate": 9.054395292018402e-06, "loss": 0.5661, "step": 3650 }, { "epoch": 0.5445596241330449, "grad_norm": 1.3122448921203613, "learning_rate": 9.049585320221266e-06, "loss": 0.4766, "step": 3651 }, { "epoch": 0.5447087776866284, "grad_norm": 1.1826890707015991, "learning_rate": 9.044775570304534e-06, "loss": 0.6221, "step": 3652 }, { "epoch": 0.5448579312402118, "grad_norm": 1.749520182609558, "learning_rate": 9.03996604339107e-06, "loss": 0.5041, "step": 3653 }, { "epoch": 0.5450070847937952, "grad_norm": 1.629091739654541, "learning_rate": 9.035156740603689e-06, "loss": 0.561, "step": 3654 }, { "epoch": 0.5451562383473786, "grad_norm": 1.5627838373184204, "learning_rate": 9.030347663065152e-06, "loss": 0.579, "step": 3655 }, { "epoch": 0.545305391900962, "grad_norm": 1.2468516826629639, "learning_rate": 9.025538811898172e-06, "loss": 0.4915, "step": 3656 }, { "epoch": 0.5454545454545454, "grad_norm": 1.3869367837905884, "learning_rate": 9.020730188225405e-06, "loss": 0.5532, "step": 3657 }, { "epoch": 0.5456036990081289, "grad_norm": 1.6602556705474854, "learning_rate": 9.015921793169455e-06, "loss": 0.6267, "step": 3658 }, { "epoch": 0.5457528525617122, "grad_norm": 1.4927510023117065, "learning_rate": 9.01111362785287e-06, "loss": 0.6032, "step": 3659 }, { "epoch": 0.5459020061152957, "grad_norm": 1.4062409400939941, "learning_rate": 9.006305693398148e-06, "loss": 0.5795, "step": 3660 }, { "epoch": 0.5460511596688791, "grad_norm": 1.7411869764328003, "learning_rate": 9.001497990927738e-06, "loss": 0.5131, "step": 3661 }, { "epoch": 0.5462003132224625, "grad_norm": 1.4492788314819336, "learning_rate": 8.996690521564021e-06, "loss": 0.6029, "step": 3662 }, { "epoch": 0.5463494667760459, "grad_norm": 1.251030445098877, "learning_rate": 8.991883286429337e-06, "loss": 0.5212, "step": 3663 }, { "epoch": 0.5464986203296294, "grad_norm": 1.623414397239685, "learning_rate": 8.987076286645965e-06, "loss": 0.5699, "step": 3664 }, { "epoch": 0.5466477738832127, "grad_norm": 1.340505599975586, "learning_rate": 8.982269523336126e-06, "loss": 0.6706, "step": 3665 }, { "epoch": 0.5467969274367962, "grad_norm": 1.2657452821731567, "learning_rate": 8.977462997621994e-06, "loss": 0.6291, "step": 3666 }, { "epoch": 0.5469460809903796, "grad_norm": 1.6135315895080566, "learning_rate": 8.972656710625682e-06, "loss": 0.7206, "step": 3667 }, { "epoch": 0.547095234543963, "grad_norm": 1.317594289779663, "learning_rate": 8.967850663469248e-06, "loss": 0.542, "step": 3668 }, { "epoch": 0.5472443880975464, "grad_norm": 1.3806248903274536, "learning_rate": 8.963044857274691e-06, "loss": 0.6026, "step": 3669 }, { "epoch": 0.5473935416511299, "grad_norm": 1.2111942768096924, "learning_rate": 8.958239293163966e-06, "loss": 0.599, "step": 3670 }, { "epoch": 0.5475426952047132, "grad_norm": 1.18038809299469, "learning_rate": 8.953433972258955e-06, "loss": 0.5451, "step": 3671 }, { "epoch": 0.5476918487582967, "grad_norm": 0.8748316168785095, "learning_rate": 8.948628895681498e-06, "loss": 0.6472, "step": 3672 }, { "epoch": 0.54784100231188, "grad_norm": 1.4422858953475952, "learning_rate": 8.943824064553361e-06, "loss": 0.6912, "step": 3673 }, { "epoch": 0.5479901558654635, "grad_norm": 1.2936667203903198, "learning_rate": 8.939019479996272e-06, "loss": 0.5945, "step": 3674 }, { "epoch": 0.5481393094190469, "grad_norm": 1.4169868230819702, "learning_rate": 8.934215143131891e-06, "loss": 0.6353, "step": 3675 }, { "epoch": 0.5482884629726303, "grad_norm": 1.2868907451629639, "learning_rate": 8.929411055081812e-06, "loss": 0.638, "step": 3676 }, { "epoch": 0.5484376165262137, "grad_norm": 1.3880484104156494, "learning_rate": 8.924607216967588e-06, "loss": 0.5054, "step": 3677 }, { "epoch": 0.5485867700797972, "grad_norm": 0.8523768186569214, "learning_rate": 8.919803629910709e-06, "loss": 0.6335, "step": 3678 }, { "epoch": 0.5487359236333805, "grad_norm": 1.4929184913635254, "learning_rate": 8.915000295032594e-06, "loss": 0.605, "step": 3679 }, { "epoch": 0.548885077186964, "grad_norm": 1.2246884107589722, "learning_rate": 8.910197213454622e-06, "loss": 0.4972, "step": 3680 }, { "epoch": 0.5490342307405474, "grad_norm": 1.5293850898742676, "learning_rate": 8.905394386298098e-06, "loss": 0.6585, "step": 3681 }, { "epoch": 0.5491833842941308, "grad_norm": 1.3532230854034424, "learning_rate": 8.900591814684269e-06, "loss": 0.6012, "step": 3682 }, { "epoch": 0.5493325378477142, "grad_norm": 1.2255606651306152, "learning_rate": 8.895789499734335e-06, "loss": 0.5904, "step": 3683 }, { "epoch": 0.5494816914012977, "grad_norm": 1.237815260887146, "learning_rate": 8.890987442569419e-06, "loss": 0.5661, "step": 3684 }, { "epoch": 0.549630844954881, "grad_norm": 1.3448225259780884, "learning_rate": 8.886185644310597e-06, "loss": 0.6054, "step": 3685 }, { "epoch": 0.5497799985084645, "grad_norm": 1.4451076984405518, "learning_rate": 8.881384106078875e-06, "loss": 0.646, "step": 3686 }, { "epoch": 0.5499291520620478, "grad_norm": 1.3037209510803223, "learning_rate": 8.876582828995211e-06, "loss": 0.5868, "step": 3687 }, { "epoch": 0.5500783056156313, "grad_norm": 1.3348017930984497, "learning_rate": 8.871781814180486e-06, "loss": 0.6336, "step": 3688 }, { "epoch": 0.5502274591692147, "grad_norm": 1.597851037979126, "learning_rate": 8.866981062755532e-06, "loss": 0.5301, "step": 3689 }, { "epoch": 0.5503766127227981, "grad_norm": 1.3011411428451538, "learning_rate": 8.862180575841112e-06, "loss": 0.5591, "step": 3690 }, { "epoch": 0.5505257662763815, "grad_norm": 1.3477572202682495, "learning_rate": 8.857380354557937e-06, "loss": 0.5647, "step": 3691 }, { "epoch": 0.550674919829965, "grad_norm": 1.3763991594314575, "learning_rate": 8.85258040002664e-06, "loss": 0.6308, "step": 3692 }, { "epoch": 0.5508240733835483, "grad_norm": 1.4953938722610474, "learning_rate": 8.847780713367808e-06, "loss": 0.5171, "step": 3693 }, { "epoch": 0.5509732269371318, "grad_norm": 1.3431884050369263, "learning_rate": 8.842981295701956e-06, "loss": 0.6053, "step": 3694 }, { "epoch": 0.5511223804907152, "grad_norm": 1.522209882736206, "learning_rate": 8.838182148149537e-06, "loss": 0.5495, "step": 3695 }, { "epoch": 0.5512715340442986, "grad_norm": 1.5771316289901733, "learning_rate": 8.833383271830946e-06, "loss": 0.5971, "step": 3696 }, { "epoch": 0.551420687597882, "grad_norm": 1.1982396841049194, "learning_rate": 8.828584667866514e-06, "loss": 0.5842, "step": 3697 }, { "epoch": 0.5515698411514655, "grad_norm": 1.2502871751785278, "learning_rate": 8.8237863373765e-06, "loss": 0.5123, "step": 3698 }, { "epoch": 0.5517189947050488, "grad_norm": 1.3376829624176025, "learning_rate": 8.818988281481109e-06, "loss": 0.5639, "step": 3699 }, { "epoch": 0.5518681482586323, "grad_norm": 1.4704622030258179, "learning_rate": 8.814190501300475e-06, "loss": 0.5907, "step": 3700 }, { "epoch": 0.5520173018122156, "grad_norm": 1.2495944499969482, "learning_rate": 8.809392997954673e-06, "loss": 0.6514, "step": 3701 }, { "epoch": 0.5521664553657991, "grad_norm": 1.3096723556518555, "learning_rate": 8.80459577256371e-06, "loss": 0.5914, "step": 3702 }, { "epoch": 0.5523156089193825, "grad_norm": 1.5380429029464722, "learning_rate": 8.799798826247526e-06, "loss": 0.7033, "step": 3703 }, { "epoch": 0.5524647624729659, "grad_norm": 1.368354082107544, "learning_rate": 8.795002160126002e-06, "loss": 0.5884, "step": 3704 }, { "epoch": 0.5526139160265493, "grad_norm": 1.2871867418289185, "learning_rate": 8.790205775318952e-06, "loss": 0.5737, "step": 3705 }, { "epoch": 0.5527630695801328, "grad_norm": 1.378084659576416, "learning_rate": 8.785409672946123e-06, "loss": 0.5919, "step": 3706 }, { "epoch": 0.5529122231337161, "grad_norm": 1.3850791454315186, "learning_rate": 8.78061385412719e-06, "loss": 0.54, "step": 3707 }, { "epoch": 0.5530613766872996, "grad_norm": 1.50929594039917, "learning_rate": 8.775818319981776e-06, "loss": 0.6266, "step": 3708 }, { "epoch": 0.553210530240883, "grad_norm": 2.654308557510376, "learning_rate": 8.77102307162942e-06, "loss": 0.6444, "step": 3709 }, { "epoch": 0.5533596837944664, "grad_norm": 1.3745782375335693, "learning_rate": 8.76622811018961e-06, "loss": 0.6179, "step": 3710 }, { "epoch": 0.5535088373480498, "grad_norm": 2.718702554702759, "learning_rate": 8.76143343678176e-06, "loss": 0.6062, "step": 3711 }, { "epoch": 0.5536579909016333, "grad_norm": 1.6211977005004883, "learning_rate": 8.756639052525213e-06, "loss": 0.5704, "step": 3712 }, { "epoch": 0.5538071444552166, "grad_norm": 1.3585487604141235, "learning_rate": 8.751844958539251e-06, "loss": 0.585, "step": 3713 }, { "epoch": 0.5539562980088001, "grad_norm": 1.4278167486190796, "learning_rate": 8.747051155943091e-06, "loss": 0.7193, "step": 3714 }, { "epoch": 0.5541054515623834, "grad_norm": 1.2676736116409302, "learning_rate": 8.74225764585587e-06, "loss": 0.6234, "step": 3715 }, { "epoch": 0.5542546051159669, "grad_norm": 1.521454930305481, "learning_rate": 8.737464429396668e-06, "loss": 0.5607, "step": 3716 }, { "epoch": 0.5544037586695503, "grad_norm": 1.3980764150619507, "learning_rate": 8.73267150768449e-06, "loss": 0.6489, "step": 3717 }, { "epoch": 0.5545529122231337, "grad_norm": 1.4428128004074097, "learning_rate": 8.727878881838273e-06, "loss": 0.6575, "step": 3718 }, { "epoch": 0.5547020657767171, "grad_norm": 1.3959758281707764, "learning_rate": 8.72308655297689e-06, "loss": 0.5694, "step": 3719 }, { "epoch": 0.5548512193303006, "grad_norm": 1.279783844947815, "learning_rate": 8.718294522219137e-06, "loss": 0.5482, "step": 3720 }, { "epoch": 0.5550003728838839, "grad_norm": 0.9235059022903442, "learning_rate": 8.713502790683743e-06, "loss": 0.6446, "step": 3721 }, { "epoch": 0.5551495264374674, "grad_norm": 1.4083302021026611, "learning_rate": 8.708711359489377e-06, "loss": 0.611, "step": 3722 }, { "epoch": 0.5552986799910508, "grad_norm": 1.2888797521591187, "learning_rate": 8.703920229754624e-06, "loss": 0.6015, "step": 3723 }, { "epoch": 0.5554478335446342, "grad_norm": 1.268739938735962, "learning_rate": 8.699129402598001e-06, "loss": 0.5251, "step": 3724 }, { "epoch": 0.5555969870982176, "grad_norm": 1.358841896057129, "learning_rate": 8.694338879137962e-06, "loss": 0.5731, "step": 3725 }, { "epoch": 0.5557461406518011, "grad_norm": 2.301024913787842, "learning_rate": 8.689548660492882e-06, "loss": 0.6736, "step": 3726 }, { "epoch": 0.5558952942053844, "grad_norm": 1.4847357273101807, "learning_rate": 8.684758747781073e-06, "loss": 0.5872, "step": 3727 }, { "epoch": 0.5560444477589679, "grad_norm": 1.4054360389709473, "learning_rate": 8.679969142120765e-06, "loss": 0.6139, "step": 3728 }, { "epoch": 0.5561936013125512, "grad_norm": 1.3887158632278442, "learning_rate": 8.675179844630125e-06, "loss": 0.6072, "step": 3729 }, { "epoch": 0.5563427548661347, "grad_norm": 1.4626717567443848, "learning_rate": 8.670390856427242e-06, "loss": 0.6199, "step": 3730 }, { "epoch": 0.5564919084197181, "grad_norm": 1.2793527841567993, "learning_rate": 8.665602178630146e-06, "loss": 0.482, "step": 3731 }, { "epoch": 0.5566410619733015, "grad_norm": 1.5861188173294067, "learning_rate": 8.660813812356773e-06, "loss": 0.6366, "step": 3732 }, { "epoch": 0.5567902155268849, "grad_norm": 1.4287104606628418, "learning_rate": 8.656025758725004e-06, "loss": 0.5847, "step": 3733 }, { "epoch": 0.5569393690804684, "grad_norm": 1.2759472131729126, "learning_rate": 8.651238018852638e-06, "loss": 0.6788, "step": 3734 }, { "epoch": 0.5570885226340517, "grad_norm": 1.3793432712554932, "learning_rate": 8.646450593857407e-06, "loss": 0.5218, "step": 3735 }, { "epoch": 0.5572376761876352, "grad_norm": 2.079179525375366, "learning_rate": 8.641663484856964e-06, "loss": 0.588, "step": 3736 }, { "epoch": 0.5573868297412186, "grad_norm": 1.367468237876892, "learning_rate": 8.636876692968887e-06, "loss": 0.5803, "step": 3737 }, { "epoch": 0.557535983294802, "grad_norm": 1.38997220993042, "learning_rate": 8.632090219310688e-06, "loss": 0.6056, "step": 3738 }, { "epoch": 0.5576851368483854, "grad_norm": 0.8542543053627014, "learning_rate": 8.627304064999798e-06, "loss": 0.6635, "step": 3739 }, { "epoch": 0.5578342904019689, "grad_norm": 1.5818606615066528, "learning_rate": 8.622518231153574e-06, "loss": 0.5826, "step": 3740 }, { "epoch": 0.5579834439555522, "grad_norm": 1.3167203664779663, "learning_rate": 8.617732718889305e-06, "loss": 0.5759, "step": 3741 }, { "epoch": 0.5581325975091357, "grad_norm": 1.4335002899169922, "learning_rate": 8.612947529324196e-06, "loss": 0.649, "step": 3742 }, { "epoch": 0.558281751062719, "grad_norm": 1.4412827491760254, "learning_rate": 8.608162663575378e-06, "loss": 0.5576, "step": 3743 }, { "epoch": 0.5584309046163025, "grad_norm": 1.444968581199646, "learning_rate": 8.603378122759912e-06, "loss": 0.6562, "step": 3744 }, { "epoch": 0.5585800581698859, "grad_norm": 1.3116463422775269, "learning_rate": 8.598593907994778e-06, "loss": 0.5318, "step": 3745 }, { "epoch": 0.5587292117234693, "grad_norm": 1.294175624847412, "learning_rate": 8.593810020396882e-06, "loss": 0.5442, "step": 3746 }, { "epoch": 0.5588783652770527, "grad_norm": 1.3812941312789917, "learning_rate": 8.58902646108305e-06, "loss": 0.6621, "step": 3747 }, { "epoch": 0.5590275188306362, "grad_norm": 1.4474889039993286, "learning_rate": 8.584243231170042e-06, "loss": 0.5707, "step": 3748 }, { "epoch": 0.5591766723842195, "grad_norm": 0.8896284699440002, "learning_rate": 8.579460331774529e-06, "loss": 0.6757, "step": 3749 }, { "epoch": 0.559325825937803, "grad_norm": 1.2456148862838745, "learning_rate": 8.57467776401311e-06, "loss": 0.4796, "step": 3750 }, { "epoch": 0.5594749794913864, "grad_norm": 1.3051154613494873, "learning_rate": 8.569895529002305e-06, "loss": 0.6094, "step": 3751 }, { "epoch": 0.5596241330449698, "grad_norm": 1.5485191345214844, "learning_rate": 8.565113627858562e-06, "loss": 0.6046, "step": 3752 }, { "epoch": 0.5597732865985532, "grad_norm": 1.3476548194885254, "learning_rate": 8.560332061698242e-06, "loss": 0.5579, "step": 3753 }, { "epoch": 0.5599224401521367, "grad_norm": 1.2357107400894165, "learning_rate": 8.55555083163763e-06, "loss": 0.5747, "step": 3754 }, { "epoch": 0.56007159370572, "grad_norm": 1.2405842542648315, "learning_rate": 8.550769938792943e-06, "loss": 0.5207, "step": 3755 }, { "epoch": 0.5602207472593035, "grad_norm": 1.3715606927871704, "learning_rate": 8.5459893842803e-06, "loss": 0.5022, "step": 3756 }, { "epoch": 0.5603699008128868, "grad_norm": 1.4574596881866455, "learning_rate": 8.54120916921576e-06, "loss": 0.5887, "step": 3757 }, { "epoch": 0.5605190543664703, "grad_norm": 1.4619243144989014, "learning_rate": 8.536429294715296e-06, "loss": 0.5838, "step": 3758 }, { "epoch": 0.5606682079200537, "grad_norm": 1.272911787033081, "learning_rate": 8.5316497618948e-06, "loss": 0.6075, "step": 3759 }, { "epoch": 0.5608173614736371, "grad_norm": 1.2630513906478882, "learning_rate": 8.526870571870077e-06, "loss": 0.6128, "step": 3760 }, { "epoch": 0.5609665150272205, "grad_norm": 1.3344424962997437, "learning_rate": 8.522091725756868e-06, "loss": 0.5605, "step": 3761 }, { "epoch": 0.561115668580804, "grad_norm": 1.5745210647583008, "learning_rate": 8.51731322467082e-06, "loss": 0.6759, "step": 3762 }, { "epoch": 0.5612648221343873, "grad_norm": 1.4416600465774536, "learning_rate": 8.51253506972751e-06, "loss": 0.5891, "step": 3763 }, { "epoch": 0.5614139756879708, "grad_norm": 0.8879626989364624, "learning_rate": 8.507757262042423e-06, "loss": 0.6536, "step": 3764 }, { "epoch": 0.5615631292415542, "grad_norm": 1.1836239099502563, "learning_rate": 8.502979802730968e-06, "loss": 0.4816, "step": 3765 }, { "epoch": 0.5617122827951376, "grad_norm": 1.2331706285476685, "learning_rate": 8.49820269290848e-06, "loss": 0.6397, "step": 3766 }, { "epoch": 0.561861436348721, "grad_norm": 1.2043613195419312, "learning_rate": 8.493425933690205e-06, "loss": 0.6072, "step": 3767 }, { "epoch": 0.5620105899023045, "grad_norm": 1.247153401374817, "learning_rate": 8.488649526191303e-06, "loss": 0.6022, "step": 3768 }, { "epoch": 0.5621597434558878, "grad_norm": 1.2051231861114502, "learning_rate": 8.483873471526865e-06, "loss": 0.5584, "step": 3769 }, { "epoch": 0.5623088970094713, "grad_norm": 1.2532057762145996, "learning_rate": 8.479097770811881e-06, "loss": 0.6108, "step": 3770 }, { "epoch": 0.5624580505630546, "grad_norm": 0.8800726532936096, "learning_rate": 8.474322425161279e-06, "loss": 0.696, "step": 3771 }, { "epoch": 0.5626072041166381, "grad_norm": 1.377514362335205, "learning_rate": 8.469547435689888e-06, "loss": 0.6196, "step": 3772 }, { "epoch": 0.5627563576702215, "grad_norm": 1.38112211227417, "learning_rate": 8.464772803512458e-06, "loss": 0.6371, "step": 3773 }, { "epoch": 0.5629055112238049, "grad_norm": 1.4808980226516724, "learning_rate": 8.459998529743661e-06, "loss": 0.625, "step": 3774 }, { "epoch": 0.5630546647773883, "grad_norm": 1.1545478105545044, "learning_rate": 8.455224615498086e-06, "loss": 0.5852, "step": 3775 }, { "epoch": 0.5632038183309718, "grad_norm": 0.8235055804252625, "learning_rate": 8.450451061890228e-06, "loss": 0.635, "step": 3776 }, { "epoch": 0.5633529718845551, "grad_norm": 1.3566768169403076, "learning_rate": 8.445677870034506e-06, "loss": 0.5676, "step": 3777 }, { "epoch": 0.5635021254381386, "grad_norm": 1.3008859157562256, "learning_rate": 8.440905041045253e-06, "loss": 0.6004, "step": 3778 }, { "epoch": 0.563651278991722, "grad_norm": 1.311112880706787, "learning_rate": 8.43613257603671e-06, "loss": 0.593, "step": 3779 }, { "epoch": 0.5638004325453054, "grad_norm": 1.3850491046905518, "learning_rate": 8.43136047612305e-06, "loss": 0.6322, "step": 3780 }, { "epoch": 0.5639495860988888, "grad_norm": 1.3427860736846924, "learning_rate": 8.426588742418343e-06, "loss": 0.5458, "step": 3781 }, { "epoch": 0.5640987396524723, "grad_norm": 1.4817816019058228, "learning_rate": 8.421817376036578e-06, "loss": 0.5367, "step": 3782 }, { "epoch": 0.5642478932060556, "grad_norm": 1.3136659860610962, "learning_rate": 8.417046378091674e-06, "loss": 0.6476, "step": 3783 }, { "epoch": 0.5643970467596391, "grad_norm": 1.4052814245224, "learning_rate": 8.41227574969744e-06, "loss": 0.6144, "step": 3784 }, { "epoch": 0.5645462003132224, "grad_norm": 1.3279670476913452, "learning_rate": 8.40750549196761e-06, "loss": 0.6288, "step": 3785 }, { "epoch": 0.5646953538668059, "grad_norm": 1.2597132921218872, "learning_rate": 8.40273560601584e-06, "loss": 0.5929, "step": 3786 }, { "epoch": 0.5648445074203893, "grad_norm": 1.2333678007125854, "learning_rate": 8.397966092955678e-06, "loss": 0.6144, "step": 3787 }, { "epoch": 0.5649936609739727, "grad_norm": 1.28394615650177, "learning_rate": 8.39319695390061e-06, "loss": 0.6119, "step": 3788 }, { "epoch": 0.5651428145275561, "grad_norm": 0.9202291369438171, "learning_rate": 8.388428189964014e-06, "loss": 0.6799, "step": 3789 }, { "epoch": 0.5652919680811396, "grad_norm": 1.4208210706710815, "learning_rate": 8.383659802259187e-06, "loss": 0.5252, "step": 3790 }, { "epoch": 0.5654411216347229, "grad_norm": 1.2461885213851929, "learning_rate": 8.378891791899343e-06, "loss": 0.602, "step": 3791 }, { "epoch": 0.5655902751883064, "grad_norm": 1.5889301300048828, "learning_rate": 8.37412415999761e-06, "loss": 0.6166, "step": 3792 }, { "epoch": 0.5657394287418898, "grad_norm": 1.4934686422348022, "learning_rate": 8.369356907667013e-06, "loss": 0.604, "step": 3793 }, { "epoch": 0.5658885822954732, "grad_norm": 1.5034891366958618, "learning_rate": 8.364590036020503e-06, "loss": 0.6169, "step": 3794 }, { "epoch": 0.5660377358490566, "grad_norm": 1.2858333587646484, "learning_rate": 8.359823546170936e-06, "loss": 0.565, "step": 3795 }, { "epoch": 0.5661868894026401, "grad_norm": 0.8604456782341003, "learning_rate": 8.355057439231078e-06, "loss": 0.6708, "step": 3796 }, { "epoch": 0.5663360429562234, "grad_norm": 1.1659153699874878, "learning_rate": 8.35029171631361e-06, "loss": 0.5549, "step": 3797 }, { "epoch": 0.5664851965098069, "grad_norm": 1.4337071180343628, "learning_rate": 8.345526378531117e-06, "loss": 0.5567, "step": 3798 }, { "epoch": 0.5666343500633902, "grad_norm": 1.4094858169555664, "learning_rate": 8.3407614269961e-06, "loss": 0.5653, "step": 3799 }, { "epoch": 0.5667835036169737, "grad_norm": 1.2767021656036377, "learning_rate": 8.335996862820964e-06, "loss": 0.5338, "step": 3800 }, { "epoch": 0.5669326571705571, "grad_norm": 1.3198059797286987, "learning_rate": 8.331232687118035e-06, "loss": 0.6834, "step": 3801 }, { "epoch": 0.5670818107241405, "grad_norm": 1.482847809791565, "learning_rate": 8.326468900999532e-06, "loss": 0.5749, "step": 3802 }, { "epoch": 0.5672309642777239, "grad_norm": 1.3283475637435913, "learning_rate": 8.321705505577597e-06, "loss": 0.5975, "step": 3803 }, { "epoch": 0.5673801178313074, "grad_norm": 1.269721508026123, "learning_rate": 8.31694250196427e-06, "loss": 0.5514, "step": 3804 }, { "epoch": 0.5675292713848907, "grad_norm": 1.7527053356170654, "learning_rate": 8.312179891271512e-06, "loss": 0.6305, "step": 3805 }, { "epoch": 0.5676784249384742, "grad_norm": 1.1900012493133545, "learning_rate": 8.30741767461118e-06, "loss": 0.5221, "step": 3806 }, { "epoch": 0.5678275784920576, "grad_norm": 1.3172372579574585, "learning_rate": 8.302655853095043e-06, "loss": 0.5395, "step": 3807 }, { "epoch": 0.567976732045641, "grad_norm": 1.4768359661102295, "learning_rate": 8.297894427834777e-06, "loss": 0.512, "step": 3808 }, { "epoch": 0.5681258855992244, "grad_norm": 1.302437424659729, "learning_rate": 8.293133399941977e-06, "loss": 0.5448, "step": 3809 }, { "epoch": 0.5682750391528079, "grad_norm": 1.4927462339401245, "learning_rate": 8.288372770528125e-06, "loss": 0.5619, "step": 3810 }, { "epoch": 0.5684241927063912, "grad_norm": 1.3806920051574707, "learning_rate": 8.283612540704628e-06, "loss": 0.6088, "step": 3811 }, { "epoch": 0.5685733462599747, "grad_norm": 1.41744863986969, "learning_rate": 8.27885271158279e-06, "loss": 0.5529, "step": 3812 }, { "epoch": 0.568722499813558, "grad_norm": 1.3535982370376587, "learning_rate": 8.274093284273819e-06, "loss": 0.5898, "step": 3813 }, { "epoch": 0.5688716533671415, "grad_norm": 1.368456244468689, "learning_rate": 8.26933425988884e-06, "loss": 0.5393, "step": 3814 }, { "epoch": 0.5690208069207249, "grad_norm": 1.423256754875183, "learning_rate": 8.264575639538873e-06, "loss": 0.5904, "step": 3815 }, { "epoch": 0.5691699604743083, "grad_norm": 1.4922939538955688, "learning_rate": 8.259817424334851e-06, "loss": 0.6945, "step": 3816 }, { "epoch": 0.5693191140278917, "grad_norm": 1.446753740310669, "learning_rate": 8.255059615387606e-06, "loss": 0.635, "step": 3817 }, { "epoch": 0.5694682675814752, "grad_norm": 1.2760361433029175, "learning_rate": 8.250302213807886e-06, "loss": 0.5903, "step": 3818 }, { "epoch": 0.5696174211350585, "grad_norm": 1.2555078268051147, "learning_rate": 8.245545220706334e-06, "loss": 0.5379, "step": 3819 }, { "epoch": 0.569766574688642, "grad_norm": 1.235505223274231, "learning_rate": 8.2407886371935e-06, "loss": 0.4805, "step": 3820 }, { "epoch": 0.5699157282422254, "grad_norm": 1.4293981790542603, "learning_rate": 8.236032464379838e-06, "loss": 0.417, "step": 3821 }, { "epoch": 0.5700648817958088, "grad_norm": 1.1821634769439697, "learning_rate": 8.231276703375708e-06, "loss": 0.5651, "step": 3822 }, { "epoch": 0.5702140353493922, "grad_norm": 1.2274906635284424, "learning_rate": 8.226521355291372e-06, "loss": 0.5513, "step": 3823 }, { "epoch": 0.5703631889029757, "grad_norm": 1.3841887712478638, "learning_rate": 8.221766421237e-06, "loss": 0.6098, "step": 3824 }, { "epoch": 0.570512342456559, "grad_norm": 1.5575422048568726, "learning_rate": 8.217011902322656e-06, "loss": 0.573, "step": 3825 }, { "epoch": 0.5706614960101425, "grad_norm": 1.2302415370941162, "learning_rate": 8.212257799658315e-06, "loss": 0.5562, "step": 3826 }, { "epoch": 0.5708106495637258, "grad_norm": 1.475637674331665, "learning_rate": 8.207504114353854e-06, "loss": 0.628, "step": 3827 }, { "epoch": 0.5709598031173093, "grad_norm": 1.468366026878357, "learning_rate": 8.202750847519055e-06, "loss": 0.5824, "step": 3828 }, { "epoch": 0.5711089566708927, "grad_norm": 1.5497323274612427, "learning_rate": 8.197998000263591e-06, "loss": 0.5655, "step": 3829 }, { "epoch": 0.5712581102244761, "grad_norm": 1.4783234596252441, "learning_rate": 8.193245573697051e-06, "loss": 0.5413, "step": 3830 }, { "epoch": 0.5714072637780595, "grad_norm": 1.1361087560653687, "learning_rate": 8.188493568928916e-06, "loss": 0.4699, "step": 3831 }, { "epoch": 0.571556417331643, "grad_norm": 1.2633676528930664, "learning_rate": 8.18374198706857e-06, "loss": 0.4748, "step": 3832 }, { "epoch": 0.5717055708852263, "grad_norm": 1.2955957651138306, "learning_rate": 8.178990829225308e-06, "loss": 0.6488, "step": 3833 }, { "epoch": 0.5718547244388098, "grad_norm": 1.3607399463653564, "learning_rate": 8.17424009650831e-06, "loss": 0.5402, "step": 3834 }, { "epoch": 0.5720038779923932, "grad_norm": 1.4092133045196533, "learning_rate": 8.169489790026664e-06, "loss": 0.6434, "step": 3835 }, { "epoch": 0.5721530315459766, "grad_norm": 1.307054042816162, "learning_rate": 8.16473991088937e-06, "loss": 0.574, "step": 3836 }, { "epoch": 0.57230218509956, "grad_norm": 1.4485487937927246, "learning_rate": 8.159990460205312e-06, "loss": 0.5807, "step": 3837 }, { "epoch": 0.5724513386531435, "grad_norm": 1.4016304016113281, "learning_rate": 8.155241439083277e-06, "loss": 0.6282, "step": 3838 }, { "epoch": 0.5726004922067268, "grad_norm": 1.2052271366119385, "learning_rate": 8.150492848631958e-06, "loss": 0.5157, "step": 3839 }, { "epoch": 0.5727496457603103, "grad_norm": 1.127413034439087, "learning_rate": 8.14574468995994e-06, "loss": 0.5106, "step": 3840 }, { "epoch": 0.5728987993138936, "grad_norm": 1.2802897691726685, "learning_rate": 8.140996964175716e-06, "loss": 0.5843, "step": 3841 }, { "epoch": 0.5730479528674771, "grad_norm": 1.2683857679367065, "learning_rate": 8.136249672387673e-06, "loss": 0.562, "step": 3842 }, { "epoch": 0.5731971064210605, "grad_norm": 1.3734012842178345, "learning_rate": 8.131502815704087e-06, "loss": 0.595, "step": 3843 }, { "epoch": 0.573346259974644, "grad_norm": 0.9221314787864685, "learning_rate": 8.126756395233154e-06, "loss": 0.6515, "step": 3844 }, { "epoch": 0.5734954135282273, "grad_norm": 1.2809404134750366, "learning_rate": 8.122010412082952e-06, "loss": 0.6568, "step": 3845 }, { "epoch": 0.5736445670818108, "grad_norm": 1.3781918287277222, "learning_rate": 8.117264867361461e-06, "loss": 0.6101, "step": 3846 }, { "epoch": 0.5737937206353941, "grad_norm": 1.5603338479995728, "learning_rate": 8.112519762176559e-06, "loss": 0.648, "step": 3847 }, { "epoch": 0.5739428741889776, "grad_norm": 1.4627447128295898, "learning_rate": 8.107775097636023e-06, "loss": 0.5954, "step": 3848 }, { "epoch": 0.574092027742561, "grad_norm": 1.3361165523529053, "learning_rate": 8.103030874847521e-06, "loss": 0.5549, "step": 3849 }, { "epoch": 0.5742411812961444, "grad_norm": 1.1637228727340698, "learning_rate": 8.098287094918625e-06, "loss": 0.5977, "step": 3850 }, { "epoch": 0.5743903348497278, "grad_norm": 1.2707346677780151, "learning_rate": 8.093543758956802e-06, "loss": 0.5874, "step": 3851 }, { "epoch": 0.5745394884033113, "grad_norm": 1.354382038116455, "learning_rate": 8.088800868069406e-06, "loss": 0.5906, "step": 3852 }, { "epoch": 0.5746886419568946, "grad_norm": 4.5954389572143555, "learning_rate": 8.084058423363709e-06, "loss": 0.6541, "step": 3853 }, { "epoch": 0.5748377955104781, "grad_norm": 1.2714996337890625, "learning_rate": 8.079316425946858e-06, "loss": 0.6134, "step": 3854 }, { "epoch": 0.5749869490640614, "grad_norm": 1.508148431777954, "learning_rate": 8.0745748769259e-06, "loss": 0.6362, "step": 3855 }, { "epoch": 0.5751361026176449, "grad_norm": 1.2771574258804321, "learning_rate": 8.069833777407786e-06, "loss": 0.6529, "step": 3856 }, { "epoch": 0.5752852561712283, "grad_norm": 1.4533445835113525, "learning_rate": 8.065093128499351e-06, "loss": 0.5869, "step": 3857 }, { "epoch": 0.5754344097248117, "grad_norm": 0.8235033750534058, "learning_rate": 8.060352931307332e-06, "loss": 0.64, "step": 3858 }, { "epoch": 0.5755835632783951, "grad_norm": 1.5283024311065674, "learning_rate": 8.055613186938357e-06, "loss": 0.6097, "step": 3859 }, { "epoch": 0.5757327168319786, "grad_norm": 1.4586937427520752, "learning_rate": 8.050873896498955e-06, "loss": 0.5527, "step": 3860 }, { "epoch": 0.5758818703855619, "grad_norm": 1.6200847625732422, "learning_rate": 8.046135061095534e-06, "loss": 0.6292, "step": 3861 }, { "epoch": 0.5760310239391454, "grad_norm": 1.2940634489059448, "learning_rate": 8.041396681834415e-06, "loss": 0.5136, "step": 3862 }, { "epoch": 0.5761801774927288, "grad_norm": 1.2420419454574585, "learning_rate": 8.036658759821799e-06, "loss": 0.524, "step": 3863 }, { "epoch": 0.5763293310463122, "grad_norm": 1.6397463083267212, "learning_rate": 8.031921296163785e-06, "loss": 0.4965, "step": 3864 }, { "epoch": 0.5764784845998956, "grad_norm": 1.465620517730713, "learning_rate": 8.027184291966361e-06, "loss": 0.5937, "step": 3865 }, { "epoch": 0.5766276381534791, "grad_norm": 1.5831716060638428, "learning_rate": 8.022447748335418e-06, "loss": 0.5488, "step": 3866 }, { "epoch": 0.5767767917070624, "grad_norm": 1.1597630977630615, "learning_rate": 8.017711666376726e-06, "loss": 0.5751, "step": 3867 }, { "epoch": 0.5769259452606459, "grad_norm": 1.5529264211654663, "learning_rate": 8.012976047195955e-06, "loss": 0.5501, "step": 3868 }, { "epoch": 0.5770750988142292, "grad_norm": 1.3606805801391602, "learning_rate": 8.00824089189867e-06, "loss": 0.5262, "step": 3869 }, { "epoch": 0.5772242523678127, "grad_norm": 0.8639048933982849, "learning_rate": 8.003506201590315e-06, "loss": 0.6598, "step": 3870 }, { "epoch": 0.5773734059213961, "grad_norm": 1.3485599756240845, "learning_rate": 7.99877197737624e-06, "loss": 0.6228, "step": 3871 }, { "epoch": 0.5775225594749795, "grad_norm": 1.3043369054794312, "learning_rate": 7.994038220361682e-06, "loss": 0.5927, "step": 3872 }, { "epoch": 0.5776717130285629, "grad_norm": 1.2691482305526733, "learning_rate": 7.989304931651763e-06, "loss": 0.666, "step": 3873 }, { "epoch": 0.5778208665821463, "grad_norm": 1.3367236852645874, "learning_rate": 7.984572112351499e-06, "loss": 0.6391, "step": 3874 }, { "epoch": 0.5779700201357297, "grad_norm": 1.1338223218917847, "learning_rate": 7.9798397635658e-06, "loss": 0.4712, "step": 3875 }, { "epoch": 0.5781191736893131, "grad_norm": 1.4544349908828735, "learning_rate": 7.975107886399457e-06, "loss": 0.6437, "step": 3876 }, { "epoch": 0.5782683272428966, "grad_norm": 1.7033140659332275, "learning_rate": 7.970376481957166e-06, "loss": 0.5212, "step": 3877 }, { "epoch": 0.5784174807964799, "grad_norm": 1.5752676725387573, "learning_rate": 7.965645551343497e-06, "loss": 0.4904, "step": 3878 }, { "epoch": 0.5785666343500634, "grad_norm": 1.291908860206604, "learning_rate": 7.960915095662922e-06, "loss": 0.7016, "step": 3879 }, { "epoch": 0.5787157879036468, "grad_norm": 1.3415755033493042, "learning_rate": 7.956185116019787e-06, "loss": 0.5186, "step": 3880 }, { "epoch": 0.5788649414572302, "grad_norm": 1.3119882345199585, "learning_rate": 7.951455613518348e-06, "loss": 0.6154, "step": 3881 }, { "epoch": 0.5790140950108136, "grad_norm": 1.2215855121612549, "learning_rate": 7.946726589262726e-06, "loss": 0.6445, "step": 3882 }, { "epoch": 0.579163248564397, "grad_norm": 1.3357770442962646, "learning_rate": 7.941998044356951e-06, "loss": 0.5909, "step": 3883 }, { "epoch": 0.5793124021179804, "grad_norm": 1.3423863649368286, "learning_rate": 7.937269979904928e-06, "loss": 0.6284, "step": 3884 }, { "epoch": 0.5794615556715639, "grad_norm": 1.4234533309936523, "learning_rate": 7.932542397010453e-06, "loss": 0.5826, "step": 3885 }, { "epoch": 0.5796107092251472, "grad_norm": 0.8563059568405151, "learning_rate": 7.927815296777216e-06, "loss": 0.6615, "step": 3886 }, { "epoch": 0.5797598627787307, "grad_norm": 1.281204342842102, "learning_rate": 7.923088680308777e-06, "loss": 0.5571, "step": 3887 }, { "epoch": 0.5799090163323141, "grad_norm": 1.2330150604248047, "learning_rate": 7.918362548708607e-06, "loss": 0.4644, "step": 3888 }, { "epoch": 0.5800581698858975, "grad_norm": 1.2344567775726318, "learning_rate": 7.91363690308005e-06, "loss": 0.6228, "step": 3889 }, { "epoch": 0.5802073234394809, "grad_norm": 1.3636059761047363, "learning_rate": 7.908911744526334e-06, "loss": 0.576, "step": 3890 }, { "epoch": 0.5803564769930644, "grad_norm": 1.3065179586410522, "learning_rate": 7.90418707415058e-06, "loss": 0.6886, "step": 3891 }, { "epoch": 0.5805056305466477, "grad_norm": 2.071660041809082, "learning_rate": 7.899462893055792e-06, "loss": 0.4951, "step": 3892 }, { "epoch": 0.5806547841002312, "grad_norm": 1.2430893182754517, "learning_rate": 7.894739202344857e-06, "loss": 0.6014, "step": 3893 }, { "epoch": 0.5808039376538146, "grad_norm": 1.928389072418213, "learning_rate": 7.890016003120559e-06, "loss": 0.5828, "step": 3894 }, { "epoch": 0.580953091207398, "grad_norm": 1.4901416301727295, "learning_rate": 7.885293296485551e-06, "loss": 0.5739, "step": 3895 }, { "epoch": 0.5811022447609814, "grad_norm": 1.432561993598938, "learning_rate": 7.880571083542381e-06, "loss": 0.5882, "step": 3896 }, { "epoch": 0.5812513983145648, "grad_norm": 1.2510432004928589, "learning_rate": 7.875849365393484e-06, "loss": 0.5462, "step": 3897 }, { "epoch": 0.5814005518681482, "grad_norm": 2.0296905040740967, "learning_rate": 7.871128143141175e-06, "loss": 0.5147, "step": 3898 }, { "epoch": 0.5815497054217317, "grad_norm": 1.2797744274139404, "learning_rate": 7.866407417887647e-06, "loss": 0.5835, "step": 3899 }, { "epoch": 0.581698858975315, "grad_norm": 1.5211126804351807, "learning_rate": 7.861687190734992e-06, "loss": 0.5782, "step": 3900 }, { "epoch": 0.5818480125288985, "grad_norm": 1.575217366218567, "learning_rate": 7.85696746278517e-06, "loss": 0.6257, "step": 3901 }, { "epoch": 0.5819971660824819, "grad_norm": 1.237031102180481, "learning_rate": 7.852248235140038e-06, "loss": 0.5729, "step": 3902 }, { "epoch": 0.5821463196360653, "grad_norm": 1.3201026916503906, "learning_rate": 7.847529508901327e-06, "loss": 0.5425, "step": 3903 }, { "epoch": 0.5822954731896487, "grad_norm": 1.4313859939575195, "learning_rate": 7.84281128517065e-06, "loss": 0.5494, "step": 3904 }, { "epoch": 0.5824446267432322, "grad_norm": 1.409014344215393, "learning_rate": 7.83809356504951e-06, "loss": 0.6263, "step": 3905 }, { "epoch": 0.5825937802968155, "grad_norm": 0.8429812788963318, "learning_rate": 7.833376349639295e-06, "loss": 0.659, "step": 3906 }, { "epoch": 0.582742933850399, "grad_norm": 1.3833163976669312, "learning_rate": 7.82865964004126e-06, "loss": 0.5402, "step": 3907 }, { "epoch": 0.5828920874039824, "grad_norm": 1.1591717004776, "learning_rate": 7.823943437356556e-06, "loss": 0.5321, "step": 3908 }, { "epoch": 0.5830412409575658, "grad_norm": 1.465923547744751, "learning_rate": 7.81922774268621e-06, "loss": 0.6432, "step": 3909 }, { "epoch": 0.5831903945111492, "grad_norm": 1.3524662256240845, "learning_rate": 7.81451255713113e-06, "loss": 0.5305, "step": 3910 }, { "epoch": 0.5833395480647326, "grad_norm": 0.8437208533287048, "learning_rate": 7.809797881792108e-06, "loss": 0.6102, "step": 3911 }, { "epoch": 0.583488701618316, "grad_norm": 1.4885165691375732, "learning_rate": 7.80508371776981e-06, "loss": 0.5676, "step": 3912 }, { "epoch": 0.5836378551718995, "grad_norm": 1.478703260421753, "learning_rate": 7.800370066164793e-06, "loss": 0.6029, "step": 3913 }, { "epoch": 0.5837870087254828, "grad_norm": 1.3561201095581055, "learning_rate": 7.79565692807749e-06, "loss": 0.6392, "step": 3914 }, { "epoch": 0.5839361622790663, "grad_norm": 1.2653135061264038, "learning_rate": 7.790944304608214e-06, "loss": 0.6198, "step": 3915 }, { "epoch": 0.5840853158326497, "grad_norm": 1.754876732826233, "learning_rate": 7.786232196857151e-06, "loss": 0.5661, "step": 3916 }, { "epoch": 0.5842344693862331, "grad_norm": 1.641527771949768, "learning_rate": 7.781520605924378e-06, "loss": 0.4905, "step": 3917 }, { "epoch": 0.5843836229398165, "grad_norm": 1.2619950771331787, "learning_rate": 7.776809532909843e-06, "loss": 0.5862, "step": 3918 }, { "epoch": 0.5845327764934, "grad_norm": 1.415120244026184, "learning_rate": 7.772098978913381e-06, "loss": 0.5027, "step": 3919 }, { "epoch": 0.5846819300469833, "grad_norm": 1.4680074453353882, "learning_rate": 7.767388945034695e-06, "loss": 0.6326, "step": 3920 }, { "epoch": 0.5848310836005668, "grad_norm": 1.2367874383926392, "learning_rate": 7.762679432373376e-06, "loss": 0.6276, "step": 3921 }, { "epoch": 0.5849802371541502, "grad_norm": 2.360558032989502, "learning_rate": 7.757970442028886e-06, "loss": 0.4988, "step": 3922 }, { "epoch": 0.5851293907077336, "grad_norm": 1.618985652923584, "learning_rate": 7.753261975100577e-06, "loss": 0.5719, "step": 3923 }, { "epoch": 0.585278544261317, "grad_norm": 1.4808857440948486, "learning_rate": 7.748554032687664e-06, "loss": 0.4502, "step": 3924 }, { "epoch": 0.5854276978149004, "grad_norm": 1.192893147468567, "learning_rate": 7.74384661588925e-06, "loss": 0.5455, "step": 3925 }, { "epoch": 0.5855768513684838, "grad_norm": 0.8874445557594299, "learning_rate": 7.73913972580431e-06, "loss": 0.6834, "step": 3926 }, { "epoch": 0.5857260049220673, "grad_norm": 1.362600564956665, "learning_rate": 7.734433363531694e-06, "loss": 0.512, "step": 3927 }, { "epoch": 0.5858751584756506, "grad_norm": 1.269500970840454, "learning_rate": 7.729727530170141e-06, "loss": 0.6017, "step": 3928 }, { "epoch": 0.5860243120292341, "grad_norm": 1.3201123476028442, "learning_rate": 7.72502222681825e-06, "loss": 0.5746, "step": 3929 }, { "epoch": 0.5861734655828175, "grad_norm": 1.142930030822754, "learning_rate": 7.72031745457451e-06, "loss": 0.5113, "step": 3930 }, { "epoch": 0.5863226191364009, "grad_norm": 1.3659281730651855, "learning_rate": 7.715613214537272e-06, "loss": 0.6422, "step": 3931 }, { "epoch": 0.5864717726899843, "grad_norm": 1.415631890296936, "learning_rate": 7.710909507804782e-06, "loss": 0.5698, "step": 3932 }, { "epoch": 0.5866209262435678, "grad_norm": 1.1608390808105469, "learning_rate": 7.706206335475143e-06, "loss": 0.578, "step": 3933 }, { "epoch": 0.5867700797971511, "grad_norm": 1.1564010381698608, "learning_rate": 7.701503698646345e-06, "loss": 0.5718, "step": 3934 }, { "epoch": 0.5869192333507346, "grad_norm": 1.647506833076477, "learning_rate": 7.696801598416245e-06, "loss": 0.6119, "step": 3935 }, { "epoch": 0.587068386904318, "grad_norm": 1.3896939754486084, "learning_rate": 7.692100035882581e-06, "loss": 0.5428, "step": 3936 }, { "epoch": 0.5872175404579014, "grad_norm": 1.4753667116165161, "learning_rate": 7.687399012142964e-06, "loss": 0.575, "step": 3937 }, { "epoch": 0.5873666940114848, "grad_norm": 1.446696400642395, "learning_rate": 7.682698528294872e-06, "loss": 0.625, "step": 3938 }, { "epoch": 0.5875158475650682, "grad_norm": 1.1963647603988647, "learning_rate": 7.677998585435669e-06, "loss": 0.4847, "step": 3939 }, { "epoch": 0.5876650011186516, "grad_norm": 1.2984048128128052, "learning_rate": 7.673299184662582e-06, "loss": 0.5908, "step": 3940 }, { "epoch": 0.5878141546722351, "grad_norm": 1.4098061323165894, "learning_rate": 7.668600327072721e-06, "loss": 0.5697, "step": 3941 }, { "epoch": 0.5879633082258184, "grad_norm": 1.443963885307312, "learning_rate": 7.663902013763064e-06, "loss": 0.5196, "step": 3942 }, { "epoch": 0.5881124617794019, "grad_norm": 1.5297573804855347, "learning_rate": 7.65920424583046e-06, "loss": 0.628, "step": 3943 }, { "epoch": 0.5882616153329853, "grad_norm": 1.3184280395507812, "learning_rate": 7.654507024371635e-06, "loss": 0.6461, "step": 3944 }, { "epoch": 0.5884107688865687, "grad_norm": 1.2999205589294434, "learning_rate": 7.649810350483187e-06, "loss": 0.5939, "step": 3945 }, { "epoch": 0.5885599224401521, "grad_norm": 1.3051857948303223, "learning_rate": 7.645114225261577e-06, "loss": 0.6371, "step": 3946 }, { "epoch": 0.5887090759937356, "grad_norm": 1.3323040008544922, "learning_rate": 7.640418649803155e-06, "loss": 0.5427, "step": 3947 }, { "epoch": 0.5888582295473189, "grad_norm": 1.6035672426223755, "learning_rate": 7.635723625204124e-06, "loss": 0.6283, "step": 3948 }, { "epoch": 0.5890073831009024, "grad_norm": 0.8204013705253601, "learning_rate": 7.631029152560574e-06, "loss": 0.6334, "step": 3949 }, { "epoch": 0.5891565366544858, "grad_norm": 1.2638459205627441, "learning_rate": 7.62633523296846e-06, "loss": 0.5734, "step": 3950 }, { "epoch": 0.5893056902080692, "grad_norm": 1.326932430267334, "learning_rate": 7.621641867523608e-06, "loss": 0.6162, "step": 3951 }, { "epoch": 0.5894548437616526, "grad_norm": 1.4736734628677368, "learning_rate": 7.6169490573217085e-06, "loss": 0.6144, "step": 3952 }, { "epoch": 0.589603997315236, "grad_norm": 1.58966863155365, "learning_rate": 7.612256803458335e-06, "loss": 0.6508, "step": 3953 }, { "epoch": 0.5897531508688194, "grad_norm": 1.377236247062683, "learning_rate": 7.607565107028918e-06, "loss": 0.6485, "step": 3954 }, { "epoch": 0.5899023044224029, "grad_norm": 1.1739699840545654, "learning_rate": 7.602873969128769e-06, "loss": 0.4621, "step": 3955 }, { "epoch": 0.5900514579759862, "grad_norm": 1.2667630910873413, "learning_rate": 7.598183390853063e-06, "loss": 0.5184, "step": 3956 }, { "epoch": 0.5902006115295697, "grad_norm": 0.8976048827171326, "learning_rate": 7.593493373296841e-06, "loss": 0.6477, "step": 3957 }, { "epoch": 0.5903497650831531, "grad_norm": 3.6351945400238037, "learning_rate": 7.588803917555023e-06, "loss": 0.542, "step": 3958 }, { "epoch": 0.5904989186367365, "grad_norm": 1.3653740882873535, "learning_rate": 7.584115024722392e-06, "loss": 0.5357, "step": 3959 }, { "epoch": 0.5906480721903199, "grad_norm": 1.346408724784851, "learning_rate": 7.579426695893599e-06, "loss": 0.6233, "step": 3960 }, { "epoch": 0.5907972257439034, "grad_norm": 1.3939111232757568, "learning_rate": 7.574738932163167e-06, "loss": 0.5685, "step": 3961 }, { "epoch": 0.5909463792974867, "grad_norm": 1.5128757953643799, "learning_rate": 7.570051734625481e-06, "loss": 0.5612, "step": 3962 }, { "epoch": 0.5910955328510702, "grad_norm": 1.3549760580062866, "learning_rate": 7.565365104374798e-06, "loss": 0.5633, "step": 3963 }, { "epoch": 0.5912446864046536, "grad_norm": 1.1872590780258179, "learning_rate": 7.560679042505242e-06, "loss": 0.5906, "step": 3964 }, { "epoch": 0.591393839958237, "grad_norm": 1.206207275390625, "learning_rate": 7.555993550110805e-06, "loss": 0.6442, "step": 3965 }, { "epoch": 0.5915429935118204, "grad_norm": 1.4706193208694458, "learning_rate": 7.551308628285341e-06, "loss": 0.5365, "step": 3966 }, { "epoch": 0.5916921470654039, "grad_norm": 1.2843029499053955, "learning_rate": 7.546624278122583e-06, "loss": 0.6447, "step": 3967 }, { "epoch": 0.5918413006189872, "grad_norm": 1.3563631772994995, "learning_rate": 7.5419405007161195e-06, "loss": 0.6204, "step": 3968 }, { "epoch": 0.5919904541725707, "grad_norm": 1.3582322597503662, "learning_rate": 7.537257297159404e-06, "loss": 0.5485, "step": 3969 }, { "epoch": 0.592139607726154, "grad_norm": 1.18340265750885, "learning_rate": 7.532574668545767e-06, "loss": 0.5172, "step": 3970 }, { "epoch": 0.5922887612797375, "grad_norm": 1.4247597455978394, "learning_rate": 7.527892615968392e-06, "loss": 0.6349, "step": 3971 }, { "epoch": 0.5924379148333209, "grad_norm": 1.2630172967910767, "learning_rate": 7.523211140520339e-06, "loss": 0.5408, "step": 3972 }, { "epoch": 0.5925870683869043, "grad_norm": 1.2120277881622314, "learning_rate": 7.518530243294526e-06, "loss": 0.4904, "step": 3973 }, { "epoch": 0.5927362219404877, "grad_norm": 1.3934606313705444, "learning_rate": 7.513849925383736e-06, "loss": 0.5206, "step": 3974 }, { "epoch": 0.5928853754940712, "grad_norm": 1.32155179977417, "learning_rate": 7.509170187880623e-06, "loss": 0.6109, "step": 3975 }, { "epoch": 0.5930345290476545, "grad_norm": 1.544219732284546, "learning_rate": 7.504491031877704e-06, "loss": 0.5903, "step": 3976 }, { "epoch": 0.593183682601238, "grad_norm": 1.2565559148788452, "learning_rate": 7.499812458467353e-06, "loss": 0.5718, "step": 3977 }, { "epoch": 0.5933328361548214, "grad_norm": 1.4791655540466309, "learning_rate": 7.495134468741816e-06, "loss": 0.4577, "step": 3978 }, { "epoch": 0.5934819897084048, "grad_norm": 1.191705346107483, "learning_rate": 7.490457063793199e-06, "loss": 0.6432, "step": 3979 }, { "epoch": 0.5936311432619882, "grad_norm": 0.9088422060012817, "learning_rate": 7.4857802447134706e-06, "loss": 0.7012, "step": 3980 }, { "epoch": 0.5937802968155717, "grad_norm": 1.5830819606781006, "learning_rate": 7.481104012594466e-06, "loss": 0.5436, "step": 3981 }, { "epoch": 0.593929450369155, "grad_norm": 1.6865324974060059, "learning_rate": 7.476428368527879e-06, "loss": 0.6732, "step": 3982 }, { "epoch": 0.5940786039227385, "grad_norm": 0.8435749411582947, "learning_rate": 7.47175331360527e-06, "loss": 0.649, "step": 3983 }, { "epoch": 0.5942277574763218, "grad_norm": 1.3998169898986816, "learning_rate": 7.467078848918065e-06, "loss": 0.6366, "step": 3984 }, { "epoch": 0.5943769110299053, "grad_norm": 1.4172818660736084, "learning_rate": 7.46240497555754e-06, "loss": 0.6376, "step": 3985 }, { "epoch": 0.5945260645834887, "grad_norm": 1.5451463460922241, "learning_rate": 7.457731694614848e-06, "loss": 0.6036, "step": 3986 }, { "epoch": 0.5946752181370721, "grad_norm": 1.3155930042266846, "learning_rate": 7.453059007180994e-06, "loss": 0.4991, "step": 3987 }, { "epoch": 0.5948243716906555, "grad_norm": 1.398343801498413, "learning_rate": 7.448386914346842e-06, "loss": 0.6019, "step": 3988 }, { "epoch": 0.594973525244239, "grad_norm": 1.2883998155593872, "learning_rate": 7.443715417203128e-06, "loss": 0.6149, "step": 3989 }, { "epoch": 0.5951226787978223, "grad_norm": 1.3741885423660278, "learning_rate": 7.439044516840439e-06, "loss": 0.6331, "step": 3990 }, { "epoch": 0.5952718323514058, "grad_norm": 1.306242823600769, "learning_rate": 7.434374214349232e-06, "loss": 0.5335, "step": 3991 }, { "epoch": 0.5954209859049892, "grad_norm": 1.4564566612243652, "learning_rate": 7.42970451081981e-06, "loss": 0.5735, "step": 3992 }, { "epoch": 0.5955701394585726, "grad_norm": 1.5528901815414429, "learning_rate": 7.425035407342355e-06, "loss": 0.6048, "step": 3993 }, { "epoch": 0.595719293012156, "grad_norm": 1.143750548362732, "learning_rate": 7.420366905006893e-06, "loss": 0.5445, "step": 3994 }, { "epoch": 0.5958684465657395, "grad_norm": 1.2993354797363281, "learning_rate": 7.415699004903319e-06, "loss": 0.5521, "step": 3995 }, { "epoch": 0.5960176001193228, "grad_norm": 1.5423287153244019, "learning_rate": 7.4110317081213825e-06, "loss": 0.6347, "step": 3996 }, { "epoch": 0.5961667536729063, "grad_norm": 1.4102466106414795, "learning_rate": 7.406365015750696e-06, "loss": 0.6509, "step": 3997 }, { "epoch": 0.5963159072264896, "grad_norm": 1.6948713064193726, "learning_rate": 7.401698928880726e-06, "loss": 0.6131, "step": 3998 }, { "epoch": 0.5964650607800731, "grad_norm": 1.3199708461761475, "learning_rate": 7.3970334486008e-06, "loss": 0.5609, "step": 3999 }, { "epoch": 0.5966142143336565, "grad_norm": 1.354393720626831, "learning_rate": 7.3923685760001085e-06, "loss": 0.5702, "step": 4000 }, { "epoch": 0.5967633678872399, "grad_norm": 1.348929762840271, "learning_rate": 7.387704312167687e-06, "loss": 0.6257, "step": 4001 }, { "epoch": 0.5969125214408233, "grad_norm": 1.3312245607376099, "learning_rate": 7.383040658192449e-06, "loss": 0.5608, "step": 4002 }, { "epoch": 0.5970616749944068, "grad_norm": 1.3339041471481323, "learning_rate": 7.378377615163148e-06, "loss": 0.5997, "step": 4003 }, { "epoch": 0.5972108285479901, "grad_norm": 1.3274552822113037, "learning_rate": 7.373715184168405e-06, "loss": 0.516, "step": 4004 }, { "epoch": 0.5973599821015736, "grad_norm": 1.278127670288086, "learning_rate": 7.36905336629669e-06, "loss": 0.6303, "step": 4005 }, { "epoch": 0.597509135655157, "grad_norm": 1.3054856061935425, "learning_rate": 7.364392162636338e-06, "loss": 0.6394, "step": 4006 }, { "epoch": 0.5976582892087404, "grad_norm": 1.2377463579177856, "learning_rate": 7.359731574275533e-06, "loss": 0.5839, "step": 4007 }, { "epoch": 0.5978074427623238, "grad_norm": 1.2972005605697632, "learning_rate": 7.355071602302324e-06, "loss": 0.6646, "step": 4008 }, { "epoch": 0.5979565963159073, "grad_norm": 1.1930978298187256, "learning_rate": 7.350412247804603e-06, "loss": 0.547, "step": 4009 }, { "epoch": 0.5981057498694906, "grad_norm": 1.3394184112548828, "learning_rate": 7.345753511870139e-06, "loss": 0.6016, "step": 4010 }, { "epoch": 0.5982549034230741, "grad_norm": 1.6431901454925537, "learning_rate": 7.3410953955865324e-06, "loss": 0.6309, "step": 4011 }, { "epoch": 0.5984040569766574, "grad_norm": 1.2262152433395386, "learning_rate": 7.336437900041258e-06, "loss": 0.5719, "step": 4012 }, { "epoch": 0.5985532105302409, "grad_norm": 1.3407062292099, "learning_rate": 7.331781026321631e-06, "loss": 0.5728, "step": 4013 }, { "epoch": 0.5987023640838243, "grad_norm": 1.3254854679107666, "learning_rate": 7.327124775514837e-06, "loss": 0.5653, "step": 4014 }, { "epoch": 0.5988515176374077, "grad_norm": 1.5286028385162354, "learning_rate": 7.3224691487079e-06, "loss": 0.5886, "step": 4015 }, { "epoch": 0.5990006711909911, "grad_norm": 1.2791836261749268, "learning_rate": 7.317814146987708e-06, "loss": 0.5344, "step": 4016 }, { "epoch": 0.5991498247445746, "grad_norm": 1.2611509561538696, "learning_rate": 7.313159771441003e-06, "loss": 0.5973, "step": 4017 }, { "epoch": 0.5992989782981579, "grad_norm": 1.4620908498764038, "learning_rate": 7.308506023154375e-06, "loss": 0.5789, "step": 4018 }, { "epoch": 0.5994481318517414, "grad_norm": 0.8864767551422119, "learning_rate": 7.303852903214274e-06, "loss": 0.6591, "step": 4019 }, { "epoch": 0.5995972854053248, "grad_norm": 1.3487534523010254, "learning_rate": 7.299200412707004e-06, "loss": 0.5407, "step": 4020 }, { "epoch": 0.5997464389589082, "grad_norm": 1.997146725654602, "learning_rate": 7.294548552718714e-06, "loss": 0.5878, "step": 4021 }, { "epoch": 0.5998955925124916, "grad_norm": 1.3072201013565063, "learning_rate": 7.289897324335411e-06, "loss": 0.6154, "step": 4022 }, { "epoch": 0.600044746066075, "grad_norm": 1.3389346599578857, "learning_rate": 7.285246728642956e-06, "loss": 0.6172, "step": 4023 }, { "epoch": 0.6001938996196584, "grad_norm": 1.5410206317901611, "learning_rate": 7.280596766727057e-06, "loss": 0.5049, "step": 4024 }, { "epoch": 0.6003430531732419, "grad_norm": 1.646315336227417, "learning_rate": 7.2759474396732835e-06, "loss": 0.616, "step": 4025 }, { "epoch": 0.6004922067268252, "grad_norm": 1.5214403867721558, "learning_rate": 7.271298748567043e-06, "loss": 0.6016, "step": 4026 }, { "epoch": 0.6006413602804087, "grad_norm": 1.6594899892807007, "learning_rate": 7.2666506944936045e-06, "loss": 0.4898, "step": 4027 }, { "epoch": 0.6007905138339921, "grad_norm": 1.1939775943756104, "learning_rate": 7.262003278538092e-06, "loss": 0.5687, "step": 4028 }, { "epoch": 0.6009396673875755, "grad_norm": 1.2913479804992676, "learning_rate": 7.25735650178547e-06, "loss": 0.5059, "step": 4029 }, { "epoch": 0.6010888209411589, "grad_norm": 1.5231773853302002, "learning_rate": 7.252710365320557e-06, "loss": 0.5697, "step": 4030 }, { "epoch": 0.6012379744947424, "grad_norm": 1.4086357355117798, "learning_rate": 7.248064870228028e-06, "loss": 0.5577, "step": 4031 }, { "epoch": 0.6013871280483257, "grad_norm": 1.43175208568573, "learning_rate": 7.243420017592397e-06, "loss": 0.5113, "step": 4032 }, { "epoch": 0.6015362816019092, "grad_norm": 1.4796160459518433, "learning_rate": 7.2387758084980405e-06, "loss": 0.5331, "step": 4033 }, { "epoch": 0.6016854351554926, "grad_norm": 1.264186978340149, "learning_rate": 7.234132244029177e-06, "loss": 0.5673, "step": 4034 }, { "epoch": 0.601834588709076, "grad_norm": 1.307248592376709, "learning_rate": 7.229489325269874e-06, "loss": 0.5861, "step": 4035 }, { "epoch": 0.6019837422626594, "grad_norm": 1.3396944999694824, "learning_rate": 7.224847053304049e-06, "loss": 0.5815, "step": 4036 }, { "epoch": 0.6021328958162429, "grad_norm": 1.2666964530944824, "learning_rate": 7.22020542921548e-06, "loss": 0.5628, "step": 4037 }, { "epoch": 0.6022820493698262, "grad_norm": 1.2714629173278809, "learning_rate": 7.215564454087775e-06, "loss": 0.5432, "step": 4038 }, { "epoch": 0.6024312029234097, "grad_norm": 1.3964225053787231, "learning_rate": 7.210924129004404e-06, "loss": 0.5659, "step": 4039 }, { "epoch": 0.602580356476993, "grad_norm": 1.3619718551635742, "learning_rate": 7.206284455048677e-06, "loss": 0.5972, "step": 4040 }, { "epoch": 0.6027295100305765, "grad_norm": 0.8902430534362793, "learning_rate": 7.2016454333037585e-06, "loss": 0.6426, "step": 4041 }, { "epoch": 0.6028786635841599, "grad_norm": 1.4108794927597046, "learning_rate": 7.1970070648526565e-06, "loss": 0.5621, "step": 4042 }, { "epoch": 0.6030278171377433, "grad_norm": 1.4978773593902588, "learning_rate": 7.1923693507782276e-06, "loss": 0.6211, "step": 4043 }, { "epoch": 0.6031769706913267, "grad_norm": 1.6193046569824219, "learning_rate": 7.187732292163173e-06, "loss": 0.5187, "step": 4044 }, { "epoch": 0.6033261242449102, "grad_norm": 1.9523062705993652, "learning_rate": 7.183095890090052e-06, "loss": 0.5588, "step": 4045 }, { "epoch": 0.6034752777984935, "grad_norm": 1.661716341972351, "learning_rate": 7.178460145641257e-06, "loss": 0.5203, "step": 4046 }, { "epoch": 0.603624431352077, "grad_norm": 1.525107741355896, "learning_rate": 7.173825059899031e-06, "loss": 0.5912, "step": 4047 }, { "epoch": 0.6037735849056604, "grad_norm": 2.2942817211151123, "learning_rate": 7.1691906339454685e-06, "loss": 0.5811, "step": 4048 }, { "epoch": 0.6039227384592438, "grad_norm": 0.8259332180023193, "learning_rate": 7.164556868862502e-06, "loss": 0.6243, "step": 4049 }, { "epoch": 0.6040718920128272, "grad_norm": 1.3642394542694092, "learning_rate": 7.159923765731917e-06, "loss": 0.5471, "step": 4050 }, { "epoch": 0.6042210455664107, "grad_norm": 1.8198944330215454, "learning_rate": 7.1552913256353405e-06, "loss": 0.5906, "step": 4051 }, { "epoch": 0.604370199119994, "grad_norm": 1.2826786041259766, "learning_rate": 7.150659549654242e-06, "loss": 0.6023, "step": 4052 }, { "epoch": 0.6045193526735775, "grad_norm": 1.2733609676361084, "learning_rate": 7.146028438869938e-06, "loss": 0.6183, "step": 4053 }, { "epoch": 0.6046685062271608, "grad_norm": 1.3136694431304932, "learning_rate": 7.141397994363602e-06, "loss": 0.6527, "step": 4054 }, { "epoch": 0.6048176597807443, "grad_norm": 1.317527174949646, "learning_rate": 7.136768217216227e-06, "loss": 0.669, "step": 4055 }, { "epoch": 0.6049668133343277, "grad_norm": 1.307837963104248, "learning_rate": 7.132139108508678e-06, "loss": 0.5715, "step": 4056 }, { "epoch": 0.6051159668879111, "grad_norm": 1.5534504652023315, "learning_rate": 7.12751066932164e-06, "loss": 0.6608, "step": 4057 }, { "epoch": 0.6052651204414945, "grad_norm": 1.2679387331008911, "learning_rate": 7.122882900735653e-06, "loss": 0.6261, "step": 4058 }, { "epoch": 0.605414273995078, "grad_norm": 1.5037537813186646, "learning_rate": 7.118255803831104e-06, "loss": 0.6431, "step": 4059 }, { "epoch": 0.6055634275486613, "grad_norm": 1.431168556213379, "learning_rate": 7.113629379688212e-06, "loss": 0.604, "step": 4060 }, { "epoch": 0.6057125811022448, "grad_norm": 1.324157476425171, "learning_rate": 7.109003629387052e-06, "loss": 0.5587, "step": 4061 }, { "epoch": 0.6058617346558282, "grad_norm": 1.209755778312683, "learning_rate": 7.104378554007527e-06, "loss": 0.5522, "step": 4062 }, { "epoch": 0.6060108882094116, "grad_norm": 1.5231677293777466, "learning_rate": 7.099754154629399e-06, "loss": 0.5432, "step": 4063 }, { "epoch": 0.606160041762995, "grad_norm": 1.4292051792144775, "learning_rate": 7.09513043233226e-06, "loss": 0.5463, "step": 4064 }, { "epoch": 0.6063091953165785, "grad_norm": 1.4150515794754028, "learning_rate": 7.090507388195549e-06, "loss": 0.6496, "step": 4065 }, { "epoch": 0.6064583488701618, "grad_norm": 2.1820731163024902, "learning_rate": 7.085885023298541e-06, "loss": 0.6251, "step": 4066 }, { "epoch": 0.6066075024237453, "grad_norm": 1.388872504234314, "learning_rate": 7.081263338720362e-06, "loss": 0.6831, "step": 4067 }, { "epoch": 0.6067566559773286, "grad_norm": 1.7821003198623657, "learning_rate": 7.076642335539969e-06, "loss": 0.6054, "step": 4068 }, { "epoch": 0.6069058095309121, "grad_norm": 1.244022250175476, "learning_rate": 7.072022014836172e-06, "loss": 0.513, "step": 4069 }, { "epoch": 0.6070549630844955, "grad_norm": 1.1746314764022827, "learning_rate": 7.0674023776876086e-06, "loss": 0.6054, "step": 4070 }, { "epoch": 0.6072041166380789, "grad_norm": 1.3016761541366577, "learning_rate": 7.062783425172759e-06, "loss": 0.5786, "step": 4071 }, { "epoch": 0.6073532701916623, "grad_norm": 1.3183561563491821, "learning_rate": 7.058165158369955e-06, "loss": 0.5297, "step": 4072 }, { "epoch": 0.6075024237452458, "grad_norm": 1.4660429954528809, "learning_rate": 7.0535475783573606e-06, "loss": 0.5505, "step": 4073 }, { "epoch": 0.6076515772988291, "grad_norm": 1.2908294200897217, "learning_rate": 7.048930686212974e-06, "loss": 0.5733, "step": 4074 }, { "epoch": 0.6078007308524126, "grad_norm": 1.3974268436431885, "learning_rate": 7.044314483014642e-06, "loss": 0.6224, "step": 4075 }, { "epoch": 0.607949884405996, "grad_norm": 1.4810569286346436, "learning_rate": 7.039698969840049e-06, "loss": 0.5769, "step": 4076 }, { "epoch": 0.6080990379595794, "grad_norm": 1.3655635118484497, "learning_rate": 7.035084147766709e-06, "loss": 0.5899, "step": 4077 }, { "epoch": 0.6082481915131628, "grad_norm": 1.3952393531799316, "learning_rate": 7.030470017871989e-06, "loss": 0.5585, "step": 4078 }, { "epoch": 0.6083973450667463, "grad_norm": 1.9027944803237915, "learning_rate": 7.025856581233078e-06, "loss": 0.5393, "step": 4079 }, { "epoch": 0.6085464986203296, "grad_norm": 1.4070593118667603, "learning_rate": 7.021243838927021e-06, "loss": 0.6324, "step": 4080 }, { "epoch": 0.6086956521739131, "grad_norm": 1.3680896759033203, "learning_rate": 7.016631792030692e-06, "loss": 0.617, "step": 4081 }, { "epoch": 0.6088448057274964, "grad_norm": 1.3161826133728027, "learning_rate": 7.012020441620801e-06, "loss": 0.5817, "step": 4082 }, { "epoch": 0.6089939592810799, "grad_norm": 1.4752676486968994, "learning_rate": 7.007409788773895e-06, "loss": 0.6118, "step": 4083 }, { "epoch": 0.6091431128346633, "grad_norm": 1.6104536056518555, "learning_rate": 7.002799834566365e-06, "loss": 0.5736, "step": 4084 }, { "epoch": 0.6092922663882467, "grad_norm": 1.2091403007507324, "learning_rate": 6.998190580074429e-06, "loss": 0.5447, "step": 4085 }, { "epoch": 0.6094414199418301, "grad_norm": 1.1135931015014648, "learning_rate": 6.993582026374152e-06, "loss": 0.5132, "step": 4086 }, { "epoch": 0.6095905734954136, "grad_norm": 1.2265046834945679, "learning_rate": 6.988974174541428e-06, "loss": 0.5816, "step": 4087 }, { "epoch": 0.6097397270489969, "grad_norm": 1.6278334856033325, "learning_rate": 6.9843670256519855e-06, "loss": 0.5971, "step": 4088 }, { "epoch": 0.6098888806025804, "grad_norm": 1.2830063104629517, "learning_rate": 6.979760580781399e-06, "loss": 0.5737, "step": 4089 }, { "epoch": 0.6100380341561638, "grad_norm": 1.7100818157196045, "learning_rate": 6.975154841005074e-06, "loss": 0.6076, "step": 4090 }, { "epoch": 0.6101871877097472, "grad_norm": 1.3304803371429443, "learning_rate": 6.970549807398244e-06, "loss": 0.5771, "step": 4091 }, { "epoch": 0.6103363412633306, "grad_norm": 1.3079723119735718, "learning_rate": 6.965945481035989e-06, "loss": 0.7026, "step": 4092 }, { "epoch": 0.610485494816914, "grad_norm": 1.2609714269638062, "learning_rate": 6.961341862993215e-06, "loss": 0.6334, "step": 4093 }, { "epoch": 0.6106346483704974, "grad_norm": 1.4635899066925049, "learning_rate": 6.9567389543446665e-06, "loss": 0.697, "step": 4094 }, { "epoch": 0.6107838019240809, "grad_norm": 1.6356874704360962, "learning_rate": 6.952136756164922e-06, "loss": 0.5979, "step": 4095 }, { "epoch": 0.6109329554776642, "grad_norm": 1.1604259014129639, "learning_rate": 6.947535269528396e-06, "loss": 0.6157, "step": 4096 }, { "epoch": 0.6110821090312477, "grad_norm": 1.2260150909423828, "learning_rate": 6.942934495509329e-06, "loss": 0.6066, "step": 4097 }, { "epoch": 0.6112312625848311, "grad_norm": 1.4103161096572876, "learning_rate": 6.938334435181812e-06, "loss": 0.5629, "step": 4098 }, { "epoch": 0.6113804161384145, "grad_norm": 1.3218656778335571, "learning_rate": 6.933735089619751e-06, "loss": 0.6448, "step": 4099 }, { "epoch": 0.6115295696919979, "grad_norm": 1.302703619003296, "learning_rate": 6.929136459896893e-06, "loss": 0.5963, "step": 4100 }, { "epoch": 0.6116787232455814, "grad_norm": 1.3054460287094116, "learning_rate": 6.924538547086822e-06, "loss": 0.5662, "step": 4101 }, { "epoch": 0.6118278767991647, "grad_norm": 1.5058913230895996, "learning_rate": 6.919941352262944e-06, "loss": 0.606, "step": 4102 }, { "epoch": 0.6119770303527482, "grad_norm": 1.3237392902374268, "learning_rate": 6.915344876498509e-06, "loss": 0.6381, "step": 4103 }, { "epoch": 0.6121261839063316, "grad_norm": 1.470487356185913, "learning_rate": 6.910749120866592e-06, "loss": 0.5425, "step": 4104 }, { "epoch": 0.612275337459915, "grad_norm": 1.6584994792938232, "learning_rate": 6.9061540864400986e-06, "loss": 0.6802, "step": 4105 }, { "epoch": 0.6124244910134984, "grad_norm": 1.5611647367477417, "learning_rate": 6.901559774291769e-06, "loss": 0.577, "step": 4106 }, { "epoch": 0.6125736445670819, "grad_norm": 1.0969271659851074, "learning_rate": 6.8969661854941826e-06, "loss": 0.4984, "step": 4107 }, { "epoch": 0.6127227981206652, "grad_norm": 1.2232015132904053, "learning_rate": 6.892373321119734e-06, "loss": 0.6235, "step": 4108 }, { "epoch": 0.6128719516742487, "grad_norm": 1.4623095989227295, "learning_rate": 6.8877811822406625e-06, "loss": 0.5456, "step": 4109 }, { "epoch": 0.613021105227832, "grad_norm": 1.3869434595108032, "learning_rate": 6.883189769929028e-06, "loss": 0.567, "step": 4110 }, { "epoch": 0.6131702587814155, "grad_norm": 1.3708064556121826, "learning_rate": 6.878599085256728e-06, "loss": 0.5998, "step": 4111 }, { "epoch": 0.6133194123349989, "grad_norm": 1.59707772731781, "learning_rate": 6.874009129295487e-06, "loss": 0.6199, "step": 4112 }, { "epoch": 0.6134685658885823, "grad_norm": 1.1940721273422241, "learning_rate": 6.8694199031168555e-06, "loss": 0.5476, "step": 4113 }, { "epoch": 0.6136177194421657, "grad_norm": 1.254177451133728, "learning_rate": 6.864831407792218e-06, "loss": 0.5725, "step": 4114 }, { "epoch": 0.6137668729957492, "grad_norm": 1.3389326333999634, "learning_rate": 6.8602436443927975e-06, "loss": 0.5356, "step": 4115 }, { "epoch": 0.6139160265493325, "grad_norm": 1.4941619634628296, "learning_rate": 6.855656613989627e-06, "loss": 0.6107, "step": 4116 }, { "epoch": 0.614065180102916, "grad_norm": 1.2451146841049194, "learning_rate": 6.851070317653585e-06, "loss": 0.5491, "step": 4117 }, { "epoch": 0.6142143336564994, "grad_norm": 1.4053058624267578, "learning_rate": 6.846484756455368e-06, "loss": 0.6748, "step": 4118 }, { "epoch": 0.6143634872100828, "grad_norm": 1.5385348796844482, "learning_rate": 6.841899931465503e-06, "loss": 0.5642, "step": 4119 }, { "epoch": 0.6145126407636662, "grad_norm": 0.8298593163490295, "learning_rate": 6.837315843754351e-06, "loss": 0.6339, "step": 4120 }, { "epoch": 0.6146617943172497, "grad_norm": 1.2453224658966064, "learning_rate": 6.832732494392092e-06, "loss": 0.5616, "step": 4121 }, { "epoch": 0.614810947870833, "grad_norm": 1.337477684020996, "learning_rate": 6.828149884448743e-06, "loss": 0.5962, "step": 4122 }, { "epoch": 0.6149601014244165, "grad_norm": 2.1389944553375244, "learning_rate": 6.823568014994138e-06, "loss": 0.6928, "step": 4123 }, { "epoch": 0.6151092549779998, "grad_norm": 1.2741502523422241, "learning_rate": 6.818986887097949e-06, "loss": 0.5714, "step": 4124 }, { "epoch": 0.6152584085315833, "grad_norm": 1.295197606086731, "learning_rate": 6.814406501829668e-06, "loss": 0.5915, "step": 4125 }, { "epoch": 0.6154075620851667, "grad_norm": 1.4388962984085083, "learning_rate": 6.809826860258617e-06, "loss": 0.5284, "step": 4126 }, { "epoch": 0.6155567156387501, "grad_norm": 0.8620814681053162, "learning_rate": 6.8052479634539395e-06, "loss": 0.6663, "step": 4127 }, { "epoch": 0.6157058691923335, "grad_norm": 1.3481159210205078, "learning_rate": 6.8006698124846106e-06, "loss": 0.5966, "step": 4128 }, { "epoch": 0.615855022745917, "grad_norm": 1.2837384939193726, "learning_rate": 6.796092408419429e-06, "loss": 0.5565, "step": 4129 }, { "epoch": 0.6160041762995003, "grad_norm": 1.3344029188156128, "learning_rate": 6.791515752327016e-06, "loss": 0.574, "step": 4130 }, { "epoch": 0.6161533298530838, "grad_norm": 1.3554158210754395, "learning_rate": 6.786939845275826e-06, "loss": 0.5654, "step": 4131 }, { "epoch": 0.6163024834066672, "grad_norm": 1.5721361637115479, "learning_rate": 6.782364688334127e-06, "loss": 0.6718, "step": 4132 }, { "epoch": 0.6164516369602506, "grad_norm": 1.8920093774795532, "learning_rate": 6.777790282570025e-06, "loss": 0.6868, "step": 4133 }, { "epoch": 0.616600790513834, "grad_norm": 1.2037335634231567, "learning_rate": 6.773216629051444e-06, "loss": 0.5764, "step": 4134 }, { "epoch": 0.6167499440674175, "grad_norm": 1.1669737100601196, "learning_rate": 6.768643728846132e-06, "loss": 0.5004, "step": 4135 }, { "epoch": 0.6168990976210008, "grad_norm": 1.2288038730621338, "learning_rate": 6.764071583021659e-06, "loss": 0.6211, "step": 4136 }, { "epoch": 0.6170482511745843, "grad_norm": 1.2795354127883911, "learning_rate": 6.759500192645425e-06, "loss": 0.5671, "step": 4137 }, { "epoch": 0.6171974047281676, "grad_norm": 1.44048011302948, "learning_rate": 6.754929558784648e-06, "loss": 0.6454, "step": 4138 }, { "epoch": 0.6173465582817511, "grad_norm": 1.2773847579956055, "learning_rate": 6.750359682506376e-06, "loss": 0.5142, "step": 4139 }, { "epoch": 0.6174957118353345, "grad_norm": 1.3730168342590332, "learning_rate": 6.745790564877471e-06, "loss": 0.5463, "step": 4140 }, { "epoch": 0.6176448653889179, "grad_norm": 1.525073766708374, "learning_rate": 6.741222206964622e-06, "loss": 0.5554, "step": 4141 }, { "epoch": 0.6177940189425013, "grad_norm": 1.8501322269439697, "learning_rate": 6.7366546098343455e-06, "loss": 0.6227, "step": 4142 }, { "epoch": 0.6179431724960848, "grad_norm": 1.2608877420425415, "learning_rate": 6.732087774552978e-06, "loss": 0.5361, "step": 4143 }, { "epoch": 0.6180923260496681, "grad_norm": 1.3863871097564697, "learning_rate": 6.7275217021866705e-06, "loss": 0.6269, "step": 4144 }, { "epoch": 0.6182414796032516, "grad_norm": 0.7706866264343262, "learning_rate": 6.722956393801408e-06, "loss": 0.6158, "step": 4145 }, { "epoch": 0.618390633156835, "grad_norm": 1.4449398517608643, "learning_rate": 6.718391850462986e-06, "loss": 0.6051, "step": 4146 }, { "epoch": 0.6185397867104184, "grad_norm": 1.450653314590454, "learning_rate": 6.7138280732370274e-06, "loss": 0.6063, "step": 4147 }, { "epoch": 0.6186889402640018, "grad_norm": 1.2779874801635742, "learning_rate": 6.709265063188978e-06, "loss": 0.5875, "step": 4148 }, { "epoch": 0.6188380938175853, "grad_norm": 1.2964617013931274, "learning_rate": 6.704702821384096e-06, "loss": 0.4957, "step": 4149 }, { "epoch": 0.6189872473711686, "grad_norm": 1.2848423719406128, "learning_rate": 6.700141348887472e-06, "loss": 0.5308, "step": 4150 }, { "epoch": 0.6191364009247521, "grad_norm": 1.4274498224258423, "learning_rate": 6.69558064676401e-06, "loss": 0.6388, "step": 4151 }, { "epoch": 0.6192855544783354, "grad_norm": 1.2696034908294678, "learning_rate": 6.691020716078434e-06, "loss": 0.5747, "step": 4152 }, { "epoch": 0.6194347080319189, "grad_norm": 1.277219295501709, "learning_rate": 6.68646155789529e-06, "loss": 0.6328, "step": 4153 }, { "epoch": 0.6195838615855023, "grad_norm": 1.5301183462142944, "learning_rate": 6.6819031732789405e-06, "loss": 0.6027, "step": 4154 }, { "epoch": 0.6197330151390857, "grad_norm": 1.5418559312820435, "learning_rate": 6.677345563293571e-06, "loss": 0.593, "step": 4155 }, { "epoch": 0.6198821686926691, "grad_norm": 1.8771049976348877, "learning_rate": 6.6727887290031865e-06, "loss": 0.4403, "step": 4156 }, { "epoch": 0.6200313222462526, "grad_norm": 1.444189190864563, "learning_rate": 6.668232671471605e-06, "loss": 0.5916, "step": 4157 }, { "epoch": 0.6201804757998359, "grad_norm": 1.7711542844772339, "learning_rate": 6.663677391762468e-06, "loss": 0.5447, "step": 4158 }, { "epoch": 0.6203296293534194, "grad_norm": 1.9742673635482788, "learning_rate": 6.65912289093924e-06, "loss": 0.5721, "step": 4159 }, { "epoch": 0.6204787829070028, "grad_norm": 1.2819390296936035, "learning_rate": 6.654569170065195e-06, "loss": 0.6027, "step": 4160 }, { "epoch": 0.6206279364605862, "grad_norm": 1.2971469163894653, "learning_rate": 6.6500162302034265e-06, "loss": 0.6364, "step": 4161 }, { "epoch": 0.6207770900141696, "grad_norm": 1.3789860010147095, "learning_rate": 6.6454640724168514e-06, "loss": 0.6244, "step": 4162 }, { "epoch": 0.6209262435677531, "grad_norm": 1.5429041385650635, "learning_rate": 6.640912697768196e-06, "loss": 0.6999, "step": 4163 }, { "epoch": 0.6210753971213364, "grad_norm": 1.3432726860046387, "learning_rate": 6.636362107320011e-06, "loss": 0.5566, "step": 4164 }, { "epoch": 0.6212245506749199, "grad_norm": 1.329800009727478, "learning_rate": 6.631812302134662e-06, "loss": 0.6356, "step": 4165 }, { "epoch": 0.6213737042285032, "grad_norm": 1.5220112800598145, "learning_rate": 6.6272632832743234e-06, "loss": 0.5969, "step": 4166 }, { "epoch": 0.6215228577820867, "grad_norm": 1.3797721862792969, "learning_rate": 6.6227150518009965e-06, "loss": 0.5436, "step": 4167 }, { "epoch": 0.6216720113356701, "grad_norm": 1.290481448173523, "learning_rate": 6.6181676087765e-06, "loss": 0.4931, "step": 4168 }, { "epoch": 0.6218211648892535, "grad_norm": 1.615143060684204, "learning_rate": 6.613620955262459e-06, "loss": 0.6046, "step": 4169 }, { "epoch": 0.6219703184428369, "grad_norm": 1.293009638786316, "learning_rate": 6.60907509232032e-06, "loss": 0.5775, "step": 4170 }, { "epoch": 0.6221194719964204, "grad_norm": 2.0182697772979736, "learning_rate": 6.604530021011344e-06, "loss": 0.5403, "step": 4171 }, { "epoch": 0.6222686255500037, "grad_norm": 1.279016375541687, "learning_rate": 6.599985742396604e-06, "loss": 0.6395, "step": 4172 }, { "epoch": 0.6224177791035871, "grad_norm": 1.5283817052841187, "learning_rate": 6.595442257536995e-06, "loss": 0.6523, "step": 4173 }, { "epoch": 0.6225669326571706, "grad_norm": 1.372780442237854, "learning_rate": 6.590899567493221e-06, "loss": 0.6489, "step": 4174 }, { "epoch": 0.6227160862107539, "grad_norm": 1.3408451080322266, "learning_rate": 6.586357673325798e-06, "loss": 0.5806, "step": 4175 }, { "epoch": 0.6228652397643374, "grad_norm": 1.314491629600525, "learning_rate": 6.58181657609507e-06, "loss": 0.5686, "step": 4176 }, { "epoch": 0.6230143933179207, "grad_norm": 1.5537625551223755, "learning_rate": 6.57727627686118e-06, "loss": 0.5715, "step": 4177 }, { "epoch": 0.6231635468715042, "grad_norm": 1.5828392505645752, "learning_rate": 6.572736776684087e-06, "loss": 0.6148, "step": 4178 }, { "epoch": 0.6233127004250876, "grad_norm": 1.3811787366867065, "learning_rate": 6.568198076623571e-06, "loss": 0.5821, "step": 4179 }, { "epoch": 0.623461853978671, "grad_norm": 1.533576250076294, "learning_rate": 6.563660177739217e-06, "loss": 0.6059, "step": 4180 }, { "epoch": 0.6236110075322544, "grad_norm": 1.2405296564102173, "learning_rate": 6.5591230810904316e-06, "loss": 0.5399, "step": 4181 }, { "epoch": 0.6237601610858379, "grad_norm": 1.5004185438156128, "learning_rate": 6.554586787736425e-06, "loss": 0.6498, "step": 4182 }, { "epoch": 0.6239093146394212, "grad_norm": 1.4064257144927979, "learning_rate": 6.550051298736223e-06, "loss": 0.5979, "step": 4183 }, { "epoch": 0.6240584681930047, "grad_norm": 1.3863916397094727, "learning_rate": 6.5455166151486645e-06, "loss": 0.6096, "step": 4184 }, { "epoch": 0.6242076217465881, "grad_norm": 1.4880441427230835, "learning_rate": 6.540982738032406e-06, "loss": 0.6919, "step": 4185 }, { "epoch": 0.6243567753001715, "grad_norm": 1.355837106704712, "learning_rate": 6.536449668445905e-06, "loss": 0.5478, "step": 4186 }, { "epoch": 0.6245059288537549, "grad_norm": 1.2458831071853638, "learning_rate": 6.531917407447441e-06, "loss": 0.542, "step": 4187 }, { "epoch": 0.6246550824073384, "grad_norm": 1.4262847900390625, "learning_rate": 6.527385956095094e-06, "loss": 0.5267, "step": 4188 }, { "epoch": 0.6248042359609217, "grad_norm": 0.7806773781776428, "learning_rate": 6.52285531544676e-06, "loss": 0.6411, "step": 4189 }, { "epoch": 0.6249533895145052, "grad_norm": 1.412103533744812, "learning_rate": 6.518325486560151e-06, "loss": 0.5694, "step": 4190 }, { "epoch": 0.6251025430680885, "grad_norm": 1.4443843364715576, "learning_rate": 6.5137964704927795e-06, "loss": 0.5296, "step": 4191 }, { "epoch": 0.625251696621672, "grad_norm": 1.4404774904251099, "learning_rate": 6.509268268301976e-06, "loss": 0.5893, "step": 4192 }, { "epoch": 0.6254008501752554, "grad_norm": 1.6058244705200195, "learning_rate": 6.504740881044875e-06, "loss": 0.6606, "step": 4193 }, { "epoch": 0.6255500037288388, "grad_norm": 1.351729154586792, "learning_rate": 6.500214309778432e-06, "loss": 0.5441, "step": 4194 }, { "epoch": 0.6256991572824222, "grad_norm": 1.754453420639038, "learning_rate": 6.495688555559396e-06, "loss": 0.6304, "step": 4195 }, { "epoch": 0.6258483108360057, "grad_norm": 1.493419885635376, "learning_rate": 6.491163619444341e-06, "loss": 0.5653, "step": 4196 }, { "epoch": 0.625997464389589, "grad_norm": 1.3474783897399902, "learning_rate": 6.4866395024896335e-06, "loss": 0.5705, "step": 4197 }, { "epoch": 0.6261466179431725, "grad_norm": 1.2689145803451538, "learning_rate": 6.4821162057514635e-06, "loss": 0.5411, "step": 4198 }, { "epoch": 0.6262957714967559, "grad_norm": 1.4520599842071533, "learning_rate": 6.477593730285821e-06, "loss": 0.6257, "step": 4199 }, { "epoch": 0.6264449250503393, "grad_norm": 1.1790050268173218, "learning_rate": 6.4730720771485104e-06, "loss": 0.4885, "step": 4200 }, { "epoch": 0.6265940786039227, "grad_norm": 1.5122382640838623, "learning_rate": 6.468551247395136e-06, "loss": 0.5909, "step": 4201 }, { "epoch": 0.6267432321575062, "grad_norm": 1.3310999870300293, "learning_rate": 6.464031242081114e-06, "loss": 0.6067, "step": 4202 }, { "epoch": 0.6268923857110895, "grad_norm": 1.6904242038726807, "learning_rate": 6.459512062261674e-06, "loss": 0.5905, "step": 4203 }, { "epoch": 0.627041539264673, "grad_norm": 1.2821102142333984, "learning_rate": 6.4549937089918464e-06, "loss": 0.577, "step": 4204 }, { "epoch": 0.6271906928182563, "grad_norm": 1.5777336359024048, "learning_rate": 6.450476183326466e-06, "loss": 0.5764, "step": 4205 }, { "epoch": 0.6273398463718398, "grad_norm": 1.3401859998703003, "learning_rate": 6.445959486320184e-06, "loss": 0.6016, "step": 4206 }, { "epoch": 0.6274889999254232, "grad_norm": 1.4780009984970093, "learning_rate": 6.441443619027445e-06, "loss": 0.5059, "step": 4207 }, { "epoch": 0.6276381534790066, "grad_norm": 1.2635459899902344, "learning_rate": 6.4369285825025115e-06, "loss": 0.6504, "step": 4208 }, { "epoch": 0.62778730703259, "grad_norm": 1.4265421628952026, "learning_rate": 6.432414377799449e-06, "loss": 0.5289, "step": 4209 }, { "epoch": 0.6279364605861735, "grad_norm": 1.3225232362747192, "learning_rate": 6.4279010059721194e-06, "loss": 0.5668, "step": 4210 }, { "epoch": 0.6280856141397568, "grad_norm": 1.2515114545822144, "learning_rate": 6.423388468074207e-06, "loss": 0.5399, "step": 4211 }, { "epoch": 0.6282347676933403, "grad_norm": 1.3257023096084595, "learning_rate": 6.418876765159195e-06, "loss": 0.5989, "step": 4212 }, { "epoch": 0.6283839212469237, "grad_norm": 1.304560661315918, "learning_rate": 6.414365898280362e-06, "loss": 0.5665, "step": 4213 }, { "epoch": 0.6285330748005071, "grad_norm": 1.3430172204971313, "learning_rate": 6.409855868490799e-06, "loss": 0.5895, "step": 4214 }, { "epoch": 0.6286822283540905, "grad_norm": 1.322842001914978, "learning_rate": 6.405346676843406e-06, "loss": 0.5302, "step": 4215 }, { "epoch": 0.628831381907674, "grad_norm": 1.4858489036560059, "learning_rate": 6.400838324390878e-06, "loss": 0.5923, "step": 4216 }, { "epoch": 0.6289805354612573, "grad_norm": 1.4301013946533203, "learning_rate": 6.3963308121857234e-06, "loss": 0.5586, "step": 4217 }, { "epoch": 0.6291296890148408, "grad_norm": 1.520756721496582, "learning_rate": 6.391824141280247e-06, "loss": 0.643, "step": 4218 }, { "epoch": 0.6292788425684241, "grad_norm": 1.5806018114089966, "learning_rate": 6.387318312726558e-06, "loss": 0.5693, "step": 4219 }, { "epoch": 0.6294279961220076, "grad_norm": 1.429460048675537, "learning_rate": 6.382813327576574e-06, "loss": 0.581, "step": 4220 }, { "epoch": 0.629577149675591, "grad_norm": 1.4385201930999756, "learning_rate": 6.378309186882016e-06, "loss": 0.5571, "step": 4221 }, { "epoch": 0.6297263032291744, "grad_norm": 1.5130468606948853, "learning_rate": 6.373805891694398e-06, "loss": 0.615, "step": 4222 }, { "epoch": 0.6298754567827578, "grad_norm": 1.2302156686782837, "learning_rate": 6.369303443065047e-06, "loss": 0.5416, "step": 4223 }, { "epoch": 0.6300246103363413, "grad_norm": 1.2664145231246948, "learning_rate": 6.364801842045088e-06, "loss": 0.5439, "step": 4224 }, { "epoch": 0.6301737638899246, "grad_norm": 1.5633891820907593, "learning_rate": 6.360301089685445e-06, "loss": 0.6149, "step": 4225 }, { "epoch": 0.6303229174435081, "grad_norm": 1.6886394023895264, "learning_rate": 6.355801187036854e-06, "loss": 0.5429, "step": 4226 }, { "epoch": 0.6304720709970915, "grad_norm": 1.4526621103286743, "learning_rate": 6.3513021351498404e-06, "loss": 0.5145, "step": 4227 }, { "epoch": 0.6306212245506749, "grad_norm": 1.4020442962646484, "learning_rate": 6.346803935074737e-06, "loss": 0.5451, "step": 4228 }, { "epoch": 0.6307703781042583, "grad_norm": 1.3336749076843262, "learning_rate": 6.342306587861683e-06, "loss": 0.6041, "step": 4229 }, { "epoch": 0.6309195316578418, "grad_norm": 1.2979702949523926, "learning_rate": 6.337810094560609e-06, "loss": 0.5961, "step": 4230 }, { "epoch": 0.6310686852114251, "grad_norm": 1.297007441520691, "learning_rate": 6.333314456221249e-06, "loss": 0.5613, "step": 4231 }, { "epoch": 0.6312178387650086, "grad_norm": 1.2642943859100342, "learning_rate": 6.328819673893143e-06, "loss": 0.5942, "step": 4232 }, { "epoch": 0.631366992318592, "grad_norm": 1.373961329460144, "learning_rate": 6.324325748625619e-06, "loss": 0.5621, "step": 4233 }, { "epoch": 0.6315161458721754, "grad_norm": 1.3456389904022217, "learning_rate": 6.3198326814678225e-06, "loss": 0.6379, "step": 4234 }, { "epoch": 0.6316652994257588, "grad_norm": 1.2254986763000488, "learning_rate": 6.31534047346868e-06, "loss": 0.6186, "step": 4235 }, { "epoch": 0.6318144529793422, "grad_norm": 1.4640699625015259, "learning_rate": 6.310849125676934e-06, "loss": 0.5846, "step": 4236 }, { "epoch": 0.6319636065329256, "grad_norm": 1.416143536567688, "learning_rate": 6.306358639141109e-06, "loss": 0.665, "step": 4237 }, { "epoch": 0.6321127600865091, "grad_norm": 1.2257118225097656, "learning_rate": 6.301869014909548e-06, "loss": 0.4609, "step": 4238 }, { "epoch": 0.6322619136400924, "grad_norm": 1.4082064628601074, "learning_rate": 6.297380254030376e-06, "loss": 0.5376, "step": 4239 }, { "epoch": 0.6324110671936759, "grad_norm": 1.3944282531738281, "learning_rate": 6.292892357551527e-06, "loss": 0.6202, "step": 4240 }, { "epoch": 0.6325602207472593, "grad_norm": 2.7941837310791016, "learning_rate": 6.288405326520726e-06, "loss": 0.4791, "step": 4241 }, { "epoch": 0.6327093743008427, "grad_norm": 1.3101187944412231, "learning_rate": 6.283919161985501e-06, "loss": 0.5498, "step": 4242 }, { "epoch": 0.6328585278544261, "grad_norm": 1.367281198501587, "learning_rate": 6.279433864993176e-06, "loss": 0.6007, "step": 4243 }, { "epoch": 0.6330076814080096, "grad_norm": 1.3900188207626343, "learning_rate": 6.274949436590869e-06, "loss": 0.5775, "step": 4244 }, { "epoch": 0.6331568349615929, "grad_norm": 1.20313560962677, "learning_rate": 6.2704658778255e-06, "loss": 0.5622, "step": 4245 }, { "epoch": 0.6333059885151764, "grad_norm": 1.398918867111206, "learning_rate": 6.2659831897437895e-06, "loss": 0.5296, "step": 4246 }, { "epoch": 0.6334551420687597, "grad_norm": 1.4648463726043701, "learning_rate": 6.261501373392245e-06, "loss": 0.5846, "step": 4247 }, { "epoch": 0.6336042956223432, "grad_norm": 1.2166438102722168, "learning_rate": 6.257020429817177e-06, "loss": 0.5833, "step": 4248 }, { "epoch": 0.6337534491759266, "grad_norm": 1.4528976678848267, "learning_rate": 6.252540360064689e-06, "loss": 0.5517, "step": 4249 }, { "epoch": 0.63390260272951, "grad_norm": 1.4918625354766846, "learning_rate": 6.248061165180682e-06, "loss": 0.6556, "step": 4250 }, { "epoch": 0.6340517562830934, "grad_norm": 1.3103351593017578, "learning_rate": 6.243582846210856e-06, "loss": 0.5828, "step": 4251 }, { "epoch": 0.6342009098366769, "grad_norm": 1.528394103050232, "learning_rate": 6.239105404200698e-06, "loss": 0.5696, "step": 4252 }, { "epoch": 0.6343500633902602, "grad_norm": 1.2801884412765503, "learning_rate": 6.2346288401955e-06, "loss": 0.5457, "step": 4253 }, { "epoch": 0.6344992169438437, "grad_norm": 1.3474551439285278, "learning_rate": 6.230153155240339e-06, "loss": 0.6351, "step": 4254 }, { "epoch": 0.6346483704974271, "grad_norm": 1.2521915435791016, "learning_rate": 6.225678350380102e-06, "loss": 0.5299, "step": 4255 }, { "epoch": 0.6347975240510105, "grad_norm": 1.2970993518829346, "learning_rate": 6.221204426659452e-06, "loss": 0.5534, "step": 4256 }, { "epoch": 0.6349466776045939, "grad_norm": 1.2039920091629028, "learning_rate": 6.21673138512286e-06, "loss": 0.4604, "step": 4257 }, { "epoch": 0.6350958311581774, "grad_norm": 1.320613980293274, "learning_rate": 6.212259226814583e-06, "loss": 0.611, "step": 4258 }, { "epoch": 0.6352449847117607, "grad_norm": 1.4690427780151367, "learning_rate": 6.207787952778679e-06, "loss": 0.583, "step": 4259 }, { "epoch": 0.6353941382653442, "grad_norm": 1.636182427406311, "learning_rate": 6.203317564058993e-06, "loss": 0.5578, "step": 4260 }, { "epoch": 0.6355432918189275, "grad_norm": 1.3136056661605835, "learning_rate": 6.1988480616991635e-06, "loss": 0.5017, "step": 4261 }, { "epoch": 0.635692445372511, "grad_norm": 1.2306429147720337, "learning_rate": 6.19437944674263e-06, "loss": 0.5437, "step": 4262 }, { "epoch": 0.6358415989260944, "grad_norm": 1.3459858894348145, "learning_rate": 6.189911720232612e-06, "loss": 0.6048, "step": 4263 }, { "epoch": 0.6359907524796778, "grad_norm": 1.6858885288238525, "learning_rate": 6.185444883212135e-06, "loss": 0.5457, "step": 4264 }, { "epoch": 0.6361399060332612, "grad_norm": 1.4070428609848022, "learning_rate": 6.180978936724011e-06, "loss": 0.5505, "step": 4265 }, { "epoch": 0.6362890595868447, "grad_norm": 1.7990126609802246, "learning_rate": 6.176513881810844e-06, "loss": 0.5413, "step": 4266 }, { "epoch": 0.636438213140428, "grad_norm": 1.2929481267929077, "learning_rate": 6.172049719515023e-06, "loss": 0.4657, "step": 4267 }, { "epoch": 0.6365873666940115, "grad_norm": 1.3512948751449585, "learning_rate": 6.167586450878743e-06, "loss": 0.6545, "step": 4268 }, { "epoch": 0.6367365202475949, "grad_norm": 1.5218086242675781, "learning_rate": 6.163124076943978e-06, "loss": 0.5484, "step": 4269 }, { "epoch": 0.6368856738011783, "grad_norm": 1.6357707977294922, "learning_rate": 6.158662598752501e-06, "loss": 0.5369, "step": 4270 }, { "epoch": 0.6370348273547617, "grad_norm": 1.3650321960449219, "learning_rate": 6.154202017345872e-06, "loss": 0.552, "step": 4271 }, { "epoch": 0.6371839809083452, "grad_norm": 1.280583143234253, "learning_rate": 6.1497423337654365e-06, "loss": 0.5377, "step": 4272 }, { "epoch": 0.6373331344619285, "grad_norm": 1.5386720895767212, "learning_rate": 6.145283549052342e-06, "loss": 0.6925, "step": 4273 }, { "epoch": 0.637482288015512, "grad_norm": 1.486947774887085, "learning_rate": 6.140825664247523e-06, "loss": 0.625, "step": 4274 }, { "epoch": 0.6376314415690953, "grad_norm": 1.585364580154419, "learning_rate": 6.136368680391695e-06, "loss": 0.6523, "step": 4275 }, { "epoch": 0.6377805951226788, "grad_norm": 2.5033934116363525, "learning_rate": 6.1319125985253754e-06, "loss": 0.6269, "step": 4276 }, { "epoch": 0.6379297486762622, "grad_norm": 1.1508142948150635, "learning_rate": 6.1274574196888606e-06, "loss": 0.5119, "step": 4277 }, { "epoch": 0.6380789022298456, "grad_norm": 1.4327337741851807, "learning_rate": 6.123003144922242e-06, "loss": 0.6212, "step": 4278 }, { "epoch": 0.638228055783429, "grad_norm": 1.500844120979309, "learning_rate": 6.1185497752654e-06, "loss": 0.6417, "step": 4279 }, { "epoch": 0.6383772093370125, "grad_norm": 0.8648558855056763, "learning_rate": 6.114097311757996e-06, "loss": 0.6597, "step": 4280 }, { "epoch": 0.6385263628905958, "grad_norm": 1.393561601638794, "learning_rate": 6.109645755439495e-06, "loss": 0.6025, "step": 4281 }, { "epoch": 0.6386755164441793, "grad_norm": 1.2693506479263306, "learning_rate": 6.105195107349137e-06, "loss": 0.4901, "step": 4282 }, { "epoch": 0.6388246699977627, "grad_norm": 1.259874939918518, "learning_rate": 6.100745368525955e-06, "loss": 0.6227, "step": 4283 }, { "epoch": 0.6389738235513461, "grad_norm": 1.7083021402359009, "learning_rate": 6.09629654000877e-06, "loss": 0.5196, "step": 4284 }, { "epoch": 0.6391229771049295, "grad_norm": 1.4796314239501953, "learning_rate": 6.091848622836187e-06, "loss": 0.617, "step": 4285 }, { "epoch": 0.639272130658513, "grad_norm": 1.375211238861084, "learning_rate": 6.087401618046602e-06, "loss": 0.4481, "step": 4286 }, { "epoch": 0.6394212842120963, "grad_norm": 1.4301981925964355, "learning_rate": 6.082955526678199e-06, "loss": 0.6102, "step": 4287 }, { "epoch": 0.6395704377656798, "grad_norm": 1.4361417293548584, "learning_rate": 6.078510349768942e-06, "loss": 0.5905, "step": 4288 }, { "epoch": 0.6397195913192631, "grad_norm": 1.4146791696548462, "learning_rate": 6.074066088356587e-06, "loss": 0.5279, "step": 4289 }, { "epoch": 0.6398687448728466, "grad_norm": 1.2601895332336426, "learning_rate": 6.069622743478681e-06, "loss": 0.4845, "step": 4290 }, { "epoch": 0.64001789842643, "grad_norm": 1.2290202379226685, "learning_rate": 6.065180316172547e-06, "loss": 0.5033, "step": 4291 }, { "epoch": 0.6401670519800134, "grad_norm": 1.3917367458343506, "learning_rate": 6.060738807475295e-06, "loss": 0.6024, "step": 4292 }, { "epoch": 0.6403162055335968, "grad_norm": 0.8736146688461304, "learning_rate": 6.056298218423831e-06, "loss": 0.6355, "step": 4293 }, { "epoch": 0.6404653590871803, "grad_norm": 1.3878215551376343, "learning_rate": 6.051858550054832e-06, "loss": 0.5841, "step": 4294 }, { "epoch": 0.6406145126407636, "grad_norm": 1.4405651092529297, "learning_rate": 6.047419803404772e-06, "loss": 0.5355, "step": 4295 }, { "epoch": 0.6407636661943471, "grad_norm": 0.831691324710846, "learning_rate": 6.042981979509904e-06, "loss": 0.6652, "step": 4296 }, { "epoch": 0.6409128197479305, "grad_norm": 1.3862189054489136, "learning_rate": 6.038545079406264e-06, "loss": 0.5116, "step": 4297 }, { "epoch": 0.6410619733015139, "grad_norm": 1.4590808153152466, "learning_rate": 6.034109104129673e-06, "loss": 0.608, "step": 4298 }, { "epoch": 0.6412111268550973, "grad_norm": 1.4810012578964233, "learning_rate": 6.029674054715744e-06, "loss": 0.6229, "step": 4299 }, { "epoch": 0.6413602804086808, "grad_norm": 1.39152193069458, "learning_rate": 6.025239932199864e-06, "loss": 0.5651, "step": 4300 }, { "epoch": 0.6415094339622641, "grad_norm": 1.461243987083435, "learning_rate": 6.020806737617211e-06, "loss": 0.6217, "step": 4301 }, { "epoch": 0.6416585875158476, "grad_norm": 1.367652177810669, "learning_rate": 6.016374472002739e-06, "loss": 0.5492, "step": 4302 }, { "epoch": 0.641807741069431, "grad_norm": 1.32558012008667, "learning_rate": 6.0119431363911875e-06, "loss": 0.5752, "step": 4303 }, { "epoch": 0.6419568946230144, "grad_norm": 2.203099489212036, "learning_rate": 6.007512731817085e-06, "loss": 0.5549, "step": 4304 }, { "epoch": 0.6421060481765978, "grad_norm": 1.2711361646652222, "learning_rate": 6.0030832593147326e-06, "loss": 0.6426, "step": 4305 }, { "epoch": 0.6422552017301812, "grad_norm": 1.2464327812194824, "learning_rate": 5.998654719918223e-06, "loss": 0.4503, "step": 4306 }, { "epoch": 0.6424043552837646, "grad_norm": 1.3423458337783813, "learning_rate": 5.994227114661423e-06, "loss": 0.5638, "step": 4307 }, { "epoch": 0.6425535088373481, "grad_norm": 1.5284291505813599, "learning_rate": 5.989800444577991e-06, "loss": 0.6465, "step": 4308 }, { "epoch": 0.6427026623909314, "grad_norm": 1.384830355644226, "learning_rate": 5.985374710701358e-06, "loss": 0.5723, "step": 4309 }, { "epoch": 0.6428518159445149, "grad_norm": 1.2133636474609375, "learning_rate": 5.980949914064742e-06, "loss": 0.5331, "step": 4310 }, { "epoch": 0.6430009694980983, "grad_norm": 1.6369472742080688, "learning_rate": 5.976526055701137e-06, "loss": 0.6402, "step": 4311 }, { "epoch": 0.6431501230516817, "grad_norm": 1.3895082473754883, "learning_rate": 5.972103136643326e-06, "loss": 0.5523, "step": 4312 }, { "epoch": 0.6432992766052651, "grad_norm": 1.4745972156524658, "learning_rate": 5.967681157923864e-06, "loss": 0.5538, "step": 4313 }, { "epoch": 0.6434484301588486, "grad_norm": 1.7872602939605713, "learning_rate": 5.963260120575089e-06, "loss": 0.5852, "step": 4314 }, { "epoch": 0.6435975837124319, "grad_norm": 1.5342603921890259, "learning_rate": 5.9588400256291204e-06, "loss": 0.6045, "step": 4315 }, { "epoch": 0.6437467372660154, "grad_norm": 5.57663631439209, "learning_rate": 5.954420874117864e-06, "loss": 0.5987, "step": 4316 }, { "epoch": 0.6438958908195987, "grad_norm": 1.391909122467041, "learning_rate": 5.950002667072994e-06, "loss": 0.6171, "step": 4317 }, { "epoch": 0.6440450443731822, "grad_norm": 1.4558430910110474, "learning_rate": 5.945585405525971e-06, "loss": 0.6625, "step": 4318 }, { "epoch": 0.6441941979267656, "grad_norm": 1.6664241552352905, "learning_rate": 5.941169090508032e-06, "loss": 0.5934, "step": 4319 }, { "epoch": 0.644343351480349, "grad_norm": 0.8958982825279236, "learning_rate": 5.936753723050192e-06, "loss": 0.6689, "step": 4320 }, { "epoch": 0.6444925050339324, "grad_norm": 1.3099396228790283, "learning_rate": 5.932339304183251e-06, "loss": 0.5815, "step": 4321 }, { "epoch": 0.6446416585875159, "grad_norm": 1.3715018033981323, "learning_rate": 5.92792583493778e-06, "loss": 0.5866, "step": 4322 }, { "epoch": 0.6447908121410992, "grad_norm": 1.3415623903274536, "learning_rate": 5.923513316344135e-06, "loss": 0.497, "step": 4323 }, { "epoch": 0.6449399656946827, "grad_norm": 1.5560823678970337, "learning_rate": 5.919101749432441e-06, "loss": 0.5886, "step": 4324 }, { "epoch": 0.6450891192482661, "grad_norm": 1.3022810220718384, "learning_rate": 5.914691135232613e-06, "loss": 0.5485, "step": 4325 }, { "epoch": 0.6452382728018495, "grad_norm": 1.6008884906768799, "learning_rate": 5.910281474774335e-06, "loss": 0.6328, "step": 4326 }, { "epoch": 0.6453874263554329, "grad_norm": 1.4928916692733765, "learning_rate": 5.905872769087071e-06, "loss": 0.6068, "step": 4327 }, { "epoch": 0.6455365799090164, "grad_norm": 1.3501815795898438, "learning_rate": 5.901465019200059e-06, "loss": 0.5123, "step": 4328 }, { "epoch": 0.6456857334625997, "grad_norm": 1.3466559648513794, "learning_rate": 5.897058226142321e-06, "loss": 0.5909, "step": 4329 }, { "epoch": 0.6458348870161832, "grad_norm": 1.2294893264770508, "learning_rate": 5.892652390942645e-06, "loss": 0.5868, "step": 4330 }, { "epoch": 0.6459840405697665, "grad_norm": 1.266684889793396, "learning_rate": 5.888247514629607e-06, "loss": 0.5105, "step": 4331 }, { "epoch": 0.64613319412335, "grad_norm": 1.520390510559082, "learning_rate": 5.883843598231551e-06, "loss": 0.5488, "step": 4332 }, { "epoch": 0.6462823476769334, "grad_norm": 1.51046621799469, "learning_rate": 5.879440642776597e-06, "loss": 0.5075, "step": 4333 }, { "epoch": 0.6464315012305168, "grad_norm": 1.3052326440811157, "learning_rate": 5.875038649292648e-06, "loss": 0.4962, "step": 4334 }, { "epoch": 0.6465806547841002, "grad_norm": 1.3787373304367065, "learning_rate": 5.8706376188073775e-06, "loss": 0.5682, "step": 4335 }, { "epoch": 0.6467298083376837, "grad_norm": 1.2723338603973389, "learning_rate": 5.866237552348231e-06, "loss": 0.6095, "step": 4336 }, { "epoch": 0.646878961891267, "grad_norm": 1.5230430364608765, "learning_rate": 5.861838450942434e-06, "loss": 0.6165, "step": 4337 }, { "epoch": 0.6470281154448505, "grad_norm": 1.2897464036941528, "learning_rate": 5.857440315616987e-06, "loss": 0.5485, "step": 4338 }, { "epoch": 0.6471772689984339, "grad_norm": 1.1976590156555176, "learning_rate": 5.853043147398656e-06, "loss": 0.5013, "step": 4339 }, { "epoch": 0.6473264225520173, "grad_norm": 1.3273646831512451, "learning_rate": 5.848646947313996e-06, "loss": 0.5476, "step": 4340 }, { "epoch": 0.6474755761056007, "grad_norm": 0.8697934150695801, "learning_rate": 5.844251716389324e-06, "loss": 0.6759, "step": 4341 }, { "epoch": 0.6476247296591842, "grad_norm": 1.3948845863342285, "learning_rate": 5.839857455650732e-06, "loss": 0.5777, "step": 4342 }, { "epoch": 0.6477738832127675, "grad_norm": 1.3378549814224243, "learning_rate": 5.835464166124096e-06, "loss": 0.5454, "step": 4343 }, { "epoch": 0.647923036766351, "grad_norm": 1.43915593624115, "learning_rate": 5.831071848835053e-06, "loss": 0.6802, "step": 4344 }, { "epoch": 0.6480721903199343, "grad_norm": 0.8109112977981567, "learning_rate": 5.8266805048090216e-06, "loss": 0.5935, "step": 4345 }, { "epoch": 0.6482213438735178, "grad_norm": 1.21540105342865, "learning_rate": 5.82229013507118e-06, "loss": 0.5488, "step": 4346 }, { "epoch": 0.6483704974271012, "grad_norm": 1.4197927713394165, "learning_rate": 5.817900740646496e-06, "loss": 0.5513, "step": 4347 }, { "epoch": 0.6485196509806846, "grad_norm": 1.5225622653961182, "learning_rate": 5.813512322559699e-06, "loss": 0.5488, "step": 4348 }, { "epoch": 0.648668804534268, "grad_norm": 1.4195131063461304, "learning_rate": 5.809124881835299e-06, "loss": 0.5784, "step": 4349 }, { "epoch": 0.6488179580878515, "grad_norm": 1.3082648515701294, "learning_rate": 5.804738419497558e-06, "loss": 0.5672, "step": 4350 }, { "epoch": 0.6489671116414348, "grad_norm": 1.244309425354004, "learning_rate": 5.800352936570543e-06, "loss": 0.5457, "step": 4351 }, { "epoch": 0.6491162651950183, "grad_norm": 1.2860225439071655, "learning_rate": 5.795968434078059e-06, "loss": 0.5891, "step": 4352 }, { "epoch": 0.6492654187486017, "grad_norm": 1.4266688823699951, "learning_rate": 5.791584913043699e-06, "loss": 0.5626, "step": 4353 }, { "epoch": 0.6494145723021851, "grad_norm": 1.416654109954834, "learning_rate": 5.787202374490826e-06, "loss": 0.5494, "step": 4354 }, { "epoch": 0.6495637258557685, "grad_norm": 1.5403687953948975, "learning_rate": 5.782820819442576e-06, "loss": 0.6435, "step": 4355 }, { "epoch": 0.649712879409352, "grad_norm": 1.2800318002700806, "learning_rate": 5.778440248921842e-06, "loss": 0.6012, "step": 4356 }, { "epoch": 0.6498620329629353, "grad_norm": 1.297722339630127, "learning_rate": 5.7740606639513e-06, "loss": 0.5591, "step": 4357 }, { "epoch": 0.6500111865165188, "grad_norm": 1.413260579109192, "learning_rate": 5.7696820655533984e-06, "loss": 0.6372, "step": 4358 }, { "epoch": 0.6501603400701021, "grad_norm": 1.4665591716766357, "learning_rate": 5.765304454750333e-06, "loss": 0.6081, "step": 4359 }, { "epoch": 0.6503094936236856, "grad_norm": 1.4397557973861694, "learning_rate": 5.760927832564103e-06, "loss": 0.5275, "step": 4360 }, { "epoch": 0.650458647177269, "grad_norm": 1.4751514196395874, "learning_rate": 5.756552200016454e-06, "loss": 0.5344, "step": 4361 }, { "epoch": 0.6506078007308524, "grad_norm": 1.067373275756836, "learning_rate": 5.752177558128899e-06, "loss": 0.4781, "step": 4362 }, { "epoch": 0.6507569542844358, "grad_norm": 1.2968541383743286, "learning_rate": 5.74780390792273e-06, "loss": 0.6001, "step": 4363 }, { "epoch": 0.6509061078380193, "grad_norm": 1.5508215427398682, "learning_rate": 5.743431250419007e-06, "loss": 0.6361, "step": 4364 }, { "epoch": 0.6510552613916026, "grad_norm": 1.302200198173523, "learning_rate": 5.73905958663855e-06, "loss": 0.562, "step": 4365 }, { "epoch": 0.6512044149451861, "grad_norm": 1.2812466621398926, "learning_rate": 5.734688917601952e-06, "loss": 0.5159, "step": 4366 }, { "epoch": 0.6513535684987695, "grad_norm": 1.476812481880188, "learning_rate": 5.7303192443295805e-06, "loss": 0.5732, "step": 4367 }, { "epoch": 0.6515027220523529, "grad_norm": 1.2944248914718628, "learning_rate": 5.725950567841552e-06, "loss": 0.5643, "step": 4368 }, { "epoch": 0.6516518756059363, "grad_norm": 1.5347129106521606, "learning_rate": 5.7215828891577705e-06, "loss": 0.6211, "step": 4369 }, { "epoch": 0.6518010291595198, "grad_norm": 0.896650493144989, "learning_rate": 5.717216209297902e-06, "loss": 0.6778, "step": 4370 }, { "epoch": 0.6519501827131031, "grad_norm": 1.471126675605774, "learning_rate": 5.712850529281366e-06, "loss": 0.543, "step": 4371 }, { "epoch": 0.6520993362666866, "grad_norm": 1.2671802043914795, "learning_rate": 5.708485850127365e-06, "loss": 0.5413, "step": 4372 }, { "epoch": 0.65224848982027, "grad_norm": 1.6273208856582642, "learning_rate": 5.704122172854863e-06, "loss": 0.5167, "step": 4373 }, { "epoch": 0.6523976433738534, "grad_norm": 1.8313300609588623, "learning_rate": 5.6997594984825795e-06, "loss": 0.5969, "step": 4374 }, { "epoch": 0.6525467969274368, "grad_norm": 1.9258904457092285, "learning_rate": 5.695397828029016e-06, "loss": 0.5787, "step": 4375 }, { "epoch": 0.6526959504810202, "grad_norm": 0.8289717435836792, "learning_rate": 5.69103716251243e-06, "loss": 0.6429, "step": 4376 }, { "epoch": 0.6528451040346036, "grad_norm": 1.3626526594161987, "learning_rate": 5.686677502950848e-06, "loss": 0.581, "step": 4377 }, { "epoch": 0.6529942575881871, "grad_norm": 1.3408088684082031, "learning_rate": 5.682318850362061e-06, "loss": 0.6091, "step": 4378 }, { "epoch": 0.6531434111417704, "grad_norm": 1.253153681755066, "learning_rate": 5.677961205763626e-06, "loss": 0.5683, "step": 4379 }, { "epoch": 0.6532925646953539, "grad_norm": 1.365187406539917, "learning_rate": 5.673604570172857e-06, "loss": 0.6116, "step": 4380 }, { "epoch": 0.6534417182489373, "grad_norm": 0.8179966807365417, "learning_rate": 5.669248944606842e-06, "loss": 0.6303, "step": 4381 }, { "epoch": 0.6535908718025207, "grad_norm": 1.3224319219589233, "learning_rate": 5.66489433008243e-06, "loss": 0.6104, "step": 4382 }, { "epoch": 0.6537400253561041, "grad_norm": 1.7869000434875488, "learning_rate": 5.660540727616237e-06, "loss": 0.6163, "step": 4383 }, { "epoch": 0.6538891789096876, "grad_norm": 1.6602386236190796, "learning_rate": 5.656188138224633e-06, "loss": 0.5862, "step": 4384 }, { "epoch": 0.6540383324632709, "grad_norm": 1.2376463413238525, "learning_rate": 5.651836562923761e-06, "loss": 0.5904, "step": 4385 }, { "epoch": 0.6541874860168544, "grad_norm": 1.3611425161361694, "learning_rate": 5.647486002729523e-06, "loss": 0.5398, "step": 4386 }, { "epoch": 0.6543366395704378, "grad_norm": 1.2445751428604126, "learning_rate": 5.643136458657586e-06, "loss": 0.5521, "step": 4387 }, { "epoch": 0.6544857931240212, "grad_norm": 1.1910308599472046, "learning_rate": 5.638787931723379e-06, "loss": 0.4543, "step": 4388 }, { "epoch": 0.6546349466776046, "grad_norm": 1.2764003276824951, "learning_rate": 5.634440422942098e-06, "loss": 0.5516, "step": 4389 }, { "epoch": 0.654784100231188, "grad_norm": 1.3195228576660156, "learning_rate": 5.630093933328688e-06, "loss": 0.5489, "step": 4390 }, { "epoch": 0.6549332537847714, "grad_norm": 1.438494086265564, "learning_rate": 5.625748463897871e-06, "loss": 0.6317, "step": 4391 }, { "epoch": 0.6550824073383549, "grad_norm": 1.373305320739746, "learning_rate": 5.621404015664125e-06, "loss": 0.5658, "step": 4392 }, { "epoch": 0.6552315608919382, "grad_norm": 1.675317645072937, "learning_rate": 5.617060589641685e-06, "loss": 0.5694, "step": 4393 }, { "epoch": 0.6553807144455217, "grad_norm": 1.4688668251037598, "learning_rate": 5.612718186844548e-06, "loss": 0.5935, "step": 4394 }, { "epoch": 0.6555298679991051, "grad_norm": 1.3420931100845337, "learning_rate": 5.608376808286491e-06, "loss": 0.5818, "step": 4395 }, { "epoch": 0.6556790215526885, "grad_norm": 1.4231756925582886, "learning_rate": 5.604036454981024e-06, "loss": 0.538, "step": 4396 }, { "epoch": 0.6558281751062719, "grad_norm": 1.3699427843093872, "learning_rate": 5.599697127941432e-06, "loss": 0.5593, "step": 4397 }, { "epoch": 0.6559773286598554, "grad_norm": 1.4246803522109985, "learning_rate": 5.5953588281807644e-06, "loss": 0.7135, "step": 4398 }, { "epoch": 0.6561264822134387, "grad_norm": 1.3075835704803467, "learning_rate": 5.591021556711818e-06, "loss": 0.615, "step": 4399 }, { "epoch": 0.6562756357670222, "grad_norm": 1.2397950887680054, "learning_rate": 5.586685314547159e-06, "loss": 0.5906, "step": 4400 }, { "epoch": 0.6564247893206056, "grad_norm": 1.4607998132705688, "learning_rate": 5.582350102699112e-06, "loss": 0.5156, "step": 4401 }, { "epoch": 0.656573942874189, "grad_norm": 1.3591983318328857, "learning_rate": 5.578015922179764e-06, "loss": 0.5946, "step": 4402 }, { "epoch": 0.6567230964277724, "grad_norm": 1.2887206077575684, "learning_rate": 5.573682774000944e-06, "loss": 0.517, "step": 4403 }, { "epoch": 0.6568722499813558, "grad_norm": 0.9164236187934875, "learning_rate": 5.5693506591742705e-06, "loss": 0.6545, "step": 4404 }, { "epoch": 0.6570214035349392, "grad_norm": 1.2309486865997314, "learning_rate": 5.5650195787110915e-06, "loss": 0.5346, "step": 4405 }, { "epoch": 0.6571705570885227, "grad_norm": 1.1868550777435303, "learning_rate": 5.560689533622529e-06, "loss": 0.6017, "step": 4406 }, { "epoch": 0.657319710642106, "grad_norm": 1.2732294797897339, "learning_rate": 5.55636052491946e-06, "loss": 0.5695, "step": 4407 }, { "epoch": 0.6574688641956895, "grad_norm": 1.5076086521148682, "learning_rate": 5.552032553612523e-06, "loss": 0.6122, "step": 4408 }, { "epoch": 0.6576180177492729, "grad_norm": 1.46744966506958, "learning_rate": 5.547705620712103e-06, "loss": 0.5895, "step": 4409 }, { "epoch": 0.6577671713028563, "grad_norm": 1.3027290105819702, "learning_rate": 5.543379727228354e-06, "loss": 0.6086, "step": 4410 }, { "epoch": 0.6579163248564397, "grad_norm": 1.196760892868042, "learning_rate": 5.539054874171183e-06, "loss": 0.5057, "step": 4411 }, { "epoch": 0.6580654784100232, "grad_norm": 1.4022160768508911, "learning_rate": 5.534731062550257e-06, "loss": 0.4848, "step": 4412 }, { "epoch": 0.6582146319636065, "grad_norm": 1.5189989805221558, "learning_rate": 5.530408293374995e-06, "loss": 0.5651, "step": 4413 }, { "epoch": 0.65836378551719, "grad_norm": 1.4672952890396118, "learning_rate": 5.526086567654581e-06, "loss": 0.5489, "step": 4414 }, { "epoch": 0.6585129390707734, "grad_norm": 1.427770972251892, "learning_rate": 5.521765886397938e-06, "loss": 0.635, "step": 4415 }, { "epoch": 0.6586620926243568, "grad_norm": 1.2624198198318481, "learning_rate": 5.517446250613766e-06, "loss": 0.5411, "step": 4416 }, { "epoch": 0.6588112461779402, "grad_norm": 1.2370747327804565, "learning_rate": 5.513127661310512e-06, "loss": 0.5168, "step": 4417 }, { "epoch": 0.6589603997315236, "grad_norm": 1.9663667678833008, "learning_rate": 5.508810119496372e-06, "loss": 0.5332, "step": 4418 }, { "epoch": 0.659109553285107, "grad_norm": 1.4643672704696655, "learning_rate": 5.504493626179307e-06, "loss": 0.5839, "step": 4419 }, { "epoch": 0.6592587068386905, "grad_norm": 1.6618667840957642, "learning_rate": 5.5001781823670305e-06, "loss": 0.5673, "step": 4420 }, { "epoch": 0.6594078603922738, "grad_norm": 1.3045464754104614, "learning_rate": 5.4958637890670105e-06, "loss": 0.536, "step": 4421 }, { "epoch": 0.6595570139458573, "grad_norm": 1.2884219884872437, "learning_rate": 5.491550447286469e-06, "loss": 0.6061, "step": 4422 }, { "epoch": 0.6597061674994407, "grad_norm": 1.4261301755905151, "learning_rate": 5.487238158032388e-06, "loss": 0.5353, "step": 4423 }, { "epoch": 0.6598553210530241, "grad_norm": 1.7506076097488403, "learning_rate": 5.482926922311491e-06, "loss": 0.4502, "step": 4424 }, { "epoch": 0.6600044746066075, "grad_norm": 1.5500160455703735, "learning_rate": 5.478616741130269e-06, "loss": 0.5973, "step": 4425 }, { "epoch": 0.660153628160191, "grad_norm": 1.3047329187393188, "learning_rate": 5.474307615494958e-06, "loss": 0.5839, "step": 4426 }, { "epoch": 0.6603027817137743, "grad_norm": 1.4140944480895996, "learning_rate": 5.469999546411557e-06, "loss": 0.5577, "step": 4427 }, { "epoch": 0.6604519352673578, "grad_norm": 2.236126661300659, "learning_rate": 5.465692534885807e-06, "loss": 0.5115, "step": 4428 }, { "epoch": 0.6606010888209412, "grad_norm": 1.324777603149414, "learning_rate": 5.461386581923207e-06, "loss": 0.5236, "step": 4429 }, { "epoch": 0.6607502423745246, "grad_norm": 1.6385499238967896, "learning_rate": 5.457081688529011e-06, "loss": 0.5719, "step": 4430 }, { "epoch": 0.660899395928108, "grad_norm": 1.513242244720459, "learning_rate": 5.452777855708224e-06, "loss": 0.5609, "step": 4431 }, { "epoch": 0.6610485494816914, "grad_norm": 1.480926513671875, "learning_rate": 5.448475084465605e-06, "loss": 0.6495, "step": 4432 }, { "epoch": 0.6611977030352748, "grad_norm": 1.6348953247070312, "learning_rate": 5.4441733758056655e-06, "loss": 0.5823, "step": 4433 }, { "epoch": 0.6613468565888583, "grad_norm": 1.2851543426513672, "learning_rate": 5.439872730732659e-06, "loss": 0.6092, "step": 4434 }, { "epoch": 0.6614960101424416, "grad_norm": 1.3445216417312622, "learning_rate": 5.4355731502506035e-06, "loss": 0.5633, "step": 4435 }, { "epoch": 0.6616451636960251, "grad_norm": 1.3772855997085571, "learning_rate": 5.431274635363268e-06, "loss": 0.5666, "step": 4436 }, { "epoch": 0.6617943172496085, "grad_norm": 1.6004760265350342, "learning_rate": 5.426977187074158e-06, "loss": 0.6074, "step": 4437 }, { "epoch": 0.6619434708031919, "grad_norm": 1.7414216995239258, "learning_rate": 5.422680806386544e-06, "loss": 0.6542, "step": 4438 }, { "epoch": 0.6620926243567753, "grad_norm": 1.4038429260253906, "learning_rate": 5.418385494303453e-06, "loss": 0.5408, "step": 4439 }, { "epoch": 0.6622417779103588, "grad_norm": 1.389970064163208, "learning_rate": 5.414091251827642e-06, "loss": 0.4745, "step": 4440 }, { "epoch": 0.6623909314639421, "grad_norm": 1.269396424293518, "learning_rate": 5.409798079961632e-06, "loss": 0.5895, "step": 4441 }, { "epoch": 0.6625400850175256, "grad_norm": 1.356675148010254, "learning_rate": 5.405505979707698e-06, "loss": 0.607, "step": 4442 }, { "epoch": 0.662689238571109, "grad_norm": 1.606412410736084, "learning_rate": 5.401214952067849e-06, "loss": 0.5602, "step": 4443 }, { "epoch": 0.6628383921246924, "grad_norm": 1.259192705154419, "learning_rate": 5.396924998043858e-06, "loss": 0.5583, "step": 4444 }, { "epoch": 0.6629875456782758, "grad_norm": 1.7896318435668945, "learning_rate": 5.392636118637242e-06, "loss": 0.6672, "step": 4445 }, { "epoch": 0.6631366992318593, "grad_norm": 1.2831059694290161, "learning_rate": 5.388348314849261e-06, "loss": 0.5895, "step": 4446 }, { "epoch": 0.6632858527854426, "grad_norm": 1.6649161577224731, "learning_rate": 5.38406158768094e-06, "loss": 0.6697, "step": 4447 }, { "epoch": 0.6634350063390261, "grad_norm": 1.6964175701141357, "learning_rate": 5.379775938133043e-06, "loss": 0.5537, "step": 4448 }, { "epoch": 0.6635841598926094, "grad_norm": 1.5330265760421753, "learning_rate": 5.375491367206074e-06, "loss": 0.4559, "step": 4449 }, { "epoch": 0.6637333134461929, "grad_norm": 1.3339611291885376, "learning_rate": 5.371207875900298e-06, "loss": 0.623, "step": 4450 }, { "epoch": 0.6638824669997763, "grad_norm": 1.16057288646698, "learning_rate": 5.366925465215728e-06, "loss": 0.5962, "step": 4451 }, { "epoch": 0.6640316205533597, "grad_norm": 1.395755410194397, "learning_rate": 5.362644136152111e-06, "loss": 0.5888, "step": 4452 }, { "epoch": 0.6641807741069431, "grad_norm": 2.1142029762268066, "learning_rate": 5.358363889708954e-06, "loss": 0.6687, "step": 4453 }, { "epoch": 0.6643299276605266, "grad_norm": 1.4322807788848877, "learning_rate": 5.354084726885511e-06, "loss": 0.5553, "step": 4454 }, { "epoch": 0.6644790812141099, "grad_norm": 1.6056679487228394, "learning_rate": 5.349806648680778e-06, "loss": 0.6071, "step": 4455 }, { "epoch": 0.6646282347676934, "grad_norm": 1.3906559944152832, "learning_rate": 5.3455296560935e-06, "loss": 0.6314, "step": 4456 }, { "epoch": 0.6647773883212768, "grad_norm": 1.3265031576156616, "learning_rate": 5.34125375012217e-06, "loss": 0.5348, "step": 4457 }, { "epoch": 0.6649265418748602, "grad_norm": 1.6064345836639404, "learning_rate": 5.336978931765023e-06, "loss": 0.5872, "step": 4458 }, { "epoch": 0.6650756954284436, "grad_norm": 1.4669419527053833, "learning_rate": 5.332705202020043e-06, "loss": 0.5642, "step": 4459 }, { "epoch": 0.665224848982027, "grad_norm": 1.2578812837600708, "learning_rate": 5.328432561884962e-06, "loss": 0.5825, "step": 4460 }, { "epoch": 0.6653740025356104, "grad_norm": 1.3721076250076294, "learning_rate": 5.324161012357256e-06, "loss": 0.5937, "step": 4461 }, { "epoch": 0.6655231560891939, "grad_norm": 1.5140728950500488, "learning_rate": 5.31989055443414e-06, "loss": 0.5617, "step": 4462 }, { "epoch": 0.6656723096427772, "grad_norm": 1.603834867477417, "learning_rate": 5.315621189112582e-06, "loss": 0.5583, "step": 4463 }, { "epoch": 0.6658214631963607, "grad_norm": 1.718348503112793, "learning_rate": 5.3113529173892945e-06, "loss": 0.5416, "step": 4464 }, { "epoch": 0.6659706167499441, "grad_norm": 1.3783663511276245, "learning_rate": 5.307085740260731e-06, "loss": 0.567, "step": 4465 }, { "epoch": 0.6661197703035275, "grad_norm": 1.4092893600463867, "learning_rate": 5.302819658723095e-06, "loss": 0.6008, "step": 4466 }, { "epoch": 0.6662689238571109, "grad_norm": 1.2408419847488403, "learning_rate": 5.29855467377233e-06, "loss": 0.6025, "step": 4467 }, { "epoch": 0.6664180774106944, "grad_norm": 1.5323870182037354, "learning_rate": 5.294290786404119e-06, "loss": 0.5936, "step": 4468 }, { "epoch": 0.6665672309642777, "grad_norm": 1.3374019861221313, "learning_rate": 5.290027997613898e-06, "loss": 0.5338, "step": 4469 }, { "epoch": 0.6667163845178611, "grad_norm": 1.3300294876098633, "learning_rate": 5.285766308396845e-06, "loss": 0.5685, "step": 4470 }, { "epoch": 0.6668655380714446, "grad_norm": 1.9795348644256592, "learning_rate": 5.28150571974787e-06, "loss": 0.5986, "step": 4471 }, { "epoch": 0.6670146916250279, "grad_norm": 1.5558327436447144, "learning_rate": 5.277246232661641e-06, "loss": 0.5424, "step": 4472 }, { "epoch": 0.6671638451786114, "grad_norm": 1.2444924116134644, "learning_rate": 5.272987848132562e-06, "loss": 0.5305, "step": 4473 }, { "epoch": 0.6673129987321947, "grad_norm": 1.4813265800476074, "learning_rate": 5.268730567154778e-06, "loss": 0.6195, "step": 4474 }, { "epoch": 0.6674621522857782, "grad_norm": 1.406427025794983, "learning_rate": 5.264474390722181e-06, "loss": 0.5509, "step": 4475 }, { "epoch": 0.6676113058393616, "grad_norm": 1.3825249671936035, "learning_rate": 5.260219319828405e-06, "loss": 0.5881, "step": 4476 }, { "epoch": 0.667760459392945, "grad_norm": 1.8643237352371216, "learning_rate": 5.2559653554668184e-06, "loss": 0.6261, "step": 4477 }, { "epoch": 0.6679096129465284, "grad_norm": 1.4815999269485474, "learning_rate": 5.251712498630537e-06, "loss": 0.5311, "step": 4478 }, { "epoch": 0.6680587665001119, "grad_norm": 1.8406318426132202, "learning_rate": 5.24746075031242e-06, "loss": 0.6873, "step": 4479 }, { "epoch": 0.6682079200536952, "grad_norm": 1.4083455801010132, "learning_rate": 5.243210111505068e-06, "loss": 0.5277, "step": 4480 }, { "epoch": 0.6683570736072787, "grad_norm": 1.276590347290039, "learning_rate": 5.238960583200807e-06, "loss": 0.5818, "step": 4481 }, { "epoch": 0.668506227160862, "grad_norm": 1.4135445356369019, "learning_rate": 5.234712166391735e-06, "loss": 0.5122, "step": 4482 }, { "epoch": 0.6686553807144455, "grad_norm": 1.4000935554504395, "learning_rate": 5.230464862069658e-06, "loss": 0.5732, "step": 4483 }, { "epoch": 0.6688045342680289, "grad_norm": 1.4412872791290283, "learning_rate": 5.226218671226142e-06, "loss": 0.5803, "step": 4484 }, { "epoch": 0.6689536878216124, "grad_norm": 1.4561951160430908, "learning_rate": 5.221973594852488e-06, "loss": 0.6537, "step": 4485 }, { "epoch": 0.6691028413751957, "grad_norm": 1.5513908863067627, "learning_rate": 5.217729633939737e-06, "loss": 0.5895, "step": 4486 }, { "epoch": 0.6692519949287792, "grad_norm": 0.8564832210540771, "learning_rate": 5.213486789478665e-06, "loss": 0.6234, "step": 4487 }, { "epoch": 0.6694011484823625, "grad_norm": 3.789811134338379, "learning_rate": 5.209245062459791e-06, "loss": 0.5319, "step": 4488 }, { "epoch": 0.669550302035946, "grad_norm": 1.331092357635498, "learning_rate": 5.205004453873381e-06, "loss": 0.572, "step": 4489 }, { "epoch": 0.6696994555895294, "grad_norm": 1.4591927528381348, "learning_rate": 5.2007649647094195e-06, "loss": 0.6061, "step": 4490 }, { "epoch": 0.6698486091431128, "grad_norm": 1.4839650392532349, "learning_rate": 5.196526595957654e-06, "loss": 0.5679, "step": 4491 }, { "epoch": 0.6699977626966962, "grad_norm": 1.4603031873703003, "learning_rate": 5.192289348607557e-06, "loss": 0.6053, "step": 4492 }, { "epoch": 0.6701469162502797, "grad_norm": 1.4000024795532227, "learning_rate": 5.188053223648337e-06, "loss": 0.5531, "step": 4493 }, { "epoch": 0.670296069803863, "grad_norm": 1.3058735132217407, "learning_rate": 5.183818222068944e-06, "loss": 0.542, "step": 4494 }, { "epoch": 0.6704452233574465, "grad_norm": 1.3005915880203247, "learning_rate": 5.179584344858074e-06, "loss": 0.5265, "step": 4495 }, { "epoch": 0.6705943769110299, "grad_norm": 1.3159810304641724, "learning_rate": 5.175351593004143e-06, "loss": 0.5618, "step": 4496 }, { "epoch": 0.6707435304646133, "grad_norm": 1.2818801403045654, "learning_rate": 5.171119967495319e-06, "loss": 0.5416, "step": 4497 }, { "epoch": 0.6708926840181967, "grad_norm": 1.3230814933776855, "learning_rate": 5.1668894693195045e-06, "loss": 0.6225, "step": 4498 }, { "epoch": 0.6710418375717802, "grad_norm": 1.5293235778808594, "learning_rate": 5.162660099464327e-06, "loss": 0.6725, "step": 4499 }, { "epoch": 0.6711909911253635, "grad_norm": 0.8423118591308594, "learning_rate": 5.158431858917169e-06, "loss": 0.6548, "step": 4500 }, { "epoch": 0.671340144678947, "grad_norm": 1.3550752401351929, "learning_rate": 5.1542047486651415e-06, "loss": 0.5832, "step": 4501 }, { "epoch": 0.6714892982325303, "grad_norm": 1.300261378288269, "learning_rate": 5.149978769695084e-06, "loss": 0.5409, "step": 4502 }, { "epoch": 0.6716384517861138, "grad_norm": 0.8003860712051392, "learning_rate": 5.145753922993582e-06, "loss": 0.5952, "step": 4503 }, { "epoch": 0.6717876053396972, "grad_norm": 1.3678241968154907, "learning_rate": 5.141530209546954e-06, "loss": 0.5577, "step": 4504 }, { "epoch": 0.6719367588932806, "grad_norm": 1.3291672468185425, "learning_rate": 5.137307630341248e-06, "loss": 0.5482, "step": 4505 }, { "epoch": 0.672085912446864, "grad_norm": 1.5663046836853027, "learning_rate": 5.133086186362257e-06, "loss": 0.6162, "step": 4506 }, { "epoch": 0.6722350660004475, "grad_norm": 1.323333501815796, "learning_rate": 5.128865878595502e-06, "loss": 0.5381, "step": 4507 }, { "epoch": 0.6723842195540308, "grad_norm": 1.1564428806304932, "learning_rate": 5.124646708026241e-06, "loss": 0.5968, "step": 4508 }, { "epoch": 0.6725333731076143, "grad_norm": 1.4008082151412964, "learning_rate": 5.120428675639466e-06, "loss": 0.4349, "step": 4509 }, { "epoch": 0.6726825266611977, "grad_norm": 1.4307520389556885, "learning_rate": 5.116211782419911e-06, "loss": 0.6076, "step": 4510 }, { "epoch": 0.6728316802147811, "grad_norm": 1.2897320985794067, "learning_rate": 5.111996029352025e-06, "loss": 0.4638, "step": 4511 }, { "epoch": 0.6729808337683645, "grad_norm": 1.3429343700408936, "learning_rate": 5.107781417420008e-06, "loss": 0.5914, "step": 4512 }, { "epoch": 0.673129987321948, "grad_norm": 1.2458144426345825, "learning_rate": 5.103567947607788e-06, "loss": 0.5254, "step": 4513 }, { "epoch": 0.6732791408755313, "grad_norm": 1.483436942100525, "learning_rate": 5.099355620899032e-06, "loss": 0.5559, "step": 4514 }, { "epoch": 0.6734282944291148, "grad_norm": 1.2581236362457275, "learning_rate": 5.095144438277124e-06, "loss": 0.581, "step": 4515 }, { "epoch": 0.6735774479826981, "grad_norm": 1.2782554626464844, "learning_rate": 5.090934400725194e-06, "loss": 0.6057, "step": 4516 }, { "epoch": 0.6737266015362816, "grad_norm": 1.3235774040222168, "learning_rate": 5.086725509226111e-06, "loss": 0.5966, "step": 4517 }, { "epoch": 0.673875755089865, "grad_norm": 1.6253416538238525, "learning_rate": 5.08251776476246e-06, "loss": 0.5336, "step": 4518 }, { "epoch": 0.6740249086434484, "grad_norm": 1.770124077796936, "learning_rate": 5.0783111683165676e-06, "loss": 0.5712, "step": 4519 }, { "epoch": 0.6741740621970318, "grad_norm": 0.8984549045562744, "learning_rate": 5.074105720870495e-06, "loss": 0.6427, "step": 4520 }, { "epoch": 0.6743232157506153, "grad_norm": 1.4895317554473877, "learning_rate": 5.069901423406023e-06, "loss": 0.5559, "step": 4521 }, { "epoch": 0.6744723693041986, "grad_norm": 1.3620386123657227, "learning_rate": 5.065698276904676e-06, "loss": 0.4621, "step": 4522 }, { "epoch": 0.6746215228577821, "grad_norm": 1.4542325735092163, "learning_rate": 5.061496282347709e-06, "loss": 0.5398, "step": 4523 }, { "epoch": 0.6747706764113655, "grad_norm": 1.3997403383255005, "learning_rate": 5.0572954407160954e-06, "loss": 0.519, "step": 4524 }, { "epoch": 0.6749198299649489, "grad_norm": 1.335997223854065, "learning_rate": 5.0530957529905515e-06, "loss": 0.5884, "step": 4525 }, { "epoch": 0.6750689835185323, "grad_norm": 1.6050059795379639, "learning_rate": 5.048897220151532e-06, "loss": 0.6856, "step": 4526 }, { "epoch": 0.6752181370721158, "grad_norm": 2.4854118824005127, "learning_rate": 5.044699843179197e-06, "loss": 0.4865, "step": 4527 }, { "epoch": 0.6753672906256991, "grad_norm": 1.4670053720474243, "learning_rate": 5.040503623053458e-06, "loss": 0.594, "step": 4528 }, { "epoch": 0.6755164441792826, "grad_norm": 1.2850124835968018, "learning_rate": 5.036308560753955e-06, "loss": 0.5163, "step": 4529 }, { "epoch": 0.6756655977328659, "grad_norm": 1.259742259979248, "learning_rate": 5.03211465726004e-06, "loss": 0.6179, "step": 4530 }, { "epoch": 0.6758147512864494, "grad_norm": 1.5888967514038086, "learning_rate": 5.027921913550813e-06, "loss": 0.5399, "step": 4531 }, { "epoch": 0.6759639048400328, "grad_norm": 1.2917402982711792, "learning_rate": 5.023730330605095e-06, "loss": 0.5227, "step": 4532 }, { "epoch": 0.6761130583936162, "grad_norm": 0.8179947733879089, "learning_rate": 5.019539909401445e-06, "loss": 0.6668, "step": 4533 }, { "epoch": 0.6762622119471996, "grad_norm": 1.3492399454116821, "learning_rate": 5.015350650918129e-06, "loss": 0.5295, "step": 4534 }, { "epoch": 0.6764113655007831, "grad_norm": 1.2932829856872559, "learning_rate": 5.011162556133174e-06, "loss": 0.5628, "step": 4535 }, { "epoch": 0.6765605190543664, "grad_norm": 0.845636248588562, "learning_rate": 5.006975626024304e-06, "loss": 0.6363, "step": 4536 }, { "epoch": 0.6767096726079499, "grad_norm": 1.574242115020752, "learning_rate": 5.00278986156899e-06, "loss": 0.6064, "step": 4537 }, { "epoch": 0.6768588261615333, "grad_norm": 1.3085378408432007, "learning_rate": 4.998605263744426e-06, "loss": 0.4866, "step": 4538 }, { "epoch": 0.6770079797151167, "grad_norm": 1.5793496370315552, "learning_rate": 4.994421833527536e-06, "loss": 0.6275, "step": 4539 }, { "epoch": 0.6771571332687001, "grad_norm": 1.2679316997528076, "learning_rate": 4.99023957189496e-06, "loss": 0.5164, "step": 4540 }, { "epoch": 0.6773062868222836, "grad_norm": 1.1671373844146729, "learning_rate": 4.986058479823079e-06, "loss": 0.4444, "step": 4541 }, { "epoch": 0.6774554403758669, "grad_norm": 1.5855294466018677, "learning_rate": 4.9818785582880006e-06, "loss": 0.5745, "step": 4542 }, { "epoch": 0.6776045939294504, "grad_norm": 1.4468353986740112, "learning_rate": 4.9776998082655405e-06, "loss": 0.6021, "step": 4543 }, { "epoch": 0.6777537474830337, "grad_norm": 1.594543695449829, "learning_rate": 4.973522230731267e-06, "loss": 0.6517, "step": 4544 }, { "epoch": 0.6779029010366172, "grad_norm": 1.4104684591293335, "learning_rate": 4.969345826660462e-06, "loss": 0.624, "step": 4545 }, { "epoch": 0.6780520545902006, "grad_norm": 1.627100944519043, "learning_rate": 4.9651705970281286e-06, "loss": 0.5464, "step": 4546 }, { "epoch": 0.678201208143784, "grad_norm": 1.3870856761932373, "learning_rate": 4.960996542809001e-06, "loss": 0.5474, "step": 4547 }, { "epoch": 0.6783503616973674, "grad_norm": 1.3669500350952148, "learning_rate": 4.956823664977545e-06, "loss": 0.5242, "step": 4548 }, { "epoch": 0.6784995152509509, "grad_norm": 1.4712153673171997, "learning_rate": 4.9526519645079376e-06, "loss": 0.5399, "step": 4549 }, { "epoch": 0.6786486688045342, "grad_norm": 1.244611144065857, "learning_rate": 4.948481442374093e-06, "loss": 0.5433, "step": 4550 }, { "epoch": 0.6787978223581177, "grad_norm": 1.2300021648406982, "learning_rate": 4.944312099549647e-06, "loss": 0.53, "step": 4551 }, { "epoch": 0.6789469759117011, "grad_norm": 1.5794525146484375, "learning_rate": 4.940143937007957e-06, "loss": 0.5605, "step": 4552 }, { "epoch": 0.6790961294652845, "grad_norm": 1.6939549446105957, "learning_rate": 4.935976955722109e-06, "loss": 0.5658, "step": 4553 }, { "epoch": 0.6792452830188679, "grad_norm": 1.4284462928771973, "learning_rate": 4.931811156664916e-06, "loss": 0.5759, "step": 4554 }, { "epoch": 0.6793944365724514, "grad_norm": 1.4685611724853516, "learning_rate": 4.927646540808903e-06, "loss": 0.5406, "step": 4555 }, { "epoch": 0.6795435901260347, "grad_norm": 0.8502655029296875, "learning_rate": 4.923483109126328e-06, "loss": 0.6339, "step": 4556 }, { "epoch": 0.6796927436796182, "grad_norm": 1.51195228099823, "learning_rate": 4.919320862589172e-06, "loss": 0.6083, "step": 4557 }, { "epoch": 0.6798418972332015, "grad_norm": 1.2913521528244019, "learning_rate": 4.915159802169143e-06, "loss": 0.593, "step": 4558 }, { "epoch": 0.679991050786785, "grad_norm": 1.3394553661346436, "learning_rate": 4.910999928837656e-06, "loss": 0.5932, "step": 4559 }, { "epoch": 0.6801402043403684, "grad_norm": 1.6827878952026367, "learning_rate": 4.906841243565869e-06, "loss": 0.5741, "step": 4560 }, { "epoch": 0.6802893578939518, "grad_norm": 1.375832438468933, "learning_rate": 4.90268374732465e-06, "loss": 0.6265, "step": 4561 }, { "epoch": 0.6804385114475352, "grad_norm": 1.5796136856079102, "learning_rate": 4.898527441084595e-06, "loss": 0.5603, "step": 4562 }, { "epoch": 0.6805876650011187, "grad_norm": 1.4302978515625, "learning_rate": 4.894372325816019e-06, "loss": 0.5916, "step": 4563 }, { "epoch": 0.680736818554702, "grad_norm": 1.2959742546081543, "learning_rate": 4.890218402488966e-06, "loss": 0.6162, "step": 4564 }, { "epoch": 0.6808859721082855, "grad_norm": 1.301602840423584, "learning_rate": 4.886065672073186e-06, "loss": 0.6613, "step": 4565 }, { "epoch": 0.6810351256618689, "grad_norm": 1.5723423957824707, "learning_rate": 4.881914135538166e-06, "loss": 0.5899, "step": 4566 }, { "epoch": 0.6811842792154523, "grad_norm": 1.4150015115737915, "learning_rate": 4.877763793853112e-06, "loss": 0.5246, "step": 4567 }, { "epoch": 0.6813334327690357, "grad_norm": 1.5754328966140747, "learning_rate": 4.8736146479869404e-06, "loss": 0.4787, "step": 4568 }, { "epoch": 0.6814825863226192, "grad_norm": 1.56873619556427, "learning_rate": 4.869466698908297e-06, "loss": 0.6119, "step": 4569 }, { "epoch": 0.6816317398762025, "grad_norm": 1.2882945537567139, "learning_rate": 4.865319947585556e-06, "loss": 0.559, "step": 4570 }, { "epoch": 0.681780893429786, "grad_norm": 1.4593244791030884, "learning_rate": 4.861174394986795e-06, "loss": 0.4937, "step": 4571 }, { "epoch": 0.6819300469833693, "grad_norm": 1.3052611351013184, "learning_rate": 4.857030042079821e-06, "loss": 0.5188, "step": 4572 }, { "epoch": 0.6820792005369528, "grad_norm": 1.4057705402374268, "learning_rate": 4.852886889832163e-06, "loss": 0.5216, "step": 4573 }, { "epoch": 0.6822283540905362, "grad_norm": 1.2765462398529053, "learning_rate": 4.848744939211062e-06, "loss": 0.6225, "step": 4574 }, { "epoch": 0.6823775076441196, "grad_norm": 1.1693897247314453, "learning_rate": 4.844604191183485e-06, "loss": 0.4648, "step": 4575 }, { "epoch": 0.682526661197703, "grad_norm": 1.5715374946594238, "learning_rate": 4.8404646467161184e-06, "loss": 0.649, "step": 4576 }, { "epoch": 0.6826758147512865, "grad_norm": 1.5383702516555786, "learning_rate": 4.836326306775357e-06, "loss": 0.4863, "step": 4577 }, { "epoch": 0.6828249683048698, "grad_norm": 1.3052093982696533, "learning_rate": 4.832189172327333e-06, "loss": 0.568, "step": 4578 }, { "epoch": 0.6829741218584533, "grad_norm": 1.3790003061294556, "learning_rate": 4.8280532443378855e-06, "loss": 0.5516, "step": 4579 }, { "epoch": 0.6831232754120367, "grad_norm": 1.1629568338394165, "learning_rate": 4.823918523772567e-06, "loss": 0.5391, "step": 4580 }, { "epoch": 0.6832724289656201, "grad_norm": 1.4062199592590332, "learning_rate": 4.819785011596659e-06, "loss": 0.5368, "step": 4581 }, { "epoch": 0.6834215825192035, "grad_norm": 1.3157973289489746, "learning_rate": 4.81565270877516e-06, "loss": 0.5998, "step": 4582 }, { "epoch": 0.683570736072787, "grad_norm": 1.4184261560440063, "learning_rate": 4.811521616272774e-06, "loss": 0.4713, "step": 4583 }, { "epoch": 0.6837198896263703, "grad_norm": 1.2788538932800293, "learning_rate": 4.807391735053936e-06, "loss": 0.6054, "step": 4584 }, { "epoch": 0.6838690431799538, "grad_norm": 1.37421452999115, "learning_rate": 4.8032630660827914e-06, "loss": 0.6135, "step": 4585 }, { "epoch": 0.6840181967335371, "grad_norm": 1.4426432847976685, "learning_rate": 4.799135610323207e-06, "loss": 0.5602, "step": 4586 }, { "epoch": 0.6841673502871206, "grad_norm": 1.3301665782928467, "learning_rate": 4.795009368738761e-06, "loss": 0.6123, "step": 4587 }, { "epoch": 0.684316503840704, "grad_norm": 1.3517926931381226, "learning_rate": 4.790884342292758e-06, "loss": 0.5308, "step": 4588 }, { "epoch": 0.6844656573942874, "grad_norm": 1.3347536325454712, "learning_rate": 4.7867605319482014e-06, "loss": 0.5658, "step": 4589 }, { "epoch": 0.6846148109478708, "grad_norm": 1.9062936305999756, "learning_rate": 4.782637938667825e-06, "loss": 0.6179, "step": 4590 }, { "epoch": 0.6847639645014543, "grad_norm": 1.9412184953689575, "learning_rate": 4.778516563414078e-06, "loss": 0.4857, "step": 4591 }, { "epoch": 0.6849131180550376, "grad_norm": 1.1944352388381958, "learning_rate": 4.7743964071491224e-06, "loss": 0.4488, "step": 4592 }, { "epoch": 0.6850622716086211, "grad_norm": 1.5629291534423828, "learning_rate": 4.770277470834829e-06, "loss": 0.5886, "step": 4593 }, { "epoch": 0.6852114251622045, "grad_norm": 1.4871481657028198, "learning_rate": 4.766159755432793e-06, "loss": 0.5792, "step": 4594 }, { "epoch": 0.6853605787157879, "grad_norm": 1.4950004816055298, "learning_rate": 4.762043261904321e-06, "loss": 0.5381, "step": 4595 }, { "epoch": 0.6855097322693713, "grad_norm": 1.5764660835266113, "learning_rate": 4.757927991210436e-06, "loss": 0.527, "step": 4596 }, { "epoch": 0.6856588858229548, "grad_norm": 1.26249361038208, "learning_rate": 4.753813944311873e-06, "loss": 0.5601, "step": 4597 }, { "epoch": 0.6858080393765381, "grad_norm": 1.2459880113601685, "learning_rate": 4.749701122169089e-06, "loss": 0.5428, "step": 4598 }, { "epoch": 0.6859571929301216, "grad_norm": 1.2948627471923828, "learning_rate": 4.745589525742238e-06, "loss": 0.5935, "step": 4599 }, { "epoch": 0.6861063464837049, "grad_norm": 1.4270763397216797, "learning_rate": 4.741479155991204e-06, "loss": 0.6752, "step": 4600 }, { "epoch": 0.6862555000372884, "grad_norm": 1.424700379371643, "learning_rate": 4.737370013875583e-06, "loss": 0.591, "step": 4601 }, { "epoch": 0.6864046535908718, "grad_norm": 1.5006258487701416, "learning_rate": 4.7332621003546716e-06, "loss": 0.6017, "step": 4602 }, { "epoch": 0.6865538071444552, "grad_norm": 1.190105676651001, "learning_rate": 4.729155416387495e-06, "loss": 0.5629, "step": 4603 }, { "epoch": 0.6867029606980386, "grad_norm": 1.535428762435913, "learning_rate": 4.725049962932782e-06, "loss": 0.6019, "step": 4604 }, { "epoch": 0.6868521142516221, "grad_norm": 1.2678868770599365, "learning_rate": 4.720945740948979e-06, "loss": 0.5746, "step": 4605 }, { "epoch": 0.6870012678052054, "grad_norm": 0.8583310842514038, "learning_rate": 4.716842751394241e-06, "loss": 0.6531, "step": 4606 }, { "epoch": 0.6871504213587889, "grad_norm": 1.3225393295288086, "learning_rate": 4.7127409952264445e-06, "loss": 0.5007, "step": 4607 }, { "epoch": 0.6872995749123723, "grad_norm": 1.61417818069458, "learning_rate": 4.70864047340316e-06, "loss": 0.5967, "step": 4608 }, { "epoch": 0.6874487284659557, "grad_norm": 1.4162354469299316, "learning_rate": 4.704541186881685e-06, "loss": 0.6152, "step": 4609 }, { "epoch": 0.6875978820195391, "grad_norm": 1.608886480331421, "learning_rate": 4.700443136619024e-06, "loss": 0.5437, "step": 4610 }, { "epoch": 0.6877470355731226, "grad_norm": 1.2627547979354858, "learning_rate": 4.696346323571899e-06, "loss": 0.5017, "step": 4611 }, { "epoch": 0.6878961891267059, "grad_norm": 1.2960822582244873, "learning_rate": 4.692250748696723e-06, "loss": 0.5697, "step": 4612 }, { "epoch": 0.6880453426802894, "grad_norm": 1.4944226741790771, "learning_rate": 4.688156412949651e-06, "loss": 0.5739, "step": 4613 }, { "epoch": 0.6881944962338727, "grad_norm": 1.3736287355422974, "learning_rate": 4.684063317286521e-06, "loss": 0.5126, "step": 4614 }, { "epoch": 0.6883436497874562, "grad_norm": 1.8318454027175903, "learning_rate": 4.679971462662896e-06, "loss": 0.5733, "step": 4615 }, { "epoch": 0.6884928033410396, "grad_norm": 1.4140965938568115, "learning_rate": 4.675880850034045e-06, "loss": 0.627, "step": 4616 }, { "epoch": 0.688641956894623, "grad_norm": 1.3723039627075195, "learning_rate": 4.67179148035495e-06, "loss": 0.6924, "step": 4617 }, { "epoch": 0.6887911104482064, "grad_norm": 1.277951717376709, "learning_rate": 4.667703354580297e-06, "loss": 0.5684, "step": 4618 }, { "epoch": 0.6889402640017899, "grad_norm": 1.3550556898117065, "learning_rate": 4.663616473664485e-06, "loss": 0.6406, "step": 4619 }, { "epoch": 0.6890894175553732, "grad_norm": 1.4545941352844238, "learning_rate": 4.659530838561629e-06, "loss": 0.6063, "step": 4620 }, { "epoch": 0.6892385711089567, "grad_norm": 1.2404295206069946, "learning_rate": 4.6554464502255345e-06, "loss": 0.5314, "step": 4621 }, { "epoch": 0.6893877246625401, "grad_norm": 1.166969895362854, "learning_rate": 4.65136330960974e-06, "loss": 0.5681, "step": 4622 }, { "epoch": 0.6895368782161235, "grad_norm": 1.249638557434082, "learning_rate": 4.64728141766748e-06, "loss": 0.5673, "step": 4623 }, { "epoch": 0.6896860317697069, "grad_norm": 1.3079816102981567, "learning_rate": 4.6432007753516904e-06, "loss": 0.5628, "step": 4624 }, { "epoch": 0.6898351853232904, "grad_norm": 0.8959793448448181, "learning_rate": 4.6391213836150284e-06, "loss": 0.663, "step": 4625 }, { "epoch": 0.6899843388768737, "grad_norm": 0.8588896989822388, "learning_rate": 4.635043243409857e-06, "loss": 0.6238, "step": 4626 }, { "epoch": 0.6901334924304572, "grad_norm": 1.6195759773254395, "learning_rate": 4.6309663556882365e-06, "loss": 0.5758, "step": 4627 }, { "epoch": 0.6902826459840405, "grad_norm": 1.4064058065414429, "learning_rate": 4.626890721401948e-06, "loss": 0.5837, "step": 4628 }, { "epoch": 0.690431799537624, "grad_norm": 1.4236140251159668, "learning_rate": 4.622816341502475e-06, "loss": 0.6231, "step": 4629 }, { "epoch": 0.6905809530912074, "grad_norm": 1.2794365882873535, "learning_rate": 4.618743216941e-06, "loss": 0.6062, "step": 4630 }, { "epoch": 0.6907301066447908, "grad_norm": 1.3316409587860107, "learning_rate": 4.614671348668429e-06, "loss": 0.535, "step": 4631 }, { "epoch": 0.6908792601983742, "grad_norm": 1.347335696220398, "learning_rate": 4.610600737635367e-06, "loss": 0.5461, "step": 4632 }, { "epoch": 0.6910284137519577, "grad_norm": 1.3540616035461426, "learning_rate": 4.606531384792114e-06, "loss": 0.5964, "step": 4633 }, { "epoch": 0.691177567305541, "grad_norm": 1.3515057563781738, "learning_rate": 4.602463291088695e-06, "loss": 0.5381, "step": 4634 }, { "epoch": 0.6913267208591245, "grad_norm": 1.30028235912323, "learning_rate": 4.5983964574748315e-06, "loss": 0.5891, "step": 4635 }, { "epoch": 0.6914758744127079, "grad_norm": 1.779730200767517, "learning_rate": 4.594330884899948e-06, "loss": 0.5647, "step": 4636 }, { "epoch": 0.6916250279662913, "grad_norm": 1.5571852922439575, "learning_rate": 4.59026657431318e-06, "loss": 0.5197, "step": 4637 }, { "epoch": 0.6917741815198747, "grad_norm": 1.3312032222747803, "learning_rate": 4.586203526663368e-06, "loss": 0.6104, "step": 4638 }, { "epoch": 0.6919233350734582, "grad_norm": 1.3298927545547485, "learning_rate": 4.582141742899056e-06, "loss": 0.644, "step": 4639 }, { "epoch": 0.6920724886270415, "grad_norm": 1.2446848154067993, "learning_rate": 4.578081223968494e-06, "loss": 0.5713, "step": 4640 }, { "epoch": 0.692221642180625, "grad_norm": 1.429821491241455, "learning_rate": 4.574021970819635e-06, "loss": 0.5573, "step": 4641 }, { "epoch": 0.6923707957342083, "grad_norm": 1.4303771257400513, "learning_rate": 4.569963984400143e-06, "loss": 0.6453, "step": 4642 }, { "epoch": 0.6925199492877918, "grad_norm": 1.6298019886016846, "learning_rate": 4.565907265657372e-06, "loss": 0.6297, "step": 4643 }, { "epoch": 0.6926691028413752, "grad_norm": 1.6020582914352417, "learning_rate": 4.561851815538394e-06, "loss": 0.5936, "step": 4644 }, { "epoch": 0.6928182563949586, "grad_norm": 1.5143290758132935, "learning_rate": 4.557797634989982e-06, "loss": 0.5786, "step": 4645 }, { "epoch": 0.692967409948542, "grad_norm": 1.4478330612182617, "learning_rate": 4.553744724958605e-06, "loss": 0.5955, "step": 4646 }, { "epoch": 0.6931165635021255, "grad_norm": 1.2621644735336304, "learning_rate": 4.54969308639044e-06, "loss": 0.6649, "step": 4647 }, { "epoch": 0.6932657170557088, "grad_norm": 1.3110201358795166, "learning_rate": 4.545642720231378e-06, "loss": 0.4963, "step": 4648 }, { "epoch": 0.6934148706092923, "grad_norm": 1.5700910091400146, "learning_rate": 4.541593627426993e-06, "loss": 0.628, "step": 4649 }, { "epoch": 0.6935640241628757, "grad_norm": 1.2844935655593872, "learning_rate": 4.537545808922577e-06, "loss": 0.657, "step": 4650 }, { "epoch": 0.6937131777164591, "grad_norm": 1.6589221954345703, "learning_rate": 4.5334992656631184e-06, "loss": 0.5394, "step": 4651 }, { "epoch": 0.6938623312700425, "grad_norm": 1.562371850013733, "learning_rate": 4.529453998593305e-06, "loss": 0.632, "step": 4652 }, { "epoch": 0.694011484823626, "grad_norm": 1.5104340314865112, "learning_rate": 4.525410008657534e-06, "loss": 0.5854, "step": 4653 }, { "epoch": 0.6941606383772093, "grad_norm": 1.302764892578125, "learning_rate": 4.521367296799902e-06, "loss": 0.6132, "step": 4654 }, { "epoch": 0.6943097919307928, "grad_norm": 1.3193820714950562, "learning_rate": 4.517325863964201e-06, "loss": 0.6314, "step": 4655 }, { "epoch": 0.6944589454843761, "grad_norm": 0.8520970940589905, "learning_rate": 4.5132857110939275e-06, "loss": 0.6022, "step": 4656 }, { "epoch": 0.6946080990379596, "grad_norm": 1.1914666891098022, "learning_rate": 4.509246839132294e-06, "loss": 0.4904, "step": 4657 }, { "epoch": 0.694757252591543, "grad_norm": 1.7134387493133545, "learning_rate": 4.5052092490221885e-06, "loss": 0.6251, "step": 4658 }, { "epoch": 0.6949064061451264, "grad_norm": 1.4954564571380615, "learning_rate": 4.501172941706218e-06, "loss": 0.5596, "step": 4659 }, { "epoch": 0.6950555596987098, "grad_norm": 1.4747157096862793, "learning_rate": 4.497137918126685e-06, "loss": 0.5507, "step": 4660 }, { "epoch": 0.6952047132522933, "grad_norm": 1.390127420425415, "learning_rate": 4.4931041792255855e-06, "loss": 0.5756, "step": 4661 }, { "epoch": 0.6953538668058766, "grad_norm": 0.8353344202041626, "learning_rate": 4.489071725944627e-06, "loss": 0.6296, "step": 4662 }, { "epoch": 0.6955030203594601, "grad_norm": 1.341407299041748, "learning_rate": 4.485040559225211e-06, "loss": 0.566, "step": 4663 }, { "epoch": 0.6956521739130435, "grad_norm": 1.2221087217330933, "learning_rate": 4.48101068000844e-06, "loss": 0.5232, "step": 4664 }, { "epoch": 0.6958013274666269, "grad_norm": 0.8165313601493835, "learning_rate": 4.476982089235109e-06, "loss": 0.6444, "step": 4665 }, { "epoch": 0.6959504810202103, "grad_norm": 1.3535380363464355, "learning_rate": 4.472954787845729e-06, "loss": 0.538, "step": 4666 }, { "epoch": 0.6960996345737938, "grad_norm": 1.4557580947875977, "learning_rate": 4.468928776780489e-06, "loss": 0.6046, "step": 4667 }, { "epoch": 0.6962487881273771, "grad_norm": 1.4636403322219849, "learning_rate": 4.464904056979293e-06, "loss": 0.6131, "step": 4668 }, { "epoch": 0.6963979416809606, "grad_norm": 1.4063093662261963, "learning_rate": 4.460880629381736e-06, "loss": 0.568, "step": 4669 }, { "epoch": 0.696547095234544, "grad_norm": 1.3879687786102295, "learning_rate": 4.456858494927116e-06, "loss": 0.4894, "step": 4670 }, { "epoch": 0.6966962487881274, "grad_norm": 1.310115933418274, "learning_rate": 4.452837654554419e-06, "loss": 0.5347, "step": 4671 }, { "epoch": 0.6968454023417108, "grad_norm": 1.4478917121887207, "learning_rate": 4.448818109202341e-06, "loss": 0.6104, "step": 4672 }, { "epoch": 0.6969945558952942, "grad_norm": 1.3897476196289062, "learning_rate": 4.444799859809274e-06, "loss": 0.5471, "step": 4673 }, { "epoch": 0.6971437094488776, "grad_norm": 1.3980711698532104, "learning_rate": 4.440782907313291e-06, "loss": 0.4941, "step": 4674 }, { "epoch": 0.6972928630024611, "grad_norm": 1.2523415088653564, "learning_rate": 4.436767252652189e-06, "loss": 0.5518, "step": 4675 }, { "epoch": 0.6974420165560444, "grad_norm": 1.358669638633728, "learning_rate": 4.432752896763447e-06, "loss": 0.5923, "step": 4676 }, { "epoch": 0.6975911701096279, "grad_norm": 1.4176374673843384, "learning_rate": 4.428739840584235e-06, "loss": 0.555, "step": 4677 }, { "epoch": 0.6977403236632113, "grad_norm": 1.364169955253601, "learning_rate": 4.42472808505143e-06, "loss": 0.6205, "step": 4678 }, { "epoch": 0.6978894772167947, "grad_norm": 1.5069520473480225, "learning_rate": 4.420717631101607e-06, "loss": 0.6113, "step": 4679 }, { "epoch": 0.6980386307703781, "grad_norm": 1.4685486555099487, "learning_rate": 4.416708479671022e-06, "loss": 0.6067, "step": 4680 }, { "epoch": 0.6981877843239616, "grad_norm": 1.8556591272354126, "learning_rate": 4.412700631695645e-06, "loss": 0.6026, "step": 4681 }, { "epoch": 0.6983369378775449, "grad_norm": 2.143237352371216, "learning_rate": 4.4086940881111294e-06, "loss": 0.5377, "step": 4682 }, { "epoch": 0.6984860914311284, "grad_norm": 1.4589389562606812, "learning_rate": 4.404688849852832e-06, "loss": 0.5561, "step": 4683 }, { "epoch": 0.6986352449847117, "grad_norm": 1.5411007404327393, "learning_rate": 4.4006849178558e-06, "loss": 0.5901, "step": 4684 }, { "epoch": 0.6987843985382952, "grad_norm": 1.3394775390625, "learning_rate": 4.396682293054779e-06, "loss": 0.5979, "step": 4685 }, { "epoch": 0.6989335520918786, "grad_norm": 1.2520214319229126, "learning_rate": 4.392680976384204e-06, "loss": 0.5742, "step": 4686 }, { "epoch": 0.699082705645462, "grad_norm": 1.3089591264724731, "learning_rate": 4.388680968778207e-06, "loss": 0.5661, "step": 4687 }, { "epoch": 0.6992318591990454, "grad_norm": 1.3002326488494873, "learning_rate": 4.384682271170619e-06, "loss": 0.5633, "step": 4688 }, { "epoch": 0.6993810127526289, "grad_norm": 1.382412314414978, "learning_rate": 4.380684884494965e-06, "loss": 0.544, "step": 4689 }, { "epoch": 0.6995301663062122, "grad_norm": 1.3568047285079956, "learning_rate": 4.376688809684452e-06, "loss": 0.5734, "step": 4690 }, { "epoch": 0.6996793198597957, "grad_norm": 1.268717646598816, "learning_rate": 4.3726940476719925e-06, "loss": 0.6159, "step": 4691 }, { "epoch": 0.6998284734133791, "grad_norm": 2.826936960220337, "learning_rate": 4.3687005993901895e-06, "loss": 0.5924, "step": 4692 }, { "epoch": 0.6999776269669625, "grad_norm": 1.2808183431625366, "learning_rate": 4.364708465771341e-06, "loss": 0.4808, "step": 4693 }, { "epoch": 0.7001267805205459, "grad_norm": 1.572896957397461, "learning_rate": 4.360717647747434e-06, "loss": 0.5691, "step": 4694 }, { "epoch": 0.7002759340741294, "grad_norm": 1.3573520183563232, "learning_rate": 4.3567281462501555e-06, "loss": 0.5373, "step": 4695 }, { "epoch": 0.7004250876277127, "grad_norm": 1.348266363143921, "learning_rate": 4.352739962210872e-06, "loss": 0.543, "step": 4696 }, { "epoch": 0.7005742411812962, "grad_norm": 1.4807896614074707, "learning_rate": 4.348753096560655e-06, "loss": 0.6185, "step": 4697 }, { "epoch": 0.7007233947348795, "grad_norm": 1.3307266235351562, "learning_rate": 4.344767550230268e-06, "loss": 0.6356, "step": 4698 }, { "epoch": 0.700872548288463, "grad_norm": 1.2946139574050903, "learning_rate": 4.340783324150153e-06, "loss": 0.5289, "step": 4699 }, { "epoch": 0.7010217018420464, "grad_norm": 1.337578296661377, "learning_rate": 4.3368004192504554e-06, "loss": 0.559, "step": 4700 }, { "epoch": 0.7011708553956298, "grad_norm": 1.3389586210250854, "learning_rate": 4.332818836461019e-06, "loss": 0.5821, "step": 4701 }, { "epoch": 0.7013200089492132, "grad_norm": 1.2055760622024536, "learning_rate": 4.32883857671136e-06, "loss": 0.4475, "step": 4702 }, { "epoch": 0.7014691625027967, "grad_norm": 1.2842514514923096, "learning_rate": 4.3248596409306995e-06, "loss": 0.5888, "step": 4703 }, { "epoch": 0.70161831605638, "grad_norm": 1.5234965085983276, "learning_rate": 4.3208820300479495e-06, "loss": 0.5442, "step": 4704 }, { "epoch": 0.7017674696099635, "grad_norm": 1.4656023979187012, "learning_rate": 4.316905744991699e-06, "loss": 0.5814, "step": 4705 }, { "epoch": 0.7019166231635469, "grad_norm": 1.6756322383880615, "learning_rate": 4.312930786690244e-06, "loss": 0.5591, "step": 4706 }, { "epoch": 0.7020657767171303, "grad_norm": 1.356507420539856, "learning_rate": 4.308957156071565e-06, "loss": 0.571, "step": 4707 }, { "epoch": 0.7022149302707137, "grad_norm": 1.9161021709442139, "learning_rate": 4.304984854063326e-06, "loss": 0.6361, "step": 4708 }, { "epoch": 0.7023640838242972, "grad_norm": 1.464931607246399, "learning_rate": 4.301013881592885e-06, "loss": 0.5149, "step": 4709 }, { "epoch": 0.7025132373778805, "grad_norm": 1.2784806489944458, "learning_rate": 4.297044239587304e-06, "loss": 0.5449, "step": 4710 }, { "epoch": 0.702662390931464, "grad_norm": 1.2867265939712524, "learning_rate": 4.293075928973308e-06, "loss": 0.4659, "step": 4711 }, { "epoch": 0.7028115444850473, "grad_norm": 1.5613210201263428, "learning_rate": 4.28910895067733e-06, "loss": 0.5632, "step": 4712 }, { "epoch": 0.7029606980386308, "grad_norm": 1.676274061203003, "learning_rate": 4.285143305625489e-06, "loss": 0.5827, "step": 4713 }, { "epoch": 0.7031098515922142, "grad_norm": 1.4013971090316772, "learning_rate": 4.281178994743584e-06, "loss": 0.6314, "step": 4714 }, { "epoch": 0.7032590051457976, "grad_norm": 1.4093825817108154, "learning_rate": 4.277216018957112e-06, "loss": 0.5528, "step": 4715 }, { "epoch": 0.703408158699381, "grad_norm": 0.8418352007865906, "learning_rate": 4.273254379191255e-06, "loss": 0.6454, "step": 4716 }, { "epoch": 0.7035573122529645, "grad_norm": 1.2412070035934448, "learning_rate": 4.269294076370884e-06, "loss": 0.5987, "step": 4717 }, { "epoch": 0.7037064658065478, "grad_norm": 1.3047566413879395, "learning_rate": 4.265335111420554e-06, "loss": 0.5889, "step": 4718 }, { "epoch": 0.7038556193601313, "grad_norm": 1.386641025543213, "learning_rate": 4.26137748526452e-06, "loss": 0.602, "step": 4719 }, { "epoch": 0.7040047729137147, "grad_norm": 1.5445870161056519, "learning_rate": 4.257421198826703e-06, "loss": 0.5667, "step": 4720 }, { "epoch": 0.7041539264672981, "grad_norm": 1.5075448751449585, "learning_rate": 4.253466253030728e-06, "loss": 0.6212, "step": 4721 }, { "epoch": 0.7043030800208815, "grad_norm": 1.5445563793182373, "learning_rate": 4.249512648799904e-06, "loss": 0.5988, "step": 4722 }, { "epoch": 0.704452233574465, "grad_norm": 1.3451205492019653, "learning_rate": 4.245560387057228e-06, "loss": 0.5119, "step": 4723 }, { "epoch": 0.7046013871280483, "grad_norm": 1.376338243484497, "learning_rate": 4.241609468725374e-06, "loss": 0.5218, "step": 4724 }, { "epoch": 0.7047505406816318, "grad_norm": 1.4161254167556763, "learning_rate": 4.2376598947267124e-06, "loss": 0.4925, "step": 4725 }, { "epoch": 0.7048996942352151, "grad_norm": 1.6896679401397705, "learning_rate": 4.233711665983297e-06, "loss": 0.5882, "step": 4726 }, { "epoch": 0.7050488477887986, "grad_norm": 2.825220823287964, "learning_rate": 4.229764783416867e-06, "loss": 0.5716, "step": 4727 }, { "epoch": 0.705198001342382, "grad_norm": 1.3446253538131714, "learning_rate": 4.225819247948846e-06, "loss": 0.5349, "step": 4728 }, { "epoch": 0.7053471548959654, "grad_norm": 1.23496413230896, "learning_rate": 4.22187506050035e-06, "loss": 0.5235, "step": 4729 }, { "epoch": 0.7054963084495488, "grad_norm": 1.4347388744354248, "learning_rate": 4.2179322219921684e-06, "loss": 0.5624, "step": 4730 }, { "epoch": 0.7056454620031323, "grad_norm": 1.2206448316574097, "learning_rate": 4.213990733344783e-06, "loss": 0.5395, "step": 4731 }, { "epoch": 0.7057946155567156, "grad_norm": 1.3439158201217651, "learning_rate": 4.210050595478365e-06, "loss": 0.5094, "step": 4732 }, { "epoch": 0.7059437691102991, "grad_norm": 1.6362208127975464, "learning_rate": 4.206111809312757e-06, "loss": 0.5288, "step": 4733 }, { "epoch": 0.7060929226638825, "grad_norm": 1.5162571668624878, "learning_rate": 4.202174375767498e-06, "loss": 0.6709, "step": 4734 }, { "epoch": 0.7062420762174659, "grad_norm": 1.4702404737472534, "learning_rate": 4.198238295761807e-06, "loss": 0.5594, "step": 4735 }, { "epoch": 0.7063912297710493, "grad_norm": 1.4943675994873047, "learning_rate": 4.194303570214586e-06, "loss": 0.7308, "step": 4736 }, { "epoch": 0.7065403833246328, "grad_norm": 1.309386134147644, "learning_rate": 4.1903702000444235e-06, "loss": 0.5492, "step": 4737 }, { "epoch": 0.7066895368782161, "grad_norm": 1.3579524755477905, "learning_rate": 4.1864381861695934e-06, "loss": 0.5189, "step": 4738 }, { "epoch": 0.7068386904317996, "grad_norm": 1.302778959274292, "learning_rate": 4.182507529508042e-06, "loss": 0.5548, "step": 4739 }, { "epoch": 0.706987843985383, "grad_norm": 1.1941708326339722, "learning_rate": 4.178578230977409e-06, "loss": 0.503, "step": 4740 }, { "epoch": 0.7071369975389664, "grad_norm": 0.8056668639183044, "learning_rate": 4.174650291495015e-06, "loss": 0.6256, "step": 4741 }, { "epoch": 0.7072861510925498, "grad_norm": 1.3991427421569824, "learning_rate": 4.170723711977867e-06, "loss": 0.624, "step": 4742 }, { "epoch": 0.7074353046461332, "grad_norm": 1.1954032182693481, "learning_rate": 4.166798493342642e-06, "loss": 0.523, "step": 4743 }, { "epoch": 0.7075844581997166, "grad_norm": 1.613294005393982, "learning_rate": 4.162874636505713e-06, "loss": 0.5952, "step": 4744 }, { "epoch": 0.7077336117533001, "grad_norm": 1.3064154386520386, "learning_rate": 4.1589521423831254e-06, "loss": 0.5323, "step": 4745 }, { "epoch": 0.7078827653068834, "grad_norm": 1.2373369932174683, "learning_rate": 4.1550310118906145e-06, "loss": 0.5675, "step": 4746 }, { "epoch": 0.7080319188604669, "grad_norm": 1.3453022241592407, "learning_rate": 4.151111245943592e-06, "loss": 0.515, "step": 4747 }, { "epoch": 0.7081810724140503, "grad_norm": 1.8526880741119385, "learning_rate": 4.1471928454571565e-06, "loss": 0.5808, "step": 4748 }, { "epoch": 0.7083302259676337, "grad_norm": 1.3100380897521973, "learning_rate": 4.143275811346076e-06, "loss": 0.5306, "step": 4749 }, { "epoch": 0.7084793795212171, "grad_norm": 1.1995830535888672, "learning_rate": 4.13936014452481e-06, "loss": 0.5839, "step": 4750 }, { "epoch": 0.7086285330748006, "grad_norm": 1.2719042301177979, "learning_rate": 4.1354458459075005e-06, "loss": 0.5493, "step": 4751 }, { "epoch": 0.7087776866283839, "grad_norm": 1.6558183431625366, "learning_rate": 4.131532916407955e-06, "loss": 0.5955, "step": 4752 }, { "epoch": 0.7089268401819674, "grad_norm": 0.838981032371521, "learning_rate": 4.127621356939683e-06, "loss": 0.6528, "step": 4753 }, { "epoch": 0.7090759937355507, "grad_norm": 1.386208415031433, "learning_rate": 4.1237111684158625e-06, "loss": 0.5378, "step": 4754 }, { "epoch": 0.7092251472891342, "grad_norm": 1.1985867023468018, "learning_rate": 4.119802351749346e-06, "loss": 0.5444, "step": 4755 }, { "epoch": 0.7093743008427176, "grad_norm": 1.5863537788391113, "learning_rate": 4.1158949078526734e-06, "loss": 0.6544, "step": 4756 }, { "epoch": 0.709523454396301, "grad_norm": 1.5003130435943604, "learning_rate": 4.111988837638067e-06, "loss": 0.6111, "step": 4757 }, { "epoch": 0.7096726079498844, "grad_norm": 1.3323363065719604, "learning_rate": 4.1080841420174175e-06, "loss": 0.469, "step": 4758 }, { "epoch": 0.7098217615034679, "grad_norm": 1.6440273523330688, "learning_rate": 4.104180821902305e-06, "loss": 0.5805, "step": 4759 }, { "epoch": 0.7099709150570512, "grad_norm": 1.3549585342407227, "learning_rate": 4.100278878203986e-06, "loss": 0.5443, "step": 4760 }, { "epoch": 0.7101200686106347, "grad_norm": 1.3748656511306763, "learning_rate": 4.096378311833386e-06, "loss": 0.5898, "step": 4761 }, { "epoch": 0.7102692221642181, "grad_norm": 1.2081068754196167, "learning_rate": 4.092479123701126e-06, "loss": 0.544, "step": 4762 }, { "epoch": 0.7104183757178015, "grad_norm": 1.3551613092422485, "learning_rate": 4.088581314717498e-06, "loss": 0.5887, "step": 4763 }, { "epoch": 0.7105675292713849, "grad_norm": 1.3168306350708008, "learning_rate": 4.084684885792462e-06, "loss": 0.5278, "step": 4764 }, { "epoch": 0.7107166828249684, "grad_norm": 1.5709612369537354, "learning_rate": 4.08078983783567e-06, "loss": 0.5265, "step": 4765 }, { "epoch": 0.7108658363785517, "grad_norm": 1.3346586227416992, "learning_rate": 4.076896171756444e-06, "loss": 0.523, "step": 4766 }, { "epoch": 0.7110149899321352, "grad_norm": 1.3430538177490234, "learning_rate": 4.073003888463789e-06, "loss": 0.5479, "step": 4767 }, { "epoch": 0.7111641434857185, "grad_norm": 1.381659746170044, "learning_rate": 4.069112988866377e-06, "loss": 0.4947, "step": 4768 }, { "epoch": 0.7113132970393019, "grad_norm": 1.4275949001312256, "learning_rate": 4.065223473872567e-06, "loss": 0.6381, "step": 4769 }, { "epoch": 0.7114624505928854, "grad_norm": 1.4533116817474365, "learning_rate": 4.061335344390391e-06, "loss": 0.6337, "step": 4770 }, { "epoch": 0.7116116041464687, "grad_norm": 1.5457756519317627, "learning_rate": 4.0574486013275586e-06, "loss": 0.6229, "step": 4771 }, { "epoch": 0.7117607577000522, "grad_norm": 1.499470829963684, "learning_rate": 4.053563245591452e-06, "loss": 0.6847, "step": 4772 }, { "epoch": 0.7119099112536356, "grad_norm": 1.2787284851074219, "learning_rate": 4.049679278089139e-06, "loss": 0.515, "step": 4773 }, { "epoch": 0.712059064807219, "grad_norm": 1.6334278583526611, "learning_rate": 4.045796699727349e-06, "loss": 0.5975, "step": 4774 }, { "epoch": 0.7122082183608024, "grad_norm": 1.4460736513137817, "learning_rate": 4.0419155114124985e-06, "loss": 0.5473, "step": 4775 }, { "epoch": 0.7123573719143859, "grad_norm": 1.3254449367523193, "learning_rate": 4.038035714050678e-06, "loss": 0.5683, "step": 4776 }, { "epoch": 0.7125065254679692, "grad_norm": 1.3786332607269287, "learning_rate": 4.034157308547645e-06, "loss": 0.6152, "step": 4777 }, { "epoch": 0.7126556790215527, "grad_norm": 2.0054948329925537, "learning_rate": 4.030280295808838e-06, "loss": 0.5594, "step": 4778 }, { "epoch": 0.712804832575136, "grad_norm": 1.5304639339447021, "learning_rate": 4.0264046767393815e-06, "loss": 0.524, "step": 4779 }, { "epoch": 0.7129539861287195, "grad_norm": 1.3256394863128662, "learning_rate": 4.022530452244052e-06, "loss": 0.5982, "step": 4780 }, { "epoch": 0.7131031396823029, "grad_norm": 1.5084857940673828, "learning_rate": 4.018657623227317e-06, "loss": 0.5379, "step": 4781 }, { "epoch": 0.7132522932358863, "grad_norm": 1.5170507431030273, "learning_rate": 4.0147861905933146e-06, "loss": 0.6241, "step": 4782 }, { "epoch": 0.7134014467894697, "grad_norm": 1.5234540700912476, "learning_rate": 4.010916155245851e-06, "loss": 0.5452, "step": 4783 }, { "epoch": 0.7135506003430532, "grad_norm": 1.2905069589614868, "learning_rate": 4.007047518088413e-06, "loss": 0.5223, "step": 4784 }, { "epoch": 0.7136997538966365, "grad_norm": 1.3606903553009033, "learning_rate": 4.003180280024163e-06, "loss": 0.5746, "step": 4785 }, { "epoch": 0.71384890745022, "grad_norm": 1.3889977931976318, "learning_rate": 3.9993144419559234e-06, "loss": 0.6271, "step": 4786 }, { "epoch": 0.7139980610038034, "grad_norm": 1.3394311666488647, "learning_rate": 3.995450004786201e-06, "loss": 0.5073, "step": 4787 }, { "epoch": 0.7141472145573868, "grad_norm": 1.3325475454330444, "learning_rate": 3.991586969417184e-06, "loss": 0.5742, "step": 4788 }, { "epoch": 0.7142963681109702, "grad_norm": 1.284583330154419, "learning_rate": 3.9877253367507104e-06, "loss": 0.5534, "step": 4789 }, { "epoch": 0.7144455216645537, "grad_norm": 1.334282636642456, "learning_rate": 3.98386510768831e-06, "loss": 0.4751, "step": 4790 }, { "epoch": 0.714594675218137, "grad_norm": 1.3211143016815186, "learning_rate": 3.980006283131178e-06, "loss": 0.6384, "step": 4791 }, { "epoch": 0.7147438287717205, "grad_norm": 1.2903186082839966, "learning_rate": 3.976148863980176e-06, "loss": 0.5445, "step": 4792 }, { "epoch": 0.7148929823253038, "grad_norm": 1.1890342235565186, "learning_rate": 3.972292851135847e-06, "loss": 0.5366, "step": 4793 }, { "epoch": 0.7150421358788873, "grad_norm": 1.347374677658081, "learning_rate": 3.9684382454984015e-06, "loss": 0.6284, "step": 4794 }, { "epoch": 0.7151912894324707, "grad_norm": 1.3864085674285889, "learning_rate": 3.9645850479677264e-06, "loss": 0.5052, "step": 4795 }, { "epoch": 0.7153404429860541, "grad_norm": 1.347923755645752, "learning_rate": 3.960733259443365e-06, "loss": 0.5368, "step": 4796 }, { "epoch": 0.7154895965396375, "grad_norm": 1.416996955871582, "learning_rate": 3.956882880824553e-06, "loss": 0.5218, "step": 4797 }, { "epoch": 0.715638750093221, "grad_norm": 1.6640278100967407, "learning_rate": 3.953033913010179e-06, "loss": 0.6201, "step": 4798 }, { "epoch": 0.7157879036468043, "grad_norm": 1.5663213729858398, "learning_rate": 3.949186356898811e-06, "loss": 0.5462, "step": 4799 }, { "epoch": 0.7159370572003878, "grad_norm": 1.5271270275115967, "learning_rate": 3.945340213388687e-06, "loss": 0.6171, "step": 4800 }, { "epoch": 0.7160862107539712, "grad_norm": 0.8360869884490967, "learning_rate": 3.941495483377714e-06, "loss": 0.6071, "step": 4801 }, { "epoch": 0.7162353643075546, "grad_norm": 0.8459926247596741, "learning_rate": 3.937652167763466e-06, "loss": 0.6326, "step": 4802 }, { "epoch": 0.716384517861138, "grad_norm": 1.3061378002166748, "learning_rate": 3.933810267443191e-06, "loss": 0.5088, "step": 4803 }, { "epoch": 0.7165336714147215, "grad_norm": 1.4352718591690063, "learning_rate": 3.9299697833138094e-06, "loss": 0.574, "step": 4804 }, { "epoch": 0.7166828249683048, "grad_norm": 1.2957425117492676, "learning_rate": 3.926130716271896e-06, "loss": 0.5186, "step": 4805 }, { "epoch": 0.7168319785218883, "grad_norm": 1.3020750284194946, "learning_rate": 3.9222930672137175e-06, "loss": 0.6008, "step": 4806 }, { "epoch": 0.7169811320754716, "grad_norm": 1.2691832780838013, "learning_rate": 3.918456837035195e-06, "loss": 0.6014, "step": 4807 }, { "epoch": 0.7171302856290551, "grad_norm": 1.5146760940551758, "learning_rate": 3.914622026631916e-06, "loss": 0.6182, "step": 4808 }, { "epoch": 0.7172794391826385, "grad_norm": 1.4296576976776123, "learning_rate": 3.910788636899143e-06, "loss": 0.5771, "step": 4809 }, { "epoch": 0.717428592736222, "grad_norm": 1.5400420427322388, "learning_rate": 3.906956668731813e-06, "loss": 0.5394, "step": 4810 }, { "epoch": 0.7175777462898053, "grad_norm": 1.4896880388259888, "learning_rate": 3.903126123024512e-06, "loss": 0.5981, "step": 4811 }, { "epoch": 0.7177268998433888, "grad_norm": 1.2759575843811035, "learning_rate": 3.899297000671511e-06, "loss": 0.5251, "step": 4812 }, { "epoch": 0.7178760533969721, "grad_norm": 1.5748765468597412, "learning_rate": 3.895469302566745e-06, "loss": 0.5953, "step": 4813 }, { "epoch": 0.7180252069505556, "grad_norm": 1.5787546634674072, "learning_rate": 3.891643029603811e-06, "loss": 0.619, "step": 4814 }, { "epoch": 0.718174360504139, "grad_norm": 1.4895459413528442, "learning_rate": 3.88781818267598e-06, "loss": 0.6038, "step": 4815 }, { "epoch": 0.7183235140577224, "grad_norm": 0.8114829063415527, "learning_rate": 3.883994762676189e-06, "loss": 0.6449, "step": 4816 }, { "epoch": 0.7184726676113058, "grad_norm": 1.2343823909759521, "learning_rate": 3.880172770497033e-06, "loss": 0.5469, "step": 4817 }, { "epoch": 0.7186218211648893, "grad_norm": 1.4284979104995728, "learning_rate": 3.8763522070307835e-06, "loss": 0.5976, "step": 4818 }, { "epoch": 0.7187709747184726, "grad_norm": 1.4630028009414673, "learning_rate": 3.872533073169377e-06, "loss": 0.6219, "step": 4819 }, { "epoch": 0.7189201282720561, "grad_norm": 1.372801661491394, "learning_rate": 3.868715369804418e-06, "loss": 0.5397, "step": 4820 }, { "epoch": 0.7190692818256395, "grad_norm": 0.8426862955093384, "learning_rate": 3.8648990978271646e-06, "loss": 0.6203, "step": 4821 }, { "epoch": 0.7192184353792229, "grad_norm": 1.2345503568649292, "learning_rate": 3.861084258128558e-06, "loss": 0.5347, "step": 4822 }, { "epoch": 0.7193675889328063, "grad_norm": 1.2558423280715942, "learning_rate": 3.857270851599193e-06, "loss": 0.5543, "step": 4823 }, { "epoch": 0.7195167424863897, "grad_norm": 1.6579618453979492, "learning_rate": 3.853458879129335e-06, "loss": 0.5286, "step": 4824 }, { "epoch": 0.7196658960399731, "grad_norm": 1.4171581268310547, "learning_rate": 3.849648341608914e-06, "loss": 0.6602, "step": 4825 }, { "epoch": 0.7198150495935566, "grad_norm": 1.5067381858825684, "learning_rate": 3.845839239927527e-06, "loss": 0.6018, "step": 4826 }, { "epoch": 0.7199642031471399, "grad_norm": 1.3604471683502197, "learning_rate": 3.842031574974426e-06, "loss": 0.5506, "step": 4827 }, { "epoch": 0.7201133567007234, "grad_norm": 1.6086466312408447, "learning_rate": 3.83822534763854e-06, "loss": 0.5419, "step": 4828 }, { "epoch": 0.7202625102543068, "grad_norm": 1.3476015329360962, "learning_rate": 3.834420558808459e-06, "loss": 0.5146, "step": 4829 }, { "epoch": 0.7204116638078902, "grad_norm": 1.348140835762024, "learning_rate": 3.830617209372429e-06, "loss": 0.5641, "step": 4830 }, { "epoch": 0.7205608173614736, "grad_norm": 1.4834092855453491, "learning_rate": 3.826815300218367e-06, "loss": 0.4773, "step": 4831 }, { "epoch": 0.7207099709150571, "grad_norm": 1.7824103832244873, "learning_rate": 3.8230148322338625e-06, "loss": 0.5333, "step": 4832 }, { "epoch": 0.7208591244686404, "grad_norm": 1.2397575378417969, "learning_rate": 3.819215806306148e-06, "loss": 0.5953, "step": 4833 }, { "epoch": 0.7210082780222239, "grad_norm": 1.5989888906478882, "learning_rate": 3.815418223322136e-06, "loss": 0.508, "step": 4834 }, { "epoch": 0.7211574315758073, "grad_norm": 1.3366594314575195, "learning_rate": 3.811622084168399e-06, "loss": 0.545, "step": 4835 }, { "epoch": 0.7213065851293907, "grad_norm": 1.478997826576233, "learning_rate": 3.8078273897311626e-06, "loss": 0.5913, "step": 4836 }, { "epoch": 0.7214557386829741, "grad_norm": 1.3427499532699585, "learning_rate": 3.8040341408963265e-06, "loss": 0.5123, "step": 4837 }, { "epoch": 0.7216048922365575, "grad_norm": 1.4367493391036987, "learning_rate": 3.8002423385494534e-06, "loss": 0.5156, "step": 4838 }, { "epoch": 0.7217540457901409, "grad_norm": 1.731315016746521, "learning_rate": 3.7964519835757554e-06, "loss": 0.5778, "step": 4839 }, { "epoch": 0.7219031993437244, "grad_norm": 1.4486812353134155, "learning_rate": 3.792663076860116e-06, "loss": 0.655, "step": 4840 }, { "epoch": 0.7220523528973077, "grad_norm": 1.4993313550949097, "learning_rate": 3.788875619287089e-06, "loss": 0.5943, "step": 4841 }, { "epoch": 0.7222015064508912, "grad_norm": 1.2748658657073975, "learning_rate": 3.785089611740872e-06, "loss": 0.5855, "step": 4842 }, { "epoch": 0.7223506600044746, "grad_norm": 1.4169583320617676, "learning_rate": 3.7813050551053344e-06, "loss": 0.6292, "step": 4843 }, { "epoch": 0.722499813558058, "grad_norm": 1.2737082242965698, "learning_rate": 3.7775219502640105e-06, "loss": 0.4923, "step": 4844 }, { "epoch": 0.7226489671116414, "grad_norm": 1.3732428550720215, "learning_rate": 3.7737402981000827e-06, "loss": 0.5393, "step": 4845 }, { "epoch": 0.7227981206652249, "grad_norm": 1.3027684688568115, "learning_rate": 3.7699600994964046e-06, "loss": 0.6064, "step": 4846 }, { "epoch": 0.7229472742188082, "grad_norm": 1.4275439977645874, "learning_rate": 3.766181355335489e-06, "loss": 0.5757, "step": 4847 }, { "epoch": 0.7230964277723917, "grad_norm": 1.4420197010040283, "learning_rate": 3.7624040664995075e-06, "loss": 0.5905, "step": 4848 }, { "epoch": 0.723245581325975, "grad_norm": 1.6690398454666138, "learning_rate": 3.7586282338702918e-06, "loss": 0.5718, "step": 4849 }, { "epoch": 0.7233947348795585, "grad_norm": 1.125647783279419, "learning_rate": 3.754853858329336e-06, "loss": 0.4828, "step": 4850 }, { "epoch": 0.7235438884331419, "grad_norm": 0.82258540391922, "learning_rate": 3.7510809407577932e-06, "loss": 0.6338, "step": 4851 }, { "epoch": 0.7236930419867253, "grad_norm": 1.5424623489379883, "learning_rate": 3.7473094820364707e-06, "loss": 0.5234, "step": 4852 }, { "epoch": 0.7238421955403087, "grad_norm": 1.469319224357605, "learning_rate": 3.7435394830458414e-06, "loss": 0.4903, "step": 4853 }, { "epoch": 0.7239913490938922, "grad_norm": 1.3363453149795532, "learning_rate": 3.73977094466604e-06, "loss": 0.5753, "step": 4854 }, { "epoch": 0.7241405026474755, "grad_norm": 0.8280409574508667, "learning_rate": 3.7360038677768495e-06, "loss": 0.6268, "step": 4855 }, { "epoch": 0.724289656201059, "grad_norm": 0.8184301853179932, "learning_rate": 3.7322382532577206e-06, "loss": 0.6332, "step": 4856 }, { "epoch": 0.7244388097546424, "grad_norm": 1.3274575471878052, "learning_rate": 3.72847410198776e-06, "loss": 0.5919, "step": 4857 }, { "epoch": 0.7245879633082258, "grad_norm": 1.3192561864852905, "learning_rate": 3.7247114148457342e-06, "loss": 0.5726, "step": 4858 }, { "epoch": 0.7247371168618092, "grad_norm": 1.601532220840454, "learning_rate": 3.7209501927100666e-06, "loss": 0.6225, "step": 4859 }, { "epoch": 0.7248862704153927, "grad_norm": 1.2406017780303955, "learning_rate": 3.7171904364588405e-06, "loss": 0.5718, "step": 4860 }, { "epoch": 0.725035423968976, "grad_norm": 1.4254999160766602, "learning_rate": 3.7134321469697886e-06, "loss": 0.534, "step": 4861 }, { "epoch": 0.7251845775225595, "grad_norm": 1.327452540397644, "learning_rate": 3.7096753251203134e-06, "loss": 0.5776, "step": 4862 }, { "epoch": 0.7253337310761429, "grad_norm": 1.3203762769699097, "learning_rate": 3.7059199717874693e-06, "loss": 0.5731, "step": 4863 }, { "epoch": 0.7254828846297263, "grad_norm": 1.4414480924606323, "learning_rate": 3.7021660878479628e-06, "loss": 0.6148, "step": 4864 }, { "epoch": 0.7256320381833097, "grad_norm": 1.449945092201233, "learning_rate": 3.698413674178165e-06, "loss": 0.5871, "step": 4865 }, { "epoch": 0.7257811917368931, "grad_norm": 1.3728876113891602, "learning_rate": 3.6946627316541017e-06, "loss": 0.6198, "step": 4866 }, { "epoch": 0.7259303452904765, "grad_norm": 1.310972809791565, "learning_rate": 3.690913261151453e-06, "loss": 0.5373, "step": 4867 }, { "epoch": 0.72607949884406, "grad_norm": 1.3593791723251343, "learning_rate": 3.6871652635455577e-06, "loss": 0.5178, "step": 4868 }, { "epoch": 0.7262286523976433, "grad_norm": 1.2685588598251343, "learning_rate": 3.683418739711413e-06, "loss": 0.5077, "step": 4869 }, { "epoch": 0.7263778059512268, "grad_norm": 1.7534114122390747, "learning_rate": 3.6796736905236618e-06, "loss": 0.63, "step": 4870 }, { "epoch": 0.7265269595048102, "grad_norm": 1.5793501138687134, "learning_rate": 3.6759301168566152e-06, "loss": 0.5373, "step": 4871 }, { "epoch": 0.7266761130583936, "grad_norm": 1.5965081453323364, "learning_rate": 3.6721880195842317e-06, "loss": 0.5398, "step": 4872 }, { "epoch": 0.726825266611977, "grad_norm": 1.3739920854568481, "learning_rate": 3.668447399580133e-06, "loss": 0.6359, "step": 4873 }, { "epoch": 0.7269744201655605, "grad_norm": 1.2709332704544067, "learning_rate": 3.664708257717583e-06, "loss": 0.5387, "step": 4874 }, { "epoch": 0.7271235737191438, "grad_norm": 1.5745693445205688, "learning_rate": 3.660970594869513e-06, "loss": 0.5947, "step": 4875 }, { "epoch": 0.7272727272727273, "grad_norm": 1.7526894807815552, "learning_rate": 3.6572344119085033e-06, "loss": 0.557, "step": 4876 }, { "epoch": 0.7274218808263107, "grad_norm": 1.2760515213012695, "learning_rate": 3.6534997097067913e-06, "loss": 0.5857, "step": 4877 }, { "epoch": 0.7275710343798941, "grad_norm": 1.588103175163269, "learning_rate": 3.649766489136265e-06, "loss": 0.6179, "step": 4878 }, { "epoch": 0.7277201879334775, "grad_norm": 1.4865306615829468, "learning_rate": 3.6460347510684736e-06, "loss": 0.5834, "step": 4879 }, { "epoch": 0.727869341487061, "grad_norm": 1.2685117721557617, "learning_rate": 3.642304496374608e-06, "loss": 0.5618, "step": 4880 }, { "epoch": 0.7280184950406443, "grad_norm": 1.4375079870224, "learning_rate": 3.638575725925523e-06, "loss": 0.5325, "step": 4881 }, { "epoch": 0.7281676485942278, "grad_norm": 1.4273247718811035, "learning_rate": 3.634848440591728e-06, "loss": 0.5516, "step": 4882 }, { "epoch": 0.7283168021478111, "grad_norm": 1.3001983165740967, "learning_rate": 3.631122641243372e-06, "loss": 0.5217, "step": 4883 }, { "epoch": 0.7284659557013946, "grad_norm": 1.2081348896026611, "learning_rate": 3.6273983287502756e-06, "loss": 0.5416, "step": 4884 }, { "epoch": 0.728615109254978, "grad_norm": 1.3989152908325195, "learning_rate": 3.623675503981905e-06, "loss": 0.5634, "step": 4885 }, { "epoch": 0.7287642628085614, "grad_norm": 1.4554088115692139, "learning_rate": 3.619954167807369e-06, "loss": 0.5447, "step": 4886 }, { "epoch": 0.7289134163621448, "grad_norm": 1.3751018047332764, "learning_rate": 3.616234321095441e-06, "loss": 0.5257, "step": 4887 }, { "epoch": 0.7290625699157283, "grad_norm": 1.2652398347854614, "learning_rate": 3.612515964714548e-06, "loss": 0.5296, "step": 4888 }, { "epoch": 0.7292117234693116, "grad_norm": 1.2944492101669312, "learning_rate": 3.608799099532757e-06, "loss": 0.5661, "step": 4889 }, { "epoch": 0.7293608770228951, "grad_norm": 1.620278239250183, "learning_rate": 3.6050837264177952e-06, "loss": 0.6275, "step": 4890 }, { "epoch": 0.7295100305764785, "grad_norm": 1.4738489389419556, "learning_rate": 3.6013698462370426e-06, "loss": 0.6701, "step": 4891 }, { "epoch": 0.7296591841300619, "grad_norm": 1.2707852125167847, "learning_rate": 3.5976574598575288e-06, "loss": 0.568, "step": 4892 }, { "epoch": 0.7298083376836453, "grad_norm": 1.2805240154266357, "learning_rate": 3.593946568145932e-06, "loss": 0.5811, "step": 4893 }, { "epoch": 0.7299574912372288, "grad_norm": 1.20746910572052, "learning_rate": 3.590237171968588e-06, "loss": 0.5369, "step": 4894 }, { "epoch": 0.7301066447908121, "grad_norm": 1.3995493650436401, "learning_rate": 3.5865292721914724e-06, "loss": 0.4915, "step": 4895 }, { "epoch": 0.7302557983443956, "grad_norm": 1.3136578798294067, "learning_rate": 3.5828228696802226e-06, "loss": 0.555, "step": 4896 }, { "epoch": 0.7304049518979789, "grad_norm": 1.5661121606826782, "learning_rate": 3.5791179653001195e-06, "loss": 0.5544, "step": 4897 }, { "epoch": 0.7305541054515624, "grad_norm": 1.4391093254089355, "learning_rate": 3.5754145599161026e-06, "loss": 0.514, "step": 4898 }, { "epoch": 0.7307032590051458, "grad_norm": 1.2397197484970093, "learning_rate": 3.5717126543927484e-06, "loss": 0.4957, "step": 4899 }, { "epoch": 0.7308524125587292, "grad_norm": 1.4029978513717651, "learning_rate": 3.5680122495942925e-06, "loss": 0.6505, "step": 4900 }, { "epoch": 0.7310015661123126, "grad_norm": 1.6108028888702393, "learning_rate": 3.5643133463846193e-06, "loss": 0.5893, "step": 4901 }, { "epoch": 0.7311507196658961, "grad_norm": 1.4177343845367432, "learning_rate": 3.5606159456272613e-06, "loss": 0.6266, "step": 4902 }, { "epoch": 0.7312998732194794, "grad_norm": 1.4165072441101074, "learning_rate": 3.5569200481854003e-06, "loss": 0.5918, "step": 4903 }, { "epoch": 0.7314490267730629, "grad_norm": 1.77101469039917, "learning_rate": 3.5532256549218715e-06, "loss": 0.5088, "step": 4904 }, { "epoch": 0.7315981803266463, "grad_norm": 1.1846133470535278, "learning_rate": 3.549532766699146e-06, "loss": 0.5736, "step": 4905 }, { "epoch": 0.7317473338802297, "grad_norm": 1.36439049243927, "learning_rate": 3.5458413843793583e-06, "loss": 0.4816, "step": 4906 }, { "epoch": 0.7318964874338131, "grad_norm": 1.2788606882095337, "learning_rate": 3.5421515088242855e-06, "loss": 0.5814, "step": 4907 }, { "epoch": 0.7320456409873966, "grad_norm": 1.3383656740188599, "learning_rate": 3.5384631408953483e-06, "loss": 0.4976, "step": 4908 }, { "epoch": 0.7321947945409799, "grad_norm": 1.348186731338501, "learning_rate": 3.5347762814536224e-06, "loss": 0.5926, "step": 4909 }, { "epoch": 0.7323439480945634, "grad_norm": 1.4445452690124512, "learning_rate": 3.5310909313598287e-06, "loss": 0.5493, "step": 4910 }, { "epoch": 0.7324931016481467, "grad_norm": 1.2883893251419067, "learning_rate": 3.5274070914743362e-06, "loss": 0.5317, "step": 4911 }, { "epoch": 0.7326422552017302, "grad_norm": 1.513081669807434, "learning_rate": 3.5237247626571604e-06, "loss": 0.5857, "step": 4912 }, { "epoch": 0.7327914087553136, "grad_norm": 1.4849648475646973, "learning_rate": 3.520043945767968e-06, "loss": 0.6054, "step": 4913 }, { "epoch": 0.732940562308897, "grad_norm": 1.5424222946166992, "learning_rate": 3.5163646416660634e-06, "loss": 0.5892, "step": 4914 }, { "epoch": 0.7330897158624804, "grad_norm": 1.2483694553375244, "learning_rate": 3.512686851210406e-06, "loss": 0.5161, "step": 4915 }, { "epoch": 0.7332388694160639, "grad_norm": 1.4547935724258423, "learning_rate": 3.509010575259604e-06, "loss": 0.5367, "step": 4916 }, { "epoch": 0.7333880229696472, "grad_norm": 1.1529346704483032, "learning_rate": 3.5053358146719e-06, "loss": 0.579, "step": 4917 }, { "epoch": 0.7335371765232307, "grad_norm": 1.3501392602920532, "learning_rate": 3.501662570305191e-06, "loss": 0.59, "step": 4918 }, { "epoch": 0.733686330076814, "grad_norm": 1.25519597530365, "learning_rate": 3.4979908430170285e-06, "loss": 0.5265, "step": 4919 }, { "epoch": 0.7338354836303975, "grad_norm": 1.16049325466156, "learning_rate": 3.4943206336645917e-06, "loss": 0.5285, "step": 4920 }, { "epoch": 0.7339846371839809, "grad_norm": 1.4426339864730835, "learning_rate": 3.490651943104718e-06, "loss": 0.598, "step": 4921 }, { "epoch": 0.7341337907375644, "grad_norm": 1.3318891525268555, "learning_rate": 3.4869847721938897e-06, "loss": 0.5869, "step": 4922 }, { "epoch": 0.7342829442911477, "grad_norm": 1.4628374576568604, "learning_rate": 3.4833191217882247e-06, "loss": 0.5988, "step": 4923 }, { "epoch": 0.7344320978447312, "grad_norm": 1.419461727142334, "learning_rate": 3.479654992743495e-06, "loss": 0.5723, "step": 4924 }, { "epoch": 0.7345812513983145, "grad_norm": 1.3531767129898071, "learning_rate": 3.4759923859151167e-06, "loss": 0.6764, "step": 4925 }, { "epoch": 0.734730404951898, "grad_norm": 1.36509108543396, "learning_rate": 3.4723313021581517e-06, "loss": 0.5461, "step": 4926 }, { "epoch": 0.7348795585054814, "grad_norm": 1.4054243564605713, "learning_rate": 3.4686717423272932e-06, "loss": 0.4968, "step": 4927 }, { "epoch": 0.7350287120590648, "grad_norm": 1.40316641330719, "learning_rate": 3.465013707276902e-06, "loss": 0.4893, "step": 4928 }, { "epoch": 0.7351778656126482, "grad_norm": 1.274916410446167, "learning_rate": 3.4613571978609595e-06, "loss": 0.5976, "step": 4929 }, { "epoch": 0.7353270191662317, "grad_norm": 2.106076240539551, "learning_rate": 3.4577022149331065e-06, "loss": 0.53, "step": 4930 }, { "epoch": 0.735476172719815, "grad_norm": 1.4353049993515015, "learning_rate": 3.4540487593466197e-06, "loss": 0.557, "step": 4931 }, { "epoch": 0.7356253262733985, "grad_norm": 1.4683834314346313, "learning_rate": 3.4503968319544266e-06, "loss": 0.5326, "step": 4932 }, { "epoch": 0.7357744798269819, "grad_norm": 1.3303179740905762, "learning_rate": 3.4467464336090863e-06, "loss": 0.5166, "step": 4933 }, { "epoch": 0.7359236333805653, "grad_norm": 1.439052700996399, "learning_rate": 3.443097565162811e-06, "loss": 0.5712, "step": 4934 }, { "epoch": 0.7360727869341487, "grad_norm": 1.5449925661087036, "learning_rate": 3.4394502274674544e-06, "loss": 0.6103, "step": 4935 }, { "epoch": 0.7362219404877322, "grad_norm": 1.372719168663025, "learning_rate": 3.435804421374502e-06, "loss": 0.5414, "step": 4936 }, { "epoch": 0.7363710940413155, "grad_norm": 2.509561061859131, "learning_rate": 3.4321601477351017e-06, "loss": 0.5612, "step": 4937 }, { "epoch": 0.736520247594899, "grad_norm": 1.3385087251663208, "learning_rate": 3.4285174074000317e-06, "loss": 0.5061, "step": 4938 }, { "epoch": 0.7366694011484823, "grad_norm": 1.7216588258743286, "learning_rate": 3.4248762012197047e-06, "loss": 0.5151, "step": 4939 }, { "epoch": 0.7368185547020658, "grad_norm": 1.2071961164474487, "learning_rate": 3.42123653004419e-06, "loss": 0.5082, "step": 4940 }, { "epoch": 0.7369677082556492, "grad_norm": 1.3739126920700073, "learning_rate": 3.417598394723193e-06, "loss": 0.5273, "step": 4941 }, { "epoch": 0.7371168618092326, "grad_norm": 1.8216902017593384, "learning_rate": 3.4139617961060546e-06, "loss": 0.6302, "step": 4942 }, { "epoch": 0.737266015362816, "grad_norm": 2.747519016265869, "learning_rate": 3.4103267350417645e-06, "loss": 0.4953, "step": 4943 }, { "epoch": 0.7374151689163995, "grad_norm": 1.4003558158874512, "learning_rate": 3.406693212378951e-06, "loss": 0.5416, "step": 4944 }, { "epoch": 0.7375643224699828, "grad_norm": 1.2064540386199951, "learning_rate": 3.4030612289658836e-06, "loss": 0.5327, "step": 4945 }, { "epoch": 0.7377134760235663, "grad_norm": 1.9450759887695312, "learning_rate": 3.399430785650473e-06, "loss": 0.5786, "step": 4946 }, { "epoch": 0.7378626295771497, "grad_norm": 1.3461816310882568, "learning_rate": 3.395801883280271e-06, "loss": 0.5591, "step": 4947 }, { "epoch": 0.7380117831307331, "grad_norm": 1.6045266389846802, "learning_rate": 3.3921745227024626e-06, "loss": 0.5385, "step": 4948 }, { "epoch": 0.7381609366843165, "grad_norm": 1.357467532157898, "learning_rate": 3.388548704763882e-06, "loss": 0.5598, "step": 4949 }, { "epoch": 0.7383100902379, "grad_norm": 1.4727283716201782, "learning_rate": 3.3849244303109986e-06, "loss": 0.5711, "step": 4950 }, { "epoch": 0.7384592437914833, "grad_norm": 1.364349126815796, "learning_rate": 3.381301700189927e-06, "loss": 0.6007, "step": 4951 }, { "epoch": 0.7386083973450668, "grad_norm": 1.3425424098968506, "learning_rate": 3.3776805152464087e-06, "loss": 0.5152, "step": 4952 }, { "epoch": 0.7387575508986501, "grad_norm": 1.2932883501052856, "learning_rate": 3.3740608763258375e-06, "loss": 0.5977, "step": 4953 }, { "epoch": 0.7389067044522336, "grad_norm": 1.1881029605865479, "learning_rate": 3.3704427842732403e-06, "loss": 0.5615, "step": 4954 }, { "epoch": 0.739055858005817, "grad_norm": 1.2533546686172485, "learning_rate": 3.366826239933283e-06, "loss": 0.556, "step": 4955 }, { "epoch": 0.7392050115594004, "grad_norm": 1.29573392868042, "learning_rate": 3.363211244150273e-06, "loss": 0.6066, "step": 4956 }, { "epoch": 0.7393541651129838, "grad_norm": 1.3280997276306152, "learning_rate": 3.359597797768157e-06, "loss": 0.5955, "step": 4957 }, { "epoch": 0.7395033186665673, "grad_norm": 1.3824388980865479, "learning_rate": 3.3559859016305094e-06, "loss": 0.5814, "step": 4958 }, { "epoch": 0.7396524722201506, "grad_norm": 1.5061087608337402, "learning_rate": 3.352375556580556e-06, "loss": 0.5443, "step": 4959 }, { "epoch": 0.7398016257737341, "grad_norm": 1.3652757406234741, "learning_rate": 3.3487667634611555e-06, "loss": 0.5797, "step": 4960 }, { "epoch": 0.7399507793273175, "grad_norm": 1.3768787384033203, "learning_rate": 3.3451595231148005e-06, "loss": 0.5704, "step": 4961 }, { "epoch": 0.7400999328809009, "grad_norm": 1.4568440914154053, "learning_rate": 3.341553836383621e-06, "loss": 0.5261, "step": 4962 }, { "epoch": 0.7402490864344843, "grad_norm": 1.5523958206176758, "learning_rate": 3.3379497041094e-06, "loss": 0.6569, "step": 4963 }, { "epoch": 0.7403982399880678, "grad_norm": 1.3063342571258545, "learning_rate": 3.334347127133534e-06, "loss": 0.5344, "step": 4964 }, { "epoch": 0.7405473935416511, "grad_norm": 1.4831011295318604, "learning_rate": 3.3307461062970726e-06, "loss": 0.5541, "step": 4965 }, { "epoch": 0.7406965470952346, "grad_norm": 1.3774478435516357, "learning_rate": 3.3271466424406984e-06, "loss": 0.6036, "step": 4966 }, { "epoch": 0.7408457006488179, "grad_norm": 1.4640518426895142, "learning_rate": 3.323548736404725e-06, "loss": 0.6426, "step": 4967 }, { "epoch": 0.7409948542024014, "grad_norm": 1.295133352279663, "learning_rate": 3.3199523890291074e-06, "loss": 0.5572, "step": 4968 }, { "epoch": 0.7411440077559848, "grad_norm": 1.3967106342315674, "learning_rate": 3.3163576011534417e-06, "loss": 0.6, "step": 4969 }, { "epoch": 0.7412931613095682, "grad_norm": 1.5484026670455933, "learning_rate": 3.312764373616946e-06, "loss": 0.6223, "step": 4970 }, { "epoch": 0.7414423148631516, "grad_norm": 6.800854682922363, "learning_rate": 3.3091727072584825e-06, "loss": 0.5367, "step": 4971 }, { "epoch": 0.7415914684167351, "grad_norm": 1.3927927017211914, "learning_rate": 3.305582602916558e-06, "loss": 0.6148, "step": 4972 }, { "epoch": 0.7417406219703184, "grad_norm": 1.4281870126724243, "learning_rate": 3.3019940614292977e-06, "loss": 0.5908, "step": 4973 }, { "epoch": 0.7418897755239019, "grad_norm": 1.1808552742004395, "learning_rate": 3.2984070836344717e-06, "loss": 0.5607, "step": 4974 }, { "epoch": 0.7420389290774853, "grad_norm": 1.2487295866012573, "learning_rate": 3.2948216703694836e-06, "loss": 0.524, "step": 4975 }, { "epoch": 0.7421880826310687, "grad_norm": 1.304473876953125, "learning_rate": 3.2912378224713727e-06, "loss": 0.5555, "step": 4976 }, { "epoch": 0.7423372361846521, "grad_norm": 1.294492483139038, "learning_rate": 3.287655540776805e-06, "loss": 0.5403, "step": 4977 }, { "epoch": 0.7424863897382356, "grad_norm": 1.2959884405136108, "learning_rate": 3.284074826122092e-06, "loss": 0.5619, "step": 4978 }, { "epoch": 0.7426355432918189, "grad_norm": 0.8210932016372681, "learning_rate": 3.280495679343173e-06, "loss": 0.6234, "step": 4979 }, { "epoch": 0.7427846968454024, "grad_norm": 1.4165407419204712, "learning_rate": 3.2769181012756248e-06, "loss": 0.5622, "step": 4980 }, { "epoch": 0.7429338503989857, "grad_norm": 1.3773047924041748, "learning_rate": 3.2733420927546533e-06, "loss": 0.5332, "step": 4981 }, { "epoch": 0.7430830039525692, "grad_norm": 1.8205249309539795, "learning_rate": 3.2697676546151045e-06, "loss": 0.5794, "step": 4982 }, { "epoch": 0.7432321575061526, "grad_norm": 1.450418472290039, "learning_rate": 3.266194787691449e-06, "loss": 0.5797, "step": 4983 }, { "epoch": 0.743381311059736, "grad_norm": 1.450011134147644, "learning_rate": 3.262623492817798e-06, "loss": 0.5729, "step": 4984 }, { "epoch": 0.7435304646133194, "grad_norm": 1.4175851345062256, "learning_rate": 3.2590537708278956e-06, "loss": 0.5644, "step": 4985 }, { "epoch": 0.7436796181669029, "grad_norm": 1.3944796323776245, "learning_rate": 3.25548562255511e-06, "loss": 0.5012, "step": 4986 }, { "epoch": 0.7438287717204862, "grad_norm": 1.5318894386291504, "learning_rate": 3.2519190488324528e-06, "loss": 0.6402, "step": 4987 }, { "epoch": 0.7439779252740697, "grad_norm": 1.2833516597747803, "learning_rate": 3.2483540504925616e-06, "loss": 0.5213, "step": 4988 }, { "epoch": 0.744127078827653, "grad_norm": 1.3077136278152466, "learning_rate": 3.24479062836771e-06, "loss": 0.5227, "step": 4989 }, { "epoch": 0.7442762323812365, "grad_norm": 1.3882249593734741, "learning_rate": 3.2412287832898004e-06, "loss": 0.5098, "step": 4990 }, { "epoch": 0.7444253859348199, "grad_norm": 1.642595648765564, "learning_rate": 3.237668516090372e-06, "loss": 0.5933, "step": 4991 }, { "epoch": 0.7445745394884034, "grad_norm": 1.445712685585022, "learning_rate": 3.2341098276005856e-06, "loss": 0.5945, "step": 4992 }, { "epoch": 0.7447236930419867, "grad_norm": 1.1131139993667603, "learning_rate": 3.2305527186512432e-06, "loss": 0.5413, "step": 4993 }, { "epoch": 0.7448728465955702, "grad_norm": 1.3062214851379395, "learning_rate": 3.226997190072777e-06, "loss": 0.5869, "step": 4994 }, { "epoch": 0.7450220001491535, "grad_norm": 1.3631565570831299, "learning_rate": 3.2234432426952432e-06, "loss": 0.5675, "step": 4995 }, { "epoch": 0.745171153702737, "grad_norm": 1.2576824426651, "learning_rate": 3.219890877348336e-06, "loss": 0.5703, "step": 4996 }, { "epoch": 0.7453203072563204, "grad_norm": 1.6060328483581543, "learning_rate": 3.216340094861378e-06, "loss": 0.5177, "step": 4997 }, { "epoch": 0.7454694608099038, "grad_norm": 1.2781118154525757, "learning_rate": 3.212790896063321e-06, "loss": 0.5663, "step": 4998 }, { "epoch": 0.7456186143634872, "grad_norm": 1.3469715118408203, "learning_rate": 3.2092432817827502e-06, "loss": 0.6871, "step": 4999 }, { "epoch": 0.7457677679170707, "grad_norm": 1.252915382385254, "learning_rate": 3.2056972528478802e-06, "loss": 0.5756, "step": 5000 }, { "epoch": 0.745916921470654, "grad_norm": 1.4076409339904785, "learning_rate": 3.2021528100865483e-06, "loss": 0.58, "step": 5001 }, { "epoch": 0.7460660750242375, "grad_norm": 1.4372414350509644, "learning_rate": 3.198609954326232e-06, "loss": 0.5912, "step": 5002 }, { "epoch": 0.7462152285778209, "grad_norm": 0.9039559364318848, "learning_rate": 3.1950686863940315e-06, "loss": 0.631, "step": 5003 }, { "epoch": 0.7463643821314043, "grad_norm": 1.404214859008789, "learning_rate": 3.1915290071166836e-06, "loss": 0.5748, "step": 5004 }, { "epoch": 0.7465135356849877, "grad_norm": 1.405129075050354, "learning_rate": 3.1879909173205425e-06, "loss": 0.4768, "step": 5005 }, { "epoch": 0.7466626892385712, "grad_norm": 1.2296899557113647, "learning_rate": 3.1844544178315995e-06, "loss": 0.4893, "step": 5006 }, { "epoch": 0.7468118427921545, "grad_norm": 1.599259614944458, "learning_rate": 3.1809195094754754e-06, "loss": 0.5918, "step": 5007 }, { "epoch": 0.746960996345738, "grad_norm": 1.6965794563293457, "learning_rate": 3.177386193077415e-06, "loss": 0.5371, "step": 5008 }, { "epoch": 0.7471101498993213, "grad_norm": 1.4379404783248901, "learning_rate": 3.1738544694622955e-06, "loss": 0.5717, "step": 5009 }, { "epoch": 0.7472593034529048, "grad_norm": 1.3180882930755615, "learning_rate": 3.170324339454621e-06, "loss": 0.5492, "step": 5010 }, { "epoch": 0.7474084570064882, "grad_norm": 1.5211888551712036, "learning_rate": 3.1667958038785206e-06, "loss": 0.5245, "step": 5011 }, { "epoch": 0.7475576105600716, "grad_norm": 1.4389101266860962, "learning_rate": 3.1632688635577535e-06, "loss": 0.5631, "step": 5012 }, { "epoch": 0.747706764113655, "grad_norm": 1.276091456413269, "learning_rate": 3.15974351931571e-06, "loss": 0.5109, "step": 5013 }, { "epoch": 0.7478559176672385, "grad_norm": 1.4327969551086426, "learning_rate": 3.156219771975397e-06, "loss": 0.5885, "step": 5014 }, { "epoch": 0.7480050712208218, "grad_norm": 1.6124247312545776, "learning_rate": 3.152697622359463e-06, "loss": 0.561, "step": 5015 }, { "epoch": 0.7481542247744053, "grad_norm": 1.3456027507781982, "learning_rate": 3.149177071290178e-06, "loss": 0.5431, "step": 5016 }, { "epoch": 0.7483033783279887, "grad_norm": 1.3633872270584106, "learning_rate": 3.14565811958943e-06, "loss": 0.6388, "step": 5017 }, { "epoch": 0.7484525318815721, "grad_norm": 3.218388319015503, "learning_rate": 3.142140768078744e-06, "loss": 0.5727, "step": 5018 }, { "epoch": 0.7486016854351555, "grad_norm": 1.5798683166503906, "learning_rate": 3.138625017579272e-06, "loss": 0.6177, "step": 5019 }, { "epoch": 0.748750838988739, "grad_norm": 1.416909098625183, "learning_rate": 3.1351108689117813e-06, "loss": 0.6233, "step": 5020 }, { "epoch": 0.7488999925423223, "grad_norm": 1.3236929178237915, "learning_rate": 3.1315983228966774e-06, "loss": 0.5703, "step": 5021 }, { "epoch": 0.7490491460959058, "grad_norm": 1.3595000505447388, "learning_rate": 3.1280873803539845e-06, "loss": 0.5959, "step": 5022 }, { "epoch": 0.7491982996494891, "grad_norm": 1.361760139465332, "learning_rate": 3.1245780421033557e-06, "loss": 0.5871, "step": 5023 }, { "epoch": 0.7493474532030726, "grad_norm": 1.5315675735473633, "learning_rate": 3.121070308964069e-06, "loss": 0.5662, "step": 5024 }, { "epoch": 0.749496606756656, "grad_norm": 1.4634732007980347, "learning_rate": 3.1175641817550295e-06, "loss": 0.5655, "step": 5025 }, { "epoch": 0.7496457603102394, "grad_norm": 1.3878686428070068, "learning_rate": 3.1140596612947582e-06, "loss": 0.547, "step": 5026 }, { "epoch": 0.7497949138638228, "grad_norm": 1.5241435766220093, "learning_rate": 3.1105567484014133e-06, "loss": 0.535, "step": 5027 }, { "epoch": 0.7499440674174063, "grad_norm": 1.4549388885498047, "learning_rate": 3.1070554438927703e-06, "loss": 0.5686, "step": 5028 }, { "epoch": 0.7500932209709896, "grad_norm": 1.5447752475738525, "learning_rate": 3.1035557485862343e-06, "loss": 0.6036, "step": 5029 }, { "epoch": 0.7502423745245731, "grad_norm": 1.3450875282287598, "learning_rate": 3.1000576632988265e-06, "loss": 0.5271, "step": 5030 }, { "epoch": 0.7503915280781565, "grad_norm": 1.6940839290618896, "learning_rate": 3.0965611888471993e-06, "loss": 0.533, "step": 5031 }, { "epoch": 0.7505406816317399, "grad_norm": 2.2698614597320557, "learning_rate": 3.093066326047628e-06, "loss": 0.6044, "step": 5032 }, { "epoch": 0.7506898351853233, "grad_norm": 1.597837209701538, "learning_rate": 3.0895730757160104e-06, "loss": 0.5984, "step": 5033 }, { "epoch": 0.7508389887389068, "grad_norm": 1.2675509452819824, "learning_rate": 3.0860814386678683e-06, "loss": 0.5932, "step": 5034 }, { "epoch": 0.7509881422924901, "grad_norm": 0.8013705611228943, "learning_rate": 3.0825914157183502e-06, "loss": 0.5905, "step": 5035 }, { "epoch": 0.7511372958460736, "grad_norm": 1.4802777767181396, "learning_rate": 3.079103007682217e-06, "loss": 0.6226, "step": 5036 }, { "epoch": 0.7512864493996569, "grad_norm": 1.4388927221298218, "learning_rate": 3.0756162153738633e-06, "loss": 0.5721, "step": 5037 }, { "epoch": 0.7514356029532404, "grad_norm": 1.5528013706207275, "learning_rate": 3.072131039607308e-06, "loss": 0.6402, "step": 5038 }, { "epoch": 0.7515847565068238, "grad_norm": 1.3020950555801392, "learning_rate": 3.0686474811961787e-06, "loss": 0.569, "step": 5039 }, { "epoch": 0.7517339100604072, "grad_norm": 1.3872729539871216, "learning_rate": 3.0651655409537394e-06, "loss": 0.6327, "step": 5040 }, { "epoch": 0.7518830636139906, "grad_norm": 0.8909628391265869, "learning_rate": 3.0616852196928714e-06, "loss": 0.6669, "step": 5041 }, { "epoch": 0.7520322171675741, "grad_norm": 1.4684513807296753, "learning_rate": 3.0582065182260777e-06, "loss": 0.5738, "step": 5042 }, { "epoch": 0.7521813707211574, "grad_norm": 1.4825397729873657, "learning_rate": 3.054729437365482e-06, "loss": 0.5353, "step": 5043 }, { "epoch": 0.7523305242747409, "grad_norm": 1.4225138425827026, "learning_rate": 3.051253977922838e-06, "loss": 0.4904, "step": 5044 }, { "epoch": 0.7524796778283243, "grad_norm": 0.8841769099235535, "learning_rate": 3.047780140709503e-06, "loss": 0.6524, "step": 5045 }, { "epoch": 0.7526288313819077, "grad_norm": 1.3726930618286133, "learning_rate": 3.044307926536474e-06, "loss": 0.5776, "step": 5046 }, { "epoch": 0.7527779849354911, "grad_norm": 1.2524808645248413, "learning_rate": 3.0408373362143617e-06, "loss": 0.3696, "step": 5047 }, { "epoch": 0.7529271384890746, "grad_norm": 1.5775249004364014, "learning_rate": 3.037368370553393e-06, "loss": 0.5908, "step": 5048 }, { "epoch": 0.7530762920426579, "grad_norm": 1.3529540300369263, "learning_rate": 3.0339010303634186e-06, "loss": 0.4865, "step": 5049 }, { "epoch": 0.7532254455962414, "grad_norm": 2.170748472213745, "learning_rate": 3.0304353164539224e-06, "loss": 0.5168, "step": 5050 }, { "epoch": 0.7533745991498247, "grad_norm": 1.4253431558609009, "learning_rate": 3.026971229633988e-06, "loss": 0.5696, "step": 5051 }, { "epoch": 0.7535237527034082, "grad_norm": 1.373403549194336, "learning_rate": 3.023508770712331e-06, "loss": 0.569, "step": 5052 }, { "epoch": 0.7536729062569916, "grad_norm": 1.3944244384765625, "learning_rate": 3.020047940497288e-06, "loss": 0.5016, "step": 5053 }, { "epoch": 0.753822059810575, "grad_norm": 1.4048835039138794, "learning_rate": 3.0165887397968064e-06, "loss": 0.5177, "step": 5054 }, { "epoch": 0.7539712133641584, "grad_norm": 3.003864288330078, "learning_rate": 3.0131311694184617e-06, "loss": 0.5398, "step": 5055 }, { "epoch": 0.7541203669177419, "grad_norm": 1.2061500549316406, "learning_rate": 3.009675230169444e-06, "loss": 0.4969, "step": 5056 }, { "epoch": 0.7542695204713252, "grad_norm": 1.4816967248916626, "learning_rate": 3.006220922856571e-06, "loss": 0.4995, "step": 5057 }, { "epoch": 0.7544186740249087, "grad_norm": 1.3653219938278198, "learning_rate": 3.0027682482862606e-06, "loss": 0.5432, "step": 5058 }, { "epoch": 0.7545678275784921, "grad_norm": 1.2329399585723877, "learning_rate": 2.999317207264575e-06, "loss": 0.5326, "step": 5059 }, { "epoch": 0.7547169811320755, "grad_norm": 1.403306007385254, "learning_rate": 2.9958678005971744e-06, "loss": 0.6164, "step": 5060 }, { "epoch": 0.7548661346856589, "grad_norm": 1.434025764465332, "learning_rate": 2.9924200290893447e-06, "loss": 0.5783, "step": 5061 }, { "epoch": 0.7550152882392424, "grad_norm": 1.3915373086929321, "learning_rate": 2.9889738935459934e-06, "loss": 0.5509, "step": 5062 }, { "epoch": 0.7551644417928257, "grad_norm": 1.5620747804641724, "learning_rate": 2.9855293947716446e-06, "loss": 0.6158, "step": 5063 }, { "epoch": 0.7553135953464092, "grad_norm": 1.5348997116088867, "learning_rate": 2.9820865335704318e-06, "loss": 0.615, "step": 5064 }, { "epoch": 0.7554627488999925, "grad_norm": 1.4213351011276245, "learning_rate": 2.9786453107461166e-06, "loss": 0.4639, "step": 5065 }, { "epoch": 0.7556119024535759, "grad_norm": 1.3651888370513916, "learning_rate": 2.9752057271020785e-06, "loss": 0.5816, "step": 5066 }, { "epoch": 0.7557610560071594, "grad_norm": 1.3685451745986938, "learning_rate": 2.9717677834413006e-06, "loss": 0.4683, "step": 5067 }, { "epoch": 0.7559102095607427, "grad_norm": 1.4365485906600952, "learning_rate": 2.968331480566402e-06, "loss": 0.5583, "step": 5068 }, { "epoch": 0.7560593631143262, "grad_norm": 1.3276945352554321, "learning_rate": 2.96489681927961e-06, "loss": 0.5361, "step": 5069 }, { "epoch": 0.7562085166679096, "grad_norm": 0.8898283243179321, "learning_rate": 2.961463800382761e-06, "loss": 0.6567, "step": 5070 }, { "epoch": 0.756357670221493, "grad_norm": 1.4997258186340332, "learning_rate": 2.9580324246773195e-06, "loss": 0.5567, "step": 5071 }, { "epoch": 0.7565068237750764, "grad_norm": 1.3691660165786743, "learning_rate": 2.9546026929643645e-06, "loss": 0.5481, "step": 5072 }, { "epoch": 0.7566559773286599, "grad_norm": 1.366114854812622, "learning_rate": 2.9511746060445834e-06, "loss": 0.6268, "step": 5073 }, { "epoch": 0.7568051308822432, "grad_norm": 1.51362144947052, "learning_rate": 2.947748164718288e-06, "loss": 0.61, "step": 5074 }, { "epoch": 0.7569542844358267, "grad_norm": 1.6381510496139526, "learning_rate": 2.9443233697854036e-06, "loss": 0.568, "step": 5075 }, { "epoch": 0.75710343798941, "grad_norm": 1.4147790670394897, "learning_rate": 2.9409002220454686e-06, "loss": 0.5739, "step": 5076 }, { "epoch": 0.7572525915429935, "grad_norm": 1.3678243160247803, "learning_rate": 2.9374787222976397e-06, "loss": 0.4922, "step": 5077 }, { "epoch": 0.7574017450965769, "grad_norm": 1.2649654150009155, "learning_rate": 2.9340588713406927e-06, "loss": 0.5977, "step": 5078 }, { "epoch": 0.7575508986501603, "grad_norm": 0.8429850339889526, "learning_rate": 2.930640669973005e-06, "loss": 0.6578, "step": 5079 }, { "epoch": 0.7577000522037437, "grad_norm": 1.3176251649856567, "learning_rate": 2.927224118992582e-06, "loss": 0.5037, "step": 5080 }, { "epoch": 0.7578492057573272, "grad_norm": 1.43226957321167, "learning_rate": 2.92380921919704e-06, "loss": 0.5784, "step": 5081 }, { "epoch": 0.7579983593109105, "grad_norm": 1.2340843677520752, "learning_rate": 2.920395971383612e-06, "loss": 0.5325, "step": 5082 }, { "epoch": 0.758147512864494, "grad_norm": 1.2880743741989136, "learning_rate": 2.9169843763491345e-06, "loss": 0.5939, "step": 5083 }, { "epoch": 0.7582966664180774, "grad_norm": 1.6515685319900513, "learning_rate": 2.9135744348900717e-06, "loss": 0.6016, "step": 5084 }, { "epoch": 0.7584458199716608, "grad_norm": 1.3616230487823486, "learning_rate": 2.9101661478024958e-06, "loss": 0.4918, "step": 5085 }, { "epoch": 0.7585949735252442, "grad_norm": 1.4452773332595825, "learning_rate": 2.9067595158820925e-06, "loss": 0.6603, "step": 5086 }, { "epoch": 0.7587441270788277, "grad_norm": 1.4740054607391357, "learning_rate": 2.9033545399241625e-06, "loss": 0.6183, "step": 5087 }, { "epoch": 0.758893280632411, "grad_norm": 1.922692894935608, "learning_rate": 2.8999512207236226e-06, "loss": 0.5598, "step": 5088 }, { "epoch": 0.7590424341859945, "grad_norm": 1.6468993425369263, "learning_rate": 2.8965495590749925e-06, "loss": 0.5537, "step": 5089 }, { "epoch": 0.7591915877395778, "grad_norm": 1.4366803169250488, "learning_rate": 2.8931495557724154e-06, "loss": 0.6229, "step": 5090 }, { "epoch": 0.7593407412931613, "grad_norm": 1.3580580949783325, "learning_rate": 2.8897512116096473e-06, "loss": 0.4892, "step": 5091 }, { "epoch": 0.7594898948467447, "grad_norm": 1.4912967681884766, "learning_rate": 2.8863545273800462e-06, "loss": 0.5546, "step": 5092 }, { "epoch": 0.7596390484003281, "grad_norm": 1.4890880584716797, "learning_rate": 2.8829595038765914e-06, "loss": 0.5548, "step": 5093 }, { "epoch": 0.7597882019539115, "grad_norm": 1.6285810470581055, "learning_rate": 2.8795661418918806e-06, "loss": 0.5755, "step": 5094 }, { "epoch": 0.759937355507495, "grad_norm": 1.40431547164917, "learning_rate": 2.876174442218107e-06, "loss": 0.5838, "step": 5095 }, { "epoch": 0.7600865090610783, "grad_norm": 1.585207462310791, "learning_rate": 2.8727844056470886e-06, "loss": 0.5539, "step": 5096 }, { "epoch": 0.7602356626146618, "grad_norm": 1.4188663959503174, "learning_rate": 2.8693960329702542e-06, "loss": 0.6087, "step": 5097 }, { "epoch": 0.7603848161682452, "grad_norm": 1.3290342092514038, "learning_rate": 2.866009324978635e-06, "loss": 0.5532, "step": 5098 }, { "epoch": 0.7605339697218286, "grad_norm": 1.5577480792999268, "learning_rate": 2.862624282462881e-06, "loss": 0.6323, "step": 5099 }, { "epoch": 0.760683123275412, "grad_norm": 1.4552758932113647, "learning_rate": 2.859240906213254e-06, "loss": 0.5264, "step": 5100 }, { "epoch": 0.7608322768289955, "grad_norm": 1.3279473781585693, "learning_rate": 2.855859197019627e-06, "loss": 0.5425, "step": 5101 }, { "epoch": 0.7609814303825788, "grad_norm": 1.9128199815750122, "learning_rate": 2.8524791556714736e-06, "loss": 0.5738, "step": 5102 }, { "epoch": 0.7611305839361623, "grad_norm": 1.5245940685272217, "learning_rate": 2.8491007829578965e-06, "loss": 0.6377, "step": 5103 }, { "epoch": 0.7612797374897456, "grad_norm": 1.138901948928833, "learning_rate": 2.845724079667591e-06, "loss": 0.5564, "step": 5104 }, { "epoch": 0.7614288910433291, "grad_norm": 1.294039011001587, "learning_rate": 2.8423490465888727e-06, "loss": 0.5328, "step": 5105 }, { "epoch": 0.7615780445969125, "grad_norm": 1.3538025617599487, "learning_rate": 2.8389756845096637e-06, "loss": 0.556, "step": 5106 }, { "epoch": 0.7617271981504959, "grad_norm": 1.406044602394104, "learning_rate": 2.835603994217502e-06, "loss": 0.528, "step": 5107 }, { "epoch": 0.7618763517040793, "grad_norm": 0.8825968503952026, "learning_rate": 2.8322339764995235e-06, "loss": 0.6338, "step": 5108 }, { "epoch": 0.7620255052576628, "grad_norm": 1.4226433038711548, "learning_rate": 2.8288656321424824e-06, "loss": 0.5559, "step": 5109 }, { "epoch": 0.7621746588112461, "grad_norm": 1.3351366519927979, "learning_rate": 2.825498961932743e-06, "loss": 0.5558, "step": 5110 }, { "epoch": 0.7623238123648296, "grad_norm": 1.5574557781219482, "learning_rate": 2.8221339666562695e-06, "loss": 0.6167, "step": 5111 }, { "epoch": 0.762472965918413, "grad_norm": 1.5118483304977417, "learning_rate": 2.8187706470986496e-06, "loss": 0.609, "step": 5112 }, { "epoch": 0.7626221194719964, "grad_norm": 1.3614258766174316, "learning_rate": 2.815409004045071e-06, "loss": 0.5588, "step": 5113 }, { "epoch": 0.7627712730255798, "grad_norm": 1.5362136363983154, "learning_rate": 2.8120490382803244e-06, "loss": 0.6236, "step": 5114 }, { "epoch": 0.7629204265791633, "grad_norm": 1.4005582332611084, "learning_rate": 2.8086907505888205e-06, "loss": 0.5954, "step": 5115 }, { "epoch": 0.7630695801327466, "grad_norm": 1.4927526712417603, "learning_rate": 2.8053341417545744e-06, "loss": 0.6261, "step": 5116 }, { "epoch": 0.7632187336863301, "grad_norm": 1.6012656688690186, "learning_rate": 2.801979212561202e-06, "loss": 0.5781, "step": 5117 }, { "epoch": 0.7633678872399134, "grad_norm": 1.3432977199554443, "learning_rate": 2.7986259637919365e-06, "loss": 0.5253, "step": 5118 }, { "epoch": 0.7635170407934969, "grad_norm": 1.2440593242645264, "learning_rate": 2.7952743962296146e-06, "loss": 0.5174, "step": 5119 }, { "epoch": 0.7636661943470803, "grad_norm": 1.2892236709594727, "learning_rate": 2.7919245106566827e-06, "loss": 0.5287, "step": 5120 }, { "epoch": 0.7638153479006637, "grad_norm": 1.4735262393951416, "learning_rate": 2.788576307855192e-06, "loss": 0.5535, "step": 5121 }, { "epoch": 0.7639645014542471, "grad_norm": 1.4635345935821533, "learning_rate": 2.785229788606806e-06, "loss": 0.6143, "step": 5122 }, { "epoch": 0.7641136550078306, "grad_norm": 1.2116203308105469, "learning_rate": 2.7818849536927827e-06, "loss": 0.4849, "step": 5123 }, { "epoch": 0.7642628085614139, "grad_norm": 1.3887643814086914, "learning_rate": 2.7785418038940004e-06, "loss": 0.6248, "step": 5124 }, { "epoch": 0.7644119621149974, "grad_norm": 1.2208865880966187, "learning_rate": 2.7752003399909423e-06, "loss": 0.5038, "step": 5125 }, { "epoch": 0.7645611156685808, "grad_norm": 0.8352847695350647, "learning_rate": 2.771860562763686e-06, "loss": 0.587, "step": 5126 }, { "epoch": 0.7647102692221642, "grad_norm": 1.2120205163955688, "learning_rate": 2.768522472991929e-06, "loss": 0.4644, "step": 5127 }, { "epoch": 0.7648594227757476, "grad_norm": 1.3916654586791992, "learning_rate": 2.7651860714549695e-06, "loss": 0.4947, "step": 5128 }, { "epoch": 0.7650085763293311, "grad_norm": 1.4043000936508179, "learning_rate": 2.761851358931711e-06, "loss": 0.5879, "step": 5129 }, { "epoch": 0.7651577298829144, "grad_norm": 1.1653085947036743, "learning_rate": 2.758518336200664e-06, "loss": 0.55, "step": 5130 }, { "epoch": 0.7653068834364979, "grad_norm": 1.3223586082458496, "learning_rate": 2.7551870040399475e-06, "loss": 0.5465, "step": 5131 }, { "epoch": 0.7654560369900812, "grad_norm": 1.779221534729004, "learning_rate": 2.751857363227276e-06, "loss": 0.5519, "step": 5132 }, { "epoch": 0.7656051905436647, "grad_norm": 1.2868891954421997, "learning_rate": 2.7485294145399778e-06, "loss": 0.5575, "step": 5133 }, { "epoch": 0.7657543440972481, "grad_norm": 1.3322683572769165, "learning_rate": 2.7452031587549844e-06, "loss": 0.5095, "step": 5134 }, { "epoch": 0.7659034976508315, "grad_norm": 1.2937184572219849, "learning_rate": 2.7418785966488347e-06, "loss": 0.4805, "step": 5135 }, { "epoch": 0.7660526512044149, "grad_norm": 1.5001907348632812, "learning_rate": 2.738555728997664e-06, "loss": 0.5594, "step": 5136 }, { "epoch": 0.7662018047579984, "grad_norm": 1.4782065153121948, "learning_rate": 2.7352345565772175e-06, "loss": 0.5883, "step": 5137 }, { "epoch": 0.7663509583115817, "grad_norm": 1.1993980407714844, "learning_rate": 2.731915080162847e-06, "loss": 0.4655, "step": 5138 }, { "epoch": 0.7665001118651652, "grad_norm": 1.3228652477264404, "learning_rate": 2.728597300529503e-06, "loss": 0.591, "step": 5139 }, { "epoch": 0.7666492654187486, "grad_norm": 1.339324712753296, "learning_rate": 2.7252812184517454e-06, "loss": 0.6322, "step": 5140 }, { "epoch": 0.766798418972332, "grad_norm": 1.3142213821411133, "learning_rate": 2.721966834703734e-06, "loss": 0.5617, "step": 5141 }, { "epoch": 0.7669475725259154, "grad_norm": 1.322619080543518, "learning_rate": 2.718654150059231e-06, "loss": 0.5627, "step": 5142 }, { "epoch": 0.7670967260794989, "grad_norm": 0.8583683967590332, "learning_rate": 2.715343165291604e-06, "loss": 0.6218, "step": 5143 }, { "epoch": 0.7672458796330822, "grad_norm": 1.38034987449646, "learning_rate": 2.7120338811738277e-06, "loss": 0.4954, "step": 5144 }, { "epoch": 0.7673950331866657, "grad_norm": 1.4486746788024902, "learning_rate": 2.708726298478469e-06, "loss": 0.5527, "step": 5145 }, { "epoch": 0.767544186740249, "grad_norm": 1.2854806184768677, "learning_rate": 2.7054204179777054e-06, "loss": 0.6067, "step": 5146 }, { "epoch": 0.7676933402938325, "grad_norm": 1.3562257289886475, "learning_rate": 2.7021162404433243e-06, "loss": 0.602, "step": 5147 }, { "epoch": 0.7678424938474159, "grad_norm": 1.621774435043335, "learning_rate": 2.6988137666466983e-06, "loss": 0.5428, "step": 5148 }, { "epoch": 0.7679916474009993, "grad_norm": 1.3685798645019531, "learning_rate": 2.6955129973588136e-06, "loss": 0.5162, "step": 5149 }, { "epoch": 0.7681408009545827, "grad_norm": 1.3640029430389404, "learning_rate": 2.6922139333502594e-06, "loss": 0.5626, "step": 5150 }, { "epoch": 0.7682899545081662, "grad_norm": 1.802399754524231, "learning_rate": 2.6889165753912173e-06, "loss": 0.5582, "step": 5151 }, { "epoch": 0.7684391080617495, "grad_norm": 1.6241259574890137, "learning_rate": 2.6856209242514797e-06, "loss": 0.6015, "step": 5152 }, { "epoch": 0.768588261615333, "grad_norm": 1.3738728761672974, "learning_rate": 2.682326980700437e-06, "loss": 0.6276, "step": 5153 }, { "epoch": 0.7687374151689164, "grad_norm": 1.3983490467071533, "learning_rate": 2.679034745507082e-06, "loss": 0.5495, "step": 5154 }, { "epoch": 0.7688865687224998, "grad_norm": 1.3399296998977661, "learning_rate": 2.6757442194400087e-06, "loss": 0.5877, "step": 5155 }, { "epoch": 0.7690357222760832, "grad_norm": 1.4987082481384277, "learning_rate": 2.6724554032674133e-06, "loss": 0.6693, "step": 5156 }, { "epoch": 0.7691848758296667, "grad_norm": 1.5711561441421509, "learning_rate": 2.6691682977570855e-06, "loss": 0.6478, "step": 5157 }, { "epoch": 0.76933402938325, "grad_norm": 1.2180882692337036, "learning_rate": 2.6658829036764232e-06, "loss": 0.512, "step": 5158 }, { "epoch": 0.7694831829368335, "grad_norm": 1.4904659986495972, "learning_rate": 2.6625992217924245e-06, "loss": 0.5383, "step": 5159 }, { "epoch": 0.7696323364904168, "grad_norm": 1.4019992351531982, "learning_rate": 2.6593172528716884e-06, "loss": 0.6235, "step": 5160 }, { "epoch": 0.7697814900440003, "grad_norm": 1.4282792806625366, "learning_rate": 2.6560369976804045e-06, "loss": 0.5261, "step": 5161 }, { "epoch": 0.7699306435975837, "grad_norm": 1.3861067295074463, "learning_rate": 2.6527584569843746e-06, "loss": 0.5203, "step": 5162 }, { "epoch": 0.7700797971511671, "grad_norm": 1.7977386713027954, "learning_rate": 2.6494816315489923e-06, "loss": 0.5578, "step": 5163 }, { "epoch": 0.7702289507047505, "grad_norm": 1.2490664720535278, "learning_rate": 2.6462065221392564e-06, "loss": 0.5378, "step": 5164 }, { "epoch": 0.770378104258334, "grad_norm": 1.559059739112854, "learning_rate": 2.6429331295197593e-06, "loss": 0.5454, "step": 5165 }, { "epoch": 0.7705272578119173, "grad_norm": 1.2703351974487305, "learning_rate": 2.6396614544547005e-06, "loss": 0.5793, "step": 5166 }, { "epoch": 0.7706764113655008, "grad_norm": 1.510684609413147, "learning_rate": 2.6363914977078665e-06, "loss": 0.6589, "step": 5167 }, { "epoch": 0.7708255649190842, "grad_norm": 1.4037359952926636, "learning_rate": 2.6331232600426535e-06, "loss": 0.5039, "step": 5168 }, { "epoch": 0.7709747184726676, "grad_norm": 1.7334182262420654, "learning_rate": 2.6298567422220556e-06, "loss": 0.5447, "step": 5169 }, { "epoch": 0.771123872026251, "grad_norm": 0.800217866897583, "learning_rate": 2.6265919450086553e-06, "loss": 0.6421, "step": 5170 }, { "epoch": 0.7712730255798345, "grad_norm": 1.5885157585144043, "learning_rate": 2.623328869164644e-06, "loss": 0.5997, "step": 5171 }, { "epoch": 0.7714221791334178, "grad_norm": 1.344955563545227, "learning_rate": 2.6200675154518075e-06, "loss": 0.5545, "step": 5172 }, { "epoch": 0.7715713326870013, "grad_norm": 1.2268643379211426, "learning_rate": 2.6168078846315303e-06, "loss": 0.5522, "step": 5173 }, { "epoch": 0.7717204862405846, "grad_norm": 1.4850184917449951, "learning_rate": 2.613549977464793e-06, "loss": 0.5303, "step": 5174 }, { "epoch": 0.7718696397941681, "grad_norm": 1.3053107261657715, "learning_rate": 2.6102937947121798e-06, "loss": 0.5601, "step": 5175 }, { "epoch": 0.7720187933477515, "grad_norm": 1.64600670337677, "learning_rate": 2.607039337133859e-06, "loss": 0.6018, "step": 5176 }, { "epoch": 0.772167946901335, "grad_norm": 1.5410887002944946, "learning_rate": 2.60378660548961e-06, "loss": 0.6293, "step": 5177 }, { "epoch": 0.7723171004549183, "grad_norm": 1.436151146888733, "learning_rate": 2.6005356005388047e-06, "loss": 0.6303, "step": 5178 }, { "epoch": 0.7724662540085018, "grad_norm": 1.3328837156295776, "learning_rate": 2.5972863230404066e-06, "loss": 0.5735, "step": 5179 }, { "epoch": 0.7726154075620851, "grad_norm": 1.3050235509872437, "learning_rate": 2.594038773752984e-06, "loss": 0.5432, "step": 5180 }, { "epoch": 0.7727645611156686, "grad_norm": 1.3389911651611328, "learning_rate": 2.590792953434695e-06, "loss": 0.5697, "step": 5181 }, { "epoch": 0.772913714669252, "grad_norm": 2.0148913860321045, "learning_rate": 2.5875488628433e-06, "loss": 0.5799, "step": 5182 }, { "epoch": 0.7730628682228354, "grad_norm": 1.3354085683822632, "learning_rate": 2.5843065027361526e-06, "loss": 0.565, "step": 5183 }, { "epoch": 0.7732120217764188, "grad_norm": 1.339113473892212, "learning_rate": 2.581065873870203e-06, "loss": 0.5277, "step": 5184 }, { "epoch": 0.7733611753300023, "grad_norm": 1.7473371028900146, "learning_rate": 2.577826977001995e-06, "loss": 0.5282, "step": 5185 }, { "epoch": 0.7735103288835856, "grad_norm": 1.696895718574524, "learning_rate": 2.574589812887669e-06, "loss": 0.5047, "step": 5186 }, { "epoch": 0.7736594824371691, "grad_norm": 1.3785395622253418, "learning_rate": 2.5713543822829636e-06, "loss": 0.5665, "step": 5187 }, { "epoch": 0.7738086359907524, "grad_norm": 1.3405206203460693, "learning_rate": 2.5681206859432127e-06, "loss": 0.5566, "step": 5188 }, { "epoch": 0.7739577895443359, "grad_norm": 1.4612971544265747, "learning_rate": 2.5648887246233357e-06, "loss": 0.5661, "step": 5189 }, { "epoch": 0.7741069430979193, "grad_norm": 1.314090609550476, "learning_rate": 2.5616584990778625e-06, "loss": 0.6171, "step": 5190 }, { "epoch": 0.7742560966515027, "grad_norm": 1.2950819730758667, "learning_rate": 2.5584300100609116e-06, "loss": 0.5895, "step": 5191 }, { "epoch": 0.7744052502050861, "grad_norm": 1.3935058116912842, "learning_rate": 2.5552032583261867e-06, "loss": 0.4939, "step": 5192 }, { "epoch": 0.7745544037586696, "grad_norm": 1.5843981504440308, "learning_rate": 2.551978244626998e-06, "loss": 0.6299, "step": 5193 }, { "epoch": 0.7747035573122529, "grad_norm": 1.3542171716690063, "learning_rate": 2.548754969716248e-06, "loss": 0.5455, "step": 5194 }, { "epoch": 0.7748527108658364, "grad_norm": 1.3475044965744019, "learning_rate": 2.5455334343464246e-06, "loss": 0.5339, "step": 5195 }, { "epoch": 0.7750018644194198, "grad_norm": 1.5197460651397705, "learning_rate": 2.54231363926962e-06, "loss": 0.6214, "step": 5196 }, { "epoch": 0.7751510179730032, "grad_norm": 1.3530246019363403, "learning_rate": 2.5390955852375177e-06, "loss": 0.5754, "step": 5197 }, { "epoch": 0.7753001715265866, "grad_norm": 1.6530836820602417, "learning_rate": 2.5358792730013847e-06, "loss": 0.601, "step": 5198 }, { "epoch": 0.7754493250801701, "grad_norm": 1.321293830871582, "learning_rate": 2.532664703312099e-06, "loss": 0.554, "step": 5199 }, { "epoch": 0.7755984786337534, "grad_norm": 1.3287636041641235, "learning_rate": 2.5294518769201213e-06, "loss": 0.6683, "step": 5200 }, { "epoch": 0.7757476321873369, "grad_norm": 1.3595460653305054, "learning_rate": 2.5262407945755017e-06, "loss": 0.5179, "step": 5201 }, { "epoch": 0.7758967857409202, "grad_norm": 1.5078586339950562, "learning_rate": 2.5230314570278914e-06, "loss": 0.5516, "step": 5202 }, { "epoch": 0.7760459392945037, "grad_norm": 1.4131909608840942, "learning_rate": 2.5198238650265317e-06, "loss": 0.5717, "step": 5203 }, { "epoch": 0.7761950928480871, "grad_norm": 1.1875410079956055, "learning_rate": 2.5166180193202517e-06, "loss": 0.4859, "step": 5204 }, { "epoch": 0.7763442464016705, "grad_norm": 0.8127466440200806, "learning_rate": 2.5134139206574793e-06, "loss": 0.621, "step": 5205 }, { "epoch": 0.7764933999552539, "grad_norm": 1.3414584398269653, "learning_rate": 2.5102115697862304e-06, "loss": 0.5797, "step": 5206 }, { "epoch": 0.7766425535088374, "grad_norm": 1.3228057622909546, "learning_rate": 2.5070109674541155e-06, "loss": 0.5728, "step": 5207 }, { "epoch": 0.7767917070624207, "grad_norm": 1.296670913696289, "learning_rate": 2.503812114408336e-06, "loss": 0.5987, "step": 5208 }, { "epoch": 0.7769408606160042, "grad_norm": 0.8169787526130676, "learning_rate": 2.5006150113956874e-06, "loss": 0.605, "step": 5209 }, { "epoch": 0.7770900141695876, "grad_norm": 1.3678984642028809, "learning_rate": 2.4974196591625467e-06, "loss": 0.5759, "step": 5210 }, { "epoch": 0.777239167723171, "grad_norm": 1.414449691772461, "learning_rate": 2.494226058454894e-06, "loss": 0.5936, "step": 5211 }, { "epoch": 0.7773883212767544, "grad_norm": 1.351351022720337, "learning_rate": 2.491034210018295e-06, "loss": 0.5568, "step": 5212 }, { "epoch": 0.7775374748303379, "grad_norm": 1.2791587114334106, "learning_rate": 2.4878441145979115e-06, "loss": 0.4476, "step": 5213 }, { "epoch": 0.7776866283839212, "grad_norm": 1.2251204252243042, "learning_rate": 2.4846557729384835e-06, "loss": 0.477, "step": 5214 }, { "epoch": 0.7778357819375047, "grad_norm": 1.4526203870773315, "learning_rate": 2.4814691857843544e-06, "loss": 0.5559, "step": 5215 }, { "epoch": 0.777984935491088, "grad_norm": 1.485526204109192, "learning_rate": 2.478284353879453e-06, "loss": 0.6046, "step": 5216 }, { "epoch": 0.7781340890446715, "grad_norm": 1.2566992044448853, "learning_rate": 2.475101277967299e-06, "loss": 0.4362, "step": 5217 }, { "epoch": 0.7782832425982549, "grad_norm": 1.3190946578979492, "learning_rate": 2.471919958791e-06, "loss": 0.574, "step": 5218 }, { "epoch": 0.7784323961518383, "grad_norm": 1.4692132472991943, "learning_rate": 2.4687403970932622e-06, "loss": 0.5995, "step": 5219 }, { "epoch": 0.7785815497054217, "grad_norm": 1.1837316751480103, "learning_rate": 2.465562593616365e-06, "loss": 0.4951, "step": 5220 }, { "epoch": 0.7787307032590052, "grad_norm": 1.27358877658844, "learning_rate": 2.4623865491021913e-06, "loss": 0.5092, "step": 5221 }, { "epoch": 0.7788798568125885, "grad_norm": 1.3793076276779175, "learning_rate": 2.4592122642922134e-06, "loss": 0.5932, "step": 5222 }, { "epoch": 0.779029010366172, "grad_norm": 1.2855063676834106, "learning_rate": 2.456039739927479e-06, "loss": 0.548, "step": 5223 }, { "epoch": 0.7791781639197554, "grad_norm": 1.3413559198379517, "learning_rate": 2.452868976748639e-06, "loss": 0.5656, "step": 5224 }, { "epoch": 0.7793273174733388, "grad_norm": 1.384921669960022, "learning_rate": 2.449699975495934e-06, "loss": 0.5775, "step": 5225 }, { "epoch": 0.7794764710269222, "grad_norm": 1.3617887496948242, "learning_rate": 2.4465327369091784e-06, "loss": 0.5073, "step": 5226 }, { "epoch": 0.7796256245805057, "grad_norm": 1.281256079673767, "learning_rate": 2.4433672617277892e-06, "loss": 0.6165, "step": 5227 }, { "epoch": 0.779774778134089, "grad_norm": 1.2222532033920288, "learning_rate": 2.4402035506907697e-06, "loss": 0.5114, "step": 5228 }, { "epoch": 0.7799239316876725, "grad_norm": 1.3431360721588135, "learning_rate": 2.437041604536702e-06, "loss": 0.5994, "step": 5229 }, { "epoch": 0.7800730852412558, "grad_norm": 1.5590672492980957, "learning_rate": 2.4338814240037643e-06, "loss": 0.5018, "step": 5230 }, { "epoch": 0.7802222387948393, "grad_norm": 1.3060364723205566, "learning_rate": 2.430723009829724e-06, "loss": 0.5121, "step": 5231 }, { "epoch": 0.7803713923484227, "grad_norm": 0.8601388335227966, "learning_rate": 2.427566362751934e-06, "loss": 0.6313, "step": 5232 }, { "epoch": 0.7805205459020061, "grad_norm": 1.333444356918335, "learning_rate": 2.424411483507325e-06, "loss": 0.4984, "step": 5233 }, { "epoch": 0.7806696994555895, "grad_norm": 0.8535929918289185, "learning_rate": 2.4212583728324367e-06, "loss": 0.6358, "step": 5234 }, { "epoch": 0.780818853009173, "grad_norm": 1.3896667957305908, "learning_rate": 2.4181070314633727e-06, "loss": 0.564, "step": 5235 }, { "epoch": 0.7809680065627563, "grad_norm": 1.4592711925506592, "learning_rate": 2.4149574601358383e-06, "loss": 0.5998, "step": 5236 }, { "epoch": 0.7811171601163398, "grad_norm": 1.6252247095108032, "learning_rate": 2.4118096595851205e-06, "loss": 0.553, "step": 5237 }, { "epoch": 0.7812663136699232, "grad_norm": 1.3192996978759766, "learning_rate": 2.408663630546095e-06, "loss": 0.5058, "step": 5238 }, { "epoch": 0.7814154672235066, "grad_norm": 1.4381680488586426, "learning_rate": 2.405519373753219e-06, "loss": 0.5698, "step": 5239 }, { "epoch": 0.78156462077709, "grad_norm": 1.3194490671157837, "learning_rate": 2.4023768899405407e-06, "loss": 0.5273, "step": 5240 }, { "epoch": 0.7817137743306735, "grad_norm": 1.6805589199066162, "learning_rate": 2.3992361798416974e-06, "loss": 0.528, "step": 5241 }, { "epoch": 0.7818629278842568, "grad_norm": 1.3375550508499146, "learning_rate": 2.3960972441898976e-06, "loss": 0.5196, "step": 5242 }, { "epoch": 0.7820120814378403, "grad_norm": 1.507750153541565, "learning_rate": 2.392960083717957e-06, "loss": 0.4936, "step": 5243 }, { "epoch": 0.7821612349914236, "grad_norm": 1.3528618812561035, "learning_rate": 2.389824699158263e-06, "loss": 0.6147, "step": 5244 }, { "epoch": 0.7823103885450071, "grad_norm": 1.5838193893432617, "learning_rate": 2.3866910912427875e-06, "loss": 0.5908, "step": 5245 }, { "epoch": 0.7824595420985905, "grad_norm": 0.8093360662460327, "learning_rate": 2.383559260703093e-06, "loss": 0.6324, "step": 5246 }, { "epoch": 0.782608695652174, "grad_norm": 1.2933427095413208, "learning_rate": 2.3804292082703295e-06, "loss": 0.5986, "step": 5247 }, { "epoch": 0.7827578492057573, "grad_norm": 1.6442195177078247, "learning_rate": 2.3773009346752207e-06, "loss": 0.5341, "step": 5248 }, { "epoch": 0.7829070027593408, "grad_norm": 1.762033462524414, "learning_rate": 2.374174440648086e-06, "loss": 0.6027, "step": 5249 }, { "epoch": 0.7830561563129241, "grad_norm": 1.6093273162841797, "learning_rate": 2.3710497269188258e-06, "loss": 0.602, "step": 5250 }, { "epoch": 0.7832053098665076, "grad_norm": 1.4040088653564453, "learning_rate": 2.3679267942169237e-06, "loss": 0.5878, "step": 5251 }, { "epoch": 0.783354463420091, "grad_norm": 1.5786739587783813, "learning_rate": 2.3648056432714483e-06, "loss": 0.5335, "step": 5252 }, { "epoch": 0.7835036169736744, "grad_norm": 1.3005597591400146, "learning_rate": 2.361686274811056e-06, "loss": 0.5342, "step": 5253 }, { "epoch": 0.7836527705272578, "grad_norm": 1.6790833473205566, "learning_rate": 2.3585686895639757e-06, "loss": 0.5618, "step": 5254 }, { "epoch": 0.7838019240808413, "grad_norm": 1.3863986730575562, "learning_rate": 2.355452888258033e-06, "loss": 0.5598, "step": 5255 }, { "epoch": 0.7839510776344246, "grad_norm": 1.446117639541626, "learning_rate": 2.352338871620634e-06, "loss": 0.5667, "step": 5256 }, { "epoch": 0.7841002311880081, "grad_norm": 1.3882577419281006, "learning_rate": 2.349226640378759e-06, "loss": 0.6026, "step": 5257 }, { "epoch": 0.7842493847415914, "grad_norm": 1.2022180557250977, "learning_rate": 2.346116195258982e-06, "loss": 0.4887, "step": 5258 }, { "epoch": 0.7843985382951749, "grad_norm": 1.4076368808746338, "learning_rate": 2.3430075369874563e-06, "loss": 0.6202, "step": 5259 }, { "epoch": 0.7845476918487583, "grad_norm": 1.6472328901290894, "learning_rate": 2.339900666289918e-06, "loss": 0.5933, "step": 5260 }, { "epoch": 0.7846968454023417, "grad_norm": 1.602686882019043, "learning_rate": 2.3367955838916855e-06, "loss": 0.5846, "step": 5261 }, { "epoch": 0.7848459989559251, "grad_norm": 1.336650013923645, "learning_rate": 2.333692290517664e-06, "loss": 0.5332, "step": 5262 }, { "epoch": 0.7849951525095086, "grad_norm": 1.348346471786499, "learning_rate": 2.3305907868923306e-06, "loss": 0.5421, "step": 5263 }, { "epoch": 0.7851443060630919, "grad_norm": 1.5523875951766968, "learning_rate": 2.327491073739755e-06, "loss": 0.5787, "step": 5264 }, { "epoch": 0.7852934596166754, "grad_norm": 1.4518197774887085, "learning_rate": 2.324393151783585e-06, "loss": 0.6896, "step": 5265 }, { "epoch": 0.7854426131702588, "grad_norm": 1.4188933372497559, "learning_rate": 2.321297021747052e-06, "loss": 0.5583, "step": 5266 }, { "epoch": 0.7855917667238422, "grad_norm": 1.3180091381072998, "learning_rate": 2.318202684352964e-06, "loss": 0.6193, "step": 5267 }, { "epoch": 0.7857409202774256, "grad_norm": 1.4848312139511108, "learning_rate": 2.315110140323713e-06, "loss": 0.5412, "step": 5268 }, { "epoch": 0.7858900738310091, "grad_norm": 1.360925555229187, "learning_rate": 2.312019390381277e-06, "loss": 0.6322, "step": 5269 }, { "epoch": 0.7860392273845924, "grad_norm": 1.5018075704574585, "learning_rate": 2.3089304352472095e-06, "loss": 0.5794, "step": 5270 }, { "epoch": 0.7861883809381759, "grad_norm": 1.3709274530410767, "learning_rate": 2.3058432756426473e-06, "loss": 0.5101, "step": 5271 }, { "epoch": 0.7863375344917592, "grad_norm": 1.4235382080078125, "learning_rate": 2.3027579122883114e-06, "loss": 0.545, "step": 5272 }, { "epoch": 0.7864866880453427, "grad_norm": 1.4714082479476929, "learning_rate": 2.2996743459044925e-06, "loss": 0.6167, "step": 5273 }, { "epoch": 0.7866358415989261, "grad_norm": 1.369319200515747, "learning_rate": 2.296592577211072e-06, "loss": 0.5495, "step": 5274 }, { "epoch": 0.7867849951525095, "grad_norm": 1.2560553550720215, "learning_rate": 2.2935126069275116e-06, "loss": 0.5525, "step": 5275 }, { "epoch": 0.7869341487060929, "grad_norm": 1.2764402627944946, "learning_rate": 2.290434435772845e-06, "loss": 0.5779, "step": 5276 }, { "epoch": 0.7870833022596764, "grad_norm": 1.314451813697815, "learning_rate": 2.28735806446569e-06, "loss": 0.5194, "step": 5277 }, { "epoch": 0.7872324558132597, "grad_norm": 1.4680525064468384, "learning_rate": 2.284283493724255e-06, "loss": 0.5355, "step": 5278 }, { "epoch": 0.7873816093668432, "grad_norm": 1.3844728469848633, "learning_rate": 2.2812107242663082e-06, "loss": 0.6151, "step": 5279 }, { "epoch": 0.7875307629204266, "grad_norm": 1.383569359779358, "learning_rate": 2.2781397568092113e-06, "loss": 0.6133, "step": 5280 }, { "epoch": 0.78767991647401, "grad_norm": 1.3555145263671875, "learning_rate": 2.2750705920699044e-06, "loss": 0.5979, "step": 5281 }, { "epoch": 0.7878290700275934, "grad_norm": 1.2972056865692139, "learning_rate": 2.2720032307648967e-06, "loss": 0.5889, "step": 5282 }, { "epoch": 0.7879782235811769, "grad_norm": 1.2120038270950317, "learning_rate": 2.2689376736102874e-06, "loss": 0.5655, "step": 5283 }, { "epoch": 0.7881273771347602, "grad_norm": 1.2861783504486084, "learning_rate": 2.2658739213217496e-06, "loss": 0.5234, "step": 5284 }, { "epoch": 0.7882765306883437, "grad_norm": 1.4208217859268188, "learning_rate": 2.262811974614537e-06, "loss": 0.5848, "step": 5285 }, { "epoch": 0.788425684241927, "grad_norm": 1.0834152698516846, "learning_rate": 2.2597518342034797e-06, "loss": 0.486, "step": 5286 }, { "epoch": 0.7885748377955105, "grad_norm": 1.3972746133804321, "learning_rate": 2.25669350080299e-06, "loss": 0.6091, "step": 5287 }, { "epoch": 0.7887239913490939, "grad_norm": 1.2654223442077637, "learning_rate": 2.2536369751270514e-06, "loss": 0.5818, "step": 5288 }, { "epoch": 0.7888731449026773, "grad_norm": 1.6365089416503906, "learning_rate": 2.25058225788923e-06, "loss": 0.509, "step": 5289 }, { "epoch": 0.7890222984562607, "grad_norm": 1.315350890159607, "learning_rate": 2.2475293498026697e-06, "loss": 0.5874, "step": 5290 }, { "epoch": 0.7891714520098442, "grad_norm": 1.3517100811004639, "learning_rate": 2.2444782515800946e-06, "loss": 0.4831, "step": 5291 }, { "epoch": 0.7893206055634275, "grad_norm": 1.510596513748169, "learning_rate": 2.2414289639337983e-06, "loss": 0.5306, "step": 5292 }, { "epoch": 0.789469759117011, "grad_norm": 1.268338918685913, "learning_rate": 2.2383814875756583e-06, "loss": 0.5241, "step": 5293 }, { "epoch": 0.7896189126705944, "grad_norm": 2.313830852508545, "learning_rate": 2.235335823217127e-06, "loss": 0.5781, "step": 5294 }, { "epoch": 0.7897680662241778, "grad_norm": 1.6137412786483765, "learning_rate": 2.2322919715692358e-06, "loss": 0.5465, "step": 5295 }, { "epoch": 0.7899172197777612, "grad_norm": 1.4577363729476929, "learning_rate": 2.229249933342591e-06, "loss": 0.4988, "step": 5296 }, { "epoch": 0.7900663733313447, "grad_norm": 1.372381567955017, "learning_rate": 2.2262097092473776e-06, "loss": 0.5681, "step": 5297 }, { "epoch": 0.790215526884928, "grad_norm": 1.3930693864822388, "learning_rate": 2.2231712999933506e-06, "loss": 0.6128, "step": 5298 }, { "epoch": 0.7903646804385115, "grad_norm": 1.3021295070648193, "learning_rate": 2.2201347062898505e-06, "loss": 0.4742, "step": 5299 }, { "epoch": 0.7905138339920948, "grad_norm": 1.4213265180587769, "learning_rate": 2.2170999288457896e-06, "loss": 0.6046, "step": 5300 }, { "epoch": 0.7906629875456783, "grad_norm": 1.351348876953125, "learning_rate": 2.2140669683696513e-06, "loss": 0.4971, "step": 5301 }, { "epoch": 0.7908121410992617, "grad_norm": 0.8518800139427185, "learning_rate": 2.211035825569503e-06, "loss": 0.62, "step": 5302 }, { "epoch": 0.7909612946528451, "grad_norm": 1.6113687753677368, "learning_rate": 2.2080065011529848e-06, "loss": 0.555, "step": 5303 }, { "epoch": 0.7911104482064285, "grad_norm": 1.2956067323684692, "learning_rate": 2.2049789958273117e-06, "loss": 0.5251, "step": 5304 }, { "epoch": 0.791259601760012, "grad_norm": 1.390625, "learning_rate": 2.201953310299274e-06, "loss": 0.6124, "step": 5305 }, { "epoch": 0.7914087553135953, "grad_norm": 1.3518092632293701, "learning_rate": 2.1989294452752398e-06, "loss": 0.5441, "step": 5306 }, { "epoch": 0.7915579088671788, "grad_norm": 1.4385831356048584, "learning_rate": 2.1959074014611447e-06, "loss": 0.4812, "step": 5307 }, { "epoch": 0.7917070624207622, "grad_norm": 1.4919548034667969, "learning_rate": 2.192887179562506e-06, "loss": 0.6073, "step": 5308 }, { "epoch": 0.7918562159743456, "grad_norm": 1.7877964973449707, "learning_rate": 2.1898687802844187e-06, "loss": 0.5619, "step": 5309 }, { "epoch": 0.792005369527929, "grad_norm": 1.5276291370391846, "learning_rate": 2.186852204331541e-06, "loss": 0.5779, "step": 5310 }, { "epoch": 0.7921545230815125, "grad_norm": 1.6744434833526611, "learning_rate": 2.183837452408113e-06, "loss": 0.5452, "step": 5311 }, { "epoch": 0.7923036766350958, "grad_norm": 2.231724739074707, "learning_rate": 2.1808245252179503e-06, "loss": 0.5597, "step": 5312 }, { "epoch": 0.7924528301886793, "grad_norm": 1.4130058288574219, "learning_rate": 2.177813423464439e-06, "loss": 0.5463, "step": 5313 }, { "epoch": 0.7926019837422627, "grad_norm": 1.456976294517517, "learning_rate": 2.1748041478505386e-06, "loss": 0.5316, "step": 5314 }, { "epoch": 0.7927511372958461, "grad_norm": 1.5406992435455322, "learning_rate": 2.1717966990787877e-06, "loss": 0.5445, "step": 5315 }, { "epoch": 0.7929002908494295, "grad_norm": 1.5238075256347656, "learning_rate": 2.168791077851293e-06, "loss": 0.572, "step": 5316 }, { "epoch": 0.793049444403013, "grad_norm": 1.3241057395935059, "learning_rate": 2.1657872848697336e-06, "loss": 0.4918, "step": 5317 }, { "epoch": 0.7931985979565963, "grad_norm": 1.471045970916748, "learning_rate": 2.1627853208353655e-06, "loss": 0.5756, "step": 5318 }, { "epoch": 0.7933477515101798, "grad_norm": 1.532816767692566, "learning_rate": 2.1597851864490193e-06, "loss": 0.5358, "step": 5319 }, { "epoch": 0.7934969050637631, "grad_norm": 1.5594377517700195, "learning_rate": 2.156786882411087e-06, "loss": 0.5386, "step": 5320 }, { "epoch": 0.7936460586173466, "grad_norm": 1.5323362350463867, "learning_rate": 2.1537904094215512e-06, "loss": 0.534, "step": 5321 }, { "epoch": 0.79379521217093, "grad_norm": 1.6237925291061401, "learning_rate": 2.1507957681799574e-06, "loss": 0.5331, "step": 5322 }, { "epoch": 0.7939443657245134, "grad_norm": 1.4443899393081665, "learning_rate": 2.147802959385419e-06, "loss": 0.527, "step": 5323 }, { "epoch": 0.7940935192780968, "grad_norm": 1.3324673175811768, "learning_rate": 2.1448119837366266e-06, "loss": 0.567, "step": 5324 }, { "epoch": 0.7942426728316803, "grad_norm": 1.4919627904891968, "learning_rate": 2.1418228419318486e-06, "loss": 0.5588, "step": 5325 }, { "epoch": 0.7943918263852636, "grad_norm": 1.5347496271133423, "learning_rate": 2.1388355346689118e-06, "loss": 0.5432, "step": 5326 }, { "epoch": 0.7945409799388471, "grad_norm": 1.455069661140442, "learning_rate": 2.135850062645225e-06, "loss": 0.5484, "step": 5327 }, { "epoch": 0.7946901334924305, "grad_norm": 1.824220895767212, "learning_rate": 2.1328664265577694e-06, "loss": 0.5265, "step": 5328 }, { "epoch": 0.7948392870460139, "grad_norm": 1.4352052211761475, "learning_rate": 2.1298846271030847e-06, "loss": 0.4686, "step": 5329 }, { "epoch": 0.7949884405995973, "grad_norm": 1.4160393476486206, "learning_rate": 2.126904664977302e-06, "loss": 0.546, "step": 5330 }, { "epoch": 0.7951375941531807, "grad_norm": 1.3659803867340088, "learning_rate": 2.123926540876109e-06, "loss": 0.5536, "step": 5331 }, { "epoch": 0.7952867477067641, "grad_norm": 1.391606330871582, "learning_rate": 2.1209502554947636e-06, "loss": 0.5993, "step": 5332 }, { "epoch": 0.7954359012603476, "grad_norm": 1.397342562675476, "learning_rate": 2.1179758095281023e-06, "loss": 0.6161, "step": 5333 }, { "epoch": 0.7955850548139309, "grad_norm": 0.8358104825019836, "learning_rate": 2.1150032036705316e-06, "loss": 0.6544, "step": 5334 }, { "epoch": 0.7957342083675144, "grad_norm": 1.552930474281311, "learning_rate": 2.1120324386160187e-06, "loss": 0.6659, "step": 5335 }, { "epoch": 0.7958833619210978, "grad_norm": 1.308136224746704, "learning_rate": 2.109063515058111e-06, "loss": 0.4519, "step": 5336 }, { "epoch": 0.7960325154746812, "grad_norm": 0.7676651477813721, "learning_rate": 2.1060964336899216e-06, "loss": 0.5819, "step": 5337 }, { "epoch": 0.7961816690282646, "grad_norm": 1.3321679830551147, "learning_rate": 2.1031311952041366e-06, "loss": 0.5577, "step": 5338 }, { "epoch": 0.7963308225818481, "grad_norm": 1.3564836978912354, "learning_rate": 2.1001678002930093e-06, "loss": 0.5074, "step": 5339 }, { "epoch": 0.7964799761354314, "grad_norm": 1.2441413402557373, "learning_rate": 2.0972062496483657e-06, "loss": 0.5297, "step": 5340 }, { "epoch": 0.7966291296890149, "grad_norm": 1.5339916944503784, "learning_rate": 2.0942465439615935e-06, "loss": 0.6121, "step": 5341 }, { "epoch": 0.7967782832425983, "grad_norm": 1.4733195304870605, "learning_rate": 2.0912886839236567e-06, "loss": 0.5804, "step": 5342 }, { "epoch": 0.7969274367961817, "grad_norm": 0.7637775540351868, "learning_rate": 2.0883326702250885e-06, "loss": 0.5954, "step": 5343 }, { "epoch": 0.7970765903497651, "grad_norm": 1.2524665594100952, "learning_rate": 2.0853785035559903e-06, "loss": 0.5952, "step": 5344 }, { "epoch": 0.7972257439033485, "grad_norm": 1.2226752042770386, "learning_rate": 2.082426184606027e-06, "loss": 0.5294, "step": 5345 }, { "epoch": 0.7973748974569319, "grad_norm": 1.894169569015503, "learning_rate": 2.0794757140644397e-06, "loss": 0.5189, "step": 5346 }, { "epoch": 0.7975240510105154, "grad_norm": 2.56071400642395, "learning_rate": 2.076527092620032e-06, "loss": 0.535, "step": 5347 }, { "epoch": 0.7976732045640987, "grad_norm": 1.7337512969970703, "learning_rate": 2.0735803209611805e-06, "loss": 0.5629, "step": 5348 }, { "epoch": 0.7978223581176822, "grad_norm": 1.5834839344024658, "learning_rate": 2.070635399775828e-06, "loss": 0.5418, "step": 5349 }, { "epoch": 0.7979715116712656, "grad_norm": 1.5834046602249146, "learning_rate": 2.0676923297514874e-06, "loss": 0.6259, "step": 5350 }, { "epoch": 0.798120665224849, "grad_norm": 1.3963849544525146, "learning_rate": 2.064751111575232e-06, "loss": 0.5508, "step": 5351 }, { "epoch": 0.7982698187784324, "grad_norm": 1.2368018627166748, "learning_rate": 2.0618117459337107e-06, "loss": 0.5532, "step": 5352 }, { "epoch": 0.7984189723320159, "grad_norm": 1.2956523895263672, "learning_rate": 2.0588742335131397e-06, "loss": 0.577, "step": 5353 }, { "epoch": 0.7985681258855992, "grad_norm": 1.309914231300354, "learning_rate": 2.0559385749992956e-06, "loss": 0.5006, "step": 5354 }, { "epoch": 0.7987172794391827, "grad_norm": 1.5283100605010986, "learning_rate": 2.053004771077525e-06, "loss": 0.5319, "step": 5355 }, { "epoch": 0.798866432992766, "grad_norm": 1.2941631078720093, "learning_rate": 2.0500728224327537e-06, "loss": 0.5287, "step": 5356 }, { "epoch": 0.7990155865463495, "grad_norm": 1.326649785041809, "learning_rate": 2.047142729749454e-06, "loss": 0.5454, "step": 5357 }, { "epoch": 0.7991647400999329, "grad_norm": 1.4385441541671753, "learning_rate": 2.044214493711677e-06, "loss": 0.538, "step": 5358 }, { "epoch": 0.7993138936535163, "grad_norm": 1.4578192234039307, "learning_rate": 2.041288115003043e-06, "loss": 0.6216, "step": 5359 }, { "epoch": 0.7994630472070997, "grad_norm": 1.3557178974151611, "learning_rate": 2.038363594306727e-06, "loss": 0.4985, "step": 5360 }, { "epoch": 0.7996122007606832, "grad_norm": 1.4426599740982056, "learning_rate": 2.0354409323054814e-06, "loss": 0.5707, "step": 5361 }, { "epoch": 0.7997613543142665, "grad_norm": 1.3663747310638428, "learning_rate": 2.0325201296816177e-06, "loss": 0.6459, "step": 5362 }, { "epoch": 0.79991050786785, "grad_norm": 1.2277848720550537, "learning_rate": 2.0296011871170208e-06, "loss": 0.5485, "step": 5363 }, { "epoch": 0.8000596614214334, "grad_norm": 1.5503143072128296, "learning_rate": 2.0266841052931275e-06, "loss": 0.5095, "step": 5364 }, { "epoch": 0.8002088149750167, "grad_norm": 1.785555362701416, "learning_rate": 2.0237688848909607e-06, "loss": 0.5557, "step": 5365 }, { "epoch": 0.8003579685286002, "grad_norm": 1.4574062824249268, "learning_rate": 2.020855526591089e-06, "loss": 0.6088, "step": 5366 }, { "epoch": 0.8005071220821836, "grad_norm": 1.4183381795883179, "learning_rate": 2.0179440310736575e-06, "loss": 0.5772, "step": 5367 }, { "epoch": 0.800656275635767, "grad_norm": 1.8792822360992432, "learning_rate": 2.015034399018373e-06, "loss": 0.5943, "step": 5368 }, { "epoch": 0.8008054291893504, "grad_norm": 1.3521884679794312, "learning_rate": 2.0121266311045106e-06, "loss": 0.5973, "step": 5369 }, { "epoch": 0.8009545827429339, "grad_norm": 1.6885976791381836, "learning_rate": 2.009220728010901e-06, "loss": 0.5618, "step": 5370 }, { "epoch": 0.8011037362965172, "grad_norm": 1.2928786277770996, "learning_rate": 2.0063166904159516e-06, "loss": 0.5681, "step": 5371 }, { "epoch": 0.8012528898501007, "grad_norm": 1.4075114727020264, "learning_rate": 2.0034145189976275e-06, "loss": 0.5956, "step": 5372 }, { "epoch": 0.801402043403684, "grad_norm": 1.5228734016418457, "learning_rate": 2.0005142144334533e-06, "loss": 0.5662, "step": 5373 }, { "epoch": 0.8015511969572675, "grad_norm": 2.909062385559082, "learning_rate": 1.9976157774005323e-06, "loss": 0.65, "step": 5374 }, { "epoch": 0.8017003505108509, "grad_norm": 1.42556631565094, "learning_rate": 1.994719208575522e-06, "loss": 0.5362, "step": 5375 }, { "epoch": 0.8018495040644343, "grad_norm": 1.4417308568954468, "learning_rate": 1.9918245086346387e-06, "loss": 0.568, "step": 5376 }, { "epoch": 0.8019986576180177, "grad_norm": 1.5643653869628906, "learning_rate": 1.9889316782536737e-06, "loss": 0.6667, "step": 5377 }, { "epoch": 0.8021478111716012, "grad_norm": 1.3208990097045898, "learning_rate": 1.9860407181079787e-06, "loss": 0.4969, "step": 5378 }, { "epoch": 0.8022969647251845, "grad_norm": 1.3318156003952026, "learning_rate": 1.9831516288724607e-06, "loss": 0.5413, "step": 5379 }, { "epoch": 0.802446118278768, "grad_norm": 1.7786526679992676, "learning_rate": 1.9802644112215996e-06, "loss": 0.5273, "step": 5380 }, { "epoch": 0.8025952718323514, "grad_norm": 1.3596200942993164, "learning_rate": 1.9773790658294368e-06, "loss": 0.5494, "step": 5381 }, { "epoch": 0.8027444253859348, "grad_norm": 1.3125089406967163, "learning_rate": 1.9744955933695663e-06, "loss": 0.5547, "step": 5382 }, { "epoch": 0.8028935789395182, "grad_norm": 1.363503336906433, "learning_rate": 1.9716139945151634e-06, "loss": 0.5437, "step": 5383 }, { "epoch": 0.8030427324931017, "grad_norm": 1.4180288314819336, "learning_rate": 1.9687342699389542e-06, "loss": 0.5607, "step": 5384 }, { "epoch": 0.803191886046685, "grad_norm": 1.2843005657196045, "learning_rate": 1.9658564203132235e-06, "loss": 0.5377, "step": 5385 }, { "epoch": 0.8033410396002685, "grad_norm": 1.2832924127578735, "learning_rate": 1.962980446309827e-06, "loss": 0.5514, "step": 5386 }, { "epoch": 0.8034901931538518, "grad_norm": 1.3043702840805054, "learning_rate": 1.9601063486001815e-06, "loss": 0.5539, "step": 5387 }, { "epoch": 0.8036393467074353, "grad_norm": 5.5018205642700195, "learning_rate": 1.9572341278552575e-06, "loss": 0.5055, "step": 5388 }, { "epoch": 0.8037885002610187, "grad_norm": 1.4173887968063354, "learning_rate": 1.9543637847455976e-06, "loss": 0.6183, "step": 5389 }, { "epoch": 0.8039376538146021, "grad_norm": 1.5022988319396973, "learning_rate": 1.9514953199413013e-06, "loss": 0.5206, "step": 5390 }, { "epoch": 0.8040868073681855, "grad_norm": 1.3904963731765747, "learning_rate": 1.948628734112029e-06, "loss": 0.4991, "step": 5391 }, { "epoch": 0.804235960921769, "grad_norm": 1.5608768463134766, "learning_rate": 1.9457640279270053e-06, "loss": 0.6252, "step": 5392 }, { "epoch": 0.8043851144753523, "grad_norm": 1.3646959066390991, "learning_rate": 1.942901202055015e-06, "loss": 0.5644, "step": 5393 }, { "epoch": 0.8045342680289358, "grad_norm": 1.939101219177246, "learning_rate": 1.9400402571644005e-06, "loss": 0.5344, "step": 5394 }, { "epoch": 0.8046834215825192, "grad_norm": 1.4021594524383545, "learning_rate": 1.937181193923068e-06, "loss": 0.5601, "step": 5395 }, { "epoch": 0.8048325751361026, "grad_norm": 1.476307988166809, "learning_rate": 1.9343240129984843e-06, "loss": 0.521, "step": 5396 }, { "epoch": 0.804981728689686, "grad_norm": 1.442497730255127, "learning_rate": 1.9314687150576806e-06, "loss": 0.6247, "step": 5397 }, { "epoch": 0.8051308822432695, "grad_norm": 1.7611860036849976, "learning_rate": 1.928615300767237e-06, "loss": 0.5241, "step": 5398 }, { "epoch": 0.8052800357968528, "grad_norm": 1.2152632474899292, "learning_rate": 1.9257637707933043e-06, "loss": 0.5683, "step": 5399 }, { "epoch": 0.8054291893504363, "grad_norm": 0.8156342506408691, "learning_rate": 1.922914125801596e-06, "loss": 0.6621, "step": 5400 }, { "epoch": 0.8055783429040196, "grad_norm": 1.2813048362731934, "learning_rate": 1.920066366457374e-06, "loss": 0.632, "step": 5401 }, { "epoch": 0.8057274964576031, "grad_norm": 2.234842300415039, "learning_rate": 1.917220493425467e-06, "loss": 0.4828, "step": 5402 }, { "epoch": 0.8058766500111865, "grad_norm": 1.4027583599090576, "learning_rate": 1.9143765073702646e-06, "loss": 0.53, "step": 5403 }, { "epoch": 0.8060258035647699, "grad_norm": 1.393050193786621, "learning_rate": 1.911534408955711e-06, "loss": 0.5494, "step": 5404 }, { "epoch": 0.8061749571183533, "grad_norm": 1.3360519409179688, "learning_rate": 1.908694198845312e-06, "loss": 0.6209, "step": 5405 }, { "epoch": 0.8063241106719368, "grad_norm": 1.687534213066101, "learning_rate": 1.9058558777021363e-06, "loss": 0.5516, "step": 5406 }, { "epoch": 0.8064732642255201, "grad_norm": 1.1738133430480957, "learning_rate": 1.9030194461888041e-06, "loss": 0.4871, "step": 5407 }, { "epoch": 0.8066224177791036, "grad_norm": 0.7845811247825623, "learning_rate": 1.900184904967498e-06, "loss": 0.6057, "step": 5408 }, { "epoch": 0.806771571332687, "grad_norm": 1.308988094329834, "learning_rate": 1.8973522546999667e-06, "loss": 0.4874, "step": 5409 }, { "epoch": 0.8069207248862704, "grad_norm": 1.256949543952942, "learning_rate": 1.8945214960475034e-06, "loss": 0.6074, "step": 5410 }, { "epoch": 0.8070698784398538, "grad_norm": 1.4047439098358154, "learning_rate": 1.8916926296709692e-06, "loss": 0.5799, "step": 5411 }, { "epoch": 0.8072190319934373, "grad_norm": 1.2174333333969116, "learning_rate": 1.8888656562307849e-06, "loss": 0.5972, "step": 5412 }, { "epoch": 0.8073681855470206, "grad_norm": 1.4411662817001343, "learning_rate": 1.8860405763869183e-06, "loss": 0.5179, "step": 5413 }, { "epoch": 0.8075173391006041, "grad_norm": 1.2931163311004639, "learning_rate": 1.883217390798907e-06, "loss": 0.597, "step": 5414 }, { "epoch": 0.8076664926541874, "grad_norm": 1.3941041231155396, "learning_rate": 1.8803961001258408e-06, "loss": 0.5591, "step": 5415 }, { "epoch": 0.8078156462077709, "grad_norm": 1.2986332178115845, "learning_rate": 1.8775767050263683e-06, "loss": 0.4727, "step": 5416 }, { "epoch": 0.8079647997613543, "grad_norm": 1.3475617170333862, "learning_rate": 1.874759206158695e-06, "loss": 0.5662, "step": 5417 }, { "epoch": 0.8081139533149377, "grad_norm": 1.8921928405761719, "learning_rate": 1.8719436041805872e-06, "loss": 0.5362, "step": 5418 }, { "epoch": 0.8082631068685211, "grad_norm": 0.8141233325004578, "learning_rate": 1.869129899749359e-06, "loss": 0.5936, "step": 5419 }, { "epoch": 0.8084122604221046, "grad_norm": 1.2384110689163208, "learning_rate": 1.8663180935218927e-06, "loss": 0.4927, "step": 5420 }, { "epoch": 0.8085614139756879, "grad_norm": 1.801287293434143, "learning_rate": 1.86350818615462e-06, "loss": 0.5696, "step": 5421 }, { "epoch": 0.8087105675292714, "grad_norm": 1.2956717014312744, "learning_rate": 1.860700178303535e-06, "loss": 0.5551, "step": 5422 }, { "epoch": 0.8088597210828548, "grad_norm": 1.3395076990127563, "learning_rate": 1.857894070624181e-06, "loss": 0.5569, "step": 5423 }, { "epoch": 0.8090088746364382, "grad_norm": 1.2338849306106567, "learning_rate": 1.855089863771663e-06, "loss": 0.6246, "step": 5424 }, { "epoch": 0.8091580281900216, "grad_norm": 1.4475047588348389, "learning_rate": 1.8522875584006417e-06, "loss": 0.4564, "step": 5425 }, { "epoch": 0.809307181743605, "grad_norm": 1.2626008987426758, "learning_rate": 1.8494871551653338e-06, "loss": 0.5965, "step": 5426 }, { "epoch": 0.8094563352971884, "grad_norm": 1.3251060247421265, "learning_rate": 1.8466886547195106e-06, "loss": 0.532, "step": 5427 }, { "epoch": 0.8096054888507719, "grad_norm": 1.4388395547866821, "learning_rate": 1.8438920577165032e-06, "loss": 0.5796, "step": 5428 }, { "epoch": 0.8097546424043552, "grad_norm": 1.5890318155288696, "learning_rate": 1.84109736480919e-06, "loss": 0.5185, "step": 5429 }, { "epoch": 0.8099037959579387, "grad_norm": 1.1938605308532715, "learning_rate": 1.8383045766500117e-06, "loss": 0.485, "step": 5430 }, { "epoch": 0.8100529495115221, "grad_norm": 1.3776283264160156, "learning_rate": 1.8355136938909656e-06, "loss": 0.5493, "step": 5431 }, { "epoch": 0.8102021030651055, "grad_norm": 1.2458701133728027, "learning_rate": 1.8327247171835961e-06, "loss": 0.4669, "step": 5432 }, { "epoch": 0.8103512566186889, "grad_norm": 1.2303776741027832, "learning_rate": 1.8299376471790097e-06, "loss": 0.5657, "step": 5433 }, { "epoch": 0.8105004101722724, "grad_norm": 1.3308441638946533, "learning_rate": 1.8271524845278676e-06, "loss": 0.5836, "step": 5434 }, { "epoch": 0.8106495637258557, "grad_norm": 1.3241713047027588, "learning_rate": 1.8243692298803816e-06, "loss": 0.5017, "step": 5435 }, { "epoch": 0.8107987172794392, "grad_norm": 1.5172103643417358, "learning_rate": 1.821587883886321e-06, "loss": 0.5491, "step": 5436 }, { "epoch": 0.8109478708330226, "grad_norm": 1.316053867340088, "learning_rate": 1.818808447195013e-06, "loss": 0.5638, "step": 5437 }, { "epoch": 0.811097024386606, "grad_norm": 1.416364312171936, "learning_rate": 1.8160309204553272e-06, "loss": 0.5272, "step": 5438 }, { "epoch": 0.8112461779401894, "grad_norm": 1.4186018705368042, "learning_rate": 1.8132553043156997e-06, "loss": 0.618, "step": 5439 }, { "epoch": 0.8113953314937729, "grad_norm": 1.4214874505996704, "learning_rate": 1.8104815994241155e-06, "loss": 0.586, "step": 5440 }, { "epoch": 0.8115444850473562, "grad_norm": 1.1974064111709595, "learning_rate": 1.807709806428115e-06, "loss": 0.5274, "step": 5441 }, { "epoch": 0.8116936386009397, "grad_norm": 1.4096477031707764, "learning_rate": 1.8049399259747869e-06, "loss": 0.548, "step": 5442 }, { "epoch": 0.811842792154523, "grad_norm": 1.4086158275604248, "learning_rate": 1.8021719587107811e-06, "loss": 0.5819, "step": 5443 }, { "epoch": 0.8119919457081065, "grad_norm": 1.4537659883499146, "learning_rate": 1.7994059052822953e-06, "loss": 0.5154, "step": 5444 }, { "epoch": 0.8121410992616899, "grad_norm": 1.6114003658294678, "learning_rate": 1.7966417663350843e-06, "loss": 0.5696, "step": 5445 }, { "epoch": 0.8122902528152733, "grad_norm": 1.3784832954406738, "learning_rate": 1.7938795425144529e-06, "loss": 0.511, "step": 5446 }, { "epoch": 0.8124394063688567, "grad_norm": 1.283471941947937, "learning_rate": 1.7911192344652616e-06, "loss": 0.538, "step": 5447 }, { "epoch": 0.8125885599224402, "grad_norm": 1.3559801578521729, "learning_rate": 1.78836084283192e-06, "loss": 0.5633, "step": 5448 }, { "epoch": 0.8127377134760235, "grad_norm": 1.3972926139831543, "learning_rate": 1.7856043682583913e-06, "loss": 0.5057, "step": 5449 }, { "epoch": 0.812886867029607, "grad_norm": 1.4664664268493652, "learning_rate": 1.7828498113881976e-06, "loss": 0.6211, "step": 5450 }, { "epoch": 0.8130360205831904, "grad_norm": 1.424375295639038, "learning_rate": 1.780097172864399e-06, "loss": 0.5096, "step": 5451 }, { "epoch": 0.8131851741367738, "grad_norm": 0.7942729592323303, "learning_rate": 1.7773464533296237e-06, "loss": 0.573, "step": 5452 }, { "epoch": 0.8133343276903572, "grad_norm": 1.6492873430252075, "learning_rate": 1.7745976534260457e-06, "loss": 0.6034, "step": 5453 }, { "epoch": 0.8134834812439407, "grad_norm": 1.3182193040847778, "learning_rate": 1.7718507737953838e-06, "loss": 0.5549, "step": 5454 }, { "epoch": 0.813632634797524, "grad_norm": 1.3409324884414673, "learning_rate": 1.7691058150789186e-06, "loss": 0.6323, "step": 5455 }, { "epoch": 0.8137817883511075, "grad_norm": 1.3404929637908936, "learning_rate": 1.7663627779174797e-06, "loss": 0.5811, "step": 5456 }, { "epoch": 0.8139309419046908, "grad_norm": 2.0822787284851074, "learning_rate": 1.7636216629514435e-06, "loss": 0.6193, "step": 5457 }, { "epoch": 0.8140800954582743, "grad_norm": 1.6253204345703125, "learning_rate": 1.7608824708207405e-06, "loss": 0.636, "step": 5458 }, { "epoch": 0.8142292490118577, "grad_norm": 1.321102499961853, "learning_rate": 1.758145202164857e-06, "loss": 0.6516, "step": 5459 }, { "epoch": 0.8143784025654411, "grad_norm": 1.4720309972763062, "learning_rate": 1.7554098576228185e-06, "loss": 0.5823, "step": 5460 }, { "epoch": 0.8145275561190245, "grad_norm": 1.3053042888641357, "learning_rate": 1.752676437833216e-06, "loss": 0.4958, "step": 5461 }, { "epoch": 0.814676709672608, "grad_norm": 1.2485800981521606, "learning_rate": 1.7499449434341843e-06, "loss": 0.5098, "step": 5462 }, { "epoch": 0.8148258632261913, "grad_norm": 1.314996361732483, "learning_rate": 1.7472153750634014e-06, "loss": 0.5406, "step": 5463 }, { "epoch": 0.8149750167797748, "grad_norm": 1.383793830871582, "learning_rate": 1.7444877333581067e-06, "loss": 0.525, "step": 5464 }, { "epoch": 0.8151241703333582, "grad_norm": 1.4460910558700562, "learning_rate": 1.7417620189550877e-06, "loss": 0.5894, "step": 5465 }, { "epoch": 0.8152733238869416, "grad_norm": 1.6057627201080322, "learning_rate": 1.7390382324906752e-06, "loss": 0.5763, "step": 5466 }, { "epoch": 0.815422477440525, "grad_norm": 1.4397624731063843, "learning_rate": 1.7363163746007572e-06, "loss": 0.569, "step": 5467 }, { "epoch": 0.8155716309941085, "grad_norm": 1.515162467956543, "learning_rate": 1.7335964459207688e-06, "loss": 0.5768, "step": 5468 }, { "epoch": 0.8157207845476918, "grad_norm": 1.4212640523910522, "learning_rate": 1.7308784470856944e-06, "loss": 0.5669, "step": 5469 }, { "epoch": 0.8158699381012753, "grad_norm": 1.6380707025527954, "learning_rate": 1.7281623787300672e-06, "loss": 0.5209, "step": 5470 }, { "epoch": 0.8160190916548586, "grad_norm": 1.400361180305481, "learning_rate": 1.725448241487976e-06, "loss": 0.5923, "step": 5471 }, { "epoch": 0.8161682452084421, "grad_norm": 1.3569741249084473, "learning_rate": 1.7227360359930468e-06, "loss": 0.5949, "step": 5472 }, { "epoch": 0.8163173987620255, "grad_norm": 1.3293240070343018, "learning_rate": 1.7200257628784633e-06, "loss": 0.6137, "step": 5473 }, { "epoch": 0.8164665523156089, "grad_norm": 1.3281643390655518, "learning_rate": 1.7173174227769574e-06, "loss": 0.6018, "step": 5474 }, { "epoch": 0.8166157058691923, "grad_norm": 0.8150570392608643, "learning_rate": 1.7146110163208108e-06, "loss": 0.6426, "step": 5475 }, { "epoch": 0.8167648594227758, "grad_norm": 1.4648000001907349, "learning_rate": 1.711906544141846e-06, "loss": 0.531, "step": 5476 }, { "epoch": 0.8169140129763591, "grad_norm": 1.832201600074768, "learning_rate": 1.7092040068714421e-06, "loss": 0.5736, "step": 5477 }, { "epoch": 0.8170631665299426, "grad_norm": 1.407710313796997, "learning_rate": 1.7065034051405239e-06, "loss": 0.6342, "step": 5478 }, { "epoch": 0.817212320083526, "grad_norm": 1.307342767715454, "learning_rate": 1.703804739579563e-06, "loss": 0.4517, "step": 5479 }, { "epoch": 0.8173614736371094, "grad_norm": 1.5131574869155884, "learning_rate": 1.701108010818583e-06, "loss": 0.5203, "step": 5480 }, { "epoch": 0.8175106271906928, "grad_norm": 1.4185811281204224, "learning_rate": 1.6984132194871516e-06, "loss": 0.5463, "step": 5481 }, { "epoch": 0.8176597807442763, "grad_norm": 1.2081438302993774, "learning_rate": 1.6957203662143818e-06, "loss": 0.5512, "step": 5482 }, { "epoch": 0.8178089342978596, "grad_norm": 1.4474279880523682, "learning_rate": 1.6930294516289403e-06, "loss": 0.5425, "step": 5483 }, { "epoch": 0.8179580878514431, "grad_norm": 1.4413691759109497, "learning_rate": 1.6903404763590403e-06, "loss": 0.5358, "step": 5484 }, { "epoch": 0.8181072414050264, "grad_norm": 1.5057365894317627, "learning_rate": 1.6876534410324352e-06, "loss": 0.5716, "step": 5485 }, { "epoch": 0.8182563949586099, "grad_norm": 1.3978923559188843, "learning_rate": 1.684968346276431e-06, "loss": 0.5772, "step": 5486 }, { "epoch": 0.8184055485121933, "grad_norm": 1.535706877708435, "learning_rate": 1.6822851927178874e-06, "loss": 0.5141, "step": 5487 }, { "epoch": 0.8185547020657767, "grad_norm": 1.5378352403640747, "learning_rate": 1.6796039809831977e-06, "loss": 0.531, "step": 5488 }, { "epoch": 0.8187038556193601, "grad_norm": 1.7291449308395386, "learning_rate": 1.6769247116983079e-06, "loss": 0.6327, "step": 5489 }, { "epoch": 0.8188530091729436, "grad_norm": 1.6456162929534912, "learning_rate": 1.6742473854887154e-06, "loss": 0.5201, "step": 5490 }, { "epoch": 0.8190021627265269, "grad_norm": 2.160457134246826, "learning_rate": 1.6715720029794525e-06, "loss": 0.6132, "step": 5491 }, { "epoch": 0.8191513162801104, "grad_norm": 0.9078351259231567, "learning_rate": 1.6688985647951085e-06, "loss": 0.6923, "step": 5492 }, { "epoch": 0.8193004698336938, "grad_norm": 1.4439841508865356, "learning_rate": 1.666227071559814e-06, "loss": 0.6026, "step": 5493 }, { "epoch": 0.8194496233872772, "grad_norm": 1.4982324838638306, "learning_rate": 1.6635575238972478e-06, "loss": 0.5671, "step": 5494 }, { "epoch": 0.8195987769408606, "grad_norm": 1.4225735664367676, "learning_rate": 1.6608899224306264e-06, "loss": 0.5178, "step": 5495 }, { "epoch": 0.819747930494444, "grad_norm": 1.5117582082748413, "learning_rate": 1.6582242677827286e-06, "loss": 0.5465, "step": 5496 }, { "epoch": 0.8198970840480274, "grad_norm": 0.8480676412582397, "learning_rate": 1.6555605605758606e-06, "loss": 0.6774, "step": 5497 }, { "epoch": 0.8200462376016109, "grad_norm": 1.397398591041565, "learning_rate": 1.6528988014318848e-06, "loss": 0.5788, "step": 5498 }, { "epoch": 0.8201953911551942, "grad_norm": 1.3529223203659058, "learning_rate": 1.650238990972205e-06, "loss": 0.624, "step": 5499 }, { "epoch": 0.8203445447087777, "grad_norm": 1.3689539432525635, "learning_rate": 1.6475811298177747e-06, "loss": 0.5346, "step": 5500 }, { "epoch": 0.8204936982623611, "grad_norm": 1.187456727027893, "learning_rate": 1.644925218589083e-06, "loss": 0.4924, "step": 5501 }, { "epoch": 0.8206428518159445, "grad_norm": 1.704887866973877, "learning_rate": 1.6422712579061727e-06, "loss": 0.6075, "step": 5502 }, { "epoch": 0.8207920053695279, "grad_norm": 1.3109543323516846, "learning_rate": 1.6396192483886285e-06, "loss": 0.5418, "step": 5503 }, { "epoch": 0.8209411589231114, "grad_norm": 1.3807753324508667, "learning_rate": 1.636969190655574e-06, "loss": 0.522, "step": 5504 }, { "epoch": 0.8210903124766947, "grad_norm": 1.2758057117462158, "learning_rate": 1.6343210853256885e-06, "loss": 0.4748, "step": 5505 }, { "epoch": 0.8212394660302782, "grad_norm": 1.5597409009933472, "learning_rate": 1.6316749330171888e-06, "loss": 0.5538, "step": 5506 }, { "epoch": 0.8213886195838616, "grad_norm": 1.4228954315185547, "learning_rate": 1.6290307343478318e-06, "loss": 0.5516, "step": 5507 }, { "epoch": 0.821537773137445, "grad_norm": 1.44875967502594, "learning_rate": 1.6263884899349248e-06, "loss": 0.5658, "step": 5508 }, { "epoch": 0.8216869266910284, "grad_norm": 1.3307533264160156, "learning_rate": 1.6237482003953187e-06, "loss": 0.5282, "step": 5509 }, { "epoch": 0.8218360802446119, "grad_norm": 1.5324703454971313, "learning_rate": 1.6211098663454016e-06, "loss": 0.5319, "step": 5510 }, { "epoch": 0.8219852337981952, "grad_norm": 1.5012516975402832, "learning_rate": 1.6184734884011123e-06, "loss": 0.5497, "step": 5511 }, { "epoch": 0.8221343873517787, "grad_norm": 1.4135984182357788, "learning_rate": 1.6158390671779322e-06, "loss": 0.5731, "step": 5512 }, { "epoch": 0.822283540905362, "grad_norm": 1.36500084400177, "learning_rate": 1.6132066032908766e-06, "loss": 0.6097, "step": 5513 }, { "epoch": 0.8224326944589455, "grad_norm": 1.607393503189087, "learning_rate": 1.6105760973545181e-06, "loss": 0.5917, "step": 5514 }, { "epoch": 0.8225818480125289, "grad_norm": 0.7955243587493896, "learning_rate": 1.6079475499829655e-06, "loss": 0.6075, "step": 5515 }, { "epoch": 0.8227310015661123, "grad_norm": 1.1855140924453735, "learning_rate": 1.6053209617898646e-06, "loss": 0.5435, "step": 5516 }, { "epoch": 0.8228801551196957, "grad_norm": 1.3348180055618286, "learning_rate": 1.6026963333884127e-06, "loss": 0.5691, "step": 5517 }, { "epoch": 0.8230293086732792, "grad_norm": 1.422727108001709, "learning_rate": 1.6000736653913485e-06, "loss": 0.5144, "step": 5518 }, { "epoch": 0.8231784622268625, "grad_norm": 1.412916660308838, "learning_rate": 1.5974529584109444e-06, "loss": 0.527, "step": 5519 }, { "epoch": 0.823327615780446, "grad_norm": 1.2939702272415161, "learning_rate": 1.5948342130590256e-06, "loss": 0.642, "step": 5520 }, { "epoch": 0.8234767693340294, "grad_norm": 0.8384800553321838, "learning_rate": 1.5922174299469528e-06, "loss": 0.6745, "step": 5521 }, { "epoch": 0.8236259228876128, "grad_norm": 1.484566569328308, "learning_rate": 1.5896026096856321e-06, "loss": 0.5532, "step": 5522 }, { "epoch": 0.8237750764411962, "grad_norm": 0.8643951416015625, "learning_rate": 1.5869897528855106e-06, "loss": 0.6053, "step": 5523 }, { "epoch": 0.8239242299947797, "grad_norm": 1.4600698947906494, "learning_rate": 1.5843788601565757e-06, "loss": 0.5848, "step": 5524 }, { "epoch": 0.824073383548363, "grad_norm": 1.4267792701721191, "learning_rate": 1.58176993210836e-06, "loss": 0.554, "step": 5525 }, { "epoch": 0.8242225371019465, "grad_norm": 1.3058393001556396, "learning_rate": 1.5791629693499289e-06, "loss": 0.5657, "step": 5526 }, { "epoch": 0.8243716906555298, "grad_norm": 1.3666718006134033, "learning_rate": 1.5765579724898973e-06, "loss": 0.5219, "step": 5527 }, { "epoch": 0.8245208442091133, "grad_norm": 1.5143054723739624, "learning_rate": 1.5739549421364196e-06, "loss": 0.6692, "step": 5528 }, { "epoch": 0.8246699977626967, "grad_norm": 1.661615014076233, "learning_rate": 1.5713538788971882e-06, "loss": 0.6078, "step": 5529 }, { "epoch": 0.8248191513162801, "grad_norm": 1.7476179599761963, "learning_rate": 1.5687547833794349e-06, "loss": 0.5628, "step": 5530 }, { "epoch": 0.8249683048698635, "grad_norm": 1.7114921808242798, "learning_rate": 1.5661576561899438e-06, "loss": 0.536, "step": 5531 }, { "epoch": 0.825117458423447, "grad_norm": 1.2881197929382324, "learning_rate": 1.563562497935025e-06, "loss": 0.5519, "step": 5532 }, { "epoch": 0.8252666119770303, "grad_norm": 1.8370835781097412, "learning_rate": 1.5609693092205347e-06, "loss": 0.553, "step": 5533 }, { "epoch": 0.8254157655306138, "grad_norm": 1.2778178453445435, "learning_rate": 1.558378090651872e-06, "loss": 0.4182, "step": 5534 }, { "epoch": 0.8255649190841972, "grad_norm": 1.6468640565872192, "learning_rate": 1.5557888428339706e-06, "loss": 0.5439, "step": 5535 }, { "epoch": 0.8257140726377806, "grad_norm": 1.6751199960708618, "learning_rate": 1.5532015663713085e-06, "loss": 0.5053, "step": 5536 }, { "epoch": 0.825863226191364, "grad_norm": 1.3976045846939087, "learning_rate": 1.5506162618679043e-06, "loss": 0.5427, "step": 5537 }, { "epoch": 0.8260123797449475, "grad_norm": 1.589112401008606, "learning_rate": 1.548032929927309e-06, "loss": 0.5535, "step": 5538 }, { "epoch": 0.8261615332985308, "grad_norm": 1.2718545198440552, "learning_rate": 1.5454515711526187e-06, "loss": 0.5848, "step": 5539 }, { "epoch": 0.8263106868521143, "grad_norm": 1.3136266469955444, "learning_rate": 1.5428721861464746e-06, "loss": 0.5151, "step": 5540 }, { "epoch": 0.8264598404056976, "grad_norm": 1.3564774990081787, "learning_rate": 1.540294775511043e-06, "loss": 0.5926, "step": 5541 }, { "epoch": 0.8266089939592811, "grad_norm": 1.439505934715271, "learning_rate": 1.5377193398480406e-06, "loss": 0.5876, "step": 5542 }, { "epoch": 0.8267581475128645, "grad_norm": 2.1920204162597656, "learning_rate": 1.5351458797587205e-06, "loss": 0.5852, "step": 5543 }, { "epoch": 0.8269073010664479, "grad_norm": 1.2917994260787964, "learning_rate": 1.5325743958438698e-06, "loss": 0.4532, "step": 5544 }, { "epoch": 0.8270564546200313, "grad_norm": 1.421137809753418, "learning_rate": 1.53000488870382e-06, "loss": 0.5641, "step": 5545 }, { "epoch": 0.8272056081736148, "grad_norm": 1.4946520328521729, "learning_rate": 1.5274373589384384e-06, "loss": 0.6239, "step": 5546 }, { "epoch": 0.8273547617271981, "grad_norm": 1.7057406902313232, "learning_rate": 1.5248718071471346e-06, "loss": 0.615, "step": 5547 }, { "epoch": 0.8275039152807816, "grad_norm": 1.2869809865951538, "learning_rate": 1.5223082339288452e-06, "loss": 0.6206, "step": 5548 }, { "epoch": 0.827653068834365, "grad_norm": 1.434003233909607, "learning_rate": 1.5197466398820625e-06, "loss": 0.6048, "step": 5549 }, { "epoch": 0.8278022223879484, "grad_norm": 1.5234665870666504, "learning_rate": 1.5171870256048005e-06, "loss": 0.6122, "step": 5550 }, { "epoch": 0.8279513759415318, "grad_norm": 1.175389289855957, "learning_rate": 1.5146293916946185e-06, "loss": 0.6012, "step": 5551 }, { "epoch": 0.8281005294951153, "grad_norm": 1.550500512123108, "learning_rate": 1.512073738748614e-06, "loss": 0.5977, "step": 5552 }, { "epoch": 0.8282496830486986, "grad_norm": 1.3361477851867676, "learning_rate": 1.5095200673634224e-06, "loss": 0.578, "step": 5553 }, { "epoch": 0.8283988366022821, "grad_norm": 1.274015188217163, "learning_rate": 1.5069683781352106e-06, "loss": 0.5903, "step": 5554 }, { "epoch": 0.8285479901558654, "grad_norm": 1.444655179977417, "learning_rate": 1.5044186716596888e-06, "loss": 0.6759, "step": 5555 }, { "epoch": 0.8286971437094489, "grad_norm": 0.8157780766487122, "learning_rate": 1.5018709485321004e-06, "loss": 0.6007, "step": 5556 }, { "epoch": 0.8288462972630323, "grad_norm": 1.306738018989563, "learning_rate": 1.499325209347231e-06, "loss": 0.6446, "step": 5557 }, { "epoch": 0.8289954508166157, "grad_norm": 1.4225671291351318, "learning_rate": 1.4967814546993975e-06, "loss": 0.6185, "step": 5558 }, { "epoch": 0.8291446043701991, "grad_norm": 1.5909981727600098, "learning_rate": 1.4942396851824582e-06, "loss": 0.5689, "step": 5559 }, { "epoch": 0.8292937579237826, "grad_norm": 1.3463588953018188, "learning_rate": 1.4916999013898027e-06, "loss": 0.5798, "step": 5560 }, { "epoch": 0.8294429114773659, "grad_norm": 1.2089767456054688, "learning_rate": 1.4891621039143589e-06, "loss": 0.5548, "step": 5561 }, { "epoch": 0.8295920650309494, "grad_norm": 1.3660119771957397, "learning_rate": 1.4866262933485975e-06, "loss": 0.5539, "step": 5562 }, { "epoch": 0.8297412185845328, "grad_norm": 1.387880802154541, "learning_rate": 1.4840924702845128e-06, "loss": 0.5832, "step": 5563 }, { "epoch": 0.8298903721381162, "grad_norm": 1.3021403551101685, "learning_rate": 1.4815606353136459e-06, "loss": 0.6001, "step": 5564 }, { "epoch": 0.8300395256916996, "grad_norm": 3.1281633377075195, "learning_rate": 1.4790307890270694e-06, "loss": 0.5646, "step": 5565 }, { "epoch": 0.8301886792452831, "grad_norm": 1.3898777961730957, "learning_rate": 1.4765029320153912e-06, "loss": 0.5137, "step": 5566 }, { "epoch": 0.8303378327988664, "grad_norm": 1.4692049026489258, "learning_rate": 1.4739770648687568e-06, "loss": 0.5404, "step": 5567 }, { "epoch": 0.8304869863524499, "grad_norm": 1.5810247659683228, "learning_rate": 1.4714531881768478e-06, "loss": 0.5974, "step": 5568 }, { "epoch": 0.8306361399060332, "grad_norm": 0.774449348449707, "learning_rate": 1.4689313025288754e-06, "loss": 0.6179, "step": 5569 }, { "epoch": 0.8307852934596167, "grad_norm": 1.4324836730957031, "learning_rate": 1.4664114085135916e-06, "loss": 0.5947, "step": 5570 }, { "epoch": 0.8309344470132001, "grad_norm": 1.3570642471313477, "learning_rate": 1.4638935067192828e-06, "loss": 0.5954, "step": 5571 }, { "epoch": 0.8310836005667835, "grad_norm": 1.453050971031189, "learning_rate": 1.4613775977337707e-06, "loss": 0.5739, "step": 5572 }, { "epoch": 0.8312327541203669, "grad_norm": 1.8184144496917725, "learning_rate": 1.4588636821444059e-06, "loss": 0.4849, "step": 5573 }, { "epoch": 0.8313819076739504, "grad_norm": 1.3501189947128296, "learning_rate": 1.4563517605380805e-06, "loss": 0.5371, "step": 5574 }, { "epoch": 0.8315310612275337, "grad_norm": 1.572727084159851, "learning_rate": 1.4538418335012194e-06, "loss": 0.5918, "step": 5575 }, { "epoch": 0.8316802147811172, "grad_norm": 1.2733886241912842, "learning_rate": 1.4513339016197802e-06, "loss": 0.5517, "step": 5576 }, { "epoch": 0.8318293683347006, "grad_norm": 1.4847465753555298, "learning_rate": 1.4488279654792558e-06, "loss": 0.4931, "step": 5577 }, { "epoch": 0.831978521888284, "grad_norm": 1.3287081718444824, "learning_rate": 1.446324025664676e-06, "loss": 0.5672, "step": 5578 }, { "epoch": 0.8321276754418674, "grad_norm": 1.3556606769561768, "learning_rate": 1.4438220827605965e-06, "loss": 0.4668, "step": 5579 }, { "epoch": 0.8322768289954509, "grad_norm": 1.5817605257034302, "learning_rate": 1.4413221373511132e-06, "loss": 0.5867, "step": 5580 }, { "epoch": 0.8324259825490342, "grad_norm": 1.2599700689315796, "learning_rate": 1.4388241900198597e-06, "loss": 0.585, "step": 5581 }, { "epoch": 0.8325751361026177, "grad_norm": 1.3865845203399658, "learning_rate": 1.4363282413499902e-06, "loss": 0.562, "step": 5582 }, { "epoch": 0.832724289656201, "grad_norm": 1.3560882806777954, "learning_rate": 1.433834291924201e-06, "loss": 0.5564, "step": 5583 }, { "epoch": 0.8328734432097845, "grad_norm": 1.4972811937332153, "learning_rate": 1.4313423423247275e-06, "loss": 0.5677, "step": 5584 }, { "epoch": 0.8330225967633679, "grad_norm": 1.5022615194320679, "learning_rate": 1.4288523931333242e-06, "loss": 0.531, "step": 5585 }, { "epoch": 0.8331717503169513, "grad_norm": 1.2825987339019775, "learning_rate": 1.4263644449312896e-06, "loss": 0.5497, "step": 5586 }, { "epoch": 0.8333209038705347, "grad_norm": 1.1426140069961548, "learning_rate": 1.4238784982994503e-06, "loss": 0.5366, "step": 5587 }, { "epoch": 0.8334700574241182, "grad_norm": 1.3027452230453491, "learning_rate": 1.4213945538181651e-06, "loss": 0.5776, "step": 5588 }, { "epoch": 0.8336192109777015, "grad_norm": 1.5294650793075562, "learning_rate": 1.418912612067327e-06, "loss": 0.4796, "step": 5589 }, { "epoch": 0.833768364531285, "grad_norm": 1.3621013164520264, "learning_rate": 1.4164326736263645e-06, "loss": 0.5646, "step": 5590 }, { "epoch": 0.8339175180848684, "grad_norm": 1.328145980834961, "learning_rate": 1.413954739074227e-06, "loss": 0.5414, "step": 5591 }, { "epoch": 0.8340666716384518, "grad_norm": 0.8120284080505371, "learning_rate": 1.4114788089894128e-06, "loss": 0.6248, "step": 5592 }, { "epoch": 0.8342158251920352, "grad_norm": 1.3398240804672241, "learning_rate": 1.4090048839499426e-06, "loss": 0.5836, "step": 5593 }, { "epoch": 0.8343649787456187, "grad_norm": 1.347730278968811, "learning_rate": 1.4065329645333658e-06, "loss": 0.5345, "step": 5594 }, { "epoch": 0.834514132299202, "grad_norm": 1.5369569063186646, "learning_rate": 1.4040630513167697e-06, "loss": 0.5647, "step": 5595 }, { "epoch": 0.8346632858527855, "grad_norm": 1.3457345962524414, "learning_rate": 1.401595144876775e-06, "loss": 0.5555, "step": 5596 }, { "epoch": 0.8348124394063688, "grad_norm": 1.3632590770721436, "learning_rate": 1.3991292457895234e-06, "loss": 0.561, "step": 5597 }, { "epoch": 0.8349615929599523, "grad_norm": 1.281542181968689, "learning_rate": 1.3966653546306997e-06, "loss": 0.6023, "step": 5598 }, { "epoch": 0.8351107465135357, "grad_norm": 1.2774391174316406, "learning_rate": 1.3942034719755127e-06, "loss": 0.493, "step": 5599 }, { "epoch": 0.8352599000671191, "grad_norm": 1.4411423206329346, "learning_rate": 1.391743598398707e-06, "loss": 0.586, "step": 5600 }, { "epoch": 0.8354090536207025, "grad_norm": 0.8138124942779541, "learning_rate": 1.3892857344745537e-06, "loss": 0.5858, "step": 5601 }, { "epoch": 0.835558207174286, "grad_norm": 1.519972801208496, "learning_rate": 1.386829880776861e-06, "loss": 0.5683, "step": 5602 }, { "epoch": 0.8357073607278693, "grad_norm": 1.289616346359253, "learning_rate": 1.3843760378789583e-06, "loss": 0.6268, "step": 5603 }, { "epoch": 0.8358565142814528, "grad_norm": 1.3417032957077026, "learning_rate": 1.3819242063537131e-06, "loss": 0.5442, "step": 5604 }, { "epoch": 0.8360056678350362, "grad_norm": 1.4954737424850464, "learning_rate": 1.3794743867735206e-06, "loss": 0.6075, "step": 5605 }, { "epoch": 0.8361548213886196, "grad_norm": 1.4546555280685425, "learning_rate": 1.37702657971031e-06, "loss": 0.544, "step": 5606 }, { "epoch": 0.836303974942203, "grad_norm": 1.361737608909607, "learning_rate": 1.3745807857355342e-06, "loss": 0.5342, "step": 5607 }, { "epoch": 0.8364531284957865, "grad_norm": 1.3176448345184326, "learning_rate": 1.3721370054201788e-06, "loss": 0.5448, "step": 5608 }, { "epoch": 0.8366022820493698, "grad_norm": 1.4335405826568604, "learning_rate": 1.3696952393347629e-06, "loss": 0.5907, "step": 5609 }, { "epoch": 0.8367514356029533, "grad_norm": 1.1864547729492188, "learning_rate": 1.3672554880493305e-06, "loss": 0.4538, "step": 5610 }, { "epoch": 0.8369005891565366, "grad_norm": 1.5928980112075806, "learning_rate": 1.3648177521334582e-06, "loss": 0.5851, "step": 5611 }, { "epoch": 0.8370497427101201, "grad_norm": 1.3706859350204468, "learning_rate": 1.3623820321562531e-06, "loss": 0.5267, "step": 5612 }, { "epoch": 0.8371988962637035, "grad_norm": 1.2629250288009644, "learning_rate": 1.3599483286863458e-06, "loss": 0.599, "step": 5613 }, { "epoch": 0.8373480498172869, "grad_norm": 1.3191267251968384, "learning_rate": 1.3575166422919006e-06, "loss": 0.5211, "step": 5614 }, { "epoch": 0.8374972033708703, "grad_norm": 0.8513544797897339, "learning_rate": 1.3550869735406124e-06, "loss": 0.6181, "step": 5615 }, { "epoch": 0.8376463569244538, "grad_norm": 1.5811772346496582, "learning_rate": 1.352659322999701e-06, "loss": 0.5856, "step": 5616 }, { "epoch": 0.8377955104780371, "grad_norm": 1.698525309562683, "learning_rate": 1.350233691235915e-06, "loss": 0.5258, "step": 5617 }, { "epoch": 0.8379446640316206, "grad_norm": 1.7467420101165771, "learning_rate": 1.3478100788155413e-06, "loss": 0.6071, "step": 5618 }, { "epoch": 0.838093817585204, "grad_norm": 0.8289312124252319, "learning_rate": 1.3453884863043798e-06, "loss": 0.6285, "step": 5619 }, { "epoch": 0.8382429711387874, "grad_norm": 1.4035848379135132, "learning_rate": 1.3429689142677704e-06, "loss": 0.4997, "step": 5620 }, { "epoch": 0.8383921246923708, "grad_norm": 1.427214503288269, "learning_rate": 1.3405513632705792e-06, "loss": 0.5682, "step": 5621 }, { "epoch": 0.8385412782459543, "grad_norm": 1.315742015838623, "learning_rate": 1.3381358338771954e-06, "loss": 0.5751, "step": 5622 }, { "epoch": 0.8386904317995376, "grad_norm": 1.3460848331451416, "learning_rate": 1.3357223266515406e-06, "loss": 0.5259, "step": 5623 }, { "epoch": 0.8388395853531211, "grad_norm": 1.634365439414978, "learning_rate": 1.333310842157064e-06, "loss": 0.5323, "step": 5624 }, { "epoch": 0.8389887389067044, "grad_norm": 1.6321754455566406, "learning_rate": 1.330901380956745e-06, "loss": 0.534, "step": 5625 }, { "epoch": 0.8391378924602879, "grad_norm": 1.5995111465454102, "learning_rate": 1.3284939436130816e-06, "loss": 0.6245, "step": 5626 }, { "epoch": 0.8392870460138713, "grad_norm": 1.266740083694458, "learning_rate": 1.3260885306881122e-06, "loss": 0.5102, "step": 5627 }, { "epoch": 0.8394361995674547, "grad_norm": 0.858832061290741, "learning_rate": 1.3236851427433917e-06, "loss": 0.6431, "step": 5628 }, { "epoch": 0.8395853531210381, "grad_norm": 1.1746337413787842, "learning_rate": 1.3212837803400068e-06, "loss": 0.4699, "step": 5629 }, { "epoch": 0.8397345066746216, "grad_norm": 1.574960708618164, "learning_rate": 1.3188844440385716e-06, "loss": 0.5595, "step": 5630 }, { "epoch": 0.8398836602282049, "grad_norm": 1.399551510810852, "learning_rate": 1.3164871343992292e-06, "loss": 0.5867, "step": 5631 }, { "epoch": 0.8400328137817884, "grad_norm": 1.37860107421875, "learning_rate": 1.3140918519816415e-06, "loss": 0.5798, "step": 5632 }, { "epoch": 0.8401819673353718, "grad_norm": 2.60241961479187, "learning_rate": 1.3116985973450058e-06, "loss": 0.534, "step": 5633 }, { "epoch": 0.8403311208889552, "grad_norm": 1.3699519634246826, "learning_rate": 1.3093073710480442e-06, "loss": 0.565, "step": 5634 }, { "epoch": 0.8404802744425386, "grad_norm": 1.4864319562911987, "learning_rate": 1.3069181736489978e-06, "loss": 0.5418, "step": 5635 }, { "epoch": 0.8406294279961221, "grad_norm": 1.5428866147994995, "learning_rate": 1.3045310057056459e-06, "loss": 0.4885, "step": 5636 }, { "epoch": 0.8407785815497054, "grad_norm": 1.747677206993103, "learning_rate": 1.3021458677752884e-06, "loss": 0.578, "step": 5637 }, { "epoch": 0.8409277351032889, "grad_norm": 1.3414493799209595, "learning_rate": 1.2997627604147468e-06, "loss": 0.6001, "step": 5638 }, { "epoch": 0.8410768886568722, "grad_norm": 1.416298508644104, "learning_rate": 1.2973816841803756e-06, "loss": 0.5823, "step": 5639 }, { "epoch": 0.8412260422104557, "grad_norm": 1.5353587865829468, "learning_rate": 1.2950026396280536e-06, "loss": 0.4893, "step": 5640 }, { "epoch": 0.8413751957640391, "grad_norm": 1.4729371070861816, "learning_rate": 1.2926256273131799e-06, "loss": 0.4945, "step": 5641 }, { "epoch": 0.8415243493176225, "grad_norm": 0.8359286189079285, "learning_rate": 1.2902506477906862e-06, "loss": 0.6529, "step": 5642 }, { "epoch": 0.8416735028712059, "grad_norm": 1.30454421043396, "learning_rate": 1.2878777016150267e-06, "loss": 0.5052, "step": 5643 }, { "epoch": 0.8418226564247894, "grad_norm": 1.2650494575500488, "learning_rate": 1.2855067893401773e-06, "loss": 0.4494, "step": 5644 }, { "epoch": 0.8419718099783727, "grad_norm": 1.2894625663757324, "learning_rate": 1.2831379115196473e-06, "loss": 0.5577, "step": 5645 }, { "epoch": 0.8421209635319562, "grad_norm": 1.4556394815444946, "learning_rate": 1.2807710687064667e-06, "loss": 0.5399, "step": 5646 }, { "epoch": 0.8422701170855396, "grad_norm": 2.2393290996551514, "learning_rate": 1.2784062614531866e-06, "loss": 0.4786, "step": 5647 }, { "epoch": 0.842419270639123, "grad_norm": 1.533506155014038, "learning_rate": 1.2760434903118868e-06, "loss": 0.5452, "step": 5648 }, { "epoch": 0.8425684241927064, "grad_norm": 1.877803921699524, "learning_rate": 1.2736827558341735e-06, "loss": 0.6353, "step": 5649 }, { "epoch": 0.8427175777462899, "grad_norm": 1.2372686862945557, "learning_rate": 1.2713240585711762e-06, "loss": 0.4614, "step": 5650 }, { "epoch": 0.8428667312998732, "grad_norm": 1.4495891332626343, "learning_rate": 1.2689673990735428e-06, "loss": 0.5899, "step": 5651 }, { "epoch": 0.8430158848534567, "grad_norm": 1.6024744510650635, "learning_rate": 1.266612777891455e-06, "loss": 0.6378, "step": 5652 }, { "epoch": 0.84316503840704, "grad_norm": 1.7015211582183838, "learning_rate": 1.2642601955746126e-06, "loss": 0.5675, "step": 5653 }, { "epoch": 0.8433141919606235, "grad_norm": 1.4875980615615845, "learning_rate": 1.2619096526722418e-06, "loss": 0.5315, "step": 5654 }, { "epoch": 0.8434633455142069, "grad_norm": 0.8152217864990234, "learning_rate": 1.2595611497330917e-06, "loss": 0.6263, "step": 5655 }, { "epoch": 0.8436124990677903, "grad_norm": 1.3882300853729248, "learning_rate": 1.257214687305437e-06, "loss": 0.5321, "step": 5656 }, { "epoch": 0.8437616526213737, "grad_norm": 1.406617522239685, "learning_rate": 1.2548702659370703e-06, "loss": 0.5791, "step": 5657 }, { "epoch": 0.8439108061749572, "grad_norm": 6.132126808166504, "learning_rate": 1.2525278861753142e-06, "loss": 0.5588, "step": 5658 }, { "epoch": 0.8440599597285405, "grad_norm": 1.53749680519104, "learning_rate": 1.2501875485670145e-06, "loss": 0.4898, "step": 5659 }, { "epoch": 0.844209113282124, "grad_norm": 1.3931176662445068, "learning_rate": 1.247849253658533e-06, "loss": 0.4966, "step": 5660 }, { "epoch": 0.8443582668357074, "grad_norm": 1.3533580303192139, "learning_rate": 1.2455130019957607e-06, "loss": 0.5224, "step": 5661 }, { "epoch": 0.8445074203892907, "grad_norm": 0.8634002804756165, "learning_rate": 1.2431787941241157e-06, "loss": 0.62, "step": 5662 }, { "epoch": 0.8446565739428742, "grad_norm": 1.4726660251617432, "learning_rate": 1.240846630588529e-06, "loss": 0.5733, "step": 5663 }, { "epoch": 0.8448057274964575, "grad_norm": 1.3210855722427368, "learning_rate": 1.2385165119334607e-06, "loss": 0.4963, "step": 5664 }, { "epoch": 0.844954881050041, "grad_norm": 1.462996244430542, "learning_rate": 1.2361884387028933e-06, "loss": 0.5491, "step": 5665 }, { "epoch": 0.8451040346036244, "grad_norm": 1.385901927947998, "learning_rate": 1.2338624114403263e-06, "loss": 0.5444, "step": 5666 }, { "epoch": 0.8452531881572078, "grad_norm": 1.3626337051391602, "learning_rate": 1.231538430688789e-06, "loss": 0.5239, "step": 5667 }, { "epoch": 0.8454023417107912, "grad_norm": 1.377005934715271, "learning_rate": 1.2292164969908294e-06, "loss": 0.5557, "step": 5668 }, { "epoch": 0.8455514952643747, "grad_norm": 1.3130073547363281, "learning_rate": 1.226896610888516e-06, "loss": 0.5496, "step": 5669 }, { "epoch": 0.845700648817958, "grad_norm": 1.3841488361358643, "learning_rate": 1.2245787729234404e-06, "loss": 0.5192, "step": 5670 }, { "epoch": 0.8458498023715415, "grad_norm": 1.3550784587860107, "learning_rate": 1.2222629836367227e-06, "loss": 0.6002, "step": 5671 }, { "epoch": 0.8459989559251249, "grad_norm": 1.3406312465667725, "learning_rate": 1.2199492435689918e-06, "loss": 0.5204, "step": 5672 }, { "epoch": 0.8461481094787083, "grad_norm": 1.2557249069213867, "learning_rate": 1.217637553260409e-06, "loss": 0.5267, "step": 5673 }, { "epoch": 0.8462972630322917, "grad_norm": 1.4486674070358276, "learning_rate": 1.2153279132506535e-06, "loss": 0.5569, "step": 5674 }, { "epoch": 0.8464464165858752, "grad_norm": 1.3591610193252563, "learning_rate": 1.2130203240789228e-06, "loss": 0.5667, "step": 5675 }, { "epoch": 0.8465955701394585, "grad_norm": 1.4705897569656372, "learning_rate": 1.2107147862839396e-06, "loss": 0.5136, "step": 5676 }, { "epoch": 0.846744723693042, "grad_norm": 1.3533235788345337, "learning_rate": 1.2084113004039467e-06, "loss": 0.5215, "step": 5677 }, { "epoch": 0.8468938772466253, "grad_norm": 1.3638916015625, "learning_rate": 1.20610986697671e-06, "loss": 0.5335, "step": 5678 }, { "epoch": 0.8470430308002088, "grad_norm": 1.5984539985656738, "learning_rate": 1.2038104865395072e-06, "loss": 0.5775, "step": 5679 }, { "epoch": 0.8471921843537922, "grad_norm": 1.2378545999526978, "learning_rate": 1.2015131596291518e-06, "loss": 0.5253, "step": 5680 }, { "epoch": 0.8473413379073756, "grad_norm": 1.4505583047866821, "learning_rate": 1.1992178867819636e-06, "loss": 0.5548, "step": 5681 }, { "epoch": 0.847490491460959, "grad_norm": 2.9983773231506348, "learning_rate": 1.1969246685337909e-06, "loss": 0.5494, "step": 5682 }, { "epoch": 0.8476396450145425, "grad_norm": 1.3227369785308838, "learning_rate": 1.1946335054199999e-06, "loss": 0.5438, "step": 5683 }, { "epoch": 0.8477887985681258, "grad_norm": 1.284163236618042, "learning_rate": 1.192344397975479e-06, "loss": 0.4974, "step": 5684 }, { "epoch": 0.8479379521217093, "grad_norm": 1.5032625198364258, "learning_rate": 1.1900573467346322e-06, "loss": 0.5373, "step": 5685 }, { "epoch": 0.8480871056752927, "grad_norm": 1.4452593326568604, "learning_rate": 1.1877723522313867e-06, "loss": 0.5569, "step": 5686 }, { "epoch": 0.8482362592288761, "grad_norm": 1.358825922012329, "learning_rate": 1.1854894149991902e-06, "loss": 0.5018, "step": 5687 }, { "epoch": 0.8483854127824595, "grad_norm": 1.3699493408203125, "learning_rate": 1.1832085355710087e-06, "loss": 0.5557, "step": 5688 }, { "epoch": 0.848534566336043, "grad_norm": 1.6727577447891235, "learning_rate": 1.1809297144793285e-06, "loss": 0.5188, "step": 5689 }, { "epoch": 0.8486837198896263, "grad_norm": 1.3618568181991577, "learning_rate": 1.1786529522561564e-06, "loss": 0.5745, "step": 5690 }, { "epoch": 0.8488328734432098, "grad_norm": 1.997494101524353, "learning_rate": 1.1763782494330135e-06, "loss": 0.5307, "step": 5691 }, { "epoch": 0.8489820269967931, "grad_norm": 1.2882487773895264, "learning_rate": 1.174105606540945e-06, "loss": 0.5265, "step": 5692 }, { "epoch": 0.8491311805503766, "grad_norm": 1.4617371559143066, "learning_rate": 1.171835024110517e-06, "loss": 0.5938, "step": 5693 }, { "epoch": 0.84928033410396, "grad_norm": 1.413026213645935, "learning_rate": 1.1695665026718073e-06, "loss": 0.6627, "step": 5694 }, { "epoch": 0.8494294876575434, "grad_norm": 1.5406911373138428, "learning_rate": 1.167300042754419e-06, "loss": 0.5713, "step": 5695 }, { "epoch": 0.8495786412111268, "grad_norm": 1.5361753702163696, "learning_rate": 1.16503564488747e-06, "loss": 0.5493, "step": 5696 }, { "epoch": 0.8497277947647103, "grad_norm": 1.7877157926559448, "learning_rate": 1.1627733095996008e-06, "loss": 0.6014, "step": 5697 }, { "epoch": 0.8498769483182936, "grad_norm": 1.4117100238800049, "learning_rate": 1.1605130374189676e-06, "loss": 0.5124, "step": 5698 }, { "epoch": 0.8500261018718771, "grad_norm": 1.6992496252059937, "learning_rate": 1.1582548288732465e-06, "loss": 0.5488, "step": 5699 }, { "epoch": 0.8501752554254605, "grad_norm": 1.968284010887146, "learning_rate": 1.1559986844896265e-06, "loss": 0.6137, "step": 5700 }, { "epoch": 0.8503244089790439, "grad_norm": 1.4432076215744019, "learning_rate": 1.153744604794822e-06, "loss": 0.5062, "step": 5701 }, { "epoch": 0.8504735625326273, "grad_norm": 1.2586339712142944, "learning_rate": 1.151492590315062e-06, "loss": 0.4988, "step": 5702 }, { "epoch": 0.8506227160862108, "grad_norm": 1.373822569847107, "learning_rate": 1.149242641576096e-06, "loss": 0.5916, "step": 5703 }, { "epoch": 0.8507718696397941, "grad_norm": 1.48861825466156, "learning_rate": 1.1469947591031848e-06, "loss": 0.563, "step": 5704 }, { "epoch": 0.8509210231933776, "grad_norm": 2.011577606201172, "learning_rate": 1.1447489434211124e-06, "loss": 0.5186, "step": 5705 }, { "epoch": 0.851070176746961, "grad_norm": 1.179237961769104, "learning_rate": 1.1425051950541798e-06, "loss": 0.5554, "step": 5706 }, { "epoch": 0.8512193303005444, "grad_norm": 0.8384901285171509, "learning_rate": 1.1402635145262043e-06, "loss": 0.6377, "step": 5707 }, { "epoch": 0.8513684838541278, "grad_norm": 1.482979655265808, "learning_rate": 1.13802390236052e-06, "loss": 0.4965, "step": 5708 }, { "epoch": 0.8515176374077112, "grad_norm": 1.2499973773956299, "learning_rate": 1.13578635907998e-06, "loss": 0.536, "step": 5709 }, { "epoch": 0.8516667909612946, "grad_norm": 1.5094963312149048, "learning_rate": 1.133550885206951e-06, "loss": 0.5984, "step": 5710 }, { "epoch": 0.8518159445148781, "grad_norm": 1.4431064128875732, "learning_rate": 1.13131748126332e-06, "loss": 0.5351, "step": 5711 }, { "epoch": 0.8519650980684614, "grad_norm": 1.5131181478500366, "learning_rate": 1.1290861477704918e-06, "loss": 0.6117, "step": 5712 }, { "epoch": 0.8521142516220449, "grad_norm": 1.3492497205734253, "learning_rate": 1.12685688524938e-06, "loss": 0.5698, "step": 5713 }, { "epoch": 0.8522634051756283, "grad_norm": 1.382181167602539, "learning_rate": 1.1246296942204216e-06, "loss": 0.5558, "step": 5714 }, { "epoch": 0.8524125587292117, "grad_norm": 1.5137866735458374, "learning_rate": 1.122404575203574e-06, "loss": 0.5911, "step": 5715 }, { "epoch": 0.8525617122827951, "grad_norm": 1.5698843002319336, "learning_rate": 1.1201815287183005e-06, "loss": 0.6149, "step": 5716 }, { "epoch": 0.8527108658363786, "grad_norm": 1.4676241874694824, "learning_rate": 1.1179605552835859e-06, "loss": 0.4576, "step": 5717 }, { "epoch": 0.8528600193899619, "grad_norm": 1.5492985248565674, "learning_rate": 1.1157416554179345e-06, "loss": 0.5719, "step": 5718 }, { "epoch": 0.8530091729435454, "grad_norm": 1.3029066324234009, "learning_rate": 1.1135248296393574e-06, "loss": 0.5053, "step": 5719 }, { "epoch": 0.8531583264971287, "grad_norm": 1.3213165998458862, "learning_rate": 1.1113100784653895e-06, "loss": 0.5964, "step": 5720 }, { "epoch": 0.8533074800507122, "grad_norm": 1.2516989707946777, "learning_rate": 1.1090974024130795e-06, "loss": 0.5429, "step": 5721 }, { "epoch": 0.8534566336042956, "grad_norm": 1.417494535446167, "learning_rate": 1.1068868019989864e-06, "loss": 0.5392, "step": 5722 }, { "epoch": 0.853605787157879, "grad_norm": 1.3829656839370728, "learning_rate": 1.1046782777391951e-06, "loss": 0.5792, "step": 5723 }, { "epoch": 0.8537549407114624, "grad_norm": 1.5197430849075317, "learning_rate": 1.1024718301492975e-06, "loss": 0.5805, "step": 5724 }, { "epoch": 0.8539040942650459, "grad_norm": 1.3321363925933838, "learning_rate": 1.1002674597444019e-06, "loss": 0.5526, "step": 5725 }, { "epoch": 0.8540532478186292, "grad_norm": 0.8335850834846497, "learning_rate": 1.0980651670391317e-06, "loss": 0.6318, "step": 5726 }, { "epoch": 0.8542024013722127, "grad_norm": 1.3910986185073853, "learning_rate": 1.0958649525476306e-06, "loss": 0.5353, "step": 5727 }, { "epoch": 0.8543515549257961, "grad_norm": 1.3797348737716675, "learning_rate": 1.0936668167835484e-06, "loss": 0.5647, "step": 5728 }, { "epoch": 0.8545007084793795, "grad_norm": 1.3443478345870972, "learning_rate": 1.0914707602600549e-06, "loss": 0.5999, "step": 5729 }, { "epoch": 0.8546498620329629, "grad_norm": 1.514784574508667, "learning_rate": 1.0892767834898343e-06, "loss": 0.5808, "step": 5730 }, { "epoch": 0.8547990155865464, "grad_norm": 1.6129801273345947, "learning_rate": 1.0870848869850847e-06, "loss": 0.5479, "step": 5731 }, { "epoch": 0.8549481691401297, "grad_norm": 1.634213924407959, "learning_rate": 1.084895071257518e-06, "loss": 0.534, "step": 5732 }, { "epoch": 0.8550973226937132, "grad_norm": 1.2673848867416382, "learning_rate": 1.0827073368183627e-06, "loss": 0.4948, "step": 5733 }, { "epoch": 0.8552464762472965, "grad_norm": 1.2317019701004028, "learning_rate": 1.080521684178356e-06, "loss": 0.5863, "step": 5734 }, { "epoch": 0.85539562980088, "grad_norm": 1.533734679222107, "learning_rate": 1.0783381138477544e-06, "loss": 0.6312, "step": 5735 }, { "epoch": 0.8555447833544634, "grad_norm": 1.486788034439087, "learning_rate": 1.0761566263363254e-06, "loss": 0.6216, "step": 5736 }, { "epoch": 0.8556939369080468, "grad_norm": 1.4456933736801147, "learning_rate": 1.073977222153355e-06, "loss": 0.5494, "step": 5737 }, { "epoch": 0.8558430904616302, "grad_norm": 1.5113381147384644, "learning_rate": 1.071799901807633e-06, "loss": 0.6476, "step": 5738 }, { "epoch": 0.8559922440152137, "grad_norm": 1.4784537553787231, "learning_rate": 1.0696246658074728e-06, "loss": 0.5317, "step": 5739 }, { "epoch": 0.856141397568797, "grad_norm": 2.3316876888275146, "learning_rate": 1.0674515146606957e-06, "loss": 0.4983, "step": 5740 }, { "epoch": 0.8562905511223805, "grad_norm": 0.8294664621353149, "learning_rate": 1.0652804488746382e-06, "loss": 0.5967, "step": 5741 }, { "epoch": 0.8564397046759639, "grad_norm": 1.4261128902435303, "learning_rate": 1.0631114689561496e-06, "loss": 0.6433, "step": 5742 }, { "epoch": 0.8565888582295473, "grad_norm": 1.4055590629577637, "learning_rate": 1.0609445754115944e-06, "loss": 0.5142, "step": 5743 }, { "epoch": 0.8567380117831307, "grad_norm": 1.8404924869537354, "learning_rate": 1.0587797687468438e-06, "loss": 0.5348, "step": 5744 }, { "epoch": 0.8568871653367142, "grad_norm": 1.3526018857955933, "learning_rate": 1.0566170494672878e-06, "loss": 0.4507, "step": 5745 }, { "epoch": 0.8570363188902975, "grad_norm": 1.348737120628357, "learning_rate": 1.0544564180778283e-06, "loss": 0.5716, "step": 5746 }, { "epoch": 0.857185472443881, "grad_norm": 1.3745354413986206, "learning_rate": 1.0522978750828761e-06, "loss": 0.5119, "step": 5747 }, { "epoch": 0.8573346259974644, "grad_norm": 1.4153447151184082, "learning_rate": 1.050141420986357e-06, "loss": 0.6263, "step": 5748 }, { "epoch": 0.8574837795510478, "grad_norm": 1.359337568283081, "learning_rate": 1.0479870562917105e-06, "loss": 0.5458, "step": 5749 }, { "epoch": 0.8576329331046312, "grad_norm": 1.5814940929412842, "learning_rate": 1.0458347815018855e-06, "loss": 0.5771, "step": 5750 }, { "epoch": 0.8577820866582146, "grad_norm": 0.8768262267112732, "learning_rate": 1.0436845971193465e-06, "loss": 0.6255, "step": 5751 }, { "epoch": 0.857931240211798, "grad_norm": 1.7561007738113403, "learning_rate": 1.0415365036460679e-06, "loss": 0.5796, "step": 5752 }, { "epoch": 0.8580803937653815, "grad_norm": 1.299210548400879, "learning_rate": 1.0393905015835325e-06, "loss": 0.6091, "step": 5753 }, { "epoch": 0.8582295473189648, "grad_norm": 1.402393102645874, "learning_rate": 1.0372465914327402e-06, "loss": 0.5769, "step": 5754 }, { "epoch": 0.8583787008725483, "grad_norm": 1.4039504528045654, "learning_rate": 1.0351047736942e-06, "loss": 0.5239, "step": 5755 }, { "epoch": 0.8585278544261317, "grad_norm": 1.3720366954803467, "learning_rate": 1.0329650488679366e-06, "loss": 0.585, "step": 5756 }, { "epoch": 0.8586770079797151, "grad_norm": 1.406632661819458, "learning_rate": 1.030827417453475e-06, "loss": 0.4802, "step": 5757 }, { "epoch": 0.8588261615332985, "grad_norm": 1.5260578393936157, "learning_rate": 1.028691879949868e-06, "loss": 0.6387, "step": 5758 }, { "epoch": 0.858975315086882, "grad_norm": 1.474452257156372, "learning_rate": 1.0265584368556636e-06, "loss": 0.5805, "step": 5759 }, { "epoch": 0.8591244686404653, "grad_norm": 1.5409822463989258, "learning_rate": 1.02442708866893e-06, "loss": 0.5282, "step": 5760 }, { "epoch": 0.8592736221940488, "grad_norm": 1.5477843284606934, "learning_rate": 1.0222978358872448e-06, "loss": 0.5559, "step": 5761 }, { "epoch": 0.8594227757476322, "grad_norm": 1.7990381717681885, "learning_rate": 1.020170679007697e-06, "loss": 0.6042, "step": 5762 }, { "epoch": 0.8595719293012156, "grad_norm": 1.5562511682510376, "learning_rate": 1.0180456185268805e-06, "loss": 0.4615, "step": 5763 }, { "epoch": 0.859721082854799, "grad_norm": 1.2867428064346313, "learning_rate": 1.0159226549409074e-06, "loss": 0.5519, "step": 5764 }, { "epoch": 0.8598702364083824, "grad_norm": 1.3646056652069092, "learning_rate": 1.0138017887453988e-06, "loss": 0.575, "step": 5765 }, { "epoch": 0.8600193899619658, "grad_norm": 1.415016531944275, "learning_rate": 1.011683020435479e-06, "loss": 0.5247, "step": 5766 }, { "epoch": 0.8601685435155493, "grad_norm": 1.7000386714935303, "learning_rate": 1.009566350505793e-06, "loss": 0.5926, "step": 5767 }, { "epoch": 0.8603176970691326, "grad_norm": 1.3503984212875366, "learning_rate": 1.0074517794504913e-06, "loss": 0.5431, "step": 5768 }, { "epoch": 0.8604668506227161, "grad_norm": 1.4753090143203735, "learning_rate": 1.0053393077632302e-06, "loss": 0.6672, "step": 5769 }, { "epoch": 0.8606160041762995, "grad_norm": 0.8451013565063477, "learning_rate": 1.0032289359371816e-06, "loss": 0.6247, "step": 5770 }, { "epoch": 0.8607651577298829, "grad_norm": 1.2293606996536255, "learning_rate": 1.0011206644650273e-06, "loss": 0.5421, "step": 5771 }, { "epoch": 0.8609143112834663, "grad_norm": 1.5153124332427979, "learning_rate": 9.990144938389546e-07, "loss": 0.5737, "step": 5772 }, { "epoch": 0.8610634648370498, "grad_norm": 1.6244562864303589, "learning_rate": 9.96910424550661e-07, "loss": 0.6447, "step": 5773 }, { "epoch": 0.8612126183906331, "grad_norm": 1.4801620244979858, "learning_rate": 9.948084570913585e-07, "loss": 0.5802, "step": 5774 }, { "epoch": 0.8613617719442166, "grad_norm": 1.265730857849121, "learning_rate": 9.92708591951762e-07, "loss": 0.5477, "step": 5775 }, { "epoch": 0.8615109254978, "grad_norm": 1.394662857055664, "learning_rate": 9.906108296221007e-07, "loss": 0.5968, "step": 5776 }, { "epoch": 0.8616600790513834, "grad_norm": 1.3525843620300293, "learning_rate": 9.885151705921115e-07, "loss": 0.5686, "step": 5777 }, { "epoch": 0.8618092326049668, "grad_norm": 1.4474211931228638, "learning_rate": 9.864216153510364e-07, "loss": 0.5868, "step": 5778 }, { "epoch": 0.8619583861585502, "grad_norm": 1.244974970817566, "learning_rate": 9.843301643876292e-07, "loss": 0.4808, "step": 5779 }, { "epoch": 0.8621075397121336, "grad_norm": 1.3927932977676392, "learning_rate": 9.822408181901544e-07, "loss": 0.5905, "step": 5780 }, { "epoch": 0.8622566932657171, "grad_norm": 1.633229374885559, "learning_rate": 9.801535772463856e-07, "loss": 0.6553, "step": 5781 }, { "epoch": 0.8624058468193004, "grad_norm": 1.4357833862304688, "learning_rate": 9.78068442043597e-07, "loss": 0.5774, "step": 5782 }, { "epoch": 0.8625550003728839, "grad_norm": 1.920028805732727, "learning_rate": 9.759854130685798e-07, "loss": 0.5689, "step": 5783 }, { "epoch": 0.8627041539264673, "grad_norm": 1.4793803691864014, "learning_rate": 9.739044908076301e-07, "loss": 0.514, "step": 5784 }, { "epoch": 0.8628533074800507, "grad_norm": 0.8535627126693726, "learning_rate": 9.718256757465526e-07, "loss": 0.6107, "step": 5785 }, { "epoch": 0.8630024610336341, "grad_norm": 1.320291519165039, "learning_rate": 9.697489683706607e-07, "loss": 0.5013, "step": 5786 }, { "epoch": 0.8631516145872176, "grad_norm": 1.9108673334121704, "learning_rate": 9.67674369164776e-07, "loss": 0.7073, "step": 5787 }, { "epoch": 0.8633007681408009, "grad_norm": 1.4519827365875244, "learning_rate": 9.656018786132236e-07, "loss": 0.5863, "step": 5788 }, { "epoch": 0.8634499216943844, "grad_norm": 1.344799518585205, "learning_rate": 9.63531497199841e-07, "loss": 0.5687, "step": 5789 }, { "epoch": 0.8635990752479678, "grad_norm": 1.7374730110168457, "learning_rate": 9.614632254079748e-07, "loss": 0.5781, "step": 5790 }, { "epoch": 0.8637482288015512, "grad_norm": 1.659446120262146, "learning_rate": 9.59397063720472e-07, "loss": 0.5173, "step": 5791 }, { "epoch": 0.8638973823551346, "grad_norm": 1.587687611579895, "learning_rate": 9.573330126196912e-07, "loss": 0.5679, "step": 5792 }, { "epoch": 0.864046535908718, "grad_norm": 1.5753635168075562, "learning_rate": 9.552710725875047e-07, "loss": 0.6242, "step": 5793 }, { "epoch": 0.8641956894623014, "grad_norm": 1.2705652713775635, "learning_rate": 9.532112441052799e-07, "loss": 0.4979, "step": 5794 }, { "epoch": 0.8643448430158849, "grad_norm": 1.3727149963378906, "learning_rate": 9.511535276538986e-07, "loss": 0.5253, "step": 5795 }, { "epoch": 0.8644939965694682, "grad_norm": 1.4755113124847412, "learning_rate": 9.490979237137487e-07, "loss": 0.5526, "step": 5796 }, { "epoch": 0.8646431501230517, "grad_norm": 1.3500769138336182, "learning_rate": 9.470444327647221e-07, "loss": 0.5632, "step": 5797 }, { "epoch": 0.8647923036766351, "grad_norm": 1.372314691543579, "learning_rate": 9.449930552862208e-07, "loss": 0.526, "step": 5798 }, { "epoch": 0.8649414572302185, "grad_norm": 1.389938235282898, "learning_rate": 9.429437917571526e-07, "loss": 0.5242, "step": 5799 }, { "epoch": 0.8650906107838019, "grad_norm": 1.833687663078308, "learning_rate": 9.408966426559296e-07, "loss": 0.4741, "step": 5800 }, { "epoch": 0.8652397643373854, "grad_norm": 1.6872609853744507, "learning_rate": 9.388516084604704e-07, "loss": 0.5701, "step": 5801 }, { "epoch": 0.8653889178909687, "grad_norm": 1.7586884498596191, "learning_rate": 9.368086896482065e-07, "loss": 0.6212, "step": 5802 }, { "epoch": 0.8655380714445522, "grad_norm": 1.3861644268035889, "learning_rate": 9.347678866960664e-07, "loss": 0.5466, "step": 5803 }, { "epoch": 0.8656872249981356, "grad_norm": 1.3338950872421265, "learning_rate": 9.3272920008049e-07, "loss": 0.6107, "step": 5804 }, { "epoch": 0.865836378551719, "grad_norm": 1.5830862522125244, "learning_rate": 9.306926302774233e-07, "loss": 0.5794, "step": 5805 }, { "epoch": 0.8659855321053024, "grad_norm": 1.4734241962432861, "learning_rate": 9.286581777623127e-07, "loss": 0.6133, "step": 5806 }, { "epoch": 0.8661346856588859, "grad_norm": 1.4566676616668701, "learning_rate": 9.26625843010116e-07, "loss": 0.4955, "step": 5807 }, { "epoch": 0.8662838392124692, "grad_norm": 1.5215872526168823, "learning_rate": 9.24595626495296e-07, "loss": 0.5838, "step": 5808 }, { "epoch": 0.8664329927660527, "grad_norm": 1.4484728574752808, "learning_rate": 9.225675286918201e-07, "loss": 0.6234, "step": 5809 }, { "epoch": 0.866582146319636, "grad_norm": 1.2918686866760254, "learning_rate": 9.205415500731551e-07, "loss": 0.5506, "step": 5810 }, { "epoch": 0.8667312998732195, "grad_norm": 1.2676738500595093, "learning_rate": 9.185176911122873e-07, "loss": 0.5586, "step": 5811 }, { "epoch": 0.8668804534268029, "grad_norm": 1.4713071584701538, "learning_rate": 9.164959522816941e-07, "loss": 0.5828, "step": 5812 }, { "epoch": 0.8670296069803863, "grad_norm": 0.8359708189964294, "learning_rate": 9.144763340533635e-07, "loss": 0.6457, "step": 5813 }, { "epoch": 0.8671787605339697, "grad_norm": 1.5632041692733765, "learning_rate": 9.124588368987896e-07, "loss": 0.6265, "step": 5814 }, { "epoch": 0.8673279140875532, "grad_norm": 1.425057053565979, "learning_rate": 9.104434612889723e-07, "loss": 0.5839, "step": 5815 }, { "epoch": 0.8674770676411365, "grad_norm": 1.3936814069747925, "learning_rate": 9.084302076944096e-07, "loss": 0.5833, "step": 5816 }, { "epoch": 0.86762622119472, "grad_norm": 1.211134672164917, "learning_rate": 9.0641907658511e-07, "loss": 0.4782, "step": 5817 }, { "epoch": 0.8677753747483034, "grad_norm": 1.3894529342651367, "learning_rate": 9.044100684305857e-07, "loss": 0.5431, "step": 5818 }, { "epoch": 0.8679245283018868, "grad_norm": 1.2360235452651978, "learning_rate": 9.024031836998525e-07, "loss": 0.508, "step": 5819 }, { "epoch": 0.8680736818554702, "grad_norm": 1.2422046661376953, "learning_rate": 9.003984228614293e-07, "loss": 0.4997, "step": 5820 }, { "epoch": 0.8682228354090537, "grad_norm": 1.6157113313674927, "learning_rate": 8.983957863833437e-07, "loss": 0.5629, "step": 5821 }, { "epoch": 0.868371988962637, "grad_norm": 1.3271141052246094, "learning_rate": 8.963952747331195e-07, "loss": 0.5339, "step": 5822 }, { "epoch": 0.8685211425162205, "grad_norm": 1.3800272941589355, "learning_rate": 8.943968883777909e-07, "loss": 0.5591, "step": 5823 }, { "epoch": 0.8686702960698038, "grad_norm": 1.4408020973205566, "learning_rate": 8.92400627783897e-07, "loss": 0.5306, "step": 5824 }, { "epoch": 0.8688194496233873, "grad_norm": 1.3685803413391113, "learning_rate": 8.904064934174717e-07, "loss": 0.4821, "step": 5825 }, { "epoch": 0.8689686031769707, "grad_norm": 1.3901057243347168, "learning_rate": 8.884144857440624e-07, "loss": 0.4682, "step": 5826 }, { "epoch": 0.8691177567305541, "grad_norm": 1.516501545906067, "learning_rate": 8.864246052287151e-07, "loss": 0.4884, "step": 5827 }, { "epoch": 0.8692669102841375, "grad_norm": 1.2334377765655518, "learning_rate": 8.844368523359803e-07, "loss": 0.5318, "step": 5828 }, { "epoch": 0.869416063837721, "grad_norm": 1.4251219034194946, "learning_rate": 8.824512275299114e-07, "loss": 0.6459, "step": 5829 }, { "epoch": 0.8695652173913043, "grad_norm": 1.3931220769882202, "learning_rate": 8.804677312740673e-07, "loss": 0.5744, "step": 5830 }, { "epoch": 0.8697143709448878, "grad_norm": 2.111722707748413, "learning_rate": 8.784863640315045e-07, "loss": 0.5481, "step": 5831 }, { "epoch": 0.8698635244984712, "grad_norm": 1.4732441902160645, "learning_rate": 8.765071262647873e-07, "loss": 0.5998, "step": 5832 }, { "epoch": 0.8700126780520546, "grad_norm": 1.473974585533142, "learning_rate": 8.745300184359817e-07, "loss": 0.5438, "step": 5833 }, { "epoch": 0.870161831605638, "grad_norm": 1.338034987449646, "learning_rate": 8.725550410066575e-07, "loss": 0.5495, "step": 5834 }, { "epoch": 0.8703109851592215, "grad_norm": 1.4612029790878296, "learning_rate": 8.705821944378834e-07, "loss": 0.547, "step": 5835 }, { "epoch": 0.8704601387128048, "grad_norm": 1.4584887027740479, "learning_rate": 8.686114791902334e-07, "loss": 0.5834, "step": 5836 }, { "epoch": 0.8706092922663883, "grad_norm": 1.359711766242981, "learning_rate": 8.666428957237849e-07, "loss": 0.597, "step": 5837 }, { "epoch": 0.8707584458199716, "grad_norm": 1.5461796522140503, "learning_rate": 8.64676444498116e-07, "loss": 0.6233, "step": 5838 }, { "epoch": 0.8709075993735551, "grad_norm": 0.7453974485397339, "learning_rate": 8.627121259723071e-07, "loss": 0.5628, "step": 5839 }, { "epoch": 0.8710567529271385, "grad_norm": 1.488402247428894, "learning_rate": 8.607499406049424e-07, "loss": 0.5899, "step": 5840 }, { "epoch": 0.8712059064807219, "grad_norm": 1.2960312366485596, "learning_rate": 8.587898888541035e-07, "loss": 0.5219, "step": 5841 }, { "epoch": 0.8713550600343053, "grad_norm": 1.4375495910644531, "learning_rate": 8.568319711773787e-07, "loss": 0.5709, "step": 5842 }, { "epoch": 0.8715042135878888, "grad_norm": 1.4594416618347168, "learning_rate": 8.54876188031859e-07, "loss": 0.5742, "step": 5843 }, { "epoch": 0.8716533671414721, "grad_norm": 1.8842312097549438, "learning_rate": 8.529225398741303e-07, "loss": 0.5822, "step": 5844 }, { "epoch": 0.8718025206950556, "grad_norm": 1.4726389646530151, "learning_rate": 8.509710271602833e-07, "loss": 0.5909, "step": 5845 }, { "epoch": 0.871951674248639, "grad_norm": 1.4784488677978516, "learning_rate": 8.490216503459181e-07, "loss": 0.6564, "step": 5846 }, { "epoch": 0.8721008278022224, "grad_norm": 1.5606573820114136, "learning_rate": 8.470744098861239e-07, "loss": 0.6593, "step": 5847 }, { "epoch": 0.8722499813558058, "grad_norm": 1.5084614753723145, "learning_rate": 8.45129306235497e-07, "loss": 0.5995, "step": 5848 }, { "epoch": 0.8723991349093893, "grad_norm": 1.523701548576355, "learning_rate": 8.431863398481366e-07, "loss": 0.6347, "step": 5849 }, { "epoch": 0.8725482884629726, "grad_norm": 1.177024483680725, "learning_rate": 8.412455111776374e-07, "loss": 0.5829, "step": 5850 }, { "epoch": 0.8726974420165561, "grad_norm": 1.4557708501815796, "learning_rate": 8.393068206770993e-07, "loss": 0.6081, "step": 5851 }, { "epoch": 0.8728465955701394, "grad_norm": 1.3021208047866821, "learning_rate": 8.373702687991247e-07, "loss": 0.528, "step": 5852 }, { "epoch": 0.8729957491237229, "grad_norm": 1.4116733074188232, "learning_rate": 8.354358559958087e-07, "loss": 0.445, "step": 5853 }, { "epoch": 0.8731449026773063, "grad_norm": 1.5552104711532593, "learning_rate": 8.335035827187577e-07, "loss": 0.6093, "step": 5854 }, { "epoch": 0.8732940562308897, "grad_norm": 1.3867359161376953, "learning_rate": 8.31573449419073e-07, "loss": 0.5804, "step": 5855 }, { "epoch": 0.8734432097844731, "grad_norm": 1.4997435808181763, "learning_rate": 8.296454565473522e-07, "loss": 0.5545, "step": 5856 }, { "epoch": 0.8735923633380566, "grad_norm": 1.7242966890335083, "learning_rate": 8.277196045537006e-07, "loss": 0.5727, "step": 5857 }, { "epoch": 0.8737415168916399, "grad_norm": 1.4551805257797241, "learning_rate": 8.25795893887722e-07, "loss": 0.5727, "step": 5858 }, { "epoch": 0.8738906704452234, "grad_norm": 1.4242364168167114, "learning_rate": 8.238743249985159e-07, "loss": 0.5756, "step": 5859 }, { "epoch": 0.8740398239988068, "grad_norm": 1.2446376085281372, "learning_rate": 8.219548983346859e-07, "loss": 0.4654, "step": 5860 }, { "epoch": 0.8741889775523902, "grad_norm": 1.5235934257507324, "learning_rate": 8.200376143443356e-07, "loss": 0.421, "step": 5861 }, { "epoch": 0.8743381311059736, "grad_norm": 1.3249725103378296, "learning_rate": 8.181224734750659e-07, "loss": 0.5339, "step": 5862 }, { "epoch": 0.874487284659557, "grad_norm": 1.3856010437011719, "learning_rate": 8.162094761739792e-07, "loss": 0.5557, "step": 5863 }, { "epoch": 0.8746364382131404, "grad_norm": 1.6080118417739868, "learning_rate": 8.14298622887677e-07, "loss": 0.5113, "step": 5864 }, { "epoch": 0.8747855917667239, "grad_norm": 1.3275846242904663, "learning_rate": 8.123899140622616e-07, "loss": 0.5243, "step": 5865 }, { "epoch": 0.8749347453203072, "grad_norm": 1.2990962266921997, "learning_rate": 8.104833501433318e-07, "loss": 0.5295, "step": 5866 }, { "epoch": 0.8750838988738907, "grad_norm": 1.5265895128250122, "learning_rate": 8.085789315759862e-07, "loss": 0.5635, "step": 5867 }, { "epoch": 0.8752330524274741, "grad_norm": 1.3528826236724854, "learning_rate": 8.06676658804827e-07, "loss": 0.5777, "step": 5868 }, { "epoch": 0.8753822059810575, "grad_norm": 1.4859877824783325, "learning_rate": 8.047765322739476e-07, "loss": 0.5252, "step": 5869 }, { "epoch": 0.8755313595346409, "grad_norm": 1.2993046045303345, "learning_rate": 8.028785524269466e-07, "loss": 0.5892, "step": 5870 }, { "epoch": 0.8756805130882244, "grad_norm": 1.1702778339385986, "learning_rate": 8.009827197069209e-07, "loss": 0.5027, "step": 5871 }, { "epoch": 0.8758296666418077, "grad_norm": 1.2770600318908691, "learning_rate": 7.990890345564628e-07, "loss": 0.5677, "step": 5872 }, { "epoch": 0.8759788201953912, "grad_norm": 1.240026593208313, "learning_rate": 7.971974974176666e-07, "loss": 0.4906, "step": 5873 }, { "epoch": 0.8761279737489746, "grad_norm": 1.5166163444519043, "learning_rate": 7.953081087321257e-07, "loss": 0.4721, "step": 5874 }, { "epoch": 0.876277127302558, "grad_norm": 2.047994613647461, "learning_rate": 7.934208689409251e-07, "loss": 0.5349, "step": 5875 }, { "epoch": 0.8764262808561414, "grad_norm": 1.4849255084991455, "learning_rate": 7.915357784846556e-07, "loss": 0.5541, "step": 5876 }, { "epoch": 0.8765754344097249, "grad_norm": 0.717026948928833, "learning_rate": 7.896528378034052e-07, "loss": 0.6222, "step": 5877 }, { "epoch": 0.8767245879633082, "grad_norm": 1.4477449655532837, "learning_rate": 7.877720473367556e-07, "loss": 0.5434, "step": 5878 }, { "epoch": 0.8768737415168917, "grad_norm": 0.8157774209976196, "learning_rate": 7.858934075237901e-07, "loss": 0.5889, "step": 5879 }, { "epoch": 0.877022895070475, "grad_norm": 1.325366497039795, "learning_rate": 7.840169188030899e-07, "loss": 0.5149, "step": 5880 }, { "epoch": 0.8771720486240585, "grad_norm": 1.4087680578231812, "learning_rate": 7.821425816127337e-07, "loss": 0.5458, "step": 5881 }, { "epoch": 0.8773212021776419, "grad_norm": 1.4192196130752563, "learning_rate": 7.802703963902968e-07, "loss": 0.5587, "step": 5882 }, { "epoch": 0.8774703557312253, "grad_norm": 0.8774654269218445, "learning_rate": 7.784003635728555e-07, "loss": 0.6507, "step": 5883 }, { "epoch": 0.8776195092848087, "grad_norm": 1.2976351976394653, "learning_rate": 7.765324835969757e-07, "loss": 0.5819, "step": 5884 }, { "epoch": 0.8777686628383922, "grad_norm": 1.306409239768982, "learning_rate": 7.746667568987287e-07, "loss": 0.4585, "step": 5885 }, { "epoch": 0.8779178163919755, "grad_norm": 1.3554062843322754, "learning_rate": 7.728031839136818e-07, "loss": 0.5864, "step": 5886 }, { "epoch": 0.878066969945559, "grad_norm": 1.5087326765060425, "learning_rate": 7.70941765076898e-07, "loss": 0.5562, "step": 5887 }, { "epoch": 0.8782161234991424, "grad_norm": 1.573703408241272, "learning_rate": 7.690825008229319e-07, "loss": 0.5832, "step": 5888 }, { "epoch": 0.8783652770527258, "grad_norm": 1.1819467544555664, "learning_rate": 7.672253915858496e-07, "loss": 0.5282, "step": 5889 }, { "epoch": 0.8785144306063092, "grad_norm": 1.3788132667541504, "learning_rate": 7.653704377991977e-07, "loss": 0.5318, "step": 5890 }, { "epoch": 0.8786635841598927, "grad_norm": 2.3211276531219482, "learning_rate": 7.635176398960308e-07, "loss": 0.5849, "step": 5891 }, { "epoch": 0.878812737713476, "grad_norm": 1.3818563222885132, "learning_rate": 7.616669983088953e-07, "loss": 0.5388, "step": 5892 }, { "epoch": 0.8789618912670595, "grad_norm": 1.2697454690933228, "learning_rate": 7.598185134698366e-07, "loss": 0.5339, "step": 5893 }, { "epoch": 0.8791110448206428, "grad_norm": 1.3760358095169067, "learning_rate": 7.579721858103928e-07, "loss": 0.5208, "step": 5894 }, { "epoch": 0.8792601983742263, "grad_norm": 1.6002740859985352, "learning_rate": 7.561280157616036e-07, "loss": 0.496, "step": 5895 }, { "epoch": 0.8794093519278097, "grad_norm": 1.4447792768478394, "learning_rate": 7.542860037540012e-07, "loss": 0.563, "step": 5896 }, { "epoch": 0.8795585054813931, "grad_norm": 1.4789339303970337, "learning_rate": 7.524461502176128e-07, "loss": 0.5936, "step": 5897 }, { "epoch": 0.8797076590349765, "grad_norm": 1.2659014463424683, "learning_rate": 7.506084555819682e-07, "loss": 0.5623, "step": 5898 }, { "epoch": 0.87985681258856, "grad_norm": 1.3049269914627075, "learning_rate": 7.487729202760874e-07, "loss": 0.5341, "step": 5899 }, { "epoch": 0.8800059661421433, "grad_norm": 2.0171890258789062, "learning_rate": 7.469395447284866e-07, "loss": 0.5701, "step": 5900 }, { "epoch": 0.8801551196957268, "grad_norm": 1.4264051914215088, "learning_rate": 7.451083293671801e-07, "loss": 0.6333, "step": 5901 }, { "epoch": 0.8803042732493102, "grad_norm": 1.6575266122817993, "learning_rate": 7.432792746196793e-07, "loss": 0.5827, "step": 5902 }, { "epoch": 0.8804534268028936, "grad_norm": 1.5154228210449219, "learning_rate": 7.414523809129836e-07, "loss": 0.5302, "step": 5903 }, { "epoch": 0.880602580356477, "grad_norm": 1.4828745126724243, "learning_rate": 7.396276486735965e-07, "loss": 0.4439, "step": 5904 }, { "epoch": 0.8807517339100605, "grad_norm": 1.3921555280685425, "learning_rate": 7.378050783275115e-07, "loss": 0.4953, "step": 5905 }, { "epoch": 0.8809008874636438, "grad_norm": 1.4031894207000732, "learning_rate": 7.359846703002216e-07, "loss": 0.6238, "step": 5906 }, { "epoch": 0.8810500410172273, "grad_norm": 1.1696685552597046, "learning_rate": 7.341664250167113e-07, "loss": 0.415, "step": 5907 }, { "epoch": 0.8811991945708106, "grad_norm": 1.8257795572280884, "learning_rate": 7.323503429014633e-07, "loss": 0.5612, "step": 5908 }, { "epoch": 0.8813483481243941, "grad_norm": 1.7560288906097412, "learning_rate": 7.305364243784507e-07, "loss": 0.5549, "step": 5909 }, { "epoch": 0.8814975016779775, "grad_norm": 1.4599133729934692, "learning_rate": 7.287246698711459e-07, "loss": 0.6225, "step": 5910 }, { "epoch": 0.8816466552315609, "grad_norm": 1.2949392795562744, "learning_rate": 7.269150798025148e-07, "loss": 0.5612, "step": 5911 }, { "epoch": 0.8817958087851443, "grad_norm": 1.2897366285324097, "learning_rate": 7.251076545950198e-07, "loss": 0.577, "step": 5912 }, { "epoch": 0.8819449623387278, "grad_norm": 0.8224018812179565, "learning_rate": 7.233023946706108e-07, "loss": 0.6196, "step": 5913 }, { "epoch": 0.8820941158923111, "grad_norm": 1.2903574705123901, "learning_rate": 7.214993004507409e-07, "loss": 0.5593, "step": 5914 }, { "epoch": 0.8822432694458946, "grad_norm": 1.4536138772964478, "learning_rate": 7.196983723563544e-07, "loss": 0.5885, "step": 5915 }, { "epoch": 0.882392422999478, "grad_norm": 2.239551067352295, "learning_rate": 7.178996108078873e-07, "loss": 0.524, "step": 5916 }, { "epoch": 0.8825415765530614, "grad_norm": 1.2399370670318604, "learning_rate": 7.161030162252735e-07, "loss": 0.4409, "step": 5917 }, { "epoch": 0.8826907301066448, "grad_norm": 1.4588310718536377, "learning_rate": 7.143085890279411e-07, "loss": 0.4838, "step": 5918 }, { "epoch": 0.8828398836602283, "grad_norm": 1.5789618492126465, "learning_rate": 7.12516329634807e-07, "loss": 0.5821, "step": 5919 }, { "epoch": 0.8829890372138116, "grad_norm": 1.6375365257263184, "learning_rate": 7.107262384642877e-07, "loss": 0.5593, "step": 5920 }, { "epoch": 0.8831381907673951, "grad_norm": 0.8161758184432983, "learning_rate": 7.089383159342933e-07, "loss": 0.6244, "step": 5921 }, { "epoch": 0.8832873443209784, "grad_norm": 1.3628851175308228, "learning_rate": 7.071525624622211e-07, "loss": 0.501, "step": 5922 }, { "epoch": 0.8834364978745619, "grad_norm": 1.7157005071640015, "learning_rate": 7.053689784649676e-07, "loss": 0.5522, "step": 5923 }, { "epoch": 0.8835856514281453, "grad_norm": 0.8891729712486267, "learning_rate": 7.035875643589274e-07, "loss": 0.6418, "step": 5924 }, { "epoch": 0.8837348049817287, "grad_norm": 1.4911422729492188, "learning_rate": 7.018083205599779e-07, "loss": 0.5858, "step": 5925 }, { "epoch": 0.8838839585353121, "grad_norm": 1.3125324249267578, "learning_rate": 7.000312474834959e-07, "loss": 0.4909, "step": 5926 }, { "epoch": 0.8840331120888956, "grad_norm": 0.8762341141700745, "learning_rate": 6.982563455443525e-07, "loss": 0.6331, "step": 5927 }, { "epoch": 0.8841822656424789, "grad_norm": 1.4020088911056519, "learning_rate": 6.964836151569066e-07, "loss": 0.5849, "step": 5928 }, { "epoch": 0.8843314191960624, "grad_norm": 1.402654767036438, "learning_rate": 6.947130567350147e-07, "loss": 0.5299, "step": 5929 }, { "epoch": 0.8844805727496458, "grad_norm": 1.388558268547058, "learning_rate": 6.929446706920285e-07, "loss": 0.5037, "step": 5930 }, { "epoch": 0.8846297263032292, "grad_norm": 1.3487002849578857, "learning_rate": 6.911784574407832e-07, "loss": 0.6053, "step": 5931 }, { "epoch": 0.8847788798568126, "grad_norm": 1.4404677152633667, "learning_rate": 6.894144173936146e-07, "loss": 0.5512, "step": 5932 }, { "epoch": 0.884928033410396, "grad_norm": 1.2007559537887573, "learning_rate": 6.876525509623532e-07, "loss": 0.4814, "step": 5933 }, { "epoch": 0.8850771869639794, "grad_norm": 1.2625792026519775, "learning_rate": 6.858928585583135e-07, "loss": 0.6071, "step": 5934 }, { "epoch": 0.8852263405175629, "grad_norm": 1.343592882156372, "learning_rate": 6.841353405923079e-07, "loss": 0.525, "step": 5935 }, { "epoch": 0.8853754940711462, "grad_norm": 1.1989115476608276, "learning_rate": 6.823799974746425e-07, "loss": 0.5002, "step": 5936 }, { "epoch": 0.8855246476247297, "grad_norm": 1.4391436576843262, "learning_rate": 6.806268296151097e-07, "loss": 0.5639, "step": 5937 }, { "epoch": 0.8856738011783131, "grad_norm": 1.2248378992080688, "learning_rate": 6.788758374229998e-07, "loss": 0.528, "step": 5938 }, { "epoch": 0.8858229547318965, "grad_norm": 1.338749647140503, "learning_rate": 6.771270213070935e-07, "loss": 0.5816, "step": 5939 }, { "epoch": 0.8859721082854799, "grad_norm": 1.615903377532959, "learning_rate": 6.753803816756643e-07, "loss": 0.5637, "step": 5940 }, { "epoch": 0.8861212618390634, "grad_norm": 1.4641857147216797, "learning_rate": 6.736359189364716e-07, "loss": 0.5715, "step": 5941 }, { "epoch": 0.8862704153926467, "grad_norm": 1.504456877708435, "learning_rate": 6.718936334967774e-07, "loss": 0.5432, "step": 5942 }, { "epoch": 0.8864195689462302, "grad_norm": 1.4167040586471558, "learning_rate": 6.701535257633252e-07, "loss": 0.5892, "step": 5943 }, { "epoch": 0.8865687224998136, "grad_norm": 1.3263911008834839, "learning_rate": 6.684155961423555e-07, "loss": 0.4255, "step": 5944 }, { "epoch": 0.886717876053397, "grad_norm": 1.5757927894592285, "learning_rate": 6.666798450395995e-07, "loss": 0.5544, "step": 5945 }, { "epoch": 0.8868670296069804, "grad_norm": 1.3598827123641968, "learning_rate": 6.649462728602807e-07, "loss": 0.6032, "step": 5946 }, { "epoch": 0.8870161831605639, "grad_norm": 1.2180153131484985, "learning_rate": 6.632148800091099e-07, "loss": 0.5753, "step": 5947 }, { "epoch": 0.8871653367141472, "grad_norm": 1.3064184188842773, "learning_rate": 6.614856668902924e-07, "loss": 0.5263, "step": 5948 }, { "epoch": 0.8873144902677307, "grad_norm": 1.509033203125, "learning_rate": 6.597586339075279e-07, "loss": 0.5671, "step": 5949 }, { "epoch": 0.887463643821314, "grad_norm": 1.317120909690857, "learning_rate": 6.580337814639959e-07, "loss": 0.5643, "step": 5950 }, { "epoch": 0.8876127973748975, "grad_norm": 1.234807014465332, "learning_rate": 6.563111099623809e-07, "loss": 0.566, "step": 5951 }, { "epoch": 0.8877619509284809, "grad_norm": 1.8355786800384521, "learning_rate": 6.545906198048502e-07, "loss": 0.6038, "step": 5952 }, { "epoch": 0.8879111044820643, "grad_norm": 1.6768826246261597, "learning_rate": 6.528723113930613e-07, "loss": 0.6393, "step": 5953 }, { "epoch": 0.8880602580356477, "grad_norm": 1.3186264038085938, "learning_rate": 6.511561851281656e-07, "loss": 0.5028, "step": 5954 }, { "epoch": 0.8882094115892312, "grad_norm": 1.2595033645629883, "learning_rate": 6.494422414108048e-07, "loss": 0.5286, "step": 5955 }, { "epoch": 0.8883585651428145, "grad_norm": 1.2690939903259277, "learning_rate": 6.477304806411078e-07, "loss": 0.569, "step": 5956 }, { "epoch": 0.888507718696398, "grad_norm": 1.4180201292037964, "learning_rate": 6.460209032186971e-07, "loss": 0.5924, "step": 5957 }, { "epoch": 0.8886568722499814, "grad_norm": 0.839624285697937, "learning_rate": 6.443135095426845e-07, "loss": 0.623, "step": 5958 }, { "epoch": 0.8888060258035648, "grad_norm": 1.3707863092422485, "learning_rate": 6.426083000116723e-07, "loss": 0.4985, "step": 5959 }, { "epoch": 0.8889551793571482, "grad_norm": 1.3927679061889648, "learning_rate": 6.409052750237521e-07, "loss": 0.5728, "step": 5960 }, { "epoch": 0.8891043329107315, "grad_norm": 1.3056389093399048, "learning_rate": 6.39204434976507e-07, "loss": 0.5388, "step": 5961 }, { "epoch": 0.889253486464315, "grad_norm": 1.4065521955490112, "learning_rate": 6.375057802670081e-07, "loss": 0.5301, "step": 5962 }, { "epoch": 0.8894026400178984, "grad_norm": 1.502274513244629, "learning_rate": 6.358093112918174e-07, "loss": 0.4963, "step": 5963 }, { "epoch": 0.8895517935714818, "grad_norm": 1.4841610193252563, "learning_rate": 6.341150284469855e-07, "loss": 0.4418, "step": 5964 }, { "epoch": 0.8897009471250652, "grad_norm": 1.4155060052871704, "learning_rate": 6.324229321280572e-07, "loss": 0.5857, "step": 5965 }, { "epoch": 0.8898501006786487, "grad_norm": 1.508070945739746, "learning_rate": 6.307330227300579e-07, "loss": 0.6143, "step": 5966 }, { "epoch": 0.889999254232232, "grad_norm": 1.3365405797958374, "learning_rate": 6.290453006475117e-07, "loss": 0.5315, "step": 5967 }, { "epoch": 0.8901484077858155, "grad_norm": 1.4699842929840088, "learning_rate": 6.273597662744269e-07, "loss": 0.61, "step": 5968 }, { "epoch": 0.8902975613393989, "grad_norm": 1.3902294635772705, "learning_rate": 6.25676420004302e-07, "loss": 0.5991, "step": 5969 }, { "epoch": 0.8904467148929823, "grad_norm": 1.4616237878799438, "learning_rate": 6.239952622301248e-07, "loss": 0.5697, "step": 5970 }, { "epoch": 0.8905958684465657, "grad_norm": 1.4094517230987549, "learning_rate": 6.22316293344375e-07, "loss": 0.5982, "step": 5971 }, { "epoch": 0.8907450220001492, "grad_norm": 1.571008563041687, "learning_rate": 6.206395137390153e-07, "loss": 0.5082, "step": 5972 }, { "epoch": 0.8908941755537325, "grad_norm": 1.3661861419677734, "learning_rate": 6.189649238055018e-07, "loss": 0.5658, "step": 5973 }, { "epoch": 0.891043329107316, "grad_norm": 1.3549885749816895, "learning_rate": 6.172925239347793e-07, "loss": 0.5342, "step": 5974 }, { "epoch": 0.8911924826608993, "grad_norm": 1.5654218196868896, "learning_rate": 6.15622314517278e-07, "loss": 0.5644, "step": 5975 }, { "epoch": 0.8913416362144828, "grad_norm": 1.6693097352981567, "learning_rate": 6.13954295942919e-07, "loss": 0.55, "step": 5976 }, { "epoch": 0.8914907897680662, "grad_norm": 1.5428582429885864, "learning_rate": 6.122884686011166e-07, "loss": 0.6379, "step": 5977 }, { "epoch": 0.8916399433216496, "grad_norm": 1.3516004085540771, "learning_rate": 6.10624832880764e-07, "loss": 0.5302, "step": 5978 }, { "epoch": 0.891789096875233, "grad_norm": 1.5181691646575928, "learning_rate": 6.089633891702496e-07, "loss": 0.569, "step": 5979 }, { "epoch": 0.8919382504288165, "grad_norm": 1.1935148239135742, "learning_rate": 6.073041378574485e-07, "loss": 0.5303, "step": 5980 }, { "epoch": 0.8920874039823998, "grad_norm": 1.807208776473999, "learning_rate": 6.056470793297209e-07, "loss": 0.4543, "step": 5981 }, { "epoch": 0.8922365575359833, "grad_norm": 1.2530267238616943, "learning_rate": 6.039922139739196e-07, "loss": 0.5291, "step": 5982 }, { "epoch": 0.8923857110895667, "grad_norm": 2.5167012214660645, "learning_rate": 6.023395421763856e-07, "loss": 0.5332, "step": 5983 }, { "epoch": 0.8925348646431501, "grad_norm": 1.5749197006225586, "learning_rate": 6.006890643229424e-07, "loss": 0.6076, "step": 5984 }, { "epoch": 0.8926840181967335, "grad_norm": 1.6107743978500366, "learning_rate": 5.99040780798904e-07, "loss": 0.5727, "step": 5985 }, { "epoch": 0.892833171750317, "grad_norm": 1.640561819076538, "learning_rate": 5.973946919890772e-07, "loss": 0.4992, "step": 5986 }, { "epoch": 0.8929823253039003, "grad_norm": 1.4356745481491089, "learning_rate": 5.957507982777477e-07, "loss": 0.5382, "step": 5987 }, { "epoch": 0.8931314788574838, "grad_norm": 1.5504664182662964, "learning_rate": 5.941091000486953e-07, "loss": 0.4818, "step": 5988 }, { "epoch": 0.8932806324110671, "grad_norm": 1.4701247215270996, "learning_rate": 5.924695976851846e-07, "loss": 0.5895, "step": 5989 }, { "epoch": 0.8934297859646506, "grad_norm": 1.4455546140670776, "learning_rate": 5.908322915699694e-07, "loss": 0.5822, "step": 5990 }, { "epoch": 0.893578939518234, "grad_norm": 1.4081859588623047, "learning_rate": 5.89197182085286e-07, "loss": 0.5668, "step": 5991 }, { "epoch": 0.8937280930718174, "grad_norm": 1.4146713018417358, "learning_rate": 5.875642696128625e-07, "loss": 0.6035, "step": 5992 }, { "epoch": 0.8938772466254008, "grad_norm": 1.7417048215866089, "learning_rate": 5.859335545339129e-07, "loss": 0.5453, "step": 5993 }, { "epoch": 0.8940264001789843, "grad_norm": 1.6364978551864624, "learning_rate": 5.843050372291381e-07, "loss": 0.571, "step": 5994 }, { "epoch": 0.8941755537325676, "grad_norm": 1.8158890008926392, "learning_rate": 5.826787180787274e-07, "loss": 0.5395, "step": 5995 }, { "epoch": 0.8943247072861511, "grad_norm": 0.8481720685958862, "learning_rate": 5.810545974623549e-07, "loss": 0.6173, "step": 5996 }, { "epoch": 0.8944738608397345, "grad_norm": 1.6063097715377808, "learning_rate": 5.794326757591795e-07, "loss": 0.5749, "step": 5997 }, { "epoch": 0.8946230143933179, "grad_norm": 1.3955878019332886, "learning_rate": 5.778129533478516e-07, "loss": 0.5491, "step": 5998 }, { "epoch": 0.8947721679469013, "grad_norm": 1.3561604022979736, "learning_rate": 5.761954306065065e-07, "loss": 0.5252, "step": 5999 }, { "epoch": 0.8949213215004848, "grad_norm": 1.3369327783584595, "learning_rate": 5.745801079127622e-07, "loss": 0.4663, "step": 6000 }, { "epoch": 0.8950704750540681, "grad_norm": 1.3431497812271118, "learning_rate": 5.729669856437281e-07, "loss": 0.6001, "step": 6001 }, { "epoch": 0.8952196286076516, "grad_norm": 1.4958778619766235, "learning_rate": 5.713560641759975e-07, "loss": 0.5639, "step": 6002 }, { "epoch": 0.8953687821612349, "grad_norm": 1.2499053478240967, "learning_rate": 5.697473438856505e-07, "loss": 0.4671, "step": 6003 }, { "epoch": 0.8955179357148184, "grad_norm": 1.275039792060852, "learning_rate": 5.681408251482523e-07, "loss": 0.5047, "step": 6004 }, { "epoch": 0.8956670892684018, "grad_norm": 1.3515053987503052, "learning_rate": 5.665365083388586e-07, "loss": 0.6327, "step": 6005 }, { "epoch": 0.8958162428219852, "grad_norm": 1.485520839691162, "learning_rate": 5.649343938320029e-07, "loss": 0.5332, "step": 6006 }, { "epoch": 0.8959653963755686, "grad_norm": 1.3786566257476807, "learning_rate": 5.633344820017106e-07, "loss": 0.5343, "step": 6007 }, { "epoch": 0.8961145499291521, "grad_norm": 1.4776198863983154, "learning_rate": 5.61736773221494e-07, "loss": 0.6842, "step": 6008 }, { "epoch": 0.8962637034827354, "grad_norm": 1.542482852935791, "learning_rate": 5.601412678643447e-07, "loss": 0.5436, "step": 6009 }, { "epoch": 0.8964128570363189, "grad_norm": 1.3908394575119019, "learning_rate": 5.585479663027437e-07, "loss": 0.6101, "step": 6010 }, { "epoch": 0.8965620105899023, "grad_norm": 1.4181362390518188, "learning_rate": 5.569568689086602e-07, "loss": 0.6149, "step": 6011 }, { "epoch": 0.8967111641434857, "grad_norm": 1.335484266281128, "learning_rate": 5.553679760535447e-07, "loss": 0.5931, "step": 6012 }, { "epoch": 0.8968603176970691, "grad_norm": 1.3142467737197876, "learning_rate": 5.537812881083349e-07, "loss": 0.5376, "step": 6013 }, { "epoch": 0.8970094712506526, "grad_norm": 1.4012726545333862, "learning_rate": 5.521968054434534e-07, "loss": 0.5283, "step": 6014 }, { "epoch": 0.8971586248042359, "grad_norm": 1.3424811363220215, "learning_rate": 5.506145284288056e-07, "loss": 0.5943, "step": 6015 }, { "epoch": 0.8973077783578194, "grad_norm": 1.7090808153152466, "learning_rate": 5.49034457433787e-07, "loss": 0.5442, "step": 6016 }, { "epoch": 0.8974569319114027, "grad_norm": 1.3364226818084717, "learning_rate": 5.474565928272735e-07, "loss": 0.4588, "step": 6017 }, { "epoch": 0.8976060854649862, "grad_norm": 1.5458649396896362, "learning_rate": 5.458809349776306e-07, "loss": 0.542, "step": 6018 }, { "epoch": 0.8977552390185696, "grad_norm": 1.4048577547073364, "learning_rate": 5.443074842527007e-07, "loss": 0.5253, "step": 6019 }, { "epoch": 0.897904392572153, "grad_norm": 1.3977075815200806, "learning_rate": 5.427362410198212e-07, "loss": 0.6753, "step": 6020 }, { "epoch": 0.8980535461257364, "grad_norm": 1.6220837831497192, "learning_rate": 5.411672056458051e-07, "loss": 0.525, "step": 6021 }, { "epoch": 0.8982026996793199, "grad_norm": 1.4005444049835205, "learning_rate": 5.396003784969551e-07, "loss": 0.4984, "step": 6022 }, { "epoch": 0.8983518532329032, "grad_norm": 1.3667255640029907, "learning_rate": 5.380357599390573e-07, "loss": 0.5483, "step": 6023 }, { "epoch": 0.8985010067864867, "grad_norm": 1.3035671710968018, "learning_rate": 5.364733503373842e-07, "loss": 0.5487, "step": 6024 }, { "epoch": 0.8986501603400701, "grad_norm": 1.4547854661941528, "learning_rate": 5.34913150056685e-07, "loss": 0.6089, "step": 6025 }, { "epoch": 0.8987993138936535, "grad_norm": 2.2389509677886963, "learning_rate": 5.333551594612018e-07, "loss": 0.5647, "step": 6026 }, { "epoch": 0.8989484674472369, "grad_norm": 1.5187586545944214, "learning_rate": 5.317993789146591e-07, "loss": 0.5142, "step": 6027 }, { "epoch": 0.8990976210008204, "grad_norm": 1.5023043155670166, "learning_rate": 5.302458087802587e-07, "loss": 0.6502, "step": 6028 }, { "epoch": 0.8992467745544037, "grad_norm": 1.4799951314926147, "learning_rate": 5.286944494206969e-07, "loss": 0.5509, "step": 6029 }, { "epoch": 0.8993959281079872, "grad_norm": 1.921838641166687, "learning_rate": 5.271453011981464e-07, "loss": 0.5021, "step": 6030 }, { "epoch": 0.8995450816615705, "grad_norm": 1.3058184385299683, "learning_rate": 5.255983644742646e-07, "loss": 0.5298, "step": 6031 }, { "epoch": 0.899694235215154, "grad_norm": 1.75776207447052, "learning_rate": 5.240536396101948e-07, "loss": 0.5641, "step": 6032 }, { "epoch": 0.8998433887687374, "grad_norm": 1.6663942337036133, "learning_rate": 5.225111269665651e-07, "loss": 0.6251, "step": 6033 }, { "epoch": 0.8999925423223208, "grad_norm": 0.7969785928726196, "learning_rate": 5.209708269034797e-07, "loss": 0.6053, "step": 6034 }, { "epoch": 0.9001416958759042, "grad_norm": 1.456571340560913, "learning_rate": 5.194327397805365e-07, "loss": 0.5855, "step": 6035 }, { "epoch": 0.9002908494294877, "grad_norm": 1.3370128870010376, "learning_rate": 5.178968659568084e-07, "loss": 0.6363, "step": 6036 }, { "epoch": 0.900440002983071, "grad_norm": 0.8846838474273682, "learning_rate": 5.163632057908574e-07, "loss": 0.6471, "step": 6037 }, { "epoch": 0.9005891565366545, "grad_norm": 2.2017908096313477, "learning_rate": 5.148317596407259e-07, "loss": 0.6344, "step": 6038 }, { "epoch": 0.9007383100902379, "grad_norm": 1.4279061555862427, "learning_rate": 5.133025278639403e-07, "loss": 0.5477, "step": 6039 }, { "epoch": 0.9008874636438213, "grad_norm": 1.2664328813552856, "learning_rate": 5.117755108175071e-07, "loss": 0.4725, "step": 6040 }, { "epoch": 0.9010366171974047, "grad_norm": 1.2934235334396362, "learning_rate": 5.102507088579189e-07, "loss": 0.4467, "step": 6041 }, { "epoch": 0.9011857707509882, "grad_norm": 1.309348225593567, "learning_rate": 5.087281223411522e-07, "loss": 0.5312, "step": 6042 }, { "epoch": 0.9013349243045715, "grad_norm": 1.4115219116210938, "learning_rate": 5.072077516226648e-07, "loss": 0.6215, "step": 6043 }, { "epoch": 0.901484077858155, "grad_norm": 1.5063204765319824, "learning_rate": 5.05689597057395e-07, "loss": 0.5227, "step": 6044 }, { "epoch": 0.9016332314117383, "grad_norm": 1.4847129583358765, "learning_rate": 5.04173658999767e-07, "loss": 0.4935, "step": 6045 }, { "epoch": 0.9017823849653218, "grad_norm": 1.9710776805877686, "learning_rate": 5.026599378036845e-07, "loss": 0.5671, "step": 6046 }, { "epoch": 0.9019315385189052, "grad_norm": 1.5472122430801392, "learning_rate": 5.011484338225381e-07, "loss": 0.5314, "step": 6047 }, { "epoch": 0.9020806920724886, "grad_norm": 1.3489503860473633, "learning_rate": 4.996391474091966e-07, "loss": 0.4534, "step": 6048 }, { "epoch": 0.902229845626072, "grad_norm": 0.7266349196434021, "learning_rate": 4.981320789160138e-07, "loss": 0.6052, "step": 6049 }, { "epoch": 0.9023789991796555, "grad_norm": 1.495004415512085, "learning_rate": 4.966272286948215e-07, "loss": 0.6119, "step": 6050 }, { "epoch": 0.9025281527332388, "grad_norm": 1.4144567251205444, "learning_rate": 4.951245970969399e-07, "loss": 0.4742, "step": 6051 }, { "epoch": 0.9026773062868223, "grad_norm": 1.3667622804641724, "learning_rate": 4.936241844731671e-07, "loss": 0.6454, "step": 6052 }, { "epoch": 0.9028264598404057, "grad_norm": 1.363696813583374, "learning_rate": 4.921259911737831e-07, "loss": 0.4792, "step": 6053 }, { "epoch": 0.9029756133939891, "grad_norm": 1.3833670616149902, "learning_rate": 4.906300175485501e-07, "loss": 0.4486, "step": 6054 }, { "epoch": 0.9031247669475725, "grad_norm": 1.3666125535964966, "learning_rate": 4.891362639467156e-07, "loss": 0.5324, "step": 6055 }, { "epoch": 0.903273920501156, "grad_norm": 1.2530876398086548, "learning_rate": 4.87644730717004e-07, "loss": 0.5576, "step": 6056 }, { "epoch": 0.9034230740547393, "grad_norm": 1.229817509651184, "learning_rate": 4.861554182076222e-07, "loss": 0.551, "step": 6057 }, { "epoch": 0.9035722276083228, "grad_norm": 1.244421362876892, "learning_rate": 4.846683267662632e-07, "loss": 0.4975, "step": 6058 }, { "epoch": 0.9037213811619061, "grad_norm": 1.5408861637115479, "learning_rate": 4.83183456740095e-07, "loss": 0.5124, "step": 6059 }, { "epoch": 0.9038705347154896, "grad_norm": 1.3085509538650513, "learning_rate": 4.817008084757713e-07, "loss": 0.5291, "step": 6060 }, { "epoch": 0.904019688269073, "grad_norm": 1.5107983350753784, "learning_rate": 4.802203823194263e-07, "loss": 0.5392, "step": 6061 }, { "epoch": 0.9041688418226564, "grad_norm": 1.6910589933395386, "learning_rate": 4.787421786166724e-07, "loss": 0.6065, "step": 6062 }, { "epoch": 0.9043179953762398, "grad_norm": 1.2682784795761108, "learning_rate": 4.77266197712607e-07, "loss": 0.5511, "step": 6063 }, { "epoch": 0.9044671489298233, "grad_norm": 1.247114896774292, "learning_rate": 4.757924399518099e-07, "loss": 0.5394, "step": 6064 }, { "epoch": 0.9046163024834066, "grad_norm": 1.4086530208587646, "learning_rate": 4.743209056783371e-07, "loss": 0.5085, "step": 6065 }, { "epoch": 0.9047654560369901, "grad_norm": 1.757408618927002, "learning_rate": 4.72851595235726e-07, "loss": 0.5986, "step": 6066 }, { "epoch": 0.9049146095905735, "grad_norm": 1.3698875904083252, "learning_rate": 4.7138450896700105e-07, "loss": 0.5673, "step": 6067 }, { "epoch": 0.9050637631441569, "grad_norm": 1.472625732421875, "learning_rate": 4.6991964721465944e-07, "loss": 0.5359, "step": 6068 }, { "epoch": 0.9052129166977403, "grad_norm": 1.5738986730575562, "learning_rate": 4.68457010320682e-07, "loss": 0.5947, "step": 6069 }, { "epoch": 0.9053620702513238, "grad_norm": 1.3500831127166748, "learning_rate": 4.6699659862653347e-07, "loss": 0.5945, "step": 6070 }, { "epoch": 0.9055112238049071, "grad_norm": 1.432153344154358, "learning_rate": 4.6553841247315544e-07, "loss": 0.6087, "step": 6071 }, { "epoch": 0.9056603773584906, "grad_norm": 1.3318039178848267, "learning_rate": 4.6408245220096795e-07, "loss": 0.5942, "step": 6072 }, { "epoch": 0.905809530912074, "grad_norm": 1.5444433689117432, "learning_rate": 4.6262871814987895e-07, "loss": 0.5131, "step": 6073 }, { "epoch": 0.9059586844656574, "grad_norm": 1.297227382659912, "learning_rate": 4.6117721065926824e-07, "loss": 0.5022, "step": 6074 }, { "epoch": 0.9061078380192408, "grad_norm": 1.4835376739501953, "learning_rate": 4.597279300680013e-07, "loss": 0.5468, "step": 6075 }, { "epoch": 0.9062569915728242, "grad_norm": 1.2132453918457031, "learning_rate": 4.58280876714422e-07, "loss": 0.5192, "step": 6076 }, { "epoch": 0.9064061451264076, "grad_norm": 1.7209869623184204, "learning_rate": 4.568360509363545e-07, "loss": 0.5242, "step": 6077 }, { "epoch": 0.9065552986799911, "grad_norm": 0.9110065698623657, "learning_rate": 4.5539345307110125e-07, "loss": 0.6549, "step": 6078 }, { "epoch": 0.9067044522335744, "grad_norm": 1.1870383024215698, "learning_rate": 4.539530834554473e-07, "loss": 0.5172, "step": 6079 }, { "epoch": 0.9068536057871579, "grad_norm": 1.285365104675293, "learning_rate": 4.5251494242565587e-07, "loss": 0.5757, "step": 6080 }, { "epoch": 0.9070027593407413, "grad_norm": 1.266411542892456, "learning_rate": 4.510790303174672e-07, "loss": 0.5097, "step": 6081 }, { "epoch": 0.9071519128943247, "grad_norm": 1.1954938173294067, "learning_rate": 4.496453474661089e-07, "loss": 0.5903, "step": 6082 }, { "epoch": 0.9073010664479081, "grad_norm": 1.369402289390564, "learning_rate": 4.48213894206283e-07, "loss": 0.6075, "step": 6083 }, { "epoch": 0.9074502200014916, "grad_norm": 1.4557710886001587, "learning_rate": 4.4678467087216794e-07, "loss": 0.6623, "step": 6084 }, { "epoch": 0.9075993735550749, "grad_norm": 1.5014965534210205, "learning_rate": 4.453576777974278e-07, "loss": 0.5851, "step": 6085 }, { "epoch": 0.9077485271086584, "grad_norm": 2.212064027786255, "learning_rate": 4.439329153152028e-07, "loss": 0.5159, "step": 6086 }, { "epoch": 0.9078976806622417, "grad_norm": 1.4790619611740112, "learning_rate": 4.425103837581124e-07, "loss": 0.561, "step": 6087 }, { "epoch": 0.9080468342158252, "grad_norm": 1.2800437211990356, "learning_rate": 4.410900834582543e-07, "loss": 0.4553, "step": 6088 }, { "epoch": 0.9081959877694086, "grad_norm": 1.3847705125808716, "learning_rate": 4.3967201474721e-07, "loss": 0.5298, "step": 6089 }, { "epoch": 0.908345141322992, "grad_norm": 0.8910319209098816, "learning_rate": 4.382561779560335e-07, "loss": 0.6265, "step": 6090 }, { "epoch": 0.9084942948765754, "grad_norm": 1.5311869382858276, "learning_rate": 4.3684257341526373e-07, "loss": 0.5593, "step": 6091 }, { "epoch": 0.9086434484301589, "grad_norm": 1.148821473121643, "learning_rate": 4.3543120145491555e-07, "loss": 0.5116, "step": 6092 }, { "epoch": 0.9087926019837422, "grad_norm": 1.5979512929916382, "learning_rate": 4.3402206240447997e-07, "loss": 0.5546, "step": 6093 }, { "epoch": 0.9089417555373257, "grad_norm": 1.8475250005722046, "learning_rate": 4.326151565929315e-07, "loss": 0.5446, "step": 6094 }, { "epoch": 0.9090909090909091, "grad_norm": 1.3538702726364136, "learning_rate": 4.3121048434872083e-07, "loss": 0.4614, "step": 6095 }, { "epoch": 0.9092400626444925, "grad_norm": 1.7090363502502441, "learning_rate": 4.2980804599978e-07, "loss": 0.6016, "step": 6096 }, { "epoch": 0.9093892161980759, "grad_norm": 1.3845819234848022, "learning_rate": 4.284078418735138e-07, "loss": 0.5438, "step": 6097 }, { "epoch": 0.9095383697516594, "grad_norm": 1.1382249593734741, "learning_rate": 4.270098722968108e-07, "loss": 0.4851, "step": 6098 }, { "epoch": 0.9096875233052427, "grad_norm": 1.2999651432037354, "learning_rate": 4.256141375960343e-07, "loss": 0.5697, "step": 6099 }, { "epoch": 0.9098366768588262, "grad_norm": 1.5315979719161987, "learning_rate": 4.2422063809702927e-07, "loss": 0.6147, "step": 6100 }, { "epoch": 0.9099858304124095, "grad_norm": 1.389304518699646, "learning_rate": 4.228293741251166e-07, "loss": 0.4965, "step": 6101 }, { "epoch": 0.910134983965993, "grad_norm": 1.6197841167449951, "learning_rate": 4.214403460050964e-07, "loss": 0.5109, "step": 6102 }, { "epoch": 0.9102841375195764, "grad_norm": 1.3348684310913086, "learning_rate": 4.200535540612449e-07, "loss": 0.6471, "step": 6103 }, { "epoch": 0.9104332910731598, "grad_norm": 1.3950815200805664, "learning_rate": 4.1866899861731867e-07, "loss": 0.6787, "step": 6104 }, { "epoch": 0.9105824446267432, "grad_norm": 1.9305717945098877, "learning_rate": 4.1728667999655027e-07, "loss": 0.5142, "step": 6105 }, { "epoch": 0.9107315981803267, "grad_norm": 1.4825578927993774, "learning_rate": 4.159065985216515e-07, "loss": 0.5424, "step": 6106 }, { "epoch": 0.91088075173391, "grad_norm": 1.4607192277908325, "learning_rate": 4.14528754514808e-07, "loss": 0.5108, "step": 6107 }, { "epoch": 0.9110299052874935, "grad_norm": 1.3079261779785156, "learning_rate": 4.131531482976925e-07, "loss": 0.6045, "step": 6108 }, { "epoch": 0.9111790588410769, "grad_norm": 1.3262325525283813, "learning_rate": 4.117797801914447e-07, "loss": 0.5688, "step": 6109 }, { "epoch": 0.9113282123946603, "grad_norm": 1.510025978088379, "learning_rate": 4.104086505166871e-07, "loss": 0.5147, "step": 6110 }, { "epoch": 0.9114773659482437, "grad_norm": 1.3005177974700928, "learning_rate": 4.0903975959352026e-07, "loss": 0.5681, "step": 6111 }, { "epoch": 0.9116265195018272, "grad_norm": 1.3331594467163086, "learning_rate": 4.0767310774151746e-07, "loss": 0.4858, "step": 6112 }, { "epoch": 0.9117756730554105, "grad_norm": 1.2908622026443481, "learning_rate": 4.063086952797346e-07, "loss": 0.5439, "step": 6113 }, { "epoch": 0.911924826608994, "grad_norm": 1.2435026168823242, "learning_rate": 4.049465225267013e-07, "loss": 0.47, "step": 6114 }, { "epoch": 0.9120739801625773, "grad_norm": 1.2234774827957153, "learning_rate": 4.0358658980042765e-07, "loss": 0.5241, "step": 6115 }, { "epoch": 0.9122231337161608, "grad_norm": 1.3850648403167725, "learning_rate": 4.022288974183941e-07, "loss": 0.5565, "step": 6116 }, { "epoch": 0.9123722872697442, "grad_norm": 1.5246505737304688, "learning_rate": 4.0087344569756934e-07, "loss": 0.6151, "step": 6117 }, { "epoch": 0.9125214408233276, "grad_norm": 1.5470606088638306, "learning_rate": 3.99520234954387e-07, "loss": 0.5495, "step": 6118 }, { "epoch": 0.912670594376911, "grad_norm": 0.8245221376419067, "learning_rate": 3.981692655047642e-07, "loss": 0.6082, "step": 6119 }, { "epoch": 0.9128197479304945, "grad_norm": 1.4268863201141357, "learning_rate": 3.968205376640932e-07, "loss": 0.5539, "step": 6120 }, { "epoch": 0.9129689014840778, "grad_norm": 1.4677759408950806, "learning_rate": 3.954740517472455e-07, "loss": 0.6321, "step": 6121 }, { "epoch": 0.9131180550376613, "grad_norm": 1.4430334568023682, "learning_rate": 3.94129808068564e-07, "loss": 0.5868, "step": 6122 }, { "epoch": 0.9132672085912447, "grad_norm": 1.5570635795593262, "learning_rate": 3.9278780694187114e-07, "loss": 0.5238, "step": 6123 }, { "epoch": 0.9134163621448281, "grad_norm": 1.3514147996902466, "learning_rate": 3.9144804868046724e-07, "loss": 0.617, "step": 6124 }, { "epoch": 0.9135655156984115, "grad_norm": 0.8593356013298035, "learning_rate": 3.901105335971267e-07, "loss": 0.6459, "step": 6125 }, { "epoch": 0.913714669251995, "grad_norm": 1.2918415069580078, "learning_rate": 3.887752620041008e-07, "loss": 0.5123, "step": 6126 }, { "epoch": 0.9138638228055783, "grad_norm": 1.464558482170105, "learning_rate": 3.8744223421311787e-07, "loss": 0.5569, "step": 6127 }, { "epoch": 0.9140129763591618, "grad_norm": 1.432163119316101, "learning_rate": 3.8611145053538134e-07, "loss": 0.6018, "step": 6128 }, { "epoch": 0.9141621299127451, "grad_norm": 1.2540336847305298, "learning_rate": 3.8478291128157155e-07, "loss": 0.4758, "step": 6129 }, { "epoch": 0.9143112834663286, "grad_norm": 1.4234046936035156, "learning_rate": 3.8345661676184475e-07, "loss": 0.5069, "step": 6130 }, { "epoch": 0.914460437019912, "grad_norm": 1.5287225246429443, "learning_rate": 3.8213256728583115e-07, "loss": 0.6093, "step": 6131 }, { "epoch": 0.9146095905734954, "grad_norm": 1.4366278648376465, "learning_rate": 3.808107631626401e-07, "loss": 0.5951, "step": 6132 }, { "epoch": 0.9147587441270788, "grad_norm": 1.2018479108810425, "learning_rate": 3.7949120470085586e-07, "loss": 0.5054, "step": 6133 }, { "epoch": 0.9149078976806623, "grad_norm": 1.5364114046096802, "learning_rate": 3.781738922085354e-07, "loss": 0.6512, "step": 6134 }, { "epoch": 0.9150570512342456, "grad_norm": 1.3711611032485962, "learning_rate": 3.76858825993216e-07, "loss": 0.4666, "step": 6135 }, { "epoch": 0.9152062047878291, "grad_norm": 1.6253243684768677, "learning_rate": 3.7554600636190876e-07, "loss": 0.4767, "step": 6136 }, { "epoch": 0.9153553583414125, "grad_norm": 1.3665413856506348, "learning_rate": 3.742354336210974e-07, "loss": 0.6809, "step": 6137 }, { "epoch": 0.9155045118949959, "grad_norm": 1.4114160537719727, "learning_rate": 3.7292710807674493e-07, "loss": 0.5547, "step": 6138 }, { "epoch": 0.9156536654485793, "grad_norm": 1.635034203529358, "learning_rate": 3.716210300342893e-07, "loss": 0.6236, "step": 6139 }, { "epoch": 0.9158028190021628, "grad_norm": 1.382319450378418, "learning_rate": 3.70317199798641e-07, "loss": 0.5189, "step": 6140 }, { "epoch": 0.9159519725557461, "grad_norm": 1.4025828838348389, "learning_rate": 3.690156176741877e-07, "loss": 0.6313, "step": 6141 }, { "epoch": 0.9161011261093296, "grad_norm": 1.4803087711334229, "learning_rate": 3.6771628396479295e-07, "loss": 0.5506, "step": 6142 }, { "epoch": 0.916250279662913, "grad_norm": 1.4501276016235352, "learning_rate": 3.664191989737942e-07, "loss": 0.5495, "step": 6143 }, { "epoch": 0.9163994332164964, "grad_norm": 1.3665357828140259, "learning_rate": 3.651243630040047e-07, "loss": 0.5277, "step": 6144 }, { "epoch": 0.9165485867700798, "grad_norm": 1.351007103919983, "learning_rate": 3.638317763577126e-07, "loss": 0.5682, "step": 6145 }, { "epoch": 0.9166977403236632, "grad_norm": 0.8491365313529968, "learning_rate": 3.6254143933667886e-07, "loss": 0.6537, "step": 6146 }, { "epoch": 0.9168468938772466, "grad_norm": 1.4639519453048706, "learning_rate": 3.6125335224214133e-07, "loss": 0.6151, "step": 6147 }, { "epoch": 0.9169960474308301, "grad_norm": 1.5703232288360596, "learning_rate": 3.5996751537481277e-07, "loss": 0.5756, "step": 6148 }, { "epoch": 0.9171452009844134, "grad_norm": 1.3900339603424072, "learning_rate": 3.586839290348809e-07, "loss": 0.5572, "step": 6149 }, { "epoch": 0.9172943545379969, "grad_norm": 1.1883453130722046, "learning_rate": 3.57402593522006e-07, "loss": 0.5756, "step": 6150 }, { "epoch": 0.9174435080915803, "grad_norm": 1.324560523033142, "learning_rate": 3.561235091353243e-07, "loss": 0.5515, "step": 6151 }, { "epoch": 0.9175926616451637, "grad_norm": 1.356297254562378, "learning_rate": 3.548466761734459e-07, "loss": 0.5371, "step": 6152 }, { "epoch": 0.9177418151987471, "grad_norm": 0.8789373636245728, "learning_rate": 3.535720949344557e-07, "loss": 0.6367, "step": 6153 }, { "epoch": 0.9178909687523306, "grad_norm": 1.4952706098556519, "learning_rate": 3.522997657159133e-07, "loss": 0.5863, "step": 6154 }, { "epoch": 0.9180401223059139, "grad_norm": 1.2958940267562866, "learning_rate": 3.5102968881485344e-07, "loss": 0.5404, "step": 6155 }, { "epoch": 0.9181892758594974, "grad_norm": 1.4319860935211182, "learning_rate": 3.4976186452778116e-07, "loss": 0.5444, "step": 6156 }, { "epoch": 0.9183384294130807, "grad_norm": 1.4519156217575073, "learning_rate": 3.4849629315067856e-07, "loss": 0.4772, "step": 6157 }, { "epoch": 0.9184875829666642, "grad_norm": 1.4742685556411743, "learning_rate": 3.4723297497900487e-07, "loss": 0.5544, "step": 6158 }, { "epoch": 0.9186367365202476, "grad_norm": 1.424757957458496, "learning_rate": 3.459719103076831e-07, "loss": 0.579, "step": 6159 }, { "epoch": 0.918785890073831, "grad_norm": 1.2206237316131592, "learning_rate": 3.447130994311232e-07, "loss": 0.5079, "step": 6160 }, { "epoch": 0.9189350436274144, "grad_norm": 1.2667821645736694, "learning_rate": 3.4345654264320017e-07, "loss": 0.6093, "step": 6161 }, { "epoch": 0.9190841971809979, "grad_norm": 1.266677737236023, "learning_rate": 3.422022402372649e-07, "loss": 0.5334, "step": 6162 }, { "epoch": 0.9192333507345812, "grad_norm": 1.664842963218689, "learning_rate": 3.4095019250614316e-07, "loss": 0.6019, "step": 6163 }, { "epoch": 0.9193825042881647, "grad_norm": 1.821533203125, "learning_rate": 3.397003997421344e-07, "loss": 0.5209, "step": 6164 }, { "epoch": 0.9195316578417481, "grad_norm": 1.3023955821990967, "learning_rate": 3.3845286223700757e-07, "loss": 0.5264, "step": 6165 }, { "epoch": 0.9196808113953315, "grad_norm": 1.610085368156433, "learning_rate": 3.372075802820107e-07, "loss": 0.5122, "step": 6166 }, { "epoch": 0.9198299649489149, "grad_norm": 1.3832098245620728, "learning_rate": 3.3596455416786245e-07, "loss": 0.5516, "step": 6167 }, { "epoch": 0.9199791185024984, "grad_norm": 1.4491674900054932, "learning_rate": 3.34723784184755e-07, "loss": 0.5867, "step": 6168 }, { "epoch": 0.9201282720560817, "grad_norm": 1.361897587776184, "learning_rate": 3.334852706223546e-07, "loss": 0.5358, "step": 6169 }, { "epoch": 0.9202774256096652, "grad_norm": 1.8287694454193115, "learning_rate": 3.322490137697998e-07, "loss": 0.5399, "step": 6170 }, { "epoch": 0.9204265791632485, "grad_norm": 1.6029517650604248, "learning_rate": 3.310150139157031e-07, "loss": 0.6462, "step": 6171 }, { "epoch": 0.920575732716832, "grad_norm": 1.40430748462677, "learning_rate": 3.297832713481486e-07, "loss": 0.4897, "step": 6172 }, { "epoch": 0.9207248862704154, "grad_norm": 1.2853866815567017, "learning_rate": 3.2855378635469503e-07, "loss": 0.4525, "step": 6173 }, { "epoch": 0.9208740398239988, "grad_norm": 1.4958035945892334, "learning_rate": 3.273265592223751e-07, "loss": 0.6093, "step": 6174 }, { "epoch": 0.9210231933775822, "grad_norm": 1.6090608835220337, "learning_rate": 3.261015902376896e-07, "loss": 0.683, "step": 6175 }, { "epoch": 0.9211723469311657, "grad_norm": 1.53337824344635, "learning_rate": 3.2487887968661866e-07, "loss": 0.6352, "step": 6176 }, { "epoch": 0.921321500484749, "grad_norm": 1.3812837600708008, "learning_rate": 3.2365842785460954e-07, "loss": 0.5827, "step": 6177 }, { "epoch": 0.9214706540383325, "grad_norm": 1.7173023223876953, "learning_rate": 3.2244023502658537e-07, "loss": 0.5529, "step": 6178 }, { "epoch": 0.9216198075919159, "grad_norm": 1.412805438041687, "learning_rate": 3.2122430148694203e-07, "loss": 0.5042, "step": 6179 }, { "epoch": 0.9217689611454993, "grad_norm": 1.2573603391647339, "learning_rate": 3.2001062751954583e-07, "loss": 0.5871, "step": 6180 }, { "epoch": 0.9219181146990827, "grad_norm": 1.5930962562561035, "learning_rate": 3.1879921340773776e-07, "loss": 0.5296, "step": 6181 }, { "epoch": 0.9220672682526662, "grad_norm": 1.3066352605819702, "learning_rate": 3.175900594343284e-07, "loss": 0.539, "step": 6182 }, { "epoch": 0.9222164218062495, "grad_norm": 1.3696669340133667, "learning_rate": 3.163831658816052e-07, "loss": 0.533, "step": 6183 }, { "epoch": 0.922365575359833, "grad_norm": 1.544201374053955, "learning_rate": 3.151785330313217e-07, "loss": 0.6356, "step": 6184 }, { "epoch": 0.9225147289134163, "grad_norm": 1.3581129312515259, "learning_rate": 3.1397616116470964e-07, "loss": 0.5282, "step": 6185 }, { "epoch": 0.9226638824669998, "grad_norm": 1.4117333889007568, "learning_rate": 3.1277605056246994e-07, "loss": 0.5605, "step": 6186 }, { "epoch": 0.9228130360205832, "grad_norm": 1.3251113891601562, "learning_rate": 3.1157820150477634e-07, "loss": 0.6119, "step": 6187 }, { "epoch": 0.9229621895741666, "grad_norm": 1.7922900915145874, "learning_rate": 3.10382614271274e-07, "loss": 0.5589, "step": 6188 }, { "epoch": 0.92311134312775, "grad_norm": 1.610206127166748, "learning_rate": 3.091892891410808e-07, "loss": 0.6109, "step": 6189 }, { "epoch": 0.9232604966813335, "grad_norm": 1.388500452041626, "learning_rate": 3.079982263927861e-07, "loss": 0.5465, "step": 6190 }, { "epoch": 0.9234096502349168, "grad_norm": 1.387229323387146, "learning_rate": 3.0680942630444965e-07, "loss": 0.6158, "step": 6191 }, { "epoch": 0.9235588037885003, "grad_norm": 1.2604386806488037, "learning_rate": 3.0562288915360837e-07, "loss": 0.5559, "step": 6192 }, { "epoch": 0.9237079573420837, "grad_norm": 1.3155699968338013, "learning_rate": 3.0443861521726183e-07, "loss": 0.5208, "step": 6193 }, { "epoch": 0.9238571108956671, "grad_norm": 1.5474779605865479, "learning_rate": 3.0325660477188767e-07, "loss": 0.5871, "step": 6194 }, { "epoch": 0.9240062644492505, "grad_norm": 1.4465343952178955, "learning_rate": 3.020768580934386e-07, "loss": 0.5918, "step": 6195 }, { "epoch": 0.924155418002834, "grad_norm": 1.281020164489746, "learning_rate": 3.008993754573286e-07, "loss": 0.5263, "step": 6196 }, { "epoch": 0.9243045715564173, "grad_norm": 1.7987970113754272, "learning_rate": 2.9972415713845016e-07, "loss": 0.5808, "step": 6197 }, { "epoch": 0.9244537251100008, "grad_norm": 1.3290802240371704, "learning_rate": 2.9855120341116706e-07, "loss": 0.5647, "step": 6198 }, { "epoch": 0.9246028786635841, "grad_norm": 1.3814092874526978, "learning_rate": 2.973805145493103e-07, "loss": 0.4772, "step": 6199 }, { "epoch": 0.9247520322171676, "grad_norm": 1.4626805782318115, "learning_rate": 2.962120908261856e-07, "loss": 0.5881, "step": 6200 }, { "epoch": 0.924901185770751, "grad_norm": 1.619988203048706, "learning_rate": 2.950459325145705e-07, "loss": 0.551, "step": 6201 }, { "epoch": 0.9250503393243344, "grad_norm": 1.535101294517517, "learning_rate": 2.9388203988671037e-07, "loss": 0.5656, "step": 6202 }, { "epoch": 0.9251994928779178, "grad_norm": 1.4294993877410889, "learning_rate": 2.9272041321432353e-07, "loss": 0.5794, "step": 6203 }, { "epoch": 0.9253486464315013, "grad_norm": 1.3710216283798218, "learning_rate": 2.915610527685997e-07, "loss": 0.5688, "step": 6204 }, { "epoch": 0.9254977999850846, "grad_norm": 1.3044061660766602, "learning_rate": 2.904039588202001e-07, "loss": 0.5358, "step": 6205 }, { "epoch": 0.9256469535386681, "grad_norm": 1.3621490001678467, "learning_rate": 2.892491316392543e-07, "loss": 0.5146, "step": 6206 }, { "epoch": 0.9257961070922515, "grad_norm": 1.4349340200424194, "learning_rate": 2.880965714953643e-07, "loss": 0.5354, "step": 6207 }, { "epoch": 0.9259452606458349, "grad_norm": 1.4527263641357422, "learning_rate": 2.869462786576027e-07, "loss": 0.5311, "step": 6208 }, { "epoch": 0.9260944141994183, "grad_norm": 1.6791592836380005, "learning_rate": 2.857982533945125e-07, "loss": 0.5577, "step": 6209 }, { "epoch": 0.9262435677530018, "grad_norm": 1.4120458364486694, "learning_rate": 2.8465249597410816e-07, "loss": 0.5132, "step": 6210 }, { "epoch": 0.9263927213065851, "grad_norm": 1.3069336414337158, "learning_rate": 2.835090066638746e-07, "loss": 0.534, "step": 6211 }, { "epoch": 0.9265418748601686, "grad_norm": 1.3921921253204346, "learning_rate": 2.823677857307638e-07, "loss": 0.4788, "step": 6212 }, { "epoch": 0.926691028413752, "grad_norm": 1.3397635221481323, "learning_rate": 2.812288334412039e-07, "loss": 0.5842, "step": 6213 }, { "epoch": 0.9268401819673354, "grad_norm": 1.5209771394729614, "learning_rate": 2.80092150061092e-07, "loss": 0.5841, "step": 6214 }, { "epoch": 0.9269893355209188, "grad_norm": 1.4391613006591797, "learning_rate": 2.7895773585579047e-07, "loss": 0.5584, "step": 6215 }, { "epoch": 0.9271384890745022, "grad_norm": 1.5107862949371338, "learning_rate": 2.778255910901362e-07, "loss": 0.509, "step": 6216 }, { "epoch": 0.9272876426280856, "grad_norm": 1.492836594581604, "learning_rate": 2.766957160284389e-07, "loss": 0.4771, "step": 6217 }, { "epoch": 0.9274367961816691, "grad_norm": 2.675588607788086, "learning_rate": 2.75568110934471e-07, "loss": 0.5541, "step": 6218 }, { "epoch": 0.9275859497352524, "grad_norm": 1.2987861633300781, "learning_rate": 2.744427760714818e-07, "loss": 0.6038, "step": 6219 }, { "epoch": 0.9277351032888359, "grad_norm": 1.498684287071228, "learning_rate": 2.7331971170218684e-07, "loss": 0.5794, "step": 6220 }, { "epoch": 0.9278842568424193, "grad_norm": 1.3102664947509766, "learning_rate": 2.72198918088773e-07, "loss": 0.6032, "step": 6221 }, { "epoch": 0.9280334103960027, "grad_norm": 1.322291374206543, "learning_rate": 2.7108039549289754e-07, "loss": 0.4878, "step": 6222 }, { "epoch": 0.9281825639495861, "grad_norm": 1.3907670974731445, "learning_rate": 2.699641441756862e-07, "loss": 0.6087, "step": 6223 }, { "epoch": 0.9283317175031696, "grad_norm": 1.6095610857009888, "learning_rate": 2.688501643977337e-07, "loss": 0.5211, "step": 6224 }, { "epoch": 0.9284808710567529, "grad_norm": 1.3789114952087402, "learning_rate": 2.6773845641910655e-07, "loss": 0.6297, "step": 6225 }, { "epoch": 0.9286300246103364, "grad_norm": 1.589224934577942, "learning_rate": 2.6662902049934047e-07, "loss": 0.5884, "step": 6226 }, { "epoch": 0.9287791781639197, "grad_norm": 1.604508876800537, "learning_rate": 2.655218568974416e-07, "loss": 0.5504, "step": 6227 }, { "epoch": 0.9289283317175032, "grad_norm": 1.5903888940811157, "learning_rate": 2.64416965871882e-07, "loss": 0.565, "step": 6228 }, { "epoch": 0.9290774852710866, "grad_norm": 1.8996034860610962, "learning_rate": 2.633143476806066e-07, "loss": 0.5485, "step": 6229 }, { "epoch": 0.92922663882467, "grad_norm": 1.4560691118240356, "learning_rate": 2.6221400258102826e-07, "loss": 0.594, "step": 6230 }, { "epoch": 0.9293757923782534, "grad_norm": 1.7183096408843994, "learning_rate": 2.611159308300304e-07, "loss": 0.5056, "step": 6231 }, { "epoch": 0.9295249459318369, "grad_norm": 1.5188579559326172, "learning_rate": 2.600201326839646e-07, "loss": 0.5891, "step": 6232 }, { "epoch": 0.9296740994854202, "grad_norm": 1.356104850769043, "learning_rate": 2.58926608398653e-07, "loss": 0.522, "step": 6233 }, { "epoch": 0.9298232530390037, "grad_norm": 1.227980136871338, "learning_rate": 2.5783535822938354e-07, "loss": 0.5484, "step": 6234 }, { "epoch": 0.9299724065925871, "grad_norm": 1.3235273361206055, "learning_rate": 2.56746382430918e-07, "loss": 0.5337, "step": 6235 }, { "epoch": 0.9301215601461705, "grad_norm": 1.3276727199554443, "learning_rate": 2.55659681257483e-07, "loss": 0.476, "step": 6236 }, { "epoch": 0.9302707136997539, "grad_norm": 1.4436631202697754, "learning_rate": 2.545752549627767e-07, "loss": 0.5275, "step": 6237 }, { "epoch": 0.9304198672533374, "grad_norm": 1.5525439977645874, "learning_rate": 2.534931037999633e-07, "loss": 0.611, "step": 6238 }, { "epoch": 0.9305690208069207, "grad_norm": 1.4762650728225708, "learning_rate": 2.524132280216818e-07, "loss": 0.6159, "step": 6239 }, { "epoch": 0.9307181743605042, "grad_norm": 1.2464537620544434, "learning_rate": 2.5133562788003276e-07, "loss": 0.5509, "step": 6240 }, { "epoch": 0.9308673279140876, "grad_norm": 1.5191038846969604, "learning_rate": 2.5026030362659157e-07, "loss": 0.5782, "step": 6241 }, { "epoch": 0.931016481467671, "grad_norm": 1.052430510520935, "learning_rate": 2.491872555123975e-07, "loss": 0.4077, "step": 6242 }, { "epoch": 0.9311656350212544, "grad_norm": 1.4480599164962769, "learning_rate": 2.4811648378796127e-07, "loss": 0.5772, "step": 6243 }, { "epoch": 0.9313147885748378, "grad_norm": 1.2687268257141113, "learning_rate": 2.4704798870326174e-07, "loss": 0.4907, "step": 6244 }, { "epoch": 0.9314639421284212, "grad_norm": 1.3438682556152344, "learning_rate": 2.4598177050774495e-07, "loss": 0.5227, "step": 6245 }, { "epoch": 0.9316130956820047, "grad_norm": 1.4594670534133911, "learning_rate": 2.449178294503274e-07, "loss": 0.6252, "step": 6246 }, { "epoch": 0.931762249235588, "grad_norm": 1.385027289390564, "learning_rate": 2.438561657793914e-07, "loss": 0.6173, "step": 6247 }, { "epoch": 0.9319114027891715, "grad_norm": 1.4815385341644287, "learning_rate": 2.4279677974279214e-07, "loss": 0.5726, "step": 6248 }, { "epoch": 0.9320605563427549, "grad_norm": 1.5317778587341309, "learning_rate": 2.417396715878462e-07, "loss": 0.5828, "step": 6249 }, { "epoch": 0.9322097098963383, "grad_norm": 1.3551326990127563, "learning_rate": 2.40684841561345e-07, "loss": 0.5262, "step": 6250 }, { "epoch": 0.9323588634499217, "grad_norm": 1.560125470161438, "learning_rate": 2.396322899095449e-07, "loss": 0.5253, "step": 6251 }, { "epoch": 0.9325080170035052, "grad_norm": 0.8533357381820679, "learning_rate": 2.3858201687817164e-07, "loss": 0.6375, "step": 6252 }, { "epoch": 0.9326571705570885, "grad_norm": 1.3536500930786133, "learning_rate": 2.3753402271241566e-07, "loss": 0.5212, "step": 6253 }, { "epoch": 0.932806324110672, "grad_norm": 1.3485608100891113, "learning_rate": 2.3648830765693908e-07, "loss": 0.523, "step": 6254 }, { "epoch": 0.9329554776642554, "grad_norm": 1.3880541324615479, "learning_rate": 2.3544487195587108e-07, "loss": 0.5984, "step": 6255 }, { "epoch": 0.9331046312178388, "grad_norm": 1.4305882453918457, "learning_rate": 2.3440371585280896e-07, "loss": 0.5783, "step": 6256 }, { "epoch": 0.9332537847714222, "grad_norm": 1.2418612241744995, "learning_rate": 2.3336483959081612e-07, "loss": 0.4912, "step": 6257 }, { "epoch": 0.9334029383250055, "grad_norm": 1.445263385772705, "learning_rate": 2.3232824341242743e-07, "loss": 0.6439, "step": 6258 }, { "epoch": 0.933552091878589, "grad_norm": 1.4403162002563477, "learning_rate": 2.312939275596393e-07, "loss": 0.6265, "step": 6259 }, { "epoch": 0.9337012454321724, "grad_norm": 1.4291713237762451, "learning_rate": 2.3026189227392083e-07, "loss": 0.5427, "step": 6260 }, { "epoch": 0.9338503989857558, "grad_norm": 0.8431811928749084, "learning_rate": 2.2923213779620924e-07, "loss": 0.5909, "step": 6261 }, { "epoch": 0.9339995525393392, "grad_norm": 1.4019416570663452, "learning_rate": 2.2820466436690447e-07, "loss": 0.5983, "step": 6262 }, { "epoch": 0.9341487060929227, "grad_norm": 1.4150235652923584, "learning_rate": 2.27179472225878e-07, "loss": 0.5432, "step": 6263 }, { "epoch": 0.934297859646506, "grad_norm": 1.4090150594711304, "learning_rate": 2.2615656161246613e-07, "loss": 0.4891, "step": 6264 }, { "epoch": 0.9344470132000895, "grad_norm": 1.528688907623291, "learning_rate": 2.2513593276547673e-07, "loss": 0.5464, "step": 6265 }, { "epoch": 0.9345961667536729, "grad_norm": 1.6811213493347168, "learning_rate": 2.2411758592318033e-07, "loss": 0.5475, "step": 6266 }, { "epoch": 0.9347453203072563, "grad_norm": 1.3750817775726318, "learning_rate": 2.2310152132331676e-07, "loss": 0.5974, "step": 6267 }, { "epoch": 0.9348944738608397, "grad_norm": 1.4621752500534058, "learning_rate": 2.220877392030929e-07, "loss": 0.4905, "step": 6268 }, { "epoch": 0.9350436274144232, "grad_norm": 1.318656325340271, "learning_rate": 2.210762397991828e-07, "loss": 0.5323, "step": 6269 }, { "epoch": 0.9351927809680065, "grad_norm": 1.285213828086853, "learning_rate": 2.2006702334772755e-07, "loss": 0.5607, "step": 6270 }, { "epoch": 0.93534193452159, "grad_norm": 1.3314927816390991, "learning_rate": 2.1906009008433427e-07, "loss": 0.6274, "step": 6271 }, { "epoch": 0.9354910880751733, "grad_norm": 1.5921409130096436, "learning_rate": 2.1805544024407933e-07, "loss": 0.6209, "step": 6272 }, { "epoch": 0.9356402416287568, "grad_norm": 1.5770087242126465, "learning_rate": 2.17053074061504e-07, "loss": 0.5579, "step": 6273 }, { "epoch": 0.9357893951823402, "grad_norm": 1.1914317607879639, "learning_rate": 2.1605299177061668e-07, "loss": 0.5806, "step": 6274 }, { "epoch": 0.9359385487359236, "grad_norm": 1.3973809480667114, "learning_rate": 2.150551936048928e-07, "loss": 0.5147, "step": 6275 }, { "epoch": 0.936087702289507, "grad_norm": 1.5680164098739624, "learning_rate": 2.1405967979727715e-07, "loss": 0.5629, "step": 6276 }, { "epoch": 0.9362368558430905, "grad_norm": 0.8872525691986084, "learning_rate": 2.1306645058017607e-07, "loss": 0.6637, "step": 6277 }, { "epoch": 0.9363860093966738, "grad_norm": 1.6200323104858398, "learning_rate": 2.1207550618546624e-07, "loss": 0.5573, "step": 6278 }, { "epoch": 0.9365351629502573, "grad_norm": 1.6438970565795898, "learning_rate": 2.1108684684448932e-07, "loss": 0.5636, "step": 6279 }, { "epoch": 0.9366843165038407, "grad_norm": 1.7078217267990112, "learning_rate": 2.1010047278805735e-07, "loss": 0.4954, "step": 6280 }, { "epoch": 0.9368334700574241, "grad_norm": 1.5288244485855103, "learning_rate": 2.0911638424644055e-07, "loss": 0.5753, "step": 6281 }, { "epoch": 0.9369826236110075, "grad_norm": 0.8466613292694092, "learning_rate": 2.0813458144938514e-07, "loss": 0.6529, "step": 6282 }, { "epoch": 0.937131777164591, "grad_norm": 1.3349509239196777, "learning_rate": 2.0715506462609557e-07, "loss": 0.5466, "step": 6283 }, { "epoch": 0.9372809307181743, "grad_norm": 1.2607040405273438, "learning_rate": 2.0617783400525003e-07, "loss": 0.5545, "step": 6284 }, { "epoch": 0.9374300842717578, "grad_norm": 2.007388114929199, "learning_rate": 2.0520288981498605e-07, "loss": 0.579, "step": 6285 }, { "epoch": 0.9375792378253411, "grad_norm": 1.3772706985473633, "learning_rate": 2.0423023228291373e-07, "loss": 0.5447, "step": 6286 }, { "epoch": 0.9377283913789246, "grad_norm": 1.2844834327697754, "learning_rate": 2.0325986163610367e-07, "loss": 0.5054, "step": 6287 }, { "epoch": 0.937877544932508, "grad_norm": 1.21637761592865, "learning_rate": 2.022917781010958e-07, "loss": 0.5419, "step": 6288 }, { "epoch": 0.9380266984860914, "grad_norm": 1.5099365711212158, "learning_rate": 2.0132598190389596e-07, "loss": 0.4946, "step": 6289 }, { "epoch": 0.9381758520396748, "grad_norm": 1.481996774673462, "learning_rate": 2.0036247326997383e-07, "loss": 0.5376, "step": 6290 }, { "epoch": 0.9383250055932583, "grad_norm": 1.374804973602295, "learning_rate": 1.9940125242426834e-07, "loss": 0.4673, "step": 6291 }, { "epoch": 0.9384741591468416, "grad_norm": 0.8880771398544312, "learning_rate": 1.9844231959118444e-07, "loss": 0.6549, "step": 6292 }, { "epoch": 0.9386233127004251, "grad_norm": 1.6104248762130737, "learning_rate": 1.9748567499458639e-07, "loss": 0.5188, "step": 6293 }, { "epoch": 0.9387724662540085, "grad_norm": 1.6349756717681885, "learning_rate": 1.9653131885781328e-07, "loss": 0.6472, "step": 6294 }, { "epoch": 0.9389216198075919, "grad_norm": 1.3137919902801514, "learning_rate": 1.9557925140366363e-07, "loss": 0.4532, "step": 6295 }, { "epoch": 0.9390707733611753, "grad_norm": 1.3163584470748901, "learning_rate": 1.9462947285440405e-07, "loss": 0.529, "step": 6296 }, { "epoch": 0.9392199269147588, "grad_norm": 1.4001561403274536, "learning_rate": 1.9368198343176604e-07, "loss": 0.5193, "step": 6297 }, { "epoch": 0.9393690804683421, "grad_norm": 1.2590322494506836, "learning_rate": 1.9273678335694712e-07, "loss": 0.5532, "step": 6298 }, { "epoch": 0.9395182340219256, "grad_norm": 1.3215007781982422, "learning_rate": 1.917938728506108e-07, "loss": 0.4939, "step": 6299 }, { "epoch": 0.9396673875755089, "grad_norm": 1.2503125667572021, "learning_rate": 1.9085325213288542e-07, "loss": 0.5236, "step": 6300 }, { "epoch": 0.9398165411290924, "grad_norm": 1.7453432083129883, "learning_rate": 1.8991492142336644e-07, "loss": 0.5592, "step": 6301 }, { "epoch": 0.9399656946826758, "grad_norm": 1.6914016008377075, "learning_rate": 1.8897888094110972e-07, "loss": 0.5904, "step": 6302 }, { "epoch": 0.9401148482362592, "grad_norm": 1.4087142944335938, "learning_rate": 1.880451309046427e-07, "loss": 0.5855, "step": 6303 }, { "epoch": 0.9402640017898426, "grad_norm": 1.4325922727584839, "learning_rate": 1.8711367153195436e-07, "loss": 0.5325, "step": 6304 }, { "epoch": 0.9404131553434261, "grad_norm": 1.2859176397323608, "learning_rate": 1.8618450304050074e-07, "loss": 0.5398, "step": 6305 }, { "epoch": 0.9405623088970094, "grad_norm": 1.4796684980392456, "learning_rate": 1.852576256472005e-07, "loss": 0.4781, "step": 6306 }, { "epoch": 0.9407114624505929, "grad_norm": 1.1955674886703491, "learning_rate": 1.8433303956843952e-07, "loss": 0.5059, "step": 6307 }, { "epoch": 0.9408606160041763, "grad_norm": 1.3171069622039795, "learning_rate": 1.834107450200695e-07, "loss": 0.5322, "step": 6308 }, { "epoch": 0.9410097695577597, "grad_norm": 1.543033480644226, "learning_rate": 1.8249074221740494e-07, "loss": 0.5943, "step": 6309 }, { "epoch": 0.9411589231113431, "grad_norm": 1.3714357614517212, "learning_rate": 1.815730313752273e-07, "loss": 0.5577, "step": 6310 }, { "epoch": 0.9413080766649266, "grad_norm": 1.3049644231796265, "learning_rate": 1.8065761270778303e-07, "loss": 0.5552, "step": 6311 }, { "epoch": 0.9414572302185099, "grad_norm": 1.6440800428390503, "learning_rate": 1.7974448642877894e-07, "loss": 0.5629, "step": 6312 }, { "epoch": 0.9416063837720934, "grad_norm": 1.741986870765686, "learning_rate": 1.788336527513934e-07, "loss": 0.6289, "step": 6313 }, { "epoch": 0.9417555373256767, "grad_norm": 1.3837237358093262, "learning_rate": 1.7792511188826522e-07, "loss": 0.5954, "step": 6314 }, { "epoch": 0.9419046908792602, "grad_norm": 1.5287350416183472, "learning_rate": 1.7701886405149914e-07, "loss": 0.5475, "step": 6315 }, { "epoch": 0.9420538444328436, "grad_norm": 1.3379360437393188, "learning_rate": 1.7611490945266375e-07, "loss": 0.5612, "step": 6316 }, { "epoch": 0.942202997986427, "grad_norm": 1.618381142616272, "learning_rate": 1.7521324830279463e-07, "loss": 0.5103, "step": 6317 }, { "epoch": 0.9423521515400104, "grad_norm": 1.1626992225646973, "learning_rate": 1.7431388081238898e-07, "loss": 0.5145, "step": 6318 }, { "epoch": 0.9425013050935939, "grad_norm": 1.6628721952438354, "learning_rate": 1.7341680719141106e-07, "loss": 0.5075, "step": 6319 }, { "epoch": 0.9426504586471772, "grad_norm": 1.5808753967285156, "learning_rate": 1.7252202764928893e-07, "loss": 0.4897, "step": 6320 }, { "epoch": 0.9427996122007607, "grad_norm": 1.5195859670639038, "learning_rate": 1.7162954239491213e-07, "loss": 0.5513, "step": 6321 }, { "epoch": 0.942948765754344, "grad_norm": 1.3830041885375977, "learning_rate": 1.7073935163663847e-07, "loss": 0.5068, "step": 6322 }, { "epoch": 0.9430979193079275, "grad_norm": 1.2889944314956665, "learning_rate": 1.6985145558228942e-07, "loss": 0.5286, "step": 6323 }, { "epoch": 0.9432470728615109, "grad_norm": 1.3234944343566895, "learning_rate": 1.6896585443914927e-07, "loss": 0.5775, "step": 6324 }, { "epoch": 0.9433962264150944, "grad_norm": 1.4810270071029663, "learning_rate": 1.6808254841396587e-07, "loss": 0.6575, "step": 6325 }, { "epoch": 0.9435453799686777, "grad_norm": 0.8413586616516113, "learning_rate": 1.6720153771295656e-07, "loss": 0.596, "step": 6326 }, { "epoch": 0.9436945335222612, "grad_norm": 1.5886808633804321, "learning_rate": 1.6632282254179456e-07, "loss": 0.5535, "step": 6327 }, { "epoch": 0.9438436870758445, "grad_norm": 1.4571739435195923, "learning_rate": 1.6544640310562466e-07, "loss": 0.6233, "step": 6328 }, { "epoch": 0.943992840629428, "grad_norm": 1.403275966644287, "learning_rate": 1.6457227960905097e-07, "loss": 0.5169, "step": 6329 }, { "epoch": 0.9441419941830114, "grad_norm": 1.2681306600570679, "learning_rate": 1.6370045225614474e-07, "loss": 0.5365, "step": 6330 }, { "epoch": 0.9442911477365948, "grad_norm": 1.4798405170440674, "learning_rate": 1.6283092125043754e-07, "loss": 0.5725, "step": 6331 }, { "epoch": 0.9444403012901782, "grad_norm": 1.9256471395492554, "learning_rate": 1.6196368679492815e-07, "loss": 0.5728, "step": 6332 }, { "epoch": 0.9445894548437617, "grad_norm": 1.4147568941116333, "learning_rate": 1.6109874909207901e-07, "loss": 0.5769, "step": 6333 }, { "epoch": 0.944738608397345, "grad_norm": 1.3651225566864014, "learning_rate": 1.6023610834381197e-07, "loss": 0.5608, "step": 6334 }, { "epoch": 0.9448877619509285, "grad_norm": 1.5025427341461182, "learning_rate": 1.593757647515204e-07, "loss": 0.5567, "step": 6335 }, { "epoch": 0.9450369155045119, "grad_norm": 1.24268639087677, "learning_rate": 1.585177185160547e-07, "loss": 0.5672, "step": 6336 }, { "epoch": 0.9451860690580953, "grad_norm": 1.5482895374298096, "learning_rate": 1.576619698377313e-07, "loss": 0.5458, "step": 6337 }, { "epoch": 0.9453352226116787, "grad_norm": 1.3312934637069702, "learning_rate": 1.5680851891633042e-07, "loss": 0.6015, "step": 6338 }, { "epoch": 0.9454843761652622, "grad_norm": 1.3279831409454346, "learning_rate": 1.55957365951096e-07, "loss": 0.5852, "step": 6339 }, { "epoch": 0.9456335297188455, "grad_norm": 1.5549200773239136, "learning_rate": 1.5510851114073467e-07, "loss": 0.5425, "step": 6340 }, { "epoch": 0.945782683272429, "grad_norm": 1.4697822332382202, "learning_rate": 1.5426195468341675e-07, "loss": 0.5369, "step": 6341 }, { "epoch": 0.9459318368260123, "grad_norm": 1.1484270095825195, "learning_rate": 1.5341769677677753e-07, "loss": 0.5665, "step": 6342 }, { "epoch": 0.9460809903795958, "grad_norm": 1.4633852243423462, "learning_rate": 1.5257573761791265e-07, "loss": 0.5189, "step": 6343 }, { "epoch": 0.9462301439331792, "grad_norm": 1.8181027173995972, "learning_rate": 1.5173607740338382e-07, "loss": 0.5618, "step": 6344 }, { "epoch": 0.9463792974867626, "grad_norm": 1.6155083179473877, "learning_rate": 1.5089871632921638e-07, "loss": 0.4714, "step": 6345 }, { "epoch": 0.946528451040346, "grad_norm": 1.5601882934570312, "learning_rate": 1.5006365459089622e-07, "loss": 0.5728, "step": 6346 }, { "epoch": 0.9466776045939295, "grad_norm": 1.5562573671340942, "learning_rate": 1.4923089238337296e-07, "loss": 0.5369, "step": 6347 }, { "epoch": 0.9468267581475128, "grad_norm": 1.437912940979004, "learning_rate": 1.484004299010633e-07, "loss": 0.5436, "step": 6348 }, { "epoch": 0.9469759117010963, "grad_norm": 1.4619354009628296, "learning_rate": 1.4757226733783992e-07, "loss": 0.5682, "step": 6349 }, { "epoch": 0.9471250652546797, "grad_norm": 1.4661211967468262, "learning_rate": 1.4674640488704596e-07, "loss": 0.5797, "step": 6350 }, { "epoch": 0.9472742188082631, "grad_norm": 1.3898118734359741, "learning_rate": 1.4592284274148273e-07, "loss": 0.6345, "step": 6351 }, { "epoch": 0.9474233723618465, "grad_norm": 1.4178117513656616, "learning_rate": 1.4510158109341644e-07, "loss": 0.5579, "step": 6352 }, { "epoch": 0.94757252591543, "grad_norm": 1.312643051147461, "learning_rate": 1.4428262013457706e-07, "loss": 0.4782, "step": 6353 }, { "epoch": 0.9477216794690133, "grad_norm": 1.4268912076950073, "learning_rate": 1.4346596005615499e-07, "loss": 0.5867, "step": 6354 }, { "epoch": 0.9478708330225968, "grad_norm": 1.4863932132720947, "learning_rate": 1.4265160104880438e-07, "loss": 0.5955, "step": 6355 }, { "epoch": 0.9480199865761801, "grad_norm": 1.3125557899475098, "learning_rate": 1.4183954330264317e-07, "loss": 0.5878, "step": 6356 }, { "epoch": 0.9481691401297636, "grad_norm": 1.647679328918457, "learning_rate": 1.410297870072508e-07, "loss": 0.464, "step": 6357 }, { "epoch": 0.948318293683347, "grad_norm": 2.0636963844299316, "learning_rate": 1.402223323516727e-07, "loss": 0.605, "step": 6358 }, { "epoch": 0.9484674472369304, "grad_norm": 1.9923207759857178, "learning_rate": 1.3941717952441146e-07, "loss": 0.6031, "step": 6359 }, { "epoch": 0.9486166007905138, "grad_norm": 1.6048169136047363, "learning_rate": 1.386143287134356e-07, "loss": 0.5517, "step": 6360 }, { "epoch": 0.9487657543440973, "grad_norm": 1.2263872623443604, "learning_rate": 1.378137801061763e-07, "loss": 0.501, "step": 6361 }, { "epoch": 0.9489149078976806, "grad_norm": 1.4209256172180176, "learning_rate": 1.3701553388952627e-07, "loss": 0.5711, "step": 6362 }, { "epoch": 0.9490640614512641, "grad_norm": 1.3240379095077515, "learning_rate": 1.362195902498431e-07, "loss": 0.6428, "step": 6363 }, { "epoch": 0.9492132150048475, "grad_norm": 1.2994651794433594, "learning_rate": 1.354259493729426e-07, "loss": 0.6175, "step": 6364 }, { "epoch": 0.9493623685584309, "grad_norm": 1.4741687774658203, "learning_rate": 1.346346114441066e-07, "loss": 0.4511, "step": 6365 }, { "epoch": 0.9495115221120143, "grad_norm": 1.344557285308838, "learning_rate": 1.3384557664807729e-07, "loss": 0.5926, "step": 6366 }, { "epoch": 0.9496606756655978, "grad_norm": 1.4741458892822266, "learning_rate": 1.3305884516906065e-07, "loss": 0.5786, "step": 6367 }, { "epoch": 0.9498098292191811, "grad_norm": 1.3984687328338623, "learning_rate": 1.322744171907242e-07, "loss": 0.5015, "step": 6368 }, { "epoch": 0.9499589827727646, "grad_norm": 1.5746896266937256, "learning_rate": 1.3149229289619593e-07, "loss": 0.5817, "step": 6369 }, { "epoch": 0.9501081363263479, "grad_norm": 1.2107475996017456, "learning_rate": 1.3071247246806972e-07, "loss": 0.5968, "step": 6370 }, { "epoch": 0.9502572898799314, "grad_norm": 1.4009807109832764, "learning_rate": 1.299349560883989e-07, "loss": 0.6209, "step": 6371 }, { "epoch": 0.9504064434335148, "grad_norm": 1.3144233226776123, "learning_rate": 1.2915974393870046e-07, "loss": 0.578, "step": 6372 }, { "epoch": 0.9505555969870982, "grad_norm": 1.448414921760559, "learning_rate": 1.2838683619995185e-07, "loss": 0.6633, "step": 6373 }, { "epoch": 0.9507047505406816, "grad_norm": 1.3011720180511475, "learning_rate": 1.276162330525932e-07, "loss": 0.5661, "step": 6374 }, { "epoch": 0.9508539040942651, "grad_norm": 1.3277339935302734, "learning_rate": 1.2684793467652722e-07, "loss": 0.5187, "step": 6375 }, { "epoch": 0.9510030576478484, "grad_norm": 1.4867157936096191, "learning_rate": 1.2608194125111716e-07, "loss": 0.5752, "step": 6376 }, { "epoch": 0.9511522112014319, "grad_norm": 1.3471684455871582, "learning_rate": 1.2531825295519106e-07, "loss": 0.4697, "step": 6377 }, { "epoch": 0.9513013647550153, "grad_norm": 1.322855830192566, "learning_rate": 1.2455686996703409e-07, "loss": 0.4815, "step": 6378 }, { "epoch": 0.9514505183085987, "grad_norm": 1.488170862197876, "learning_rate": 1.237977924643985e-07, "loss": 0.4776, "step": 6379 }, { "epoch": 0.9515996718621821, "grad_norm": 1.4108352661132812, "learning_rate": 1.2304102062449475e-07, "loss": 0.5791, "step": 6380 }, { "epoch": 0.9517488254157656, "grad_norm": 1.3721203804016113, "learning_rate": 1.2228655462399598e-07, "loss": 0.5079, "step": 6381 }, { "epoch": 0.9518979789693489, "grad_norm": 1.5723329782485962, "learning_rate": 1.2153439463903793e-07, "loss": 0.5673, "step": 6382 }, { "epoch": 0.9520471325229324, "grad_norm": 1.7054160833358765, "learning_rate": 1.2078454084521575e-07, "loss": 0.5272, "step": 6383 }, { "epoch": 0.9521962860765157, "grad_norm": 1.62067449092865, "learning_rate": 1.2003699341758934e-07, "loss": 0.5288, "step": 6384 }, { "epoch": 0.9523454396300992, "grad_norm": 1.2972906827926636, "learning_rate": 1.192917525306758e-07, "loss": 0.4863, "step": 6385 }, { "epoch": 0.9524945931836826, "grad_norm": 1.3338713645935059, "learning_rate": 1.1854881835846044e-07, "loss": 0.4969, "step": 6386 }, { "epoch": 0.952643746737266, "grad_norm": 1.7760902643203735, "learning_rate": 1.1780819107438112e-07, "loss": 0.4875, "step": 6387 }, { "epoch": 0.9527929002908494, "grad_norm": 1.6606686115264893, "learning_rate": 1.1706987085134624e-07, "loss": 0.6283, "step": 6388 }, { "epoch": 0.9529420538444329, "grad_norm": 1.6162688732147217, "learning_rate": 1.1633385786171903e-07, "loss": 0.6501, "step": 6389 }, { "epoch": 0.9530912073980162, "grad_norm": 0.8869240283966064, "learning_rate": 1.1560015227732757e-07, "loss": 0.6491, "step": 6390 }, { "epoch": 0.9532403609515997, "grad_norm": 1.3998143672943115, "learning_rate": 1.1486875426945931e-07, "loss": 0.5326, "step": 6391 }, { "epoch": 0.9533895145051831, "grad_norm": 1.2526326179504395, "learning_rate": 1.1413966400886544e-07, "loss": 0.504, "step": 6392 }, { "epoch": 0.9535386680587665, "grad_norm": 0.8531723618507385, "learning_rate": 1.1341288166575425e-07, "loss": 0.64, "step": 6393 }, { "epoch": 0.9536878216123499, "grad_norm": 1.4894647598266602, "learning_rate": 1.1268840740979891e-07, "loss": 0.5254, "step": 6394 }, { "epoch": 0.9538369751659334, "grad_norm": 1.4083776473999023, "learning_rate": 1.1196624141013301e-07, "loss": 0.4541, "step": 6395 }, { "epoch": 0.9539861287195167, "grad_norm": 1.3347086906433105, "learning_rate": 1.1124638383534947e-07, "loss": 0.5435, "step": 6396 }, { "epoch": 0.9541352822731002, "grad_norm": 1.2960498332977295, "learning_rate": 1.1052883485350607e-07, "loss": 0.5315, "step": 6397 }, { "epoch": 0.9542844358266835, "grad_norm": 1.3723673820495605, "learning_rate": 1.0981359463211772e-07, "loss": 0.5967, "step": 6398 }, { "epoch": 0.954433589380267, "grad_norm": 1.7864761352539062, "learning_rate": 1.0910066333816194e-07, "loss": 0.5094, "step": 6399 }, { "epoch": 0.9545827429338504, "grad_norm": 1.3560038805007935, "learning_rate": 1.083900411380756e-07, "loss": 0.6185, "step": 6400 }, { "epoch": 0.9547318964874338, "grad_norm": 1.5115807056427002, "learning_rate": 1.0768172819776158e-07, "loss": 0.6013, "step": 6401 }, { "epoch": 0.9548810500410172, "grad_norm": 1.2971723079681396, "learning_rate": 1.0697572468257755e-07, "loss": 0.4758, "step": 6402 }, { "epoch": 0.9550302035946007, "grad_norm": 1.2514700889587402, "learning_rate": 1.0627203075734394e-07, "loss": 0.5049, "step": 6403 }, { "epoch": 0.955179357148184, "grad_norm": 1.307293176651001, "learning_rate": 1.0557064658634486e-07, "loss": 0.4693, "step": 6404 }, { "epoch": 0.9553285107017675, "grad_norm": 1.3154171705245972, "learning_rate": 1.0487157233332046e-07, "loss": 0.5948, "step": 6405 }, { "epoch": 0.9554776642553509, "grad_norm": 1.4244837760925293, "learning_rate": 1.0417480816147574e-07, "loss": 0.5933, "step": 6406 }, { "epoch": 0.9556268178089343, "grad_norm": 1.1403453350067139, "learning_rate": 1.0348035423347613e-07, "loss": 0.4123, "step": 6407 }, { "epoch": 0.9557759713625177, "grad_norm": 1.3780763149261475, "learning_rate": 1.0278821071144306e-07, "loss": 0.5093, "step": 6408 }, { "epoch": 0.9559251249161012, "grad_norm": 1.3282634019851685, "learning_rate": 1.0209837775696396e-07, "loss": 0.5734, "step": 6409 }, { "epoch": 0.9560742784696845, "grad_norm": 1.4647259712219238, "learning_rate": 1.0141085553108443e-07, "loss": 0.5167, "step": 6410 }, { "epoch": 0.956223432023268, "grad_norm": 1.342680811882019, "learning_rate": 1.0072564419431053e-07, "loss": 0.5803, "step": 6411 }, { "epoch": 0.9563725855768513, "grad_norm": 2.934887170791626, "learning_rate": 1.0004274390660984e-07, "loss": 0.576, "step": 6412 }, { "epoch": 0.9565217391304348, "grad_norm": 1.7876652479171753, "learning_rate": 9.936215482740819e-08, "loss": 0.5114, "step": 6413 }, { "epoch": 0.9566708926840182, "grad_norm": 1.4598767757415771, "learning_rate": 9.868387711559624e-08, "loss": 0.5713, "step": 6414 }, { "epoch": 0.9568200462376016, "grad_norm": 1.2346314191818237, "learning_rate": 9.800791092951956e-08, "loss": 0.5491, "step": 6415 }, { "epoch": 0.956969199791185, "grad_norm": 1.4921847581863403, "learning_rate": 9.733425642698857e-08, "loss": 0.5883, "step": 6416 }, { "epoch": 0.9571183533447685, "grad_norm": 1.5124036073684692, "learning_rate": 9.666291376527304e-08, "loss": 0.6241, "step": 6417 }, { "epoch": 0.9572675068983518, "grad_norm": 1.3359707593917847, "learning_rate": 9.59938831010998e-08, "loss": 0.5767, "step": 6418 }, { "epoch": 0.9574166604519353, "grad_norm": 1.3698675632476807, "learning_rate": 9.532716459065838e-08, "loss": 0.5704, "step": 6419 }, { "epoch": 0.9575658140055187, "grad_norm": 1.4270520210266113, "learning_rate": 9.466275838960093e-08, "loss": 0.5454, "step": 6420 }, { "epoch": 0.9577149675591021, "grad_norm": 1.265663981437683, "learning_rate": 9.400066465303448e-08, "loss": 0.4753, "step": 6421 }, { "epoch": 0.9578641211126855, "grad_norm": 1.318826675415039, "learning_rate": 9.334088353553206e-08, "loss": 0.5346, "step": 6422 }, { "epoch": 0.958013274666269, "grad_norm": 1.2831025123596191, "learning_rate": 9.268341519112156e-08, "loss": 0.6386, "step": 6423 }, { "epoch": 0.9581624282198523, "grad_norm": 1.5507770776748657, "learning_rate": 9.202825977329355e-08, "loss": 0.6201, "step": 6424 }, { "epoch": 0.9583115817734358, "grad_norm": 1.4948383569717407, "learning_rate": 9.1375417434999e-08, "loss": 0.5568, "step": 6425 }, { "epoch": 0.9584607353270191, "grad_norm": 1.5867410898208618, "learning_rate": 9.072488832864823e-08, "loss": 0.6297, "step": 6426 }, { "epoch": 0.9586098888806026, "grad_norm": 1.4829601049423218, "learning_rate": 9.007667260610975e-08, "loss": 0.5251, "step": 6427 }, { "epoch": 0.958759042434186, "grad_norm": 1.4785027503967285, "learning_rate": 8.943077041871584e-08, "loss": 0.5758, "step": 6428 }, { "epoch": 0.9589081959877694, "grad_norm": 1.4092212915420532, "learning_rate": 8.878718191725478e-08, "loss": 0.5419, "step": 6429 }, { "epoch": 0.9590573495413528, "grad_norm": 1.4805105924606323, "learning_rate": 8.814590725197636e-08, "loss": 0.503, "step": 6430 }, { "epoch": 0.9592065030949363, "grad_norm": 1.39207124710083, "learning_rate": 8.750694657259195e-08, "loss": 0.5739, "step": 6431 }, { "epoch": 0.9593556566485196, "grad_norm": 1.2970871925354004, "learning_rate": 8.687030002827113e-08, "loss": 0.5574, "step": 6432 }, { "epoch": 0.9595048102021031, "grad_norm": 1.4250491857528687, "learning_rate": 8.623596776764165e-08, "loss": 0.5099, "step": 6433 }, { "epoch": 0.9596539637556865, "grad_norm": 1.2661347389221191, "learning_rate": 8.560394993879173e-08, "loss": 0.615, "step": 6434 }, { "epoch": 0.9598031173092699, "grad_norm": 1.2715409994125366, "learning_rate": 8.497424668927224e-08, "loss": 0.5256, "step": 6435 }, { "epoch": 0.9599522708628533, "grad_norm": 1.2237154245376587, "learning_rate": 8.434685816609e-08, "loss": 0.535, "step": 6436 }, { "epoch": 0.9601014244164368, "grad_norm": 1.366827368736267, "learning_rate": 8.372178451571344e-08, "loss": 0.6071, "step": 6437 }, { "epoch": 0.9602505779700201, "grad_norm": 1.5069199800491333, "learning_rate": 8.309902588407026e-08, "loss": 0.618, "step": 6438 }, { "epoch": 0.9603997315236036, "grad_norm": 1.4933990240097046, "learning_rate": 8.247858241654638e-08, "loss": 0.5547, "step": 6439 }, { "epoch": 0.9605488850771869, "grad_norm": 1.380566954612732, "learning_rate": 8.186045425798817e-08, "loss": 0.5746, "step": 6440 }, { "epoch": 0.9606980386307704, "grad_norm": 1.2089669704437256, "learning_rate": 8.124464155270351e-08, "loss": 0.5237, "step": 6441 }, { "epoch": 0.9608471921843538, "grad_norm": 1.4702363014221191, "learning_rate": 8.063114444445741e-08, "loss": 0.5173, "step": 6442 }, { "epoch": 0.9609963457379372, "grad_norm": 1.4076701402664185, "learning_rate": 8.001996307647197e-08, "loss": 0.4959, "step": 6443 }, { "epoch": 0.9611454992915206, "grad_norm": 1.6837540864944458, "learning_rate": 7.941109759143528e-08, "loss": 0.4983, "step": 6444 }, { "epoch": 0.9612946528451041, "grad_norm": 1.3231446743011475, "learning_rate": 7.880454813148807e-08, "loss": 0.4916, "step": 6445 }, { "epoch": 0.9614438063986874, "grad_norm": 1.6507563591003418, "learning_rate": 7.820031483823487e-08, "loss": 0.6353, "step": 6446 }, { "epoch": 0.9615929599522709, "grad_norm": 1.2558882236480713, "learning_rate": 7.759839785273615e-08, "loss": 0.5282, "step": 6447 }, { "epoch": 0.9617421135058543, "grad_norm": 1.4921176433563232, "learning_rate": 7.699879731551397e-08, "loss": 0.604, "step": 6448 }, { "epoch": 0.9618912670594377, "grad_norm": 1.4366761445999146, "learning_rate": 7.640151336654966e-08, "loss": 0.5249, "step": 6449 }, { "epoch": 0.9620404206130211, "grad_norm": 1.5242327451705933, "learning_rate": 7.580654614528282e-08, "loss": 0.5322, "step": 6450 }, { "epoch": 0.9621895741666046, "grad_norm": 1.366154432296753, "learning_rate": 7.521389579061234e-08, "loss": 0.4525, "step": 6451 }, { "epoch": 0.9623387277201879, "grad_norm": 1.2252180576324463, "learning_rate": 7.462356244089642e-08, "loss": 0.4822, "step": 6452 }, { "epoch": 0.9624878812737714, "grad_norm": 1.57624089717865, "learning_rate": 7.403554623395038e-08, "loss": 0.626, "step": 6453 }, { "epoch": 0.9626370348273547, "grad_norm": 1.4725022315979004, "learning_rate": 7.344984730705218e-08, "loss": 0.5699, "step": 6454 }, { "epoch": 0.9627861883809382, "grad_norm": 1.3600518703460693, "learning_rate": 7.286646579693691e-08, "loss": 0.497, "step": 6455 }, { "epoch": 0.9629353419345216, "grad_norm": 1.2901819944381714, "learning_rate": 7.228540183979782e-08, "loss": 0.5391, "step": 6456 }, { "epoch": 0.963084495488105, "grad_norm": 1.4683825969696045, "learning_rate": 7.170665557128975e-08, "loss": 0.5832, "step": 6457 }, { "epoch": 0.9632336490416884, "grad_norm": 1.465475082397461, "learning_rate": 7.11302271265224e-08, "loss": 0.5482, "step": 6458 }, { "epoch": 0.9633828025952719, "grad_norm": 1.2867991924285889, "learning_rate": 7.055611664006701e-08, "loss": 0.5498, "step": 6459 }, { "epoch": 0.9635319561488552, "grad_norm": 1.2697051763534546, "learning_rate": 6.998432424595524e-08, "loss": 0.5551, "step": 6460 }, { "epoch": 0.9636811097024387, "grad_norm": 1.1937847137451172, "learning_rate": 6.941485007767479e-08, "loss": 0.5303, "step": 6461 }, { "epoch": 0.9638302632560221, "grad_norm": 1.4986562728881836, "learning_rate": 6.884769426817261e-08, "loss": 0.5153, "step": 6462 }, { "epoch": 0.9639794168096055, "grad_norm": 1.336982011795044, "learning_rate": 6.828285694985504e-08, "loss": 0.5517, "step": 6463 }, { "epoch": 0.9641285703631889, "grad_norm": 0.8306257128715515, "learning_rate": 6.772033825458769e-08, "loss": 0.6433, "step": 6464 }, { "epoch": 0.9642777239167724, "grad_norm": 1.5489847660064697, "learning_rate": 6.716013831369217e-08, "loss": 0.5841, "step": 6465 }, { "epoch": 0.9644268774703557, "grad_norm": 1.6396998167037964, "learning_rate": 6.660225725795278e-08, "loss": 0.5614, "step": 6466 }, { "epoch": 0.9645760310239392, "grad_norm": 1.2784156799316406, "learning_rate": 6.604669521760975e-08, "loss": 0.5385, "step": 6467 }, { "epoch": 0.9647251845775225, "grad_norm": 1.4585034847259521, "learning_rate": 6.54934523223627e-08, "loss": 0.5968, "step": 6468 }, { "epoch": 0.964874338131106, "grad_norm": 1.3806378841400146, "learning_rate": 6.494252870136942e-08, "loss": 0.6171, "step": 6469 }, { "epoch": 0.9650234916846894, "grad_norm": 1.5523666143417358, "learning_rate": 6.439392448324699e-08, "loss": 0.5477, "step": 6470 }, { "epoch": 0.9651726452382728, "grad_norm": 1.3636322021484375, "learning_rate": 6.384763979607078e-08, "loss": 0.5431, "step": 6471 }, { "epoch": 0.9653217987918562, "grad_norm": 1.4669052362442017, "learning_rate": 6.330367476737321e-08, "loss": 0.5188, "step": 6472 }, { "epoch": 0.9654709523454397, "grad_norm": 1.275700569152832, "learning_rate": 6.276202952414823e-08, "loss": 0.509, "step": 6473 }, { "epoch": 0.965620105899023, "grad_norm": 1.3612233400344849, "learning_rate": 6.222270419284359e-08, "loss": 0.4597, "step": 6474 }, { "epoch": 0.9657692594526065, "grad_norm": 1.4348325729370117, "learning_rate": 6.168569889937081e-08, "loss": 0.6488, "step": 6475 }, { "epoch": 0.9659184130061899, "grad_norm": 1.3914363384246826, "learning_rate": 6.115101376909738e-08, "loss": 0.604, "step": 6476 }, { "epoch": 0.9660675665597733, "grad_norm": 1.383410096168518, "learning_rate": 6.061864892684788e-08, "loss": 0.5576, "step": 6477 }, { "epoch": 0.9662167201133567, "grad_norm": 1.34444260597229, "learning_rate": 6.008860449690512e-08, "loss": 0.5761, "step": 6478 }, { "epoch": 0.9663658736669402, "grad_norm": 1.4499939680099487, "learning_rate": 5.956088060301457e-08, "loss": 0.4964, "step": 6479 }, { "epoch": 0.9665150272205235, "grad_norm": 1.5813695192337036, "learning_rate": 5.903547736837323e-08, "loss": 0.5038, "step": 6480 }, { "epoch": 0.966664180774107, "grad_norm": 1.4692903757095337, "learning_rate": 5.851239491564298e-08, "loss": 0.5721, "step": 6481 }, { "epoch": 0.9668133343276903, "grad_norm": 1.3496301174163818, "learning_rate": 5.799163336693836e-08, "loss": 0.5539, "step": 6482 }, { "epoch": 0.9669624878812738, "grad_norm": 1.2833759784698486, "learning_rate": 5.7473192843835454e-08, "loss": 0.5232, "step": 6483 }, { "epoch": 0.9671116414348572, "grad_norm": 1.3652454614639282, "learning_rate": 5.6957073467367454e-08, "loss": 0.5629, "step": 6484 }, { "epoch": 0.9672607949884406, "grad_norm": 1.3514606952667236, "learning_rate": 5.644327535802685e-08, "loss": 0.5261, "step": 6485 }, { "epoch": 0.967409948542024, "grad_norm": 1.454548716545105, "learning_rate": 5.5931798635761036e-08, "loss": 0.5815, "step": 6486 }, { "epoch": 0.9675591020956075, "grad_norm": 1.4111580848693848, "learning_rate": 5.542264341997894e-08, "loss": 0.5513, "step": 6487 }, { "epoch": 0.9677082556491908, "grad_norm": 1.2744121551513672, "learning_rate": 5.491580982954547e-08, "loss": 0.694, "step": 6488 }, { "epoch": 0.9678574092027743, "grad_norm": 1.7900981903076172, "learning_rate": 5.441129798278488e-08, "loss": 0.5414, "step": 6489 }, { "epoch": 0.9680065627563577, "grad_norm": 1.4899483919143677, "learning_rate": 5.3909107997477395e-08, "loss": 0.5232, "step": 6490 }, { "epoch": 0.9681557163099411, "grad_norm": 1.4337626695632935, "learning_rate": 5.3409239990863673e-08, "loss": 0.5763, "step": 6491 }, { "epoch": 0.9683048698635245, "grad_norm": 1.3960723876953125, "learning_rate": 5.291169407964147e-08, "loss": 0.5894, "step": 6492 }, { "epoch": 0.968454023417108, "grad_norm": 1.2178841829299927, "learning_rate": 5.2416470379964556e-08, "loss": 0.5249, "step": 6493 }, { "epoch": 0.9686031769706913, "grad_norm": 1.5645649433135986, "learning_rate": 5.192356900744711e-08, "loss": 0.5629, "step": 6494 }, { "epoch": 0.9687523305242748, "grad_norm": 1.2660614252090454, "learning_rate": 5.1432990077160405e-08, "loss": 0.5455, "step": 6495 }, { "epoch": 0.9689014840778581, "grad_norm": 1.7736324071884155, "learning_rate": 5.0944733703632845e-08, "loss": 0.5063, "step": 6496 }, { "epoch": 0.9690506376314416, "grad_norm": 1.3376970291137695, "learning_rate": 5.04588000008499e-08, "loss": 0.5525, "step": 6497 }, { "epoch": 0.969199791185025, "grad_norm": 1.3853083848953247, "learning_rate": 4.9975189082258625e-08, "loss": 0.6021, "step": 6498 }, { "epoch": 0.9693489447386084, "grad_norm": 1.4605058431625366, "learning_rate": 4.949390106075758e-08, "loss": 0.5793, "step": 6499 }, { "epoch": 0.9694980982921918, "grad_norm": 1.7675831317901611, "learning_rate": 4.901493604870799e-08, "loss": 0.6005, "step": 6500 }, { "epoch": 0.9696472518457753, "grad_norm": 1.216318130493164, "learning_rate": 4.853829415792932e-08, "loss": 0.5451, "step": 6501 }, { "epoch": 0.9697964053993586, "grad_norm": 1.4765111207962036, "learning_rate": 4.8063975499694774e-08, "loss": 0.555, "step": 6502 }, { "epoch": 0.9699455589529421, "grad_norm": 1.4698901176452637, "learning_rate": 4.7591980184736874e-08, "loss": 0.59, "step": 6503 }, { "epoch": 0.9700947125065255, "grad_norm": 1.3845324516296387, "learning_rate": 4.7122308323246377e-08, "loss": 0.5734, "step": 6504 }, { "epoch": 0.9702438660601089, "grad_norm": 1.3054107427597046, "learning_rate": 4.6654960024871134e-08, "loss": 0.5505, "step": 6505 }, { "epoch": 0.9703930196136923, "grad_norm": 1.3812674283981323, "learning_rate": 4.618993539871719e-08, "loss": 0.6013, "step": 6506 }, { "epoch": 0.9705421731672758, "grad_norm": 1.39763605594635, "learning_rate": 4.57272345533466e-08, "loss": 0.5349, "step": 6507 }, { "epoch": 0.9706913267208591, "grad_norm": 1.396845817565918, "learning_rate": 4.526685759678073e-08, "loss": 0.4717, "step": 6508 }, { "epoch": 0.9708404802744426, "grad_norm": 1.6802090406417847, "learning_rate": 4.480880463649584e-08, "loss": 0.5979, "step": 6509 }, { "epoch": 0.9709896338280259, "grad_norm": 1.5477359294891357, "learning_rate": 4.4353075779429713e-08, "loss": 0.5226, "step": 6510 }, { "epoch": 0.9711387873816094, "grad_norm": 1.3087962865829468, "learning_rate": 4.389967113197391e-08, "loss": 0.5088, "step": 6511 }, { "epoch": 0.9712879409351928, "grad_norm": 1.5070754289627075, "learning_rate": 4.3448590799978205e-08, "loss": 0.5686, "step": 6512 }, { "epoch": 0.9714370944887762, "grad_norm": 1.9075853824615479, "learning_rate": 4.299983488875059e-08, "loss": 0.5397, "step": 6513 }, { "epoch": 0.9715862480423596, "grad_norm": 1.7407798767089844, "learning_rate": 4.255340350305726e-08, "loss": 0.6123, "step": 6514 }, { "epoch": 0.9717354015959431, "grad_norm": 0.9012773036956787, "learning_rate": 4.210929674711817e-08, "loss": 0.6327, "step": 6515 }, { "epoch": 0.9718845551495264, "grad_norm": 1.3216266632080078, "learning_rate": 4.166751472461483e-08, "loss": 0.5393, "step": 6516 }, { "epoch": 0.9720337087031099, "grad_norm": 1.453436017036438, "learning_rate": 4.1228057538683644e-08, "loss": 0.5761, "step": 6517 }, { "epoch": 0.9721828622566933, "grad_norm": 1.3228815793991089, "learning_rate": 4.0790925291918084e-08, "loss": 0.4999, "step": 6518 }, { "epoch": 0.9723320158102767, "grad_norm": 1.3821004629135132, "learning_rate": 4.035611808636986e-08, "loss": 0.5794, "step": 6519 }, { "epoch": 0.9724811693638601, "grad_norm": 1.3758609294891357, "learning_rate": 3.9923636023547765e-08, "loss": 0.5345, "step": 6520 }, { "epoch": 0.9726303229174436, "grad_norm": 1.341080665588379, "learning_rate": 3.94934792044166e-08, "loss": 0.5229, "step": 6521 }, { "epoch": 0.9727794764710269, "grad_norm": 1.4424231052398682, "learning_rate": 3.906564772939936e-08, "loss": 0.5629, "step": 6522 }, { "epoch": 0.9729286300246104, "grad_norm": 1.5165225267410278, "learning_rate": 3.8640141698378376e-08, "loss": 0.5188, "step": 6523 }, { "epoch": 0.9730777835781937, "grad_norm": 1.3878928422927856, "learning_rate": 3.821696121068752e-08, "loss": 0.5078, "step": 6524 }, { "epoch": 0.9732269371317772, "grad_norm": 1.390367865562439, "learning_rate": 3.779610636512221e-08, "loss": 0.5824, "step": 6525 }, { "epoch": 0.9733760906853606, "grad_norm": 1.4520277976989746, "learning_rate": 3.737757725993496e-08, "loss": 0.4927, "step": 6526 }, { "epoch": 0.973525244238944, "grad_norm": 1.4212322235107422, "learning_rate": 3.696137399283206e-08, "loss": 0.6014, "step": 6527 }, { "epoch": 0.9736743977925274, "grad_norm": 1.2541574239730835, "learning_rate": 3.654749666098023e-08, "loss": 0.5377, "step": 6528 }, { "epoch": 0.9738235513461109, "grad_norm": 1.573636770248413, "learning_rate": 3.613594536100107e-08, "loss": 0.5001, "step": 6529 }, { "epoch": 0.9739727048996942, "grad_norm": 1.2776786088943481, "learning_rate": 3.5726720188974384e-08, "loss": 0.5502, "step": 6530 }, { "epoch": 0.9741218584532777, "grad_norm": 1.7257682085037231, "learning_rate": 3.531982124043598e-08, "loss": 0.5549, "step": 6531 }, { "epoch": 0.9742710120068611, "grad_norm": 1.2863904237747192, "learning_rate": 3.491524861037876e-08, "loss": 0.5385, "step": 6532 }, { "epoch": 0.9744201655604445, "grad_norm": 1.4047092199325562, "learning_rate": 3.451300239325384e-08, "loss": 0.5941, "step": 6533 }, { "epoch": 0.9745693191140279, "grad_norm": 1.3050695657730103, "learning_rate": 3.41130826829672e-08, "loss": 0.6244, "step": 6534 }, { "epoch": 0.9747184726676114, "grad_norm": 1.2553095817565918, "learning_rate": 3.371548957288418e-08, "loss": 0.5956, "step": 6535 }, { "epoch": 0.9748676262211947, "grad_norm": 1.308959722518921, "learning_rate": 3.332022315582273e-08, "loss": 0.4735, "step": 6536 }, { "epoch": 0.9750167797747782, "grad_norm": 1.5278946161270142, "learning_rate": 3.292728352406238e-08, "loss": 0.5778, "step": 6537 }, { "epoch": 0.9751659333283615, "grad_norm": 1.357495665550232, "learning_rate": 3.253667076933753e-08, "loss": 0.5634, "step": 6538 }, { "epoch": 0.975315086881945, "grad_norm": 1.3978906869888306, "learning_rate": 3.214838498283857e-08, "loss": 0.4921, "step": 6539 }, { "epoch": 0.9754642404355284, "grad_norm": 1.2748900651931763, "learning_rate": 3.176242625521297e-08, "loss": 0.5524, "step": 6540 }, { "epoch": 0.9756133939891118, "grad_norm": 1.3284529447555542, "learning_rate": 3.137879467656535e-08, "loss": 0.5403, "step": 6541 }, { "epoch": 0.9757625475426952, "grad_norm": 1.4636125564575195, "learning_rate": 3.099749033645738e-08, "loss": 0.6212, "step": 6542 }, { "epoch": 0.9759117010962787, "grad_norm": 1.2176868915557861, "learning_rate": 3.061851332390786e-08, "loss": 0.5345, "step": 6543 }, { "epoch": 0.976060854649862, "grad_norm": 1.7054764032363892, "learning_rate": 3.024186372738935e-08, "loss": 0.584, "step": 6544 }, { "epoch": 0.9762100082034455, "grad_norm": 1.2901313304901123, "learning_rate": 2.986754163483485e-08, "loss": 0.5238, "step": 6545 }, { "epoch": 0.9763591617570289, "grad_norm": 1.3981353044509888, "learning_rate": 2.949554713363112e-08, "loss": 0.5457, "step": 6546 }, { "epoch": 0.9765083153106123, "grad_norm": 1.4245580434799194, "learning_rate": 2.9125880310623132e-08, "loss": 0.5617, "step": 6547 }, { "epoch": 0.9766574688641957, "grad_norm": 1.6399275064468384, "learning_rate": 2.875854125211297e-08, "loss": 0.5428, "step": 6548 }, { "epoch": 0.9768066224177792, "grad_norm": 1.2486326694488525, "learning_rate": 2.8393530043856476e-08, "loss": 0.5073, "step": 6549 }, { "epoch": 0.9769557759713625, "grad_norm": 1.2516263723373413, "learning_rate": 2.803084677106882e-08, "loss": 0.4785, "step": 6550 }, { "epoch": 0.977104929524946, "grad_norm": 1.3713204860687256, "learning_rate": 2.767049151842005e-08, "loss": 0.572, "step": 6551 }, { "epoch": 0.9772540830785293, "grad_norm": 1.5732065439224243, "learning_rate": 2.731246437003843e-08, "loss": 0.5136, "step": 6552 }, { "epoch": 0.9774032366321128, "grad_norm": 1.2624592781066895, "learning_rate": 2.695676540950709e-08, "loss": 0.5417, "step": 6553 }, { "epoch": 0.9775523901856962, "grad_norm": 1.4259437322616577, "learning_rate": 2.660339471986739e-08, "loss": 0.6452, "step": 6554 }, { "epoch": 0.9777015437392796, "grad_norm": 1.2114413976669312, "learning_rate": 2.6252352383613346e-08, "loss": 0.5554, "step": 6555 }, { "epoch": 0.977850697292863, "grad_norm": 0.8264786005020142, "learning_rate": 2.59036384827005e-08, "loss": 0.6568, "step": 6556 }, { "epoch": 0.9779998508464464, "grad_norm": 1.3161752223968506, "learning_rate": 2.555725309853818e-08, "loss": 0.6095, "step": 6557 }, { "epoch": 0.9781490044000298, "grad_norm": 1.517126202583313, "learning_rate": 2.5213196311990595e-08, "loss": 0.466, "step": 6558 }, { "epoch": 0.9782981579536132, "grad_norm": 1.3649232387542725, "learning_rate": 2.4871468203382376e-08, "loss": 0.6184, "step": 6559 }, { "epoch": 0.9784473115071967, "grad_norm": 1.3910001516342163, "learning_rate": 2.4532068852489708e-08, "loss": 0.5736, "step": 6560 }, { "epoch": 0.97859646506078, "grad_norm": 1.4643762111663818, "learning_rate": 2.4194998338548103e-08, "loss": 0.5451, "step": 6561 }, { "epoch": 0.9787456186143635, "grad_norm": 1.457332968711853, "learning_rate": 2.3860256740250166e-08, "loss": 0.5552, "step": 6562 }, { "epoch": 0.9788947721679468, "grad_norm": 1.3365395069122314, "learning_rate": 2.352784413574227e-08, "loss": 0.5922, "step": 6563 }, { "epoch": 0.9790439257215303, "grad_norm": 1.4099576473236084, "learning_rate": 2.3197760602629015e-08, "loss": 0.5189, "step": 6564 }, { "epoch": 0.9791930792751137, "grad_norm": 1.4307565689086914, "learning_rate": 2.2870006217969864e-08, "loss": 0.5145, "step": 6565 }, { "epoch": 0.9793422328286971, "grad_norm": 1.244937539100647, "learning_rate": 2.25445810582825e-08, "loss": 0.482, "step": 6566 }, { "epoch": 0.9794913863822805, "grad_norm": 1.465780258178711, "learning_rate": 2.2221485199537262e-08, "loss": 0.58, "step": 6567 }, { "epoch": 0.979640539935864, "grad_norm": 1.3652311563491821, "learning_rate": 2.1900718717164927e-08, "loss": 0.5178, "step": 6568 }, { "epoch": 0.9797896934894473, "grad_norm": 1.4081252813339233, "learning_rate": 2.1582281686048924e-08, "loss": 0.5653, "step": 6569 }, { "epoch": 0.9799388470430308, "grad_norm": 1.5832912921905518, "learning_rate": 2.1266174180532006e-08, "loss": 0.6097, "step": 6570 }, { "epoch": 0.9800880005966142, "grad_norm": 1.4508135318756104, "learning_rate": 2.09523962744107e-08, "loss": 0.4859, "step": 6571 }, { "epoch": 0.9802371541501976, "grad_norm": 1.3670926094055176, "learning_rate": 2.0640948040937525e-08, "loss": 0.5652, "step": 6572 }, { "epoch": 0.980386307703781, "grad_norm": 1.3083975315093994, "learning_rate": 2.0331829552824313e-08, "loss": 0.5644, "step": 6573 }, { "epoch": 0.9805354612573645, "grad_norm": 1.4399482011795044, "learning_rate": 2.0025040882234447e-08, "loss": 0.5563, "step": 6574 }, { "epoch": 0.9806846148109478, "grad_norm": 1.5886808633804321, "learning_rate": 1.9720582100791753e-08, "loss": 0.526, "step": 6575 }, { "epoch": 0.9808337683645313, "grad_norm": 0.8839775323867798, "learning_rate": 1.94184532795727e-08, "loss": 0.6518, "step": 6576 }, { "epoch": 0.9809829219181146, "grad_norm": 1.2467548847198486, "learning_rate": 1.9118654489110877e-08, "loss": 0.5305, "step": 6577 }, { "epoch": 0.9811320754716981, "grad_norm": 1.6230987310409546, "learning_rate": 1.8821185799398067e-08, "loss": 0.5739, "step": 6578 }, { "epoch": 0.9812812290252815, "grad_norm": 1.58897066116333, "learning_rate": 1.8526047279878723e-08, "loss": 0.5217, "step": 6579 }, { "epoch": 0.981430382578865, "grad_norm": 1.3842414617538452, "learning_rate": 1.8233238999454394e-08, "loss": 0.5861, "step": 6580 }, { "epoch": 0.9815795361324483, "grad_norm": 1.3395564556121826, "learning_rate": 1.7942761026484845e-08, "loss": 0.5859, "step": 6581 }, { "epoch": 0.9817286896860318, "grad_norm": 2.5463757514953613, "learning_rate": 1.7654613428782498e-08, "loss": 0.5157, "step": 6582 }, { "epoch": 0.9818778432396151, "grad_norm": 1.5236107110977173, "learning_rate": 1.7368796273617982e-08, "loss": 0.6112, "step": 6583 }, { "epoch": 0.9820269967931986, "grad_norm": 1.3911006450653076, "learning_rate": 1.708530962771793e-08, "loss": 0.5531, "step": 6584 }, { "epoch": 0.982176150346782, "grad_norm": 1.4401905536651611, "learning_rate": 1.6804153557261615e-08, "loss": 0.6498, "step": 6585 }, { "epoch": 0.9823253039003654, "grad_norm": 1.4680742025375366, "learning_rate": 1.652532812788987e-08, "loss": 0.614, "step": 6586 }, { "epoch": 0.9824744574539488, "grad_norm": 1.3529657125473022, "learning_rate": 1.6248833404692856e-08, "loss": 0.4977, "step": 6587 }, { "epoch": 0.9826236110075323, "grad_norm": 1.337029218673706, "learning_rate": 1.597466945222337e-08, "loss": 0.5632, "step": 6588 }, { "epoch": 0.9827727645611156, "grad_norm": 1.4088133573532104, "learning_rate": 1.570283633448466e-08, "loss": 0.4262, "step": 6589 }, { "epoch": 0.9829219181146991, "grad_norm": 1.3461666107177734, "learning_rate": 1.5433334114938193e-08, "loss": 0.4886, "step": 6590 }, { "epoch": 0.9830710716682824, "grad_norm": 1.6347413063049316, "learning_rate": 1.516616285650141e-08, "loss": 0.4947, "step": 6591 }, { "epoch": 0.9832202252218659, "grad_norm": 1.5392589569091797, "learning_rate": 1.4901322621547753e-08, "loss": 0.5489, "step": 6592 }, { "epoch": 0.9833693787754493, "grad_norm": 1.311003565788269, "learning_rate": 1.4638813471904435e-08, "loss": 0.5127, "step": 6593 }, { "epoch": 0.9835185323290327, "grad_norm": 1.4982550144195557, "learning_rate": 1.4378635468855762e-08, "loss": 0.5835, "step": 6594 }, { "epoch": 0.9836676858826161, "grad_norm": 1.3603013753890991, "learning_rate": 1.412078867314426e-08, "loss": 0.548, "step": 6595 }, { "epoch": 0.9838168394361996, "grad_norm": 1.6866728067398071, "learning_rate": 1.3865273144963998e-08, "loss": 0.53, "step": 6596 }, { "epoch": 0.9839659929897829, "grad_norm": 1.2624038457870483, "learning_rate": 1.3612088943967262e-08, "loss": 0.5953, "step": 6597 }, { "epoch": 0.9841151465433664, "grad_norm": 1.6974990367889404, "learning_rate": 1.3361236129261212e-08, "loss": 0.5309, "step": 6598 }, { "epoch": 0.9842643000969498, "grad_norm": 1.322638988494873, "learning_rate": 1.3112714759409006e-08, "loss": 0.6052, "step": 6599 }, { "epoch": 0.9844134536505332, "grad_norm": 1.5243257284164429, "learning_rate": 1.2866524892430898e-08, "loss": 0.5819, "step": 6600 }, { "epoch": 0.9845626072041166, "grad_norm": 1.3718695640563965, "learning_rate": 1.2622666585799803e-08, "loss": 0.5529, "step": 6601 }, { "epoch": 0.9847117607577001, "grad_norm": 1.2600350379943848, "learning_rate": 1.2381139896445737e-08, "loss": 0.533, "step": 6602 }, { "epoch": 0.9848609143112834, "grad_norm": 1.2817765474319458, "learning_rate": 1.2141944880756928e-08, "loss": 0.4904, "step": 6603 }, { "epoch": 0.9850100678648669, "grad_norm": 1.3151142597198486, "learning_rate": 1.1905081594573153e-08, "loss": 0.4849, "step": 6604 }, { "epoch": 0.9851592214184502, "grad_norm": 0.8774458765983582, "learning_rate": 1.167055009319129e-08, "loss": 0.5975, "step": 6605 }, { "epoch": 0.9853083749720337, "grad_norm": 1.167645812034607, "learning_rate": 1.143835043136643e-08, "loss": 0.4944, "step": 6606 }, { "epoch": 0.9854575285256171, "grad_norm": 1.621445655822754, "learning_rate": 1.120848266330521e-08, "loss": 0.5638, "step": 6607 }, { "epoch": 0.9856066820792005, "grad_norm": 1.2821909189224243, "learning_rate": 1.098094684267137e-08, "loss": 0.553, "step": 6608 }, { "epoch": 0.9857558356327839, "grad_norm": 1.5446919202804565, "learning_rate": 1.0755743022585751e-08, "loss": 0.5348, "step": 6609 }, { "epoch": 0.9859049891863674, "grad_norm": 1.4239978790283203, "learning_rate": 1.0532871255624077e-08, "loss": 0.6088, "step": 6610 }, { "epoch": 0.9860541427399507, "grad_norm": 1.3168461322784424, "learning_rate": 1.0312331593815839e-08, "loss": 0.4983, "step": 6611 }, { "epoch": 0.9862032962935342, "grad_norm": 1.3477954864501953, "learning_rate": 1.0094124088648739e-08, "loss": 0.5398, "step": 6612 }, { "epoch": 0.9863524498471176, "grad_norm": 1.4920740127563477, "learning_rate": 9.878248791063138e-09, "loss": 0.5748, "step": 6613 }, { "epoch": 0.986501603400701, "grad_norm": 1.4168095588684082, "learning_rate": 9.664705751457615e-09, "loss": 0.5749, "step": 6614 }, { "epoch": 0.9866507569542844, "grad_norm": 1.5239049196243286, "learning_rate": 9.45349501968451e-09, "loss": 0.5516, "step": 6615 }, { "epoch": 0.9867999105078679, "grad_norm": 1.5490095615386963, "learning_rate": 9.244616645053272e-09, "loss": 0.568, "step": 6616 }, { "epoch": 0.9869490640614512, "grad_norm": 1.3049355745315552, "learning_rate": 9.038070676328226e-09, "loss": 0.508, "step": 6617 }, { "epoch": 0.9870982176150347, "grad_norm": 1.6161746978759766, "learning_rate": 8.833857161726355e-09, "loss": 0.5542, "step": 6618 }, { "epoch": 0.987247371168618, "grad_norm": 0.8782498836517334, "learning_rate": 8.631976148925081e-09, "loss": 0.6661, "step": 6619 }, { "epoch": 0.9873965247222015, "grad_norm": 1.3339519500732422, "learning_rate": 8.432427685054479e-09, "loss": 0.5432, "step": 6620 }, { "epoch": 0.9875456782757849, "grad_norm": 1.3496371507644653, "learning_rate": 8.235211816699506e-09, "loss": 0.62, "step": 6621 }, { "epoch": 0.9876948318293683, "grad_norm": 1.4449573755264282, "learning_rate": 8.040328589901114e-09, "loss": 0.5065, "step": 6622 }, { "epoch": 0.9878439853829517, "grad_norm": 0.8422784209251404, "learning_rate": 7.847778050157351e-09, "loss": 0.644, "step": 6623 }, { "epoch": 0.9879931389365352, "grad_norm": 0.8819248676300049, "learning_rate": 7.657560242420037e-09, "loss": 0.6706, "step": 6624 }, { "epoch": 0.9881422924901185, "grad_norm": 1.501665711402893, "learning_rate": 7.469675211096983e-09, "loss": 0.5649, "step": 6625 }, { "epoch": 0.988291446043702, "grad_norm": 1.8677341938018799, "learning_rate": 7.28412300004977e-09, "loss": 0.4777, "step": 6626 }, { "epoch": 0.9884405995972854, "grad_norm": 0.8149887919425964, "learning_rate": 7.10090365259819e-09, "loss": 0.6206, "step": 6627 }, { "epoch": 0.9885897531508688, "grad_norm": 1.516149878501892, "learning_rate": 6.920017211515806e-09, "loss": 0.6185, "step": 6628 }, { "epoch": 0.9887389067044522, "grad_norm": 1.6421096324920654, "learning_rate": 6.7414637190310605e-09, "loss": 0.583, "step": 6629 }, { "epoch": 0.9888880602580357, "grad_norm": 1.3734047412872314, "learning_rate": 6.5652432168283875e-09, "loss": 0.5329, "step": 6630 }, { "epoch": 0.989037213811619, "grad_norm": 1.3150677680969238, "learning_rate": 6.391355746048211e-09, "loss": 0.5275, "step": 6631 }, { "epoch": 0.9891863673652025, "grad_norm": 1.57711660861969, "learning_rate": 6.219801347285837e-09, "loss": 0.5739, "step": 6632 }, { "epoch": 0.9893355209187858, "grad_norm": 1.3483080863952637, "learning_rate": 6.050580060590339e-09, "loss": 0.6105, "step": 6633 }, { "epoch": 0.9894846744723693, "grad_norm": 1.4046624898910522, "learning_rate": 5.883691925469004e-09, "loss": 0.4695, "step": 6634 }, { "epoch": 0.9896338280259527, "grad_norm": 1.4581142663955688, "learning_rate": 5.719136980882889e-09, "loss": 0.5428, "step": 6635 }, { "epoch": 0.9897829815795361, "grad_norm": 1.2971287965774536, "learning_rate": 5.556915265247931e-09, "loss": 0.5686, "step": 6636 }, { "epoch": 0.9899321351331195, "grad_norm": 1.5852420330047607, "learning_rate": 5.397026816434947e-09, "loss": 0.5136, "step": 6637 }, { "epoch": 0.990081288686703, "grad_norm": 1.5995205640792847, "learning_rate": 5.239471671772967e-09, "loss": 0.541, "step": 6638 }, { "epoch": 0.9902304422402863, "grad_norm": 1.5040277242660522, "learning_rate": 5.084249868042568e-09, "loss": 0.5873, "step": 6639 }, { "epoch": 0.9903795957938698, "grad_norm": 1.3778884410858154, "learning_rate": 4.93136144148143e-09, "loss": 0.5518, "step": 6640 }, { "epoch": 0.9905287493474532, "grad_norm": 1.3394885063171387, "learning_rate": 4.780806427783224e-09, "loss": 0.5623, "step": 6641 }, { "epoch": 0.9906779029010366, "grad_norm": 1.612064242362976, "learning_rate": 4.632584862095391e-09, "loss": 0.5047, "step": 6642 }, { "epoch": 0.99082705645462, "grad_norm": 1.5181946754455566, "learning_rate": 4.48669677902025e-09, "loss": 0.6183, "step": 6643 }, { "epoch": 0.9909762100082035, "grad_norm": 1.3024383783340454, "learning_rate": 4.3431422126183344e-09, "loss": 0.5364, "step": 6644 }, { "epoch": 0.9911253635617868, "grad_norm": 1.3976013660430908, "learning_rate": 4.201921196402836e-09, "loss": 0.552, "step": 6645 }, { "epoch": 0.9912745171153703, "grad_norm": 0.9430871605873108, "learning_rate": 4.063033763341828e-09, "loss": 0.6825, "step": 6646 }, { "epoch": 0.9914236706689536, "grad_norm": 1.556351661682129, "learning_rate": 3.9264799458593736e-09, "loss": 0.556, "step": 6647 }, { "epoch": 0.9915728242225371, "grad_norm": 1.817840576171875, "learning_rate": 3.7922597758355275e-09, "loss": 0.5834, "step": 6648 }, { "epoch": 0.9917219777761205, "grad_norm": 1.4594366550445557, "learning_rate": 3.660373284605223e-09, "loss": 0.6632, "step": 6649 }, { "epoch": 0.991871131329704, "grad_norm": 1.4920250177383423, "learning_rate": 3.5308205029571663e-09, "loss": 0.5664, "step": 6650 }, { "epoch": 0.9920202848832873, "grad_norm": 1.3599557876586914, "learning_rate": 3.4036014611371624e-09, "loss": 0.4975, "step": 6651 }, { "epoch": 0.9921694384368708, "grad_norm": 1.2342208623886108, "learning_rate": 3.2787161888447883e-09, "loss": 0.5644, "step": 6652 }, { "epoch": 0.9923185919904541, "grad_norm": 1.3848623037338257, "learning_rate": 3.15616471523561e-09, "loss": 0.5421, "step": 6653 }, { "epoch": 0.9924677455440376, "grad_norm": 1.4428112506866455, "learning_rate": 3.035947068920075e-09, "loss": 0.5613, "step": 6654 }, { "epoch": 0.992616899097621, "grad_norm": 1.2319611310958862, "learning_rate": 2.9180632779624017e-09, "loss": 0.5226, "step": 6655 }, { "epoch": 0.9927660526512044, "grad_norm": 1.4120817184448242, "learning_rate": 2.8025133698861282e-09, "loss": 0.4895, "step": 6656 }, { "epoch": 0.9929152062047878, "grad_norm": 1.3322505950927734, "learning_rate": 2.6892973716641235e-09, "loss": 0.5221, "step": 6657 }, { "epoch": 0.9930643597583713, "grad_norm": 1.3222341537475586, "learning_rate": 2.578415309729687e-09, "loss": 0.5563, "step": 6658 }, { "epoch": 0.9932135133119546, "grad_norm": 1.4487162828445435, "learning_rate": 2.469867209967669e-09, "loss": 0.5371, "step": 6659 }, { "epoch": 0.9933626668655381, "grad_norm": 1.4905307292938232, "learning_rate": 2.36365309772002e-09, "loss": 0.5268, "step": 6660 }, { "epoch": 0.9935118204191214, "grad_norm": 1.3627440929412842, "learning_rate": 2.259772997782461e-09, "loss": 0.5917, "step": 6661 }, { "epoch": 0.9936609739727049, "grad_norm": 1.2674909830093384, "learning_rate": 2.1582269344067043e-09, "loss": 0.5234, "step": 6662 }, { "epoch": 0.9938101275262883, "grad_norm": 1.4112329483032227, "learning_rate": 2.0590149312993412e-09, "loss": 0.5006, "step": 6663 }, { "epoch": 0.9939592810798717, "grad_norm": 1.2860627174377441, "learning_rate": 1.9621370116218453e-09, "loss": 0.4916, "step": 6664 }, { "epoch": 0.9941084346334551, "grad_norm": 1.429064393043518, "learning_rate": 1.8675931979916794e-09, "loss": 0.613, "step": 6665 }, { "epoch": 0.9942575881870386, "grad_norm": 1.3844772577285767, "learning_rate": 1.7753835124800778e-09, "loss": 0.5442, "step": 6666 }, { "epoch": 0.9944067417406219, "grad_norm": 1.3112493753433228, "learning_rate": 1.6855079766142646e-09, "loss": 0.5686, "step": 6667 }, { "epoch": 0.9945558952942054, "grad_norm": 1.3345869779586792, "learning_rate": 1.5979666113763448e-09, "loss": 0.4829, "step": 6668 }, { "epoch": 0.9947050488477888, "grad_norm": 1.490751028060913, "learning_rate": 1.5127594372033038e-09, "loss": 0.6227, "step": 6669 }, { "epoch": 0.9948542024013722, "grad_norm": 1.3890241384506226, "learning_rate": 1.4298864739870078e-09, "loss": 0.5323, "step": 6670 }, { "epoch": 0.9950033559549556, "grad_norm": 1.2407341003417969, "learning_rate": 1.349347741075313e-09, "loss": 0.5481, "step": 6671 }, { "epoch": 0.9951525095085391, "grad_norm": 1.5148576498031616, "learning_rate": 1.2711432572698468e-09, "loss": 0.5713, "step": 6672 }, { "epoch": 0.9953016630621224, "grad_norm": 1.4186774492263794, "learning_rate": 1.1952730408282264e-09, "loss": 0.597, "step": 6673 }, { "epoch": 0.9954508166157059, "grad_norm": 1.40076744556427, "learning_rate": 1.12173710946184e-09, "loss": 0.5973, "step": 6674 }, { "epoch": 0.9955999701692893, "grad_norm": 1.2457184791564941, "learning_rate": 1.0505354803402867e-09, "loss": 0.5428, "step": 6675 }, { "epoch": 0.9957491237228727, "grad_norm": 1.2511085271835327, "learning_rate": 9.816681700847152e-10, "loss": 0.5583, "step": 6676 }, { "epoch": 0.9958982772764561, "grad_norm": 1.3415443897247314, "learning_rate": 9.151351947722653e-10, "loss": 0.5255, "step": 6677 }, { "epoch": 0.9960474308300395, "grad_norm": 1.441324234008789, "learning_rate": 8.509365699360672e-10, "loss": 0.6352, "step": 6678 }, { "epoch": 0.9961965843836229, "grad_norm": 1.3471413850784302, "learning_rate": 7.890723105641318e-10, "loss": 0.5148, "step": 6679 }, { "epoch": 0.9963457379372064, "grad_norm": 1.585437297821045, "learning_rate": 7.295424310982402e-10, "loss": 0.5348, "step": 6680 }, { "epoch": 0.9964948914907897, "grad_norm": 1.360052466392517, "learning_rate": 6.723469454372744e-10, "loss": 0.5365, "step": 6681 }, { "epoch": 0.9966440450443732, "grad_norm": 1.5833654403686523, "learning_rate": 6.174858669316664e-10, "loss": 0.5828, "step": 6682 }, { "epoch": 0.9967931985979566, "grad_norm": 1.3309102058410645, "learning_rate": 5.649592083911693e-10, "loss": 0.6232, "step": 6683 }, { "epoch": 0.99694235215154, "grad_norm": 1.3952643871307373, "learning_rate": 5.147669820770861e-10, "loss": 0.5149, "step": 6684 }, { "epoch": 0.9970915057051234, "grad_norm": 1.2394627332687378, "learning_rate": 4.669091997078212e-10, "loss": 0.5504, "step": 6685 }, { "epoch": 0.9972406592587069, "grad_norm": 1.5272576808929443, "learning_rate": 4.2138587245665883e-10, "loss": 0.5491, "step": 6686 }, { "epoch": 0.9973898128122902, "grad_norm": 1.1747347116470337, "learning_rate": 3.7819701094954365e-10, "loss": 0.513, "step": 6687 }, { "epoch": 0.9975389663658737, "grad_norm": 1.2983100414276123, "learning_rate": 3.373426252706313e-10, "loss": 0.5964, "step": 6688 }, { "epoch": 0.997688119919457, "grad_norm": 1.5933345556259155, "learning_rate": 2.988227249578479e-10, "loss": 0.5536, "step": 6689 }, { "epoch": 0.9978372734730405, "grad_norm": 1.226975917816162, "learning_rate": 2.626373190028897e-10, "loss": 0.5103, "step": 6690 }, { "epoch": 0.9979864270266239, "grad_norm": 1.4723412990570068, "learning_rate": 2.2878641585455385e-10, "loss": 0.5535, "step": 6691 }, { "epoch": 0.9981355805802073, "grad_norm": 1.3499491214752197, "learning_rate": 1.9727002341429768e-10, "loss": 0.536, "step": 6692 }, { "epoch": 0.9982847341337907, "grad_norm": 1.4139840602874756, "learning_rate": 1.680881490406794e-10, "loss": 0.5388, "step": 6693 }, { "epoch": 0.9984338876873742, "grad_norm": 1.273285984992981, "learning_rate": 1.4124079954602743e-10, "loss": 0.5285, "step": 6694 }, { "epoch": 0.9985830412409575, "grad_norm": 1.4379265308380127, "learning_rate": 1.167279811975508e-10, "loss": 0.5469, "step": 6695 }, { "epoch": 0.998732194794541, "grad_norm": 1.2744473218917847, "learning_rate": 9.454969971955941e-11, "loss": 0.5533, "step": 6696 }, { "epoch": 0.9988813483481244, "grad_norm": 2.2242894172668457, "learning_rate": 7.470596028902321e-11, "loss": 0.5601, "step": 6697 }, { "epoch": 0.9990305019017078, "grad_norm": 1.4548887014389038, "learning_rate": 5.719676753668246e-11, "loss": 0.5332, "step": 6698 }, { "epoch": 0.9991796554552912, "grad_norm": 1.291332483291626, "learning_rate": 4.202212555259877e-11, "loss": 0.5848, "step": 6699 }, { "epoch": 0.9993288090088747, "grad_norm": 1.4373674392700195, "learning_rate": 2.9182037879493805e-11, "loss": 0.5342, "step": 6700 }, { "epoch": 0.999477962562458, "grad_norm": 1.4577016830444336, "learning_rate": 1.8676507512749297e-11, "loss": 0.5739, "step": 6701 }, { "epoch": 0.9996271161160415, "grad_norm": 1.4619547128677368, "learning_rate": 1.0505536907068347e-11, "loss": 0.5384, "step": 6702 }, { "epoch": 0.9997762696696249, "grad_norm": 1.2020866870880127, "learning_rate": 4.6691279687038905e-12, "loss": 0.4813, "step": 6703 }, { "epoch": 0.9999254232232083, "grad_norm": 1.3079601526260376, "learning_rate": 1.167282059899577e-12, "loss": 0.4877, "step": 6704 }, { "epoch": 0.9999254232232083, "step": 6704, "total_flos": 2.2951749673119908e+19, "train_loss": 0.6032926769958262, "train_runtime": 33533.4333, "train_samples_per_second": 25.593, "train_steps_per_second": 0.2 } ], "logging_steps": 1.0, "max_steps": 6704, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.2951749673119908e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }