{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 2781, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002157497303128371, "grad_norm": 2.570061445236206, "learning_rate": 3.571428571428571e-09, "loss": 0.9756889939308167, "step": 2 }, { "epoch": 0.004314994606256742, "grad_norm": 2.8094983100891113, "learning_rate": 1.0714285714285715e-08, "loss": 0.6637275218963623, "step": 4 }, { "epoch": 0.006472491909385114, "grad_norm": 4.699804782867432, "learning_rate": 1.7857142857142856e-08, "loss": 0.8357824087142944, "step": 6 }, { "epoch": 0.008629989212513484, "grad_norm": 4.096808433532715, "learning_rate": 2.5e-08, "loss": 0.8224667906761169, "step": 8 }, { "epoch": 0.010787486515641856, "grad_norm": 2.105877161026001, "learning_rate": 3.214285714285714e-08, "loss": 0.7087568044662476, "step": 10 }, { "epoch": 0.012944983818770227, "grad_norm": 4.518003463745117, "learning_rate": 3.9285714285714285e-08, "loss": 0.7910435795783997, "step": 12 }, { "epoch": 0.015102481121898598, "grad_norm": 5.335687160491943, "learning_rate": 4.642857142857143e-08, "loss": 0.8569395542144775, "step": 14 }, { "epoch": 0.017259978425026967, "grad_norm": 2.8184239864349365, "learning_rate": 5.3571428571428564e-08, "loss": 1.1704014539718628, "step": 16 }, { "epoch": 0.019417475728155338, "grad_norm": 2.6054298877716064, "learning_rate": 6.071428571428572e-08, "loss": 1.0189613103866577, "step": 18 }, { "epoch": 0.021574973031283712, "grad_norm": 2.1738038063049316, "learning_rate": 6.785714285714285e-08, "loss": 0.6196831464767456, "step": 20 }, { "epoch": 0.023732470334412083, "grad_norm": 3.19342041015625, "learning_rate": 7.5e-08, "loss": 0.9074231386184692, "step": 22 }, { "epoch": 0.025889967637540454, "grad_norm": 6.756741046905518, "learning_rate": 8.214285714285714e-08, "loss": 0.80443274974823, "step": 24 }, { "epoch": 0.028047464940668825, "grad_norm": 4.307950496673584, "learning_rate": 8.928571428571429e-08, "loss": 0.7722480893135071, "step": 26 }, { "epoch": 0.030204962243797196, "grad_norm": 4.798768043518066, "learning_rate": 9.642857142857142e-08, "loss": 0.7002389430999756, "step": 28 }, { "epoch": 0.032362459546925564, "grad_norm": 4.575655460357666, "learning_rate": 1.0357142857142857e-07, "loss": 1.014316439628601, "step": 30 }, { "epoch": 0.034519956850053934, "grad_norm": 1.0345690250396729, "learning_rate": 1.107142857142857e-07, "loss": 0.6911119222640991, "step": 32 }, { "epoch": 0.036677454153182305, "grad_norm": 9.628297805786133, "learning_rate": 1.1785714285714285e-07, "loss": 1.0148026943206787, "step": 34 }, { "epoch": 0.038834951456310676, "grad_norm": 2.975783109664917, "learning_rate": 1.25e-07, "loss": 0.8927556276321411, "step": 36 }, { "epoch": 0.040992448759439054, "grad_norm": 9.633585929870605, "learning_rate": 1.3214285714285714e-07, "loss": 1.5045725107192993, "step": 38 }, { "epoch": 0.043149946062567425, "grad_norm": 6.078215599060059, "learning_rate": 1.392857142857143e-07, "loss": 0.510919988155365, "step": 40 }, { "epoch": 0.045307443365695796, "grad_norm": 2.061255693435669, "learning_rate": 1.4642857142857143e-07, "loss": 0.7295307517051697, "step": 42 }, { "epoch": 0.04746494066882417, "grad_norm": 1.5910701751708984, "learning_rate": 1.5357142857142858e-07, "loss": 0.8399662375450134, "step": 44 }, { "epoch": 0.04962243797195254, "grad_norm": 2.5256459712982178, "learning_rate": 1.6071428571428573e-07, "loss": 0.6381903290748596, "step": 46 }, { "epoch": 0.05177993527508091, "grad_norm": 2.43086314201355, "learning_rate": 1.6785714285714285e-07, "loss": 0.8072210550308228, "step": 48 }, { "epoch": 0.05393743257820928, "grad_norm": 2.4569172859191895, "learning_rate": 1.75e-07, "loss": 0.6129989624023438, "step": 50 }, { "epoch": 0.05609492988133765, "grad_norm": 1.5575639009475708, "learning_rate": 1.8214285714285714e-07, "loss": 0.39090248942375183, "step": 52 }, { "epoch": 0.05825242718446602, "grad_norm": 7.692088603973389, "learning_rate": 1.8928571428571426e-07, "loss": 0.9982097148895264, "step": 54 }, { "epoch": 0.06040992448759439, "grad_norm": 28.100284576416016, "learning_rate": 1.964285714285714e-07, "loss": 1.190761923789978, "step": 56 }, { "epoch": 0.06256742179072276, "grad_norm": 4.549140930175781, "learning_rate": 2.0357142857142855e-07, "loss": 0.6789318323135376, "step": 58 }, { "epoch": 0.06472491909385113, "grad_norm": 12.27080249786377, "learning_rate": 2.107142857142857e-07, "loss": 0.8964890241622925, "step": 60 }, { "epoch": 0.0668824163969795, "grad_norm": 8.274192810058594, "learning_rate": 2.1785714285714284e-07, "loss": 1.1355807781219482, "step": 62 }, { "epoch": 0.06903991370010787, "grad_norm": 2.232541084289551, "learning_rate": 2.25e-07, "loss": 0.9159867763519287, "step": 64 }, { "epoch": 0.07119741100323625, "grad_norm": 3.91237735748291, "learning_rate": 2.3214285714285714e-07, "loss": 1.0847519636154175, "step": 66 }, { "epoch": 0.07335490830636461, "grad_norm": 3.247027635574341, "learning_rate": 2.392857142857143e-07, "loss": 0.8140153884887695, "step": 68 }, { "epoch": 0.07551240560949299, "grad_norm": 13.169454574584961, "learning_rate": 2.4642857142857143e-07, "loss": 0.9606142640113831, "step": 70 }, { "epoch": 0.07766990291262135, "grad_norm": 2.072512626647949, "learning_rate": 2.5357142857142855e-07, "loss": 0.7430305480957031, "step": 72 }, { "epoch": 0.07982740021574973, "grad_norm": 2.4250895977020264, "learning_rate": 2.607142857142857e-07, "loss": 0.6543456315994263, "step": 74 }, { "epoch": 0.08198489751887811, "grad_norm": 6.248291492462158, "learning_rate": 2.6785714285714284e-07, "loss": 0.7182219624519348, "step": 76 }, { "epoch": 0.08414239482200647, "grad_norm": 2.01570725440979, "learning_rate": 2.75e-07, "loss": 0.6486653685569763, "step": 78 }, { "epoch": 0.08629989212513485, "grad_norm": 4.23671817779541, "learning_rate": 2.8214285714285713e-07, "loss": 1.1015154123306274, "step": 80 }, { "epoch": 0.08845738942826321, "grad_norm": 6.89118766784668, "learning_rate": 2.892857142857143e-07, "loss": 0.7223177552223206, "step": 82 }, { "epoch": 0.09061488673139159, "grad_norm": 3.134542226791382, "learning_rate": 2.9642857142857143e-07, "loss": 0.5234851837158203, "step": 84 }, { "epoch": 0.09277238403451996, "grad_norm": 1.6814379692077637, "learning_rate": 3.0357142857142855e-07, "loss": 0.7054818868637085, "step": 86 }, { "epoch": 0.09492988133764833, "grad_norm": 2.541091203689575, "learning_rate": 3.107142857142857e-07, "loss": 0.7358066439628601, "step": 88 }, { "epoch": 0.0970873786407767, "grad_norm": 7.0923991203308105, "learning_rate": 3.1785714285714284e-07, "loss": 0.7044313549995422, "step": 90 }, { "epoch": 0.09924487594390508, "grad_norm": 1.4156235456466675, "learning_rate": 3.25e-07, "loss": 0.7540506720542908, "step": 92 }, { "epoch": 0.10140237324703344, "grad_norm": 2.159705877304077, "learning_rate": 3.3214285714285713e-07, "loss": 0.8926774859428406, "step": 94 }, { "epoch": 0.10355987055016182, "grad_norm": 1.109196662902832, "learning_rate": 3.392857142857143e-07, "loss": 0.8637357950210571, "step": 96 }, { "epoch": 0.10571736785329018, "grad_norm": 2.337041139602661, "learning_rate": 3.464285714285714e-07, "loss": 0.748981237411499, "step": 98 }, { "epoch": 0.10787486515641856, "grad_norm": 11.489009857177734, "learning_rate": 3.535714285714286e-07, "loss": 0.6166712641716003, "step": 100 }, { "epoch": 0.11003236245954692, "grad_norm": 2.5563786029815674, "learning_rate": 3.607142857142857e-07, "loss": 0.8990151286125183, "step": 102 }, { "epoch": 0.1121898597626753, "grad_norm": 11.782515525817871, "learning_rate": 3.678571428571429e-07, "loss": 0.9428755044937134, "step": 104 }, { "epoch": 0.11434735706580366, "grad_norm": 4.793514251708984, "learning_rate": 3.75e-07, "loss": 0.515690803527832, "step": 106 }, { "epoch": 0.11650485436893204, "grad_norm": 10.346397399902344, "learning_rate": 3.821428571428571e-07, "loss": 1.0367025136947632, "step": 108 }, { "epoch": 0.1186623516720604, "grad_norm": 1.8307247161865234, "learning_rate": 3.8928571428571425e-07, "loss": 0.682608962059021, "step": 110 }, { "epoch": 0.12081984897518878, "grad_norm": 2.618833541870117, "learning_rate": 3.9642857142857137e-07, "loss": 0.5873494148254395, "step": 112 }, { "epoch": 0.12297734627831715, "grad_norm": 4.1070427894592285, "learning_rate": 4.0357142857142854e-07, "loss": 0.5457082390785217, "step": 114 }, { "epoch": 0.12513484358144553, "grad_norm": 3.505392074584961, "learning_rate": 4.1071428571428566e-07, "loss": 0.7192925214767456, "step": 116 }, { "epoch": 0.1272923408845739, "grad_norm": 4.676717758178711, "learning_rate": 4.1785714285714283e-07, "loss": 0.6860368847846985, "step": 118 }, { "epoch": 0.12944983818770225, "grad_norm": 1.218853235244751, "learning_rate": 4.2499999999999995e-07, "loss": 0.5903947353363037, "step": 120 }, { "epoch": 0.13160733549083065, "grad_norm": 1.845142126083374, "learning_rate": 4.3214285714285713e-07, "loss": 0.7133704423904419, "step": 122 }, { "epoch": 0.133764832793959, "grad_norm": 6.926656246185303, "learning_rate": 4.3928571428571425e-07, "loss": 0.7799294590950012, "step": 124 }, { "epoch": 0.13592233009708737, "grad_norm": 0.8671985268592834, "learning_rate": 4.464285714285714e-07, "loss": 0.9356327056884766, "step": 126 }, { "epoch": 0.13807982740021574, "grad_norm": 2.000596761703491, "learning_rate": 4.5357142857142854e-07, "loss": 0.65338534116745, "step": 128 }, { "epoch": 0.14023732470334413, "grad_norm": 2.7190310955047607, "learning_rate": 4.6071428571428566e-07, "loss": 0.3992256820201874, "step": 130 }, { "epoch": 0.1423948220064725, "grad_norm": 4.049299240112305, "learning_rate": 4.6785714285714283e-07, "loss": 0.68809974193573, "step": 132 }, { "epoch": 0.14455231930960086, "grad_norm": 3.6315855979919434, "learning_rate": 4.7499999999999995e-07, "loss": 0.8035519123077393, "step": 134 }, { "epoch": 0.14670981661272922, "grad_norm": 1.8729430437088013, "learning_rate": 4.821428571428571e-07, "loss": 0.9223624467849731, "step": 136 }, { "epoch": 0.1488673139158576, "grad_norm": 2.9594123363494873, "learning_rate": 4.892857142857142e-07, "loss": 0.7450867891311646, "step": 138 }, { "epoch": 0.15102481121898598, "grad_norm": 1.094379186630249, "learning_rate": 4.964285714285715e-07, "loss": 0.521569550037384, "step": 140 }, { "epoch": 0.15318230852211434, "grad_norm": 3.2890093326568604, "learning_rate": 4.999998408101351e-07, "loss": 0.7194236516952515, "step": 142 }, { "epoch": 0.1553398058252427, "grad_norm": 4.202434062957764, "learning_rate": 4.999985672925673e-07, "loss": 0.6269642114639282, "step": 144 }, { "epoch": 0.1574973031283711, "grad_norm": 9.459905624389648, "learning_rate": 4.999960202646399e-07, "loss": 0.798893392086029, "step": 146 }, { "epoch": 0.15965480043149946, "grad_norm": 1.1723222732543945, "learning_rate": 4.999921997407693e-07, "loss": 0.827728271484375, "step": 148 }, { "epoch": 0.16181229773462782, "grad_norm": 2.3163399696350098, "learning_rate": 4.999871057425801e-07, "loss": 0.6281718015670776, "step": 150 }, { "epoch": 0.16396979503775622, "grad_norm": 1.8294389247894287, "learning_rate": 4.999807382989047e-07, "loss": 0.4277426302433014, "step": 152 }, { "epoch": 0.16612729234088458, "grad_norm": 7.350142955780029, "learning_rate": 4.999730974457832e-07, "loss": 0.9866698980331421, "step": 154 }, { "epoch": 0.16828478964401294, "grad_norm": 13.259355545043945, "learning_rate": 4.999641832264634e-07, "loss": 0.8262766599655151, "step": 156 }, { "epoch": 0.1704422869471413, "grad_norm": 1.965742588043213, "learning_rate": 4.999539956914009e-07, "loss": 0.6971994042396545, "step": 158 }, { "epoch": 0.1725997842502697, "grad_norm": 5.429571628570557, "learning_rate": 4.999425348982576e-07, "loss": 0.8861981630325317, "step": 160 }, { "epoch": 0.17475728155339806, "grad_norm": 2.0696070194244385, "learning_rate": 4.999298009119028e-07, "loss": 0.7111194133758545, "step": 162 }, { "epoch": 0.17691477885652643, "grad_norm": 1.6481966972351074, "learning_rate": 4.999157938044117e-07, "loss": 0.5676076412200928, "step": 164 }, { "epoch": 0.1790722761596548, "grad_norm": 1.6638914346694946, "learning_rate": 4.999005136550658e-07, "loss": 0.6949500441551208, "step": 166 }, { "epoch": 0.18122977346278318, "grad_norm": 1.638273000717163, "learning_rate": 4.998839605503519e-07, "loss": 0.5900384783744812, "step": 168 }, { "epoch": 0.18338727076591155, "grad_norm": 4.502748012542725, "learning_rate": 4.998661345839621e-07, "loss": 0.6821661591529846, "step": 170 }, { "epoch": 0.1855447680690399, "grad_norm": 7.763872146606445, "learning_rate": 4.998470358567927e-07, "loss": 0.942156195640564, "step": 172 }, { "epoch": 0.18770226537216828, "grad_norm": 2.503589630126953, "learning_rate": 4.998266644769442e-07, "loss": 0.7288610935211182, "step": 174 }, { "epoch": 0.18985976267529667, "grad_norm": 1.9182608127593994, "learning_rate": 4.998050205597199e-07, "loss": 0.6357927918434143, "step": 176 }, { "epoch": 0.19201725997842503, "grad_norm": 6.610482692718506, "learning_rate": 4.997821042276267e-07, "loss": 0.6117627024650574, "step": 178 }, { "epoch": 0.1941747572815534, "grad_norm": 1.5938338041305542, "learning_rate": 4.997579156103726e-07, "loss": 0.5153307914733887, "step": 180 }, { "epoch": 0.19633225458468176, "grad_norm": 3.5181422233581543, "learning_rate": 4.99732454844867e-07, "loss": 0.8765067458152771, "step": 182 }, { "epoch": 0.19848975188781015, "grad_norm": 2.738868236541748, "learning_rate": 4.997057220752203e-07, "loss": 0.6484391689300537, "step": 184 }, { "epoch": 0.20064724919093851, "grad_norm": 4.510643005371094, "learning_rate": 4.996777174527419e-07, "loss": 0.7230756878852844, "step": 186 }, { "epoch": 0.20280474649406688, "grad_norm": 8.01621150970459, "learning_rate": 4.996484411359404e-07, "loss": 0.5043923854827881, "step": 188 }, { "epoch": 0.20496224379719524, "grad_norm": 3.4308345317840576, "learning_rate": 4.996178932905221e-07, "loss": 0.43938198685646057, "step": 190 }, { "epoch": 0.20711974110032363, "grad_norm": 1.9731006622314453, "learning_rate": 4.995860740893904e-07, "loss": 0.5857755541801453, "step": 192 }, { "epoch": 0.209277238403452, "grad_norm": 5.68419075012207, "learning_rate": 4.995529837126445e-07, "loss": 0.6594390273094177, "step": 194 }, { "epoch": 0.21143473570658036, "grad_norm": 2.10448956489563, "learning_rate": 4.995186223475785e-07, "loss": 0.5797517895698547, "step": 196 }, { "epoch": 0.21359223300970873, "grad_norm": 3.166001081466675, "learning_rate": 4.99482990188681e-07, "loss": 0.6418941617012024, "step": 198 }, { "epoch": 0.21574973031283712, "grad_norm": 4.582778453826904, "learning_rate": 4.994460874376325e-07, "loss": 0.6962154507637024, "step": 200 }, { "epoch": 0.21790722761596548, "grad_norm": 2.809333086013794, "learning_rate": 4.994079143033057e-07, "loss": 0.5948184132575989, "step": 202 }, { "epoch": 0.22006472491909385, "grad_norm": 1.9424008131027222, "learning_rate": 4.993684710017639e-07, "loss": 0.6439061164855957, "step": 204 }, { "epoch": 0.2222222222222222, "grad_norm": 1.9433950185775757, "learning_rate": 4.993277577562591e-07, "loss": 0.6355498433113098, "step": 206 }, { "epoch": 0.2243797195253506, "grad_norm": 1.5145349502563477, "learning_rate": 4.992857747972318e-07, "loss": 0.5916112661361694, "step": 208 }, { "epoch": 0.22653721682847897, "grad_norm": 4.159152507781982, "learning_rate": 4.99242522362309e-07, "loss": 0.6694331169128418, "step": 210 }, { "epoch": 0.22869471413160733, "grad_norm": 2.050935983657837, "learning_rate": 4.991980006963029e-07, "loss": 0.5539838075637817, "step": 212 }, { "epoch": 0.2308522114347357, "grad_norm": 3.0738911628723145, "learning_rate": 4.9915221005121e-07, "loss": 0.5762456655502319, "step": 214 }, { "epoch": 0.23300970873786409, "grad_norm": 5.252814292907715, "learning_rate": 4.991051506862089e-07, "loss": 0.593986988067627, "step": 216 }, { "epoch": 0.23516720604099245, "grad_norm": 4.718217372894287, "learning_rate": 4.990568228676597e-07, "loss": 0.6690990328788757, "step": 218 }, { "epoch": 0.2373247033441208, "grad_norm": 3.038825035095215, "learning_rate": 4.990072268691015e-07, "loss": 0.703670859336853, "step": 220 }, { "epoch": 0.23948220064724918, "grad_norm": 2.9468464851379395, "learning_rate": 4.98956362971252e-07, "loss": 0.7173536419868469, "step": 222 }, { "epoch": 0.24163969795037757, "grad_norm": 2.068927526473999, "learning_rate": 4.989042314620048e-07, "loss": 0.658054769039154, "step": 224 }, { "epoch": 0.24379719525350593, "grad_norm": 6.555632591247559, "learning_rate": 4.988508326364288e-07, "loss": 0.6826736927032471, "step": 226 }, { "epoch": 0.2459546925566343, "grad_norm": 16.916873931884766, "learning_rate": 4.987961667967655e-07, "loss": 0.5197000503540039, "step": 228 }, { "epoch": 0.2481121898597627, "grad_norm": 1.6320703029632568, "learning_rate": 4.987402342524282e-07, "loss": 0.6915658712387085, "step": 230 }, { "epoch": 0.25026968716289105, "grad_norm": 8.044790267944336, "learning_rate": 4.986830353199997e-07, "loss": 0.9126973748207092, "step": 232 }, { "epoch": 0.2524271844660194, "grad_norm": 6.615198135375977, "learning_rate": 4.986245703232305e-07, "loss": 0.8132709264755249, "step": 234 }, { "epoch": 0.2545846817691478, "grad_norm": 10.388949394226074, "learning_rate": 4.985648395930373e-07, "loss": 0.7186510562896729, "step": 236 }, { "epoch": 0.25674217907227614, "grad_norm": 1.9609507322311401, "learning_rate": 4.985038434675011e-07, "loss": 0.6574066877365112, "step": 238 }, { "epoch": 0.2588996763754045, "grad_norm": 1.8315008878707886, "learning_rate": 4.984415822918648e-07, "loss": 0.7553728818893433, "step": 240 }, { "epoch": 0.26105717367853293, "grad_norm": 1.6982944011688232, "learning_rate": 4.983780564185318e-07, "loss": 0.6477434635162354, "step": 242 }, { "epoch": 0.2632146709816613, "grad_norm": 3.038011312484741, "learning_rate": 4.983132662070639e-07, "loss": 0.6487295031547546, "step": 244 }, { "epoch": 0.26537216828478966, "grad_norm": 2.443270683288574, "learning_rate": 4.982472120241788e-07, "loss": 0.6732483506202698, "step": 246 }, { "epoch": 0.267529665587918, "grad_norm": 23.088842391967773, "learning_rate": 4.981798942437488e-07, "loss": 0.8093491792678833, "step": 248 }, { "epoch": 0.2696871628910464, "grad_norm": 1.265063762664795, "learning_rate": 4.981113132467979e-07, "loss": 0.6368775963783264, "step": 250 }, { "epoch": 0.27184466019417475, "grad_norm": 4.324744701385498, "learning_rate": 4.980414694215002e-07, "loss": 0.7633779644966125, "step": 252 }, { "epoch": 0.2740021574973031, "grad_norm": 1.693095326423645, "learning_rate": 4.979703631631776e-07, "loss": 0.4923373758792877, "step": 254 }, { "epoch": 0.2761596548004315, "grad_norm": 1.543394923210144, "learning_rate": 4.978979948742973e-07, "loss": 0.580363392829895, "step": 256 }, { "epoch": 0.2783171521035599, "grad_norm": 4.408818244934082, "learning_rate": 4.978243649644698e-07, "loss": 0.45743411779403687, "step": 258 }, { "epoch": 0.28047464940668826, "grad_norm": 1.3202674388885498, "learning_rate": 4.977494738504462e-07, "loss": 0.7671762108802795, "step": 260 }, { "epoch": 0.2826321467098166, "grad_norm": 1.4401205778121948, "learning_rate": 4.976733219561166e-07, "loss": 0.5078914761543274, "step": 262 }, { "epoch": 0.284789644012945, "grad_norm": 2.1078848838806152, "learning_rate": 4.97595909712507e-07, "loss": 0.6809296607971191, "step": 264 }, { "epoch": 0.28694714131607335, "grad_norm": 1.4545615911483765, "learning_rate": 4.975172375577768e-07, "loss": 0.5965730547904968, "step": 266 }, { "epoch": 0.2891046386192017, "grad_norm": 3.4635396003723145, "learning_rate": 4.974373059372171e-07, "loss": 0.6723196506500244, "step": 268 }, { "epoch": 0.2912621359223301, "grad_norm": 1.4718010425567627, "learning_rate": 4.973561153032472e-07, "loss": 0.5375315546989441, "step": 270 }, { "epoch": 0.29341963322545844, "grad_norm": 2.598886489868164, "learning_rate": 4.972736661154131e-07, "loss": 0.6744803786277771, "step": 272 }, { "epoch": 0.29557713052858686, "grad_norm": 2.894680976867676, "learning_rate": 4.971899588403836e-07, "loss": 0.6589304804801941, "step": 274 }, { "epoch": 0.2977346278317152, "grad_norm": 6.139466762542725, "learning_rate": 4.97104993951949e-07, "loss": 0.6724509000778198, "step": 276 }, { "epoch": 0.2998921251348436, "grad_norm": 4.252213954925537, "learning_rate": 4.970187719310173e-07, "loss": 0.7082564830780029, "step": 278 }, { "epoch": 0.30204962243797195, "grad_norm": 4.135262489318848, "learning_rate": 4.969312932656125e-07, "loss": 0.6953362822532654, "step": 280 }, { "epoch": 0.3042071197411003, "grad_norm": 6.377979755401611, "learning_rate": 4.968425584508709e-07, "loss": 0.6467074155807495, "step": 282 }, { "epoch": 0.3063646170442287, "grad_norm": 2.111952066421509, "learning_rate": 4.967525679890388e-07, "loss": 0.6030433177947998, "step": 284 }, { "epoch": 0.30852211434735705, "grad_norm": 2.0884768962860107, "learning_rate": 4.966613223894696e-07, "loss": 0.4877060651779175, "step": 286 }, { "epoch": 0.3106796116504854, "grad_norm": 8.26274585723877, "learning_rate": 4.96568822168621e-07, "loss": 0.6577244400978088, "step": 288 }, { "epoch": 0.31283710895361383, "grad_norm": 1.4854352474212646, "learning_rate": 4.964750678500517e-07, "loss": 0.6866559386253357, "step": 290 }, { "epoch": 0.3149946062567422, "grad_norm": 2.8770534992218018, "learning_rate": 4.963800599644189e-07, "loss": 0.6283307075500488, "step": 292 }, { "epoch": 0.31715210355987056, "grad_norm": 3.0127577781677246, "learning_rate": 4.96283799049475e-07, "loss": 0.7268189191818237, "step": 294 }, { "epoch": 0.3193096008629989, "grad_norm": 1.608444333076477, "learning_rate": 4.961862856500647e-07, "loss": 0.4435058832168579, "step": 296 }, { "epoch": 0.3214670981661273, "grad_norm": 4.365738391876221, "learning_rate": 4.960875203181219e-07, "loss": 0.6397342085838318, "step": 298 }, { "epoch": 0.32362459546925565, "grad_norm": 17.852121353149414, "learning_rate": 4.959875036126664e-07, "loss": 0.7305557131767273, "step": 300 }, { "epoch": 0.325782092772384, "grad_norm": 5.299609661102295, "learning_rate": 4.958862360998011e-07, "loss": 0.45774808526039124, "step": 302 }, { "epoch": 0.32793959007551243, "grad_norm": 3.3113415241241455, "learning_rate": 4.957837183527081e-07, "loss": 0.577039897441864, "step": 304 }, { "epoch": 0.3300970873786408, "grad_norm": 1.8541628122329712, "learning_rate": 4.956799509516467e-07, "loss": 0.6706950664520264, "step": 306 }, { "epoch": 0.33225458468176916, "grad_norm": 2.7589690685272217, "learning_rate": 4.955749344839487e-07, "loss": 0.6480457186698914, "step": 308 }, { "epoch": 0.3344120819848975, "grad_norm": 2.4146876335144043, "learning_rate": 4.954686695440159e-07, "loss": 0.7360544204711914, "step": 310 }, { "epoch": 0.3365695792880259, "grad_norm": 2.073716878890991, "learning_rate": 4.953611567333166e-07, "loss": 0.6692315340042114, "step": 312 }, { "epoch": 0.33872707659115425, "grad_norm": 5.253190994262695, "learning_rate": 4.952523966603822e-07, "loss": 0.6974945068359375, "step": 314 }, { "epoch": 0.3408845738942826, "grad_norm": 2.9199769496917725, "learning_rate": 4.951423899408035e-07, "loss": 0.7332634329795837, "step": 316 }, { "epoch": 0.343042071197411, "grad_norm": 4.3365254402160645, "learning_rate": 4.950311371972277e-07, "loss": 0.7718333005905151, "step": 318 }, { "epoch": 0.3451995685005394, "grad_norm": 3.805959701538086, "learning_rate": 4.949186390593544e-07, "loss": 0.5337668657302856, "step": 320 }, { "epoch": 0.34735706580366776, "grad_norm": 0.888100266456604, "learning_rate": 4.948048961639323e-07, "loss": 0.40571683645248413, "step": 322 }, { "epoch": 0.34951456310679613, "grad_norm": 1.4906032085418701, "learning_rate": 4.946899091547556e-07, "loss": 0.5962201356887817, "step": 324 }, { "epoch": 0.3516720604099245, "grad_norm": 13.393712997436523, "learning_rate": 4.945736786826601e-07, "loss": 0.7609850168228149, "step": 326 }, { "epoch": 0.35382955771305286, "grad_norm": 6.983129024505615, "learning_rate": 4.944562054055198e-07, "loss": 0.6627951860427856, "step": 328 }, { "epoch": 0.3559870550161812, "grad_norm": 1.3096721172332764, "learning_rate": 4.943374899882432e-07, "loss": 0.6522207856178284, "step": 330 }, { "epoch": 0.3581445523193096, "grad_norm": 4.269274711608887, "learning_rate": 4.942175331027693e-07, "loss": 0.4938734173774719, "step": 332 }, { "epoch": 0.36030204962243795, "grad_norm": 5.267324924468994, "learning_rate": 4.940963354280638e-07, "loss": 0.6275608539581299, "step": 334 }, { "epoch": 0.36245954692556637, "grad_norm": 5.786050796508789, "learning_rate": 4.939738976501156e-07, "loss": 0.49768128991127014, "step": 336 }, { "epoch": 0.36461704422869473, "grad_norm": 2.491793155670166, "learning_rate": 4.938502204619325e-07, "loss": 0.5501651763916016, "step": 338 }, { "epoch": 0.3667745415318231, "grad_norm": 33.490753173828125, "learning_rate": 4.937253045635375e-07, "loss": 0.6647762656211853, "step": 340 }, { "epoch": 0.36893203883495146, "grad_norm": 2.3751609325408936, "learning_rate": 4.93599150661965e-07, "loss": 0.6672598719596863, "step": 342 }, { "epoch": 0.3710895361380798, "grad_norm": 2.2334680557250977, "learning_rate": 4.934717594712564e-07, "loss": 0.5913785099983215, "step": 344 }, { "epoch": 0.3732470334412082, "grad_norm": 2.6074905395507812, "learning_rate": 4.933431317124562e-07, "loss": 0.6229710578918457, "step": 346 }, { "epoch": 0.37540453074433655, "grad_norm": 13.647238731384277, "learning_rate": 4.932132681136079e-07, "loss": 0.7039205431938171, "step": 348 }, { "epoch": 0.3775620280474649, "grad_norm": 1.9593160152435303, "learning_rate": 4.930821694097507e-07, "loss": 0.6190288662910461, "step": 350 }, { "epoch": 0.37971952535059333, "grad_norm": 5.780766487121582, "learning_rate": 4.929498363429135e-07, "loss": 0.6188508868217468, "step": 352 }, { "epoch": 0.3818770226537217, "grad_norm": 13.814835548400879, "learning_rate": 4.928162696621125e-07, "loss": 0.6810972094535828, "step": 354 }, { "epoch": 0.38403451995685006, "grad_norm": 2.1199731826782227, "learning_rate": 4.926814701233461e-07, "loss": 0.5854727029800415, "step": 356 }, { "epoch": 0.3861920172599784, "grad_norm": 6.030026435852051, "learning_rate": 4.925454384895906e-07, "loss": 0.5390771627426147, "step": 358 }, { "epoch": 0.3883495145631068, "grad_norm": 1.6937010288238525, "learning_rate": 4.924081755307964e-07, "loss": 0.6610192060470581, "step": 360 }, { "epoch": 0.39050701186623515, "grad_norm": 2.114851236343384, "learning_rate": 4.922696820238831e-07, "loss": 0.6132209300994873, "step": 362 }, { "epoch": 0.3926645091693635, "grad_norm": 5.5387749671936035, "learning_rate": 4.921299587527352e-07, "loss": 0.5963850021362305, "step": 364 }, { "epoch": 0.3948220064724919, "grad_norm": 6.011134147644043, "learning_rate": 4.919890065081979e-07, "loss": 0.7333153486251831, "step": 366 }, { "epoch": 0.3969795037756203, "grad_norm": 6.8634209632873535, "learning_rate": 4.918468260880726e-07, "loss": 0.7082594633102417, "step": 368 }, { "epoch": 0.39913700107874867, "grad_norm": 0.9249289631843567, "learning_rate": 4.917034182971122e-07, "loss": 0.6958880424499512, "step": 370 }, { "epoch": 0.40129449838187703, "grad_norm": 2.1919021606445312, "learning_rate": 4.915587839470163e-07, "loss": 0.6318715214729309, "step": 372 }, { "epoch": 0.4034519956850054, "grad_norm": 1.5956162214279175, "learning_rate": 4.914129238564272e-07, "loss": 0.6127752661705017, "step": 374 }, { "epoch": 0.40560949298813376, "grad_norm": 1.8250914812088013, "learning_rate": 4.912658388509253e-07, "loss": 0.5225449204444885, "step": 376 }, { "epoch": 0.4077669902912621, "grad_norm": 3.8166756629943848, "learning_rate": 4.911175297630236e-07, "loss": 0.3997286558151245, "step": 378 }, { "epoch": 0.4099244875943905, "grad_norm": 2.1530375480651855, "learning_rate": 4.909679974321636e-07, "loss": 0.5367651581764221, "step": 380 }, { "epoch": 0.4120819848975189, "grad_norm": 12.938714981079102, "learning_rate": 4.908172427047109e-07, "loss": 0.5885851383209229, "step": 382 }, { "epoch": 0.41423948220064727, "grad_norm": 2.9967093467712402, "learning_rate": 4.906652664339493e-07, "loss": 0.6215783357620239, "step": 384 }, { "epoch": 0.41639697950377563, "grad_norm": 4.113240718841553, "learning_rate": 4.905120694800772e-07, "loss": 0.6612198352813721, "step": 386 }, { "epoch": 0.418554476806904, "grad_norm": 2.52058482170105, "learning_rate": 4.903576527102018e-07, "loss": 0.5840904712677002, "step": 388 }, { "epoch": 0.42071197411003236, "grad_norm": 5.358467102050781, "learning_rate": 4.902020169983346e-07, "loss": 0.3897056579589844, "step": 390 }, { "epoch": 0.4228694714131607, "grad_norm": 9.863550186157227, "learning_rate": 4.900451632253868e-07, "loss": 0.6255682706832886, "step": 392 }, { "epoch": 0.4250269687162891, "grad_norm": 3.4259397983551025, "learning_rate": 4.898870922791634e-07, "loss": 0.6414270997047424, "step": 394 }, { "epoch": 0.42718446601941745, "grad_norm": 8.052302360534668, "learning_rate": 4.89727805054359e-07, "loss": 0.761306881904602, "step": 396 }, { "epoch": 0.42934196332254587, "grad_norm": 3.4023313522338867, "learning_rate": 4.895673024525522e-07, "loss": 0.5789651274681091, "step": 398 }, { "epoch": 0.43149946062567424, "grad_norm": 3.1743762493133545, "learning_rate": 4.894055853822012e-07, "loss": 0.5212793946266174, "step": 400 }, { "epoch": 0.4336569579288026, "grad_norm": 2.2208149433135986, "learning_rate": 4.892426547586378e-07, "loss": 0.5916732549667358, "step": 402 }, { "epoch": 0.43581445523193096, "grad_norm": 2.620041608810425, "learning_rate": 4.890785115040626e-07, "loss": 0.6305989027023315, "step": 404 }, { "epoch": 0.43797195253505933, "grad_norm": 3.6425275802612305, "learning_rate": 4.889131565475401e-07, "loss": 0.756013035774231, "step": 406 }, { "epoch": 0.4401294498381877, "grad_norm": 5.899089336395264, "learning_rate": 4.887465908249925e-07, "loss": 0.5666382908821106, "step": 408 }, { "epoch": 0.44228694714131606, "grad_norm": 2.683706283569336, "learning_rate": 4.885788152791959e-07, "loss": 0.774447500705719, "step": 410 }, { "epoch": 0.4444444444444444, "grad_norm": 2.967109203338623, "learning_rate": 4.884098308597734e-07, "loss": 0.616926908493042, "step": 412 }, { "epoch": 0.44660194174757284, "grad_norm": 1.796120047569275, "learning_rate": 4.882396385231909e-07, "loss": 0.5476500391960144, "step": 414 }, { "epoch": 0.4487594390507012, "grad_norm": 1.2322068214416504, "learning_rate": 4.880682392327509e-07, "loss": 0.6756587028503418, "step": 416 }, { "epoch": 0.45091693635382957, "grad_norm": 2.247699737548828, "learning_rate": 4.878956339585874e-07, "loss": 0.6002364754676819, "step": 418 }, { "epoch": 0.45307443365695793, "grad_norm": 1.9976741075515747, "learning_rate": 4.877218236776603e-07, "loss": 0.5875912308692932, "step": 420 }, { "epoch": 0.4552319309600863, "grad_norm": 1.5975956916809082, "learning_rate": 4.875468093737504e-07, "loss": 0.6899119019508362, "step": 422 }, { "epoch": 0.45738942826321466, "grad_norm": 1.6121068000793457, "learning_rate": 4.873705920374528e-07, "loss": 0.5816035270690918, "step": 424 }, { "epoch": 0.459546925566343, "grad_norm": 9.736807823181152, "learning_rate": 4.87193172666172e-07, "loss": 0.6405006647109985, "step": 426 }, { "epoch": 0.4617044228694714, "grad_norm": 3.0928378105163574, "learning_rate": 4.870145522641164e-07, "loss": 0.8041344881057739, "step": 428 }, { "epoch": 0.4638619201725998, "grad_norm": 1.493276596069336, "learning_rate": 4.868347318422921e-07, "loss": 0.6042853593826294, "step": 430 }, { "epoch": 0.46601941747572817, "grad_norm": 5.470495700836182, "learning_rate": 4.866537124184973e-07, "loss": 0.6030522584915161, "step": 432 }, { "epoch": 0.46817691477885653, "grad_norm": 2.748800754547119, "learning_rate": 4.864714950173171e-07, "loss": 0.449870765209198, "step": 434 }, { "epoch": 0.4703344120819849, "grad_norm": 6.604589939117432, "learning_rate": 4.862880806701166e-07, "loss": 0.7801079154014587, "step": 436 }, { "epoch": 0.47249190938511326, "grad_norm": 15.256587028503418, "learning_rate": 4.861034704150363e-07, "loss": 0.8985238075256348, "step": 438 }, { "epoch": 0.4746494066882416, "grad_norm": 2.490304946899414, "learning_rate": 4.859176652969853e-07, "loss": 0.5823092460632324, "step": 440 }, { "epoch": 0.47680690399137, "grad_norm": 1.5544885396957397, "learning_rate": 4.857306663676358e-07, "loss": 0.5551883578300476, "step": 442 }, { "epoch": 0.47896440129449835, "grad_norm": 3.0149269104003906, "learning_rate": 4.855424746854171e-07, "loss": 0.6494267582893372, "step": 444 }, { "epoch": 0.4811218985976268, "grad_norm": 6.602444171905518, "learning_rate": 4.853530913155097e-07, "loss": 0.6156390905380249, "step": 446 }, { "epoch": 0.48327939590075514, "grad_norm": 2.8398349285125732, "learning_rate": 4.851625173298389e-07, "loss": 0.6284780502319336, "step": 448 }, { "epoch": 0.4854368932038835, "grad_norm": 2.238544225692749, "learning_rate": 4.84970753807069e-07, "loss": 0.6789742112159729, "step": 450 }, { "epoch": 0.48759439050701187, "grad_norm": 5.265446186065674, "learning_rate": 4.847778018325974e-07, "loss": 0.6509313583374023, "step": 452 }, { "epoch": 0.48975188781014023, "grad_norm": 1.3713135719299316, "learning_rate": 4.845836624985484e-07, "loss": 0.5825515985488892, "step": 454 }, { "epoch": 0.4919093851132686, "grad_norm": 2.7798221111297607, "learning_rate": 4.84388336903766e-07, "loss": 0.46305614709854126, "step": 456 }, { "epoch": 0.49406688241639696, "grad_norm": 5.081538200378418, "learning_rate": 4.841918261538093e-07, "loss": 0.6566568613052368, "step": 458 }, { "epoch": 0.4962243797195254, "grad_norm": 8.522509574890137, "learning_rate": 4.839941313609456e-07, "loss": 0.7881090641021729, "step": 460 }, { "epoch": 0.49838187702265374, "grad_norm": 3.8689656257629395, "learning_rate": 4.837952536441432e-07, "loss": 0.592060387134552, "step": 462 }, { "epoch": 0.5005393743257821, "grad_norm": 3.3025739192962646, "learning_rate": 4.835951941290665e-07, "loss": 0.6704021096229553, "step": 464 }, { "epoch": 0.5026968716289104, "grad_norm": 1.400242805480957, "learning_rate": 4.833939539480689e-07, "loss": 0.7260591983795166, "step": 466 }, { "epoch": 0.5048543689320388, "grad_norm": 1.9812822341918945, "learning_rate": 4.831915342401862e-07, "loss": 0.6312894225120544, "step": 468 }, { "epoch": 0.5070118662351673, "grad_norm": 5.561008930206299, "learning_rate": 4.829879361511305e-07, "loss": 0.5982025861740112, "step": 470 }, { "epoch": 0.5091693635382956, "grad_norm": 1.6053967475891113, "learning_rate": 4.827831608332839e-07, "loss": 0.6564121246337891, "step": 472 }, { "epoch": 0.511326860841424, "grad_norm": 1.3539955615997314, "learning_rate": 4.825772094456913e-07, "loss": 0.7475908994674683, "step": 474 }, { "epoch": 0.5134843581445523, "grad_norm": 1.227378249168396, "learning_rate": 4.823700831540547e-07, "loss": 0.8186240792274475, "step": 476 }, { "epoch": 0.5156418554476807, "grad_norm": 1.6025233268737793, "learning_rate": 4.821617831307256e-07, "loss": 0.6983221769332886, "step": 478 }, { "epoch": 0.517799352750809, "grad_norm": 1.7689329385757446, "learning_rate": 4.819523105546994e-07, "loss": 0.4151468276977539, "step": 480 }, { "epoch": 0.5199568500539374, "grad_norm": 1.0149801969528198, "learning_rate": 4.817416666116082e-07, "loss": 0.6161211729049683, "step": 482 }, { "epoch": 0.5221143473570659, "grad_norm": 3.3374340534210205, "learning_rate": 4.815298524937138e-07, "loss": 0.6058178544044495, "step": 484 }, { "epoch": 0.5242718446601942, "grad_norm": 1.603325605392456, "learning_rate": 4.813168693999016e-07, "loss": 0.7110425233840942, "step": 486 }, { "epoch": 0.5264293419633226, "grad_norm": 8.539067268371582, "learning_rate": 4.811027185356733e-07, "loss": 0.6150040626525879, "step": 488 }, { "epoch": 0.5285868392664509, "grad_norm": 1.6875640153884888, "learning_rate": 4.808874011131405e-07, "loss": 0.5068952441215515, "step": 490 }, { "epoch": 0.5307443365695793, "grad_norm": 3.954596996307373, "learning_rate": 4.806709183510174e-07, "loss": 0.6284441947937012, "step": 492 }, { "epoch": 0.5329018338727076, "grad_norm": 11.474177360534668, "learning_rate": 4.804532714746142e-07, "loss": 0.7361968159675598, "step": 494 }, { "epoch": 0.535059331175836, "grad_norm": 5.204519271850586, "learning_rate": 4.8023446171583e-07, "loss": 0.6124238967895508, "step": 496 }, { "epoch": 0.5372168284789643, "grad_norm": 1.8010599613189697, "learning_rate": 4.800144903131462e-07, "loss": 0.7173029780387878, "step": 498 }, { "epoch": 0.5393743257820928, "grad_norm": 3.062251567840576, "learning_rate": 4.79793358511619e-07, "loss": 0.5613416433334351, "step": 500 }, { "epoch": 0.5415318230852212, "grad_norm": 1.9310272932052612, "learning_rate": 4.795710675628724e-07, "loss": 0.6559145450592041, "step": 502 }, { "epoch": 0.5436893203883495, "grad_norm": 2.027998924255371, "learning_rate": 4.793476187250913e-07, "loss": 0.5871425271034241, "step": 504 }, { "epoch": 0.5458468176914779, "grad_norm": 1.8883675336837769, "learning_rate": 4.791230132630148e-07, "loss": 0.6756701469421387, "step": 506 }, { "epoch": 0.5480043149946062, "grad_norm": 1.6534463167190552, "learning_rate": 4.78897252447928e-07, "loss": 0.747499942779541, "step": 508 }, { "epoch": 0.5501618122977346, "grad_norm": 1.547232747077942, "learning_rate": 4.786703375576557e-07, "loss": 0.5784983038902283, "step": 510 }, { "epoch": 0.552319309600863, "grad_norm": 2.1741976737976074, "learning_rate": 4.784422698765549e-07, "loss": 0.6561665534973145, "step": 512 }, { "epoch": 0.5544768069039914, "grad_norm": 7.314396381378174, "learning_rate": 4.782130506955072e-07, "loss": 0.6497671008110046, "step": 514 }, { "epoch": 0.5566343042071198, "grad_norm": 4.359201908111572, "learning_rate": 4.779826813119122e-07, "loss": 0.5485538840293884, "step": 516 }, { "epoch": 0.5587918015102481, "grad_norm": 1.9365326166152954, "learning_rate": 4.777511630296795e-07, "loss": 0.6849959492683411, "step": 518 }, { "epoch": 0.5609492988133765, "grad_norm": 3.751213312149048, "learning_rate": 4.775184971592214e-07, "loss": 0.6562321186065674, "step": 520 }, { "epoch": 0.5631067961165048, "grad_norm": 4.876345634460449, "learning_rate": 4.772846850174459e-07, "loss": 0.6159510016441345, "step": 522 }, { "epoch": 0.5652642934196332, "grad_norm": 1.8032896518707275, "learning_rate": 4.77049727927749e-07, "loss": 0.37065228819847107, "step": 524 }, { "epoch": 0.5674217907227616, "grad_norm": 1.5449891090393066, "learning_rate": 4.7681362722000703e-07, "loss": 0.6935975551605225, "step": 526 }, { "epoch": 0.56957928802589, "grad_norm": 1.7247692346572876, "learning_rate": 4.7657638423056947e-07, "loss": 0.5516192317008972, "step": 528 }, { "epoch": 0.5717367853290184, "grad_norm": 2.4441468715667725, "learning_rate": 4.76338000302251e-07, "loss": 0.6022404432296753, "step": 530 }, { "epoch": 0.5738942826321467, "grad_norm": 2.1737172603607178, "learning_rate": 4.760984767843242e-07, "loss": 0.6502859592437744, "step": 532 }, { "epoch": 0.5760517799352751, "grad_norm": 1.620774269104004, "learning_rate": 4.7585781503251197e-07, "loss": 0.660953938961029, "step": 534 }, { "epoch": 0.5782092772384034, "grad_norm": 2.282656669616699, "learning_rate": 4.7561601640897956e-07, "loss": 0.5781145095825195, "step": 536 }, { "epoch": 0.5803667745415318, "grad_norm": 1.9041107892990112, "learning_rate": 4.75373082282327e-07, "loss": 0.6320368647575378, "step": 538 }, { "epoch": 0.5825242718446602, "grad_norm": 5.685824871063232, "learning_rate": 4.751290140275813e-07, "loss": 0.7650933265686035, "step": 540 }, { "epoch": 0.5846817691477886, "grad_norm": 39.65275955200195, "learning_rate": 4.7488381302618887e-07, "loss": 0.6836517453193665, "step": 542 }, { "epoch": 0.5868392664509169, "grad_norm": 1.5295366048812866, "learning_rate": 4.7463748066600754e-07, "loss": 0.5143399834632874, "step": 544 }, { "epoch": 0.5889967637540453, "grad_norm": 1.5446751117706299, "learning_rate": 4.7439001834129876e-07, "loss": 0.7021965980529785, "step": 546 }, { "epoch": 0.5911542610571737, "grad_norm": 0.48094552755355835, "learning_rate": 4.7414142745271944e-07, "loss": 0.7895625829696655, "step": 548 }, { "epoch": 0.593311758360302, "grad_norm": 1.5283570289611816, "learning_rate": 4.738917094073146e-07, "loss": 0.5346397757530212, "step": 550 }, { "epoch": 0.5954692556634305, "grad_norm": 1.5133683681488037, "learning_rate": 4.7364086561850866e-07, "loss": 0.6586446762084961, "step": 552 }, { "epoch": 0.5976267529665588, "grad_norm": 1.5767534971237183, "learning_rate": 4.733888975060981e-07, "loss": 0.6797043681144714, "step": 554 }, { "epoch": 0.5997842502696872, "grad_norm": 1.8032898902893066, "learning_rate": 4.7313580649624335e-07, "loss": 0.5901877284049988, "step": 556 }, { "epoch": 0.6019417475728155, "grad_norm": 12.88221549987793, "learning_rate": 4.7288159402146e-07, "loss": 0.7293663024902344, "step": 558 }, { "epoch": 0.6040992448759439, "grad_norm": 3.4143638610839844, "learning_rate": 4.726262615206117e-07, "loss": 0.7597730159759521, "step": 560 }, { "epoch": 0.6062567421790723, "grad_norm": 1.426260232925415, "learning_rate": 4.723698104389013e-07, "loss": 0.6413673162460327, "step": 562 }, { "epoch": 0.6084142394822006, "grad_norm": 1.8875607252120972, "learning_rate": 4.72112242227863e-07, "loss": 0.705469012260437, "step": 564 }, { "epoch": 0.6105717367853291, "grad_norm": 1.8347193002700806, "learning_rate": 4.71853558345354e-07, "loss": 0.5616622567176819, "step": 566 }, { "epoch": 0.6127292340884574, "grad_norm": 1.3484468460083008, "learning_rate": 4.715937602555464e-07, "loss": 0.615619957447052, "step": 568 }, { "epoch": 0.6148867313915858, "grad_norm": 1.4341267347335815, "learning_rate": 4.7133284942891846e-07, "loss": 0.6071439981460571, "step": 570 }, { "epoch": 0.6170442286947141, "grad_norm": 7.157674312591553, "learning_rate": 4.7107082734224713e-07, "loss": 0.668420672416687, "step": 572 }, { "epoch": 0.6192017259978425, "grad_norm": 1.7236965894699097, "learning_rate": 4.7080769547859884e-07, "loss": 0.6496031284332275, "step": 574 }, { "epoch": 0.6213592233009708, "grad_norm": 3.2555484771728516, "learning_rate": 4.7054345532732155e-07, "loss": 0.5818045735359192, "step": 576 }, { "epoch": 0.6235167206040992, "grad_norm": 3.8229000568389893, "learning_rate": 4.7027810838403613e-07, "loss": 0.6198642253875732, "step": 578 }, { "epoch": 0.6256742179072277, "grad_norm": 3.169841766357422, "learning_rate": 4.700116561506282e-07, "loss": 0.666779637336731, "step": 580 }, { "epoch": 0.627831715210356, "grad_norm": 8.474517822265625, "learning_rate": 4.697441001352392e-07, "loss": 0.564462423324585, "step": 582 }, { "epoch": 0.6299892125134844, "grad_norm": 2.2855448722839355, "learning_rate": 4.6947544185225805e-07, "loss": 0.6741440296173096, "step": 584 }, { "epoch": 0.6321467098166127, "grad_norm": 3.856265068054199, "learning_rate": 4.692056828223129e-07, "loss": 0.7456379532814026, "step": 586 }, { "epoch": 0.6343042071197411, "grad_norm": 2.731037139892578, "learning_rate": 4.6893482457226174e-07, "loss": 0.5937924385070801, "step": 588 }, { "epoch": 0.6364617044228694, "grad_norm": 1.8991349935531616, "learning_rate": 4.6866286863518465e-07, "loss": 0.7059175372123718, "step": 590 }, { "epoch": 0.6386192017259978, "grad_norm": 11.739773750305176, "learning_rate": 4.6838981655037463e-07, "loss": 0.7047215104103088, "step": 592 }, { "epoch": 0.6407766990291263, "grad_norm": 12.105711936950684, "learning_rate": 4.6811566986332875e-07, "loss": 0.8416004180908203, "step": 594 }, { "epoch": 0.6429341963322546, "grad_norm": 1.6077960729599, "learning_rate": 4.678404301257398e-07, "loss": 0.6131125688552856, "step": 596 }, { "epoch": 0.645091693635383, "grad_norm": 1.12235689163208, "learning_rate": 4.6756409889548734e-07, "loss": 0.4833483397960663, "step": 598 }, { "epoch": 0.6472491909385113, "grad_norm": 3.067166328430176, "learning_rate": 4.6728667773662873e-07, "loss": 0.7458387017250061, "step": 600 }, { "epoch": 0.6494066882416397, "grad_norm": 4.32982873916626, "learning_rate": 4.6700816821939056e-07, "loss": 0.6110833883285522, "step": 602 }, { "epoch": 0.651564185544768, "grad_norm": 1.638934850692749, "learning_rate": 4.667285719201595e-07, "loss": 0.8020920753479004, "step": 604 }, { "epoch": 0.6537216828478964, "grad_norm": 4.146369457244873, "learning_rate": 4.6644789042147366e-07, "loss": 0.6712560653686523, "step": 606 }, { "epoch": 0.6558791801510249, "grad_norm": 1.4545388221740723, "learning_rate": 4.6616612531201324e-07, "loss": 0.4938512444496155, "step": 608 }, { "epoch": 0.6580366774541532, "grad_norm": 2.343878746032715, "learning_rate": 4.6588327818659195e-07, "loss": 0.6228682994842529, "step": 610 }, { "epoch": 0.6601941747572816, "grad_norm": 1.3540754318237305, "learning_rate": 4.655993506461478e-07, "loss": 0.5692847371101379, "step": 612 }, { "epoch": 0.6623516720604099, "grad_norm": 8.236356735229492, "learning_rate": 4.6531434429773384e-07, "loss": 0.7082728147506714, "step": 614 }, { "epoch": 0.6645091693635383, "grad_norm": 2.163950204849243, "learning_rate": 4.650282607545096e-07, "loss": 0.7548322081565857, "step": 616 }, { "epoch": 0.6666666666666666, "grad_norm": 1.8856990337371826, "learning_rate": 4.6474110163573114e-07, "loss": 0.6661372184753418, "step": 618 }, { "epoch": 0.668824163969795, "grad_norm": 3.379833698272705, "learning_rate": 4.644528685667428e-07, "loss": 0.6216427087783813, "step": 620 }, { "epoch": 0.6709816612729234, "grad_norm": 1.583329439163208, "learning_rate": 4.641635631789675e-07, "loss": 0.4488504230976105, "step": 622 }, { "epoch": 0.6731391585760518, "grad_norm": 1.1011863946914673, "learning_rate": 4.638731871098973e-07, "loss": 0.7307155728340149, "step": 624 }, { "epoch": 0.6752966558791802, "grad_norm": 2.518486976623535, "learning_rate": 4.635817420030847e-07, "loss": 0.605812668800354, "step": 626 }, { "epoch": 0.6774541531823085, "grad_norm": 3.132564067840576, "learning_rate": 4.6328922950813276e-07, "loss": 0.6636737585067749, "step": 628 }, { "epoch": 0.6796116504854369, "grad_norm": 2.8926432132720947, "learning_rate": 4.629956512806865e-07, "loss": 0.5622783899307251, "step": 630 }, { "epoch": 0.6817691477885652, "grad_norm": 0.8876265287399292, "learning_rate": 4.6270100898242257e-07, "loss": 0.7432127594947815, "step": 632 }, { "epoch": 0.6839266450916937, "grad_norm": 2.402222156524658, "learning_rate": 4.6240530428104064e-07, "loss": 0.495096892118454, "step": 634 }, { "epoch": 0.686084142394822, "grad_norm": 1.8342965841293335, "learning_rate": 4.6210853885025357e-07, "loss": 0.41085928678512573, "step": 636 }, { "epoch": 0.6882416396979504, "grad_norm": 6.494576930999756, "learning_rate": 4.6181071436977803e-07, "loss": 0.7552844285964966, "step": 638 }, { "epoch": 0.6903991370010788, "grad_norm": 1.355686902999878, "learning_rate": 4.615118325253251e-07, "loss": 0.7046399116516113, "step": 640 }, { "epoch": 0.6925566343042071, "grad_norm": 1.4658502340316772, "learning_rate": 4.612118950085905e-07, "loss": 0.6357789039611816, "step": 642 }, { "epoch": 0.6947141316073355, "grad_norm": 10.526252746582031, "learning_rate": 4.6091090351724523e-07, "loss": 0.8061111569404602, "step": 644 }, { "epoch": 0.6968716289104638, "grad_norm": 5.159208297729492, "learning_rate": 4.606088597549258e-07, "loss": 0.6721555590629578, "step": 646 }, { "epoch": 0.6990291262135923, "grad_norm": 13.965704917907715, "learning_rate": 4.603057654312247e-07, "loss": 0.5817508697509766, "step": 648 }, { "epoch": 0.7011866235167206, "grad_norm": 2.7752487659454346, "learning_rate": 4.600016222616807e-07, "loss": 0.6185034513473511, "step": 650 }, { "epoch": 0.703344120819849, "grad_norm": 1.5174920558929443, "learning_rate": 4.5969643196776907e-07, "loss": 0.6131242513656616, "step": 652 }, { "epoch": 0.7055016181229773, "grad_norm": 3.6596693992614746, "learning_rate": 4.5939019627689196e-07, "loss": 0.6017345190048218, "step": 654 }, { "epoch": 0.7076591154261057, "grad_norm": 1.590834379196167, "learning_rate": 4.590829169223686e-07, "loss": 0.6057524681091309, "step": 656 }, { "epoch": 0.7098166127292341, "grad_norm": 3.4560601711273193, "learning_rate": 4.587745956434252e-07, "loss": 0.48293402791023254, "step": 658 }, { "epoch": 0.7119741100323624, "grad_norm": 3.8374826908111572, "learning_rate": 4.584652341851855e-07, "loss": 0.6879529356956482, "step": 660 }, { "epoch": 0.7141316073354909, "grad_norm": 1.8356127738952637, "learning_rate": 4.581548342986609e-07, "loss": 0.6557474136352539, "step": 662 }, { "epoch": 0.7162891046386192, "grad_norm": 2.937380313873291, "learning_rate": 4.578433977407401e-07, "loss": 0.5540982484817505, "step": 664 }, { "epoch": 0.7184466019417476, "grad_norm": 9.279194831848145, "learning_rate": 4.5753092627417966e-07, "loss": 0.6095687747001648, "step": 666 }, { "epoch": 0.7206040992448759, "grad_norm": 1.7384564876556396, "learning_rate": 4.572174216675938e-07, "loss": 0.5323720574378967, "step": 668 }, { "epoch": 0.7227615965480043, "grad_norm": 3.279447555541992, "learning_rate": 4.5690288569544423e-07, "loss": 0.6184368133544922, "step": 670 }, { "epoch": 0.7249190938511327, "grad_norm": 6.155360698699951, "learning_rate": 4.5658732013803027e-07, "loss": 0.6536476612091064, "step": 672 }, { "epoch": 0.727076591154261, "grad_norm": 1.8201708793640137, "learning_rate": 4.5627072678147904e-07, "loss": 0.5651783347129822, "step": 674 }, { "epoch": 0.7292340884573895, "grad_norm": 3.5887646675109863, "learning_rate": 4.559531074177349e-07, "loss": 0.5361768007278442, "step": 676 }, { "epoch": 0.7313915857605178, "grad_norm": 9.772974967956543, "learning_rate": 4.5563446384454945e-07, "loss": 0.5051864981651306, "step": 678 }, { "epoch": 0.7335490830636462, "grad_norm": 1.5057132244110107, "learning_rate": 4.553147978654715e-07, "loss": 0.6263077855110168, "step": 680 }, { "epoch": 0.7357065803667745, "grad_norm": 1.7822685241699219, "learning_rate": 4.5499411128983674e-07, "loss": 0.6016006469726562, "step": 682 }, { "epoch": 0.7378640776699029, "grad_norm": 10.83908462524414, "learning_rate": 4.546724059327575e-07, "loss": 0.5432024002075195, "step": 684 }, { "epoch": 0.7400215749730313, "grad_norm": 2.1380414962768555, "learning_rate": 4.5434968361511263e-07, "loss": 0.7576265931129456, "step": 686 }, { "epoch": 0.7421790722761596, "grad_norm": 1.9547456502914429, "learning_rate": 4.5402594616353676e-07, "loss": 0.5621410012245178, "step": 688 }, { "epoch": 0.7443365695792881, "grad_norm": 1.8721739053726196, "learning_rate": 4.537011954104105e-07, "loss": 0.5246554613113403, "step": 690 }, { "epoch": 0.7464940668824164, "grad_norm": 1.6793891191482544, "learning_rate": 4.533754331938498e-07, "loss": 0.6016390323638916, "step": 692 }, { "epoch": 0.7486515641855448, "grad_norm": 1.9931007623672485, "learning_rate": 4.530486613576954e-07, "loss": 0.6583068370819092, "step": 694 }, { "epoch": 0.7508090614886731, "grad_norm": 4.051970481872559, "learning_rate": 4.5272088175150305e-07, "loss": 0.5990911722183228, "step": 696 }, { "epoch": 0.7529665587918015, "grad_norm": 1.6209897994995117, "learning_rate": 4.523920962305319e-07, "loss": 0.6055378913879395, "step": 698 }, { "epoch": 0.7551240560949298, "grad_norm": 1.4128165245056152, "learning_rate": 4.520623066557351e-07, "loss": 0.7489557862281799, "step": 700 }, { "epoch": 0.7572815533980582, "grad_norm": 2.651704788208008, "learning_rate": 4.5173151489374874e-07, "loss": 0.5671336054801941, "step": 702 }, { "epoch": 0.7594390507011867, "grad_norm": 2.9760892391204834, "learning_rate": 4.5139972281688125e-07, "loss": 0.5748198628425598, "step": 704 }, { "epoch": 0.761596548004315, "grad_norm": 2.070570230484009, "learning_rate": 4.510669323031032e-07, "loss": 0.6631715297698975, "step": 706 }, { "epoch": 0.7637540453074434, "grad_norm": 2.8771958351135254, "learning_rate": 4.50733145236036e-07, "loss": 0.538175106048584, "step": 708 }, { "epoch": 0.7659115426105717, "grad_norm": 1.689902901649475, "learning_rate": 4.50398363504942e-07, "loss": 0.5201085209846497, "step": 710 }, { "epoch": 0.7680690399137001, "grad_norm": 3.293177366256714, "learning_rate": 4.500625890047133e-07, "loss": 0.825022280216217, "step": 712 }, { "epoch": 0.7702265372168284, "grad_norm": 0.5633196234703064, "learning_rate": 4.49725823635861e-07, "loss": 0.6611315011978149, "step": 714 }, { "epoch": 0.7723840345199569, "grad_norm": 1.3877530097961426, "learning_rate": 4.4938806930450476e-07, "loss": 0.5903546810150146, "step": 716 }, { "epoch": 0.7745415318230853, "grad_norm": 2.2322981357574463, "learning_rate": 4.4904932792236187e-07, "loss": 0.4113418459892273, "step": 718 }, { "epoch": 0.7766990291262136, "grad_norm": 1.2284919023513794, "learning_rate": 4.487096014067363e-07, "loss": 0.5899285078048706, "step": 720 }, { "epoch": 0.778856526429342, "grad_norm": 1.843123197555542, "learning_rate": 4.483688916805081e-07, "loss": 0.5846186280250549, "step": 722 }, { "epoch": 0.7810140237324703, "grad_norm": 3.0655667781829834, "learning_rate": 4.4802720067212237e-07, "loss": 0.6830175518989563, "step": 724 }, { "epoch": 0.7831715210355987, "grad_norm": 2.1866564750671387, "learning_rate": 4.4768453031557797e-07, "loss": 0.6690636873245239, "step": 726 }, { "epoch": 0.785329018338727, "grad_norm": 8.055279731750488, "learning_rate": 4.4734088255041747e-07, "loss": 0.6548261642456055, "step": 728 }, { "epoch": 0.7874865156418555, "grad_norm": 1.804400086402893, "learning_rate": 4.4699625932171534e-07, "loss": 0.8144820928573608, "step": 730 }, { "epoch": 0.7896440129449838, "grad_norm": 1.7731058597564697, "learning_rate": 4.466506625800674e-07, "loss": 0.6711707711219788, "step": 732 }, { "epoch": 0.7918015102481122, "grad_norm": 4.458347320556641, "learning_rate": 4.463040942815796e-07, "loss": 0.6960354447364807, "step": 734 }, { "epoch": 0.7939590075512406, "grad_norm": 1.831221103668213, "learning_rate": 4.459565563878568e-07, "loss": 0.6320536732673645, "step": 736 }, { "epoch": 0.7961165048543689, "grad_norm": 1.1004321575164795, "learning_rate": 4.456080508659922e-07, "loss": 0.6839619874954224, "step": 738 }, { "epoch": 0.7982740021574973, "grad_norm": 3.9790191650390625, "learning_rate": 4.452585796885555e-07, "loss": 0.787294328212738, "step": 740 }, { "epoch": 0.8004314994606256, "grad_norm": 2.5182931423187256, "learning_rate": 4.449081448335824e-07, "loss": 0.5662147402763367, "step": 742 }, { "epoch": 0.8025889967637541, "grad_norm": 1.3335113525390625, "learning_rate": 4.4455674828456285e-07, "loss": 0.4203084111213684, "step": 744 }, { "epoch": 0.8047464940668824, "grad_norm": 3.6229031085968018, "learning_rate": 4.442043920304302e-07, "loss": 0.3892061412334442, "step": 746 }, { "epoch": 0.8069039913700108, "grad_norm": 2.473930835723877, "learning_rate": 4.4385107806554964e-07, "loss": 0.6021113395690918, "step": 748 }, { "epoch": 0.8090614886731392, "grad_norm": 5.694954872131348, "learning_rate": 4.4349680838970745e-07, "loss": 0.586351752281189, "step": 750 }, { "epoch": 0.8112189859762675, "grad_norm": 1.9584993124008179, "learning_rate": 4.431415850080989e-07, "loss": 0.5266885757446289, "step": 752 }, { "epoch": 0.8133764832793959, "grad_norm": 2.9414353370666504, "learning_rate": 4.427854099313175e-07, "loss": 0.7066282033920288, "step": 754 }, { "epoch": 0.8155339805825242, "grad_norm": 2.3507425785064697, "learning_rate": 4.424282851753435e-07, "loss": 0.6412563323974609, "step": 756 }, { "epoch": 0.8176914778856527, "grad_norm": 1.6683588027954102, "learning_rate": 4.420702127615323e-07, "loss": 0.6926671862602234, "step": 758 }, { "epoch": 0.819848975188781, "grad_norm": 2.730984687805176, "learning_rate": 4.4171119471660315e-07, "loss": 0.506192684173584, "step": 760 }, { "epoch": 0.8220064724919094, "grad_norm": 1.8006408214569092, "learning_rate": 4.413512330726276e-07, "loss": 0.5694448351860046, "step": 762 }, { "epoch": 0.8241639697950378, "grad_norm": 1.66466224193573, "learning_rate": 4.4099032986701817e-07, "loss": 0.8421421051025391, "step": 764 }, { "epoch": 0.8263214670981661, "grad_norm": 2.7753241062164307, "learning_rate": 4.406284871425166e-07, "loss": 0.5470436215400696, "step": 766 }, { "epoch": 0.8284789644012945, "grad_norm": 2.583517551422119, "learning_rate": 4.402657069471824e-07, "loss": 0.5224552154541016, "step": 768 }, { "epoch": 0.8306364617044228, "grad_norm": 3.6843671798706055, "learning_rate": 4.3990199133438133e-07, "loss": 0.5659505724906921, "step": 770 }, { "epoch": 0.8327939590075513, "grad_norm": 2.2777364253997803, "learning_rate": 4.395373423627735e-07, "loss": 0.5481325387954712, "step": 772 }, { "epoch": 0.8349514563106796, "grad_norm": 12.834761619567871, "learning_rate": 4.3917176209630216e-07, "loss": 0.7331764101982117, "step": 774 }, { "epoch": 0.837108953613808, "grad_norm": 4.730658531188965, "learning_rate": 4.3880525260418143e-07, "loss": 0.6668429970741272, "step": 776 }, { "epoch": 0.8392664509169363, "grad_norm": 8.263324737548828, "learning_rate": 4.3843781596088526e-07, "loss": 0.6778562068939209, "step": 778 }, { "epoch": 0.8414239482200647, "grad_norm": 1.9440997838974, "learning_rate": 4.380694542461352e-07, "loss": 0.6994505524635315, "step": 780 }, { "epoch": 0.8435814455231931, "grad_norm": 0.9290769100189209, "learning_rate": 4.3770016954488887e-07, "loss": 0.5277756452560425, "step": 782 }, { "epoch": 0.8457389428263214, "grad_norm": 1.2959551811218262, "learning_rate": 4.373299639473277e-07, "loss": 0.5180898904800415, "step": 784 }, { "epoch": 0.8478964401294499, "grad_norm": 2.0248026847839355, "learning_rate": 4.3695883954884616e-07, "loss": 0.5872831344604492, "step": 786 }, { "epoch": 0.8500539374325782, "grad_norm": 1.9588837623596191, "learning_rate": 4.365867984500385e-07, "loss": 0.7239266633987427, "step": 788 }, { "epoch": 0.8522114347357066, "grad_norm": 1.0221301317214966, "learning_rate": 4.3621384275668796e-07, "loss": 0.38601911067962646, "step": 790 }, { "epoch": 0.8543689320388349, "grad_norm": 1.3124701976776123, "learning_rate": 4.3583997457975454e-07, "loss": 0.5240159630775452, "step": 792 }, { "epoch": 0.8565264293419633, "grad_norm": 7.634000778198242, "learning_rate": 4.354651960353625e-07, "loss": 0.3657144010066986, "step": 794 }, { "epoch": 0.8586839266450917, "grad_norm": 1.649867057800293, "learning_rate": 4.3508950924478943e-07, "loss": 0.5847235321998596, "step": 796 }, { "epoch": 0.86084142394822, "grad_norm": 2.088364601135254, "learning_rate": 4.3471291633445334e-07, "loss": 0.6124690771102905, "step": 798 }, { "epoch": 0.8629989212513485, "grad_norm": 3.3762848377227783, "learning_rate": 4.343354194359009e-07, "loss": 0.5077890753746033, "step": 800 }, { "epoch": 0.8651564185544768, "grad_norm": 1.6368306875228882, "learning_rate": 4.339570206857957e-07, "loss": 0.4499396085739136, "step": 802 }, { "epoch": 0.8673139158576052, "grad_norm": 1.6739598512649536, "learning_rate": 4.335777222259056e-07, "loss": 0.4595860242843628, "step": 804 }, { "epoch": 0.8694714131607335, "grad_norm": 2.5155189037323, "learning_rate": 4.331975262030911e-07, "loss": 0.5732553005218506, "step": 806 }, { "epoch": 0.8716289104638619, "grad_norm": 1.8127964735031128, "learning_rate": 4.3281643476929286e-07, "loss": 0.6196991801261902, "step": 808 }, { "epoch": 0.8737864077669902, "grad_norm": 3.939059257507324, "learning_rate": 4.324344500815197e-07, "loss": 0.5942954421043396, "step": 810 }, { "epoch": 0.8759439050701187, "grad_norm": 4.195562362670898, "learning_rate": 4.3205157430183627e-07, "loss": 0.38596200942993164, "step": 812 }, { "epoch": 0.8781014023732471, "grad_norm": 1.3970879316329956, "learning_rate": 4.316678095973509e-07, "loss": 0.613640308380127, "step": 814 }, { "epoch": 0.8802588996763754, "grad_norm": 1.3632667064666748, "learning_rate": 4.312831581402034e-07, "loss": 0.653675377368927, "step": 816 }, { "epoch": 0.8824163969795038, "grad_norm": 3.1686818599700928, "learning_rate": 4.3089762210755246e-07, "loss": 0.5764310956001282, "step": 818 }, { "epoch": 0.8845738942826321, "grad_norm": 3.4244537353515625, "learning_rate": 4.305112036815639e-07, "loss": 0.5124704241752625, "step": 820 }, { "epoch": 0.8867313915857605, "grad_norm": 1.641554832458496, "learning_rate": 4.3012390504939745e-07, "loss": 0.6814495921134949, "step": 822 }, { "epoch": 0.8888888888888888, "grad_norm": 1.3232611417770386, "learning_rate": 4.2973572840319536e-07, "loss": 0.6368851661682129, "step": 824 }, { "epoch": 0.8910463861920173, "grad_norm": 2.085556983947754, "learning_rate": 4.2934667594006917e-07, "loss": 0.6687034368515015, "step": 826 }, { "epoch": 0.8932038834951457, "grad_norm": 2.6139376163482666, "learning_rate": 4.2895674986208786e-07, "loss": 0.2821641266345978, "step": 828 }, { "epoch": 0.895361380798274, "grad_norm": 1.9903957843780518, "learning_rate": 4.28565952376265e-07, "loss": 0.5252734422683716, "step": 830 }, { "epoch": 0.8975188781014024, "grad_norm": 2.297610282897949, "learning_rate": 4.281742856945465e-07, "loss": 0.745256781578064, "step": 832 }, { "epoch": 0.8996763754045307, "grad_norm": 6.354626655578613, "learning_rate": 4.277817520337978e-07, "loss": 0.5809981226921082, "step": 834 }, { "epoch": 0.9018338727076591, "grad_norm": 2.735447883605957, "learning_rate": 4.273883536157917e-07, "loss": 0.6928726434707642, "step": 836 }, { "epoch": 0.9039913700107874, "grad_norm": 1.2760671377182007, "learning_rate": 4.269940926671957e-07, "loss": 0.6470978856086731, "step": 838 }, { "epoch": 0.9061488673139159, "grad_norm": 9.878061294555664, "learning_rate": 4.2659897141955876e-07, "loss": 0.6638087630271912, "step": 840 }, { "epoch": 0.9083063646170443, "grad_norm": 6.520370960235596, "learning_rate": 4.262029921092999e-07, "loss": 0.6910456418991089, "step": 842 }, { "epoch": 0.9104638619201726, "grad_norm": 6.460091590881348, "learning_rate": 4.258061569776944e-07, "loss": 0.8283501267433167, "step": 844 }, { "epoch": 0.912621359223301, "grad_norm": 5.205661296844482, "learning_rate": 4.254084682708617e-07, "loss": 0.8275612592697144, "step": 846 }, { "epoch": 0.9147788565264293, "grad_norm": 1.463245153427124, "learning_rate": 4.250099282397526e-07, "loss": 0.46582770347595215, "step": 848 }, { "epoch": 0.9169363538295577, "grad_norm": 5.333581924438477, "learning_rate": 4.246105391401362e-07, "loss": 0.7306879758834839, "step": 850 }, { "epoch": 0.919093851132686, "grad_norm": 5.498586654663086, "learning_rate": 4.2421030323258773e-07, "loss": 0.4175741672515869, "step": 852 }, { "epoch": 0.9212513484358145, "grad_norm": 1.0770583152770996, "learning_rate": 4.2380922278247524e-07, "loss": 0.6730119585990906, "step": 854 }, { "epoch": 0.9234088457389428, "grad_norm": 0.42357996106147766, "learning_rate": 4.234073000599469e-07, "loss": 0.8127405643463135, "step": 856 }, { "epoch": 0.9255663430420712, "grad_norm": 7.6792120933532715, "learning_rate": 4.230045373399185e-07, "loss": 0.576317310333252, "step": 858 }, { "epoch": 0.9277238403451996, "grad_norm": 5.140259265899658, "learning_rate": 4.2260093690206007e-07, "loss": 0.523854672908783, "step": 860 }, { "epoch": 0.9298813376483279, "grad_norm": 3.8896889686584473, "learning_rate": 4.221965010307831e-07, "loss": 0.8840075731277466, "step": 862 }, { "epoch": 0.9320388349514563, "grad_norm": 1.3811416625976562, "learning_rate": 4.2179123201522784e-07, "loss": 0.7325385212898254, "step": 864 }, { "epoch": 0.9341963322545846, "grad_norm": 1.7766026258468628, "learning_rate": 4.213851321492503e-07, "loss": 0.30791109800338745, "step": 866 }, { "epoch": 0.9363538295577131, "grad_norm": 1.1082113981246948, "learning_rate": 4.209782037314089e-07, "loss": 0.6958837509155273, "step": 868 }, { "epoch": 0.9385113268608414, "grad_norm": 7.657420635223389, "learning_rate": 4.2057044906495197e-07, "loss": 0.42088598012924194, "step": 870 }, { "epoch": 0.9406688241639698, "grad_norm": 9.207883834838867, "learning_rate": 4.2016187045780445e-07, "loss": 0.6101109385490417, "step": 872 }, { "epoch": 0.9428263214670982, "grad_norm": 1.3625860214233398, "learning_rate": 4.197524702225547e-07, "loss": 0.6703606247901917, "step": 874 }, { "epoch": 0.9449838187702265, "grad_norm": 4.89463996887207, "learning_rate": 4.1934225067644163e-07, "loss": 0.5207024216651917, "step": 876 }, { "epoch": 0.9471413160733549, "grad_norm": 5.676528453826904, "learning_rate": 4.1893121414134165e-07, "loss": 0.67097008228302, "step": 878 }, { "epoch": 0.9492988133764833, "grad_norm": 3.8142192363739014, "learning_rate": 4.1851936294375525e-07, "loss": 0.8798677325248718, "step": 880 }, { "epoch": 0.9514563106796117, "grad_norm": 3.3056960105895996, "learning_rate": 4.181066994147939e-07, "loss": 0.5886131525039673, "step": 882 }, { "epoch": 0.95361380798274, "grad_norm": 1.9534400701522827, "learning_rate": 4.176932258901673e-07, "loss": 0.553901731967926, "step": 884 }, { "epoch": 0.9557713052858684, "grad_norm": 1.5135817527770996, "learning_rate": 4.1727894471016933e-07, "loss": 0.35753270983695984, "step": 886 }, { "epoch": 0.9579288025889967, "grad_norm": 4.861209869384766, "learning_rate": 4.168638582196654e-07, "loss": 0.8348190188407898, "step": 888 }, { "epoch": 0.9600862998921251, "grad_norm": 2.2612881660461426, "learning_rate": 4.164479687680794e-07, "loss": 0.8166549205780029, "step": 890 }, { "epoch": 0.9622437971952535, "grad_norm": 1.4900096654891968, "learning_rate": 4.160312787093796e-07, "loss": 0.47946974635124207, "step": 892 }, { "epoch": 0.9644012944983819, "grad_norm": 2.3414793014526367, "learning_rate": 4.156137904020659e-07, "loss": 0.5899794697761536, "step": 894 }, { "epoch": 0.9665587918015103, "grad_norm": 1.6720659732818604, "learning_rate": 4.1519550620915643e-07, "loss": 0.6609233617782593, "step": 896 }, { "epoch": 0.9687162891046386, "grad_norm": 1.4975440502166748, "learning_rate": 4.1477642849817414e-07, "loss": 0.6876581907272339, "step": 898 }, { "epoch": 0.970873786407767, "grad_norm": 4.46371603012085, "learning_rate": 4.143565596411331e-07, "loss": 0.6830723881721497, "step": 900 }, { "epoch": 0.9730312837108953, "grad_norm": 1.2813857793807983, "learning_rate": 4.139359020145257e-07, "loss": 0.5846165418624878, "step": 902 }, { "epoch": 0.9751887810140237, "grad_norm": 3.6278672218322754, "learning_rate": 4.1351445799930837e-07, "loss": 0.7689055800437927, "step": 904 }, { "epoch": 0.9773462783171522, "grad_norm": 6.095682621002197, "learning_rate": 4.1309222998088923e-07, "loss": 0.5822760462760925, "step": 906 }, { "epoch": 0.9795037756202805, "grad_norm": 1.989863634109497, "learning_rate": 4.126692203491132e-07, "loss": 0.6141869425773621, "step": 908 }, { "epoch": 0.9816612729234089, "grad_norm": 1.743506908416748, "learning_rate": 4.1224543149824945e-07, "loss": 0.6478677988052368, "step": 910 }, { "epoch": 0.9838187702265372, "grad_norm": 1.9053099155426025, "learning_rate": 4.11820865826978e-07, "loss": 0.48178091645240784, "step": 912 }, { "epoch": 0.9859762675296656, "grad_norm": 2.2662453651428223, "learning_rate": 4.1139552573837515e-07, "loss": 0.607950747013092, "step": 914 }, { "epoch": 0.9881337648327939, "grad_norm": 2.4114274978637695, "learning_rate": 4.109694136399008e-07, "loss": 0.6108921766281128, "step": 916 }, { "epoch": 0.9902912621359223, "grad_norm": 7.512186050415039, "learning_rate": 4.105425319433844e-07, "loss": 0.6795108318328857, "step": 918 }, { "epoch": 0.9924487594390508, "grad_norm": 1.7533015012741089, "learning_rate": 4.1011488306501136e-07, "loss": 0.437101811170578, "step": 920 }, { "epoch": 0.9946062567421791, "grad_norm": 2.403139352798462, "learning_rate": 4.096864694253095e-07, "loss": 0.4864053428173065, "step": 922 }, { "epoch": 0.9967637540453075, "grad_norm": 1.6964081525802612, "learning_rate": 4.0925729344913507e-07, "loss": 0.643713116645813, "step": 924 }, { "epoch": 0.9989212513484358, "grad_norm": 1.4134613275527954, "learning_rate": 4.088273575656594e-07, "loss": 0.6958010196685791, "step": 926 }, { "epoch": 1.0010787486515642, "grad_norm": 1.5019845962524414, "learning_rate": 4.083966642083549e-07, "loss": 0.491763174533844, "step": 928 }, { "epoch": 1.0032362459546926, "grad_norm": 1.6836707592010498, "learning_rate": 4.079652158149813e-07, "loss": 0.6048216819763184, "step": 930 }, { "epoch": 1.0053937432578208, "grad_norm": 1.5291476249694824, "learning_rate": 4.075330148275719e-07, "loss": 0.6078330278396606, "step": 932 }, { "epoch": 1.0075512405609492, "grad_norm": 1.7825332880020142, "learning_rate": 4.0710006369241984e-07, "loss": 0.6125837564468384, "step": 934 }, { "epoch": 1.0097087378640777, "grad_norm": 2.508885622024536, "learning_rate": 4.06666364860064e-07, "loss": 0.42874282598495483, "step": 936 }, { "epoch": 1.011866235167206, "grad_norm": 3.95334529876709, "learning_rate": 4.062319207852754e-07, "loss": 0.7187290191650391, "step": 938 }, { "epoch": 1.0140237324703345, "grad_norm": 1.270442247390747, "learning_rate": 4.0579673392704315e-07, "loss": 0.5802730321884155, "step": 940 }, { "epoch": 1.0161812297734627, "grad_norm": 1.6048680543899536, "learning_rate": 4.0536080674856064e-07, "loss": 0.5463104248046875, "step": 942 }, { "epoch": 1.0183387270765911, "grad_norm": 4.189075469970703, "learning_rate": 4.0492414171721137e-07, "loss": 0.4786475896835327, "step": 944 }, { "epoch": 1.0204962243797195, "grad_norm": 7.608390808105469, "learning_rate": 4.044867413045554e-07, "loss": 0.6445322632789612, "step": 946 }, { "epoch": 1.022653721682848, "grad_norm": 3.295640230178833, "learning_rate": 4.0404860798631497e-07, "loss": 0.5995020866394043, "step": 948 }, { "epoch": 1.0248112189859762, "grad_norm": 3.5857110023498535, "learning_rate": 4.0360974424236045e-07, "loss": 0.5838800072669983, "step": 950 }, { "epoch": 1.0269687162891046, "grad_norm": 2.5072519779205322, "learning_rate": 4.031701525566968e-07, "loss": 0.550299882888794, "step": 952 }, { "epoch": 1.029126213592233, "grad_norm": 2.477893352508545, "learning_rate": 4.0272983541744906e-07, "loss": 0.48044463992118835, "step": 954 }, { "epoch": 1.0312837108953614, "grad_norm": 2.064870595932007, "learning_rate": 4.0228879531684825e-07, "loss": 0.607839822769165, "step": 956 }, { "epoch": 1.0334412081984898, "grad_norm": 1.39699125289917, "learning_rate": 4.018470347512177e-07, "loss": 0.6562771797180176, "step": 958 }, { "epoch": 1.035598705501618, "grad_norm": 4.258260250091553, "learning_rate": 4.0140455622095833e-07, "loss": 0.6375301480293274, "step": 960 }, { "epoch": 1.0377562028047465, "grad_norm": 2.1129860877990723, "learning_rate": 4.0096136223053503e-07, "loss": 0.5195407271385193, "step": 962 }, { "epoch": 1.0399137001078749, "grad_norm": 4.592264652252197, "learning_rate": 4.005174552884621e-07, "loss": 0.47522222995758057, "step": 964 }, { "epoch": 1.0420711974110033, "grad_norm": 1.5298984050750732, "learning_rate": 4.0007283790728937e-07, "loss": 0.45716097950935364, "step": 966 }, { "epoch": 1.0442286947141317, "grad_norm": 2.2780728340148926, "learning_rate": 3.996275126035877e-07, "loss": 0.6386222243309021, "step": 968 }, { "epoch": 1.04638619201726, "grad_norm": 39.363914489746094, "learning_rate": 3.9918148189793473e-07, "loss": 0.6134737133979797, "step": 970 }, { "epoch": 1.0485436893203883, "grad_norm": 1.4374394416809082, "learning_rate": 3.98734748314901e-07, "loss": 0.49869057536125183, "step": 972 }, { "epoch": 1.0507011866235167, "grad_norm": 2.8738322257995605, "learning_rate": 3.9828731438303513e-07, "loss": 0.4262846112251282, "step": 974 }, { "epoch": 1.0528586839266452, "grad_norm": 4.88612174987793, "learning_rate": 3.978391826348501e-07, "loss": 0.44011008739471436, "step": 976 }, { "epoch": 1.0550161812297734, "grad_norm": 1.591728925704956, "learning_rate": 3.973903556068082e-07, "loss": 0.579682469367981, "step": 978 }, { "epoch": 1.0571736785329018, "grad_norm": 2.144808769226074, "learning_rate": 3.9694083583930734e-07, "loss": 0.6116613149642944, "step": 980 }, { "epoch": 1.0593311758360302, "grad_norm": 4.2694525718688965, "learning_rate": 3.964906258766663e-07, "loss": 0.5095481872558594, "step": 982 }, { "epoch": 1.0614886731391586, "grad_norm": 1.4594628810882568, "learning_rate": 3.960397282671104e-07, "loss": 0.49267393350601196, "step": 984 }, { "epoch": 1.063646170442287, "grad_norm": 2.070873737335205, "learning_rate": 3.9558814556275705e-07, "loss": 0.5884362459182739, "step": 986 }, { "epoch": 1.0658036677454152, "grad_norm": 1.6621878147125244, "learning_rate": 3.9513588031960164e-07, "loss": 0.5078074336051941, "step": 988 }, { "epoch": 1.0679611650485437, "grad_norm": 1.82585608959198, "learning_rate": 3.946829350975024e-07, "loss": 0.5152098536491394, "step": 990 }, { "epoch": 1.070118662351672, "grad_norm": 1.609985589981079, "learning_rate": 3.942293124601664e-07, "loss": 0.5379571318626404, "step": 992 }, { "epoch": 1.0722761596548005, "grad_norm": 3.880840301513672, "learning_rate": 3.937750149751353e-07, "loss": 0.53695148229599, "step": 994 }, { "epoch": 1.074433656957929, "grad_norm": 1.5093027353286743, "learning_rate": 3.9332004521376976e-07, "loss": 0.6305853128433228, "step": 996 }, { "epoch": 1.0765911542610571, "grad_norm": 10.337991714477539, "learning_rate": 3.9286440575123625e-07, "loss": 0.46823710203170776, "step": 998 }, { "epoch": 1.0787486515641855, "grad_norm": 6.093511581420898, "learning_rate": 3.9240809916649146e-07, "loss": 0.7002033591270447, "step": 1000 }, { "epoch": 1.080906148867314, "grad_norm": 1.6978514194488525, "learning_rate": 3.919511280422681e-07, "loss": 0.4546293318271637, "step": 1002 }, { "epoch": 1.0830636461704424, "grad_norm": 1.1726937294006348, "learning_rate": 3.914934949650603e-07, "loss": 0.6637395620346069, "step": 1004 }, { "epoch": 1.0852211434735706, "grad_norm": 1.5652375221252441, "learning_rate": 3.910352025251087e-07, "loss": 0.5811644196510315, "step": 1006 }, { "epoch": 1.087378640776699, "grad_norm": 1.6414304971694946, "learning_rate": 3.905762533163863e-07, "loss": 0.595992386341095, "step": 1008 }, { "epoch": 1.0895361380798274, "grad_norm": 2.038053512573242, "learning_rate": 3.9011664993658315e-07, "loss": 0.5396429896354675, "step": 1010 }, { "epoch": 1.0916936353829558, "grad_norm": 1.9649733304977417, "learning_rate": 3.8965639498709213e-07, "loss": 0.5478025674819946, "step": 1012 }, { "epoch": 1.0938511326860842, "grad_norm": 3.463732957839966, "learning_rate": 3.891954910729942e-07, "loss": 0.49627092480659485, "step": 1014 }, { "epoch": 1.0960086299892124, "grad_norm": 3.943431854248047, "learning_rate": 3.8873394080304304e-07, "loss": 0.6524069905281067, "step": 1016 }, { "epoch": 1.0981661272923409, "grad_norm": 3.5497028827667236, "learning_rate": 3.8827174678965144e-07, "loss": 0.5223618149757385, "step": 1018 }, { "epoch": 1.1003236245954693, "grad_norm": 7.74370002746582, "learning_rate": 3.878089116488752e-07, "loss": 0.5389726161956787, "step": 1020 }, { "epoch": 1.1024811218985977, "grad_norm": 2.1320722103118896, "learning_rate": 3.873454380003992e-07, "loss": 0.5913807153701782, "step": 1022 }, { "epoch": 1.104638619201726, "grad_norm": 2.8024096488952637, "learning_rate": 3.8688132846752246e-07, "loss": 0.5501705408096313, "step": 1024 }, { "epoch": 1.1067961165048543, "grad_norm": 1.3087513446807861, "learning_rate": 3.864165856771429e-07, "loss": 0.5886018872261047, "step": 1026 }, { "epoch": 1.1089536138079827, "grad_norm": 1.2707762718200684, "learning_rate": 3.85951212259743e-07, "loss": 0.5950880646705627, "step": 1028 }, { "epoch": 1.1111111111111112, "grad_norm": 5.31006383895874, "learning_rate": 3.8548521084937434e-07, "loss": 0.4451335668563843, "step": 1030 }, { "epoch": 1.1132686084142396, "grad_norm": 3.055915117263794, "learning_rate": 3.8501858408364333e-07, "loss": 0.47768980264663696, "step": 1032 }, { "epoch": 1.1154261057173678, "grad_norm": 1.2441359758377075, "learning_rate": 3.845513346036957e-07, "loss": 0.6385471820831299, "step": 1034 }, { "epoch": 1.1175836030204962, "grad_norm": 6.279306411743164, "learning_rate": 3.840834650542018e-07, "loss": 0.45993420481681824, "step": 1036 }, { "epoch": 1.1197411003236246, "grad_norm": 1.52280592918396, "learning_rate": 3.836149780833418e-07, "loss": 0.5168780088424683, "step": 1038 }, { "epoch": 1.121898597626753, "grad_norm": 2.0793957710266113, "learning_rate": 3.8314587634279027e-07, "loss": 0.5576176643371582, "step": 1040 }, { "epoch": 1.1240560949298812, "grad_norm": 1.4773210287094116, "learning_rate": 3.8267616248770165e-07, "loss": 0.5373958945274353, "step": 1042 }, { "epoch": 1.1262135922330097, "grad_norm": 2.390502691268921, "learning_rate": 3.8220583917669486e-07, "loss": 0.5401830673217773, "step": 1044 }, { "epoch": 1.128371089536138, "grad_norm": 1.2080022096633911, "learning_rate": 3.8173490907183854e-07, "loss": 0.5357274413108826, "step": 1046 }, { "epoch": 1.1305285868392665, "grad_norm": 5.848990440368652, "learning_rate": 3.8126337483863565e-07, "loss": 0.5787194967269897, "step": 1048 }, { "epoch": 1.132686084142395, "grad_norm": 2.1143975257873535, "learning_rate": 3.8079123914600874e-07, "loss": 0.5402184724807739, "step": 1050 }, { "epoch": 1.134843581445523, "grad_norm": 1.5472838878631592, "learning_rate": 3.8031850466628446e-07, "loss": 0.5939875245094299, "step": 1052 }, { "epoch": 1.1370010787486515, "grad_norm": 1.5804020166397095, "learning_rate": 3.798451740751789e-07, "loss": 0.5679658651351929, "step": 1054 }, { "epoch": 1.13915857605178, "grad_norm": 1.2764010429382324, "learning_rate": 3.79371250051782e-07, "loss": 0.5673144459724426, "step": 1056 }, { "epoch": 1.1413160733549084, "grad_norm": 2.0156118869781494, "learning_rate": 3.788967352785426e-07, "loss": 0.6328020095825195, "step": 1058 }, { "epoch": 1.1434735706580366, "grad_norm": 9.465262413024902, "learning_rate": 3.7842163244125336e-07, "loss": 0.4857198894023895, "step": 1060 }, { "epoch": 1.145631067961165, "grad_norm": 1.7701698541641235, "learning_rate": 3.7794594422903524e-07, "loss": 0.2504948377609253, "step": 1062 }, { "epoch": 1.1477885652642934, "grad_norm": 3.641615629196167, "learning_rate": 3.7746967333432267e-07, "loss": 0.5472912192344666, "step": 1064 }, { "epoch": 1.1499460625674218, "grad_norm": 2.0676019191741943, "learning_rate": 3.769928224528479e-07, "loss": 0.6364511847496033, "step": 1066 }, { "epoch": 1.1521035598705502, "grad_norm": 4.401365756988525, "learning_rate": 3.7651539428362613e-07, "loss": 0.5439355969429016, "step": 1068 }, { "epoch": 1.1542610571736784, "grad_norm": 5.850034236907959, "learning_rate": 3.7603739152894e-07, "loss": 0.6640523076057434, "step": 1070 }, { "epoch": 1.1564185544768069, "grad_norm": 1.1790587902069092, "learning_rate": 3.755588168943242e-07, "loss": 0.5868851542472839, "step": 1072 }, { "epoch": 1.1585760517799353, "grad_norm": 1.469138503074646, "learning_rate": 3.7507967308855054e-07, "loss": 0.5512825846672058, "step": 1074 }, { "epoch": 1.1607335490830637, "grad_norm": 1.2130903005599976, "learning_rate": 3.7459996282361243e-07, "loss": 0.5239831805229187, "step": 1076 }, { "epoch": 1.162891046386192, "grad_norm": 7.653203964233398, "learning_rate": 3.741196888147091e-07, "loss": 0.6439744234085083, "step": 1078 }, { "epoch": 1.1650485436893203, "grad_norm": 1.5135337114334106, "learning_rate": 3.7363885378023103e-07, "loss": 0.5788765549659729, "step": 1080 }, { "epoch": 1.1672060409924487, "grad_norm": 3.182453155517578, "learning_rate": 3.731574604417439e-07, "loss": 0.5617559552192688, "step": 1082 }, { "epoch": 1.1693635382955772, "grad_norm": 2.3747951984405518, "learning_rate": 3.7267551152397357e-07, "loss": 0.5370228886604309, "step": 1084 }, { "epoch": 1.1715210355987056, "grad_norm": 1.1393845081329346, "learning_rate": 3.721930097547905e-07, "loss": 0.6155055165290833, "step": 1086 }, { "epoch": 1.173678532901834, "grad_norm": 2.894418954849243, "learning_rate": 3.717099578651941e-07, "loss": 0.6374943256378174, "step": 1088 }, { "epoch": 1.1758360302049622, "grad_norm": 2.416472911834717, "learning_rate": 3.71226358589298e-07, "loss": 0.6947451829910278, "step": 1090 }, { "epoch": 1.1779935275080906, "grad_norm": 1.3670376539230347, "learning_rate": 3.7074221466431373e-07, "loss": 0.5406089425086975, "step": 1092 }, { "epoch": 1.180151024811219, "grad_norm": 1.2521384954452515, "learning_rate": 3.702575288305355e-07, "loss": 0.5395722389221191, "step": 1094 }, { "epoch": 1.1823085221143474, "grad_norm": 1.9795541763305664, "learning_rate": 3.697723038313251e-07, "loss": 0.29258376359939575, "step": 1096 }, { "epoch": 1.1844660194174756, "grad_norm": 1.5469352006912231, "learning_rate": 3.692865424130957e-07, "loss": 0.28986942768096924, "step": 1098 }, { "epoch": 1.186623516720604, "grad_norm": 2.116645574569702, "learning_rate": 3.6880024732529693e-07, "loss": 0.4253014922142029, "step": 1100 }, { "epoch": 1.1887810140237325, "grad_norm": 1.2636767625808716, "learning_rate": 3.683134213203987e-07, "loss": 0.3352068066596985, "step": 1102 }, { "epoch": 1.190938511326861, "grad_norm": 4.438840866088867, "learning_rate": 3.6782606715387635e-07, "loss": 0.5909640192985535, "step": 1104 }, { "epoch": 1.1930960086299893, "grad_norm": 2.9154276847839355, "learning_rate": 3.673381875841945e-07, "loss": 0.6895350217819214, "step": 1106 }, { "epoch": 1.1952535059331175, "grad_norm": 1.7983450889587402, "learning_rate": 3.668497853727913e-07, "loss": 0.6385772824287415, "step": 1108 }, { "epoch": 1.197411003236246, "grad_norm": 3.447049140930176, "learning_rate": 3.6636086328406374e-07, "loss": 0.6397103071212769, "step": 1110 }, { "epoch": 1.1995685005393744, "grad_norm": 3.8200409412384033, "learning_rate": 3.6587142408535054e-07, "loss": 0.5476571321487427, "step": 1112 }, { "epoch": 1.2017259978425028, "grad_norm": 1.2859944105148315, "learning_rate": 3.6538147054691815e-07, "loss": 0.5042168498039246, "step": 1114 }, { "epoch": 1.203883495145631, "grad_norm": 1.283574104309082, "learning_rate": 3.648910054419435e-07, "loss": 0.5920245051383972, "step": 1116 }, { "epoch": 1.2060409924487594, "grad_norm": 1.5358695983886719, "learning_rate": 3.6440003154649953e-07, "loss": 0.4955591559410095, "step": 1118 }, { "epoch": 1.2081984897518878, "grad_norm": 1.0370635986328125, "learning_rate": 3.639085516395387e-07, "loss": 0.6023776531219482, "step": 1120 }, { "epoch": 1.2103559870550162, "grad_norm": 3.8796122074127197, "learning_rate": 3.6341656850287774e-07, "loss": 0.762068510055542, "step": 1122 }, { "epoch": 1.2125134843581447, "grad_norm": 3.400327444076538, "learning_rate": 3.629240849211814e-07, "loss": 0.6205697059631348, "step": 1124 }, { "epoch": 1.2146709816612729, "grad_norm": 2.327786684036255, "learning_rate": 3.6243110368194737e-07, "loss": 0.5409681797027588, "step": 1126 }, { "epoch": 1.2168284789644013, "grad_norm": 3.4878270626068115, "learning_rate": 3.619376275754897e-07, "loss": 0.5776150226593018, "step": 1128 }, { "epoch": 1.2189859762675297, "grad_norm": 3.1736040115356445, "learning_rate": 3.614436593949239e-07, "loss": 0.6846837997436523, "step": 1130 }, { "epoch": 1.2211434735706581, "grad_norm": 2.631258964538574, "learning_rate": 3.609492019361503e-07, "loss": 0.415913850069046, "step": 1132 }, { "epoch": 1.2233009708737863, "grad_norm": 2.7962353229522705, "learning_rate": 3.604542579978387e-07, "loss": 0.4860919415950775, "step": 1134 }, { "epoch": 1.2254584681769147, "grad_norm": 2.561530113220215, "learning_rate": 3.599588303814125e-07, "loss": 0.2348194271326065, "step": 1136 }, { "epoch": 1.2276159654800431, "grad_norm": 1.6811720132827759, "learning_rate": 3.594629218910325e-07, "loss": 0.5006701946258545, "step": 1138 }, { "epoch": 1.2297734627831716, "grad_norm": 4.786057472229004, "learning_rate": 3.589665353335817e-07, "loss": 0.7145098447799683, "step": 1140 }, { "epoch": 1.2319309600863, "grad_norm": 1.3728108406066895, "learning_rate": 3.584696735186486e-07, "loss": 0.7182168960571289, "step": 1142 }, { "epoch": 1.2340884573894282, "grad_norm": 2.7093677520751953, "learning_rate": 3.579723392585119e-07, "loss": 0.5651712417602539, "step": 1144 }, { "epoch": 1.2362459546925566, "grad_norm": 1.986265778541565, "learning_rate": 3.574745353681243e-07, "loss": 0.5348817110061646, "step": 1146 }, { "epoch": 1.238403451995685, "grad_norm": 6.097311496734619, "learning_rate": 3.5697626466509663e-07, "loss": 0.577741801738739, "step": 1148 }, { "epoch": 1.2405609492988134, "grad_norm": 11.053370475769043, "learning_rate": 3.564775299696821e-07, "loss": 0.5550810098648071, "step": 1150 }, { "epoch": 1.2427184466019416, "grad_norm": 1.4628392457962036, "learning_rate": 3.5597833410476006e-07, "loss": 0.620225727558136, "step": 1152 }, { "epoch": 1.24487594390507, "grad_norm": 7.8950276374816895, "learning_rate": 3.554786798958199e-07, "loss": 0.607008159160614, "step": 1154 }, { "epoch": 1.2470334412081985, "grad_norm": 1.5595459938049316, "learning_rate": 3.549785701709456e-07, "loss": 0.4987570643424988, "step": 1156 }, { "epoch": 1.249190938511327, "grad_norm": 1.6523284912109375, "learning_rate": 3.544780077607992e-07, "loss": 0.43261778354644775, "step": 1158 }, { "epoch": 1.2513484358144553, "grad_norm": 12.226542472839355, "learning_rate": 3.53976995498605e-07, "loss": 0.6562701463699341, "step": 1160 }, { "epoch": 1.2535059331175837, "grad_norm": 1.1661919355392456, "learning_rate": 3.534755362201336e-07, "loss": 0.501526951789856, "step": 1162 }, { "epoch": 1.255663430420712, "grad_norm": 2.927499532699585, "learning_rate": 3.529736327636856e-07, "loss": 0.5619246959686279, "step": 1164 }, { "epoch": 1.2578209277238404, "grad_norm": 2.0249011516571045, "learning_rate": 3.524712879700758e-07, "loss": 0.5738718509674072, "step": 1166 }, { "epoch": 1.2599784250269688, "grad_norm": 0.44852355122566223, "learning_rate": 3.5196850468261694e-07, "loss": 0.5509487390518188, "step": 1168 }, { "epoch": 1.262135922330097, "grad_norm": 2.7338967323303223, "learning_rate": 3.514652857471038e-07, "loss": 0.8337733149528503, "step": 1170 }, { "epoch": 1.2642934196332254, "grad_norm": 1.9451134204864502, "learning_rate": 3.509616340117968e-07, "loss": 0.650130033493042, "step": 1172 }, { "epoch": 1.2664509169363538, "grad_norm": 1.3618203401565552, "learning_rate": 3.50457552327406e-07, "loss": 0.5888417363166809, "step": 1174 }, { "epoch": 1.2686084142394822, "grad_norm": 3.983638048171997, "learning_rate": 3.499530435470753e-07, "loss": 0.5166311264038086, "step": 1176 }, { "epoch": 1.2707659115426106, "grad_norm": 0.49615857005119324, "learning_rate": 3.4944811052636557e-07, "loss": 0.24507802724838257, "step": 1178 }, { "epoch": 1.272923408845739, "grad_norm": 1.7828303575515747, "learning_rate": 3.4894275612323937e-07, "loss": 0.542197585105896, "step": 1180 }, { "epoch": 1.2750809061488673, "grad_norm": 3.6452059745788574, "learning_rate": 3.4843698319804406e-07, "loss": 0.4652722179889679, "step": 1182 }, { "epoch": 1.2772384034519957, "grad_norm": 2.0969831943511963, "learning_rate": 3.479307946134958e-07, "loss": 0.615992546081543, "step": 1184 }, { "epoch": 1.279395900755124, "grad_norm": 5.83058500289917, "learning_rate": 3.4742419323466364e-07, "loss": 0.6636797189712524, "step": 1186 }, { "epoch": 1.2815533980582523, "grad_norm": 1.9127455949783325, "learning_rate": 3.469171819289529e-07, "loss": 0.5533426403999329, "step": 1188 }, { "epoch": 1.2837108953613807, "grad_norm": 3.0942981243133545, "learning_rate": 3.4640976356608925e-07, "loss": 0.4518588185310364, "step": 1190 }, { "epoch": 1.2858683926645091, "grad_norm": 4.2268218994140625, "learning_rate": 3.4590194101810225e-07, "loss": 0.6738635897636414, "step": 1192 }, { "epoch": 1.2880258899676376, "grad_norm": 1.095913290977478, "learning_rate": 3.453937171593092e-07, "loss": 0.5171671509742737, "step": 1194 }, { "epoch": 1.290183387270766, "grad_norm": 1.8377701044082642, "learning_rate": 3.448850948662989e-07, "loss": 0.4299532175064087, "step": 1196 }, { "epoch": 1.2923408845738944, "grad_norm": 5.051590442657471, "learning_rate": 3.443760770179152e-07, "loss": 0.5502167344093323, "step": 1198 }, { "epoch": 1.2944983818770226, "grad_norm": 1.6523767709732056, "learning_rate": 3.438666664952409e-07, "loss": 0.6330602765083313, "step": 1200 }, { "epoch": 1.296655879180151, "grad_norm": 1.3870859146118164, "learning_rate": 3.4335686618158146e-07, "loss": 0.5067728757858276, "step": 1202 }, { "epoch": 1.2988133764832794, "grad_norm": 3.1465868949890137, "learning_rate": 3.428466789624484e-07, "loss": 0.4402804374694824, "step": 1204 }, { "epoch": 1.3009708737864076, "grad_norm": 9.775895118713379, "learning_rate": 3.4233610772554327e-07, "loss": 0.6617931127548218, "step": 1206 }, { "epoch": 1.303128371089536, "grad_norm": 6.532571792602539, "learning_rate": 3.418251553607414e-07, "loss": 0.5412026047706604, "step": 1208 }, { "epoch": 1.3052858683926645, "grad_norm": 23.66463279724121, "learning_rate": 3.4131382476007483e-07, "loss": 0.5621963739395142, "step": 1210 }, { "epoch": 1.307443365695793, "grad_norm": 4.139750957489014, "learning_rate": 3.408021188177168e-07, "loss": 0.43633171916007996, "step": 1212 }, { "epoch": 1.3096008629989213, "grad_norm": 1.5540739297866821, "learning_rate": 3.40290040429965e-07, "loss": 0.585831880569458, "step": 1214 }, { "epoch": 1.3117583603020497, "grad_norm": 0.7869778871536255, "learning_rate": 3.397775924952252e-07, "loss": 0.6663514375686646, "step": 1216 }, { "epoch": 1.313915857605178, "grad_norm": 3.8079118728637695, "learning_rate": 3.3926477791399466e-07, "loss": 0.6986393928527832, "step": 1218 }, { "epoch": 1.3160733549083063, "grad_norm": 80.64041137695312, "learning_rate": 3.3875159958884604e-07, "loss": 0.2534840703010559, "step": 1220 }, { "epoch": 1.3182308522114348, "grad_norm": 1.4813616275787354, "learning_rate": 3.382380604244108e-07, "loss": 0.5039587020874023, "step": 1222 }, { "epoch": 1.3203883495145632, "grad_norm": 1.3485782146453857, "learning_rate": 3.3772416332736267e-07, "loss": 0.49135297536849976, "step": 1224 }, { "epoch": 1.3225458468176914, "grad_norm": 2.1019444465637207, "learning_rate": 3.372099112064016e-07, "loss": 0.5730096101760864, "step": 1226 }, { "epoch": 1.3247033441208198, "grad_norm": 1.2675708532333374, "learning_rate": 3.3669530697223666e-07, "loss": 0.3039630055427551, "step": 1228 }, { "epoch": 1.3268608414239482, "grad_norm": 4.25254487991333, "learning_rate": 3.3618035353757004e-07, "loss": 0.5067458748817444, "step": 1230 }, { "epoch": 1.3290183387270766, "grad_norm": 1.8785464763641357, "learning_rate": 3.3566505381708053e-07, "loss": 0.592536449432373, "step": 1232 }, { "epoch": 1.331175836030205, "grad_norm": 3.387016773223877, "learning_rate": 3.351494107274067e-07, "loss": 0.5786437392234802, "step": 1234 }, { "epoch": 1.3333333333333333, "grad_norm": 3.627840757369995, "learning_rate": 3.3463342718713093e-07, "loss": 0.41272541880607605, "step": 1236 }, { "epoch": 1.3354908306364617, "grad_norm": 2.921630620956421, "learning_rate": 3.3411710611676245e-07, "loss": 0.5590270757675171, "step": 1238 }, { "epoch": 1.33764832793959, "grad_norm": 1.4048740863800049, "learning_rate": 3.3360045043872073e-07, "loss": 0.5370460748672485, "step": 1240 }, { "epoch": 1.3398058252427185, "grad_norm": 1.9135799407958984, "learning_rate": 3.3308346307731937e-07, "loss": 0.6795614361763, "step": 1242 }, { "epoch": 1.3419633225458467, "grad_norm": 5.181615829467773, "learning_rate": 3.325661469587493e-07, "loss": 0.43978267908096313, "step": 1244 }, { "epoch": 1.3441208198489751, "grad_norm": 2.0108721256256104, "learning_rate": 3.320485050110623e-07, "loss": 0.7196254730224609, "step": 1246 }, { "epoch": 1.3462783171521036, "grad_norm": 5.627135753631592, "learning_rate": 3.3153054016415404e-07, "loss": 0.6685677170753479, "step": 1248 }, { "epoch": 1.348435814455232, "grad_norm": 1.5753936767578125, "learning_rate": 3.3101225534974824e-07, "loss": 0.47921374440193176, "step": 1250 }, { "epoch": 1.3505933117583604, "grad_norm": 1.3769195079803467, "learning_rate": 3.304936535013796e-07, "loss": 0.5930690765380859, "step": 1252 }, { "epoch": 1.3527508090614886, "grad_norm": 3.187718391418457, "learning_rate": 3.2997473755437694e-07, "loss": 0.443682998418808, "step": 1254 }, { "epoch": 1.354908306364617, "grad_norm": 1.696946144104004, "learning_rate": 3.294555104458472e-07, "loss": 0.5157840251922607, "step": 1256 }, { "epoch": 1.3570658036677454, "grad_norm": 1.3455950021743774, "learning_rate": 3.289359751146585e-07, "loss": 0.4192175269126892, "step": 1258 }, { "epoch": 1.3592233009708738, "grad_norm": 2.213014602661133, "learning_rate": 3.2841613450142326e-07, "loss": 0.5426623821258545, "step": 1260 }, { "epoch": 1.361380798274002, "grad_norm": 1.4176040887832642, "learning_rate": 3.278959915484822e-07, "loss": 0.37126630544662476, "step": 1262 }, { "epoch": 1.3635382955771305, "grad_norm": 1.7383960485458374, "learning_rate": 3.2737554919988713e-07, "loss": 0.4560404419898987, "step": 1264 }, { "epoch": 1.3656957928802589, "grad_norm": 1.2408937215805054, "learning_rate": 3.2685481040138437e-07, "loss": 0.6066496968269348, "step": 1266 }, { "epoch": 1.3678532901833873, "grad_norm": 14.88650131225586, "learning_rate": 3.2633377810039837e-07, "loss": 0.5824995636940002, "step": 1268 }, { "epoch": 1.3700107874865157, "grad_norm": 2.481553792953491, "learning_rate": 3.2581245524601457e-07, "loss": 0.5986048579216003, "step": 1270 }, { "epoch": 1.3721682847896441, "grad_norm": 6.574549198150635, "learning_rate": 3.252908447889633e-07, "loss": 0.6264061331748962, "step": 1272 }, { "epoch": 1.3743257820927723, "grad_norm": 11.117745399475098, "learning_rate": 3.2476894968160245e-07, "loss": 0.47890105843544006, "step": 1274 }, { "epoch": 1.3764832793959008, "grad_norm": 2.9714407920837402, "learning_rate": 3.2424677287790105e-07, "loss": 0.6954044699668884, "step": 1276 }, { "epoch": 1.3786407766990292, "grad_norm": 4.487089157104492, "learning_rate": 3.237243173334229e-07, "loss": 0.3699471354484558, "step": 1278 }, { "epoch": 1.3807982740021574, "grad_norm": 3.5095317363739014, "learning_rate": 3.232015860053093e-07, "loss": 0.49049827456474304, "step": 1280 }, { "epoch": 1.3829557713052858, "grad_norm": 3.8155364990234375, "learning_rate": 3.226785818522622e-07, "loss": 0.5451414585113525, "step": 1282 }, { "epoch": 1.3851132686084142, "grad_norm": 1.868401050567627, "learning_rate": 3.221553078345282e-07, "loss": 0.4515441060066223, "step": 1284 }, { "epoch": 1.3872707659115426, "grad_norm": 7.603207111358643, "learning_rate": 3.216317669138812e-07, "loss": 0.6191388964653015, "step": 1286 }, { "epoch": 1.389428263214671, "grad_norm": 8.218348503112793, "learning_rate": 3.211079620536058e-07, "loss": 0.43059730529785156, "step": 1288 }, { "epoch": 1.3915857605177995, "grad_norm": 4.354824542999268, "learning_rate": 3.205838962184804e-07, "loss": 0.47998175024986267, "step": 1290 }, { "epoch": 1.3937432578209277, "grad_norm": 2.3470609188079834, "learning_rate": 3.2005957237476073e-07, "loss": 0.6489396095275879, "step": 1292 }, { "epoch": 1.395900755124056, "grad_norm": 5.762950897216797, "learning_rate": 3.1953499349016284e-07, "loss": 0.6003392934799194, "step": 1294 }, { "epoch": 1.3980582524271845, "grad_norm": 0.4735300838947296, "learning_rate": 3.190101625338461e-07, "loss": 0.5412855744361877, "step": 1296 }, { "epoch": 1.4002157497303127, "grad_norm": 1.6221954822540283, "learning_rate": 3.18485082476397e-07, "loss": 0.452088862657547, "step": 1298 }, { "epoch": 1.4023732470334411, "grad_norm": 5.19826078414917, "learning_rate": 3.179597562898116e-07, "loss": 0.45721590518951416, "step": 1300 }, { "epoch": 1.4045307443365695, "grad_norm": 2.296773910522461, "learning_rate": 3.1743418694747935e-07, "loss": 0.6805709600448608, "step": 1302 }, { "epoch": 1.406688241639698, "grad_norm": 1.289931058883667, "learning_rate": 3.169083774241658e-07, "loss": 0.48374688625335693, "step": 1304 }, { "epoch": 1.4088457389428264, "grad_norm": 2.8533899784088135, "learning_rate": 3.1638233069599603e-07, "loss": 0.5229544639587402, "step": 1306 }, { "epoch": 1.4110032362459548, "grad_norm": 6.371025085449219, "learning_rate": 3.158560497404377e-07, "loss": 0.5882778763771057, "step": 1308 }, { "epoch": 1.413160733549083, "grad_norm": 1.4979420900344849, "learning_rate": 3.153295375362843e-07, "loss": 0.4229152500629425, "step": 1310 }, { "epoch": 1.4153182308522114, "grad_norm": 1.8354501724243164, "learning_rate": 3.14802797063638e-07, "loss": 0.5944955945014954, "step": 1312 }, { "epoch": 1.4174757281553398, "grad_norm": 2.193164825439453, "learning_rate": 3.1427583130389324e-07, "loss": 0.5871320366859436, "step": 1314 }, { "epoch": 1.419633225458468, "grad_norm": 3.204235315322876, "learning_rate": 3.137486432397193e-07, "loss": 0.2334681898355484, "step": 1316 }, { "epoch": 1.4217907227615965, "grad_norm": 4.964504718780518, "learning_rate": 3.1322123585504395e-07, "loss": 0.5250051617622375, "step": 1318 }, { "epoch": 1.4239482200647249, "grad_norm": 2.2570719718933105, "learning_rate": 3.1269361213503643e-07, "loss": 0.5495631694793701, "step": 1320 }, { "epoch": 1.4261057173678533, "grad_norm": 1.536373496055603, "learning_rate": 3.121657750660901e-07, "loss": 0.48065459728240967, "step": 1322 }, { "epoch": 1.4282632146709817, "grad_norm": 4.908086776733398, "learning_rate": 3.116377276358063e-07, "loss": 0.41572993993759155, "step": 1324 }, { "epoch": 1.4304207119741101, "grad_norm": 8.478764533996582, "learning_rate": 3.111094728329767e-07, "loss": 0.6322842240333557, "step": 1326 }, { "epoch": 1.4325782092772383, "grad_norm": 1.3569598197937012, "learning_rate": 3.1058101364756684e-07, "loss": 0.48605573177337646, "step": 1328 }, { "epoch": 1.4347357065803668, "grad_norm": 2.1617515087127686, "learning_rate": 3.100523530706991e-07, "loss": 0.5922558307647705, "step": 1330 }, { "epoch": 1.4368932038834952, "grad_norm": 3.862966775894165, "learning_rate": 3.095234940946358e-07, "loss": 0.504891574382782, "step": 1332 }, { "epoch": 1.4390507011866236, "grad_norm": 1.3128629922866821, "learning_rate": 3.089944397127621e-07, "loss": 0.5910184383392334, "step": 1334 }, { "epoch": 1.4412081984897518, "grad_norm": 1.3905627727508545, "learning_rate": 3.0846519291956923e-07, "loss": 0.4852849543094635, "step": 1336 }, { "epoch": 1.4433656957928802, "grad_norm": 4.070714950561523, "learning_rate": 3.079357567106375e-07, "loss": 0.609307050704956, "step": 1338 }, { "epoch": 1.4455231930960086, "grad_norm": 1.5611417293548584, "learning_rate": 3.074061340826193e-07, "loss": 0.6423069834709167, "step": 1340 }, { "epoch": 1.447680690399137, "grad_norm": 4.069622993469238, "learning_rate": 3.0687632803322214e-07, "loss": 0.4610182046890259, "step": 1342 }, { "epoch": 1.4498381877022655, "grad_norm": 1.5368692874908447, "learning_rate": 3.0634634156119183e-07, "loss": 0.596781313419342, "step": 1344 }, { "epoch": 1.4519956850053937, "grad_norm": 4.0829572677612305, "learning_rate": 3.0581617766629525e-07, "loss": 0.46168115735054016, "step": 1346 }, { "epoch": 1.454153182308522, "grad_norm": 1.1894795894622803, "learning_rate": 3.052858393493036e-07, "loss": 0.3813992738723755, "step": 1348 }, { "epoch": 1.4563106796116505, "grad_norm": 7.187083721160889, "learning_rate": 3.0475532961197525e-07, "loss": 0.4869483411312103, "step": 1350 }, { "epoch": 1.458468176914779, "grad_norm": 2.1215786933898926, "learning_rate": 3.042246514570388e-07, "loss": 0.44144994020462036, "step": 1352 }, { "epoch": 1.4606256742179071, "grad_norm": 1.7207623720169067, "learning_rate": 3.036938078881764e-07, "loss": 0.5316063165664673, "step": 1354 }, { "epoch": 1.4627831715210355, "grad_norm": 1.4604369401931763, "learning_rate": 3.0316280191000595e-07, "loss": 0.5334872007369995, "step": 1356 }, { "epoch": 1.464940668824164, "grad_norm": 1.4460911750793457, "learning_rate": 3.0263163652806497e-07, "loss": 0.5650609135627747, "step": 1358 }, { "epoch": 1.4670981661272924, "grad_norm": 1.443108320236206, "learning_rate": 3.0210031474879323e-07, "loss": 0.3324916660785675, "step": 1360 }, { "epoch": 1.4692556634304208, "grad_norm": 1.309964656829834, "learning_rate": 3.015688395795154e-07, "loss": 0.6782206892967224, "step": 1362 }, { "epoch": 1.4714131607335492, "grad_norm": 1.0900267362594604, "learning_rate": 3.010372140284247e-07, "loss": 0.42707037925720215, "step": 1364 }, { "epoch": 1.4735706580366774, "grad_norm": 1.5196927785873413, "learning_rate": 3.0050544110456544e-07, "loss": 0.561892032623291, "step": 1366 }, { "epoch": 1.4757281553398058, "grad_norm": 1.3471835851669312, "learning_rate": 2.999735238178159e-07, "loss": 0.5165982842445374, "step": 1368 }, { "epoch": 1.4778856526429343, "grad_norm": 1.6917212009429932, "learning_rate": 2.9944146517887166e-07, "loss": 0.42515087127685547, "step": 1370 }, { "epoch": 1.4800431499460625, "grad_norm": 3.3652212619781494, "learning_rate": 2.989092681992283e-07, "loss": 0.5667294859886169, "step": 1372 }, { "epoch": 1.4822006472491909, "grad_norm": 6.991846561431885, "learning_rate": 2.983769358911643e-07, "loss": 0.4916223883628845, "step": 1374 }, { "epoch": 1.4843581445523193, "grad_norm": 3.88139271736145, "learning_rate": 2.9784447126772434e-07, "loss": 0.5577268600463867, "step": 1376 }, { "epoch": 1.4865156418554477, "grad_norm": 1.634870171546936, "learning_rate": 2.9731187734270173e-07, "loss": 0.5713073015213013, "step": 1378 }, { "epoch": 1.4886731391585761, "grad_norm": 2.828623056411743, "learning_rate": 2.967791571306221e-07, "loss": 0.7753892540931702, "step": 1380 }, { "epoch": 1.4908306364617046, "grad_norm": 2.1929805278778076, "learning_rate": 2.962463136467253e-07, "loss": 0.5757138729095459, "step": 1382 }, { "epoch": 1.4929881337648327, "grad_norm": 1.8372981548309326, "learning_rate": 2.9571334990694927e-07, "loss": 0.4425245523452759, "step": 1384 }, { "epoch": 1.4951456310679612, "grad_norm": 1.2296621799468994, "learning_rate": 2.951802689279126e-07, "loss": 0.6176034808158875, "step": 1386 }, { "epoch": 1.4973031283710896, "grad_norm": 4.398133277893066, "learning_rate": 2.9464707372689734e-07, "loss": 0.6378481984138489, "step": 1388 }, { "epoch": 1.4994606256742178, "grad_norm": 2.845616102218628, "learning_rate": 2.9411376732183206e-07, "loss": 0.6340577602386475, "step": 1390 }, { "epoch": 1.5016181229773462, "grad_norm": 1.8457330465316772, "learning_rate": 2.935803527312748e-07, "loss": 0.41831356287002563, "step": 1392 }, { "epoch": 1.5037756202804746, "grad_norm": 11.14624309539795, "learning_rate": 2.930468329743959e-07, "loss": 0.5453674793243408, "step": 1394 }, { "epoch": 1.505933117583603, "grad_norm": 1.1637507677078247, "learning_rate": 2.9251321107096105e-07, "loss": 0.6097413301467896, "step": 1396 }, { "epoch": 1.5080906148867315, "grad_norm": 1.4595943689346313, "learning_rate": 2.91979490041314e-07, "loss": 0.5867338180541992, "step": 1398 }, { "epoch": 1.5102481121898599, "grad_norm": 8.647173881530762, "learning_rate": 2.9144567290635956e-07, "loss": 0.6274332404136658, "step": 1400 }, { "epoch": 1.512405609492988, "grad_norm": 1.433868169784546, "learning_rate": 2.909117626875466e-07, "loss": 0.530730664730072, "step": 1402 }, { "epoch": 1.5145631067961165, "grad_norm": 2.0332090854644775, "learning_rate": 2.903777624068507e-07, "loss": 0.38328850269317627, "step": 1404 }, { "epoch": 1.516720604099245, "grad_norm": 2.187605381011963, "learning_rate": 2.8984367508675735e-07, "loss": 0.507274866104126, "step": 1406 }, { "epoch": 1.5188781014023731, "grad_norm": 2.2323250770568848, "learning_rate": 2.8930950375024444e-07, "loss": 0.47206202149391174, "step": 1408 }, { "epoch": 1.5210355987055015, "grad_norm": 3.1241204738616943, "learning_rate": 2.8877525142076584e-07, "loss": 0.45956486463546753, "step": 1410 }, { "epoch": 1.52319309600863, "grad_norm": 1.791256070137024, "learning_rate": 2.882409211222335e-07, "loss": 0.29296764731407166, "step": 1412 }, { "epoch": 1.5253505933117584, "grad_norm": 4.025422096252441, "learning_rate": 2.8770651587900075e-07, "loss": 0.471237450838089, "step": 1414 }, { "epoch": 1.5275080906148868, "grad_norm": 1.249983787536621, "learning_rate": 2.8717203871584504e-07, "loss": 0.5922088027000427, "step": 1416 }, { "epoch": 1.5296655879180152, "grad_norm": 5.017333030700684, "learning_rate": 2.866374926579512e-07, "loss": 0.4936787188053131, "step": 1418 }, { "epoch": 1.5318230852211436, "grad_norm": 1.736030101776123, "learning_rate": 2.8610288073089363e-07, "loss": 0.5232613682746887, "step": 1420 }, { "epoch": 1.5339805825242718, "grad_norm": 4.345666885375977, "learning_rate": 2.855682059606196e-07, "loss": 0.5884013772010803, "step": 1422 }, { "epoch": 1.5361380798274002, "grad_norm": 13.682802200317383, "learning_rate": 2.850334713734325e-07, "loss": 0.6141678094863892, "step": 1424 }, { "epoch": 1.5382955771305284, "grad_norm": 1.5001555681228638, "learning_rate": 2.844986799959738e-07, "loss": 0.41682037711143494, "step": 1426 }, { "epoch": 1.5404530744336569, "grad_norm": 1.6063748598098755, "learning_rate": 2.839638348552067e-07, "loss": 0.5947140455245972, "step": 1428 }, { "epoch": 1.5426105717367853, "grad_norm": 1.9265503883361816, "learning_rate": 2.8342893897839855e-07, "loss": 0.504668653011322, "step": 1430 }, { "epoch": 1.5447680690399137, "grad_norm": 2.9574995040893555, "learning_rate": 2.828939953931038e-07, "loss": 0.43785667419433594, "step": 1432 }, { "epoch": 1.5469255663430421, "grad_norm": 1.5642776489257812, "learning_rate": 2.823590071271472e-07, "loss": 0.42886197566986084, "step": 1434 }, { "epoch": 1.5490830636461705, "grad_norm": 0.6376549005508423, "learning_rate": 2.818239772086063e-07, "loss": 0.41987836360931396, "step": 1436 }, { "epoch": 1.551240560949299, "grad_norm": 3.890024423599243, "learning_rate": 2.8128890866579406e-07, "loss": 0.5718374252319336, "step": 1438 }, { "epoch": 1.5533980582524272, "grad_norm": 3.199538230895996, "learning_rate": 2.807538045272427e-07, "loss": 0.6301546096801758, "step": 1440 }, { "epoch": 1.5555555555555556, "grad_norm": 2.968087911605835, "learning_rate": 2.8021866782168547e-07, "loss": 0.5787625908851624, "step": 1442 }, { "epoch": 1.5577130528586838, "grad_norm": 4.71113920211792, "learning_rate": 2.796835015780398e-07, "loss": 0.5471571087837219, "step": 1444 }, { "epoch": 1.5598705501618122, "grad_norm": 3.893944025039673, "learning_rate": 2.79148308825391e-07, "loss": 0.4724005162715912, "step": 1446 }, { "epoch": 1.5620280474649406, "grad_norm": 0.665489673614502, "learning_rate": 2.7861309259297354e-07, "loss": 0.6328169107437134, "step": 1448 }, { "epoch": 1.564185544768069, "grad_norm": 1.5428534746170044, "learning_rate": 2.780778559101556e-07, "loss": 0.4981670379638672, "step": 1450 }, { "epoch": 1.5663430420711975, "grad_norm": 1.2379951477050781, "learning_rate": 2.7754260180642046e-07, "loss": 0.6006782054901123, "step": 1452 }, { "epoch": 1.5685005393743259, "grad_norm": 1.8184620141983032, "learning_rate": 2.770073333113504e-07, "loss": 0.4560186564922333, "step": 1454 }, { "epoch": 1.5706580366774543, "grad_norm": 7.522675037384033, "learning_rate": 2.7647205345460906e-07, "loss": 0.611346423625946, "step": 1456 }, { "epoch": 1.5728155339805825, "grad_norm": 1.3059622049331665, "learning_rate": 2.7593676526592423e-07, "loss": 0.2933533191680908, "step": 1458 }, { "epoch": 1.574973031283711, "grad_norm": 3.4842753410339355, "learning_rate": 2.7540147177507123e-07, "loss": 0.5341723561286926, "step": 1460 }, { "epoch": 1.577130528586839, "grad_norm": 5.351451396942139, "learning_rate": 2.74866176011855e-07, "loss": 0.6091599464416504, "step": 1462 }, { "epoch": 1.5792880258899675, "grad_norm": 2.680004119873047, "learning_rate": 2.743308810060935e-07, "loss": 0.7066933512687683, "step": 1464 }, { "epoch": 1.581445523193096, "grad_norm": 2.588871955871582, "learning_rate": 2.737955897876005e-07, "loss": 0.6565461754798889, "step": 1466 }, { "epoch": 1.5836030204962244, "grad_norm": 1.496903896331787, "learning_rate": 2.732603053861681e-07, "loss": 0.47990620136260986, "step": 1468 }, { "epoch": 1.5857605177993528, "grad_norm": 6.212797164916992, "learning_rate": 2.7272503083155004e-07, "loss": 0.4671979546546936, "step": 1470 }, { "epoch": 1.5879180151024812, "grad_norm": 4.061429977416992, "learning_rate": 2.7218976915344416e-07, "loss": 0.6535285711288452, "step": 1472 }, { "epoch": 1.5900755124056096, "grad_norm": 1.503240942955017, "learning_rate": 2.7165452338147555e-07, "loss": 0.5079244375228882, "step": 1474 }, { "epoch": 1.5922330097087378, "grad_norm": 3.7657103538513184, "learning_rate": 2.7111929654517925e-07, "loss": 0.6188565492630005, "step": 1476 }, { "epoch": 1.5943905070118662, "grad_norm": 1.6457622051239014, "learning_rate": 2.7058409167398305e-07, "loss": 0.5721461772918701, "step": 1478 }, { "epoch": 1.5965480043149944, "grad_norm": 2.564640760421753, "learning_rate": 2.7004891179719044e-07, "loss": 0.594935417175293, "step": 1480 }, { "epoch": 1.5987055016181229, "grad_norm": 1.901548147201538, "learning_rate": 2.695137599439635e-07, "loss": 0.5292646884918213, "step": 1482 }, { "epoch": 1.6008629989212513, "grad_norm": 2.8939266204833984, "learning_rate": 2.689786391433055e-07, "loss": 0.3793540894985199, "step": 1484 }, { "epoch": 1.6030204962243797, "grad_norm": 1.6566553115844727, "learning_rate": 2.6844355242404434e-07, "loss": 0.6384937167167664, "step": 1486 }, { "epoch": 1.6051779935275081, "grad_norm": 1.6839215755462646, "learning_rate": 2.6790850281481455e-07, "loss": 0.5815557837486267, "step": 1488 }, { "epoch": 1.6073354908306365, "grad_norm": 2.1547319889068604, "learning_rate": 2.6737349334404086e-07, "loss": 0.4502698481082916, "step": 1490 }, { "epoch": 1.609492988133765, "grad_norm": 6.962564945220947, "learning_rate": 2.66838527039921e-07, "loss": 0.5677059888839722, "step": 1492 }, { "epoch": 1.6116504854368932, "grad_norm": 2.3401830196380615, "learning_rate": 2.663036069304079e-07, "loss": 0.7475476264953613, "step": 1494 }, { "epoch": 1.6138079827400216, "grad_norm": 3.999359130859375, "learning_rate": 2.657687360431935e-07, "loss": 0.6050864458084106, "step": 1496 }, { "epoch": 1.61596548004315, "grad_norm": 1.3075038194656372, "learning_rate": 2.6523391740569074e-07, "loss": 0.5616152286529541, "step": 1498 }, { "epoch": 1.6181229773462782, "grad_norm": 1.1486589908599854, "learning_rate": 2.646991540450172e-07, "loss": 0.5269895792007446, "step": 1500 }, { "epoch": 1.6202804746494066, "grad_norm": 7.192336082458496, "learning_rate": 2.6416444898797716e-07, "loss": 0.47785210609436035, "step": 1502 }, { "epoch": 1.622437971952535, "grad_norm": 1.773577332496643, "learning_rate": 2.6362980526104536e-07, "loss": 0.3188018500804901, "step": 1504 }, { "epoch": 1.6245954692556634, "grad_norm": 4.216702938079834, "learning_rate": 2.630952258903491e-07, "loss": 0.5588706135749817, "step": 1506 }, { "epoch": 1.6267529665587919, "grad_norm": 1.6883203983306885, "learning_rate": 2.6256071390165147e-07, "loss": 0.3531300723552704, "step": 1508 }, { "epoch": 1.6289104638619203, "grad_norm": 1.359221339225769, "learning_rate": 2.620262723203342e-07, "loss": 0.3672279715538025, "step": 1510 }, { "epoch": 1.6310679611650487, "grad_norm": 8.669332504272461, "learning_rate": 2.6149190417138057e-07, "loss": 0.6095560193061829, "step": 1512 }, { "epoch": 1.633225458468177, "grad_norm": 3.492126226425171, "learning_rate": 2.609576124793581e-07, "loss": 0.41963210701942444, "step": 1514 }, { "epoch": 1.6353829557713053, "grad_norm": 3.135106086730957, "learning_rate": 2.604234002684016e-07, "loss": 0.4734860360622406, "step": 1516 }, { "epoch": 1.6375404530744335, "grad_norm": 1.9704272747039795, "learning_rate": 2.5988927056219613e-07, "loss": 0.596852719783783, "step": 1518 }, { "epoch": 1.639697950377562, "grad_norm": 2.8910233974456787, "learning_rate": 2.593552263839596e-07, "loss": 0.643505871295929, "step": 1520 }, { "epoch": 1.6418554476806904, "grad_norm": 2.2476956844329834, "learning_rate": 2.588212707564259e-07, "loss": 0.4490068554878235, "step": 1522 }, { "epoch": 1.6440129449838188, "grad_norm": 6.380528926849365, "learning_rate": 2.582874067018278e-07, "loss": 0.48139023780822754, "step": 1524 }, { "epoch": 1.6461704422869472, "grad_norm": 2.361678123474121, "learning_rate": 2.577536372418795e-07, "loss": 0.4531154930591583, "step": 1526 }, { "epoch": 1.6483279395900756, "grad_norm": 1.433486819267273, "learning_rate": 2.572199653977602e-07, "loss": 0.5615776181221008, "step": 1528 }, { "epoch": 1.650485436893204, "grad_norm": 1.2627114057540894, "learning_rate": 2.5668639419009606e-07, "loss": 0.5969760417938232, "step": 1530 }, { "epoch": 1.6526429341963322, "grad_norm": 6.023662090301514, "learning_rate": 2.5615292663894406e-07, "loss": 0.7165044546127319, "step": 1532 }, { "epoch": 1.6548004314994607, "grad_norm": 3.729762315750122, "learning_rate": 2.556195657637744e-07, "loss": 0.5139379501342773, "step": 1534 }, { "epoch": 1.6569579288025889, "grad_norm": 3.813971996307373, "learning_rate": 2.5508631458345325e-07, "loss": 0.40219447016716003, "step": 1536 }, { "epoch": 1.6591154261057173, "grad_norm": 2.2718698978424072, "learning_rate": 2.545531761162263e-07, "loss": 0.6281888484954834, "step": 1538 }, { "epoch": 1.6612729234088457, "grad_norm": 1.6029448509216309, "learning_rate": 2.540201533797007e-07, "loss": 0.5198391675949097, "step": 1540 }, { "epoch": 1.6634304207119741, "grad_norm": 1.972841501235962, "learning_rate": 2.5348724939082916e-07, "loss": 0.5897455811500549, "step": 1542 }, { "epoch": 1.6655879180151025, "grad_norm": 1.6188420057296753, "learning_rate": 2.5295446716589194e-07, "loss": 0.36811563372612, "step": 1544 }, { "epoch": 1.667745415318231, "grad_norm": 2.6556005477905273, "learning_rate": 2.5242180972048e-07, "loss": 0.48183539509773254, "step": 1546 }, { "epoch": 1.6699029126213594, "grad_norm": 1.075056791305542, "learning_rate": 2.5188928006947846e-07, "loss": 0.6169477105140686, "step": 1548 }, { "epoch": 1.6720604099244876, "grad_norm": 4.6703009605407715, "learning_rate": 2.513568812270487e-07, "loss": 0.481448233127594, "step": 1550 }, { "epoch": 1.674217907227616, "grad_norm": 2.274782180786133, "learning_rate": 2.5082461620661196e-07, "loss": 0.6557754874229431, "step": 1552 }, { "epoch": 1.6763754045307442, "grad_norm": 1.1385339498519897, "learning_rate": 2.502924880208318e-07, "loss": 0.4550181031227112, "step": 1554 }, { "epoch": 1.6785329018338726, "grad_norm": 9.544242858886719, "learning_rate": 2.497604996815976e-07, "loss": 0.5606685876846313, "step": 1556 }, { "epoch": 1.680690399137001, "grad_norm": 23.09193229675293, "learning_rate": 2.4922865420000693e-07, "loss": 0.4275263547897339, "step": 1558 }, { "epoch": 1.6828478964401294, "grad_norm": 1.2717275619506836, "learning_rate": 2.486969545863489e-07, "loss": 0.604001522064209, "step": 1560 }, { "epoch": 1.6850053937432579, "grad_norm": 3.0152664184570312, "learning_rate": 2.4816540385008696e-07, "loss": 0.5382636189460754, "step": 1562 }, { "epoch": 1.6871628910463863, "grad_norm": 2.152043581008911, "learning_rate": 2.4763400499984184e-07, "loss": 0.48857036232948303, "step": 1564 }, { "epoch": 1.6893203883495147, "grad_norm": 1.6351813077926636, "learning_rate": 2.471027610433748e-07, "loss": 0.604580819606781, "step": 1566 }, { "epoch": 1.691477885652643, "grad_norm": 1.648799180984497, "learning_rate": 2.465716749875701e-07, "loss": 0.5242129564285278, "step": 1568 }, { "epoch": 1.6936353829557713, "grad_norm": 1.386277198791504, "learning_rate": 2.4604074983841853e-07, "loss": 0.5764685869216919, "step": 1570 }, { "epoch": 1.6957928802588995, "grad_norm": 1.8634097576141357, "learning_rate": 2.4550998860099993e-07, "loss": 0.45359018445014954, "step": 1572 }, { "epoch": 1.697950377562028, "grad_norm": 5.895047187805176, "learning_rate": 2.4497939427946654e-07, "loss": 0.6302123069763184, "step": 1574 }, { "epoch": 1.7001078748651564, "grad_norm": 4.849222183227539, "learning_rate": 2.444489698770256e-07, "loss": 0.5112524032592773, "step": 1576 }, { "epoch": 1.7022653721682848, "grad_norm": 4.763610363006592, "learning_rate": 2.43918718395923e-07, "loss": 0.507486879825592, "step": 1578 }, { "epoch": 1.7044228694714132, "grad_norm": 1.9259899854660034, "learning_rate": 2.4338864283742554e-07, "loss": 0.5151503086090088, "step": 1580 }, { "epoch": 1.7065803667745416, "grad_norm": 1.45798659324646, "learning_rate": 2.428587462018044e-07, "loss": 0.578480064868927, "step": 1582 }, { "epoch": 1.70873786407767, "grad_norm": 1.3601330518722534, "learning_rate": 2.4232903148831805e-07, "loss": 0.5815561413764954, "step": 1584 }, { "epoch": 1.7108953613807982, "grad_norm": 2.1496477127075195, "learning_rate": 2.4179950169519514e-07, "loss": 0.48085469007492065, "step": 1586 }, { "epoch": 1.7130528586839266, "grad_norm": 1.8896552324295044, "learning_rate": 2.4127015981961797e-07, "loss": 0.44769376516342163, "step": 1588 }, { "epoch": 1.715210355987055, "grad_norm": 3.8881163597106934, "learning_rate": 2.407410088577047e-07, "loss": 0.578763484954834, "step": 1590 }, { "epoch": 1.7173678532901833, "grad_norm": 3.5086944103240967, "learning_rate": 2.402120518044935e-07, "loss": 0.5796621441841125, "step": 1592 }, { "epoch": 1.7195253505933117, "grad_norm": 1.3541215658187866, "learning_rate": 2.396832916539247e-07, "loss": 0.6715743541717529, "step": 1594 }, { "epoch": 1.72168284789644, "grad_norm": 1.8187955617904663, "learning_rate": 2.391547313988239e-07, "loss": 0.5842028260231018, "step": 1596 }, { "epoch": 1.7238403451995685, "grad_norm": 2.136215925216675, "learning_rate": 2.386263740308859e-07, "loss": 0.5518381595611572, "step": 1598 }, { "epoch": 1.725997842502697, "grad_norm": 5.43556022644043, "learning_rate": 2.3809822254065637e-07, "loss": 0.4855960011482239, "step": 1600 }, { "epoch": 1.7281553398058254, "grad_norm": 1.7936867475509644, "learning_rate": 2.375702799175164e-07, "loss": 0.5670949816703796, "step": 1602 }, { "epoch": 1.7303128371089536, "grad_norm": 3.9360201358795166, "learning_rate": 2.3704254914966436e-07, "loss": 0.4383196234703064, "step": 1604 }, { "epoch": 1.732470334412082, "grad_norm": 1.8262487649917603, "learning_rate": 2.365150332240999e-07, "loss": 0.4360693395137787, "step": 1606 }, { "epoch": 1.7346278317152104, "grad_norm": 5.341520309448242, "learning_rate": 2.3598773512660636e-07, "loss": 0.5174295902252197, "step": 1608 }, { "epoch": 1.7367853290183386, "grad_norm": 1.4834777116775513, "learning_rate": 2.3546065784173425e-07, "loss": 0.5596581697463989, "step": 1610 }, { "epoch": 1.738942826321467, "grad_norm": 2.0027642250061035, "learning_rate": 2.349338043527843e-07, "loss": 0.5774880051612854, "step": 1612 }, { "epoch": 1.7411003236245954, "grad_norm": 2.79825758934021, "learning_rate": 2.3440717764179053e-07, "loss": 0.34554505348205566, "step": 1614 }, { "epoch": 1.7432578209277239, "grad_norm": 1.6454131603240967, "learning_rate": 2.338807806895033e-07, "loss": 0.470796674489975, "step": 1616 }, { "epoch": 1.7454153182308523, "grad_norm": 0.6968181133270264, "learning_rate": 2.3335461647537252e-07, "loss": 0.458304226398468, "step": 1618 }, { "epoch": 1.7475728155339807, "grad_norm": 1.9778035879135132, "learning_rate": 2.3282868797753092e-07, "loss": 0.3832884132862091, "step": 1620 }, { "epoch": 1.7497303128371091, "grad_norm": 1.3171417713165283, "learning_rate": 2.3230299817277694e-07, "loss": 0.33851158618927, "step": 1622 }, { "epoch": 1.7518878101402373, "grad_norm": 1.1305524110794067, "learning_rate": 2.3177755003655803e-07, "loss": 0.38404303789138794, "step": 1624 }, { "epoch": 1.7540453074433657, "grad_norm": 3.3389196395874023, "learning_rate": 2.3125234654295378e-07, "loss": 0.4796540141105652, "step": 1626 }, { "epoch": 1.756202804746494, "grad_norm": 2.8726422786712646, "learning_rate": 2.3072739066465906e-07, "loss": 0.5632970333099365, "step": 1628 }, { "epoch": 1.7583603020496223, "grad_norm": 2.0043997764587402, "learning_rate": 2.3020268537296728e-07, "loss": 0.610961377620697, "step": 1630 }, { "epoch": 1.7605177993527508, "grad_norm": 1.977318286895752, "learning_rate": 2.2967823363775334e-07, "loss": 0.5584444403648376, "step": 1632 }, { "epoch": 1.7626752966558792, "grad_norm": 1.4552688598632812, "learning_rate": 2.2915403842745718e-07, "loss": 0.5306387543678284, "step": 1634 }, { "epoch": 1.7648327939590076, "grad_norm": 1.6729410886764526, "learning_rate": 2.286301027090668e-07, "loss": 0.7393071055412292, "step": 1636 }, { "epoch": 1.766990291262136, "grad_norm": 1.8149133920669556, "learning_rate": 2.2810642944810122e-07, "loss": 0.6838573217391968, "step": 1638 }, { "epoch": 1.7691477885652644, "grad_norm": 1.5463119745254517, "learning_rate": 2.2758302160859426e-07, "loss": 0.6229934692382812, "step": 1640 }, { "epoch": 1.7713052858683926, "grad_norm": 1.5769580602645874, "learning_rate": 2.2705988215307703e-07, "loss": 0.5759105682373047, "step": 1642 }, { "epoch": 1.773462783171521, "grad_norm": 1.2159730195999146, "learning_rate": 2.2653701404256204e-07, "loss": 0.5320509076118469, "step": 1644 }, { "epoch": 1.7756202804746493, "grad_norm": 20.918190002441406, "learning_rate": 2.260144202365254e-07, "loss": 0.7069261074066162, "step": 1646 }, { "epoch": 1.7777777777777777, "grad_norm": 1.1241180896759033, "learning_rate": 2.2549210369289124e-07, "loss": 0.5508931875228882, "step": 1648 }, { "epoch": 1.779935275080906, "grad_norm": 4.296023368835449, "learning_rate": 2.24970067368014e-07, "loss": 0.27031293511390686, "step": 1650 }, { "epoch": 1.7820927723840345, "grad_norm": 1.679374098777771, "learning_rate": 2.24448314216662e-07, "loss": 0.4512391686439514, "step": 1652 }, { "epoch": 1.784250269687163, "grad_norm": 2.161573886871338, "learning_rate": 2.2392684719200116e-07, "loss": 0.3788191080093384, "step": 1654 }, { "epoch": 1.7864077669902914, "grad_norm": 6.881853103637695, "learning_rate": 2.2340566924557735e-07, "loss": 0.5204552412033081, "step": 1656 }, { "epoch": 1.7885652642934198, "grad_norm": 2.566661834716797, "learning_rate": 2.228847833273007e-07, "loss": 0.5021325945854187, "step": 1658 }, { "epoch": 1.790722761596548, "grad_norm": 1.374242901802063, "learning_rate": 2.223641923854282e-07, "loss": 0.5507632493972778, "step": 1660 }, { "epoch": 1.7928802588996764, "grad_norm": 2.6447982788085938, "learning_rate": 2.2184389936654736e-07, "loss": 0.5545051097869873, "step": 1662 }, { "epoch": 1.7950377562028046, "grad_norm": 1.4282158613204956, "learning_rate": 2.2132390721555933e-07, "loss": 0.5268256068229675, "step": 1664 }, { "epoch": 1.797195253505933, "grad_norm": 9.742328643798828, "learning_rate": 2.2080421887566236e-07, "loss": 0.28805431723594666, "step": 1666 }, { "epoch": 1.7993527508090614, "grad_norm": 2.320671319961548, "learning_rate": 2.2028483728833524e-07, "loss": 0.61153244972229, "step": 1668 }, { "epoch": 1.8015102481121898, "grad_norm": 1.3254764080047607, "learning_rate": 2.197657653933202e-07, "loss": 0.5806170105934143, "step": 1670 }, { "epoch": 1.8036677454153183, "grad_norm": 1.328969120979309, "learning_rate": 2.1924700612860692e-07, "loss": 0.5318849086761475, "step": 1672 }, { "epoch": 1.8058252427184467, "grad_norm": 0.41818344593048096, "learning_rate": 2.1872856243041532e-07, "loss": 0.27527254819869995, "step": 1674 }, { "epoch": 1.807982740021575, "grad_norm": 4.447193622589111, "learning_rate": 2.1821043723317935e-07, "loss": 0.6419240236282349, "step": 1676 }, { "epoch": 1.8101402373247033, "grad_norm": 1.7327150106430054, "learning_rate": 2.1769263346953004e-07, "loss": 0.5605652332305908, "step": 1678 }, { "epoch": 1.8122977346278317, "grad_norm": 1.8278568983078003, "learning_rate": 2.1717515407027937e-07, "loss": 0.5259844660758972, "step": 1680 }, { "epoch": 1.81445523193096, "grad_norm": 2.8355183601379395, "learning_rate": 2.1665800196440314e-07, "loss": 0.48816215991973877, "step": 1682 }, { "epoch": 1.8166127292340883, "grad_norm": 4.203651428222656, "learning_rate": 2.161411800790247e-07, "loss": 0.622113049030304, "step": 1684 }, { "epoch": 1.8187702265372168, "grad_norm": 1.8128265142440796, "learning_rate": 2.1562469133939836e-07, "loss": 0.23197607696056366, "step": 1686 }, { "epoch": 1.8209277238403452, "grad_norm": 4.64774227142334, "learning_rate": 2.1510853866889278e-07, "loss": 0.5721830129623413, "step": 1688 }, { "epoch": 1.8230852211434736, "grad_norm": 1.5357946157455444, "learning_rate": 2.1459272498897452e-07, "loss": 0.47745242714881897, "step": 1690 }, { "epoch": 1.825242718446602, "grad_norm": 6.159779071807861, "learning_rate": 2.1407725321919107e-07, "loss": 0.6153979301452637, "step": 1692 }, { "epoch": 1.8274002157497304, "grad_norm": 7.977688312530518, "learning_rate": 2.1356212627715524e-07, "loss": 0.5228186845779419, "step": 1694 }, { "epoch": 1.8295577130528586, "grad_norm": 1.6970831155776978, "learning_rate": 2.1304734707852785e-07, "loss": 0.49107879400253296, "step": 1696 }, { "epoch": 1.831715210355987, "grad_norm": 1.6975973844528198, "learning_rate": 2.125329185370011e-07, "loss": 0.5908475518226624, "step": 1698 }, { "epoch": 1.8338727076591155, "grad_norm": 1.8754093647003174, "learning_rate": 2.1201884356428313e-07, "loss": 0.46887385845184326, "step": 1700 }, { "epoch": 1.8360302049622437, "grad_norm": 2.0672781467437744, "learning_rate": 2.1150512507008016e-07, "loss": 0.6688355207443237, "step": 1702 }, { "epoch": 1.838187702265372, "grad_norm": 6.069046497344971, "learning_rate": 2.1099176596208134e-07, "loss": 0.5004164576530457, "step": 1704 }, { "epoch": 1.8403451995685005, "grad_norm": 2.647517204284668, "learning_rate": 2.104787691459411e-07, "loss": 0.6052039265632629, "step": 1706 }, { "epoch": 1.842502696871629, "grad_norm": 24.532392501831055, "learning_rate": 2.099661375252636e-07, "loss": 0.5694432854652405, "step": 1708 }, { "epoch": 1.8446601941747574, "grad_norm": 1.9516592025756836, "learning_rate": 2.0945387400158597e-07, "loss": 0.38723820447921753, "step": 1710 }, { "epoch": 1.8468176914778858, "grad_norm": 5.9051408767700195, "learning_rate": 2.0894198147436177e-07, "loss": 0.5640073418617249, "step": 1712 }, { "epoch": 1.8489751887810142, "grad_norm": 1.8454663753509521, "learning_rate": 2.0843046284094474e-07, "loss": 0.5534006357192993, "step": 1714 }, { "epoch": 1.8511326860841424, "grad_norm": 2.155219316482544, "learning_rate": 2.0791932099657221e-07, "loss": 0.4856148660182953, "step": 1716 }, { "epoch": 1.8532901833872708, "grad_norm": 6.839637279510498, "learning_rate": 2.074085588343491e-07, "loss": 0.41986072063446045, "step": 1718 }, { "epoch": 1.855447680690399, "grad_norm": 1.4975872039794922, "learning_rate": 2.0689817924523112e-07, "loss": 0.4183667004108429, "step": 1720 }, { "epoch": 1.8576051779935274, "grad_norm": 1.6008542776107788, "learning_rate": 2.0638818511800865e-07, "loss": 0.5753411650657654, "step": 1722 }, { "epoch": 1.8597626752966558, "grad_norm": 1.6697242259979248, "learning_rate": 2.0587857933929037e-07, "loss": 0.6102425456047058, "step": 1724 }, { "epoch": 1.8619201725997843, "grad_norm": 1.3475979566574097, "learning_rate": 2.0536936479348672e-07, "loss": 0.5690769553184509, "step": 1726 }, { "epoch": 1.8640776699029127, "grad_norm": 2.36917781829834, "learning_rate": 2.0486054436279394e-07, "loss": 0.26033759117126465, "step": 1728 }, { "epoch": 1.866235167206041, "grad_norm": 0.5969313383102417, "learning_rate": 2.0435212092717729e-07, "loss": 0.2784996032714844, "step": 1730 }, { "epoch": 1.8683926645091695, "grad_norm": 1.8333349227905273, "learning_rate": 2.0384409736435526e-07, "loss": 0.5710358619689941, "step": 1732 }, { "epoch": 1.8705501618122977, "grad_norm": 1.0351983308792114, "learning_rate": 2.033364765497828e-07, "loss": 0.352516233921051, "step": 1734 }, { "epoch": 1.8727076591154261, "grad_norm": 5.123870849609375, "learning_rate": 2.0282926135663554e-07, "loss": 0.5661641359329224, "step": 1736 }, { "epoch": 1.8748651564185543, "grad_norm": 1.9763929843902588, "learning_rate": 2.0232245465579306e-07, "loss": 0.6391258239746094, "step": 1738 }, { "epoch": 1.8770226537216828, "grad_norm": 2.7457945346832275, "learning_rate": 2.0181605931582284e-07, "loss": 0.4859541952610016, "step": 1740 }, { "epoch": 1.8791801510248112, "grad_norm": 1.343418836593628, "learning_rate": 2.013100782029641e-07, "loss": 0.57615065574646, "step": 1742 }, { "epoch": 1.8813376483279396, "grad_norm": 2.796921491622925, "learning_rate": 2.0080451418111143e-07, "loss": 0.47713714838027954, "step": 1744 }, { "epoch": 1.883495145631068, "grad_norm": 1.2428369522094727, "learning_rate": 2.0029937011179882e-07, "loss": 0.46215853095054626, "step": 1746 }, { "epoch": 1.8856526429341964, "grad_norm": 1.3065931797027588, "learning_rate": 1.9979464885418295e-07, "loss": 0.37958696484565735, "step": 1748 }, { "epoch": 1.8878101402373249, "grad_norm": 0.4351181089878082, "learning_rate": 1.9929035326502773e-07, "loss": 0.5532968044281006, "step": 1750 }, { "epoch": 1.889967637540453, "grad_norm": 2.7067880630493164, "learning_rate": 1.9878648619868765e-07, "loss": 0.5427120923995972, "step": 1752 }, { "epoch": 1.8921251348435815, "grad_norm": 1.2321635484695435, "learning_rate": 1.9828305050709144e-07, "loss": 0.4515300989151001, "step": 1754 }, { "epoch": 1.8942826321467097, "grad_norm": 1.237924337387085, "learning_rate": 1.9778004903972667e-07, "loss": 0.6264490485191345, "step": 1756 }, { "epoch": 1.896440129449838, "grad_norm": 2.399315118789673, "learning_rate": 1.9727748464362276e-07, "loss": 0.5656343698501587, "step": 1758 }, { "epoch": 1.8985976267529665, "grad_norm": 2.262449264526367, "learning_rate": 1.9677536016333556e-07, "loss": 0.4433645009994507, "step": 1760 }, { "epoch": 1.900755124056095, "grad_norm": 0.9049375653266907, "learning_rate": 1.9627367844093078e-07, "loss": 0.5328507423400879, "step": 1762 }, { "epoch": 1.9029126213592233, "grad_norm": 3.406168222427368, "learning_rate": 1.9577244231596807e-07, "loss": 0.5393190979957581, "step": 1764 }, { "epoch": 1.9050701186623518, "grad_norm": 5.9175872802734375, "learning_rate": 1.9527165462548528e-07, "loss": 0.5409752130508423, "step": 1766 }, { "epoch": 1.9072276159654802, "grad_norm": 3.4059221744537354, "learning_rate": 1.9477131820398158e-07, "loss": 0.4544711410999298, "step": 1768 }, { "epoch": 1.9093851132686084, "grad_norm": 2.6678411960601807, "learning_rate": 1.942714358834024e-07, "loss": 0.4812181890010834, "step": 1770 }, { "epoch": 1.9115426105717368, "grad_norm": 2.3084359169006348, "learning_rate": 1.9377201049312252e-07, "loss": 0.5532037615776062, "step": 1772 }, { "epoch": 1.913700107874865, "grad_norm": 1.5713317394256592, "learning_rate": 1.9327304485993084e-07, "loss": 0.5627604722976685, "step": 1774 }, { "epoch": 1.9158576051779934, "grad_norm": 4.625167369842529, "learning_rate": 1.9277454180801367e-07, "loss": 0.5986460447311401, "step": 1776 }, { "epoch": 1.9180151024811218, "grad_norm": 4.1043267250061035, "learning_rate": 1.9227650415893914e-07, "loss": 0.5130695700645447, "step": 1778 }, { "epoch": 1.9201725997842503, "grad_norm": 1.6170152425765991, "learning_rate": 1.9177893473164142e-07, "loss": 0.3731135129928589, "step": 1780 }, { "epoch": 1.9223300970873787, "grad_norm": 1.3302977085113525, "learning_rate": 1.9128183634240414e-07, "loss": 0.4674024283885956, "step": 1782 }, { "epoch": 1.924487594390507, "grad_norm": 1.3277769088745117, "learning_rate": 1.907852118048451e-07, "loss": 0.5244305729866028, "step": 1784 }, { "epoch": 1.9266450916936355, "grad_norm": 1.839621663093567, "learning_rate": 1.902890639298998e-07, "loss": 0.6807081699371338, "step": 1786 }, { "epoch": 1.9288025889967637, "grad_norm": 22.250324249267578, "learning_rate": 1.8979339552580615e-07, "loss": 0.3220374882221222, "step": 1788 }, { "epoch": 1.9309600862998921, "grad_norm": 1.6265594959259033, "learning_rate": 1.8929820939808783e-07, "loss": 0.6456558108329773, "step": 1790 }, { "epoch": 1.9331175836030206, "grad_norm": 2.020714282989502, "learning_rate": 1.8880350834953912e-07, "loss": 0.5007312297821045, "step": 1792 }, { "epoch": 1.9352750809061487, "grad_norm": 2.1667919158935547, "learning_rate": 1.8830929518020833e-07, "loss": 0.46376651525497437, "step": 1794 }, { "epoch": 1.9374325782092772, "grad_norm": 5.430749893188477, "learning_rate": 1.8781557268738275e-07, "loss": 0.6586015820503235, "step": 1796 }, { "epoch": 1.9395900755124056, "grad_norm": 0.6731663942337036, "learning_rate": 1.8732234366557225e-07, "loss": 0.6122515797615051, "step": 1798 }, { "epoch": 1.941747572815534, "grad_norm": 2.9628219604492188, "learning_rate": 1.8682961090649342e-07, "loss": 0.7105916142463684, "step": 1800 }, { "epoch": 1.9439050701186624, "grad_norm": 1.580090045928955, "learning_rate": 1.8633737719905428e-07, "loss": 0.4230397939682007, "step": 1802 }, { "epoch": 1.9460625674217908, "grad_norm": 4.340382099151611, "learning_rate": 1.8584564532933784e-07, "loss": 0.6302311420440674, "step": 1804 }, { "epoch": 1.948220064724919, "grad_norm": 1.663780689239502, "learning_rate": 1.853544180805871e-07, "loss": 0.3612719774246216, "step": 1806 }, { "epoch": 1.9503775620280475, "grad_norm": 1.57147216796875, "learning_rate": 1.8486369823318833e-07, "loss": 0.5454095005989075, "step": 1808 }, { "epoch": 1.9525350593311759, "grad_norm": 1.2029516696929932, "learning_rate": 1.8437348856465623e-07, "loss": 0.564690351486206, "step": 1810 }, { "epoch": 1.954692556634304, "grad_norm": 1.5036405324935913, "learning_rate": 1.8388379184961795e-07, "loss": 0.2203519642353058, "step": 1812 }, { "epoch": 1.9568500539374325, "grad_norm": 1.5001392364501953, "learning_rate": 1.8339461085979686e-07, "loss": 0.5622031092643738, "step": 1814 }, { "epoch": 1.959007551240561, "grad_norm": 3.3546156883239746, "learning_rate": 1.8290594836399765e-07, "loss": 0.5708537697792053, "step": 1816 }, { "epoch": 1.9611650485436893, "grad_norm": 3.217376232147217, "learning_rate": 1.8241780712809007e-07, "loss": 0.5283911228179932, "step": 1818 }, { "epoch": 1.9633225458468178, "grad_norm": 1.1382657289505005, "learning_rate": 1.8193018991499364e-07, "loss": 0.6459410786628723, "step": 1820 }, { "epoch": 1.9654800431499462, "grad_norm": 1.5658422708511353, "learning_rate": 1.8144309948466175e-07, "loss": 0.5700141191482544, "step": 1822 }, { "epoch": 1.9676375404530746, "grad_norm": 1.9039454460144043, "learning_rate": 1.8095653859406628e-07, "loss": 0.4810370206832886, "step": 1824 }, { "epoch": 1.9697950377562028, "grad_norm": 3.176187038421631, "learning_rate": 1.8047050999718184e-07, "loss": 0.6500232815742493, "step": 1826 }, { "epoch": 1.9719525350593312, "grad_norm": 1.7816401720046997, "learning_rate": 1.7998501644497006e-07, "loss": 0.4381594657897949, "step": 1828 }, { "epoch": 1.9741100323624594, "grad_norm": 2.0123534202575684, "learning_rate": 1.795000606853646e-07, "loss": 0.5198497772216797, "step": 1830 }, { "epoch": 1.9762675296655878, "grad_norm": 1.8685188293457031, "learning_rate": 1.7901564546325436e-07, "loss": 0.5583903193473816, "step": 1832 }, { "epoch": 1.9784250269687162, "grad_norm": 7.242468357086182, "learning_rate": 1.7853177352046971e-07, "loss": 0.6218190789222717, "step": 1834 }, { "epoch": 1.9805825242718447, "grad_norm": 4.002262592315674, "learning_rate": 1.7804844759576538e-07, "loss": 0.5924632549285889, "step": 1836 }, { "epoch": 1.982740021574973, "grad_norm": 1.834091067314148, "learning_rate": 1.775656704248057e-07, "loss": 0.5840417146682739, "step": 1838 }, { "epoch": 1.9848975188781015, "grad_norm": 1.513541340827942, "learning_rate": 1.7708344474014924e-07, "loss": 0.5099426507949829, "step": 1840 }, { "epoch": 1.98705501618123, "grad_norm": 1.4789743423461914, "learning_rate": 1.7660177327123287e-07, "loss": 0.5921831130981445, "step": 1842 }, { "epoch": 1.9892125134843581, "grad_norm": 1.290024995803833, "learning_rate": 1.7612065874435677e-07, "loss": 0.5426990985870361, "step": 1844 }, { "epoch": 1.9913700107874865, "grad_norm": 1.4434101581573486, "learning_rate": 1.7564010388266837e-07, "loss": 0.5949893593788147, "step": 1846 }, { "epoch": 1.9935275080906147, "grad_norm": 7.057824611663818, "learning_rate": 1.7516011140614795e-07, "loss": 0.4401338994503021, "step": 1848 }, { "epoch": 1.9956850053937432, "grad_norm": 2.867464303970337, "learning_rate": 1.7468068403159218e-07, "loss": 0.4908779263496399, "step": 1850 }, { "epoch": 1.9978425026968716, "grad_norm": 1.0820274353027344, "learning_rate": 1.7420182447259926e-07, "loss": 0.41853272914886475, "step": 1852 }, { "epoch": 2.0, "grad_norm": 1.5740938186645508, "learning_rate": 1.7372353543955375e-07, "loss": 0.48160526156425476, "step": 1854 }, { "epoch": 2.0021574973031284, "grad_norm": 2.7098584175109863, "learning_rate": 1.7324581963961088e-07, "loss": 0.5192286372184753, "step": 1856 }, { "epoch": 2.004314994606257, "grad_norm": 1.2259626388549805, "learning_rate": 1.7276867977668117e-07, "loss": 0.38666832447052, "step": 1858 }, { "epoch": 2.0064724919093853, "grad_norm": 2.3588082790374756, "learning_rate": 1.7229211855141535e-07, "loss": 0.5268582105636597, "step": 1860 }, { "epoch": 2.0086299892125137, "grad_norm": 3.2187092304229736, "learning_rate": 1.718161386611892e-07, "loss": 0.39220139384269714, "step": 1862 }, { "epoch": 2.0107874865156417, "grad_norm": 3.2906999588012695, "learning_rate": 1.71340742800088e-07, "loss": 0.5459554195404053, "step": 1864 }, { "epoch": 2.01294498381877, "grad_norm": 1.1693605184555054, "learning_rate": 1.708659336588912e-07, "loss": 0.3647141456604004, "step": 1866 }, { "epoch": 2.0151024811218985, "grad_norm": 2.798161268234253, "learning_rate": 1.703917139250576e-07, "loss": 0.29213812947273254, "step": 1868 }, { "epoch": 2.017259978425027, "grad_norm": 1.1381080150604248, "learning_rate": 1.6991808628270987e-07, "loss": 0.5609018802642822, "step": 1870 }, { "epoch": 2.0194174757281553, "grad_norm": 1.3353042602539062, "learning_rate": 1.694450534126193e-07, "loss": 0.6125231981277466, "step": 1872 }, { "epoch": 2.0215749730312838, "grad_norm": 0.5496029853820801, "learning_rate": 1.689726179921906e-07, "loss": 0.6636590957641602, "step": 1874 }, { "epoch": 2.023732470334412, "grad_norm": 4.846103668212891, "learning_rate": 1.6850078269544736e-07, "loss": 0.5935502648353577, "step": 1876 }, { "epoch": 2.0258899676375406, "grad_norm": 1.36087167263031, "learning_rate": 1.6802955019301574e-07, "loss": 0.4898212254047394, "step": 1878 }, { "epoch": 2.028047464940669, "grad_norm": 1.213739037513733, "learning_rate": 1.6755892315211056e-07, "loss": 0.5537405014038086, "step": 1880 }, { "epoch": 2.030204962243797, "grad_norm": 2.0111231803894043, "learning_rate": 1.6708890423651965e-07, "loss": 0.5984706282615662, "step": 1882 }, { "epoch": 2.0323624595469254, "grad_norm": 1.080739974975586, "learning_rate": 1.6661949610658831e-07, "loss": 0.6536235809326172, "step": 1884 }, { "epoch": 2.034519956850054, "grad_norm": 1.0429832935333252, "learning_rate": 1.6615070141920538e-07, "loss": 0.4953509569168091, "step": 1886 }, { "epoch": 2.0366774541531822, "grad_norm": 0.8104021549224854, "learning_rate": 1.656825228277871e-07, "loss": 0.5021868944168091, "step": 1888 }, { "epoch": 2.0388349514563107, "grad_norm": 2.875866413116455, "learning_rate": 1.6521496298226293e-07, "loss": 0.4888242483139038, "step": 1890 }, { "epoch": 2.040992448759439, "grad_norm": 1.8168655633926392, "learning_rate": 1.647480245290596e-07, "loss": 0.5183455944061279, "step": 1892 }, { "epoch": 2.0431499460625675, "grad_norm": 2.0136873722076416, "learning_rate": 1.642817101110875e-07, "loss": 0.4676356911659241, "step": 1894 }, { "epoch": 2.045307443365696, "grad_norm": 1.8156472444534302, "learning_rate": 1.6381602236772428e-07, "loss": 0.39894169569015503, "step": 1896 }, { "epoch": 2.0474649406688243, "grad_norm": 1.542965054512024, "learning_rate": 1.6335096393480077e-07, "loss": 0.6107752919197083, "step": 1898 }, { "epoch": 2.0496224379719523, "grad_norm": 4.939133644104004, "learning_rate": 1.6288653744458603e-07, "loss": 0.4178003668785095, "step": 1900 }, { "epoch": 2.0517799352750807, "grad_norm": 1.781587839126587, "learning_rate": 1.62422745525772e-07, "loss": 0.45616793632507324, "step": 1902 }, { "epoch": 2.053937432578209, "grad_norm": 1.2864458560943604, "learning_rate": 1.619595908034591e-07, "loss": 0.4433179795742035, "step": 1904 }, { "epoch": 2.0560949298813376, "grad_norm": 1.3701529502868652, "learning_rate": 1.6149707589914092e-07, "loss": 0.30546942353248596, "step": 1906 }, { "epoch": 2.058252427184466, "grad_norm": 1.4088618755340576, "learning_rate": 1.6103520343068992e-07, "loss": 0.4966147840023041, "step": 1908 }, { "epoch": 2.0604099244875944, "grad_norm": 1.663954734802246, "learning_rate": 1.6057397601234218e-07, "loss": 0.5685679912567139, "step": 1910 }, { "epoch": 2.062567421790723, "grad_norm": 1.3583576679229736, "learning_rate": 1.6011339625468262e-07, "loss": 0.42057788372039795, "step": 1912 }, { "epoch": 2.0647249190938513, "grad_norm": 1.4086862802505493, "learning_rate": 1.5965346676463065e-07, "loss": 0.47804367542266846, "step": 1914 }, { "epoch": 2.0668824163969797, "grad_norm": 1.02560555934906, "learning_rate": 1.5919419014542485e-07, "loss": 0.5550174117088318, "step": 1916 }, { "epoch": 2.0690399137001076, "grad_norm": 1.144000768661499, "learning_rate": 1.5873556899660858e-07, "loss": 0.538378894329071, "step": 1918 }, { "epoch": 2.071197411003236, "grad_norm": 3.2516837120056152, "learning_rate": 1.5827760591401513e-07, "loss": 0.4809839129447937, "step": 1920 }, { "epoch": 2.0733549083063645, "grad_norm": 3.4512033462524414, "learning_rate": 1.578203034897533e-07, "loss": 0.5308358669281006, "step": 1922 }, { "epoch": 2.075512405609493, "grad_norm": 7.391180038452148, "learning_rate": 1.573636643121922e-07, "loss": 0.5464600324630737, "step": 1924 }, { "epoch": 2.0776699029126213, "grad_norm": 1.2600277662277222, "learning_rate": 1.5690769096594703e-07, "loss": 0.4952971339225769, "step": 1926 }, { "epoch": 2.0798274002157497, "grad_norm": 1.352362036705017, "learning_rate": 1.5645238603186456e-07, "loss": 0.5618917346000671, "step": 1928 }, { "epoch": 2.081984897518878, "grad_norm": 2.5637662410736084, "learning_rate": 1.5599775208700793e-07, "loss": 0.4722367525100708, "step": 1930 }, { "epoch": 2.0841423948220066, "grad_norm": 1.4300997257232666, "learning_rate": 1.5554379170464265e-07, "loss": 0.5235872864723206, "step": 1932 }, { "epoch": 2.086299892125135, "grad_norm": 1.3848403692245483, "learning_rate": 1.5509050745422164e-07, "loss": 0.5249854922294617, "step": 1934 }, { "epoch": 2.0884573894282634, "grad_norm": 1.8573588132858276, "learning_rate": 1.546379019013712e-07, "loss": 0.4274769127368927, "step": 1936 }, { "epoch": 2.0906148867313914, "grad_norm": 2.3221805095672607, "learning_rate": 1.5418597760787555e-07, "loss": 0.4279857277870178, "step": 1938 }, { "epoch": 2.09277238403452, "grad_norm": 1.2415211200714111, "learning_rate": 1.537347371316635e-07, "loss": 0.43542686104774475, "step": 1940 }, { "epoch": 2.0949298813376482, "grad_norm": 1.8443889617919922, "learning_rate": 1.532841830267934e-07, "loss": 0.46662214398384094, "step": 1942 }, { "epoch": 2.0970873786407767, "grad_norm": 1.2522131204605103, "learning_rate": 1.5283431784343802e-07, "loss": 0.5438724160194397, "step": 1944 }, { "epoch": 2.099244875943905, "grad_norm": 1.2235276699066162, "learning_rate": 1.5238514412787158e-07, "loss": 0.6343849897384644, "step": 1946 }, { "epoch": 2.1014023732470335, "grad_norm": 3.470675468444824, "learning_rate": 1.5193666442245402e-07, "loss": 0.43045446276664734, "step": 1948 }, { "epoch": 2.103559870550162, "grad_norm": 1.038780927658081, "learning_rate": 1.5148888126561726e-07, "loss": 0.4236026406288147, "step": 1950 }, { "epoch": 2.1057173678532903, "grad_norm": 2.081264019012451, "learning_rate": 1.5104179719185075e-07, "loss": 0.5168135166168213, "step": 1952 }, { "epoch": 2.1078748651564188, "grad_norm": 12.081764221191406, "learning_rate": 1.5059541473168715e-07, "loss": 0.5867135524749756, "step": 1954 }, { "epoch": 2.1100323624595467, "grad_norm": 2.5666370391845703, "learning_rate": 1.5014973641168776e-07, "loss": 0.5193699598312378, "step": 1956 }, { "epoch": 2.112189859762675, "grad_norm": 1.6125311851501465, "learning_rate": 1.497047647544283e-07, "loss": 0.529248833656311, "step": 1958 }, { "epoch": 2.1143473570658036, "grad_norm": 2.1865692138671875, "learning_rate": 1.4926050227848519e-07, "loss": 0.0995928943157196, "step": 1960 }, { "epoch": 2.116504854368932, "grad_norm": 1.370114803314209, "learning_rate": 1.4881695149842027e-07, "loss": 0.4155382513999939, "step": 1962 }, { "epoch": 2.1186623516720604, "grad_norm": 3.971156358718872, "learning_rate": 1.4837411492476743e-07, "loss": 0.42390692234039307, "step": 1964 }, { "epoch": 2.120819848975189, "grad_norm": 1.6040301322937012, "learning_rate": 1.4793199506401797e-07, "loss": 0.47764524817466736, "step": 1966 }, { "epoch": 2.1229773462783172, "grad_norm": 10.519671440124512, "learning_rate": 1.474905944186067e-07, "loss": 0.34308892488479614, "step": 1968 }, { "epoch": 2.1251348435814457, "grad_norm": 1.1796921491622925, "learning_rate": 1.4704991548689745e-07, "loss": 0.5871822834014893, "step": 1970 }, { "epoch": 2.127292340884574, "grad_norm": 1.7361336946487427, "learning_rate": 1.4660996076316912e-07, "loss": 0.3765156865119934, "step": 1972 }, { "epoch": 2.129449838187702, "grad_norm": 0.9722249507904053, "learning_rate": 1.461707327376016e-07, "loss": 0.5491130352020264, "step": 1974 }, { "epoch": 2.1316073354908305, "grad_norm": 0.8201406598091125, "learning_rate": 1.457322338962616e-07, "loss": 0.2312294989824295, "step": 1976 }, { "epoch": 2.133764832793959, "grad_norm": 1.1285258531570435, "learning_rate": 1.4529446672108852e-07, "loss": 0.408553808927536, "step": 1978 }, { "epoch": 2.1359223300970873, "grad_norm": 2.267793655395508, "learning_rate": 1.448574336898804e-07, "loss": 0.4971245229244232, "step": 1980 }, { "epoch": 2.1380798274002157, "grad_norm": 1.5453788042068481, "learning_rate": 1.4442113727628024e-07, "loss": 0.5261057615280151, "step": 1982 }, { "epoch": 2.140237324703344, "grad_norm": 1.3704675436019897, "learning_rate": 1.439855799497615e-07, "loss": 0.4675547480583191, "step": 1984 }, { "epoch": 2.1423948220064726, "grad_norm": 1.8390225172042847, "learning_rate": 1.4355076417561429e-07, "loss": 0.45672228932380676, "step": 1986 }, { "epoch": 2.144552319309601, "grad_norm": 2.047685384750366, "learning_rate": 1.4311669241493184e-07, "loss": 0.4718751013278961, "step": 1988 }, { "epoch": 2.1467098166127294, "grad_norm": 2.519134044647217, "learning_rate": 1.426833671245956e-07, "loss": 0.20050865411758423, "step": 1990 }, { "epoch": 2.148867313915858, "grad_norm": 1.5338327884674072, "learning_rate": 1.422507907572626e-07, "loss": 0.6630242466926575, "step": 1992 }, { "epoch": 2.151024811218986, "grad_norm": 1.7562376260757446, "learning_rate": 1.418189657613504e-07, "loss": 0.48454782366752625, "step": 1994 }, { "epoch": 2.1531823085221142, "grad_norm": 1.1941181421279907, "learning_rate": 1.4138789458102395e-07, "loss": 0.39869314432144165, "step": 1996 }, { "epoch": 2.1553398058252426, "grad_norm": 1.5241355895996094, "learning_rate": 1.409575796561815e-07, "loss": 0.5645185708999634, "step": 1998 }, { "epoch": 2.157497303128371, "grad_norm": 1.2509868144989014, "learning_rate": 1.4052802342244085e-07, "loss": 0.4137888550758362, "step": 2000 }, { "epoch": 2.1596548004314995, "grad_norm": 0.9289142489433289, "learning_rate": 1.4009922831112576e-07, "loss": 0.39357897639274597, "step": 2002 }, { "epoch": 2.161812297734628, "grad_norm": 1.4196053743362427, "learning_rate": 1.3967119674925144e-07, "loss": 0.4930650293827057, "step": 2004 }, { "epoch": 2.1639697950377563, "grad_norm": 1.6377662420272827, "learning_rate": 1.3924393115951183e-07, "loss": 0.5852062702178955, "step": 2006 }, { "epoch": 2.1661272923408847, "grad_norm": 3.294273614883423, "learning_rate": 1.3881743396026519e-07, "loss": 0.2635650932788849, "step": 2008 }, { "epoch": 2.168284789644013, "grad_norm": 3.2055675983428955, "learning_rate": 1.383917075655207e-07, "loss": 0.45562589168548584, "step": 2010 }, { "epoch": 2.170442286947141, "grad_norm": 1.2780847549438477, "learning_rate": 1.3796675438492466e-07, "loss": 0.2905101478099823, "step": 2012 }, { "epoch": 2.1725997842502696, "grad_norm": 1.1974769830703735, "learning_rate": 1.37542576823747e-07, "loss": 0.5826109647750854, "step": 2014 }, { "epoch": 2.174757281553398, "grad_norm": 12.196817398071289, "learning_rate": 1.3711917728286758e-07, "loss": 0.6894016861915588, "step": 2016 }, { "epoch": 2.1769147788565264, "grad_norm": 1.2338263988494873, "learning_rate": 1.3669655815876238e-07, "loss": 0.5621905326843262, "step": 2018 }, { "epoch": 2.179072276159655, "grad_norm": 1.2616736888885498, "learning_rate": 1.3627472184349054e-07, "loss": 0.5268079042434692, "step": 2020 }, { "epoch": 2.1812297734627832, "grad_norm": 2.433539867401123, "learning_rate": 1.3585367072468014e-07, "loss": 0.5845661163330078, "step": 2022 }, { "epoch": 2.1833872707659117, "grad_norm": 5.173591136932373, "learning_rate": 1.3543340718551505e-07, "loss": 0.5688271522521973, "step": 2024 }, { "epoch": 2.18554476806904, "grad_norm": 3.6794235706329346, "learning_rate": 1.3501393360472135e-07, "loss": 0.5398727655410767, "step": 2026 }, { "epoch": 2.1877022653721685, "grad_norm": 3.5358774662017822, "learning_rate": 1.345952523565541e-07, "loss": 0.4828336238861084, "step": 2028 }, { "epoch": 2.1898597626752965, "grad_norm": 2.045574188232422, "learning_rate": 1.3417736581078343e-07, "loss": 0.4576345682144165, "step": 2030 }, { "epoch": 2.192017259978425, "grad_norm": 2.9018726348876953, "learning_rate": 1.3376027633268145e-07, "loss": 0.5629587769508362, "step": 2032 }, { "epoch": 2.1941747572815533, "grad_norm": 0.7962714433670044, "learning_rate": 1.33343986283009e-07, "loss": 0.3971530795097351, "step": 2034 }, { "epoch": 2.1963322545846817, "grad_norm": 8.073356628417969, "learning_rate": 1.3292849801800172e-07, "loss": 0.34259432554244995, "step": 2036 }, { "epoch": 2.19848975188781, "grad_norm": 2.6538028717041016, "learning_rate": 1.325138138893574e-07, "loss": 0.3950064778327942, "step": 2038 }, { "epoch": 2.2006472491909386, "grad_norm": 2.4987733364105225, "learning_rate": 1.3209993624422226e-07, "loss": 0.5430999398231506, "step": 2040 }, { "epoch": 2.202804746494067, "grad_norm": 2.3891706466674805, "learning_rate": 1.3168686742517777e-07, "loss": 0.4212225377559662, "step": 2042 }, { "epoch": 2.2049622437971954, "grad_norm": 3.8016743659973145, "learning_rate": 1.312746097702273e-07, "loss": 0.47842153906822205, "step": 2044 }, { "epoch": 2.207119741100324, "grad_norm": 1.4113874435424805, "learning_rate": 1.3086316561278298e-07, "loss": 0.6112795472145081, "step": 2046 }, { "epoch": 2.209277238403452, "grad_norm": 1.0874158143997192, "learning_rate": 1.304525372816527e-07, "loss": 0.37503018975257874, "step": 2048 }, { "epoch": 2.2114347357065802, "grad_norm": 1.5210480690002441, "learning_rate": 1.3004272710102627e-07, "loss": 0.5227410793304443, "step": 2050 }, { "epoch": 2.2135922330097086, "grad_norm": 2.596205234527588, "learning_rate": 1.2963373739046308e-07, "loss": 0.5574774742126465, "step": 2052 }, { "epoch": 2.215749730312837, "grad_norm": 1.1875081062316895, "learning_rate": 1.2922557046487847e-07, "loss": 0.5108221769332886, "step": 2054 }, { "epoch": 2.2179072276159655, "grad_norm": 1.1905848979949951, "learning_rate": 1.2881822863453066e-07, "loss": 0.6219309568405151, "step": 2056 }, { "epoch": 2.220064724919094, "grad_norm": 1.272186279296875, "learning_rate": 1.2841171420500799e-07, "loss": 0.5619434118270874, "step": 2058 }, { "epoch": 2.2222222222222223, "grad_norm": 1.7432719469070435, "learning_rate": 1.2800602947721539e-07, "loss": 0.47846826910972595, "step": 2060 }, { "epoch": 2.2243797195253507, "grad_norm": 3.082793951034546, "learning_rate": 1.2760117674736174e-07, "loss": 0.40510886907577515, "step": 2062 }, { "epoch": 2.226537216828479, "grad_norm": 2.457563638687134, "learning_rate": 1.2719715830694665e-07, "loss": 0.5271166563034058, "step": 2064 }, { "epoch": 2.228694714131607, "grad_norm": 2.0129661560058594, "learning_rate": 1.2679397644274786e-07, "loss": 0.5444518327713013, "step": 2066 }, { "epoch": 2.2308522114347356, "grad_norm": 1.3749797344207764, "learning_rate": 1.2639163343680764e-07, "loss": 0.4370856285095215, "step": 2068 }, { "epoch": 2.233009708737864, "grad_norm": 2.305034637451172, "learning_rate": 1.259901315664204e-07, "loss": 0.3118676543235779, "step": 2070 }, { "epoch": 2.2351672060409924, "grad_norm": 2.7576963901519775, "learning_rate": 1.2558947310411988e-07, "loss": 0.5651037096977234, "step": 2072 }, { "epoch": 2.237324703344121, "grad_norm": 1.6983442306518555, "learning_rate": 1.251896603176657e-07, "loss": 0.4641881585121155, "step": 2074 }, { "epoch": 2.2394822006472492, "grad_norm": 1.8414127826690674, "learning_rate": 1.2479069547003113e-07, "loss": 0.43197086453437805, "step": 2076 }, { "epoch": 2.2416396979503777, "grad_norm": 2.0715110301971436, "learning_rate": 1.2439258081938982e-07, "loss": 0.4157189428806305, "step": 2078 }, { "epoch": 2.243797195253506, "grad_norm": 1.076478123664856, "learning_rate": 1.2399531861910356e-07, "loss": 0.47206589579582214, "step": 2080 }, { "epoch": 2.2459546925566345, "grad_norm": 1.650862693786621, "learning_rate": 1.2359891111770893e-07, "loss": 0.34252646565437317, "step": 2082 }, { "epoch": 2.2481121898597625, "grad_norm": 10.067177772521973, "learning_rate": 1.2320336055890485e-07, "loss": 0.39974507689476013, "step": 2084 }, { "epoch": 2.250269687162891, "grad_norm": 2.362675666809082, "learning_rate": 1.228086691815401e-07, "loss": 0.3123304843902588, "step": 2086 }, { "epoch": 2.2524271844660193, "grad_norm": 1.422263741493225, "learning_rate": 1.224148392196002e-07, "loss": 0.512368381023407, "step": 2088 }, { "epoch": 2.2545846817691477, "grad_norm": 3.862522602081299, "learning_rate": 1.2202187290219506e-07, "loss": 0.6224650144577026, "step": 2090 }, { "epoch": 2.256742179072276, "grad_norm": 1.9737151861190796, "learning_rate": 1.2162977245354618e-07, "loss": 0.5244027376174927, "step": 2092 }, { "epoch": 2.2588996763754046, "grad_norm": 1.3677443265914917, "learning_rate": 1.212385400929746e-07, "loss": 0.45020678639411926, "step": 2094 }, { "epoch": 2.261057173678533, "grad_norm": 2.039045572280884, "learning_rate": 1.208481780348872e-07, "loss": 0.5531943440437317, "step": 2096 }, { "epoch": 2.2632146709816614, "grad_norm": 1.5377328395843506, "learning_rate": 1.2045868848876553e-07, "loss": 0.37629038095474243, "step": 2098 }, { "epoch": 2.26537216828479, "grad_norm": 2.3397927284240723, "learning_rate": 1.2007007365915235e-07, "loss": 0.3320096433162689, "step": 2100 }, { "epoch": 2.267529665587918, "grad_norm": 3.1143479347229004, "learning_rate": 1.1968233574563937e-07, "loss": 0.40027597546577454, "step": 2102 }, { "epoch": 2.269687162891046, "grad_norm": 4.891796112060547, "learning_rate": 1.1929547694285518e-07, "loss": 0.4987124502658844, "step": 2104 }, { "epoch": 2.2718446601941746, "grad_norm": 1.840555191040039, "learning_rate": 1.1890949944045232e-07, "loss": 0.49355462193489075, "step": 2106 }, { "epoch": 2.274002157497303, "grad_norm": 3.1459503173828125, "learning_rate": 1.1852440542309507e-07, "loss": 0.36765629053115845, "step": 2108 }, { "epoch": 2.2761596548004315, "grad_norm": 1.4921073913574219, "learning_rate": 1.1814019707044715e-07, "loss": 0.4247042238712311, "step": 2110 }, { "epoch": 2.27831715210356, "grad_norm": 1.55894935131073, "learning_rate": 1.1775687655715948e-07, "loss": 0.4907058775424957, "step": 2112 }, { "epoch": 2.2804746494066883, "grad_norm": 1.8256114721298218, "learning_rate": 1.1737444605285757e-07, "loss": 0.46969401836395264, "step": 2114 }, { "epoch": 2.2826321467098167, "grad_norm": 2.421661376953125, "learning_rate": 1.1699290772212944e-07, "loss": 0.5222725868225098, "step": 2116 }, { "epoch": 2.284789644012945, "grad_norm": 1.7159234285354614, "learning_rate": 1.1661226372451344e-07, "loss": 0.38712745904922485, "step": 2118 }, { "epoch": 2.286947141316073, "grad_norm": 1.709659218788147, "learning_rate": 1.1623251621448581e-07, "loss": 0.2972549796104431, "step": 2120 }, { "epoch": 2.2891046386192015, "grad_norm": 5.333364486694336, "learning_rate": 1.1585366734144861e-07, "loss": 0.5384417772293091, "step": 2122 }, { "epoch": 2.29126213592233, "grad_norm": 1.5736010074615479, "learning_rate": 1.154757192497175e-07, "loss": 0.48839452862739563, "step": 2124 }, { "epoch": 2.2934196332254584, "grad_norm": 2.719190835952759, "learning_rate": 1.1509867407850982e-07, "loss": 0.4550670385360718, "step": 2126 }, { "epoch": 2.295577130528587, "grad_norm": 1.8119968175888062, "learning_rate": 1.1472253396193217e-07, "loss": 0.6911446452140808, "step": 2128 }, { "epoch": 2.2977346278317152, "grad_norm": 4.632421970367432, "learning_rate": 1.1434730102896833e-07, "loss": 0.4868852198123932, "step": 2130 }, { "epoch": 2.2998921251348436, "grad_norm": 1.4012049436569214, "learning_rate": 1.1397297740346771e-07, "loss": 0.4392762780189514, "step": 2132 }, { "epoch": 2.302049622437972, "grad_norm": 1.4517533779144287, "learning_rate": 1.1359956520413267e-07, "loss": 0.6284406185150146, "step": 2134 }, { "epoch": 2.3042071197411005, "grad_norm": 23.886926651000977, "learning_rate": 1.1322706654450692e-07, "loss": 0.5837987661361694, "step": 2136 }, { "epoch": 2.3063646170442285, "grad_norm": 2.8387584686279297, "learning_rate": 1.1285548353296335e-07, "loss": 0.49964287877082825, "step": 2138 }, { "epoch": 2.308522114347357, "grad_norm": 1.3726048469543457, "learning_rate": 1.1248481827269252e-07, "loss": 0.4638864994049072, "step": 2140 }, { "epoch": 2.3106796116504853, "grad_norm": 2.2775590419769287, "learning_rate": 1.1211507286168997e-07, "loss": 0.3237634003162384, "step": 2142 }, { "epoch": 2.3128371089536137, "grad_norm": 4.829261779785156, "learning_rate": 1.1174624939274521e-07, "loss": 0.37512320280075073, "step": 2144 }, { "epoch": 2.314994606256742, "grad_norm": 1.2612223625183105, "learning_rate": 1.1137834995342951e-07, "loss": 0.44408831000328064, "step": 2146 }, { "epoch": 2.3171521035598706, "grad_norm": 1.091060757637024, "learning_rate": 1.1101137662608356e-07, "loss": 0.45439931750297546, "step": 2148 }, { "epoch": 2.319309600862999, "grad_norm": 1.9243289232254028, "learning_rate": 1.1064533148780674e-07, "loss": 0.5608944296836853, "step": 2150 }, { "epoch": 2.3214670981661274, "grad_norm": 1.0947431325912476, "learning_rate": 1.1028021661044448e-07, "loss": 0.43637141585350037, "step": 2152 }, { "epoch": 2.323624595469256, "grad_norm": 1.2952344417572021, "learning_rate": 1.0991603406057712e-07, "loss": 0.4882223904132843, "step": 2154 }, { "epoch": 2.325782092772384, "grad_norm": 2.949446201324463, "learning_rate": 1.0955278589950754e-07, "loss": 0.4377874732017517, "step": 2156 }, { "epoch": 2.3279395900755127, "grad_norm": 1.3994691371917725, "learning_rate": 1.0919047418325027e-07, "loss": 0.4923911392688751, "step": 2158 }, { "epoch": 2.3300970873786406, "grad_norm": 4.1141438484191895, "learning_rate": 1.088291009625195e-07, "loss": 0.450039267539978, "step": 2160 }, { "epoch": 2.332254584681769, "grad_norm": 1.905044436454773, "learning_rate": 1.0846866828271706e-07, "loss": 0.3132597506046295, "step": 2162 }, { "epoch": 2.3344120819848975, "grad_norm": 2.2338316440582275, "learning_rate": 1.081091781839217e-07, "loss": 0.47647953033447266, "step": 2164 }, { "epoch": 2.336569579288026, "grad_norm": 1.30208158493042, "learning_rate": 1.0775063270087683e-07, "loss": 0.49173516035079956, "step": 2166 }, { "epoch": 2.3387270765911543, "grad_norm": 1.6397186517715454, "learning_rate": 1.073930338629793e-07, "loss": 0.6098090410232544, "step": 2168 }, { "epoch": 2.3408845738942827, "grad_norm": 1.269619107246399, "learning_rate": 1.0703638369426782e-07, "loss": 0.36568519473075867, "step": 2170 }, { "epoch": 2.343042071197411, "grad_norm": 1.4696781635284424, "learning_rate": 1.0668068421341176e-07, "loss": 0.5783711671829224, "step": 2172 }, { "epoch": 2.3451995685005396, "grad_norm": 7.201903343200684, "learning_rate": 1.0632593743369927e-07, "loss": 0.5010417699813843, "step": 2174 }, { "epoch": 2.347357065803668, "grad_norm": 1.4425990581512451, "learning_rate": 1.0597214536302627e-07, "loss": 0.4950565695762634, "step": 2176 }, { "epoch": 2.349514563106796, "grad_norm": 1.3873226642608643, "learning_rate": 1.0561931000388497e-07, "loss": 0.5758650898933411, "step": 2178 }, { "epoch": 2.3516720604099244, "grad_norm": 2.6027872562408447, "learning_rate": 1.0526743335335244e-07, "loss": 0.43806731700897217, "step": 2180 }, { "epoch": 2.353829557713053, "grad_norm": 1.688138723373413, "learning_rate": 1.0491651740307942e-07, "loss": 0.45398759841918945, "step": 2182 }, { "epoch": 2.355987055016181, "grad_norm": 1.561120867729187, "learning_rate": 1.0456656413927885e-07, "loss": 0.5060495734214783, "step": 2184 }, { "epoch": 2.3581445523193096, "grad_norm": 0.7075545787811279, "learning_rate": 1.0421757554271513e-07, "loss": 0.6581755876541138, "step": 2186 }, { "epoch": 2.360302049622438, "grad_norm": 1.6746031045913696, "learning_rate": 1.0386955358869228e-07, "loss": 0.4242514669895172, "step": 2188 }, { "epoch": 2.3624595469255665, "grad_norm": 1.3905069828033447, "learning_rate": 1.0352250024704305e-07, "loss": 0.5258157849311829, "step": 2190 }, { "epoch": 2.364617044228695, "grad_norm": 1.4214626550674438, "learning_rate": 1.0317641748211797e-07, "loss": 0.4111720025539398, "step": 2192 }, { "epoch": 2.3667745415318233, "grad_norm": 2.1955699920654297, "learning_rate": 1.0283130725277387e-07, "loss": 0.4222407937049866, "step": 2194 }, { "epoch": 2.3689320388349513, "grad_norm": 3.329183578491211, "learning_rate": 1.0248717151236292e-07, "loss": 0.48131048679351807, "step": 2196 }, { "epoch": 2.3710895361380797, "grad_norm": 1.4323745965957642, "learning_rate": 1.0214401220872165e-07, "loss": 0.5055102109909058, "step": 2198 }, { "epoch": 2.373247033441208, "grad_norm": 3.3138020038604736, "learning_rate": 1.0180183128415996e-07, "loss": 0.4181690514087677, "step": 2200 }, { "epoch": 2.3754045307443366, "grad_norm": 1.2691850662231445, "learning_rate": 1.0146063067544994e-07, "loss": 0.5093836188316345, "step": 2202 }, { "epoch": 2.377562028047465, "grad_norm": 1.5404523611068726, "learning_rate": 1.0112041231381497e-07, "loss": 0.5514087677001953, "step": 2204 }, { "epoch": 2.3797195253505934, "grad_norm": 0.5054550170898438, "learning_rate": 1.007811781249191e-07, "loss": 0.5555412173271179, "step": 2206 }, { "epoch": 2.381877022653722, "grad_norm": 1.4161839485168457, "learning_rate": 1.0044293002885543e-07, "loss": 0.40494877099990845, "step": 2208 }, { "epoch": 2.3840345199568502, "grad_norm": 2.184622049331665, "learning_rate": 1.0010566994013612e-07, "loss": 0.4903999865055084, "step": 2210 }, { "epoch": 2.3861920172599786, "grad_norm": 1.4374651908874512, "learning_rate": 9.976939976768092e-08, "loss": 0.3994739353656769, "step": 2212 }, { "epoch": 2.3883495145631066, "grad_norm": 1.230521559715271, "learning_rate": 9.943412141480658e-08, "loss": 0.3945184051990509, "step": 2214 }, { "epoch": 2.390507011866235, "grad_norm": 1.331252932548523, "learning_rate": 9.909983677921607e-08, "loss": 0.3576943278312683, "step": 2216 }, { "epoch": 2.3926645091693635, "grad_norm": 2.683382987976074, "learning_rate": 9.876654775298799e-08, "loss": 0.5146565437316895, "step": 2218 }, { "epoch": 2.394822006472492, "grad_norm": 1.1658935546875, "learning_rate": 9.843425622256546e-08, "loss": 0.4697680175304413, "step": 2220 }, { "epoch": 2.3969795037756203, "grad_norm": 1.2742339372634888, "learning_rate": 9.810296406874583e-08, "loss": 0.5487444400787354, "step": 2222 }, { "epoch": 2.3991370010787487, "grad_norm": 2.137641668319702, "learning_rate": 9.777267316667e-08, "loss": 0.4315913915634155, "step": 2224 }, { "epoch": 2.401294498381877, "grad_norm": 2.66500186920166, "learning_rate": 9.744338538581147e-08, "loss": 0.5236972570419312, "step": 2226 }, { "epoch": 2.4034519956850056, "grad_norm": 1.3068699836730957, "learning_rate": 9.711510258996617e-08, "loss": 0.46810081601142883, "step": 2228 }, { "epoch": 2.405609492988134, "grad_norm": 2.274906635284424, "learning_rate": 9.678782663724156e-08, "loss": 0.3743009567260742, "step": 2230 }, { "epoch": 2.407766990291262, "grad_norm": 7.867446422576904, "learning_rate": 9.646155938004655e-08, "loss": 0.5277361273765564, "step": 2232 }, { "epoch": 2.4099244875943904, "grad_norm": 1.2440294027328491, "learning_rate": 9.613630266508053e-08, "loss": 0.5060732960700989, "step": 2234 }, { "epoch": 2.412081984897519, "grad_norm": 2.0601508617401123, "learning_rate": 9.581205833332316e-08, "loss": 0.422480046749115, "step": 2236 }, { "epoch": 2.414239482200647, "grad_norm": 3.935227632522583, "learning_rate": 9.548882822002405e-08, "loss": 0.5240671634674072, "step": 2238 }, { "epoch": 2.4163969795037756, "grad_norm": 1.6686252355575562, "learning_rate": 9.516661415469216e-08, "loss": 0.5927475690841675, "step": 2240 }, { "epoch": 2.418554476806904, "grad_norm": 1.506426215171814, "learning_rate": 9.484541796108551e-08, "loss": 0.4393031597137451, "step": 2242 }, { "epoch": 2.4207119741100325, "grad_norm": 2.3679986000061035, "learning_rate": 9.45252414572009e-08, "loss": 0.41118109226226807, "step": 2244 }, { "epoch": 2.422869471413161, "grad_norm": 3.8800225257873535, "learning_rate": 9.420608645526373e-08, "loss": 0.5971561670303345, "step": 2246 }, { "epoch": 2.4250269687162893, "grad_norm": 3.2100000381469727, "learning_rate": 9.388795476171742e-08, "loss": 0.4730355739593506, "step": 2248 }, { "epoch": 2.4271844660194173, "grad_norm": 2.2373881340026855, "learning_rate": 9.357084817721342e-08, "loss": 0.5331867337226868, "step": 2250 }, { "epoch": 2.4293419633225457, "grad_norm": 1.286616563796997, "learning_rate": 9.325476849660124e-08, "loss": 0.3063022196292877, "step": 2252 }, { "epoch": 2.431499460625674, "grad_norm": 16.60342788696289, "learning_rate": 9.293971750891755e-08, "loss": 0.44442903995513916, "step": 2254 }, { "epoch": 2.4336569579288025, "grad_norm": 1.4422513246536255, "learning_rate": 9.262569699737699e-08, "loss": 0.45078244805336, "step": 2256 }, { "epoch": 2.435814455231931, "grad_norm": 3.8183093070983887, "learning_rate": 9.231270873936134e-08, "loss": 0.6303662657737732, "step": 2258 }, { "epoch": 2.4379719525350594, "grad_norm": 2.514275550842285, "learning_rate": 9.200075450640982e-08, "loss": 0.41539642214775085, "step": 2260 }, { "epoch": 2.440129449838188, "grad_norm": 15.767145156860352, "learning_rate": 9.16898360642091e-08, "loss": 0.5099453330039978, "step": 2262 }, { "epoch": 2.4422869471413162, "grad_norm": 1.8690749406814575, "learning_rate": 9.137995517258301e-08, "loss": 0.4256049394607544, "step": 2264 }, { "epoch": 2.4444444444444446, "grad_norm": 1.6698436737060547, "learning_rate": 9.107111358548284e-08, "loss": 0.24495507776737213, "step": 2266 }, { "epoch": 2.4466019417475726, "grad_norm": 1.4875117540359497, "learning_rate": 9.076331305097726e-08, "loss": 0.5850554704666138, "step": 2268 }, { "epoch": 2.448759439050701, "grad_norm": 1.4028273820877075, "learning_rate": 9.045655531124265e-08, "loss": 0.6093006134033203, "step": 2270 }, { "epoch": 2.4509169363538295, "grad_norm": 1.2962590456008911, "learning_rate": 9.015084210255303e-08, "loss": 0.47294872999191284, "step": 2272 }, { "epoch": 2.453074433656958, "grad_norm": 1.212159514427185, "learning_rate": 8.984617515527011e-08, "loss": 0.49960917234420776, "step": 2274 }, { "epoch": 2.4552319309600863, "grad_norm": 1.358234167098999, "learning_rate": 8.954255619383396e-08, "loss": 0.5016006231307983, "step": 2276 }, { "epoch": 2.4573894282632147, "grad_norm": 1.423470139503479, "learning_rate": 8.92399869367528e-08, "loss": 0.5450627207756042, "step": 2278 }, { "epoch": 2.459546925566343, "grad_norm": 10.734492301940918, "learning_rate": 8.893846909659339e-08, "loss": 0.5570814609527588, "step": 2280 }, { "epoch": 2.4617044228694716, "grad_norm": 1.709943413734436, "learning_rate": 8.863800437997145e-08, "loss": 0.487891286611557, "step": 2282 }, { "epoch": 2.4638619201726, "grad_norm": 4.394408226013184, "learning_rate": 8.833859448754206e-08, "loss": 0.4305161237716675, "step": 2284 }, { "epoch": 2.466019417475728, "grad_norm": 4.226398468017578, "learning_rate": 8.804024111398971e-08, "loss": 0.529059648513794, "step": 2286 }, { "epoch": 2.4681769147788564, "grad_norm": 1.9100017547607422, "learning_rate": 8.77429459480189e-08, "loss": 0.5420784950256348, "step": 2288 }, { "epoch": 2.470334412081985, "grad_norm": 2.035076141357422, "learning_rate": 8.744671067234483e-08, "loss": 0.3656485080718994, "step": 2290 }, { "epoch": 2.472491909385113, "grad_norm": 0.4504566490650177, "learning_rate": 8.715153696368342e-08, "loss": 0.10097479820251465, "step": 2292 }, { "epoch": 2.4746494066882416, "grad_norm": 1.2601561546325684, "learning_rate": 8.685742649274209e-08, "loss": 0.5880253314971924, "step": 2294 }, { "epoch": 2.47680690399137, "grad_norm": 1.1769603490829468, "learning_rate": 8.656438092421015e-08, "loss": 0.2316816747188568, "step": 2296 }, { "epoch": 2.4789644012944985, "grad_norm": 4.42578125, "learning_rate": 8.627240191674979e-08, "loss": 0.5261933207511902, "step": 2298 }, { "epoch": 2.481121898597627, "grad_norm": 2.054516553878784, "learning_rate": 8.598149112298586e-08, "loss": 0.3512814939022064, "step": 2300 }, { "epoch": 2.4832793959007553, "grad_norm": 1.8083107471466064, "learning_rate": 8.569165018949755e-08, "loss": 0.5700247287750244, "step": 2302 }, { "epoch": 2.4854368932038833, "grad_norm": 0.7789508700370789, "learning_rate": 8.540288075680832e-08, "loss": 0.5419098138809204, "step": 2304 }, { "epoch": 2.4875943905070117, "grad_norm": 1.425304889678955, "learning_rate": 8.511518445937682e-08, "loss": 0.5239717960357666, "step": 2306 }, { "epoch": 2.48975188781014, "grad_norm": 2.2157175540924072, "learning_rate": 8.482856292558771e-08, "loss": 0.5087226629257202, "step": 2308 }, { "epoch": 2.4919093851132685, "grad_norm": 1.6569185256958008, "learning_rate": 8.454301777774237e-08, "loss": 0.5517893433570862, "step": 2310 }, { "epoch": 2.494066882416397, "grad_norm": 1.5045207738876343, "learning_rate": 8.425855063204987e-08, "loss": 0.44327977299690247, "step": 2312 }, { "epoch": 2.4962243797195254, "grad_norm": 3.6006226539611816, "learning_rate": 8.397516309861743e-08, "loss": 0.5266181230545044, "step": 2314 }, { "epoch": 2.498381877022654, "grad_norm": 1.071789264678955, "learning_rate": 8.369285678144197e-08, "loss": 0.3867831826210022, "step": 2316 }, { "epoch": 2.500539374325782, "grad_norm": 1.2131776809692383, "learning_rate": 8.341163327840026e-08, "loss": 0.4348089396953583, "step": 2318 }, { "epoch": 2.5026968716289106, "grad_norm": 1.3953055143356323, "learning_rate": 8.313149418124043e-08, "loss": 0.3074108362197876, "step": 2320 }, { "epoch": 2.5048543689320386, "grad_norm": 1.4362233877182007, "learning_rate": 8.285244107557284e-08, "loss": 0.559751033782959, "step": 2322 }, { "epoch": 2.5070118662351675, "grad_norm": 2.6127679347991943, "learning_rate": 8.257447554086095e-08, "loss": 0.49618762731552124, "step": 2324 }, { "epoch": 2.5091693635382954, "grad_norm": 1.4416208267211914, "learning_rate": 8.229759915041243e-08, "loss": 0.37788355350494385, "step": 2326 }, { "epoch": 2.511326860841424, "grad_norm": 1.3234007358551025, "learning_rate": 8.202181347137041e-08, "loss": 0.529360830783844, "step": 2328 }, { "epoch": 2.5134843581445523, "grad_norm": 1.065699577331543, "learning_rate": 8.174712006470453e-08, "loss": 0.3420860767364502, "step": 2330 }, { "epoch": 2.5156418554476807, "grad_norm": 0.9615293741226196, "learning_rate": 8.147352048520198e-08, "loss": 0.3195402920246124, "step": 2332 }, { "epoch": 2.517799352750809, "grad_norm": 1.6290311813354492, "learning_rate": 8.12010162814588e-08, "loss": 0.41644540429115295, "step": 2334 }, { "epoch": 2.5199568500539375, "grad_norm": 3.109558343887329, "learning_rate": 8.092960899587121e-08, "loss": 0.4512273371219635, "step": 2336 }, { "epoch": 2.522114347357066, "grad_norm": 2.5993058681488037, "learning_rate": 8.065930016462671e-08, "loss": 0.5339113473892212, "step": 2338 }, { "epoch": 2.524271844660194, "grad_norm": 5.516918659210205, "learning_rate": 8.039009131769548e-08, "loss": 0.6151620745658875, "step": 2340 }, { "epoch": 2.526429341963323, "grad_norm": 18.284648895263672, "learning_rate": 8.012198397882164e-08, "loss": 0.5498458743095398, "step": 2342 }, { "epoch": 2.528586839266451, "grad_norm": 5.122011661529541, "learning_rate": 7.98549796655148e-08, "loss": 0.5199579000473022, "step": 2344 }, { "epoch": 2.530744336569579, "grad_norm": 1.5160562992095947, "learning_rate": 7.958907988904126e-08, "loss": 0.6041461825370789, "step": 2346 }, { "epoch": 2.5329018338727076, "grad_norm": 1.5068459510803223, "learning_rate": 7.932428615441553e-08, "loss": 0.593430757522583, "step": 2348 }, { "epoch": 2.535059331175836, "grad_norm": 1.434343934059143, "learning_rate": 7.9060599960392e-08, "loss": 0.31969255208969116, "step": 2350 }, { "epoch": 2.5372168284789645, "grad_norm": 2.9943349361419678, "learning_rate": 7.879802279945609e-08, "loss": 0.5539549589157104, "step": 2352 }, { "epoch": 2.539374325782093, "grad_norm": 1.722773790359497, "learning_rate": 7.85365561578161e-08, "loss": 0.5361152291297913, "step": 2354 }, { "epoch": 2.5415318230852213, "grad_norm": 1.305713176727295, "learning_rate": 7.827620151539466e-08, "loss": 0.4791654050350189, "step": 2356 }, { "epoch": 2.5436893203883493, "grad_norm": 3.0847299098968506, "learning_rate": 7.801696034582053e-08, "loss": 0.5922753810882568, "step": 2358 }, { "epoch": 2.545846817691478, "grad_norm": 2.508219003677368, "learning_rate": 7.77588341164198e-08, "loss": 0.5202733874320984, "step": 2360 }, { "epoch": 2.548004314994606, "grad_norm": 1.4630476236343384, "learning_rate": 7.750182428820827e-08, "loss": 0.5938437581062317, "step": 2362 }, { "epoch": 2.5501618122977345, "grad_norm": 4.091277122497559, "learning_rate": 7.724593231588272e-08, "loss": 0.5015133619308472, "step": 2364 }, { "epoch": 2.552319309600863, "grad_norm": 1.16217839717865, "learning_rate": 7.699115964781254e-08, "loss": 0.27357974648475647, "step": 2366 }, { "epoch": 2.5544768069039914, "grad_norm": 1.87477707862854, "learning_rate": 7.673750772603207e-08, "loss": 0.612422525882721, "step": 2368 }, { "epoch": 2.55663430420712, "grad_norm": 1.7888860702514648, "learning_rate": 7.6484977986232e-08, "loss": 0.7179882526397705, "step": 2370 }, { "epoch": 2.558791801510248, "grad_norm": 0.9722982048988342, "learning_rate": 7.623357185775133e-08, "loss": 0.4070899486541748, "step": 2372 }, { "epoch": 2.5609492988133766, "grad_norm": 1.3785275220870972, "learning_rate": 7.598329076356936e-08, "loss": 0.5034173727035522, "step": 2374 }, { "epoch": 2.5631067961165046, "grad_norm": 1.4615991115570068, "learning_rate": 7.573413612029774e-08, "loss": 0.42231953144073486, "step": 2376 }, { "epoch": 2.5652642934196335, "grad_norm": 1.8301200866699219, "learning_rate": 7.548610933817214e-08, "loss": 0.4184509217739105, "step": 2378 }, { "epoch": 2.5674217907227614, "grad_norm": 2.232179880142212, "learning_rate": 7.523921182104446e-08, "loss": 0.4791230261325836, "step": 2380 }, { "epoch": 2.56957928802589, "grad_norm": 1.1662960052490234, "learning_rate": 7.499344496637498e-08, "loss": 0.45605581998825073, "step": 2382 }, { "epoch": 2.5717367853290183, "grad_norm": 1.7335278987884521, "learning_rate": 7.474881016522429e-08, "loss": 0.4822132885456085, "step": 2384 }, { "epoch": 2.5738942826321467, "grad_norm": 1.393849492073059, "learning_rate": 7.45053088022454e-08, "loss": 0.61832594871521, "step": 2386 }, { "epoch": 2.576051779935275, "grad_norm": 2.2385711669921875, "learning_rate": 7.426294225567596e-08, "loss": 0.41741040349006653, "step": 2388 }, { "epoch": 2.5782092772384035, "grad_norm": 1.4406133890151978, "learning_rate": 7.40217118973306e-08, "loss": 0.4191496670246124, "step": 2390 }, { "epoch": 2.580366774541532, "grad_norm": 1.7564983367919922, "learning_rate": 7.378161909259297e-08, "loss": 0.5521677732467651, "step": 2392 }, { "epoch": 2.58252427184466, "grad_norm": 1.317151665687561, "learning_rate": 7.354266520040793e-08, "loss": 0.5924421548843384, "step": 2394 }, { "epoch": 2.584681769147789, "grad_norm": 7.4354753494262695, "learning_rate": 7.330485157327426e-08, "loss": 0.46872678399086, "step": 2396 }, { "epoch": 2.5868392664509168, "grad_norm": 1.6784964799880981, "learning_rate": 7.306817955723654e-08, "loss": 0.4340111017227173, "step": 2398 }, { "epoch": 2.588996763754045, "grad_norm": 2.780867099761963, "learning_rate": 7.283265049187784e-08, "loss": 0.35171282291412354, "step": 2400 }, { "epoch": 2.5911542610571736, "grad_norm": 1.664075493812561, "learning_rate": 7.259826571031191e-08, "loss": 0.39083340764045715, "step": 2402 }, { "epoch": 2.593311758360302, "grad_norm": 3.530792713165283, "learning_rate": 7.236502653917599e-08, "loss": 0.4641299247741699, "step": 2404 }, { "epoch": 2.5954692556634305, "grad_norm": 1.017684817314148, "learning_rate": 7.213293429862288e-08, "loss": 0.3411005437374115, "step": 2406 }, { "epoch": 2.597626752966559, "grad_norm": 3.9479050636291504, "learning_rate": 7.190199030231364e-08, "loss": 0.5616810321807861, "step": 2408 }, { "epoch": 2.5997842502696873, "grad_norm": 5.205540180206299, "learning_rate": 7.167219585741041e-08, "loss": 0.5188603401184082, "step": 2410 }, { "epoch": 2.6019417475728153, "grad_norm": 1.752669334411621, "learning_rate": 7.144355226456839e-08, "loss": 0.622796893119812, "step": 2412 }, { "epoch": 2.604099244875944, "grad_norm": 1.7586170434951782, "learning_rate": 7.121606081792928e-08, "loss": 0.4979010820388794, "step": 2414 }, { "epoch": 2.606256742179072, "grad_norm": 1.7928980588912964, "learning_rate": 7.098972280511323e-08, "loss": 0.40664538741111755, "step": 2416 }, { "epoch": 2.6084142394822005, "grad_norm": 1.9738396406173706, "learning_rate": 7.076453950721202e-08, "loss": 0.5753185153007507, "step": 2418 }, { "epoch": 2.610571736785329, "grad_norm": 1.1539170742034912, "learning_rate": 7.054051219878153e-08, "loss": 0.47662532329559326, "step": 2420 }, { "epoch": 2.6127292340884574, "grad_norm": 2.355470895767212, "learning_rate": 7.031764214783478e-08, "loss": 0.4526709318161011, "step": 2422 }, { "epoch": 2.614886731391586, "grad_norm": 1.3842222690582275, "learning_rate": 7.009593061583462e-08, "loss": 0.4917500615119934, "step": 2424 }, { "epoch": 2.617044228694714, "grad_norm": 7.645388603210449, "learning_rate": 6.987537885768635e-08, "loss": 0.504601240158081, "step": 2426 }, { "epoch": 2.6192017259978426, "grad_norm": 1.4394519329071045, "learning_rate": 6.965598812173118e-08, "loss": 0.6155430674552917, "step": 2428 }, { "epoch": 2.6213592233009706, "grad_norm": 1.2602229118347168, "learning_rate": 6.943775964973861e-08, "loss": 0.5159276723861694, "step": 2430 }, { "epoch": 2.6235167206040995, "grad_norm": 1.7222973108291626, "learning_rate": 6.922069467689969e-08, "loss": 0.46511101722717285, "step": 2432 }, { "epoch": 2.6256742179072274, "grad_norm": 3.4029550552368164, "learning_rate": 6.900479443182e-08, "loss": 0.5705016851425171, "step": 2434 }, { "epoch": 2.627831715210356, "grad_norm": 3.3381807804107666, "learning_rate": 6.879006013651269e-08, "loss": 0.588231086730957, "step": 2436 }, { "epoch": 2.6299892125134843, "grad_norm": 1.722324013710022, "learning_rate": 6.857649300639145e-08, "loss": 0.4552815556526184, "step": 2438 }, { "epoch": 2.6321467098166127, "grad_norm": 2.675380229949951, "learning_rate": 6.836409425026375e-08, "loss": 0.3620685040950775, "step": 2440 }, { "epoch": 2.634304207119741, "grad_norm": 4.263212203979492, "learning_rate": 6.815286507032405e-08, "loss": 0.33681440353393555, "step": 2442 }, { "epoch": 2.6364617044228695, "grad_norm": 3.89007830619812, "learning_rate": 6.794280666214682e-08, "loss": 0.4459841251373291, "step": 2444 }, { "epoch": 2.638619201725998, "grad_norm": 3.6217668056488037, "learning_rate": 6.773392021467987e-08, "loss": 0.5162920951843262, "step": 2446 }, { "epoch": 2.6407766990291264, "grad_norm": 3.093386173248291, "learning_rate": 6.752620691023762e-08, "loss": 0.25055232644081116, "step": 2448 }, { "epoch": 2.642934196332255, "grad_norm": 3.092965602874756, "learning_rate": 6.731966792449451e-08, "loss": 0.6372309923171997, "step": 2450 }, { "epoch": 2.6450916936353828, "grad_norm": 1.5319708585739136, "learning_rate": 6.711430442647809e-08, "loss": 0.4929147958755493, "step": 2452 }, { "epoch": 2.647249190938511, "grad_norm": 13.12871265411377, "learning_rate": 6.691011757856258e-08, "loss": 0.434012770652771, "step": 2454 }, { "epoch": 2.6494066882416396, "grad_norm": 1.5877397060394287, "learning_rate": 6.670710853646239e-08, "loss": 0.43648290634155273, "step": 2456 }, { "epoch": 2.651564185544768, "grad_norm": 1.0320699214935303, "learning_rate": 6.650527844922533e-08, "loss": 0.4268641471862793, "step": 2458 }, { "epoch": 2.6537216828478964, "grad_norm": 3.019049644470215, "learning_rate": 6.630462845922622e-08, "loss": 0.6072458624839783, "step": 2460 }, { "epoch": 2.655879180151025, "grad_norm": 1.9484155178070068, "learning_rate": 6.610515970216046e-08, "loss": 0.42939677834510803, "step": 2462 }, { "epoch": 2.6580366774541533, "grad_norm": 1.258055567741394, "learning_rate": 6.59068733070377e-08, "loss": 0.45139870047569275, "step": 2464 }, { "epoch": 2.6601941747572817, "grad_norm": 0.20732815563678741, "learning_rate": 6.570977039617512e-08, "loss": 0.19261834025382996, "step": 2466 }, { "epoch": 2.66235167206041, "grad_norm": 1.5855305194854736, "learning_rate": 6.551385208519136e-08, "loss": 0.609540581703186, "step": 2468 }, { "epoch": 2.664509169363538, "grad_norm": 1.3604331016540527, "learning_rate": 6.531911948300026e-08, "loss": 0.4661960303783417, "step": 2470 }, { "epoch": 2.6666666666666665, "grad_norm": 1.1402088403701782, "learning_rate": 6.512557369180416e-08, "loss": 0.3893601894378662, "step": 2472 }, { "epoch": 2.668824163969795, "grad_norm": 1.1722886562347412, "learning_rate": 6.493321580708825e-08, "loss": 0.50113445520401, "step": 2474 }, { "epoch": 2.6709816612729234, "grad_norm": 1.2451552152633667, "learning_rate": 6.474204691761392e-08, "loss": 0.5579499006271362, "step": 2476 }, { "epoch": 2.6731391585760518, "grad_norm": 1.9142377376556396, "learning_rate": 6.455206810541275e-08, "loss": 0.5365015864372253, "step": 2478 }, { "epoch": 2.67529665587918, "grad_norm": 4.749199867248535, "learning_rate": 6.436328044578045e-08, "loss": 0.5498421788215637, "step": 2480 }, { "epoch": 2.6774541531823086, "grad_norm": 4.736466884613037, "learning_rate": 6.417568500727065e-08, "loss": 0.474033921957016, "step": 2482 }, { "epoch": 2.679611650485437, "grad_norm": 1.416872262954712, "learning_rate": 6.398928285168894e-08, "loss": 0.5008449554443359, "step": 2484 }, { "epoch": 2.6817691477885655, "grad_norm": 1.2803456783294678, "learning_rate": 6.380407503408675e-08, "loss": 0.4675408601760864, "step": 2486 }, { "epoch": 2.6839266450916934, "grad_norm": 2.346578598022461, "learning_rate": 6.362006260275566e-08, "loss": 0.48824068903923035, "step": 2488 }, { "epoch": 2.686084142394822, "grad_norm": 1.5030266046524048, "learning_rate": 6.343724659922105e-08, "loss": 0.4942224323749542, "step": 2490 }, { "epoch": 2.6882416396979503, "grad_norm": 4.458725929260254, "learning_rate": 6.325562805823647e-08, "loss": 0.5143862962722778, "step": 2492 }, { "epoch": 2.6903991370010787, "grad_norm": 0.25812989473342896, "learning_rate": 6.307520800777791e-08, "loss": 0.06615746021270752, "step": 2494 }, { "epoch": 2.692556634304207, "grad_norm": 1.5842936038970947, "learning_rate": 6.289598746903753e-08, "loss": 0.488372266292572, "step": 2496 }, { "epoch": 2.6947141316073355, "grad_norm": 2.356571674346924, "learning_rate": 6.271796745641836e-08, "loss": 0.33276641368865967, "step": 2498 }, { "epoch": 2.696871628910464, "grad_norm": 1.8246636390686035, "learning_rate": 6.254114897752822e-08, "loss": 0.534456193447113, "step": 2500 }, { "epoch": 2.6990291262135924, "grad_norm": 1.4518077373504639, "learning_rate": 6.23655330331743e-08, "loss": 0.48372286558151245, "step": 2502 }, { "epoch": 2.701186623516721, "grad_norm": 1.9352519512176514, "learning_rate": 6.21911206173572e-08, "loss": 0.44714611768722534, "step": 2504 }, { "epoch": 2.7033441208198488, "grad_norm": 63.32822036743164, "learning_rate": 6.20179127172655e-08, "loss": 0.49458324909210205, "step": 2506 }, { "epoch": 2.705501618122977, "grad_norm": 1.128757357597351, "learning_rate": 6.184591031327023e-08, "loss": 0.53676438331604, "step": 2508 }, { "epoch": 2.7076591154261056, "grad_norm": 1.42232346534729, "learning_rate": 6.1675114378919e-08, "loss": 0.6496074199676514, "step": 2510 }, { "epoch": 2.709816612729234, "grad_norm": 1.3712254762649536, "learning_rate": 6.150552588093088e-08, "loss": 0.30613094568252563, "step": 2512 }, { "epoch": 2.7119741100323624, "grad_norm": 1.141538381576538, "learning_rate": 6.133714577919062e-08, "loss": 0.6155597567558289, "step": 2514 }, { "epoch": 2.714131607335491, "grad_norm": 1.5372565984725952, "learning_rate": 6.116997502674356e-08, "loss": 0.5866535305976868, "step": 2516 }, { "epoch": 2.7162891046386193, "grad_norm": 1.712314248085022, "learning_rate": 6.100401456978973e-08, "loss": 0.6070207357406616, "step": 2518 }, { "epoch": 2.7184466019417477, "grad_norm": 1.3143550157546997, "learning_rate": 6.0839265347679e-08, "loss": 0.46849966049194336, "step": 2520 }, { "epoch": 2.720604099244876, "grad_norm": 1.805611491203308, "learning_rate": 6.06757282929055e-08, "loss": 0.44359534978866577, "step": 2522 }, { "epoch": 2.722761596548004, "grad_norm": 3.4996039867401123, "learning_rate": 6.051340433110235e-08, "loss": 0.4810839295387268, "step": 2524 }, { "epoch": 2.724919093851133, "grad_norm": 0.6923230886459351, "learning_rate": 6.035229438103654e-08, "loss": 0.47448840737342834, "step": 2526 }, { "epoch": 2.727076591154261, "grad_norm": 1.4109259843826294, "learning_rate": 6.019239935460361e-08, "loss": 0.4482736885547638, "step": 2528 }, { "epoch": 2.7292340884573894, "grad_norm": 2.148198127746582, "learning_rate": 6.003372015682248e-08, "loss": 0.47598910331726074, "step": 2530 }, { "epoch": 2.7313915857605178, "grad_norm": 2.2834556102752686, "learning_rate": 5.987625768583047e-08, "loss": 0.5227712392807007, "step": 2532 }, { "epoch": 2.733549083063646, "grad_norm": 3.4445295333862305, "learning_rate": 5.972001283287814e-08, "loss": 0.4431000053882599, "step": 2534 }, { "epoch": 2.7357065803667746, "grad_norm": 2.449721097946167, "learning_rate": 5.956498648232411e-08, "loss": 0.4020468294620514, "step": 2536 }, { "epoch": 2.737864077669903, "grad_norm": 6.9917073249816895, "learning_rate": 5.9411179511630237e-08, "loss": 0.3725473880767822, "step": 2538 }, { "epoch": 2.7400215749730314, "grad_norm": 2.097342014312744, "learning_rate": 5.9258592791356675e-08, "loss": 0.47959214448928833, "step": 2540 }, { "epoch": 2.7421790722761594, "grad_norm": 1.6001266241073608, "learning_rate": 5.910722718515675e-08, "loss": 0.49233609437942505, "step": 2542 }, { "epoch": 2.7443365695792883, "grad_norm": 2.2737677097320557, "learning_rate": 5.8957083549772227e-08, "loss": 0.5828397870063782, "step": 2544 }, { "epoch": 2.7464940668824163, "grad_norm": 4.293592929840088, "learning_rate": 5.880816273502835e-08, "loss": 0.40149906277656555, "step": 2546 }, { "epoch": 2.7486515641855447, "grad_norm": 2.3264200687408447, "learning_rate": 5.866046558382924e-08, "loss": 0.5630208849906921, "step": 2548 }, { "epoch": 2.750809061488673, "grad_norm": 0.47289416193962097, "learning_rate": 5.851399293215284e-08, "loss": 0.3701988160610199, "step": 2550 }, { "epoch": 2.7529665587918015, "grad_norm": 4.011696815490723, "learning_rate": 5.8368745609046394e-08, "loss": 0.4746440351009369, "step": 2552 }, { "epoch": 2.75512405609493, "grad_norm": 4.506484508514404, "learning_rate": 5.8224724436621686e-08, "loss": 0.41828322410583496, "step": 2554 }, { "epoch": 2.7572815533980584, "grad_norm": 1.0973607301712036, "learning_rate": 5.808193023005037e-08, "loss": 0.35553479194641113, "step": 2556 }, { "epoch": 2.759439050701187, "grad_norm": 3.0977184772491455, "learning_rate": 5.7940363797559355e-08, "loss": 0.6049969792366028, "step": 2558 }, { "epoch": 2.7615965480043148, "grad_norm": 3.566246271133423, "learning_rate": 5.780002594042628e-08, "loss": 0.4752573072910309, "step": 2560 }, { "epoch": 2.7637540453074436, "grad_norm": 3.1380441188812256, "learning_rate": 5.766091745297499e-08, "loss": 0.42298072576522827, "step": 2562 }, { "epoch": 2.7659115426105716, "grad_norm": 1.4781795740127563, "learning_rate": 5.752303912257083e-08, "loss": 0.27772021293640137, "step": 2564 }, { "epoch": 2.7680690399137, "grad_norm": 0.4035155773162842, "learning_rate": 5.738639172961655e-08, "loss": 0.2534405291080475, "step": 2566 }, { "epoch": 2.7702265372168284, "grad_norm": 2.6446008682250977, "learning_rate": 5.725097604754762e-08, "loss": 0.445311039686203, "step": 2568 }, { "epoch": 2.772384034519957, "grad_norm": 0.6513259410858154, "learning_rate": 5.7116792842827847e-08, "loss": 0.3810059428215027, "step": 2570 }, { "epoch": 2.7745415318230853, "grad_norm": 1.0772795677185059, "learning_rate": 5.698384287494524e-08, "loss": 0.4859530031681061, "step": 2572 }, { "epoch": 2.7766990291262137, "grad_norm": 1.3191403150558472, "learning_rate": 5.68521268964075e-08, "loss": 0.3801627457141876, "step": 2574 }, { "epoch": 2.778856526429342, "grad_norm": 1.1567744016647339, "learning_rate": 5.672164565273794e-08, "loss": 0.43105101585388184, "step": 2576 }, { "epoch": 2.78101402373247, "grad_norm": 1.525492548942566, "learning_rate": 5.6592399882471005e-08, "loss": 0.46906399726867676, "step": 2578 }, { "epoch": 2.783171521035599, "grad_norm": 1.5278234481811523, "learning_rate": 5.646439031714843e-08, "loss": 0.44850075244903564, "step": 2580 }, { "epoch": 2.785329018338727, "grad_norm": 1.656800627708435, "learning_rate": 5.633761768131492e-08, "loss": 0.4555439352989197, "step": 2582 }, { "epoch": 2.7874865156418553, "grad_norm": 5.497950077056885, "learning_rate": 5.6212082692513836e-08, "loss": 0.5264440774917603, "step": 2584 }, { "epoch": 2.7896440129449838, "grad_norm": 3.069579601287842, "learning_rate": 5.608778606128367e-08, "loss": 0.4567970633506775, "step": 2586 }, { "epoch": 2.791801510248112, "grad_norm": 0.7134677767753601, "learning_rate": 5.59647284911535e-08, "loss": 0.5471997261047363, "step": 2588 }, { "epoch": 2.7939590075512406, "grad_norm": 1.767069935798645, "learning_rate": 5.5842910678639274e-08, "loss": 0.5194593667984009, "step": 2590 }, { "epoch": 2.796116504854369, "grad_norm": 1.2429695129394531, "learning_rate": 5.5722333313239796e-08, "loss": 0.472802996635437, "step": 2592 }, { "epoch": 2.7982740021574974, "grad_norm": 3.2857539653778076, "learning_rate": 5.5602997077432874e-08, "loss": 0.6141800880432129, "step": 2594 }, { "epoch": 2.8004314994606254, "grad_norm": 1.292257308959961, "learning_rate": 5.548490264667141e-08, "loss": 0.4846678376197815, "step": 2596 }, { "epoch": 2.8025889967637543, "grad_norm": 22.469823837280273, "learning_rate": 5.536805068937954e-08, "loss": 0.5878589749336243, "step": 2598 }, { "epoch": 2.8047464940668823, "grad_norm": 3.061379909515381, "learning_rate": 5.525244186694894e-08, "loss": 0.4901062548160553, "step": 2600 }, { "epoch": 2.8069039913700107, "grad_norm": 2.615751266479492, "learning_rate": 5.5138076833735084e-08, "loss": 0.40489572286605835, "step": 2602 }, { "epoch": 2.809061488673139, "grad_norm": 1.320258378982544, "learning_rate": 5.5024956237053384e-08, "loss": 0.5788986682891846, "step": 2604 }, { "epoch": 2.8112189859762675, "grad_norm": 1.3249293565750122, "learning_rate": 5.491308071717573e-08, "loss": 0.42938145995140076, "step": 2606 }, { "epoch": 2.813376483279396, "grad_norm": 1.0026346445083618, "learning_rate": 5.480245090732673e-08, "loss": 0.495646595954895, "step": 2608 }, { "epoch": 2.8155339805825244, "grad_norm": 1.6520612239837646, "learning_rate": 5.469306743368023e-08, "loss": 0.4816511273384094, "step": 2610 }, { "epoch": 2.8176914778856528, "grad_norm": 1.3639336824417114, "learning_rate": 5.458493091535563e-08, "loss": 0.3476675748825073, "step": 2612 }, { "epoch": 2.8198489751887807, "grad_norm": 1.3383265733718872, "learning_rate": 5.447804196441453e-08, "loss": 0.5728883147239685, "step": 2614 }, { "epoch": 2.8220064724919096, "grad_norm": 3.793182134628296, "learning_rate": 5.4372401185857145e-08, "loss": 0.6043237447738647, "step": 2616 }, { "epoch": 2.8241639697950376, "grad_norm": 2.1428301334381104, "learning_rate": 5.426800917761897e-08, "loss": 0.529897928237915, "step": 2618 }, { "epoch": 2.826321467098166, "grad_norm": 2.7142257690429688, "learning_rate": 5.41648665305673e-08, "loss": 0.5976702570915222, "step": 2620 }, { "epoch": 2.8284789644012944, "grad_norm": 1.4826109409332275, "learning_rate": 5.406297382849803e-08, "loss": 0.4717695116996765, "step": 2622 }, { "epoch": 2.830636461704423, "grad_norm": 1.6335054636001587, "learning_rate": 5.396233164813221e-08, "loss": 0.48008373379707336, "step": 2624 }, { "epoch": 2.8327939590075513, "grad_norm": 1.709679126739502, "learning_rate": 5.3862940559112795e-08, "loss": 0.5768192410469055, "step": 2626 }, { "epoch": 2.8349514563106797, "grad_norm": 3.3123061656951904, "learning_rate": 5.376480112400159e-08, "loss": 0.5282171368598938, "step": 2628 }, { "epoch": 2.837108953613808, "grad_norm": 6.133842945098877, "learning_rate": 5.366791389827578e-08, "loss": 0.47790658473968506, "step": 2630 }, { "epoch": 2.839266450916936, "grad_norm": 2.2591352462768555, "learning_rate": 5.3572279430325055e-08, "loss": 0.5901204347610474, "step": 2632 }, { "epoch": 2.841423948220065, "grad_norm": 1.7429542541503906, "learning_rate": 5.3477898261448344e-08, "loss": 0.40829578042030334, "step": 2634 }, { "epoch": 2.843581445523193, "grad_norm": 1.7901886701583862, "learning_rate": 5.3384770925850796e-08, "loss": 0.6178877353668213, "step": 2636 }, { "epoch": 2.8457389428263213, "grad_norm": 2.5778701305389404, "learning_rate": 5.3292897950640776e-08, "loss": 0.5174447298049927, "step": 2638 }, { "epoch": 2.8478964401294498, "grad_norm": 1.1254876852035522, "learning_rate": 5.3202279855826885e-08, "loss": 0.48666954040527344, "step": 2640 }, { "epoch": 2.850053937432578, "grad_norm": 1.12764310836792, "learning_rate": 5.311291715431497e-08, "loss": 0.5326154828071594, "step": 2642 }, { "epoch": 2.8522114347357066, "grad_norm": 4.193994522094727, "learning_rate": 5.3024810351905257e-08, "loss": 0.552856981754303, "step": 2644 }, { "epoch": 2.854368932038835, "grad_norm": 2.1312756538391113, "learning_rate": 5.2937959947289485e-08, "loss": 0.31122079491615295, "step": 2646 }, { "epoch": 2.8565264293419634, "grad_norm": 2.0096354484558105, "learning_rate": 5.2852366432048054e-08, "loss": 0.4695837199687958, "step": 2648 }, { "epoch": 2.858683926645092, "grad_norm": 2.734739065170288, "learning_rate": 5.2768030290647315e-08, "loss": 0.4716711640357971, "step": 2650 }, { "epoch": 2.8608414239482203, "grad_norm": 2.2249631881713867, "learning_rate": 5.26849520004367e-08, "loss": 0.44928935170173645, "step": 2652 }, { "epoch": 2.8629989212513482, "grad_norm": 4.771669387817383, "learning_rate": 5.260313203164621e-08, "loss": 0.49516862630844116, "step": 2654 }, { "epoch": 2.8651564185544767, "grad_norm": 1.4668776988983154, "learning_rate": 5.252257084738355e-08, "loss": 0.4240492582321167, "step": 2656 }, { "epoch": 2.867313915857605, "grad_norm": 1.4637136459350586, "learning_rate": 5.244326890363166e-08, "loss": 0.4604833126068115, "step": 2658 }, { "epoch": 2.8694714131607335, "grad_norm": 2.386207342147827, "learning_rate": 5.2365226649246e-08, "loss": 0.5221148133277893, "step": 2660 }, { "epoch": 2.871628910463862, "grad_norm": 1.389047622680664, "learning_rate": 5.2288444525952225e-08, "loss": 0.5388311147689819, "step": 2662 }, { "epoch": 2.8737864077669903, "grad_norm": 1.2806384563446045, "learning_rate": 5.221292296834336e-08, "loss": 0.4201410114765167, "step": 2664 }, { "epoch": 2.8759439050701188, "grad_norm": 2.330186605453491, "learning_rate": 5.213866240387767e-08, "loss": 0.48175758123397827, "step": 2666 }, { "epoch": 2.878101402373247, "grad_norm": 1.2908588647842407, "learning_rate": 5.206566325287606e-08, "loss": 0.5429530739784241, "step": 2668 }, { "epoch": 2.8802588996763756, "grad_norm": 1.2484322786331177, "learning_rate": 5.199392592851967e-08, "loss": 0.3116611838340759, "step": 2670 }, { "epoch": 2.8824163969795036, "grad_norm": 1.2387123107910156, "learning_rate": 5.192345083684766e-08, "loss": 0.5519980192184448, "step": 2672 }, { "epoch": 2.884573894282632, "grad_norm": 1.2122451066970825, "learning_rate": 5.1854238376754894e-08, "loss": 0.4367588758468628, "step": 2674 }, { "epoch": 2.8867313915857604, "grad_norm": 1.0126200914382935, "learning_rate": 5.178628893998947e-08, "loss": 0.5291083455085754, "step": 2676 }, { "epoch": 2.888888888888889, "grad_norm": 17.17489242553711, "learning_rate": 5.171960291115085e-08, "loss": 0.440005362033844, "step": 2678 }, { "epoch": 2.8910463861920173, "grad_norm": 2.179588794708252, "learning_rate": 5.165418066768743e-08, "loss": 0.33802393078804016, "step": 2680 }, { "epoch": 2.8932038834951457, "grad_norm": 1.7356486320495605, "learning_rate": 5.1590022579894453e-08, "loss": 0.6102227568626404, "step": 2682 }, { "epoch": 2.895361380798274, "grad_norm": 1.4570553302764893, "learning_rate": 5.152712901091197e-08, "loss": 0.519218921661377, "step": 2684 }, { "epoch": 2.8975188781014025, "grad_norm": 1.47493577003479, "learning_rate": 5.146550031672273e-08, "loss": 0.5683881640434265, "step": 2686 }, { "epoch": 2.899676375404531, "grad_norm": 2.640059471130371, "learning_rate": 5.1405136846150246e-08, "loss": 0.3549501597881317, "step": 2688 }, { "epoch": 2.901833872707659, "grad_norm": 3.362346887588501, "learning_rate": 5.1346038940856663e-08, "loss": 0.4709499180316925, "step": 2690 }, { "epoch": 2.9039913700107873, "grad_norm": 2.4766998291015625, "learning_rate": 5.1288206935341004e-08, "loss": 0.43772682547569275, "step": 2692 }, { "epoch": 2.9061488673139158, "grad_norm": 12.43819808959961, "learning_rate": 5.123164115693719e-08, "loss": 0.5715151429176331, "step": 2694 }, { "epoch": 2.908306364617044, "grad_norm": 1.2701572179794312, "learning_rate": 5.11763419258121e-08, "loss": 0.4489721655845642, "step": 2696 }, { "epoch": 2.9104638619201726, "grad_norm": 2.798393964767456, "learning_rate": 5.112230955496399e-08, "loss": 0.483008474111557, "step": 2698 }, { "epoch": 2.912621359223301, "grad_norm": 2.360163688659668, "learning_rate": 5.106954435022051e-08, "loss": 0.23574459552764893, "step": 2700 }, { "epoch": 2.9147788565264294, "grad_norm": 1.3117761611938477, "learning_rate": 5.1018046610236994e-08, "loss": 0.4940152168273926, "step": 2702 }, { "epoch": 2.916936353829558, "grad_norm": 4.25691556930542, "learning_rate": 5.0967816626494914e-08, "loss": 0.6037485003471375, "step": 2704 }, { "epoch": 2.9190938511326863, "grad_norm": 1.7997087240219116, "learning_rate": 5.09188546833001e-08, "loss": 0.5916652083396912, "step": 2706 }, { "epoch": 2.9212513484358142, "grad_norm": 1.2903374433517456, "learning_rate": 5.0871161057781174e-08, "loss": 0.5085786581039429, "step": 2708 }, { "epoch": 2.9234088457389427, "grad_norm": 2.052659511566162, "learning_rate": 5.0824736019887965e-08, "loss": 0.4842919409275055, "step": 2710 }, { "epoch": 2.925566343042071, "grad_norm": 1.6598625183105469, "learning_rate": 5.077957983239001e-08, "loss": 0.485705703496933, "step": 2712 }, { "epoch": 2.9277238403451995, "grad_norm": 1.2494041919708252, "learning_rate": 5.0735692750875014e-08, "loss": 0.5640828609466553, "step": 2714 }, { "epoch": 2.929881337648328, "grad_norm": 2.9620018005371094, "learning_rate": 5.0693075023747485e-08, "loss": 0.4791678786277771, "step": 2716 }, { "epoch": 2.9320388349514563, "grad_norm": 3.0161349773406982, "learning_rate": 5.0651726892227225e-08, "loss": 0.5130857229232788, "step": 2718 }, { "epoch": 2.9341963322545848, "grad_norm": 3.2139105796813965, "learning_rate": 5.061164859034808e-08, "loss": 0.5509821176528931, "step": 2720 }, { "epoch": 2.936353829557713, "grad_norm": 2.8673787117004395, "learning_rate": 5.057284034495652e-08, "loss": 0.4344579875469208, "step": 2722 }, { "epoch": 2.9385113268608416, "grad_norm": 2.557595729827881, "learning_rate": 5.05353023757104e-08, "loss": 0.4874427616596222, "step": 2724 }, { "epoch": 2.9406688241639696, "grad_norm": 1.2036933898925781, "learning_rate": 5.04990348950777e-08, "loss": 0.3965007960796356, "step": 2726 }, { "epoch": 2.9428263214670984, "grad_norm": 3.1260766983032227, "learning_rate": 5.0464038108335355e-08, "loss": 0.5202281475067139, "step": 2728 }, { "epoch": 2.9449838187702264, "grad_norm": 10.786460876464844, "learning_rate": 5.043031221356804e-08, "loss": 0.37282276153564453, "step": 2730 }, { "epoch": 2.947141316073355, "grad_norm": 1.7192671298980713, "learning_rate": 5.039785740166707e-08, "loss": 0.40285778045654297, "step": 2732 }, { "epoch": 2.9492988133764833, "grad_norm": 1.8213419914245605, "learning_rate": 5.036667385632939e-08, "loss": 0.49603918194770813, "step": 2734 }, { "epoch": 2.9514563106796117, "grad_norm": 1.3751518726348877, "learning_rate": 5.0336761754056387e-08, "loss": 0.4726869463920593, "step": 2736 }, { "epoch": 2.95361380798274, "grad_norm": 2.531486749649048, "learning_rate": 5.030812126415301e-08, "loss": 0.4985443949699402, "step": 2738 }, { "epoch": 2.9557713052858685, "grad_norm": 1.1253113746643066, "learning_rate": 5.028075254872682e-08, "loss": 0.43464401364326477, "step": 2740 }, { "epoch": 2.957928802588997, "grad_norm": 1.2070587873458862, "learning_rate": 5.025465576268697e-08, "loss": 0.4627860486507416, "step": 2742 }, { "epoch": 2.960086299892125, "grad_norm": 6.266397953033447, "learning_rate": 5.0229831053743396e-08, "loss": 0.4669394791126251, "step": 2744 }, { "epoch": 2.9622437971952538, "grad_norm": 1.160976767539978, "learning_rate": 5.020627856240602e-08, "loss": 0.5476797819137573, "step": 2746 }, { "epoch": 2.9644012944983817, "grad_norm": 1.2844939231872559, "learning_rate": 5.018399842198384e-08, "loss": 0.4904819130897522, "step": 2748 }, { "epoch": 2.96655879180151, "grad_norm": 2.1023972034454346, "learning_rate": 5.016299075858434e-08, "loss": 0.5619233250617981, "step": 2750 }, { "epoch": 2.9687162891046386, "grad_norm": 2.740511417388916, "learning_rate": 5.0143255691112545e-08, "loss": 0.5617838501930237, "step": 2752 }, { "epoch": 2.970873786407767, "grad_norm": 1.1180198192596436, "learning_rate": 5.012479333127061e-08, "loss": 0.6266708374023438, "step": 2754 }, { "epoch": 2.9730312837108954, "grad_norm": 2.0200552940368652, "learning_rate": 5.0107603783556983e-08, "loss": 0.2789224684238434, "step": 2756 }, { "epoch": 2.975188781014024, "grad_norm": 1.1091794967651367, "learning_rate": 5.009168714526591e-08, "loss": 0.5262956023216248, "step": 2758 }, { "epoch": 2.9773462783171523, "grad_norm": 1.1029998064041138, "learning_rate": 5.0077043506486894e-08, "loss": 0.1211983859539032, "step": 2760 }, { "epoch": 2.9795037756202802, "grad_norm": 1.243391990661621, "learning_rate": 5.006367295010413e-08, "loss": 0.36533698439598083, "step": 2762 }, { "epoch": 2.981661272923409, "grad_norm": 1.189220666885376, "learning_rate": 5.005157555179603e-08, "loss": 0.6093316078186035, "step": 2764 }, { "epoch": 2.983818770226537, "grad_norm": 1.3239524364471436, "learning_rate": 5.0040751380034905e-08, "loss": 0.29459065198898315, "step": 2766 }, { "epoch": 2.9859762675296655, "grad_norm": 3.5429818630218506, "learning_rate": 5.0031200496086436e-08, "loss": 0.5054223537445068, "step": 2768 }, { "epoch": 2.988133764832794, "grad_norm": 3.4252212047576904, "learning_rate": 5.0022922954009416e-08, "loss": 0.34435006976127625, "step": 2770 }, { "epoch": 2.9902912621359223, "grad_norm": 2.453650951385498, "learning_rate": 5.001591880065541e-08, "loss": 0.5675299167633057, "step": 2772 }, { "epoch": 2.9924487594390508, "grad_norm": 1.6163811683654785, "learning_rate": 5.001018807566848e-08, "loss": 0.5318145751953125, "step": 2774 }, { "epoch": 2.994606256742179, "grad_norm": 4.034750461578369, "learning_rate": 5.000573081148502e-08, "loss": 0.5025465488433838, "step": 2776 }, { "epoch": 2.9967637540453076, "grad_norm": 1.6249115467071533, "learning_rate": 5.0002547033333525e-08, "loss": 0.235714852809906, "step": 2778 }, { "epoch": 2.9989212513484356, "grad_norm": 2.0912704467773438, "learning_rate": 5.000063675923442e-08, "loss": 0.2616070806980133, "step": 2780 }, { "epoch": 3.0, "step": 2781, "total_flos": 3.284111394515778e+18, "train_loss": 0.5546778752215686, "train_runtime": 35021.8231, "train_samples_per_second": 1.271, "train_steps_per_second": 0.079 } ], "logging_steps": 2, "max_steps": 2781, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 99999, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.284111394515778e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }