{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.670758292249388, "eval_steps": 500, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.70758292249388e-05, "grad_norm": 0.2088892071184168, "learning_rate": 2e-05, "loss": 5.5031, "step": 1 }, { "epoch": 0.0001341516584498776, "grad_norm": 0.17321255452342477, "learning_rate": 2e-05, "loss": 5.4741, "step": 2 }, { "epoch": 0.00020122748767481639, "grad_norm": 0.17705717953835415, "learning_rate": 2e-05, "loss": 5.5811, "step": 3 }, { "epoch": 0.0002683033168997552, "grad_norm": 0.17260535045397474, "learning_rate": 2e-05, "loss": 5.4813, "step": 4 }, { "epoch": 0.00033537914612469396, "grad_norm": 0.19166054132312318, "learning_rate": 2e-05, "loss": 5.7439, "step": 5 }, { "epoch": 0.00040245497534963277, "grad_norm": 0.17062805312294974, "learning_rate": 2e-05, "loss": 5.4534, "step": 6 }, { "epoch": 0.00046953080457457153, "grad_norm": 0.1441720310500711, "learning_rate": 2e-05, "loss": 5.6432, "step": 7 }, { "epoch": 0.0005366066337995104, "grad_norm": 0.18782153299505971, "learning_rate": 2e-05, "loss": 5.6218, "step": 8 }, { "epoch": 0.0006036824630244491, "grad_norm": 0.16853907715806088, "learning_rate": 2e-05, "loss": 5.6835, "step": 9 }, { "epoch": 0.0006707582922493879, "grad_norm": 0.1473332819625821, "learning_rate": 2e-05, "loss": 5.6875, "step": 10 }, { "epoch": 0.0007378341214743267, "grad_norm": 0.16391970465739825, "learning_rate": 2e-05, "loss": 5.4691, "step": 11 }, { "epoch": 0.0008049099506992655, "grad_norm": 0.16220219477867096, "learning_rate": 2e-05, "loss": 5.5406, "step": 12 }, { "epoch": 0.0008719857799242044, "grad_norm": 0.1453858067106413, "learning_rate": 2e-05, "loss": 5.5102, "step": 13 }, { "epoch": 0.0009390616091491431, "grad_norm": 0.14084321490627466, "learning_rate": 2e-05, "loss": 5.5568, "step": 14 }, { "epoch": 0.0010061374383740819, "grad_norm": 0.14000646727236543, "learning_rate": 2e-05, "loss": 5.5108, "step": 15 }, { "epoch": 0.0010732132675990208, "grad_norm": 0.15977868406169066, "learning_rate": 2e-05, "loss": 5.5685, "step": 16 }, { "epoch": 0.0011402890968239595, "grad_norm": 0.13950272788338686, "learning_rate": 2e-05, "loss": 5.4368, "step": 17 }, { "epoch": 0.0012073649260488982, "grad_norm": 0.1423850995627629, "learning_rate": 2e-05, "loss": 5.5593, "step": 18 }, { "epoch": 0.0012744407552738371, "grad_norm": 0.14591573013494366, "learning_rate": 2e-05, "loss": 5.5718, "step": 19 }, { "epoch": 0.0013415165844987758, "grad_norm": 0.13782840770835594, "learning_rate": 2e-05, "loss": 5.5443, "step": 20 }, { "epoch": 0.0014085924137237148, "grad_norm": 0.13978648384610254, "learning_rate": 2e-05, "loss": 5.408, "step": 21 }, { "epoch": 0.0014756682429486535, "grad_norm": 0.13512627725813864, "learning_rate": 2e-05, "loss": 5.5443, "step": 22 }, { "epoch": 0.0015427440721735922, "grad_norm": 0.1312106460504487, "learning_rate": 2e-05, "loss": 5.5331, "step": 23 }, { "epoch": 0.001609819901398531, "grad_norm": 0.14056683946857726, "learning_rate": 2e-05, "loss": 5.5272, "step": 24 }, { "epoch": 0.0016768957306234698, "grad_norm": 0.13231311902534845, "learning_rate": 2e-05, "loss": 5.5305, "step": 25 }, { "epoch": 0.0017439715598484087, "grad_norm": 0.13784679736013092, "learning_rate": 2e-05, "loss": 5.5801, "step": 26 }, { "epoch": 0.0018110473890733474, "grad_norm": 0.14221597170047004, "learning_rate": 2e-05, "loss": 5.4854, "step": 27 }, { "epoch": 0.0018781232182982861, "grad_norm": 0.13640226046082501, "learning_rate": 2e-05, "loss": 5.6401, "step": 28 }, { "epoch": 0.001945199047523225, "grad_norm": 0.13092587702477357, "learning_rate": 2e-05, "loss": 5.5403, "step": 29 }, { "epoch": 0.0020122748767481637, "grad_norm": 0.13344689085780878, "learning_rate": 2e-05, "loss": 5.6236, "step": 30 }, { "epoch": 0.0020793507059731027, "grad_norm": 0.1335214434649408, "learning_rate": 2e-05, "loss": 5.5047, "step": 31 }, { "epoch": 0.0021464265351980416, "grad_norm": 0.1328396399147632, "learning_rate": 2e-05, "loss": 5.5275, "step": 32 }, { "epoch": 0.00221350236442298, "grad_norm": 0.13535467521879146, "learning_rate": 2e-05, "loss": 5.4542, "step": 33 }, { "epoch": 0.002280578193647919, "grad_norm": 0.13414149168866382, "learning_rate": 2e-05, "loss": 5.33, "step": 34 }, { "epoch": 0.002347654022872858, "grad_norm": 0.13780529129927843, "learning_rate": 2e-05, "loss": 5.4567, "step": 35 }, { "epoch": 0.0024147298520977964, "grad_norm": 0.13604219553292732, "learning_rate": 2e-05, "loss": 5.5357, "step": 36 }, { "epoch": 0.0024818056813227353, "grad_norm": 0.1318214035103109, "learning_rate": 2e-05, "loss": 5.3572, "step": 37 }, { "epoch": 0.0025488815105476743, "grad_norm": 0.1351630493431948, "learning_rate": 2e-05, "loss": 5.5322, "step": 38 }, { "epoch": 0.0026159573397726127, "grad_norm": 0.1284795748342403, "learning_rate": 2e-05, "loss": 5.6209, "step": 39 }, { "epoch": 0.0026830331689975517, "grad_norm": 0.13753129488003146, "learning_rate": 2e-05, "loss": 5.5629, "step": 40 }, { "epoch": 0.0027501089982224906, "grad_norm": 0.1336923415931152, "learning_rate": 2e-05, "loss": 5.698, "step": 41 }, { "epoch": 0.0028171848274474295, "grad_norm": 0.1353122557488069, "learning_rate": 2e-05, "loss": 5.5382, "step": 42 }, { "epoch": 0.002884260656672368, "grad_norm": 0.13239176265106772, "learning_rate": 2e-05, "loss": 5.58, "step": 43 }, { "epoch": 0.002951336485897307, "grad_norm": 0.12996921152033858, "learning_rate": 2e-05, "loss": 5.3952, "step": 44 }, { "epoch": 0.003018412315122246, "grad_norm": 0.13451266328355627, "learning_rate": 2e-05, "loss": 5.5147, "step": 45 }, { "epoch": 0.0030854881443471843, "grad_norm": 0.12953006544419737, "learning_rate": 2e-05, "loss": 5.5425, "step": 46 }, { "epoch": 0.0031525639735721232, "grad_norm": 0.13190036666236674, "learning_rate": 2e-05, "loss": 5.4969, "step": 47 }, { "epoch": 0.003219639802797062, "grad_norm": 0.12580813137406593, "learning_rate": 2e-05, "loss": 5.5555, "step": 48 }, { "epoch": 0.0032867156320220007, "grad_norm": 0.134240834895453, "learning_rate": 2e-05, "loss": 5.5609, "step": 49 }, { "epoch": 0.0033537914612469396, "grad_norm": 0.13509934480058702, "learning_rate": 2e-05, "loss": 5.4461, "step": 50 }, { "epoch": 0.0034208672904718785, "grad_norm": 0.12911760934863037, "learning_rate": 2e-05, "loss": 5.372, "step": 51 }, { "epoch": 0.0034879431196968174, "grad_norm": 0.1289304855467342, "learning_rate": 2e-05, "loss": 5.5146, "step": 52 }, { "epoch": 0.003555018948921756, "grad_norm": 0.13558938400867776, "learning_rate": 2e-05, "loss": 5.6586, "step": 53 }, { "epoch": 0.003622094778146695, "grad_norm": 0.13951889305413906, "learning_rate": 2e-05, "loss": 5.5152, "step": 54 }, { "epoch": 0.0036891706073716338, "grad_norm": 0.1310176371439046, "learning_rate": 2e-05, "loss": 5.4154, "step": 55 }, { "epoch": 0.0037562464365965722, "grad_norm": 0.13468151741173084, "learning_rate": 2e-05, "loss": 5.4743, "step": 56 }, { "epoch": 0.003823322265821511, "grad_norm": 0.13657775759268048, "learning_rate": 2e-05, "loss": 5.4105, "step": 57 }, { "epoch": 0.00389039809504645, "grad_norm": 0.1377174064931782, "learning_rate": 2e-05, "loss": 5.5572, "step": 58 }, { "epoch": 0.003957473924271389, "grad_norm": 0.13134450915923954, "learning_rate": 2e-05, "loss": 5.4309, "step": 59 }, { "epoch": 0.0040245497534963275, "grad_norm": 0.13550158007127727, "learning_rate": 2e-05, "loss": 5.3783, "step": 60 }, { "epoch": 0.004091625582721266, "grad_norm": 0.14353127480952524, "learning_rate": 2e-05, "loss": 5.5321, "step": 61 }, { "epoch": 0.004158701411946205, "grad_norm": 0.13339324787974144, "learning_rate": 2e-05, "loss": 5.4805, "step": 62 }, { "epoch": 0.004225777241171144, "grad_norm": 0.1325785575514609, "learning_rate": 2e-05, "loss": 5.5095, "step": 63 }, { "epoch": 0.004292853070396083, "grad_norm": 0.13439259042034174, "learning_rate": 2e-05, "loss": 5.4382, "step": 64 }, { "epoch": 0.004359928899621022, "grad_norm": 0.13099643230056837, "learning_rate": 2e-05, "loss": 5.5033, "step": 65 }, { "epoch": 0.00442700472884596, "grad_norm": 0.1372781932082803, "learning_rate": 2e-05, "loss": 5.5851, "step": 66 }, { "epoch": 0.0044940805580708995, "grad_norm": 0.13618270342101546, "learning_rate": 2e-05, "loss": 5.6073, "step": 67 }, { "epoch": 0.004561156387295838, "grad_norm": 0.1281776153813227, "learning_rate": 2e-05, "loss": 5.5436, "step": 68 }, { "epoch": 0.0046282322165207765, "grad_norm": 0.13014390480646368, "learning_rate": 2e-05, "loss": 5.5412, "step": 69 }, { "epoch": 0.004695308045745716, "grad_norm": 0.13117541464138138, "learning_rate": 2e-05, "loss": 5.4486, "step": 70 }, { "epoch": 0.004762383874970654, "grad_norm": 0.13532574317570706, "learning_rate": 2e-05, "loss": 5.4481, "step": 71 }, { "epoch": 0.004829459704195593, "grad_norm": 0.13728600500531574, "learning_rate": 2e-05, "loss": 5.5811, "step": 72 }, { "epoch": 0.004896535533420532, "grad_norm": 0.13883210697177115, "learning_rate": 2e-05, "loss": 5.5089, "step": 73 }, { "epoch": 0.004963611362645471, "grad_norm": 0.13866279536440218, "learning_rate": 2e-05, "loss": 5.5829, "step": 74 }, { "epoch": 0.005030687191870409, "grad_norm": 0.13919952789506831, "learning_rate": 2e-05, "loss": 5.4849, "step": 75 }, { "epoch": 0.0050977630210953485, "grad_norm": 0.13470403306264467, "learning_rate": 2e-05, "loss": 5.5243, "step": 76 }, { "epoch": 0.005164838850320287, "grad_norm": 0.13656360198853124, "learning_rate": 2e-05, "loss": 5.4004, "step": 77 }, { "epoch": 0.0052319146795452255, "grad_norm": 0.13414689167296273, "learning_rate": 2e-05, "loss": 5.5352, "step": 78 }, { "epoch": 0.005298990508770165, "grad_norm": 0.13548879642087264, "learning_rate": 2e-05, "loss": 5.3694, "step": 79 }, { "epoch": 0.005366066337995103, "grad_norm": 0.13470819997700828, "learning_rate": 2e-05, "loss": 5.5881, "step": 80 }, { "epoch": 0.005433142167220043, "grad_norm": 0.13946108633411297, "learning_rate": 2e-05, "loss": 5.5732, "step": 81 }, { "epoch": 0.005500217996444981, "grad_norm": 0.14148673158288796, "learning_rate": 2e-05, "loss": 5.4718, "step": 82 }, { "epoch": 0.00556729382566992, "grad_norm": 0.13123561751639057, "learning_rate": 2e-05, "loss": 5.4608, "step": 83 }, { "epoch": 0.005634369654894859, "grad_norm": 0.14453362096375322, "learning_rate": 2e-05, "loss": 5.5689, "step": 84 }, { "epoch": 0.0057014454841197975, "grad_norm": 0.1390134333564256, "learning_rate": 2e-05, "loss": 5.5596, "step": 85 }, { "epoch": 0.005768521313344736, "grad_norm": 0.13086907602934444, "learning_rate": 2e-05, "loss": 5.3965, "step": 86 }, { "epoch": 0.005835597142569675, "grad_norm": 0.1376140994825975, "learning_rate": 2e-05, "loss": 5.5133, "step": 87 }, { "epoch": 0.005902672971794614, "grad_norm": 0.13940797532680824, "learning_rate": 2e-05, "loss": 5.5844, "step": 88 }, { "epoch": 0.005969748801019552, "grad_norm": 0.1380753874103704, "learning_rate": 2e-05, "loss": 5.4489, "step": 89 }, { "epoch": 0.006036824630244492, "grad_norm": 0.13230094784586252, "learning_rate": 2e-05, "loss": 5.4703, "step": 90 }, { "epoch": 0.00610390045946943, "grad_norm": 0.14069642600745544, "learning_rate": 2e-05, "loss": 5.4724, "step": 91 }, { "epoch": 0.006170976288694369, "grad_norm": 0.14169739074173623, "learning_rate": 2e-05, "loss": 5.4373, "step": 92 }, { "epoch": 0.006238052117919308, "grad_norm": 0.1367517162249026, "learning_rate": 2e-05, "loss": 5.4307, "step": 93 }, { "epoch": 0.0063051279471442465, "grad_norm": 0.13627396736285932, "learning_rate": 2e-05, "loss": 5.4903, "step": 94 }, { "epoch": 0.006372203776369185, "grad_norm": 0.13699244564898574, "learning_rate": 2e-05, "loss": 5.6385, "step": 95 }, { "epoch": 0.006439279605594124, "grad_norm": 0.13593832219752122, "learning_rate": 2e-05, "loss": 5.5313, "step": 96 }, { "epoch": 0.006506355434819063, "grad_norm": 0.13857135414659413, "learning_rate": 2e-05, "loss": 5.6319, "step": 97 }, { "epoch": 0.006573431264044001, "grad_norm": 0.13710855531989966, "learning_rate": 2e-05, "loss": 5.4892, "step": 98 }, { "epoch": 0.006640507093268941, "grad_norm": 0.13379942072765424, "learning_rate": 2e-05, "loss": 5.5565, "step": 99 }, { "epoch": 0.006707582922493879, "grad_norm": 0.13095575454186265, "learning_rate": 2e-05, "loss": 5.5431, "step": 100 }, { "epoch": 0.0067746587517188185, "grad_norm": 0.12974376269591142, "learning_rate": 2e-05, "loss": 5.5068, "step": 101 }, { "epoch": 0.006841734580943757, "grad_norm": 0.13412128703491133, "learning_rate": 2e-05, "loss": 5.5711, "step": 102 }, { "epoch": 0.0069088104101686955, "grad_norm": 0.13271313772769053, "learning_rate": 2e-05, "loss": 5.634, "step": 103 }, { "epoch": 0.006975886239393635, "grad_norm": 0.13641168267393963, "learning_rate": 2e-05, "loss": 5.5622, "step": 104 }, { "epoch": 0.007042962068618573, "grad_norm": 0.1364249978035869, "learning_rate": 2e-05, "loss": 5.5103, "step": 105 }, { "epoch": 0.007110037897843512, "grad_norm": 0.13342358315955516, "learning_rate": 2e-05, "loss": 5.5035, "step": 106 }, { "epoch": 0.007177113727068451, "grad_norm": 0.14070835799735137, "learning_rate": 2e-05, "loss": 5.5564, "step": 107 }, { "epoch": 0.00724418955629339, "grad_norm": 0.13328238411806956, "learning_rate": 2e-05, "loss": 5.4184, "step": 108 }, { "epoch": 0.007311265385518328, "grad_norm": 0.13665788614083904, "learning_rate": 2e-05, "loss": 5.4472, "step": 109 }, { "epoch": 0.0073783412147432675, "grad_norm": 0.13844396321247493, "learning_rate": 2e-05, "loss": 5.6078, "step": 110 }, { "epoch": 0.007445417043968206, "grad_norm": 0.13675685111602764, "learning_rate": 2e-05, "loss": 5.531, "step": 111 }, { "epoch": 0.0075124928731931445, "grad_norm": 0.1403964012168049, "learning_rate": 2e-05, "loss": 5.3716, "step": 112 }, { "epoch": 0.007579568702418084, "grad_norm": 0.13406503259528899, "learning_rate": 2e-05, "loss": 5.4193, "step": 113 }, { "epoch": 0.007646644531643022, "grad_norm": 0.14028550665961523, "learning_rate": 2e-05, "loss": 5.4634, "step": 114 }, { "epoch": 0.007713720360867961, "grad_norm": 0.13288525597029105, "learning_rate": 2e-05, "loss": 5.5415, "step": 115 }, { "epoch": 0.0077807961900929, "grad_norm": 0.13356413724016333, "learning_rate": 2e-05, "loss": 5.5839, "step": 116 }, { "epoch": 0.00784787201931784, "grad_norm": 0.13313072816239066, "learning_rate": 2e-05, "loss": 5.535, "step": 117 }, { "epoch": 0.007914947848542778, "grad_norm": 0.14387497257630388, "learning_rate": 2e-05, "loss": 5.4184, "step": 118 }, { "epoch": 0.007982023677767716, "grad_norm": 0.14451243030966593, "learning_rate": 2e-05, "loss": 5.501, "step": 119 }, { "epoch": 0.008049099506992655, "grad_norm": 0.14232739108862444, "learning_rate": 2e-05, "loss": 5.5989, "step": 120 }, { "epoch": 0.008116175336217593, "grad_norm": 0.13421891292311736, "learning_rate": 2e-05, "loss": 5.4069, "step": 121 }, { "epoch": 0.008183251165442532, "grad_norm": 0.13500935263732758, "learning_rate": 2e-05, "loss": 5.5524, "step": 122 }, { "epoch": 0.008250326994667472, "grad_norm": 0.14099518588784138, "learning_rate": 2e-05, "loss": 5.4164, "step": 123 }, { "epoch": 0.00831740282389241, "grad_norm": 0.15075974471607823, "learning_rate": 2e-05, "loss": 5.486, "step": 124 }, { "epoch": 0.00838447865311735, "grad_norm": 0.13783896093144649, "learning_rate": 2e-05, "loss": 5.5206, "step": 125 }, { "epoch": 0.008451554482342288, "grad_norm": 0.1344609185715249, "learning_rate": 2e-05, "loss": 5.4635, "step": 126 }, { "epoch": 0.008518630311567226, "grad_norm": 0.138208337990743, "learning_rate": 2e-05, "loss": 5.4815, "step": 127 }, { "epoch": 0.008585706140792166, "grad_norm": 0.1393046417378179, "learning_rate": 2e-05, "loss": 5.4908, "step": 128 }, { "epoch": 0.008652781970017105, "grad_norm": 0.1373988230937368, "learning_rate": 2e-05, "loss": 5.5015, "step": 129 }, { "epoch": 0.008719857799242043, "grad_norm": 0.13285684151509292, "learning_rate": 2e-05, "loss": 5.5537, "step": 130 }, { "epoch": 0.008786933628466982, "grad_norm": 0.13873899198440434, "learning_rate": 2e-05, "loss": 5.4701, "step": 131 }, { "epoch": 0.00885400945769192, "grad_norm": 0.14461458297305468, "learning_rate": 2e-05, "loss": 5.5739, "step": 132 }, { "epoch": 0.008921085286916859, "grad_norm": 0.1405940558904193, "learning_rate": 2e-05, "loss": 5.4074, "step": 133 }, { "epoch": 0.008988161116141799, "grad_norm": 0.14286201262384682, "learning_rate": 2e-05, "loss": 5.5183, "step": 134 }, { "epoch": 0.009055236945366738, "grad_norm": 0.13628491120224992, "learning_rate": 2e-05, "loss": 5.5413, "step": 135 }, { "epoch": 0.009122312774591676, "grad_norm": 0.13920410242894715, "learning_rate": 2e-05, "loss": 5.5742, "step": 136 }, { "epoch": 0.009189388603816614, "grad_norm": 0.15057892765641118, "learning_rate": 2e-05, "loss": 5.5209, "step": 137 }, { "epoch": 0.009256464433041553, "grad_norm": 0.13250022431579417, "learning_rate": 2e-05, "loss": 5.4923, "step": 138 }, { "epoch": 0.009323540262266491, "grad_norm": 0.13147301419695562, "learning_rate": 2e-05, "loss": 5.4712, "step": 139 }, { "epoch": 0.009390616091491432, "grad_norm": 0.1412971415975928, "learning_rate": 2e-05, "loss": 5.5326, "step": 140 }, { "epoch": 0.00945769192071637, "grad_norm": 0.13276962627372726, "learning_rate": 2e-05, "loss": 5.4725, "step": 141 }, { "epoch": 0.009524767749941309, "grad_norm": 0.13525965020820085, "learning_rate": 2e-05, "loss": 5.5783, "step": 142 }, { "epoch": 0.009591843579166247, "grad_norm": 0.13470426157166554, "learning_rate": 2e-05, "loss": 5.4414, "step": 143 }, { "epoch": 0.009658919408391186, "grad_norm": 0.1387203005348155, "learning_rate": 2e-05, "loss": 5.4097, "step": 144 }, { "epoch": 0.009725995237616126, "grad_norm": 0.13336201227369365, "learning_rate": 2e-05, "loss": 5.5642, "step": 145 }, { "epoch": 0.009793071066841064, "grad_norm": 0.13909228255806302, "learning_rate": 2e-05, "loss": 5.4909, "step": 146 }, { "epoch": 0.009860146896066003, "grad_norm": 0.13257100160349114, "learning_rate": 2e-05, "loss": 5.5663, "step": 147 }, { "epoch": 0.009927222725290941, "grad_norm": 0.12991537279833248, "learning_rate": 2e-05, "loss": 5.4793, "step": 148 }, { "epoch": 0.00999429855451588, "grad_norm": 0.13389375820706778, "learning_rate": 2e-05, "loss": 5.545, "step": 149 }, { "epoch": 0.010061374383740818, "grad_norm": 0.13125567956322495, "learning_rate": 2e-05, "loss": 5.597, "step": 150 }, { "epoch": 0.010128450212965759, "grad_norm": 0.13741817412973506, "learning_rate": 2e-05, "loss": 5.5423, "step": 151 }, { "epoch": 0.010195526042190697, "grad_norm": 0.13251126955987788, "learning_rate": 2e-05, "loss": 5.6468, "step": 152 }, { "epoch": 0.010262601871415635, "grad_norm": 0.13831460050034583, "learning_rate": 2e-05, "loss": 5.6094, "step": 153 }, { "epoch": 0.010329677700640574, "grad_norm": 0.13343651134367654, "learning_rate": 2e-05, "loss": 5.5306, "step": 154 }, { "epoch": 0.010396753529865512, "grad_norm": 0.13831155697627867, "learning_rate": 2e-05, "loss": 5.541, "step": 155 }, { "epoch": 0.010463829359090451, "grad_norm": 0.14066572771824454, "learning_rate": 2e-05, "loss": 5.4612, "step": 156 }, { "epoch": 0.010530905188315391, "grad_norm": 0.13792681398040846, "learning_rate": 2e-05, "loss": 5.6223, "step": 157 }, { "epoch": 0.01059798101754033, "grad_norm": 0.13170243667857998, "learning_rate": 2e-05, "loss": 5.4, "step": 158 }, { "epoch": 0.010665056846765268, "grad_norm": 0.1380771037852864, "learning_rate": 2e-05, "loss": 5.5203, "step": 159 }, { "epoch": 0.010732132675990207, "grad_norm": 0.1433553427121612, "learning_rate": 2e-05, "loss": 5.5152, "step": 160 }, { "epoch": 0.010799208505215145, "grad_norm": 0.13479448057617996, "learning_rate": 2e-05, "loss": 5.5257, "step": 161 }, { "epoch": 0.010866284334440085, "grad_norm": 0.13159147735932109, "learning_rate": 2e-05, "loss": 5.522, "step": 162 }, { "epoch": 0.010933360163665024, "grad_norm": 0.1369316227280448, "learning_rate": 2e-05, "loss": 5.5891, "step": 163 }, { "epoch": 0.011000435992889962, "grad_norm": 0.14076615488304237, "learning_rate": 2e-05, "loss": 5.5571, "step": 164 }, { "epoch": 0.0110675118221149, "grad_norm": 0.13283102030874372, "learning_rate": 2e-05, "loss": 5.529, "step": 165 }, { "epoch": 0.01113458765133984, "grad_norm": 0.13325588559324944, "learning_rate": 2e-05, "loss": 5.3261, "step": 166 }, { "epoch": 0.011201663480564778, "grad_norm": 0.1299736453291738, "learning_rate": 2e-05, "loss": 5.6576, "step": 167 }, { "epoch": 0.011268739309789718, "grad_norm": 0.13118665201796945, "learning_rate": 2e-05, "loss": 5.3818, "step": 168 }, { "epoch": 0.011335815139014657, "grad_norm": 0.13653431212504638, "learning_rate": 2e-05, "loss": 5.5639, "step": 169 }, { "epoch": 0.011402890968239595, "grad_norm": 0.13356055178008214, "learning_rate": 2e-05, "loss": 5.4809, "step": 170 }, { "epoch": 0.011469966797464533, "grad_norm": 0.1354015940184796, "learning_rate": 2e-05, "loss": 5.6393, "step": 171 }, { "epoch": 0.011537042626689472, "grad_norm": 0.13384596507250812, "learning_rate": 2e-05, "loss": 5.4493, "step": 172 }, { "epoch": 0.01160411845591441, "grad_norm": 0.1335560443666149, "learning_rate": 2e-05, "loss": 5.5138, "step": 173 }, { "epoch": 0.01167119428513935, "grad_norm": 0.1353461241003476, "learning_rate": 2e-05, "loss": 5.3689, "step": 174 }, { "epoch": 0.01173827011436429, "grad_norm": 0.13310993121743173, "learning_rate": 2e-05, "loss": 5.4317, "step": 175 }, { "epoch": 0.011805345943589228, "grad_norm": 0.14181164663352963, "learning_rate": 2e-05, "loss": 5.4778, "step": 176 }, { "epoch": 0.011872421772814166, "grad_norm": 0.13123002552600915, "learning_rate": 2e-05, "loss": 5.5789, "step": 177 }, { "epoch": 0.011939497602039105, "grad_norm": 0.1348766702462207, "learning_rate": 2e-05, "loss": 5.4131, "step": 178 }, { "epoch": 0.012006573431264045, "grad_norm": 0.13218692198518966, "learning_rate": 2e-05, "loss": 5.5337, "step": 179 }, { "epoch": 0.012073649260488983, "grad_norm": 0.13463891318118337, "learning_rate": 2e-05, "loss": 5.6098, "step": 180 }, { "epoch": 0.012140725089713922, "grad_norm": 0.13349456709801627, "learning_rate": 2e-05, "loss": 5.4946, "step": 181 }, { "epoch": 0.01220780091893886, "grad_norm": 0.13394579668557335, "learning_rate": 2e-05, "loss": 5.4568, "step": 182 }, { "epoch": 0.012274876748163799, "grad_norm": 0.15453286629635815, "learning_rate": 2e-05, "loss": 5.5621, "step": 183 }, { "epoch": 0.012341952577388737, "grad_norm": 0.13779559853420217, "learning_rate": 2e-05, "loss": 5.4652, "step": 184 }, { "epoch": 0.012409028406613678, "grad_norm": 0.13886472144296014, "learning_rate": 2e-05, "loss": 5.5631, "step": 185 }, { "epoch": 0.012476104235838616, "grad_norm": 0.1496746172659623, "learning_rate": 2e-05, "loss": 5.4174, "step": 186 }, { "epoch": 0.012543180065063554, "grad_norm": 0.14208723005689966, "learning_rate": 2e-05, "loss": 5.6048, "step": 187 }, { "epoch": 0.012610255894288493, "grad_norm": 0.13166535250834968, "learning_rate": 2e-05, "loss": 5.5068, "step": 188 }, { "epoch": 0.012677331723513431, "grad_norm": 0.12913640357768716, "learning_rate": 2e-05, "loss": 5.4392, "step": 189 }, { "epoch": 0.01274440755273837, "grad_norm": 0.13757135673412796, "learning_rate": 2e-05, "loss": 5.4461, "step": 190 }, { "epoch": 0.01281148338196331, "grad_norm": 0.13655506597832912, "learning_rate": 2e-05, "loss": 5.4447, "step": 191 }, { "epoch": 0.012878559211188249, "grad_norm": 0.1375066054180872, "learning_rate": 2e-05, "loss": 5.5012, "step": 192 }, { "epoch": 0.012945635040413187, "grad_norm": 0.15610482037412207, "learning_rate": 2e-05, "loss": 5.521, "step": 193 }, { "epoch": 0.013012710869638126, "grad_norm": 0.1367635037421754, "learning_rate": 2e-05, "loss": 5.399, "step": 194 }, { "epoch": 0.013079786698863064, "grad_norm": 0.1369003034390947, "learning_rate": 2e-05, "loss": 5.5249, "step": 195 }, { "epoch": 0.013146862528088003, "grad_norm": 0.1380794970424235, "learning_rate": 2e-05, "loss": 5.4218, "step": 196 }, { "epoch": 0.013213938357312943, "grad_norm": 0.13373877250050198, "learning_rate": 2e-05, "loss": 5.4796, "step": 197 }, { "epoch": 0.013281014186537881, "grad_norm": 0.14906325760735176, "learning_rate": 2e-05, "loss": 5.5528, "step": 198 }, { "epoch": 0.01334809001576282, "grad_norm": 0.15025471798102816, "learning_rate": 2e-05, "loss": 5.5543, "step": 199 }, { "epoch": 0.013415165844987758, "grad_norm": 0.14253606046227107, "learning_rate": 2e-05, "loss": 5.5075, "step": 200 }, { "epoch": 0.013482241674212697, "grad_norm": 0.13288747521549985, "learning_rate": 2e-05, "loss": 5.5899, "step": 201 }, { "epoch": 0.013549317503437637, "grad_norm": 0.13694648081151692, "learning_rate": 2e-05, "loss": 5.5104, "step": 202 }, { "epoch": 0.013616393332662576, "grad_norm": 0.13940060362621973, "learning_rate": 2e-05, "loss": 5.4867, "step": 203 }, { "epoch": 0.013683469161887514, "grad_norm": 0.1551342233966156, "learning_rate": 2e-05, "loss": 5.4671, "step": 204 }, { "epoch": 0.013750544991112452, "grad_norm": 0.13798361523391936, "learning_rate": 2e-05, "loss": 5.5551, "step": 205 }, { "epoch": 0.013817620820337391, "grad_norm": 0.14112620854433694, "learning_rate": 2e-05, "loss": 5.5151, "step": 206 }, { "epoch": 0.01388469664956233, "grad_norm": 0.14368054643929828, "learning_rate": 2e-05, "loss": 5.5385, "step": 207 }, { "epoch": 0.01395177247878727, "grad_norm": 0.13492428125481046, "learning_rate": 2e-05, "loss": 5.4413, "step": 208 }, { "epoch": 0.014018848308012208, "grad_norm": 0.1359437016514597, "learning_rate": 2e-05, "loss": 5.4821, "step": 209 }, { "epoch": 0.014085924137237147, "grad_norm": 0.1422595482933864, "learning_rate": 2e-05, "loss": 5.6255, "step": 210 }, { "epoch": 0.014152999966462085, "grad_norm": 0.13721004058447053, "learning_rate": 2e-05, "loss": 5.4843, "step": 211 }, { "epoch": 0.014220075795687024, "grad_norm": 0.1395545772069129, "learning_rate": 2e-05, "loss": 5.5288, "step": 212 }, { "epoch": 0.014287151624911962, "grad_norm": 0.12836343210391182, "learning_rate": 2e-05, "loss": 5.544, "step": 213 }, { "epoch": 0.014354227454136902, "grad_norm": 0.15185543409605295, "learning_rate": 2e-05, "loss": 5.5624, "step": 214 }, { "epoch": 0.01442130328336184, "grad_norm": 0.13534643795646575, "learning_rate": 2e-05, "loss": 5.5096, "step": 215 }, { "epoch": 0.01448837911258678, "grad_norm": 0.13470870363624546, "learning_rate": 2e-05, "loss": 5.627, "step": 216 }, { "epoch": 0.014555454941811718, "grad_norm": 0.1360087244048781, "learning_rate": 2e-05, "loss": 5.5024, "step": 217 }, { "epoch": 0.014622530771036656, "grad_norm": 0.14024312273251197, "learning_rate": 2e-05, "loss": 5.5826, "step": 218 }, { "epoch": 0.014689606600261597, "grad_norm": 0.1376420471114841, "learning_rate": 2e-05, "loss": 5.5262, "step": 219 }, { "epoch": 0.014756682429486535, "grad_norm": 0.139656095792711, "learning_rate": 2e-05, "loss": 5.407, "step": 220 }, { "epoch": 0.014823758258711473, "grad_norm": 0.1349880464633375, "learning_rate": 2e-05, "loss": 5.5401, "step": 221 }, { "epoch": 0.014890834087936412, "grad_norm": 0.1435660108693425, "learning_rate": 2e-05, "loss": 5.5442, "step": 222 }, { "epoch": 0.01495790991716135, "grad_norm": 0.13520150286281724, "learning_rate": 2e-05, "loss": 5.5113, "step": 223 }, { "epoch": 0.015024985746386289, "grad_norm": 0.13382236365345912, "learning_rate": 2e-05, "loss": 5.4909, "step": 224 }, { "epoch": 0.01509206157561123, "grad_norm": 0.1376846345964698, "learning_rate": 2e-05, "loss": 5.5181, "step": 225 }, { "epoch": 0.015159137404836168, "grad_norm": 0.14407632997232372, "learning_rate": 2e-05, "loss": 5.5495, "step": 226 }, { "epoch": 0.015226213234061106, "grad_norm": 0.14018701078938195, "learning_rate": 2e-05, "loss": 5.3041, "step": 227 }, { "epoch": 0.015293289063286045, "grad_norm": 0.1322376057261667, "learning_rate": 2e-05, "loss": 5.5604, "step": 228 }, { "epoch": 0.015360364892510983, "grad_norm": 0.1345121512748179, "learning_rate": 2e-05, "loss": 5.5028, "step": 229 }, { "epoch": 0.015427440721735922, "grad_norm": 0.13198319874561015, "learning_rate": 2e-05, "loss": 5.465, "step": 230 }, { "epoch": 0.015494516550960862, "grad_norm": 0.13474593983822677, "learning_rate": 2e-05, "loss": 5.5025, "step": 231 }, { "epoch": 0.0155615923801858, "grad_norm": 0.13388538276090686, "learning_rate": 2e-05, "loss": 5.5226, "step": 232 }, { "epoch": 0.01562866820941074, "grad_norm": 0.1347084236920801, "learning_rate": 2e-05, "loss": 5.4754, "step": 233 }, { "epoch": 0.01569574403863568, "grad_norm": 0.1379395338535944, "learning_rate": 2e-05, "loss": 5.549, "step": 234 }, { "epoch": 0.015762819867860618, "grad_norm": 0.13428076935551114, "learning_rate": 2e-05, "loss": 5.5567, "step": 235 }, { "epoch": 0.015829895697085556, "grad_norm": 0.13790232793247126, "learning_rate": 2e-05, "loss": 5.5109, "step": 236 }, { "epoch": 0.015896971526310495, "grad_norm": 0.13042042688748032, "learning_rate": 2e-05, "loss": 5.5566, "step": 237 }, { "epoch": 0.015964047355535433, "grad_norm": 0.1330663519900655, "learning_rate": 2e-05, "loss": 5.4465, "step": 238 }, { "epoch": 0.01603112318476037, "grad_norm": 0.13385255377819233, "learning_rate": 2e-05, "loss": 5.4327, "step": 239 }, { "epoch": 0.01609819901398531, "grad_norm": 0.12993697207416155, "learning_rate": 2e-05, "loss": 5.5471, "step": 240 }, { "epoch": 0.01616527484321025, "grad_norm": 0.1396024743841848, "learning_rate": 2e-05, "loss": 5.6155, "step": 241 }, { "epoch": 0.016232350672435187, "grad_norm": 0.13989956866798828, "learning_rate": 2e-05, "loss": 5.3373, "step": 242 }, { "epoch": 0.016299426501660125, "grad_norm": 0.13976501449136522, "learning_rate": 2e-05, "loss": 5.3496, "step": 243 }, { "epoch": 0.016366502330885064, "grad_norm": 0.13165882002358276, "learning_rate": 2e-05, "loss": 5.4782, "step": 244 }, { "epoch": 0.016433578160110006, "grad_norm": 0.13488722967310984, "learning_rate": 2e-05, "loss": 5.6121, "step": 245 }, { "epoch": 0.016500653989334944, "grad_norm": 0.1336981550894159, "learning_rate": 2e-05, "loss": 5.5589, "step": 246 }, { "epoch": 0.016567729818559883, "grad_norm": 0.1440148401620789, "learning_rate": 2e-05, "loss": 5.3725, "step": 247 }, { "epoch": 0.01663480564778482, "grad_norm": 0.1338616154763599, "learning_rate": 2e-05, "loss": 5.5469, "step": 248 }, { "epoch": 0.01670188147700976, "grad_norm": 0.13554269305080055, "learning_rate": 2e-05, "loss": 5.4585, "step": 249 }, { "epoch": 0.0167689573062347, "grad_norm": 0.14681796374747594, "learning_rate": 2e-05, "loss": 5.4348, "step": 250 }, { "epoch": 0.016836033135459637, "grad_norm": 0.13703003196924554, "learning_rate": 2e-05, "loss": 5.5859, "step": 251 }, { "epoch": 0.016903108964684575, "grad_norm": 0.14008819175775072, "learning_rate": 2e-05, "loss": 5.6353, "step": 252 }, { "epoch": 0.016970184793909514, "grad_norm": 0.13891502890897356, "learning_rate": 2e-05, "loss": 5.4349, "step": 253 }, { "epoch": 0.017037260623134452, "grad_norm": 0.13924088981573363, "learning_rate": 2e-05, "loss": 5.5485, "step": 254 }, { "epoch": 0.01710433645235939, "grad_norm": 0.13673406685267592, "learning_rate": 2e-05, "loss": 5.3589, "step": 255 }, { "epoch": 0.017171412281584333, "grad_norm": 0.13320380977224103, "learning_rate": 2e-05, "loss": 5.5116, "step": 256 }, { "epoch": 0.01723848811080927, "grad_norm": 0.1410488335732908, "learning_rate": 2e-05, "loss": 5.4918, "step": 257 }, { "epoch": 0.01730556394003421, "grad_norm": 0.13298435533886696, "learning_rate": 2e-05, "loss": 5.3208, "step": 258 }, { "epoch": 0.017372639769259148, "grad_norm": 0.13988287243353253, "learning_rate": 2e-05, "loss": 5.4744, "step": 259 }, { "epoch": 0.017439715598484087, "grad_norm": 0.14053031175611752, "learning_rate": 2e-05, "loss": 5.4224, "step": 260 }, { "epoch": 0.017506791427709025, "grad_norm": 0.13757472499958526, "learning_rate": 2e-05, "loss": 5.5816, "step": 261 }, { "epoch": 0.017573867256933964, "grad_norm": 0.13749854484353066, "learning_rate": 2e-05, "loss": 5.4426, "step": 262 }, { "epoch": 0.017640943086158902, "grad_norm": 0.13923202465722923, "learning_rate": 2e-05, "loss": 5.5661, "step": 263 }, { "epoch": 0.01770801891538384, "grad_norm": 0.1358334843327064, "learning_rate": 2e-05, "loss": 5.4333, "step": 264 }, { "epoch": 0.01777509474460878, "grad_norm": 0.13869653732550644, "learning_rate": 2e-05, "loss": 5.5152, "step": 265 }, { "epoch": 0.017842170573833718, "grad_norm": 0.14308039464116132, "learning_rate": 2e-05, "loss": 5.475, "step": 266 }, { "epoch": 0.017909246403058656, "grad_norm": 0.14289446053175728, "learning_rate": 2e-05, "loss": 5.3703, "step": 267 }, { "epoch": 0.017976322232283598, "grad_norm": 0.13954656437029866, "learning_rate": 2e-05, "loss": 5.512, "step": 268 }, { "epoch": 0.018043398061508537, "grad_norm": 0.13770164428473664, "learning_rate": 2e-05, "loss": 5.4472, "step": 269 }, { "epoch": 0.018110473890733475, "grad_norm": 0.13593856485860595, "learning_rate": 2e-05, "loss": 5.4916, "step": 270 }, { "epoch": 0.018177549719958414, "grad_norm": 0.1378056846456878, "learning_rate": 2e-05, "loss": 5.4144, "step": 271 }, { "epoch": 0.018244625549183352, "grad_norm": 0.14677081976064846, "learning_rate": 2e-05, "loss": 5.5786, "step": 272 }, { "epoch": 0.01831170137840829, "grad_norm": 0.1333997160128434, "learning_rate": 2e-05, "loss": 5.4599, "step": 273 }, { "epoch": 0.01837877720763323, "grad_norm": 0.1334431474865435, "learning_rate": 2e-05, "loss": 5.5029, "step": 274 }, { "epoch": 0.018445853036858167, "grad_norm": 0.14112418165203575, "learning_rate": 2e-05, "loss": 5.4597, "step": 275 }, { "epoch": 0.018512928866083106, "grad_norm": 0.14547318226177025, "learning_rate": 2e-05, "loss": 5.4463, "step": 276 }, { "epoch": 0.018580004695308044, "grad_norm": 0.14044227296642572, "learning_rate": 2e-05, "loss": 5.3426, "step": 277 }, { "epoch": 0.018647080524532983, "grad_norm": 0.13315530974306822, "learning_rate": 2e-05, "loss": 5.5762, "step": 278 }, { "epoch": 0.018714156353757925, "grad_norm": 0.13927811230900217, "learning_rate": 2e-05, "loss": 5.4824, "step": 279 }, { "epoch": 0.018781232182982863, "grad_norm": 0.13636044567814218, "learning_rate": 2e-05, "loss": 5.3487, "step": 280 }, { "epoch": 0.018848308012207802, "grad_norm": 0.1393901566987908, "learning_rate": 2e-05, "loss": 5.5384, "step": 281 }, { "epoch": 0.01891538384143274, "grad_norm": 0.14015357810907544, "learning_rate": 2e-05, "loss": 5.5201, "step": 282 }, { "epoch": 0.01898245967065768, "grad_norm": 0.13503157673832053, "learning_rate": 2e-05, "loss": 5.4651, "step": 283 }, { "epoch": 0.019049535499882617, "grad_norm": 0.13649017002960714, "learning_rate": 2e-05, "loss": 5.6387, "step": 284 }, { "epoch": 0.019116611329107556, "grad_norm": 0.14050803821643626, "learning_rate": 2e-05, "loss": 5.3334, "step": 285 }, { "epoch": 0.019183687158332494, "grad_norm": 0.13275357391343542, "learning_rate": 2e-05, "loss": 5.6031, "step": 286 }, { "epoch": 0.019250762987557433, "grad_norm": 0.13513545725223935, "learning_rate": 2e-05, "loss": 5.5803, "step": 287 }, { "epoch": 0.01931783881678237, "grad_norm": 0.13942077888191706, "learning_rate": 2e-05, "loss": 5.5644, "step": 288 }, { "epoch": 0.01938491464600731, "grad_norm": 0.1317177502262839, "learning_rate": 2e-05, "loss": 5.3929, "step": 289 }, { "epoch": 0.01945199047523225, "grad_norm": 0.14018213104286506, "learning_rate": 2e-05, "loss": 5.4108, "step": 290 }, { "epoch": 0.01951906630445719, "grad_norm": 0.1302789741300026, "learning_rate": 2e-05, "loss": 5.4, "step": 291 }, { "epoch": 0.01958614213368213, "grad_norm": 0.13573053088025885, "learning_rate": 2e-05, "loss": 5.5339, "step": 292 }, { "epoch": 0.019653217962907067, "grad_norm": 0.1298532622388913, "learning_rate": 2e-05, "loss": 5.5202, "step": 293 }, { "epoch": 0.019720293792132006, "grad_norm": 0.1387368585183225, "learning_rate": 2e-05, "loss": 5.4008, "step": 294 }, { "epoch": 0.019787369621356944, "grad_norm": 0.13286945880633255, "learning_rate": 2e-05, "loss": 5.5225, "step": 295 }, { "epoch": 0.019854445450581883, "grad_norm": 0.13460463802703496, "learning_rate": 2e-05, "loss": 5.6098, "step": 296 }, { "epoch": 0.01992152127980682, "grad_norm": 0.13266495300108314, "learning_rate": 2e-05, "loss": 5.5612, "step": 297 }, { "epoch": 0.01998859710903176, "grad_norm": 0.1390013145487187, "learning_rate": 2e-05, "loss": 5.4808, "step": 298 }, { "epoch": 0.020055672938256698, "grad_norm": 0.13036775755700042, "learning_rate": 2e-05, "loss": 5.4909, "step": 299 }, { "epoch": 0.020122748767481637, "grad_norm": 0.1351561306033971, "learning_rate": 2e-05, "loss": 5.5295, "step": 300 }, { "epoch": 0.020189824596706575, "grad_norm": 0.13068843395631943, "learning_rate": 2e-05, "loss": 5.5133, "step": 301 }, { "epoch": 0.020256900425931517, "grad_norm": 0.13486391073328413, "learning_rate": 2e-05, "loss": 5.4127, "step": 302 }, { "epoch": 0.020323976255156456, "grad_norm": 0.14064396051095973, "learning_rate": 2e-05, "loss": 5.4195, "step": 303 }, { "epoch": 0.020391052084381394, "grad_norm": 0.14354945962603494, "learning_rate": 2e-05, "loss": 5.3845, "step": 304 }, { "epoch": 0.020458127913606333, "grad_norm": 0.14099937008619579, "learning_rate": 2e-05, "loss": 5.3195, "step": 305 }, { "epoch": 0.02052520374283127, "grad_norm": 0.13190265555710654, "learning_rate": 2e-05, "loss": 5.4902, "step": 306 }, { "epoch": 0.02059227957205621, "grad_norm": 0.13464959866358975, "learning_rate": 2e-05, "loss": 5.477, "step": 307 }, { "epoch": 0.020659355401281148, "grad_norm": 0.1367295230234042, "learning_rate": 2e-05, "loss": 5.5423, "step": 308 }, { "epoch": 0.020726431230506086, "grad_norm": 0.13914460779187957, "learning_rate": 2e-05, "loss": 5.6471, "step": 309 }, { "epoch": 0.020793507059731025, "grad_norm": 0.1332943654584558, "learning_rate": 2e-05, "loss": 5.4724, "step": 310 }, { "epoch": 0.020860582888955963, "grad_norm": 0.12766112720947653, "learning_rate": 2e-05, "loss": 5.3929, "step": 311 }, { "epoch": 0.020927658718180902, "grad_norm": 0.13588223042602018, "learning_rate": 2e-05, "loss": 5.4093, "step": 312 }, { "epoch": 0.020994734547405844, "grad_norm": 0.14136836470907532, "learning_rate": 2e-05, "loss": 5.4596, "step": 313 }, { "epoch": 0.021061810376630782, "grad_norm": 0.1393880208383422, "learning_rate": 2e-05, "loss": 5.581, "step": 314 }, { "epoch": 0.02112888620585572, "grad_norm": 0.13164162031839544, "learning_rate": 2e-05, "loss": 5.4643, "step": 315 }, { "epoch": 0.02119596203508066, "grad_norm": 0.14010905667045967, "learning_rate": 2e-05, "loss": 5.6313, "step": 316 }, { "epoch": 0.021263037864305598, "grad_norm": 0.14526862125687154, "learning_rate": 2e-05, "loss": 5.4575, "step": 317 }, { "epoch": 0.021330113693530536, "grad_norm": 0.14093553081245067, "learning_rate": 2e-05, "loss": 5.528, "step": 318 }, { "epoch": 0.021397189522755475, "grad_norm": 0.1358165356915093, "learning_rate": 2e-05, "loss": 5.4931, "step": 319 }, { "epoch": 0.021464265351980413, "grad_norm": 0.14073772926918005, "learning_rate": 2e-05, "loss": 5.4055, "step": 320 }, { "epoch": 0.021531341181205352, "grad_norm": 0.13652977553817378, "learning_rate": 2e-05, "loss": 5.4774, "step": 321 }, { "epoch": 0.02159841701043029, "grad_norm": 0.14055402703216985, "learning_rate": 2e-05, "loss": 5.4304, "step": 322 }, { "epoch": 0.02166549283965523, "grad_norm": 0.13776074331038693, "learning_rate": 2e-05, "loss": 5.4917, "step": 323 }, { "epoch": 0.02173256866888017, "grad_norm": 0.14047674601848228, "learning_rate": 2e-05, "loss": 5.491, "step": 324 }, { "epoch": 0.02179964449810511, "grad_norm": 0.13892914026490785, "learning_rate": 2e-05, "loss": 5.4963, "step": 325 }, { "epoch": 0.021866720327330048, "grad_norm": 0.1382115436600762, "learning_rate": 2e-05, "loss": 5.5455, "step": 326 }, { "epoch": 0.021933796156554986, "grad_norm": 0.13932573949417545, "learning_rate": 2e-05, "loss": 5.4704, "step": 327 }, { "epoch": 0.022000871985779925, "grad_norm": 0.14107431052446137, "learning_rate": 2e-05, "loss": 5.4786, "step": 328 }, { "epoch": 0.022067947815004863, "grad_norm": 0.13943872404016952, "learning_rate": 2e-05, "loss": 5.4809, "step": 329 }, { "epoch": 0.0221350236442298, "grad_norm": 0.1417150365329029, "learning_rate": 2e-05, "loss": 5.5967, "step": 330 }, { "epoch": 0.02220209947345474, "grad_norm": 0.1422942300595576, "learning_rate": 2e-05, "loss": 5.4733, "step": 331 }, { "epoch": 0.02226917530267968, "grad_norm": 0.13801404901597958, "learning_rate": 2e-05, "loss": 5.4696, "step": 332 }, { "epoch": 0.022336251131904617, "grad_norm": 0.14334325976263998, "learning_rate": 2e-05, "loss": 5.4425, "step": 333 }, { "epoch": 0.022403326961129556, "grad_norm": 0.13734915017630128, "learning_rate": 2e-05, "loss": 5.669, "step": 334 }, { "epoch": 0.022470402790354494, "grad_norm": 0.13302604328883835, "learning_rate": 2e-05, "loss": 5.4489, "step": 335 }, { "epoch": 0.022537478619579436, "grad_norm": 0.13585129129115542, "learning_rate": 2e-05, "loss": 5.4531, "step": 336 }, { "epoch": 0.022604554448804375, "grad_norm": 0.1478783628958156, "learning_rate": 2e-05, "loss": 5.3855, "step": 337 }, { "epoch": 0.022671630278029313, "grad_norm": 0.14580947668953922, "learning_rate": 2e-05, "loss": 5.4602, "step": 338 }, { "epoch": 0.02273870610725425, "grad_norm": 0.13237653135928842, "learning_rate": 2e-05, "loss": 5.493, "step": 339 }, { "epoch": 0.02280578193647919, "grad_norm": 0.13720980295398222, "learning_rate": 2e-05, "loss": 5.4197, "step": 340 }, { "epoch": 0.02287285776570413, "grad_norm": 0.13860990487488198, "learning_rate": 2e-05, "loss": 5.5115, "step": 341 }, { "epoch": 0.022939933594929067, "grad_norm": 0.13744223044929282, "learning_rate": 2e-05, "loss": 5.4298, "step": 342 }, { "epoch": 0.023007009424154005, "grad_norm": 0.13584639927592604, "learning_rate": 2e-05, "loss": 5.432, "step": 343 }, { "epoch": 0.023074085253378944, "grad_norm": 0.13395627502911653, "learning_rate": 2e-05, "loss": 5.4762, "step": 344 }, { "epoch": 0.023141161082603882, "grad_norm": 0.13944307274388457, "learning_rate": 2e-05, "loss": 5.5209, "step": 345 }, { "epoch": 0.02320823691182882, "grad_norm": 0.1398135732658755, "learning_rate": 2e-05, "loss": 5.47, "step": 346 }, { "epoch": 0.023275312741053763, "grad_norm": 0.1404305164744192, "learning_rate": 2e-05, "loss": 5.4382, "step": 347 }, { "epoch": 0.0233423885702787, "grad_norm": 0.13787092697983222, "learning_rate": 2e-05, "loss": 5.4405, "step": 348 }, { "epoch": 0.02340946439950364, "grad_norm": 0.14044190484129454, "learning_rate": 2e-05, "loss": 5.4575, "step": 349 }, { "epoch": 0.02347654022872858, "grad_norm": 0.1366848205371932, "learning_rate": 2e-05, "loss": 5.4977, "step": 350 }, { "epoch": 0.023543616057953517, "grad_norm": 0.13577791584019544, "learning_rate": 2e-05, "loss": 5.3942, "step": 351 }, { "epoch": 0.023610691887178455, "grad_norm": 0.1406626418408333, "learning_rate": 2e-05, "loss": 5.3176, "step": 352 }, { "epoch": 0.023677767716403394, "grad_norm": 0.1354885761881078, "learning_rate": 2e-05, "loss": 5.6091, "step": 353 }, { "epoch": 0.023744843545628332, "grad_norm": 0.1329458810127426, "learning_rate": 2e-05, "loss": 5.4475, "step": 354 }, { "epoch": 0.02381191937485327, "grad_norm": 0.1396865316754293, "learning_rate": 2e-05, "loss": 5.4023, "step": 355 }, { "epoch": 0.02387899520407821, "grad_norm": 0.13921113920897119, "learning_rate": 2e-05, "loss": 5.3942, "step": 356 }, { "epoch": 0.023946071033303148, "grad_norm": 0.13167856019188462, "learning_rate": 2e-05, "loss": 5.4746, "step": 357 }, { "epoch": 0.02401314686252809, "grad_norm": 0.1364284648900273, "learning_rate": 2e-05, "loss": 5.4456, "step": 358 }, { "epoch": 0.024080222691753028, "grad_norm": 0.13555591249091348, "learning_rate": 2e-05, "loss": 5.4585, "step": 359 }, { "epoch": 0.024147298520977967, "grad_norm": 0.14056587633398115, "learning_rate": 2e-05, "loss": 5.4715, "step": 360 }, { "epoch": 0.024214374350202905, "grad_norm": 0.13581930293444056, "learning_rate": 2e-05, "loss": 5.3824, "step": 361 }, { "epoch": 0.024281450179427844, "grad_norm": 0.1373287534406947, "learning_rate": 2e-05, "loss": 5.545, "step": 362 }, { "epoch": 0.024348526008652782, "grad_norm": 0.1446479521870619, "learning_rate": 2e-05, "loss": 5.6046, "step": 363 }, { "epoch": 0.02441560183787772, "grad_norm": 0.14134614280417196, "learning_rate": 2e-05, "loss": 5.4769, "step": 364 }, { "epoch": 0.02448267766710266, "grad_norm": 0.13703265868085493, "learning_rate": 2e-05, "loss": 5.5608, "step": 365 }, { "epoch": 0.024549753496327598, "grad_norm": 0.14173530188852576, "learning_rate": 2e-05, "loss": 5.4972, "step": 366 }, { "epoch": 0.024616829325552536, "grad_norm": 0.13537935824715508, "learning_rate": 2e-05, "loss": 5.5082, "step": 367 }, { "epoch": 0.024683905154777475, "grad_norm": 0.13640541237042714, "learning_rate": 2e-05, "loss": 5.5298, "step": 368 }, { "epoch": 0.024750980984002413, "grad_norm": 0.1424194501022318, "learning_rate": 2e-05, "loss": 5.5066, "step": 369 }, { "epoch": 0.024818056813227355, "grad_norm": 0.13785459031620195, "learning_rate": 2e-05, "loss": 5.4622, "step": 370 }, { "epoch": 0.024885132642452294, "grad_norm": 0.1496021987416581, "learning_rate": 2e-05, "loss": 5.5762, "step": 371 }, { "epoch": 0.024952208471677232, "grad_norm": 0.14758782327148273, "learning_rate": 2e-05, "loss": 5.5048, "step": 372 }, { "epoch": 0.02501928430090217, "grad_norm": 0.14388897365700223, "learning_rate": 2e-05, "loss": 5.5665, "step": 373 }, { "epoch": 0.02508636013012711, "grad_norm": 0.13748350279103597, "learning_rate": 2e-05, "loss": 5.3806, "step": 374 }, { "epoch": 0.025153435959352047, "grad_norm": 0.13728178957568166, "learning_rate": 2e-05, "loss": 5.4678, "step": 375 }, { "epoch": 0.025220511788576986, "grad_norm": 0.15219734643771088, "learning_rate": 2e-05, "loss": 5.4523, "step": 376 }, { "epoch": 0.025287587617801924, "grad_norm": 0.13776093616107832, "learning_rate": 2e-05, "loss": 5.4599, "step": 377 }, { "epoch": 0.025354663447026863, "grad_norm": 0.1323313019917841, "learning_rate": 2e-05, "loss": 5.4284, "step": 378 }, { "epoch": 0.0254217392762518, "grad_norm": 0.1497693646313545, "learning_rate": 2e-05, "loss": 5.5278, "step": 379 }, { "epoch": 0.02548881510547674, "grad_norm": 0.13581608804947878, "learning_rate": 2e-05, "loss": 5.4432, "step": 380 }, { "epoch": 0.025555890934701682, "grad_norm": 0.13702156264001938, "learning_rate": 2e-05, "loss": 5.5198, "step": 381 }, { "epoch": 0.02562296676392662, "grad_norm": 0.1479831250398658, "learning_rate": 2e-05, "loss": 5.4233, "step": 382 }, { "epoch": 0.02569004259315156, "grad_norm": 0.14023110295322808, "learning_rate": 2e-05, "loss": 5.4498, "step": 383 }, { "epoch": 0.025757118422376497, "grad_norm": 0.13907866409465663, "learning_rate": 2e-05, "loss": 5.4888, "step": 384 }, { "epoch": 0.025824194251601436, "grad_norm": 0.13929828284213425, "learning_rate": 2e-05, "loss": 5.4757, "step": 385 }, { "epoch": 0.025891270080826374, "grad_norm": 0.13525869424923675, "learning_rate": 2e-05, "loss": 5.4232, "step": 386 }, { "epoch": 0.025958345910051313, "grad_norm": 0.1427802337933376, "learning_rate": 2e-05, "loss": 5.6668, "step": 387 }, { "epoch": 0.02602542173927625, "grad_norm": 0.13126390847250383, "learning_rate": 2e-05, "loss": 5.535, "step": 388 }, { "epoch": 0.02609249756850119, "grad_norm": 0.14213752720124426, "learning_rate": 2e-05, "loss": 5.4551, "step": 389 }, { "epoch": 0.02615957339772613, "grad_norm": 0.13989910666508423, "learning_rate": 2e-05, "loss": 5.4791, "step": 390 }, { "epoch": 0.026226649226951067, "grad_norm": 0.13430225912784785, "learning_rate": 2e-05, "loss": 5.3793, "step": 391 }, { "epoch": 0.026293725056176005, "grad_norm": 0.14461781740490645, "learning_rate": 2e-05, "loss": 5.4829, "step": 392 }, { "epoch": 0.026360800885400947, "grad_norm": 0.14446861354900634, "learning_rate": 2e-05, "loss": 5.3391, "step": 393 }, { "epoch": 0.026427876714625886, "grad_norm": 0.14356768600109318, "learning_rate": 2e-05, "loss": 5.5977, "step": 394 }, { "epoch": 0.026494952543850824, "grad_norm": 0.1362516471769211, "learning_rate": 2e-05, "loss": 5.5295, "step": 395 }, { "epoch": 0.026562028373075763, "grad_norm": 0.15600174473100523, "learning_rate": 2e-05, "loss": 5.3869, "step": 396 }, { "epoch": 0.0266291042023007, "grad_norm": 0.13853467608782233, "learning_rate": 2e-05, "loss": 5.4972, "step": 397 }, { "epoch": 0.02669618003152564, "grad_norm": 0.13542811149476883, "learning_rate": 2e-05, "loss": 5.5862, "step": 398 }, { "epoch": 0.026763255860750578, "grad_norm": 0.14308228641773624, "learning_rate": 2e-05, "loss": 5.4448, "step": 399 }, { "epoch": 0.026830331689975517, "grad_norm": 0.14388696098957304, "learning_rate": 2e-05, "loss": 5.5063, "step": 400 }, { "epoch": 0.026897407519200455, "grad_norm": 0.13311492221933083, "learning_rate": 2e-05, "loss": 5.6049, "step": 401 }, { "epoch": 0.026964483348425394, "grad_norm": 0.15068428191963382, "learning_rate": 2e-05, "loss": 5.6092, "step": 402 }, { "epoch": 0.027031559177650332, "grad_norm": 0.14149604609631475, "learning_rate": 2e-05, "loss": 5.5962, "step": 403 }, { "epoch": 0.027098635006875274, "grad_norm": 0.13321908144926464, "learning_rate": 2e-05, "loss": 5.2231, "step": 404 }, { "epoch": 0.027165710836100213, "grad_norm": 0.13859466222445496, "learning_rate": 2e-05, "loss": 5.5049, "step": 405 }, { "epoch": 0.02723278666532515, "grad_norm": 0.15478622697011363, "learning_rate": 2e-05, "loss": 5.5249, "step": 406 }, { "epoch": 0.02729986249455009, "grad_norm": 0.13575694755907047, "learning_rate": 2e-05, "loss": 5.5904, "step": 407 }, { "epoch": 0.027366938323775028, "grad_norm": 0.14148167111382318, "learning_rate": 2e-05, "loss": 5.7243, "step": 408 }, { "epoch": 0.027434014152999966, "grad_norm": 0.14287791773438815, "learning_rate": 2e-05, "loss": 5.4441, "step": 409 }, { "epoch": 0.027501089982224905, "grad_norm": 0.13619881969751138, "learning_rate": 2e-05, "loss": 5.4259, "step": 410 }, { "epoch": 0.027568165811449843, "grad_norm": 0.1446271165834697, "learning_rate": 2e-05, "loss": 5.28, "step": 411 }, { "epoch": 0.027635241640674782, "grad_norm": 0.14155968313445627, "learning_rate": 2e-05, "loss": 5.4784, "step": 412 }, { "epoch": 0.02770231746989972, "grad_norm": 0.13282673248736107, "learning_rate": 2e-05, "loss": 5.4283, "step": 413 }, { "epoch": 0.02776939329912466, "grad_norm": 0.13853298832320834, "learning_rate": 2e-05, "loss": 5.5138, "step": 414 }, { "epoch": 0.0278364691283496, "grad_norm": 0.1365273323961682, "learning_rate": 2e-05, "loss": 5.5315, "step": 415 }, { "epoch": 0.02790354495757454, "grad_norm": 0.13571835930531323, "learning_rate": 2e-05, "loss": 5.3449, "step": 416 }, { "epoch": 0.027970620786799478, "grad_norm": 0.13469511106014387, "learning_rate": 2e-05, "loss": 5.4744, "step": 417 }, { "epoch": 0.028037696616024416, "grad_norm": 0.13662464602102403, "learning_rate": 2e-05, "loss": 5.3925, "step": 418 }, { "epoch": 0.028104772445249355, "grad_norm": 0.13581649762198095, "learning_rate": 2e-05, "loss": 5.4875, "step": 419 }, { "epoch": 0.028171848274474293, "grad_norm": 0.1330851772069556, "learning_rate": 2e-05, "loss": 5.4274, "step": 420 }, { "epoch": 0.028238924103699232, "grad_norm": 0.14282359284803453, "learning_rate": 2e-05, "loss": 5.5418, "step": 421 }, { "epoch": 0.02830599993292417, "grad_norm": 0.1342083795099212, "learning_rate": 2e-05, "loss": 5.4413, "step": 422 }, { "epoch": 0.02837307576214911, "grad_norm": 0.13482010494998253, "learning_rate": 2e-05, "loss": 5.6325, "step": 423 }, { "epoch": 0.028440151591374047, "grad_norm": 0.1394152554487557, "learning_rate": 2e-05, "loss": 5.5611, "step": 424 }, { "epoch": 0.028507227420598986, "grad_norm": 0.1399139304195542, "learning_rate": 2e-05, "loss": 5.6259, "step": 425 }, { "epoch": 0.028574303249823924, "grad_norm": 0.1392994407976783, "learning_rate": 2e-05, "loss": 5.3537, "step": 426 }, { "epoch": 0.028641379079048866, "grad_norm": 0.13396664094567637, "learning_rate": 2e-05, "loss": 5.4843, "step": 427 }, { "epoch": 0.028708454908273805, "grad_norm": 0.13541701659867753, "learning_rate": 2e-05, "loss": 5.4514, "step": 428 }, { "epoch": 0.028775530737498743, "grad_norm": 0.13497212300887781, "learning_rate": 2e-05, "loss": 5.3372, "step": 429 }, { "epoch": 0.02884260656672368, "grad_norm": 0.13949643353385802, "learning_rate": 2e-05, "loss": 5.5618, "step": 430 }, { "epoch": 0.02890968239594862, "grad_norm": 0.13940262215484756, "learning_rate": 2e-05, "loss": 5.4795, "step": 431 }, { "epoch": 0.02897675822517356, "grad_norm": 0.14071923837378633, "learning_rate": 2e-05, "loss": 5.4975, "step": 432 }, { "epoch": 0.029043834054398497, "grad_norm": 0.14205565868695072, "learning_rate": 2e-05, "loss": 5.4597, "step": 433 }, { "epoch": 0.029110909883623436, "grad_norm": 0.135123026600456, "learning_rate": 2e-05, "loss": 5.4487, "step": 434 }, { "epoch": 0.029177985712848374, "grad_norm": 0.13233426812366825, "learning_rate": 2e-05, "loss": 5.4244, "step": 435 }, { "epoch": 0.029245061542073313, "grad_norm": 0.1347367216940183, "learning_rate": 2e-05, "loss": 5.5574, "step": 436 }, { "epoch": 0.02931213737129825, "grad_norm": 0.13488120836842218, "learning_rate": 2e-05, "loss": 5.404, "step": 437 }, { "epoch": 0.029379213200523193, "grad_norm": 0.13741477915485753, "learning_rate": 2e-05, "loss": 5.478, "step": 438 }, { "epoch": 0.02944628902974813, "grad_norm": 0.13611619541380157, "learning_rate": 2e-05, "loss": 5.4692, "step": 439 }, { "epoch": 0.02951336485897307, "grad_norm": 0.13931356000837053, "learning_rate": 2e-05, "loss": 5.4267, "step": 440 }, { "epoch": 0.02958044068819801, "grad_norm": 0.14033129427387336, "learning_rate": 2e-05, "loss": 5.5426, "step": 441 }, { "epoch": 0.029647516517422947, "grad_norm": 0.14237050493804299, "learning_rate": 2e-05, "loss": 5.5924, "step": 442 }, { "epoch": 0.029714592346647885, "grad_norm": 0.13698646809414747, "learning_rate": 2e-05, "loss": 5.5143, "step": 443 }, { "epoch": 0.029781668175872824, "grad_norm": 0.1441271803700368, "learning_rate": 2e-05, "loss": 5.6055, "step": 444 }, { "epoch": 0.029848744005097762, "grad_norm": 0.13818238967287785, "learning_rate": 2e-05, "loss": 5.6279, "step": 445 }, { "epoch": 0.0299158198343227, "grad_norm": 0.1396905579089585, "learning_rate": 2e-05, "loss": 5.3821, "step": 446 }, { "epoch": 0.02998289566354764, "grad_norm": 0.1346278932208077, "learning_rate": 2e-05, "loss": 5.4033, "step": 447 }, { "epoch": 0.030049971492772578, "grad_norm": 0.1472701382188289, "learning_rate": 2e-05, "loss": 5.4637, "step": 448 }, { "epoch": 0.03011704732199752, "grad_norm": 0.13620646142772422, "learning_rate": 2e-05, "loss": 5.4278, "step": 449 }, { "epoch": 0.03018412315122246, "grad_norm": 0.139769840144812, "learning_rate": 2e-05, "loss": 5.4008, "step": 450 }, { "epoch": 0.030251198980447397, "grad_norm": 0.13431662015387102, "learning_rate": 2e-05, "loss": 5.3964, "step": 451 }, { "epoch": 0.030318274809672335, "grad_norm": 0.13614222957040845, "learning_rate": 2e-05, "loss": 5.5446, "step": 452 }, { "epoch": 0.030385350638897274, "grad_norm": 0.13434201916745456, "learning_rate": 2e-05, "loss": 5.5751, "step": 453 }, { "epoch": 0.030452426468122212, "grad_norm": 0.13559922939738983, "learning_rate": 2e-05, "loss": 5.4403, "step": 454 }, { "epoch": 0.03051950229734715, "grad_norm": 0.1384197423953293, "learning_rate": 2e-05, "loss": 5.5235, "step": 455 }, { "epoch": 0.03058657812657209, "grad_norm": 0.13907468567870393, "learning_rate": 2e-05, "loss": 5.4258, "step": 456 }, { "epoch": 0.030653653955797028, "grad_norm": 0.13083304836010035, "learning_rate": 2e-05, "loss": 5.4779, "step": 457 }, { "epoch": 0.030720729785021966, "grad_norm": 0.13721686400553276, "learning_rate": 2e-05, "loss": 5.5321, "step": 458 }, { "epoch": 0.030787805614246905, "grad_norm": 0.14075765171865992, "learning_rate": 2e-05, "loss": 5.3771, "step": 459 }, { "epoch": 0.030854881443471843, "grad_norm": 0.13036528177136278, "learning_rate": 2e-05, "loss": 5.4213, "step": 460 }, { "epoch": 0.030921957272696785, "grad_norm": 0.13855385060783146, "learning_rate": 2e-05, "loss": 5.5771, "step": 461 }, { "epoch": 0.030989033101921724, "grad_norm": 0.1371557251618741, "learning_rate": 2e-05, "loss": 5.5021, "step": 462 }, { "epoch": 0.031056108931146662, "grad_norm": 0.14539671600075732, "learning_rate": 2e-05, "loss": 5.4447, "step": 463 }, { "epoch": 0.0311231847603716, "grad_norm": 0.14279042916220416, "learning_rate": 2e-05, "loss": 5.434, "step": 464 }, { "epoch": 0.03119026058959654, "grad_norm": 0.1319844368777511, "learning_rate": 2e-05, "loss": 5.5692, "step": 465 }, { "epoch": 0.03125733641882148, "grad_norm": 0.1383976266576286, "learning_rate": 2e-05, "loss": 5.4645, "step": 466 }, { "epoch": 0.03132441224804642, "grad_norm": 0.13656057789942108, "learning_rate": 2e-05, "loss": 5.5564, "step": 467 }, { "epoch": 0.03139148807727136, "grad_norm": 0.13778414743801168, "learning_rate": 2e-05, "loss": 5.4971, "step": 468 }, { "epoch": 0.0314585639064963, "grad_norm": 0.13625650077260326, "learning_rate": 2e-05, "loss": 5.3496, "step": 469 }, { "epoch": 0.031525639735721235, "grad_norm": 0.13728643391748266, "learning_rate": 2e-05, "loss": 5.4375, "step": 470 }, { "epoch": 0.031592715564946174, "grad_norm": 0.13580328335771577, "learning_rate": 2e-05, "loss": 5.4821, "step": 471 }, { "epoch": 0.03165979139417111, "grad_norm": 0.14140290882949022, "learning_rate": 2e-05, "loss": 5.3972, "step": 472 }, { "epoch": 0.03172686722339605, "grad_norm": 0.14291201743267656, "learning_rate": 2e-05, "loss": 5.5432, "step": 473 }, { "epoch": 0.03179394305262099, "grad_norm": 0.1436189244558032, "learning_rate": 2e-05, "loss": 5.4005, "step": 474 }, { "epoch": 0.03186101888184593, "grad_norm": 0.13630264220904464, "learning_rate": 2e-05, "loss": 5.4826, "step": 475 }, { "epoch": 0.031928094711070866, "grad_norm": 0.13495990310862327, "learning_rate": 2e-05, "loss": 5.4191, "step": 476 }, { "epoch": 0.031995170540295804, "grad_norm": 0.1478330990886607, "learning_rate": 2e-05, "loss": 5.5138, "step": 477 }, { "epoch": 0.03206224636952074, "grad_norm": 0.14829139583159276, "learning_rate": 2e-05, "loss": 5.5358, "step": 478 }, { "epoch": 0.03212932219874568, "grad_norm": 0.13624472672881807, "learning_rate": 2e-05, "loss": 5.4631, "step": 479 }, { "epoch": 0.03219639802797062, "grad_norm": 0.1477171651804662, "learning_rate": 2e-05, "loss": 5.4897, "step": 480 }, { "epoch": 0.03226347385719556, "grad_norm": 0.13663957269957958, "learning_rate": 2e-05, "loss": 5.3581, "step": 481 }, { "epoch": 0.0323305496864205, "grad_norm": 0.14135385647848828, "learning_rate": 2e-05, "loss": 5.6269, "step": 482 }, { "epoch": 0.032397625515645435, "grad_norm": 0.13618749920416148, "learning_rate": 2e-05, "loss": 5.5629, "step": 483 }, { "epoch": 0.032464701344870374, "grad_norm": 0.14598840683373648, "learning_rate": 2e-05, "loss": 5.5184, "step": 484 }, { "epoch": 0.03253177717409531, "grad_norm": 0.14044988424340765, "learning_rate": 2e-05, "loss": 5.5307, "step": 485 }, { "epoch": 0.03259885300332025, "grad_norm": 0.14063398831330848, "learning_rate": 2e-05, "loss": 5.463, "step": 486 }, { "epoch": 0.03266592883254519, "grad_norm": 0.14329169807758987, "learning_rate": 2e-05, "loss": 5.3324, "step": 487 }, { "epoch": 0.03273300466177013, "grad_norm": 0.13955101041545528, "learning_rate": 2e-05, "loss": 5.4437, "step": 488 }, { "epoch": 0.03280008049099507, "grad_norm": 0.14266305942757573, "learning_rate": 2e-05, "loss": 5.4165, "step": 489 }, { "epoch": 0.03286715632022001, "grad_norm": 0.14106744970060633, "learning_rate": 2e-05, "loss": 5.5754, "step": 490 }, { "epoch": 0.03293423214944495, "grad_norm": 0.14370074349163833, "learning_rate": 2e-05, "loss": 5.4381, "step": 491 }, { "epoch": 0.03300130797866989, "grad_norm": 0.13963738976655854, "learning_rate": 2e-05, "loss": 5.54, "step": 492 }, { "epoch": 0.03306838380789483, "grad_norm": 0.14146186981111028, "learning_rate": 2e-05, "loss": 5.4907, "step": 493 }, { "epoch": 0.033135459637119766, "grad_norm": 0.1447318132736114, "learning_rate": 2e-05, "loss": 5.4442, "step": 494 }, { "epoch": 0.033202535466344704, "grad_norm": 0.13850270496055583, "learning_rate": 2e-05, "loss": 5.4863, "step": 495 }, { "epoch": 0.03326961129556964, "grad_norm": 0.14216069408613088, "learning_rate": 2e-05, "loss": 5.5329, "step": 496 }, { "epoch": 0.03333668712479458, "grad_norm": 0.14427267437471356, "learning_rate": 2e-05, "loss": 5.4294, "step": 497 }, { "epoch": 0.03340376295401952, "grad_norm": 0.13932140836676593, "learning_rate": 2e-05, "loss": 5.4357, "step": 498 }, { "epoch": 0.03347083878324446, "grad_norm": 0.14367394276047166, "learning_rate": 2e-05, "loss": 5.4106, "step": 499 }, { "epoch": 0.0335379146124694, "grad_norm": 0.1393874677318847, "learning_rate": 2e-05, "loss": 5.309, "step": 500 }, { "epoch": 0.033604990441694335, "grad_norm": 0.14488851081875279, "learning_rate": 2e-05, "loss": 5.5565, "step": 501 }, { "epoch": 0.033672066270919274, "grad_norm": 0.14793721090457707, "learning_rate": 2e-05, "loss": 5.363, "step": 502 }, { "epoch": 0.03373914210014421, "grad_norm": 0.14146585464550626, "learning_rate": 2e-05, "loss": 5.4759, "step": 503 }, { "epoch": 0.03380621792936915, "grad_norm": 0.13726726554930982, "learning_rate": 2e-05, "loss": 5.467, "step": 504 }, { "epoch": 0.03387329375859409, "grad_norm": 0.1418633669922156, "learning_rate": 2e-05, "loss": 5.3842, "step": 505 }, { "epoch": 0.03394036958781903, "grad_norm": 0.14688269695131473, "learning_rate": 2e-05, "loss": 5.5817, "step": 506 }, { "epoch": 0.034007445417043966, "grad_norm": 0.139009095401521, "learning_rate": 2e-05, "loss": 5.3049, "step": 507 }, { "epoch": 0.034074521246268905, "grad_norm": 0.14080696309834667, "learning_rate": 2e-05, "loss": 5.3923, "step": 508 }, { "epoch": 0.03414159707549384, "grad_norm": 0.14123243394184842, "learning_rate": 2e-05, "loss": 5.4844, "step": 509 }, { "epoch": 0.03420867290471878, "grad_norm": 0.14580127846579277, "learning_rate": 2e-05, "loss": 5.4411, "step": 510 }, { "epoch": 0.03427574873394372, "grad_norm": 0.13350034824740667, "learning_rate": 2e-05, "loss": 5.2895, "step": 511 }, { "epoch": 0.034342824563168665, "grad_norm": 0.13647000906202034, "learning_rate": 2e-05, "loss": 5.558, "step": 512 }, { "epoch": 0.034409900392393604, "grad_norm": 0.13860675813577458, "learning_rate": 2e-05, "loss": 5.6502, "step": 513 }, { "epoch": 0.03447697622161854, "grad_norm": 0.14524770125419068, "learning_rate": 2e-05, "loss": 5.5042, "step": 514 }, { "epoch": 0.03454405205084348, "grad_norm": 0.14320345407262589, "learning_rate": 2e-05, "loss": 5.598, "step": 515 }, { "epoch": 0.03461112788006842, "grad_norm": 0.13415672032083029, "learning_rate": 2e-05, "loss": 5.3863, "step": 516 }, { "epoch": 0.03467820370929336, "grad_norm": 0.13179397485156813, "learning_rate": 2e-05, "loss": 5.4362, "step": 517 }, { "epoch": 0.034745279538518296, "grad_norm": 0.14782105129313314, "learning_rate": 2e-05, "loss": 5.5203, "step": 518 }, { "epoch": 0.034812355367743235, "grad_norm": 0.1422382968685032, "learning_rate": 2e-05, "loss": 5.4052, "step": 519 }, { "epoch": 0.03487943119696817, "grad_norm": 0.13652408341433767, "learning_rate": 2e-05, "loss": 5.5191, "step": 520 }, { "epoch": 0.03494650702619311, "grad_norm": 0.14191914614395676, "learning_rate": 2e-05, "loss": 5.4704, "step": 521 }, { "epoch": 0.03501358285541805, "grad_norm": 0.14747583217972665, "learning_rate": 2e-05, "loss": 5.4401, "step": 522 }, { "epoch": 0.03508065868464299, "grad_norm": 0.14136956577302243, "learning_rate": 2e-05, "loss": 5.3528, "step": 523 }, { "epoch": 0.03514773451386793, "grad_norm": 0.1395719243790696, "learning_rate": 2e-05, "loss": 5.3889, "step": 524 }, { "epoch": 0.035214810343092866, "grad_norm": 0.14389336811451958, "learning_rate": 2e-05, "loss": 5.4952, "step": 525 }, { "epoch": 0.035281886172317804, "grad_norm": 0.14260740116233941, "learning_rate": 2e-05, "loss": 5.3935, "step": 526 }, { "epoch": 0.03534896200154274, "grad_norm": 0.1348277216130678, "learning_rate": 2e-05, "loss": 5.3601, "step": 527 }, { "epoch": 0.03541603783076768, "grad_norm": 0.14549668705189578, "learning_rate": 2e-05, "loss": 5.435, "step": 528 }, { "epoch": 0.03548311365999262, "grad_norm": 0.1456737965017188, "learning_rate": 2e-05, "loss": 5.3944, "step": 529 }, { "epoch": 0.03555018948921756, "grad_norm": 0.1356501642805981, "learning_rate": 2e-05, "loss": 5.5089, "step": 530 }, { "epoch": 0.0356172653184425, "grad_norm": 0.14044511571468438, "learning_rate": 2e-05, "loss": 5.4358, "step": 531 }, { "epoch": 0.035684341147667435, "grad_norm": 0.1481717153953622, "learning_rate": 2e-05, "loss": 5.4442, "step": 532 }, { "epoch": 0.035751416976892374, "grad_norm": 0.13991426802378085, "learning_rate": 2e-05, "loss": 5.5592, "step": 533 }, { "epoch": 0.03581849280611731, "grad_norm": 0.14385081832078775, "learning_rate": 2e-05, "loss": 5.5402, "step": 534 }, { "epoch": 0.03588556863534226, "grad_norm": 0.13806055097884395, "learning_rate": 2e-05, "loss": 5.3843, "step": 535 }, { "epoch": 0.035952644464567196, "grad_norm": 0.1427953199256475, "learning_rate": 2e-05, "loss": 5.3584, "step": 536 }, { "epoch": 0.036019720293792135, "grad_norm": 0.13306325262425062, "learning_rate": 2e-05, "loss": 5.4365, "step": 537 }, { "epoch": 0.03608679612301707, "grad_norm": 0.1408053056341624, "learning_rate": 2e-05, "loss": 5.6113, "step": 538 }, { "epoch": 0.03615387195224201, "grad_norm": 0.1387742849455146, "learning_rate": 2e-05, "loss": 5.4181, "step": 539 }, { "epoch": 0.03622094778146695, "grad_norm": 0.13643333235157482, "learning_rate": 2e-05, "loss": 5.411, "step": 540 }, { "epoch": 0.03628802361069189, "grad_norm": 0.13658421853188527, "learning_rate": 2e-05, "loss": 5.3934, "step": 541 }, { "epoch": 0.03635509943991683, "grad_norm": 0.1435077876125611, "learning_rate": 2e-05, "loss": 5.3278, "step": 542 }, { "epoch": 0.036422175269141766, "grad_norm": 0.13239409817519127, "learning_rate": 2e-05, "loss": 5.3971, "step": 543 }, { "epoch": 0.036489251098366704, "grad_norm": 0.1451562967232421, "learning_rate": 2e-05, "loss": 5.414, "step": 544 }, { "epoch": 0.03655632692759164, "grad_norm": 0.14682351561486612, "learning_rate": 2e-05, "loss": 5.3347, "step": 545 }, { "epoch": 0.03662340275681658, "grad_norm": 0.13232980864620708, "learning_rate": 2e-05, "loss": 5.5624, "step": 546 }, { "epoch": 0.03669047858604152, "grad_norm": 0.1434220840238176, "learning_rate": 2e-05, "loss": 5.5377, "step": 547 }, { "epoch": 0.03675755441526646, "grad_norm": 0.14219675784544605, "learning_rate": 2e-05, "loss": 5.482, "step": 548 }, { "epoch": 0.036824630244491396, "grad_norm": 0.14336874812697367, "learning_rate": 2e-05, "loss": 5.4796, "step": 549 }, { "epoch": 0.036891706073716335, "grad_norm": 0.13536108402329605, "learning_rate": 2e-05, "loss": 5.4577, "step": 550 }, { "epoch": 0.03695878190294127, "grad_norm": 0.14007259563567018, "learning_rate": 2e-05, "loss": 5.5944, "step": 551 }, { "epoch": 0.03702585773216621, "grad_norm": 0.13942402023153103, "learning_rate": 2e-05, "loss": 5.5577, "step": 552 }, { "epoch": 0.03709293356139115, "grad_norm": 0.1337767835563611, "learning_rate": 2e-05, "loss": 5.4629, "step": 553 }, { "epoch": 0.03716000939061609, "grad_norm": 0.13960829021769286, "learning_rate": 2e-05, "loss": 5.5737, "step": 554 }, { "epoch": 0.03722708521984103, "grad_norm": 0.1411448232472383, "learning_rate": 2e-05, "loss": 5.5698, "step": 555 }, { "epoch": 0.037294161049065966, "grad_norm": 0.14256740146277125, "learning_rate": 2e-05, "loss": 5.4772, "step": 556 }, { "epoch": 0.03736123687829091, "grad_norm": 0.13822539715550178, "learning_rate": 2e-05, "loss": 5.4271, "step": 557 }, { "epoch": 0.03742831270751585, "grad_norm": 0.14097323248362797, "learning_rate": 2e-05, "loss": 5.5028, "step": 558 }, { "epoch": 0.03749538853674079, "grad_norm": 0.1386736355812705, "learning_rate": 2e-05, "loss": 5.555, "step": 559 }, { "epoch": 0.03756246436596573, "grad_norm": 0.13891426035135243, "learning_rate": 2e-05, "loss": 5.4492, "step": 560 }, { "epoch": 0.037629540195190665, "grad_norm": 0.14606355783798464, "learning_rate": 2e-05, "loss": 5.4714, "step": 561 }, { "epoch": 0.037696616024415604, "grad_norm": 0.1321918596749185, "learning_rate": 2e-05, "loss": 5.5481, "step": 562 }, { "epoch": 0.03776369185364054, "grad_norm": 0.1351852769680835, "learning_rate": 2e-05, "loss": 5.4913, "step": 563 }, { "epoch": 0.03783076768286548, "grad_norm": 0.1405655204708842, "learning_rate": 2e-05, "loss": 5.3741, "step": 564 }, { "epoch": 0.03789784351209042, "grad_norm": 0.13636462962121554, "learning_rate": 2e-05, "loss": 5.4713, "step": 565 }, { "epoch": 0.03796491934131536, "grad_norm": 0.13661207553159538, "learning_rate": 2e-05, "loss": 5.5205, "step": 566 }, { "epoch": 0.038031995170540296, "grad_norm": 0.13980690103939206, "learning_rate": 2e-05, "loss": 5.2878, "step": 567 }, { "epoch": 0.038099070999765235, "grad_norm": 0.13959467913809961, "learning_rate": 2e-05, "loss": 5.5552, "step": 568 }, { "epoch": 0.03816614682899017, "grad_norm": 0.1396055052539937, "learning_rate": 2e-05, "loss": 5.4398, "step": 569 }, { "epoch": 0.03823322265821511, "grad_norm": 0.13193367937539396, "learning_rate": 2e-05, "loss": 5.5218, "step": 570 }, { "epoch": 0.03830029848744005, "grad_norm": 0.13664550463911304, "learning_rate": 2e-05, "loss": 5.395, "step": 571 }, { "epoch": 0.03836737431666499, "grad_norm": 0.13473126239557717, "learning_rate": 2e-05, "loss": 5.4586, "step": 572 }, { "epoch": 0.03843445014588993, "grad_norm": 0.13164093228191315, "learning_rate": 2e-05, "loss": 5.4066, "step": 573 }, { "epoch": 0.038501525975114866, "grad_norm": 0.13871402916426678, "learning_rate": 2e-05, "loss": 5.5186, "step": 574 }, { "epoch": 0.038568601804339804, "grad_norm": 0.137519720494307, "learning_rate": 2e-05, "loss": 5.4445, "step": 575 }, { "epoch": 0.03863567763356474, "grad_norm": 0.1355004671118137, "learning_rate": 2e-05, "loss": 5.5667, "step": 576 }, { "epoch": 0.03870275346278968, "grad_norm": 0.14232664894195857, "learning_rate": 2e-05, "loss": 5.6236, "step": 577 }, { "epoch": 0.03876982929201462, "grad_norm": 0.1493222915824619, "learning_rate": 2e-05, "loss": 5.3458, "step": 578 }, { "epoch": 0.03883690512123956, "grad_norm": 0.13948887892224626, "learning_rate": 2e-05, "loss": 5.5183, "step": 579 }, { "epoch": 0.0389039809504645, "grad_norm": 0.14402416350878355, "learning_rate": 2e-05, "loss": 5.4023, "step": 580 }, { "epoch": 0.03897105677968944, "grad_norm": 0.14358669852650835, "learning_rate": 2e-05, "loss": 5.454, "step": 581 }, { "epoch": 0.03903813260891438, "grad_norm": 0.13756924562596545, "learning_rate": 2e-05, "loss": 5.485, "step": 582 }, { "epoch": 0.03910520843813932, "grad_norm": 0.1513360583916843, "learning_rate": 2e-05, "loss": 5.3952, "step": 583 }, { "epoch": 0.03917228426736426, "grad_norm": 0.14091786816383903, "learning_rate": 2e-05, "loss": 5.4896, "step": 584 }, { "epoch": 0.039239360096589196, "grad_norm": 0.1379135124585367, "learning_rate": 2e-05, "loss": 5.2596, "step": 585 }, { "epoch": 0.039306435925814134, "grad_norm": 0.14466392612894816, "learning_rate": 2e-05, "loss": 5.6167, "step": 586 }, { "epoch": 0.03937351175503907, "grad_norm": 0.14579166466124044, "learning_rate": 2e-05, "loss": 5.3335, "step": 587 }, { "epoch": 0.03944058758426401, "grad_norm": 0.13616253236203202, "learning_rate": 2e-05, "loss": 5.3037, "step": 588 }, { "epoch": 0.03950766341348895, "grad_norm": 0.13847137904328194, "learning_rate": 2e-05, "loss": 5.565, "step": 589 }, { "epoch": 0.03957473924271389, "grad_norm": 0.1575811213534679, "learning_rate": 2e-05, "loss": 5.3737, "step": 590 }, { "epoch": 0.03964181507193883, "grad_norm": 0.1410527173501351, "learning_rate": 2e-05, "loss": 5.5262, "step": 591 }, { "epoch": 0.039708890901163765, "grad_norm": 0.13665623475849864, "learning_rate": 2e-05, "loss": 5.4399, "step": 592 }, { "epoch": 0.039775966730388704, "grad_norm": 0.1338411862483947, "learning_rate": 2e-05, "loss": 5.5681, "step": 593 }, { "epoch": 0.03984304255961364, "grad_norm": 0.14850151022804023, "learning_rate": 2e-05, "loss": 5.3831, "step": 594 }, { "epoch": 0.03991011838883858, "grad_norm": 0.1449621747735679, "learning_rate": 2e-05, "loss": 5.4273, "step": 595 }, { "epoch": 0.03997719421806352, "grad_norm": 0.13747463700667578, "learning_rate": 2e-05, "loss": 5.5933, "step": 596 }, { "epoch": 0.04004427004728846, "grad_norm": 0.14247869152400375, "learning_rate": 2e-05, "loss": 5.3792, "step": 597 }, { "epoch": 0.040111345876513396, "grad_norm": 0.14545732042052903, "learning_rate": 2e-05, "loss": 5.586, "step": 598 }, { "epoch": 0.040178421705738335, "grad_norm": 0.14037656881537025, "learning_rate": 2e-05, "loss": 5.4353, "step": 599 }, { "epoch": 0.04024549753496327, "grad_norm": 0.13890638141933984, "learning_rate": 2e-05, "loss": 5.5707, "step": 600 }, { "epoch": 0.04031257336418821, "grad_norm": 0.14043548923391092, "learning_rate": 2e-05, "loss": 5.5406, "step": 601 }, { "epoch": 0.04037964919341315, "grad_norm": 0.1410720869213143, "learning_rate": 2e-05, "loss": 5.4818, "step": 602 }, { "epoch": 0.040446725022638096, "grad_norm": 0.14205292654560056, "learning_rate": 2e-05, "loss": 5.4011, "step": 603 }, { "epoch": 0.040513800851863034, "grad_norm": 0.1405367716706814, "learning_rate": 2e-05, "loss": 5.4069, "step": 604 }, { "epoch": 0.04058087668108797, "grad_norm": 0.1305995345486736, "learning_rate": 2e-05, "loss": 5.404, "step": 605 }, { "epoch": 0.04064795251031291, "grad_norm": 0.1438338887327888, "learning_rate": 2e-05, "loss": 5.5348, "step": 606 }, { "epoch": 0.04071502833953785, "grad_norm": 0.1447802627138278, "learning_rate": 2e-05, "loss": 5.4614, "step": 607 }, { "epoch": 0.04078210416876279, "grad_norm": 0.1386422869968883, "learning_rate": 2e-05, "loss": 5.411, "step": 608 }, { "epoch": 0.04084917999798773, "grad_norm": 0.14330857590760096, "learning_rate": 2e-05, "loss": 5.6255, "step": 609 }, { "epoch": 0.040916255827212665, "grad_norm": 0.1482156192113495, "learning_rate": 2e-05, "loss": 5.5517, "step": 610 }, { "epoch": 0.040983331656437604, "grad_norm": 0.1369358971036775, "learning_rate": 2e-05, "loss": 5.5285, "step": 611 }, { "epoch": 0.04105040748566254, "grad_norm": 0.13821679904084444, "learning_rate": 2e-05, "loss": 5.5294, "step": 612 }, { "epoch": 0.04111748331488748, "grad_norm": 0.14144677981017342, "learning_rate": 2e-05, "loss": 5.5266, "step": 613 }, { "epoch": 0.04118455914411242, "grad_norm": 0.13824273464898712, "learning_rate": 2e-05, "loss": 5.5659, "step": 614 }, { "epoch": 0.04125163497333736, "grad_norm": 0.14061187164486597, "learning_rate": 2e-05, "loss": 5.3934, "step": 615 }, { "epoch": 0.041318710802562296, "grad_norm": 0.14095205119779503, "learning_rate": 2e-05, "loss": 5.4487, "step": 616 }, { "epoch": 0.041385786631787234, "grad_norm": 0.14319504628953475, "learning_rate": 2e-05, "loss": 5.3973, "step": 617 }, { "epoch": 0.04145286246101217, "grad_norm": 0.13836943767229448, "learning_rate": 2e-05, "loss": 5.4719, "step": 618 }, { "epoch": 0.04151993829023711, "grad_norm": 0.13512784000399458, "learning_rate": 2e-05, "loss": 5.5451, "step": 619 }, { "epoch": 0.04158701411946205, "grad_norm": 0.14189427112027952, "learning_rate": 2e-05, "loss": 5.6001, "step": 620 }, { "epoch": 0.04165408994868699, "grad_norm": 0.1403513312690403, "learning_rate": 2e-05, "loss": 5.5655, "step": 621 }, { "epoch": 0.04172116577791193, "grad_norm": 0.13945650528846082, "learning_rate": 2e-05, "loss": 5.4681, "step": 622 }, { "epoch": 0.041788241607136865, "grad_norm": 0.13987003530335432, "learning_rate": 2e-05, "loss": 5.4305, "step": 623 }, { "epoch": 0.041855317436361804, "grad_norm": 0.14274062855701655, "learning_rate": 2e-05, "loss": 5.4842, "step": 624 }, { "epoch": 0.04192239326558675, "grad_norm": 0.1379136834023827, "learning_rate": 2e-05, "loss": 5.465, "step": 625 }, { "epoch": 0.04198946909481169, "grad_norm": 0.1331157866465741, "learning_rate": 2e-05, "loss": 5.3935, "step": 626 }, { "epoch": 0.042056544924036626, "grad_norm": 0.1444552460966105, "learning_rate": 2e-05, "loss": 5.589, "step": 627 }, { "epoch": 0.042123620753261565, "grad_norm": 0.1494344639472678, "learning_rate": 2e-05, "loss": 5.5014, "step": 628 }, { "epoch": 0.0421906965824865, "grad_norm": 0.13779121503233616, "learning_rate": 2e-05, "loss": 5.4231, "step": 629 }, { "epoch": 0.04225777241171144, "grad_norm": 0.1353682260026374, "learning_rate": 2e-05, "loss": 5.5386, "step": 630 }, { "epoch": 0.04232484824093638, "grad_norm": 0.13505601004317694, "learning_rate": 2e-05, "loss": 5.6441, "step": 631 }, { "epoch": 0.04239192407016132, "grad_norm": 0.13673407252093472, "learning_rate": 2e-05, "loss": 5.6584, "step": 632 }, { "epoch": 0.04245899989938626, "grad_norm": 0.13952895671065724, "learning_rate": 2e-05, "loss": 5.5915, "step": 633 }, { "epoch": 0.042526075728611196, "grad_norm": 0.13502645535908375, "learning_rate": 2e-05, "loss": 5.3987, "step": 634 }, { "epoch": 0.042593151557836134, "grad_norm": 0.13722203420484583, "learning_rate": 2e-05, "loss": 5.4855, "step": 635 }, { "epoch": 0.04266022738706107, "grad_norm": 0.13547254257428512, "learning_rate": 2e-05, "loss": 5.4538, "step": 636 }, { "epoch": 0.04272730321628601, "grad_norm": 0.13346091985202946, "learning_rate": 2e-05, "loss": 5.5436, "step": 637 }, { "epoch": 0.04279437904551095, "grad_norm": 0.1322312472443467, "learning_rate": 2e-05, "loss": 5.509, "step": 638 }, { "epoch": 0.04286145487473589, "grad_norm": 0.13976357835377334, "learning_rate": 2e-05, "loss": 5.5889, "step": 639 }, { "epoch": 0.04292853070396083, "grad_norm": 0.13533720945602512, "learning_rate": 2e-05, "loss": 5.4222, "step": 640 }, { "epoch": 0.042995606533185765, "grad_norm": 0.13529379166141756, "learning_rate": 2e-05, "loss": 5.5203, "step": 641 }, { "epoch": 0.043062682362410704, "grad_norm": 0.13405851885463874, "learning_rate": 2e-05, "loss": 5.472, "step": 642 }, { "epoch": 0.04312975819163564, "grad_norm": 0.14299724362105026, "learning_rate": 2e-05, "loss": 5.5263, "step": 643 }, { "epoch": 0.04319683402086058, "grad_norm": 0.13773847424131402, "learning_rate": 2e-05, "loss": 5.4432, "step": 644 }, { "epoch": 0.04326390985008552, "grad_norm": 0.13941592754755283, "learning_rate": 2e-05, "loss": 5.4179, "step": 645 }, { "epoch": 0.04333098567931046, "grad_norm": 0.14287175613530076, "learning_rate": 2e-05, "loss": 5.3087, "step": 646 }, { "epoch": 0.043398061508535396, "grad_norm": 0.14332531094216394, "learning_rate": 2e-05, "loss": 5.4335, "step": 647 }, { "epoch": 0.04346513733776034, "grad_norm": 0.13567264776547439, "learning_rate": 2e-05, "loss": 5.3195, "step": 648 }, { "epoch": 0.04353221316698528, "grad_norm": 0.1358870056566697, "learning_rate": 2e-05, "loss": 5.5239, "step": 649 }, { "epoch": 0.04359928899621022, "grad_norm": 0.14383192065048644, "learning_rate": 2e-05, "loss": 5.5391, "step": 650 }, { "epoch": 0.04366636482543516, "grad_norm": 0.14205681708970144, "learning_rate": 2e-05, "loss": 5.3914, "step": 651 }, { "epoch": 0.043733440654660095, "grad_norm": 0.14036834151940608, "learning_rate": 2e-05, "loss": 5.3794, "step": 652 }, { "epoch": 0.043800516483885034, "grad_norm": 0.14128155321834157, "learning_rate": 2e-05, "loss": 5.4656, "step": 653 }, { "epoch": 0.04386759231310997, "grad_norm": 0.1372517685943907, "learning_rate": 2e-05, "loss": 5.6727, "step": 654 }, { "epoch": 0.04393466814233491, "grad_norm": 0.14416137682283492, "learning_rate": 2e-05, "loss": 5.4771, "step": 655 }, { "epoch": 0.04400174397155985, "grad_norm": 0.14086294923237053, "learning_rate": 2e-05, "loss": 5.6039, "step": 656 }, { "epoch": 0.04406881980078479, "grad_norm": 0.14131574559933985, "learning_rate": 2e-05, "loss": 5.4385, "step": 657 }, { "epoch": 0.044135895630009726, "grad_norm": 0.13814460678025367, "learning_rate": 2e-05, "loss": 5.5273, "step": 658 }, { "epoch": 0.044202971459234665, "grad_norm": 0.13444402320247367, "learning_rate": 2e-05, "loss": 5.4203, "step": 659 }, { "epoch": 0.0442700472884596, "grad_norm": 0.13756833298978008, "learning_rate": 2e-05, "loss": 5.4767, "step": 660 }, { "epoch": 0.04433712311768454, "grad_norm": 0.14084657154059518, "learning_rate": 2e-05, "loss": 5.6169, "step": 661 }, { "epoch": 0.04440419894690948, "grad_norm": 0.13725206676233664, "learning_rate": 2e-05, "loss": 5.4911, "step": 662 }, { "epoch": 0.04447127477613442, "grad_norm": 0.14671819805687436, "learning_rate": 2e-05, "loss": 5.4505, "step": 663 }, { "epoch": 0.04453835060535936, "grad_norm": 0.13842636181004334, "learning_rate": 2e-05, "loss": 5.4645, "step": 664 }, { "epoch": 0.044605426434584296, "grad_norm": 0.1356989081518579, "learning_rate": 2e-05, "loss": 5.4456, "step": 665 }, { "epoch": 0.044672502263809234, "grad_norm": 0.143624327052737, "learning_rate": 2e-05, "loss": 5.3947, "step": 666 }, { "epoch": 0.04473957809303417, "grad_norm": 0.14421885778439977, "learning_rate": 2e-05, "loss": 5.6708, "step": 667 }, { "epoch": 0.04480665392225911, "grad_norm": 0.1427561940630554, "learning_rate": 2e-05, "loss": 5.5548, "step": 668 }, { "epoch": 0.04487372975148405, "grad_norm": 0.13864440268106895, "learning_rate": 2e-05, "loss": 5.4273, "step": 669 }, { "epoch": 0.04494080558070899, "grad_norm": 0.14245768057433805, "learning_rate": 2e-05, "loss": 5.4881, "step": 670 }, { "epoch": 0.045007881409933934, "grad_norm": 0.14154681851686246, "learning_rate": 2e-05, "loss": 5.4128, "step": 671 }, { "epoch": 0.04507495723915887, "grad_norm": 0.14527272461887336, "learning_rate": 2e-05, "loss": 5.4935, "step": 672 }, { "epoch": 0.04514203306838381, "grad_norm": 0.13734141016394774, "learning_rate": 2e-05, "loss": 5.5022, "step": 673 }, { "epoch": 0.04520910889760875, "grad_norm": 0.1465421696864576, "learning_rate": 2e-05, "loss": 5.5184, "step": 674 }, { "epoch": 0.04527618472683369, "grad_norm": 0.13779435303068854, "learning_rate": 2e-05, "loss": 5.4656, "step": 675 }, { "epoch": 0.045343260556058626, "grad_norm": 0.14178039715463103, "learning_rate": 2e-05, "loss": 5.4174, "step": 676 }, { "epoch": 0.045410336385283565, "grad_norm": 0.14616190490145753, "learning_rate": 2e-05, "loss": 5.4852, "step": 677 }, { "epoch": 0.0454774122145085, "grad_norm": 0.13763877213692075, "learning_rate": 2e-05, "loss": 5.4653, "step": 678 }, { "epoch": 0.04554448804373344, "grad_norm": 0.13980016136474885, "learning_rate": 2e-05, "loss": 5.5444, "step": 679 }, { "epoch": 0.04561156387295838, "grad_norm": 0.137715397390648, "learning_rate": 2e-05, "loss": 5.5387, "step": 680 }, { "epoch": 0.04567863970218332, "grad_norm": 0.1372706413266507, "learning_rate": 2e-05, "loss": 5.4213, "step": 681 }, { "epoch": 0.04574571553140826, "grad_norm": 0.1354507087608965, "learning_rate": 2e-05, "loss": 5.5454, "step": 682 }, { "epoch": 0.045812791360633195, "grad_norm": 0.138702741054376, "learning_rate": 2e-05, "loss": 5.5524, "step": 683 }, { "epoch": 0.045879867189858134, "grad_norm": 0.1439775365053947, "learning_rate": 2e-05, "loss": 5.4169, "step": 684 }, { "epoch": 0.04594694301908307, "grad_norm": 0.13955574881868843, "learning_rate": 2e-05, "loss": 5.38, "step": 685 }, { "epoch": 0.04601401884830801, "grad_norm": 0.1427532255763558, "learning_rate": 2e-05, "loss": 5.5877, "step": 686 }, { "epoch": 0.04608109467753295, "grad_norm": 0.14307774558633565, "learning_rate": 2e-05, "loss": 5.5149, "step": 687 }, { "epoch": 0.04614817050675789, "grad_norm": 0.13561945813456946, "learning_rate": 2e-05, "loss": 5.4972, "step": 688 }, { "epoch": 0.046215246335982826, "grad_norm": 0.13474997095301944, "learning_rate": 2e-05, "loss": 5.4804, "step": 689 }, { "epoch": 0.046282322165207765, "grad_norm": 0.14295841961581612, "learning_rate": 2e-05, "loss": 5.3925, "step": 690 }, { "epoch": 0.0463493979944327, "grad_norm": 0.13825600420584314, "learning_rate": 2e-05, "loss": 5.4524, "step": 691 }, { "epoch": 0.04641647382365764, "grad_norm": 0.14042608783431912, "learning_rate": 2e-05, "loss": 5.3639, "step": 692 }, { "epoch": 0.04648354965288258, "grad_norm": 0.13951528651867623, "learning_rate": 2e-05, "loss": 5.5752, "step": 693 }, { "epoch": 0.046550625482107526, "grad_norm": 0.13793742248091767, "learning_rate": 2e-05, "loss": 5.4995, "step": 694 }, { "epoch": 0.046617701311332464, "grad_norm": 0.1409577923361574, "learning_rate": 2e-05, "loss": 5.5288, "step": 695 }, { "epoch": 0.0466847771405574, "grad_norm": 0.13977771961849436, "learning_rate": 2e-05, "loss": 5.3562, "step": 696 }, { "epoch": 0.04675185296978234, "grad_norm": 0.14459618666743781, "learning_rate": 2e-05, "loss": 5.5676, "step": 697 }, { "epoch": 0.04681892879900728, "grad_norm": 0.1354144056086241, "learning_rate": 2e-05, "loss": 5.5064, "step": 698 }, { "epoch": 0.04688600462823222, "grad_norm": 0.14339030646872963, "learning_rate": 2e-05, "loss": 5.5063, "step": 699 }, { "epoch": 0.04695308045745716, "grad_norm": 0.13708036185843223, "learning_rate": 2e-05, "loss": 5.5848, "step": 700 }, { "epoch": 0.047020156286682095, "grad_norm": 0.13220596805939644, "learning_rate": 2e-05, "loss": 5.4577, "step": 701 }, { "epoch": 0.047087232115907034, "grad_norm": 0.14054448501164102, "learning_rate": 2e-05, "loss": 5.5742, "step": 702 }, { "epoch": 0.04715430794513197, "grad_norm": 0.1379018155485352, "learning_rate": 2e-05, "loss": 5.5134, "step": 703 }, { "epoch": 0.04722138377435691, "grad_norm": 0.13604312786485726, "learning_rate": 2e-05, "loss": 5.6483, "step": 704 }, { "epoch": 0.04728845960358185, "grad_norm": 0.1417843650138657, "learning_rate": 2e-05, "loss": 5.507, "step": 705 }, { "epoch": 0.04735553543280679, "grad_norm": 0.14688767455070992, "learning_rate": 2e-05, "loss": 5.4784, "step": 706 }, { "epoch": 0.047422611262031726, "grad_norm": 0.1424435900164657, "learning_rate": 2e-05, "loss": 5.3828, "step": 707 }, { "epoch": 0.047489687091256665, "grad_norm": 0.14738016852072106, "learning_rate": 2e-05, "loss": 5.4229, "step": 708 }, { "epoch": 0.0475567629204816, "grad_norm": 0.15536535033988336, "learning_rate": 2e-05, "loss": 5.4301, "step": 709 }, { "epoch": 0.04762383874970654, "grad_norm": 0.14129097197304216, "learning_rate": 2e-05, "loss": 5.3782, "step": 710 }, { "epoch": 0.04769091457893148, "grad_norm": 0.14731117823849502, "learning_rate": 2e-05, "loss": 5.4473, "step": 711 }, { "epoch": 0.04775799040815642, "grad_norm": 0.14435206870439454, "learning_rate": 2e-05, "loss": 5.5475, "step": 712 }, { "epoch": 0.04782506623738136, "grad_norm": 0.14288452454163153, "learning_rate": 2e-05, "loss": 5.4563, "step": 713 }, { "epoch": 0.047892142066606296, "grad_norm": 0.14161191308170515, "learning_rate": 2e-05, "loss": 5.3787, "step": 714 }, { "epoch": 0.047959217895831234, "grad_norm": 0.1418903743742546, "learning_rate": 2e-05, "loss": 5.4836, "step": 715 }, { "epoch": 0.04802629372505618, "grad_norm": 0.14489603623091238, "learning_rate": 2e-05, "loss": 5.5585, "step": 716 }, { "epoch": 0.04809336955428112, "grad_norm": 0.13924266255096693, "learning_rate": 2e-05, "loss": 5.479, "step": 717 }, { "epoch": 0.048160445383506056, "grad_norm": 0.13940991905924555, "learning_rate": 2e-05, "loss": 5.5412, "step": 718 }, { "epoch": 0.048227521212730995, "grad_norm": 0.14059649452273185, "learning_rate": 2e-05, "loss": 5.577, "step": 719 }, { "epoch": 0.04829459704195593, "grad_norm": 0.156013079699878, "learning_rate": 2e-05, "loss": 5.4822, "step": 720 }, { "epoch": 0.04836167287118087, "grad_norm": 0.13860426311076016, "learning_rate": 2e-05, "loss": 5.4887, "step": 721 }, { "epoch": 0.04842874870040581, "grad_norm": 0.15058213786113767, "learning_rate": 2e-05, "loss": 5.4957, "step": 722 }, { "epoch": 0.04849582452963075, "grad_norm": 0.1471991804658962, "learning_rate": 2e-05, "loss": 5.385, "step": 723 }, { "epoch": 0.04856290035885569, "grad_norm": 0.13898185980430522, "learning_rate": 2e-05, "loss": 5.4351, "step": 724 }, { "epoch": 0.048629976188080626, "grad_norm": 0.14474775848734892, "learning_rate": 2e-05, "loss": 5.4359, "step": 725 }, { "epoch": 0.048697052017305564, "grad_norm": 0.14547265157225628, "learning_rate": 2e-05, "loss": 5.45, "step": 726 }, { "epoch": 0.0487641278465305, "grad_norm": 0.15110964241263422, "learning_rate": 2e-05, "loss": 5.3932, "step": 727 }, { "epoch": 0.04883120367575544, "grad_norm": 0.14568161529226067, "learning_rate": 2e-05, "loss": 5.4044, "step": 728 }, { "epoch": 0.04889827950498038, "grad_norm": 0.14317519146667804, "learning_rate": 2e-05, "loss": 5.3993, "step": 729 }, { "epoch": 0.04896535533420532, "grad_norm": 0.14368900396520803, "learning_rate": 2e-05, "loss": 5.4428, "step": 730 }, { "epoch": 0.04903243116343026, "grad_norm": 0.14799592146466703, "learning_rate": 2e-05, "loss": 5.4929, "step": 731 }, { "epoch": 0.049099506992655195, "grad_norm": 0.1374586908551005, "learning_rate": 2e-05, "loss": 5.4652, "step": 732 }, { "epoch": 0.049166582821880134, "grad_norm": 0.14090424067704502, "learning_rate": 2e-05, "loss": 5.4518, "step": 733 }, { "epoch": 0.04923365865110507, "grad_norm": 0.14512103945201657, "learning_rate": 2e-05, "loss": 5.5188, "step": 734 }, { "epoch": 0.04930073448033001, "grad_norm": 0.13827428444801806, "learning_rate": 2e-05, "loss": 5.5551, "step": 735 }, { "epoch": 0.04936781030955495, "grad_norm": 0.14675688280788232, "learning_rate": 2e-05, "loss": 5.343, "step": 736 }, { "epoch": 0.04943488613877989, "grad_norm": 0.14638123039207257, "learning_rate": 2e-05, "loss": 5.5466, "step": 737 }, { "epoch": 0.049501961968004826, "grad_norm": 0.1560724955556814, "learning_rate": 2e-05, "loss": 5.5679, "step": 738 }, { "epoch": 0.04956903779722977, "grad_norm": 0.14466166873369335, "learning_rate": 2e-05, "loss": 5.3942, "step": 739 }, { "epoch": 0.04963611362645471, "grad_norm": 0.13981762977797096, "learning_rate": 2e-05, "loss": 5.4849, "step": 740 }, { "epoch": 0.04970318945567965, "grad_norm": 0.1459812432169665, "learning_rate": 2e-05, "loss": 5.5568, "step": 741 }, { "epoch": 0.04977026528490459, "grad_norm": 0.14413644529517733, "learning_rate": 2e-05, "loss": 5.47, "step": 742 }, { "epoch": 0.049837341114129526, "grad_norm": 0.13914012564303369, "learning_rate": 2e-05, "loss": 5.5178, "step": 743 }, { "epoch": 0.049904416943354464, "grad_norm": 0.13910785363640651, "learning_rate": 2e-05, "loss": 5.3553, "step": 744 }, { "epoch": 0.0499714927725794, "grad_norm": 0.15294409282184693, "learning_rate": 2e-05, "loss": 5.5345, "step": 745 }, { "epoch": 0.05003856860180434, "grad_norm": 0.14262523649231573, "learning_rate": 2e-05, "loss": 5.4811, "step": 746 }, { "epoch": 0.05010564443102928, "grad_norm": 0.13740834231732385, "learning_rate": 2e-05, "loss": 5.496, "step": 747 }, { "epoch": 0.05017272026025422, "grad_norm": 0.14381353132298083, "learning_rate": 2e-05, "loss": 5.5713, "step": 748 }, { "epoch": 0.050239796089479156, "grad_norm": 0.14407248975999912, "learning_rate": 2e-05, "loss": 5.4998, "step": 749 }, { "epoch": 0.050306871918704095, "grad_norm": 0.14299021645183782, "learning_rate": 2e-05, "loss": 5.6161, "step": 750 }, { "epoch": 0.05037394774792903, "grad_norm": 0.1436514686554492, "learning_rate": 2e-05, "loss": 5.4537, "step": 751 }, { "epoch": 0.05044102357715397, "grad_norm": 0.14349355455732365, "learning_rate": 2e-05, "loss": 5.5511, "step": 752 }, { "epoch": 0.05050809940637891, "grad_norm": 0.15364827332811654, "learning_rate": 2e-05, "loss": 5.5485, "step": 753 }, { "epoch": 0.05057517523560385, "grad_norm": 0.1395874557499621, "learning_rate": 2e-05, "loss": 5.5329, "step": 754 }, { "epoch": 0.05064225106482879, "grad_norm": 0.13851607290094617, "learning_rate": 2e-05, "loss": 5.5744, "step": 755 }, { "epoch": 0.050709326894053726, "grad_norm": 0.1482448145385073, "learning_rate": 2e-05, "loss": 5.432, "step": 756 }, { "epoch": 0.050776402723278664, "grad_norm": 0.14357580439103637, "learning_rate": 2e-05, "loss": 5.4076, "step": 757 }, { "epoch": 0.0508434785525036, "grad_norm": 0.13615352609853693, "learning_rate": 2e-05, "loss": 5.4581, "step": 758 }, { "epoch": 0.05091055438172854, "grad_norm": 0.14104478054752959, "learning_rate": 2e-05, "loss": 5.579, "step": 759 }, { "epoch": 0.05097763021095348, "grad_norm": 0.14382269214670343, "learning_rate": 2e-05, "loss": 5.4899, "step": 760 }, { "epoch": 0.05104470604017842, "grad_norm": 0.1453424860173766, "learning_rate": 2e-05, "loss": 5.5042, "step": 761 }, { "epoch": 0.051111781869403364, "grad_norm": 0.1378142275682304, "learning_rate": 2e-05, "loss": 5.4964, "step": 762 }, { "epoch": 0.0511788576986283, "grad_norm": 0.1407211767551057, "learning_rate": 2e-05, "loss": 5.4861, "step": 763 }, { "epoch": 0.05124593352785324, "grad_norm": 0.14507754599168074, "learning_rate": 2e-05, "loss": 5.4546, "step": 764 }, { "epoch": 0.05131300935707818, "grad_norm": 0.1371333172533457, "learning_rate": 2e-05, "loss": 5.4573, "step": 765 }, { "epoch": 0.05138008518630312, "grad_norm": 0.14019084845144397, "learning_rate": 2e-05, "loss": 5.45, "step": 766 }, { "epoch": 0.051447161015528056, "grad_norm": 0.14601344719671633, "learning_rate": 2e-05, "loss": 5.5222, "step": 767 }, { "epoch": 0.051514236844752995, "grad_norm": 0.13709215476394632, "learning_rate": 2e-05, "loss": 5.5543, "step": 768 }, { "epoch": 0.05158131267397793, "grad_norm": 0.13709275755688305, "learning_rate": 2e-05, "loss": 5.4711, "step": 769 }, { "epoch": 0.05164838850320287, "grad_norm": 0.14582096377803794, "learning_rate": 2e-05, "loss": 5.51, "step": 770 }, { "epoch": 0.05171546433242781, "grad_norm": 0.14413621052224881, "learning_rate": 2e-05, "loss": 5.5134, "step": 771 }, { "epoch": 0.05178254016165275, "grad_norm": 0.13946679017960892, "learning_rate": 2e-05, "loss": 5.4633, "step": 772 }, { "epoch": 0.05184961599087769, "grad_norm": 0.13784149293242642, "learning_rate": 2e-05, "loss": 5.5561, "step": 773 }, { "epoch": 0.051916691820102626, "grad_norm": 0.139259944074652, "learning_rate": 2e-05, "loss": 5.4068, "step": 774 }, { "epoch": 0.051983767649327564, "grad_norm": 0.14244617490022182, "learning_rate": 2e-05, "loss": 5.5005, "step": 775 }, { "epoch": 0.0520508434785525, "grad_norm": 0.1355845305077489, "learning_rate": 2e-05, "loss": 5.5592, "step": 776 }, { "epoch": 0.05211791930777744, "grad_norm": 0.1422706649195537, "learning_rate": 2e-05, "loss": 5.4064, "step": 777 }, { "epoch": 0.05218499513700238, "grad_norm": 0.14646043192603106, "learning_rate": 2e-05, "loss": 5.4213, "step": 778 }, { "epoch": 0.05225207096622732, "grad_norm": 0.147456315844512, "learning_rate": 2e-05, "loss": 5.3064, "step": 779 }, { "epoch": 0.05231914679545226, "grad_norm": 0.1470154653015442, "learning_rate": 2e-05, "loss": 5.4094, "step": 780 }, { "epoch": 0.052386222624677195, "grad_norm": 0.14069580701895698, "learning_rate": 2e-05, "loss": 5.6507, "step": 781 }, { "epoch": 0.052453298453902134, "grad_norm": 0.14032940746735406, "learning_rate": 2e-05, "loss": 5.4467, "step": 782 }, { "epoch": 0.05252037428312707, "grad_norm": 0.13175202171608927, "learning_rate": 2e-05, "loss": 5.2898, "step": 783 }, { "epoch": 0.05258745011235201, "grad_norm": 0.14175649744340305, "learning_rate": 2e-05, "loss": 5.4159, "step": 784 }, { "epoch": 0.052654525941576956, "grad_norm": 0.14354654706079994, "learning_rate": 2e-05, "loss": 5.4711, "step": 785 }, { "epoch": 0.052721601770801894, "grad_norm": 0.14651242905165499, "learning_rate": 2e-05, "loss": 5.3574, "step": 786 }, { "epoch": 0.05278867760002683, "grad_norm": 0.13971193924898306, "learning_rate": 2e-05, "loss": 5.4933, "step": 787 }, { "epoch": 0.05285575342925177, "grad_norm": 0.145747693978211, "learning_rate": 2e-05, "loss": 5.4219, "step": 788 }, { "epoch": 0.05292282925847671, "grad_norm": 0.1415688699713498, "learning_rate": 2e-05, "loss": 5.519, "step": 789 }, { "epoch": 0.05298990508770165, "grad_norm": 0.1336041732092004, "learning_rate": 2e-05, "loss": 5.4699, "step": 790 }, { "epoch": 0.05305698091692659, "grad_norm": 0.1365943676391641, "learning_rate": 2e-05, "loss": 5.5071, "step": 791 }, { "epoch": 0.053124056746151525, "grad_norm": 0.13602140990713338, "learning_rate": 2e-05, "loss": 5.4861, "step": 792 }, { "epoch": 0.053191132575376464, "grad_norm": 0.1377099261404001, "learning_rate": 2e-05, "loss": 5.1798, "step": 793 }, { "epoch": 0.0532582084046014, "grad_norm": 0.14286396454688055, "learning_rate": 2e-05, "loss": 5.6053, "step": 794 }, { "epoch": 0.05332528423382634, "grad_norm": 0.13769427743235285, "learning_rate": 2e-05, "loss": 5.6173, "step": 795 }, { "epoch": 0.05339236006305128, "grad_norm": 0.14037523303507846, "learning_rate": 2e-05, "loss": 5.4617, "step": 796 }, { "epoch": 0.05345943589227622, "grad_norm": 0.13784999632976636, "learning_rate": 2e-05, "loss": 5.4373, "step": 797 }, { "epoch": 0.053526511721501156, "grad_norm": 0.14077061853857362, "learning_rate": 2e-05, "loss": 5.4068, "step": 798 }, { "epoch": 0.053593587550726095, "grad_norm": 0.13607626227235245, "learning_rate": 2e-05, "loss": 5.4179, "step": 799 }, { "epoch": 0.05366066337995103, "grad_norm": 0.13974003766746196, "learning_rate": 2e-05, "loss": 5.5318, "step": 800 }, { "epoch": 0.05372773920917597, "grad_norm": 0.13639664080985492, "learning_rate": 2e-05, "loss": 5.3341, "step": 801 }, { "epoch": 0.05379481503840091, "grad_norm": 0.14167395023305554, "learning_rate": 2e-05, "loss": 5.3727, "step": 802 }, { "epoch": 0.05386189086762585, "grad_norm": 0.1394741194712935, "learning_rate": 2e-05, "loss": 5.5296, "step": 803 }, { "epoch": 0.05392896669685079, "grad_norm": 0.13712594140783416, "learning_rate": 2e-05, "loss": 5.3746, "step": 804 }, { "epoch": 0.053996042526075726, "grad_norm": 0.14158707733158002, "learning_rate": 2e-05, "loss": 5.3914, "step": 805 }, { "epoch": 0.054063118355300664, "grad_norm": 0.13486094025995213, "learning_rate": 2e-05, "loss": 5.4044, "step": 806 }, { "epoch": 0.05413019418452561, "grad_norm": 0.14201804441816912, "learning_rate": 2e-05, "loss": 5.3864, "step": 807 }, { "epoch": 0.05419727001375055, "grad_norm": 0.14257058862578786, "learning_rate": 2e-05, "loss": 5.5227, "step": 808 }, { "epoch": 0.05426434584297549, "grad_norm": 0.140244101248035, "learning_rate": 2e-05, "loss": 5.4433, "step": 809 }, { "epoch": 0.054331421672200425, "grad_norm": 0.14432063434507378, "learning_rate": 2e-05, "loss": 5.4841, "step": 810 }, { "epoch": 0.054398497501425364, "grad_norm": 0.14269364021934897, "learning_rate": 2e-05, "loss": 5.4715, "step": 811 }, { "epoch": 0.0544655733306503, "grad_norm": 0.1350589464942071, "learning_rate": 2e-05, "loss": 5.5264, "step": 812 }, { "epoch": 0.05453264915987524, "grad_norm": 0.13664763358249052, "learning_rate": 2e-05, "loss": 5.404, "step": 813 }, { "epoch": 0.05459972498910018, "grad_norm": 0.14147306932220935, "learning_rate": 2e-05, "loss": 5.5134, "step": 814 }, { "epoch": 0.05466680081832512, "grad_norm": 0.13878967516307975, "learning_rate": 2e-05, "loss": 5.529, "step": 815 }, { "epoch": 0.054733876647550056, "grad_norm": 0.13787894692276778, "learning_rate": 2e-05, "loss": 5.4843, "step": 816 }, { "epoch": 0.054800952476774994, "grad_norm": 0.13774696079253215, "learning_rate": 2e-05, "loss": 5.4294, "step": 817 }, { "epoch": 0.05486802830599993, "grad_norm": 0.13650675459088185, "learning_rate": 2e-05, "loss": 5.5133, "step": 818 }, { "epoch": 0.05493510413522487, "grad_norm": 0.14158812692741873, "learning_rate": 2e-05, "loss": 5.5465, "step": 819 }, { "epoch": 0.05500217996444981, "grad_norm": 0.1459840984810963, "learning_rate": 2e-05, "loss": 5.427, "step": 820 }, { "epoch": 0.05506925579367475, "grad_norm": 0.1333773447411539, "learning_rate": 2e-05, "loss": 5.481, "step": 821 }, { "epoch": 0.05513633162289969, "grad_norm": 0.1384659909074751, "learning_rate": 2e-05, "loss": 5.2546, "step": 822 }, { "epoch": 0.055203407452124625, "grad_norm": 0.14398823490378715, "learning_rate": 2e-05, "loss": 5.4862, "step": 823 }, { "epoch": 0.055270483281349564, "grad_norm": 0.13875110841844615, "learning_rate": 2e-05, "loss": 5.48, "step": 824 }, { "epoch": 0.0553375591105745, "grad_norm": 0.13948950139399105, "learning_rate": 2e-05, "loss": 5.5384, "step": 825 }, { "epoch": 0.05540463493979944, "grad_norm": 0.15116020802467697, "learning_rate": 2e-05, "loss": 5.4948, "step": 826 }, { "epoch": 0.05547171076902438, "grad_norm": 0.13819426660576553, "learning_rate": 2e-05, "loss": 5.4385, "step": 827 }, { "epoch": 0.05553878659824932, "grad_norm": 0.14568352193832856, "learning_rate": 2e-05, "loss": 5.3935, "step": 828 }, { "epoch": 0.055605862427474256, "grad_norm": 0.15529099787024003, "learning_rate": 2e-05, "loss": 5.4705, "step": 829 }, { "epoch": 0.0556729382566992, "grad_norm": 0.13876326922565835, "learning_rate": 2e-05, "loss": 5.4677, "step": 830 }, { "epoch": 0.05574001408592414, "grad_norm": 0.13577318238666272, "learning_rate": 2e-05, "loss": 5.4858, "step": 831 }, { "epoch": 0.05580708991514908, "grad_norm": 0.13595378860625845, "learning_rate": 2e-05, "loss": 5.4542, "step": 832 }, { "epoch": 0.05587416574437402, "grad_norm": 0.15779272648314802, "learning_rate": 2e-05, "loss": 5.5412, "step": 833 }, { "epoch": 0.055941241573598956, "grad_norm": 0.1387678022712011, "learning_rate": 2e-05, "loss": 5.3525, "step": 834 }, { "epoch": 0.056008317402823894, "grad_norm": 0.13807992148054876, "learning_rate": 2e-05, "loss": 5.4495, "step": 835 }, { "epoch": 0.05607539323204883, "grad_norm": 0.14276025564396136, "learning_rate": 2e-05, "loss": 5.5332, "step": 836 }, { "epoch": 0.05614246906127377, "grad_norm": 0.1409028275800159, "learning_rate": 2e-05, "loss": 5.5933, "step": 837 }, { "epoch": 0.05620954489049871, "grad_norm": 0.14006034522749472, "learning_rate": 2e-05, "loss": 5.4707, "step": 838 }, { "epoch": 0.05627662071972365, "grad_norm": 0.14767453023280888, "learning_rate": 2e-05, "loss": 5.5192, "step": 839 }, { "epoch": 0.05634369654894859, "grad_norm": 0.14190206730998428, "learning_rate": 2e-05, "loss": 5.6028, "step": 840 }, { "epoch": 0.056410772378173525, "grad_norm": 0.14087398642948445, "learning_rate": 2e-05, "loss": 5.3911, "step": 841 }, { "epoch": 0.056477848207398464, "grad_norm": 0.14057267469250576, "learning_rate": 2e-05, "loss": 5.4456, "step": 842 }, { "epoch": 0.0565449240366234, "grad_norm": 0.14076872251712083, "learning_rate": 2e-05, "loss": 5.3893, "step": 843 }, { "epoch": 0.05661199986584834, "grad_norm": 0.1350938626348686, "learning_rate": 2e-05, "loss": 5.4341, "step": 844 }, { "epoch": 0.05667907569507328, "grad_norm": 0.13915139457632292, "learning_rate": 2e-05, "loss": 5.5325, "step": 845 }, { "epoch": 0.05674615152429822, "grad_norm": 0.13812758742751205, "learning_rate": 2e-05, "loss": 5.3681, "step": 846 }, { "epoch": 0.056813227353523156, "grad_norm": 0.1353484945833019, "learning_rate": 2e-05, "loss": 5.4384, "step": 847 }, { "epoch": 0.056880303182748095, "grad_norm": 0.1332408583701437, "learning_rate": 2e-05, "loss": 5.3802, "step": 848 }, { "epoch": 0.05694737901197303, "grad_norm": 0.14174163270345783, "learning_rate": 2e-05, "loss": 5.3823, "step": 849 }, { "epoch": 0.05701445484119797, "grad_norm": 0.13892567553248103, "learning_rate": 2e-05, "loss": 5.4336, "step": 850 }, { "epoch": 0.05708153067042291, "grad_norm": 0.13734709444032592, "learning_rate": 2e-05, "loss": 5.406, "step": 851 }, { "epoch": 0.05714860649964785, "grad_norm": 0.14277060734479116, "learning_rate": 2e-05, "loss": 5.4567, "step": 852 }, { "epoch": 0.057215682328872794, "grad_norm": 0.14130189619703876, "learning_rate": 2e-05, "loss": 5.5571, "step": 853 }, { "epoch": 0.05728275815809773, "grad_norm": 0.13530427873091264, "learning_rate": 2e-05, "loss": 5.5739, "step": 854 }, { "epoch": 0.05734983398732267, "grad_norm": 0.13945347479306888, "learning_rate": 2e-05, "loss": 5.653, "step": 855 }, { "epoch": 0.05741690981654761, "grad_norm": 0.1374314318007774, "learning_rate": 2e-05, "loss": 5.3369, "step": 856 }, { "epoch": 0.05748398564577255, "grad_norm": 0.1375300801579823, "learning_rate": 2e-05, "loss": 5.4263, "step": 857 }, { "epoch": 0.057551061474997486, "grad_norm": 0.143143572627421, "learning_rate": 2e-05, "loss": 5.391, "step": 858 }, { "epoch": 0.057618137304222425, "grad_norm": 0.14253496001737856, "learning_rate": 2e-05, "loss": 5.6284, "step": 859 }, { "epoch": 0.05768521313344736, "grad_norm": 0.14241012072714998, "learning_rate": 2e-05, "loss": 5.5044, "step": 860 }, { "epoch": 0.0577522889626723, "grad_norm": 0.1446386983727709, "learning_rate": 2e-05, "loss": 5.4457, "step": 861 }, { "epoch": 0.05781936479189724, "grad_norm": 0.13753673113124346, "learning_rate": 2e-05, "loss": 5.4626, "step": 862 }, { "epoch": 0.05788644062112218, "grad_norm": 0.13388397561799564, "learning_rate": 2e-05, "loss": 5.5171, "step": 863 }, { "epoch": 0.05795351645034712, "grad_norm": 0.14387247898191605, "learning_rate": 2e-05, "loss": 5.351, "step": 864 }, { "epoch": 0.058020592279572056, "grad_norm": 0.13846387917257807, "learning_rate": 2e-05, "loss": 5.3717, "step": 865 }, { "epoch": 0.058087668108796994, "grad_norm": 0.1387772996752303, "learning_rate": 2e-05, "loss": 5.4604, "step": 866 }, { "epoch": 0.05815474393802193, "grad_norm": 0.13740457747626075, "learning_rate": 2e-05, "loss": 5.4497, "step": 867 }, { "epoch": 0.05822181976724687, "grad_norm": 0.1420323840381132, "learning_rate": 2e-05, "loss": 5.5668, "step": 868 }, { "epoch": 0.05828889559647181, "grad_norm": 0.13722131968167312, "learning_rate": 2e-05, "loss": 5.4712, "step": 869 }, { "epoch": 0.05835597142569675, "grad_norm": 0.13695411515133893, "learning_rate": 2e-05, "loss": 5.4577, "step": 870 }, { "epoch": 0.05842304725492169, "grad_norm": 0.136261333389352, "learning_rate": 2e-05, "loss": 5.3434, "step": 871 }, { "epoch": 0.058490123084146625, "grad_norm": 0.14596221144034294, "learning_rate": 2e-05, "loss": 5.6215, "step": 872 }, { "epoch": 0.058557198913371564, "grad_norm": 0.13686208974725156, "learning_rate": 2e-05, "loss": 5.487, "step": 873 }, { "epoch": 0.0586242747425965, "grad_norm": 0.14019161433698765, "learning_rate": 2e-05, "loss": 5.4383, "step": 874 }, { "epoch": 0.05869135057182145, "grad_norm": 0.1426100777175223, "learning_rate": 2e-05, "loss": 5.4463, "step": 875 }, { "epoch": 0.058758426401046386, "grad_norm": 0.13965488796368594, "learning_rate": 2e-05, "loss": 5.5507, "step": 876 }, { "epoch": 0.058825502230271325, "grad_norm": 0.13639290650168254, "learning_rate": 2e-05, "loss": 5.4881, "step": 877 }, { "epoch": 0.05889257805949626, "grad_norm": 0.13666268909296495, "learning_rate": 2e-05, "loss": 5.4489, "step": 878 }, { "epoch": 0.0589596538887212, "grad_norm": 0.14413906456030398, "learning_rate": 2e-05, "loss": 5.5592, "step": 879 }, { "epoch": 0.05902672971794614, "grad_norm": 0.1429403323240712, "learning_rate": 2e-05, "loss": 5.4641, "step": 880 }, { "epoch": 0.05909380554717108, "grad_norm": 0.13512452658212443, "learning_rate": 2e-05, "loss": 5.6031, "step": 881 }, { "epoch": 0.05916088137639602, "grad_norm": 0.1475140817477487, "learning_rate": 2e-05, "loss": 5.5272, "step": 882 }, { "epoch": 0.059227957205620956, "grad_norm": 0.14940223612041237, "learning_rate": 2e-05, "loss": 5.6019, "step": 883 }, { "epoch": 0.059295033034845894, "grad_norm": 0.13674108862846301, "learning_rate": 2e-05, "loss": 5.5681, "step": 884 }, { "epoch": 0.05936210886407083, "grad_norm": 0.13940093999562198, "learning_rate": 2e-05, "loss": 5.5676, "step": 885 }, { "epoch": 0.05942918469329577, "grad_norm": 0.14837945848325476, "learning_rate": 2e-05, "loss": 5.4533, "step": 886 }, { "epoch": 0.05949626052252071, "grad_norm": 0.15011354648255704, "learning_rate": 2e-05, "loss": 5.4575, "step": 887 }, { "epoch": 0.05956333635174565, "grad_norm": 0.1362209694144478, "learning_rate": 2e-05, "loss": 5.4284, "step": 888 }, { "epoch": 0.059630412180970586, "grad_norm": 0.14684444411160688, "learning_rate": 2e-05, "loss": 5.4075, "step": 889 }, { "epoch": 0.059697488010195525, "grad_norm": 0.1395625625161395, "learning_rate": 2e-05, "loss": 5.5442, "step": 890 }, { "epoch": 0.05976456383942046, "grad_norm": 0.14254708197502527, "learning_rate": 2e-05, "loss": 5.5444, "step": 891 }, { "epoch": 0.0598316396686454, "grad_norm": 0.1475803682858939, "learning_rate": 2e-05, "loss": 5.4413, "step": 892 }, { "epoch": 0.05989871549787034, "grad_norm": 0.1406127834801801, "learning_rate": 2e-05, "loss": 5.2831, "step": 893 }, { "epoch": 0.05996579132709528, "grad_norm": 0.14310900234828802, "learning_rate": 2e-05, "loss": 5.5871, "step": 894 }, { "epoch": 0.06003286715632022, "grad_norm": 0.14093769764642358, "learning_rate": 2e-05, "loss": 5.38, "step": 895 }, { "epoch": 0.060099942985545156, "grad_norm": 0.14191864192436038, "learning_rate": 2e-05, "loss": 5.3113, "step": 896 }, { "epoch": 0.060167018814770094, "grad_norm": 0.14235473367066245, "learning_rate": 2e-05, "loss": 5.4629, "step": 897 }, { "epoch": 0.06023409464399504, "grad_norm": 0.15059353819172402, "learning_rate": 2e-05, "loss": 5.4607, "step": 898 }, { "epoch": 0.06030117047321998, "grad_norm": 0.14899940698176392, "learning_rate": 2e-05, "loss": 5.4367, "step": 899 }, { "epoch": 0.06036824630244492, "grad_norm": 0.1430397072684481, "learning_rate": 2e-05, "loss": 5.4692, "step": 900 }, { "epoch": 0.060435322131669855, "grad_norm": 0.14136365852867017, "learning_rate": 2e-05, "loss": 5.4448, "step": 901 }, { "epoch": 0.060502397960894794, "grad_norm": 0.1480207035572126, "learning_rate": 2e-05, "loss": 5.4536, "step": 902 }, { "epoch": 0.06056947379011973, "grad_norm": 0.14587433880174241, "learning_rate": 2e-05, "loss": 5.4813, "step": 903 }, { "epoch": 0.06063654961934467, "grad_norm": 0.13745118216058058, "learning_rate": 2e-05, "loss": 5.4612, "step": 904 }, { "epoch": 0.06070362544856961, "grad_norm": 0.14815188248219527, "learning_rate": 2e-05, "loss": 5.5552, "step": 905 }, { "epoch": 0.06077070127779455, "grad_norm": 0.15415144822204996, "learning_rate": 2e-05, "loss": 5.5095, "step": 906 }, { "epoch": 0.060837777107019486, "grad_norm": 0.15181524962880177, "learning_rate": 2e-05, "loss": 5.3029, "step": 907 }, { "epoch": 0.060904852936244425, "grad_norm": 0.1416912136458125, "learning_rate": 2e-05, "loss": 5.5007, "step": 908 }, { "epoch": 0.06097192876546936, "grad_norm": 0.1438777775082393, "learning_rate": 2e-05, "loss": 5.4892, "step": 909 }, { "epoch": 0.0610390045946943, "grad_norm": 0.14232780598974237, "learning_rate": 2e-05, "loss": 5.5196, "step": 910 }, { "epoch": 0.06110608042391924, "grad_norm": 0.1358565494728242, "learning_rate": 2e-05, "loss": 5.5891, "step": 911 }, { "epoch": 0.06117315625314418, "grad_norm": 0.1401310661087953, "learning_rate": 2e-05, "loss": 5.2692, "step": 912 }, { "epoch": 0.06124023208236912, "grad_norm": 0.14689278941422543, "learning_rate": 2e-05, "loss": 5.4704, "step": 913 }, { "epoch": 0.061307307911594056, "grad_norm": 0.13829368223283206, "learning_rate": 2e-05, "loss": 5.4825, "step": 914 }, { "epoch": 0.061374383740818994, "grad_norm": 0.135609036348283, "learning_rate": 2e-05, "loss": 5.4353, "step": 915 }, { "epoch": 0.06144145957004393, "grad_norm": 0.14145041358914442, "learning_rate": 2e-05, "loss": 5.4622, "step": 916 }, { "epoch": 0.06150853539926887, "grad_norm": 0.14970412784364212, "learning_rate": 2e-05, "loss": 5.3994, "step": 917 }, { "epoch": 0.06157561122849381, "grad_norm": 0.13605722210160578, "learning_rate": 2e-05, "loss": 5.5852, "step": 918 }, { "epoch": 0.06164268705771875, "grad_norm": 0.1402338188085907, "learning_rate": 2e-05, "loss": 5.5012, "step": 919 }, { "epoch": 0.061709762886943686, "grad_norm": 0.1386535419548301, "learning_rate": 2e-05, "loss": 5.5691, "step": 920 }, { "epoch": 0.06177683871616863, "grad_norm": 0.14120651661213565, "learning_rate": 2e-05, "loss": 5.4144, "step": 921 }, { "epoch": 0.06184391454539357, "grad_norm": 0.1418152694970351, "learning_rate": 2e-05, "loss": 5.475, "step": 922 }, { "epoch": 0.06191099037461851, "grad_norm": 0.14081268526884533, "learning_rate": 2e-05, "loss": 5.4198, "step": 923 }, { "epoch": 0.06197806620384345, "grad_norm": 0.1429539519572204, "learning_rate": 2e-05, "loss": 5.5652, "step": 924 }, { "epoch": 0.062045142033068386, "grad_norm": 0.14014628197100631, "learning_rate": 2e-05, "loss": 5.4546, "step": 925 }, { "epoch": 0.062112217862293324, "grad_norm": 0.1403602122795351, "learning_rate": 2e-05, "loss": 5.5362, "step": 926 }, { "epoch": 0.06217929369151826, "grad_norm": 0.13766903314220935, "learning_rate": 2e-05, "loss": 5.5593, "step": 927 }, { "epoch": 0.0622463695207432, "grad_norm": 0.14442357345855064, "learning_rate": 2e-05, "loss": 5.5419, "step": 928 }, { "epoch": 0.06231344534996814, "grad_norm": 0.14637890184169824, "learning_rate": 2e-05, "loss": 5.4311, "step": 929 }, { "epoch": 0.06238052117919308, "grad_norm": 0.14086066691010285, "learning_rate": 2e-05, "loss": 5.4162, "step": 930 }, { "epoch": 0.06244759700841802, "grad_norm": 0.14469989861884466, "learning_rate": 2e-05, "loss": 5.5903, "step": 931 }, { "epoch": 0.06251467283764296, "grad_norm": 0.15443996737079702, "learning_rate": 2e-05, "loss": 5.4197, "step": 932 }, { "epoch": 0.0625817486668679, "grad_norm": 0.1447133409366118, "learning_rate": 2e-05, "loss": 5.502, "step": 933 }, { "epoch": 0.06264882449609284, "grad_norm": 0.13714509914637055, "learning_rate": 2e-05, "loss": 5.5174, "step": 934 }, { "epoch": 0.06271590032531778, "grad_norm": 0.14660932287603656, "learning_rate": 2e-05, "loss": 5.5223, "step": 935 }, { "epoch": 0.06278297615454272, "grad_norm": 0.14713844137885843, "learning_rate": 2e-05, "loss": 5.4564, "step": 936 }, { "epoch": 0.06285005198376765, "grad_norm": 0.1345358901580513, "learning_rate": 2e-05, "loss": 5.5006, "step": 937 }, { "epoch": 0.0629171278129926, "grad_norm": 0.15090421113098856, "learning_rate": 2e-05, "loss": 5.4349, "step": 938 }, { "epoch": 0.06298420364221753, "grad_norm": 0.13614640484954693, "learning_rate": 2e-05, "loss": 5.4222, "step": 939 }, { "epoch": 0.06305127947144247, "grad_norm": 0.1406727021827449, "learning_rate": 2e-05, "loss": 5.4434, "step": 940 }, { "epoch": 0.06311835530066741, "grad_norm": 0.14014131545298447, "learning_rate": 2e-05, "loss": 5.6228, "step": 941 }, { "epoch": 0.06318543112989235, "grad_norm": 0.1413707632397799, "learning_rate": 2e-05, "loss": 5.5162, "step": 942 }, { "epoch": 0.06325250695911729, "grad_norm": 0.13638836970443632, "learning_rate": 2e-05, "loss": 5.2534, "step": 943 }, { "epoch": 0.06331958278834222, "grad_norm": 0.1416451593858626, "learning_rate": 2e-05, "loss": 5.5657, "step": 944 }, { "epoch": 0.06338665861756716, "grad_norm": 0.1470180217638178, "learning_rate": 2e-05, "loss": 5.4488, "step": 945 }, { "epoch": 0.0634537344467921, "grad_norm": 0.1402528261630909, "learning_rate": 2e-05, "loss": 5.4298, "step": 946 }, { "epoch": 0.06352081027601704, "grad_norm": 0.14168013819452752, "learning_rate": 2e-05, "loss": 5.6216, "step": 947 }, { "epoch": 0.06358788610524198, "grad_norm": 0.13599946813522276, "learning_rate": 2e-05, "loss": 5.3424, "step": 948 }, { "epoch": 0.06365496193446692, "grad_norm": 0.1462738772591234, "learning_rate": 2e-05, "loss": 5.3495, "step": 949 }, { "epoch": 0.06372203776369186, "grad_norm": 0.14257591062972663, "learning_rate": 2e-05, "loss": 5.5289, "step": 950 }, { "epoch": 0.0637891135929168, "grad_norm": 0.13730199516767894, "learning_rate": 2e-05, "loss": 5.3791, "step": 951 }, { "epoch": 0.06385618942214173, "grad_norm": 0.14071710088510572, "learning_rate": 2e-05, "loss": 5.6989, "step": 952 }, { "epoch": 0.06392326525136667, "grad_norm": 0.13788605699179837, "learning_rate": 2e-05, "loss": 5.3702, "step": 953 }, { "epoch": 0.06399034108059161, "grad_norm": 0.14253818887026737, "learning_rate": 2e-05, "loss": 5.5086, "step": 954 }, { "epoch": 0.06405741690981655, "grad_norm": 0.1419512183340297, "learning_rate": 2e-05, "loss": 5.4139, "step": 955 }, { "epoch": 0.06412449273904149, "grad_norm": 0.13641133373263273, "learning_rate": 2e-05, "loss": 5.6436, "step": 956 }, { "epoch": 0.06419156856826642, "grad_norm": 0.1423878286069241, "learning_rate": 2e-05, "loss": 5.3848, "step": 957 }, { "epoch": 0.06425864439749136, "grad_norm": 0.1343091580272714, "learning_rate": 2e-05, "loss": 5.4329, "step": 958 }, { "epoch": 0.0643257202267163, "grad_norm": 0.1416301662054869, "learning_rate": 2e-05, "loss": 5.4526, "step": 959 }, { "epoch": 0.06439279605594124, "grad_norm": 0.1406843573770194, "learning_rate": 2e-05, "loss": 5.6132, "step": 960 }, { "epoch": 0.06445987188516618, "grad_norm": 0.1424090051868738, "learning_rate": 2e-05, "loss": 5.54, "step": 961 }, { "epoch": 0.06452694771439112, "grad_norm": 0.1517475569009069, "learning_rate": 2e-05, "loss": 5.4407, "step": 962 }, { "epoch": 0.06459402354361606, "grad_norm": 0.14808580189993836, "learning_rate": 2e-05, "loss": 5.5214, "step": 963 }, { "epoch": 0.064661099372841, "grad_norm": 0.1415675119324065, "learning_rate": 2e-05, "loss": 5.5676, "step": 964 }, { "epoch": 0.06472817520206593, "grad_norm": 0.1494174332805045, "learning_rate": 2e-05, "loss": 5.506, "step": 965 }, { "epoch": 0.06479525103129087, "grad_norm": 0.1396771353250545, "learning_rate": 2e-05, "loss": 5.4302, "step": 966 }, { "epoch": 0.06486232686051581, "grad_norm": 0.1431365155044209, "learning_rate": 2e-05, "loss": 5.4358, "step": 967 }, { "epoch": 0.06492940268974075, "grad_norm": 0.15043753801926193, "learning_rate": 2e-05, "loss": 5.2945, "step": 968 }, { "epoch": 0.06499647851896569, "grad_norm": 0.1457021416191956, "learning_rate": 2e-05, "loss": 5.4948, "step": 969 }, { "epoch": 0.06506355434819062, "grad_norm": 0.14317023515833505, "learning_rate": 2e-05, "loss": 5.5581, "step": 970 }, { "epoch": 0.06513063017741556, "grad_norm": 0.14392523134142407, "learning_rate": 2e-05, "loss": 5.4342, "step": 971 }, { "epoch": 0.0651977060066405, "grad_norm": 0.13961506605718788, "learning_rate": 2e-05, "loss": 5.4761, "step": 972 }, { "epoch": 0.06526478183586544, "grad_norm": 0.14661756815235763, "learning_rate": 2e-05, "loss": 5.4214, "step": 973 }, { "epoch": 0.06533185766509038, "grad_norm": 0.1502094874218225, "learning_rate": 2e-05, "loss": 5.4245, "step": 974 }, { "epoch": 0.06539893349431532, "grad_norm": 0.13807945735837462, "learning_rate": 2e-05, "loss": 5.5346, "step": 975 }, { "epoch": 0.06546600932354026, "grad_norm": 0.14097145075217724, "learning_rate": 2e-05, "loss": 5.5025, "step": 976 }, { "epoch": 0.0655330851527652, "grad_norm": 0.14699011978998494, "learning_rate": 2e-05, "loss": 5.5209, "step": 977 }, { "epoch": 0.06560016098199015, "grad_norm": 0.13954138721060855, "learning_rate": 2e-05, "loss": 5.4361, "step": 978 }, { "epoch": 0.06566723681121509, "grad_norm": 0.13462150953089716, "learning_rate": 2e-05, "loss": 5.5481, "step": 979 }, { "epoch": 0.06573431264044002, "grad_norm": 0.14131618901622997, "learning_rate": 2e-05, "loss": 5.5323, "step": 980 }, { "epoch": 0.06580138846966496, "grad_norm": 0.1410610627720264, "learning_rate": 2e-05, "loss": 5.3419, "step": 981 }, { "epoch": 0.0658684642988899, "grad_norm": 0.14746307906368222, "learning_rate": 2e-05, "loss": 5.5164, "step": 982 }, { "epoch": 0.06593554012811484, "grad_norm": 0.14345163280006756, "learning_rate": 2e-05, "loss": 5.5695, "step": 983 }, { "epoch": 0.06600261595733978, "grad_norm": 0.13873894896339284, "learning_rate": 2e-05, "loss": 5.4458, "step": 984 }, { "epoch": 0.06606969178656472, "grad_norm": 0.13815448538380473, "learning_rate": 2e-05, "loss": 5.4279, "step": 985 }, { "epoch": 0.06613676761578965, "grad_norm": 0.14243536706924334, "learning_rate": 2e-05, "loss": 5.6864, "step": 986 }, { "epoch": 0.06620384344501459, "grad_norm": 0.14804898457927465, "learning_rate": 2e-05, "loss": 5.4699, "step": 987 }, { "epoch": 0.06627091927423953, "grad_norm": 0.14163182862408727, "learning_rate": 2e-05, "loss": 5.355, "step": 988 }, { "epoch": 0.06633799510346447, "grad_norm": 0.14106974980590847, "learning_rate": 2e-05, "loss": 5.4675, "step": 989 }, { "epoch": 0.06640507093268941, "grad_norm": 0.1463973211213733, "learning_rate": 2e-05, "loss": 5.4647, "step": 990 }, { "epoch": 0.06647214676191435, "grad_norm": 0.13863246088588727, "learning_rate": 2e-05, "loss": 5.3694, "step": 991 }, { "epoch": 0.06653922259113929, "grad_norm": 0.13820198703500147, "learning_rate": 2e-05, "loss": 5.5041, "step": 992 }, { "epoch": 0.06660629842036422, "grad_norm": 0.14590687219591825, "learning_rate": 2e-05, "loss": 5.4105, "step": 993 }, { "epoch": 0.06667337424958916, "grad_norm": 0.1441502716480331, "learning_rate": 2e-05, "loss": 5.5926, "step": 994 }, { "epoch": 0.0667404500788141, "grad_norm": 0.1383324685051119, "learning_rate": 2e-05, "loss": 5.2819, "step": 995 }, { "epoch": 0.06680752590803904, "grad_norm": 0.13736178852125686, "learning_rate": 2e-05, "loss": 5.5035, "step": 996 }, { "epoch": 0.06687460173726398, "grad_norm": 0.13858731313062048, "learning_rate": 2e-05, "loss": 5.4112, "step": 997 }, { "epoch": 0.06694167756648892, "grad_norm": 0.14505483746317052, "learning_rate": 2e-05, "loss": 5.5129, "step": 998 }, { "epoch": 0.06700875339571385, "grad_norm": 0.1430188441396592, "learning_rate": 2e-05, "loss": 5.4757, "step": 999 }, { "epoch": 0.0670758292249388, "grad_norm": 0.14405572597639804, "learning_rate": 2e-05, "loss": 5.5309, "step": 1000 }, { "epoch": 0.06714290505416373, "grad_norm": 0.14420543000214317, "learning_rate": 2e-05, "loss": 5.4797, "step": 1001 }, { "epoch": 0.06720998088338867, "grad_norm": 0.14851517857430163, "learning_rate": 2e-05, "loss": 5.406, "step": 1002 }, { "epoch": 0.06727705671261361, "grad_norm": 0.14583599683339143, "learning_rate": 2e-05, "loss": 5.4348, "step": 1003 }, { "epoch": 0.06734413254183855, "grad_norm": 0.1555651851359692, "learning_rate": 2e-05, "loss": 5.4172, "step": 1004 }, { "epoch": 0.06741120837106349, "grad_norm": 0.13812575310833325, "learning_rate": 2e-05, "loss": 5.5737, "step": 1005 }, { "epoch": 0.06747828420028842, "grad_norm": 0.15711470856947715, "learning_rate": 2e-05, "loss": 5.5873, "step": 1006 }, { "epoch": 0.06754536002951336, "grad_norm": 0.13901211556871806, "learning_rate": 2e-05, "loss": 5.4356, "step": 1007 }, { "epoch": 0.0676124358587383, "grad_norm": 0.13927864389636174, "learning_rate": 2e-05, "loss": 5.4489, "step": 1008 }, { "epoch": 0.06767951168796324, "grad_norm": 0.14158542386555123, "learning_rate": 2e-05, "loss": 5.4621, "step": 1009 }, { "epoch": 0.06774658751718818, "grad_norm": 0.14353046296885066, "learning_rate": 2e-05, "loss": 5.4718, "step": 1010 }, { "epoch": 0.06781366334641312, "grad_norm": 0.14204280486231566, "learning_rate": 2e-05, "loss": 5.513, "step": 1011 }, { "epoch": 0.06788073917563806, "grad_norm": 0.13444323188584192, "learning_rate": 2e-05, "loss": 5.411, "step": 1012 }, { "epoch": 0.067947815004863, "grad_norm": 0.13741491296283515, "learning_rate": 2e-05, "loss": 5.285, "step": 1013 }, { "epoch": 0.06801489083408793, "grad_norm": 0.13954649460686996, "learning_rate": 2e-05, "loss": 5.543, "step": 1014 }, { "epoch": 0.06808196666331287, "grad_norm": 0.14578867505470353, "learning_rate": 2e-05, "loss": 5.4624, "step": 1015 }, { "epoch": 0.06814904249253781, "grad_norm": 0.14111504177081444, "learning_rate": 2e-05, "loss": 5.4701, "step": 1016 }, { "epoch": 0.06821611832176275, "grad_norm": 0.13955645828244653, "learning_rate": 2e-05, "loss": 5.5489, "step": 1017 }, { "epoch": 0.06828319415098769, "grad_norm": 0.14299837576813884, "learning_rate": 2e-05, "loss": 5.3295, "step": 1018 }, { "epoch": 0.06835026998021262, "grad_norm": 0.1418240516080218, "learning_rate": 2e-05, "loss": 5.5749, "step": 1019 }, { "epoch": 0.06841734580943756, "grad_norm": 0.14038716591908607, "learning_rate": 2e-05, "loss": 5.4621, "step": 1020 }, { "epoch": 0.0684844216386625, "grad_norm": 0.1461168971838683, "learning_rate": 2e-05, "loss": 5.4829, "step": 1021 }, { "epoch": 0.06855149746788744, "grad_norm": 0.14185332005520468, "learning_rate": 2e-05, "loss": 5.5131, "step": 1022 }, { "epoch": 0.06861857329711239, "grad_norm": 0.14319015260729892, "learning_rate": 2e-05, "loss": 5.5279, "step": 1023 }, { "epoch": 0.06868564912633733, "grad_norm": 0.13798329033655377, "learning_rate": 2e-05, "loss": 5.5017, "step": 1024 }, { "epoch": 0.06875272495556227, "grad_norm": 0.14199635737956007, "learning_rate": 2e-05, "loss": 5.4655, "step": 1025 }, { "epoch": 0.06881980078478721, "grad_norm": 0.13943549742355377, "learning_rate": 2e-05, "loss": 5.4443, "step": 1026 }, { "epoch": 0.06888687661401215, "grad_norm": 0.14198877913372665, "learning_rate": 2e-05, "loss": 5.4909, "step": 1027 }, { "epoch": 0.06895395244323708, "grad_norm": 0.14212895181676885, "learning_rate": 2e-05, "loss": 5.5207, "step": 1028 }, { "epoch": 0.06902102827246202, "grad_norm": 0.14181549512601482, "learning_rate": 2e-05, "loss": 5.4596, "step": 1029 }, { "epoch": 0.06908810410168696, "grad_norm": 0.13968785896108063, "learning_rate": 2e-05, "loss": 5.4212, "step": 1030 }, { "epoch": 0.0691551799309119, "grad_norm": 0.1458894091883494, "learning_rate": 2e-05, "loss": 5.4799, "step": 1031 }, { "epoch": 0.06922225576013684, "grad_norm": 0.14662279348565962, "learning_rate": 2e-05, "loss": 5.4255, "step": 1032 }, { "epoch": 0.06928933158936178, "grad_norm": 0.13688140857834163, "learning_rate": 2e-05, "loss": 5.4515, "step": 1033 }, { "epoch": 0.06935640741858672, "grad_norm": 0.14181437484839982, "learning_rate": 2e-05, "loss": 5.4739, "step": 1034 }, { "epoch": 0.06942348324781165, "grad_norm": 0.1482169290577267, "learning_rate": 2e-05, "loss": 5.4384, "step": 1035 }, { "epoch": 0.06949055907703659, "grad_norm": 0.14035351702567372, "learning_rate": 2e-05, "loss": 5.533, "step": 1036 }, { "epoch": 0.06955763490626153, "grad_norm": 0.14020746200098522, "learning_rate": 2e-05, "loss": 5.4794, "step": 1037 }, { "epoch": 0.06962471073548647, "grad_norm": 0.14544451998741595, "learning_rate": 2e-05, "loss": 5.4267, "step": 1038 }, { "epoch": 0.06969178656471141, "grad_norm": 0.14381063273595002, "learning_rate": 2e-05, "loss": 5.4277, "step": 1039 }, { "epoch": 0.06975886239393635, "grad_norm": 0.14765003466443793, "learning_rate": 2e-05, "loss": 5.3775, "step": 1040 }, { "epoch": 0.06982593822316129, "grad_norm": 0.14156940712819174, "learning_rate": 2e-05, "loss": 5.463, "step": 1041 }, { "epoch": 0.06989301405238622, "grad_norm": 0.1430620947732063, "learning_rate": 2e-05, "loss": 5.3188, "step": 1042 }, { "epoch": 0.06996008988161116, "grad_norm": 0.15319921901894945, "learning_rate": 2e-05, "loss": 5.6305, "step": 1043 }, { "epoch": 0.0700271657108361, "grad_norm": 0.14468005581288698, "learning_rate": 2e-05, "loss": 5.5308, "step": 1044 }, { "epoch": 0.07009424154006104, "grad_norm": 0.14272432692090467, "learning_rate": 2e-05, "loss": 5.4387, "step": 1045 }, { "epoch": 0.07016131736928598, "grad_norm": 0.1463843713355547, "learning_rate": 2e-05, "loss": 5.4086, "step": 1046 }, { "epoch": 0.07022839319851092, "grad_norm": 0.14723893957380496, "learning_rate": 2e-05, "loss": 5.5626, "step": 1047 }, { "epoch": 0.07029546902773585, "grad_norm": 0.13950860044380903, "learning_rate": 2e-05, "loss": 5.5123, "step": 1048 }, { "epoch": 0.0703625448569608, "grad_norm": 0.1442010900037428, "learning_rate": 2e-05, "loss": 5.4039, "step": 1049 }, { "epoch": 0.07042962068618573, "grad_norm": 0.15409751471477767, "learning_rate": 2e-05, "loss": 5.4824, "step": 1050 }, { "epoch": 0.07049669651541067, "grad_norm": 0.14509094467912714, "learning_rate": 2e-05, "loss": 5.3989, "step": 1051 }, { "epoch": 0.07056377234463561, "grad_norm": 0.14526710838882345, "learning_rate": 2e-05, "loss": 5.3232, "step": 1052 }, { "epoch": 0.07063084817386055, "grad_norm": 0.1478504773026071, "learning_rate": 2e-05, "loss": 5.3862, "step": 1053 }, { "epoch": 0.07069792400308549, "grad_norm": 0.14638817503052967, "learning_rate": 2e-05, "loss": 5.3418, "step": 1054 }, { "epoch": 0.07076499983231042, "grad_norm": 0.13586301346644258, "learning_rate": 2e-05, "loss": 5.49, "step": 1055 }, { "epoch": 0.07083207566153536, "grad_norm": 0.14654483443128458, "learning_rate": 2e-05, "loss": 5.4534, "step": 1056 }, { "epoch": 0.0708991514907603, "grad_norm": 0.14078132247927755, "learning_rate": 2e-05, "loss": 5.4384, "step": 1057 }, { "epoch": 0.07096622731998524, "grad_norm": 0.1407883127223292, "learning_rate": 2e-05, "loss": 5.4599, "step": 1058 }, { "epoch": 0.07103330314921018, "grad_norm": 0.1404300390243042, "learning_rate": 2e-05, "loss": 5.629, "step": 1059 }, { "epoch": 0.07110037897843512, "grad_norm": 0.14367245900913395, "learning_rate": 2e-05, "loss": 5.6084, "step": 1060 }, { "epoch": 0.07116745480766005, "grad_norm": 0.14015981020280674, "learning_rate": 2e-05, "loss": 5.3851, "step": 1061 }, { "epoch": 0.071234530636885, "grad_norm": 0.13598385520107845, "learning_rate": 2e-05, "loss": 5.5021, "step": 1062 }, { "epoch": 0.07130160646610993, "grad_norm": 0.1398234397174872, "learning_rate": 2e-05, "loss": 5.4356, "step": 1063 }, { "epoch": 0.07136868229533487, "grad_norm": 0.14761029735908618, "learning_rate": 2e-05, "loss": 5.5146, "step": 1064 }, { "epoch": 0.07143575812455981, "grad_norm": 0.13777721500850715, "learning_rate": 2e-05, "loss": 5.5455, "step": 1065 }, { "epoch": 0.07150283395378475, "grad_norm": 0.1510481568356787, "learning_rate": 2e-05, "loss": 5.5922, "step": 1066 }, { "epoch": 0.07156990978300969, "grad_norm": 0.13725662016717374, "learning_rate": 2e-05, "loss": 5.5206, "step": 1067 }, { "epoch": 0.07163698561223462, "grad_norm": 0.14034392248368274, "learning_rate": 2e-05, "loss": 5.5104, "step": 1068 }, { "epoch": 0.07170406144145958, "grad_norm": 0.13452173211377477, "learning_rate": 2e-05, "loss": 5.3784, "step": 1069 }, { "epoch": 0.07177113727068452, "grad_norm": 0.14377743296526738, "learning_rate": 2e-05, "loss": 5.412, "step": 1070 }, { "epoch": 0.07183821309990945, "grad_norm": 0.14796813518763766, "learning_rate": 2e-05, "loss": 5.4716, "step": 1071 }, { "epoch": 0.07190528892913439, "grad_norm": 0.13877900951054004, "learning_rate": 2e-05, "loss": 5.5335, "step": 1072 }, { "epoch": 0.07197236475835933, "grad_norm": 0.1418218083548077, "learning_rate": 2e-05, "loss": 5.4379, "step": 1073 }, { "epoch": 0.07203944058758427, "grad_norm": 0.13808535465538047, "learning_rate": 2e-05, "loss": 5.3946, "step": 1074 }, { "epoch": 0.07210651641680921, "grad_norm": 0.13787068354150553, "learning_rate": 2e-05, "loss": 5.4985, "step": 1075 }, { "epoch": 0.07217359224603415, "grad_norm": 0.14553252974202666, "learning_rate": 2e-05, "loss": 5.3735, "step": 1076 }, { "epoch": 0.07224066807525908, "grad_norm": 0.14126519970976545, "learning_rate": 2e-05, "loss": 5.503, "step": 1077 }, { "epoch": 0.07230774390448402, "grad_norm": 0.14386066438497633, "learning_rate": 2e-05, "loss": 5.4668, "step": 1078 }, { "epoch": 0.07237481973370896, "grad_norm": 0.14319137965131715, "learning_rate": 2e-05, "loss": 5.4432, "step": 1079 }, { "epoch": 0.0724418955629339, "grad_norm": 0.145466636132673, "learning_rate": 2e-05, "loss": 5.4764, "step": 1080 }, { "epoch": 0.07250897139215884, "grad_norm": 0.1383770266311131, "learning_rate": 2e-05, "loss": 5.4225, "step": 1081 }, { "epoch": 0.07257604722138378, "grad_norm": 0.1388380094793619, "learning_rate": 2e-05, "loss": 5.5216, "step": 1082 }, { "epoch": 0.07264312305060872, "grad_norm": 0.143174754656856, "learning_rate": 2e-05, "loss": 5.6383, "step": 1083 }, { "epoch": 0.07271019887983365, "grad_norm": 0.14270063662045063, "learning_rate": 2e-05, "loss": 5.5725, "step": 1084 }, { "epoch": 0.07277727470905859, "grad_norm": 0.142513594273265, "learning_rate": 2e-05, "loss": 5.3141, "step": 1085 }, { "epoch": 0.07284435053828353, "grad_norm": 0.13813919071852465, "learning_rate": 2e-05, "loss": 5.5079, "step": 1086 }, { "epoch": 0.07291142636750847, "grad_norm": 0.14001492106441765, "learning_rate": 2e-05, "loss": 5.37, "step": 1087 }, { "epoch": 0.07297850219673341, "grad_norm": 0.14833141785339185, "learning_rate": 2e-05, "loss": 5.4302, "step": 1088 }, { "epoch": 0.07304557802595835, "grad_norm": 0.14319809727780125, "learning_rate": 2e-05, "loss": 5.6504, "step": 1089 }, { "epoch": 0.07311265385518328, "grad_norm": 0.15023110031578885, "learning_rate": 2e-05, "loss": 5.4674, "step": 1090 }, { "epoch": 0.07317972968440822, "grad_norm": 0.14391542059478438, "learning_rate": 2e-05, "loss": 5.345, "step": 1091 }, { "epoch": 0.07324680551363316, "grad_norm": 0.13875135309894027, "learning_rate": 2e-05, "loss": 5.4768, "step": 1092 }, { "epoch": 0.0733138813428581, "grad_norm": 0.14617946371935692, "learning_rate": 2e-05, "loss": 5.4104, "step": 1093 }, { "epoch": 0.07338095717208304, "grad_norm": 0.14352086030089384, "learning_rate": 2e-05, "loss": 5.4158, "step": 1094 }, { "epoch": 0.07344803300130798, "grad_norm": 0.1407758648836727, "learning_rate": 2e-05, "loss": 5.5167, "step": 1095 }, { "epoch": 0.07351510883053292, "grad_norm": 0.13690388037278067, "learning_rate": 2e-05, "loss": 5.4551, "step": 1096 }, { "epoch": 0.07358218465975785, "grad_norm": 0.14326613200988836, "learning_rate": 2e-05, "loss": 5.5469, "step": 1097 }, { "epoch": 0.07364926048898279, "grad_norm": 0.14025574217353903, "learning_rate": 2e-05, "loss": 5.4878, "step": 1098 }, { "epoch": 0.07371633631820773, "grad_norm": 0.13826395568501904, "learning_rate": 2e-05, "loss": 5.3831, "step": 1099 }, { "epoch": 0.07378341214743267, "grad_norm": 0.1412223290548939, "learning_rate": 2e-05, "loss": 5.548, "step": 1100 }, { "epoch": 0.07385048797665761, "grad_norm": 0.13571291285795697, "learning_rate": 2e-05, "loss": 5.4309, "step": 1101 }, { "epoch": 0.07391756380588255, "grad_norm": 0.13831213480833468, "learning_rate": 2e-05, "loss": 5.4065, "step": 1102 }, { "epoch": 0.07398463963510749, "grad_norm": 0.14504910093075676, "learning_rate": 2e-05, "loss": 5.5017, "step": 1103 }, { "epoch": 0.07405171546433242, "grad_norm": 0.14168429083685968, "learning_rate": 2e-05, "loss": 5.4493, "step": 1104 }, { "epoch": 0.07411879129355736, "grad_norm": 0.14469188290322638, "learning_rate": 2e-05, "loss": 5.5204, "step": 1105 }, { "epoch": 0.0741858671227823, "grad_norm": 0.14185924515940823, "learning_rate": 2e-05, "loss": 5.4555, "step": 1106 }, { "epoch": 0.07425294295200724, "grad_norm": 0.14612197526895698, "learning_rate": 2e-05, "loss": 5.4726, "step": 1107 }, { "epoch": 0.07432001878123218, "grad_norm": 0.14121288580058683, "learning_rate": 2e-05, "loss": 5.3779, "step": 1108 }, { "epoch": 0.07438709461045712, "grad_norm": 0.14943595548179592, "learning_rate": 2e-05, "loss": 5.3419, "step": 1109 }, { "epoch": 0.07445417043968205, "grad_norm": 0.1396906667646618, "learning_rate": 2e-05, "loss": 5.2597, "step": 1110 }, { "epoch": 0.074521246268907, "grad_norm": 0.14010939088213667, "learning_rate": 2e-05, "loss": 5.4394, "step": 1111 }, { "epoch": 0.07458832209813193, "grad_norm": 0.14310651381279835, "learning_rate": 2e-05, "loss": 5.5552, "step": 1112 }, { "epoch": 0.07465539792735687, "grad_norm": 0.14154181175386166, "learning_rate": 2e-05, "loss": 5.465, "step": 1113 }, { "epoch": 0.07472247375658182, "grad_norm": 0.1388526904015761, "learning_rate": 2e-05, "loss": 5.4133, "step": 1114 }, { "epoch": 0.07478954958580676, "grad_norm": 0.13943142320190335, "learning_rate": 2e-05, "loss": 5.6327, "step": 1115 }, { "epoch": 0.0748566254150317, "grad_norm": 0.14196690009070131, "learning_rate": 2e-05, "loss": 5.5694, "step": 1116 }, { "epoch": 0.07492370124425664, "grad_norm": 0.14172971013563768, "learning_rate": 2e-05, "loss": 5.4827, "step": 1117 }, { "epoch": 0.07499077707348158, "grad_norm": 0.14921824667168682, "learning_rate": 2e-05, "loss": 5.5972, "step": 1118 }, { "epoch": 0.07505785290270651, "grad_norm": 0.14266500204850666, "learning_rate": 2e-05, "loss": 5.697, "step": 1119 }, { "epoch": 0.07512492873193145, "grad_norm": 0.1399058048528367, "learning_rate": 2e-05, "loss": 5.53, "step": 1120 }, { "epoch": 0.07519200456115639, "grad_norm": 0.1379672146718741, "learning_rate": 2e-05, "loss": 5.5225, "step": 1121 }, { "epoch": 0.07525908039038133, "grad_norm": 0.13669976798965122, "learning_rate": 2e-05, "loss": 5.5489, "step": 1122 }, { "epoch": 0.07532615621960627, "grad_norm": 0.1498855010450185, "learning_rate": 2e-05, "loss": 5.4513, "step": 1123 }, { "epoch": 0.07539323204883121, "grad_norm": 0.141315877085081, "learning_rate": 2e-05, "loss": 5.4295, "step": 1124 }, { "epoch": 0.07546030787805615, "grad_norm": 0.14970094927335933, "learning_rate": 2e-05, "loss": 5.5392, "step": 1125 }, { "epoch": 0.07552738370728108, "grad_norm": 0.13937016528933227, "learning_rate": 2e-05, "loss": 5.4836, "step": 1126 }, { "epoch": 0.07559445953650602, "grad_norm": 0.14454906140057708, "learning_rate": 2e-05, "loss": 5.4551, "step": 1127 }, { "epoch": 0.07566153536573096, "grad_norm": 0.14364119520212693, "learning_rate": 2e-05, "loss": 5.4696, "step": 1128 }, { "epoch": 0.0757286111949559, "grad_norm": 0.14686814266625314, "learning_rate": 2e-05, "loss": 5.5364, "step": 1129 }, { "epoch": 0.07579568702418084, "grad_norm": 0.14205250678237097, "learning_rate": 2e-05, "loss": 5.3681, "step": 1130 }, { "epoch": 0.07586276285340578, "grad_norm": 0.1417902049569719, "learning_rate": 2e-05, "loss": 5.4931, "step": 1131 }, { "epoch": 0.07592983868263072, "grad_norm": 0.14015524254332487, "learning_rate": 2e-05, "loss": 5.454, "step": 1132 }, { "epoch": 0.07599691451185565, "grad_norm": 0.14006062815305553, "learning_rate": 2e-05, "loss": 5.4379, "step": 1133 }, { "epoch": 0.07606399034108059, "grad_norm": 0.1412175960450175, "learning_rate": 2e-05, "loss": 5.5073, "step": 1134 }, { "epoch": 0.07613106617030553, "grad_norm": 0.14836329420713437, "learning_rate": 2e-05, "loss": 5.4641, "step": 1135 }, { "epoch": 0.07619814199953047, "grad_norm": 0.13711186688293733, "learning_rate": 2e-05, "loss": 5.5374, "step": 1136 }, { "epoch": 0.07626521782875541, "grad_norm": 0.15034168932998612, "learning_rate": 2e-05, "loss": 5.4421, "step": 1137 }, { "epoch": 0.07633229365798035, "grad_norm": 0.14757455320995166, "learning_rate": 2e-05, "loss": 5.4796, "step": 1138 }, { "epoch": 0.07639936948720528, "grad_norm": 0.14295354758394763, "learning_rate": 2e-05, "loss": 5.3849, "step": 1139 }, { "epoch": 0.07646644531643022, "grad_norm": 0.14051213201591073, "learning_rate": 2e-05, "loss": 5.5789, "step": 1140 }, { "epoch": 0.07653352114565516, "grad_norm": 0.1451991728939992, "learning_rate": 2e-05, "loss": 5.5705, "step": 1141 }, { "epoch": 0.0766005969748801, "grad_norm": 0.14744993143529694, "learning_rate": 2e-05, "loss": 5.4072, "step": 1142 }, { "epoch": 0.07666767280410504, "grad_norm": 0.1359864047610627, "learning_rate": 2e-05, "loss": 5.5598, "step": 1143 }, { "epoch": 0.07673474863332998, "grad_norm": 0.14329474704277556, "learning_rate": 2e-05, "loss": 5.463, "step": 1144 }, { "epoch": 0.07680182446255492, "grad_norm": 0.1437953329730129, "learning_rate": 2e-05, "loss": 5.5751, "step": 1145 }, { "epoch": 0.07686890029177985, "grad_norm": 0.13566370020749074, "learning_rate": 2e-05, "loss": 5.5121, "step": 1146 }, { "epoch": 0.07693597612100479, "grad_norm": 0.13865861667817359, "learning_rate": 2e-05, "loss": 5.5791, "step": 1147 }, { "epoch": 0.07700305195022973, "grad_norm": 0.14137836957841587, "learning_rate": 2e-05, "loss": 5.4182, "step": 1148 }, { "epoch": 0.07707012777945467, "grad_norm": 0.13438963155897532, "learning_rate": 2e-05, "loss": 5.4419, "step": 1149 }, { "epoch": 0.07713720360867961, "grad_norm": 0.13971576616661888, "learning_rate": 2e-05, "loss": 5.5189, "step": 1150 }, { "epoch": 0.07720427943790455, "grad_norm": 0.15071537608823352, "learning_rate": 2e-05, "loss": 5.6116, "step": 1151 }, { "epoch": 0.07727135526712949, "grad_norm": 0.1497273942093903, "learning_rate": 2e-05, "loss": 5.4276, "step": 1152 }, { "epoch": 0.07733843109635442, "grad_norm": 0.14675851494809283, "learning_rate": 2e-05, "loss": 5.3975, "step": 1153 }, { "epoch": 0.07740550692557936, "grad_norm": 0.15775326150127558, "learning_rate": 2e-05, "loss": 5.3885, "step": 1154 }, { "epoch": 0.0774725827548043, "grad_norm": 0.15763459469359942, "learning_rate": 2e-05, "loss": 5.4187, "step": 1155 }, { "epoch": 0.07753965858402924, "grad_norm": 0.14972605150511542, "learning_rate": 2e-05, "loss": 5.7013, "step": 1156 }, { "epoch": 0.07760673441325418, "grad_norm": 0.14941510795187188, "learning_rate": 2e-05, "loss": 5.6841, "step": 1157 }, { "epoch": 0.07767381024247912, "grad_norm": 0.14388876023524758, "learning_rate": 2e-05, "loss": 5.4145, "step": 1158 }, { "epoch": 0.07774088607170405, "grad_norm": 0.1414274638635389, "learning_rate": 2e-05, "loss": 5.5352, "step": 1159 }, { "epoch": 0.077807961900929, "grad_norm": 0.1448866724512301, "learning_rate": 2e-05, "loss": 5.5246, "step": 1160 }, { "epoch": 0.07787503773015395, "grad_norm": 0.14445148029316138, "learning_rate": 2e-05, "loss": 5.4582, "step": 1161 }, { "epoch": 0.07794211355937888, "grad_norm": 0.13867493141039325, "learning_rate": 2e-05, "loss": 5.529, "step": 1162 }, { "epoch": 0.07800918938860382, "grad_norm": 0.14241103154775353, "learning_rate": 2e-05, "loss": 5.5174, "step": 1163 }, { "epoch": 0.07807626521782876, "grad_norm": 0.137269020714671, "learning_rate": 2e-05, "loss": 5.3919, "step": 1164 }, { "epoch": 0.0781433410470537, "grad_norm": 0.1416581383252918, "learning_rate": 2e-05, "loss": 5.4569, "step": 1165 }, { "epoch": 0.07821041687627864, "grad_norm": 0.14282486212942994, "learning_rate": 2e-05, "loss": 5.5374, "step": 1166 }, { "epoch": 0.07827749270550358, "grad_norm": 0.14576104632973472, "learning_rate": 2e-05, "loss": 5.4376, "step": 1167 }, { "epoch": 0.07834456853472851, "grad_norm": 0.1439491106233289, "learning_rate": 2e-05, "loss": 5.3542, "step": 1168 }, { "epoch": 0.07841164436395345, "grad_norm": 0.14029559017199444, "learning_rate": 2e-05, "loss": 5.5309, "step": 1169 }, { "epoch": 0.07847872019317839, "grad_norm": 0.13861767576638004, "learning_rate": 2e-05, "loss": 5.5262, "step": 1170 }, { "epoch": 0.07854579602240333, "grad_norm": 0.13605406254857547, "learning_rate": 2e-05, "loss": 5.402, "step": 1171 }, { "epoch": 0.07861287185162827, "grad_norm": 0.1460571393535904, "learning_rate": 2e-05, "loss": 5.5562, "step": 1172 }, { "epoch": 0.07867994768085321, "grad_norm": 0.1427100012264957, "learning_rate": 2e-05, "loss": 5.3891, "step": 1173 }, { "epoch": 0.07874702351007815, "grad_norm": 0.13862309871492792, "learning_rate": 2e-05, "loss": 5.4592, "step": 1174 }, { "epoch": 0.07881409933930308, "grad_norm": 0.14165401441640516, "learning_rate": 2e-05, "loss": 5.6267, "step": 1175 }, { "epoch": 0.07888117516852802, "grad_norm": 0.14428629101664345, "learning_rate": 2e-05, "loss": 5.4491, "step": 1176 }, { "epoch": 0.07894825099775296, "grad_norm": 0.13708697694439304, "learning_rate": 2e-05, "loss": 5.4609, "step": 1177 }, { "epoch": 0.0790153268269779, "grad_norm": 0.14219321351245726, "learning_rate": 2e-05, "loss": 5.4581, "step": 1178 }, { "epoch": 0.07908240265620284, "grad_norm": 0.14475176213082103, "learning_rate": 2e-05, "loss": 5.4705, "step": 1179 }, { "epoch": 0.07914947848542778, "grad_norm": 0.14495323607053287, "learning_rate": 2e-05, "loss": 5.6, "step": 1180 }, { "epoch": 0.07921655431465272, "grad_norm": 0.1469052936398813, "learning_rate": 2e-05, "loss": 5.3686, "step": 1181 }, { "epoch": 0.07928363014387765, "grad_norm": 0.14016774976625285, "learning_rate": 2e-05, "loss": 5.4303, "step": 1182 }, { "epoch": 0.07935070597310259, "grad_norm": 0.14581741620028651, "learning_rate": 2e-05, "loss": 5.4811, "step": 1183 }, { "epoch": 0.07941778180232753, "grad_norm": 0.14561274071106803, "learning_rate": 2e-05, "loss": 5.4177, "step": 1184 }, { "epoch": 0.07948485763155247, "grad_norm": 0.14248876696428192, "learning_rate": 2e-05, "loss": 5.5295, "step": 1185 }, { "epoch": 0.07955193346077741, "grad_norm": 0.14258629549593332, "learning_rate": 2e-05, "loss": 5.5004, "step": 1186 }, { "epoch": 0.07961900929000235, "grad_norm": 0.15171497412873902, "learning_rate": 2e-05, "loss": 5.456, "step": 1187 }, { "epoch": 0.07968608511922728, "grad_norm": 0.13765355491737286, "learning_rate": 2e-05, "loss": 5.5511, "step": 1188 }, { "epoch": 0.07975316094845222, "grad_norm": 0.13331337694325643, "learning_rate": 2e-05, "loss": 5.45, "step": 1189 }, { "epoch": 0.07982023677767716, "grad_norm": 0.14369864811228147, "learning_rate": 2e-05, "loss": 5.4154, "step": 1190 }, { "epoch": 0.0798873126069021, "grad_norm": 0.13934538108135266, "learning_rate": 2e-05, "loss": 5.529, "step": 1191 }, { "epoch": 0.07995438843612704, "grad_norm": 0.1387065288897377, "learning_rate": 2e-05, "loss": 5.4245, "step": 1192 }, { "epoch": 0.08002146426535198, "grad_norm": 0.13725346030769553, "learning_rate": 2e-05, "loss": 5.4823, "step": 1193 }, { "epoch": 0.08008854009457692, "grad_norm": 0.14181522105240052, "learning_rate": 2e-05, "loss": 5.5834, "step": 1194 }, { "epoch": 0.08015561592380185, "grad_norm": 0.13933488499479074, "learning_rate": 2e-05, "loss": 5.5666, "step": 1195 }, { "epoch": 0.08022269175302679, "grad_norm": 0.1450222428363668, "learning_rate": 2e-05, "loss": 5.3183, "step": 1196 }, { "epoch": 0.08028976758225173, "grad_norm": 0.13456240498817187, "learning_rate": 2e-05, "loss": 5.5776, "step": 1197 }, { "epoch": 0.08035684341147667, "grad_norm": 0.13995869787569398, "learning_rate": 2e-05, "loss": 5.3833, "step": 1198 }, { "epoch": 0.08042391924070161, "grad_norm": 0.1356441202877071, "learning_rate": 2e-05, "loss": 5.418, "step": 1199 }, { "epoch": 0.08049099506992655, "grad_norm": 0.14863068702431556, "learning_rate": 2e-05, "loss": 5.3933, "step": 1200 }, { "epoch": 0.08055807089915148, "grad_norm": 0.15243129602460476, "learning_rate": 2e-05, "loss": 5.3849, "step": 1201 }, { "epoch": 0.08062514672837642, "grad_norm": 0.14559793057631934, "learning_rate": 2e-05, "loss": 5.4994, "step": 1202 }, { "epoch": 0.08069222255760136, "grad_norm": 0.14444955043448668, "learning_rate": 2e-05, "loss": 5.5834, "step": 1203 }, { "epoch": 0.0807592983868263, "grad_norm": 0.13988687565641014, "learning_rate": 2e-05, "loss": 5.4657, "step": 1204 }, { "epoch": 0.08082637421605125, "grad_norm": 0.14420819305763274, "learning_rate": 2e-05, "loss": 5.4773, "step": 1205 }, { "epoch": 0.08089345004527619, "grad_norm": 0.14742108493999984, "learning_rate": 2e-05, "loss": 5.512, "step": 1206 }, { "epoch": 0.08096052587450113, "grad_norm": 0.13974213138278402, "learning_rate": 2e-05, "loss": 5.4927, "step": 1207 }, { "epoch": 0.08102760170372607, "grad_norm": 0.14361355206249993, "learning_rate": 2e-05, "loss": 5.5512, "step": 1208 }, { "epoch": 0.081094677532951, "grad_norm": 0.1439746585907705, "learning_rate": 2e-05, "loss": 5.4776, "step": 1209 }, { "epoch": 0.08116175336217595, "grad_norm": 0.14224476792250978, "learning_rate": 2e-05, "loss": 5.5649, "step": 1210 }, { "epoch": 0.08122882919140088, "grad_norm": 0.14180373490556625, "learning_rate": 2e-05, "loss": 5.5023, "step": 1211 }, { "epoch": 0.08129590502062582, "grad_norm": 0.1421707939155885, "learning_rate": 2e-05, "loss": 5.4037, "step": 1212 }, { "epoch": 0.08136298084985076, "grad_norm": 0.15568110549071132, "learning_rate": 2e-05, "loss": 5.3902, "step": 1213 }, { "epoch": 0.0814300566790757, "grad_norm": 0.14097309466557634, "learning_rate": 2e-05, "loss": 5.3046, "step": 1214 }, { "epoch": 0.08149713250830064, "grad_norm": 0.13690270879247768, "learning_rate": 2e-05, "loss": 5.4637, "step": 1215 }, { "epoch": 0.08156420833752558, "grad_norm": 0.1383294024200251, "learning_rate": 2e-05, "loss": 5.374, "step": 1216 }, { "epoch": 0.08163128416675051, "grad_norm": 0.13731438562875722, "learning_rate": 2e-05, "loss": 5.5753, "step": 1217 }, { "epoch": 0.08169835999597545, "grad_norm": 0.1344968347752966, "learning_rate": 2e-05, "loss": 5.401, "step": 1218 }, { "epoch": 0.08176543582520039, "grad_norm": 0.14580935846297446, "learning_rate": 2e-05, "loss": 5.3928, "step": 1219 }, { "epoch": 0.08183251165442533, "grad_norm": 0.13862608763428322, "learning_rate": 2e-05, "loss": 5.43, "step": 1220 }, { "epoch": 0.08189958748365027, "grad_norm": 0.13775352933912643, "learning_rate": 2e-05, "loss": 5.4381, "step": 1221 }, { "epoch": 0.08196666331287521, "grad_norm": 0.13795105633831883, "learning_rate": 2e-05, "loss": 5.6114, "step": 1222 }, { "epoch": 0.08203373914210015, "grad_norm": 0.13882046307373774, "learning_rate": 2e-05, "loss": 5.516, "step": 1223 }, { "epoch": 0.08210081497132508, "grad_norm": 0.13726080734303064, "learning_rate": 2e-05, "loss": 5.3876, "step": 1224 }, { "epoch": 0.08216789080055002, "grad_norm": 0.1357022412430794, "learning_rate": 2e-05, "loss": 5.4565, "step": 1225 }, { "epoch": 0.08223496662977496, "grad_norm": 0.1397268904644595, "learning_rate": 2e-05, "loss": 5.618, "step": 1226 }, { "epoch": 0.0823020424589999, "grad_norm": 0.13905483205575644, "learning_rate": 2e-05, "loss": 5.4696, "step": 1227 }, { "epoch": 0.08236911828822484, "grad_norm": 0.13957779800863448, "learning_rate": 2e-05, "loss": 5.3884, "step": 1228 }, { "epoch": 0.08243619411744978, "grad_norm": 0.14016284661818446, "learning_rate": 2e-05, "loss": 5.4235, "step": 1229 }, { "epoch": 0.08250326994667471, "grad_norm": 0.1419329863054458, "learning_rate": 2e-05, "loss": 5.464, "step": 1230 }, { "epoch": 0.08257034577589965, "grad_norm": 0.1381730352621069, "learning_rate": 2e-05, "loss": 5.3382, "step": 1231 }, { "epoch": 0.08263742160512459, "grad_norm": 0.13902620071228722, "learning_rate": 2e-05, "loss": 5.5059, "step": 1232 }, { "epoch": 0.08270449743434953, "grad_norm": 0.14448608486257666, "learning_rate": 2e-05, "loss": 5.2856, "step": 1233 }, { "epoch": 0.08277157326357447, "grad_norm": 0.13700705800394947, "learning_rate": 2e-05, "loss": 5.4649, "step": 1234 }, { "epoch": 0.08283864909279941, "grad_norm": 0.13757637252375599, "learning_rate": 2e-05, "loss": 5.5843, "step": 1235 }, { "epoch": 0.08290572492202435, "grad_norm": 0.13850377329026067, "learning_rate": 2e-05, "loss": 5.3955, "step": 1236 }, { "epoch": 0.08297280075124928, "grad_norm": 0.14160081985817075, "learning_rate": 2e-05, "loss": 5.5125, "step": 1237 }, { "epoch": 0.08303987658047422, "grad_norm": 0.1322479742677396, "learning_rate": 2e-05, "loss": 5.5084, "step": 1238 }, { "epoch": 0.08310695240969916, "grad_norm": 0.1454671179330801, "learning_rate": 2e-05, "loss": 5.4719, "step": 1239 }, { "epoch": 0.0831740282389241, "grad_norm": 0.14266170253037294, "learning_rate": 2e-05, "loss": 5.4322, "step": 1240 }, { "epoch": 0.08324110406814904, "grad_norm": 0.13585619447341524, "learning_rate": 2e-05, "loss": 5.5123, "step": 1241 }, { "epoch": 0.08330817989737398, "grad_norm": 0.14849820823452817, "learning_rate": 2e-05, "loss": 5.5173, "step": 1242 }, { "epoch": 0.08337525572659892, "grad_norm": 0.14543715953344263, "learning_rate": 2e-05, "loss": 5.5148, "step": 1243 }, { "epoch": 0.08344233155582385, "grad_norm": 0.1459546125475233, "learning_rate": 2e-05, "loss": 5.552, "step": 1244 }, { "epoch": 0.08350940738504879, "grad_norm": 0.1386335120351884, "learning_rate": 2e-05, "loss": 5.6138, "step": 1245 }, { "epoch": 0.08357648321427373, "grad_norm": 0.13960381761972612, "learning_rate": 2e-05, "loss": 5.6059, "step": 1246 }, { "epoch": 0.08364355904349867, "grad_norm": 0.141316925471431, "learning_rate": 2e-05, "loss": 5.2984, "step": 1247 }, { "epoch": 0.08371063487272361, "grad_norm": 0.1474974535827872, "learning_rate": 2e-05, "loss": 5.5959, "step": 1248 }, { "epoch": 0.08377771070194855, "grad_norm": 0.13451415781274684, "learning_rate": 2e-05, "loss": 5.5253, "step": 1249 }, { "epoch": 0.0838447865311735, "grad_norm": 0.14660248870583265, "learning_rate": 2e-05, "loss": 5.4126, "step": 1250 }, { "epoch": 0.08391186236039844, "grad_norm": 0.1366119473228817, "learning_rate": 2e-05, "loss": 5.5314, "step": 1251 }, { "epoch": 0.08397893818962338, "grad_norm": 0.1376404335669714, "learning_rate": 2e-05, "loss": 5.5085, "step": 1252 }, { "epoch": 0.08404601401884831, "grad_norm": 0.14697870564318216, "learning_rate": 2e-05, "loss": 5.5257, "step": 1253 }, { "epoch": 0.08411308984807325, "grad_norm": 0.1385388928126056, "learning_rate": 2e-05, "loss": 5.5064, "step": 1254 }, { "epoch": 0.08418016567729819, "grad_norm": 0.13864248997072806, "learning_rate": 2e-05, "loss": 5.4451, "step": 1255 }, { "epoch": 0.08424724150652313, "grad_norm": 0.13928840466967118, "learning_rate": 2e-05, "loss": 5.5414, "step": 1256 }, { "epoch": 0.08431431733574807, "grad_norm": 0.1438527501394235, "learning_rate": 2e-05, "loss": 5.5946, "step": 1257 }, { "epoch": 0.084381393164973, "grad_norm": 0.1483817494510357, "learning_rate": 2e-05, "loss": 5.5329, "step": 1258 }, { "epoch": 0.08444846899419794, "grad_norm": 0.1436334211930949, "learning_rate": 2e-05, "loss": 5.4315, "step": 1259 }, { "epoch": 0.08451554482342288, "grad_norm": 0.13794680458943412, "learning_rate": 2e-05, "loss": 5.3955, "step": 1260 }, { "epoch": 0.08458262065264782, "grad_norm": 0.14038165944998016, "learning_rate": 2e-05, "loss": 5.5834, "step": 1261 }, { "epoch": 0.08464969648187276, "grad_norm": 0.14930209685151472, "learning_rate": 2e-05, "loss": 5.4734, "step": 1262 }, { "epoch": 0.0847167723110977, "grad_norm": 0.13962483816095564, "learning_rate": 2e-05, "loss": 5.4421, "step": 1263 }, { "epoch": 0.08478384814032264, "grad_norm": 0.13859763317349108, "learning_rate": 2e-05, "loss": 5.465, "step": 1264 }, { "epoch": 0.08485092396954758, "grad_norm": 0.1464107327685371, "learning_rate": 2e-05, "loss": 5.3452, "step": 1265 }, { "epoch": 0.08491799979877251, "grad_norm": 0.14126231353237328, "learning_rate": 2e-05, "loss": 5.5179, "step": 1266 }, { "epoch": 0.08498507562799745, "grad_norm": 0.14505936282697152, "learning_rate": 2e-05, "loss": 5.5057, "step": 1267 }, { "epoch": 0.08505215145722239, "grad_norm": 0.14875912251143386, "learning_rate": 2e-05, "loss": 5.4218, "step": 1268 }, { "epoch": 0.08511922728644733, "grad_norm": 0.1389754046925835, "learning_rate": 2e-05, "loss": 5.3983, "step": 1269 }, { "epoch": 0.08518630311567227, "grad_norm": 0.13967068183000111, "learning_rate": 2e-05, "loss": 5.5094, "step": 1270 }, { "epoch": 0.0852533789448972, "grad_norm": 0.1488209200460791, "learning_rate": 2e-05, "loss": 5.3736, "step": 1271 }, { "epoch": 0.08532045477412215, "grad_norm": 0.14794142692671336, "learning_rate": 2e-05, "loss": 5.5669, "step": 1272 }, { "epoch": 0.08538753060334708, "grad_norm": 0.1422316034288501, "learning_rate": 2e-05, "loss": 5.4294, "step": 1273 }, { "epoch": 0.08545460643257202, "grad_norm": 0.14379568630833592, "learning_rate": 2e-05, "loss": 5.4329, "step": 1274 }, { "epoch": 0.08552168226179696, "grad_norm": 0.14849849902957413, "learning_rate": 2e-05, "loss": 5.3962, "step": 1275 }, { "epoch": 0.0855887580910219, "grad_norm": 0.14141089016964428, "learning_rate": 2e-05, "loss": 5.4206, "step": 1276 }, { "epoch": 0.08565583392024684, "grad_norm": 0.1471683328338236, "learning_rate": 2e-05, "loss": 5.4604, "step": 1277 }, { "epoch": 0.08572290974947178, "grad_norm": 0.1434382386596933, "learning_rate": 2e-05, "loss": 5.494, "step": 1278 }, { "epoch": 0.08578998557869671, "grad_norm": 0.15575258717989754, "learning_rate": 2e-05, "loss": 5.5048, "step": 1279 }, { "epoch": 0.08585706140792165, "grad_norm": 0.1472395239208941, "learning_rate": 2e-05, "loss": 5.4326, "step": 1280 }, { "epoch": 0.08592413723714659, "grad_norm": 0.1536841213581419, "learning_rate": 2e-05, "loss": 5.4758, "step": 1281 }, { "epoch": 0.08599121306637153, "grad_norm": 0.15743232549146766, "learning_rate": 2e-05, "loss": 5.5082, "step": 1282 }, { "epoch": 0.08605828889559647, "grad_norm": 0.14011920242559417, "learning_rate": 2e-05, "loss": 5.4909, "step": 1283 }, { "epoch": 0.08612536472482141, "grad_norm": 0.14984687688555404, "learning_rate": 2e-05, "loss": 5.4604, "step": 1284 }, { "epoch": 0.08619244055404635, "grad_norm": 0.1522986003276844, "learning_rate": 2e-05, "loss": 5.3815, "step": 1285 }, { "epoch": 0.08625951638327128, "grad_norm": 0.14845541629021203, "learning_rate": 2e-05, "loss": 5.535, "step": 1286 }, { "epoch": 0.08632659221249622, "grad_norm": 0.14804710367644944, "learning_rate": 2e-05, "loss": 5.2777, "step": 1287 }, { "epoch": 0.08639366804172116, "grad_norm": 0.14857635704508282, "learning_rate": 2e-05, "loss": 5.5023, "step": 1288 }, { "epoch": 0.0864607438709461, "grad_norm": 0.1440843472293447, "learning_rate": 2e-05, "loss": 5.4168, "step": 1289 }, { "epoch": 0.08652781970017104, "grad_norm": 0.14929841899364843, "learning_rate": 2e-05, "loss": 5.4569, "step": 1290 }, { "epoch": 0.08659489552939598, "grad_norm": 0.14950326813066775, "learning_rate": 2e-05, "loss": 5.3887, "step": 1291 }, { "epoch": 0.08666197135862092, "grad_norm": 0.14338849328799566, "learning_rate": 2e-05, "loss": 5.4991, "step": 1292 }, { "epoch": 0.08672904718784585, "grad_norm": 0.14272238210600874, "learning_rate": 2e-05, "loss": 5.5214, "step": 1293 }, { "epoch": 0.08679612301707079, "grad_norm": 0.14483528809254628, "learning_rate": 2e-05, "loss": 5.4525, "step": 1294 }, { "epoch": 0.08686319884629573, "grad_norm": 0.14826009932239873, "learning_rate": 2e-05, "loss": 5.4484, "step": 1295 }, { "epoch": 0.08693027467552068, "grad_norm": 0.1417323097171973, "learning_rate": 2e-05, "loss": 5.5627, "step": 1296 }, { "epoch": 0.08699735050474562, "grad_norm": 0.14416745275351678, "learning_rate": 2e-05, "loss": 5.4807, "step": 1297 }, { "epoch": 0.08706442633397056, "grad_norm": 0.1454610647293804, "learning_rate": 2e-05, "loss": 5.531, "step": 1298 }, { "epoch": 0.0871315021631955, "grad_norm": 0.1499431951937672, "learning_rate": 2e-05, "loss": 5.5077, "step": 1299 }, { "epoch": 0.08719857799242044, "grad_norm": 0.14067372191873329, "learning_rate": 2e-05, "loss": 5.5985, "step": 1300 }, { "epoch": 0.08726565382164538, "grad_norm": 0.14346860534227457, "learning_rate": 2e-05, "loss": 5.4961, "step": 1301 }, { "epoch": 0.08733272965087031, "grad_norm": 0.13948958944753448, "learning_rate": 2e-05, "loss": 5.4222, "step": 1302 }, { "epoch": 0.08739980548009525, "grad_norm": 0.14791815130034747, "learning_rate": 2e-05, "loss": 5.5149, "step": 1303 }, { "epoch": 0.08746688130932019, "grad_norm": 0.14026761410621832, "learning_rate": 2e-05, "loss": 5.3621, "step": 1304 }, { "epoch": 0.08753395713854513, "grad_norm": 0.14170758880151219, "learning_rate": 2e-05, "loss": 5.5562, "step": 1305 }, { "epoch": 0.08760103296777007, "grad_norm": 0.14270336080594997, "learning_rate": 2e-05, "loss": 5.503, "step": 1306 }, { "epoch": 0.087668108796995, "grad_norm": 0.14757315957068892, "learning_rate": 2e-05, "loss": 5.515, "step": 1307 }, { "epoch": 0.08773518462621994, "grad_norm": 0.1393286351501242, "learning_rate": 2e-05, "loss": 5.4519, "step": 1308 }, { "epoch": 0.08780226045544488, "grad_norm": 0.13538465016769555, "learning_rate": 2e-05, "loss": 5.5524, "step": 1309 }, { "epoch": 0.08786933628466982, "grad_norm": 0.14315300209426393, "learning_rate": 2e-05, "loss": 5.4182, "step": 1310 }, { "epoch": 0.08793641211389476, "grad_norm": 0.14651034617221165, "learning_rate": 2e-05, "loss": 5.5472, "step": 1311 }, { "epoch": 0.0880034879431197, "grad_norm": 0.14091980011421534, "learning_rate": 2e-05, "loss": 5.4431, "step": 1312 }, { "epoch": 0.08807056377234464, "grad_norm": 0.14962838203591108, "learning_rate": 2e-05, "loss": 5.4233, "step": 1313 }, { "epoch": 0.08813763960156958, "grad_norm": 0.15037937406690022, "learning_rate": 2e-05, "loss": 5.5357, "step": 1314 }, { "epoch": 0.08820471543079451, "grad_norm": 0.1385031835115251, "learning_rate": 2e-05, "loss": 5.4967, "step": 1315 }, { "epoch": 0.08827179126001945, "grad_norm": 0.14415758347376742, "learning_rate": 2e-05, "loss": 5.3607, "step": 1316 }, { "epoch": 0.08833886708924439, "grad_norm": 0.146394607543345, "learning_rate": 2e-05, "loss": 5.5424, "step": 1317 }, { "epoch": 0.08840594291846933, "grad_norm": 0.1464684196111146, "learning_rate": 2e-05, "loss": 5.4402, "step": 1318 }, { "epoch": 0.08847301874769427, "grad_norm": 0.1446010863015858, "learning_rate": 2e-05, "loss": 5.4206, "step": 1319 }, { "epoch": 0.0885400945769192, "grad_norm": 0.1409837732684613, "learning_rate": 2e-05, "loss": 5.5623, "step": 1320 }, { "epoch": 0.08860717040614415, "grad_norm": 0.14800656384437572, "learning_rate": 2e-05, "loss": 5.5133, "step": 1321 }, { "epoch": 0.08867424623536908, "grad_norm": 0.13927893940429178, "learning_rate": 2e-05, "loss": 5.4167, "step": 1322 }, { "epoch": 0.08874132206459402, "grad_norm": 0.14336809332156725, "learning_rate": 2e-05, "loss": 5.4193, "step": 1323 }, { "epoch": 0.08880839789381896, "grad_norm": 0.146472791808987, "learning_rate": 2e-05, "loss": 5.452, "step": 1324 }, { "epoch": 0.0888754737230439, "grad_norm": 0.13980344274198794, "learning_rate": 2e-05, "loss": 5.3657, "step": 1325 }, { "epoch": 0.08894254955226884, "grad_norm": 0.13946413683056713, "learning_rate": 2e-05, "loss": 5.4592, "step": 1326 }, { "epoch": 0.08900962538149378, "grad_norm": 0.13785693630517237, "learning_rate": 2e-05, "loss": 5.4442, "step": 1327 }, { "epoch": 0.08907670121071871, "grad_norm": 0.14487621962101752, "learning_rate": 2e-05, "loss": 5.5019, "step": 1328 }, { "epoch": 0.08914377703994365, "grad_norm": 0.14655467997402982, "learning_rate": 2e-05, "loss": 5.3784, "step": 1329 }, { "epoch": 0.08921085286916859, "grad_norm": 0.14282406432797431, "learning_rate": 2e-05, "loss": 5.4847, "step": 1330 }, { "epoch": 0.08927792869839353, "grad_norm": 0.14070380212180883, "learning_rate": 2e-05, "loss": 5.38, "step": 1331 }, { "epoch": 0.08934500452761847, "grad_norm": 0.13683969247205371, "learning_rate": 2e-05, "loss": 5.3617, "step": 1332 }, { "epoch": 0.0894120803568434, "grad_norm": 0.1415213448382773, "learning_rate": 2e-05, "loss": 5.3499, "step": 1333 }, { "epoch": 0.08947915618606835, "grad_norm": 0.1492196024626836, "learning_rate": 2e-05, "loss": 5.411, "step": 1334 }, { "epoch": 0.08954623201529328, "grad_norm": 0.14010746746139968, "learning_rate": 2e-05, "loss": 5.698, "step": 1335 }, { "epoch": 0.08961330784451822, "grad_norm": 0.1485465918122867, "learning_rate": 2e-05, "loss": 5.5733, "step": 1336 }, { "epoch": 0.08968038367374316, "grad_norm": 0.14883575388857279, "learning_rate": 2e-05, "loss": 5.4234, "step": 1337 }, { "epoch": 0.0897474595029681, "grad_norm": 0.13837125847988602, "learning_rate": 2e-05, "loss": 5.3907, "step": 1338 }, { "epoch": 0.08981453533219304, "grad_norm": 0.14908854717470627, "learning_rate": 2e-05, "loss": 5.6591, "step": 1339 }, { "epoch": 0.08988161116141798, "grad_norm": 0.15029909036932895, "learning_rate": 2e-05, "loss": 5.4337, "step": 1340 }, { "epoch": 0.08994868699064293, "grad_norm": 0.13659116517487482, "learning_rate": 2e-05, "loss": 5.6395, "step": 1341 }, { "epoch": 0.09001576281986787, "grad_norm": 0.1470585989345011, "learning_rate": 2e-05, "loss": 5.5943, "step": 1342 }, { "epoch": 0.0900828386490928, "grad_norm": 0.1440970320916433, "learning_rate": 2e-05, "loss": 5.4318, "step": 1343 }, { "epoch": 0.09014991447831774, "grad_norm": 0.14544803253010719, "learning_rate": 2e-05, "loss": 5.4892, "step": 1344 }, { "epoch": 0.09021699030754268, "grad_norm": 0.14935013879638187, "learning_rate": 2e-05, "loss": 5.5691, "step": 1345 }, { "epoch": 0.09028406613676762, "grad_norm": 0.14070388530930056, "learning_rate": 2e-05, "loss": 5.48, "step": 1346 }, { "epoch": 0.09035114196599256, "grad_norm": 0.14402562059908094, "learning_rate": 2e-05, "loss": 5.376, "step": 1347 }, { "epoch": 0.0904182177952175, "grad_norm": 0.14552149564165368, "learning_rate": 2e-05, "loss": 5.5209, "step": 1348 }, { "epoch": 0.09048529362444244, "grad_norm": 0.14310681695339325, "learning_rate": 2e-05, "loss": 5.5059, "step": 1349 }, { "epoch": 0.09055236945366738, "grad_norm": 0.13950865992279246, "learning_rate": 2e-05, "loss": 5.5437, "step": 1350 }, { "epoch": 0.09061944528289231, "grad_norm": 0.14208513306434092, "learning_rate": 2e-05, "loss": 5.4837, "step": 1351 }, { "epoch": 0.09068652111211725, "grad_norm": 0.13982778232989176, "learning_rate": 2e-05, "loss": 5.4343, "step": 1352 }, { "epoch": 0.09075359694134219, "grad_norm": 0.14031506079600467, "learning_rate": 2e-05, "loss": 5.4884, "step": 1353 }, { "epoch": 0.09082067277056713, "grad_norm": 0.15088647973548525, "learning_rate": 2e-05, "loss": 5.3649, "step": 1354 }, { "epoch": 0.09088774859979207, "grad_norm": 0.13463564055952962, "learning_rate": 2e-05, "loss": 5.4304, "step": 1355 }, { "epoch": 0.090954824429017, "grad_norm": 0.14149913910140366, "learning_rate": 2e-05, "loss": 5.4084, "step": 1356 }, { "epoch": 0.09102190025824194, "grad_norm": 0.1433544612229547, "learning_rate": 2e-05, "loss": 5.3883, "step": 1357 }, { "epoch": 0.09108897608746688, "grad_norm": 0.14346381541416334, "learning_rate": 2e-05, "loss": 5.4532, "step": 1358 }, { "epoch": 0.09115605191669182, "grad_norm": 0.1365856383913997, "learning_rate": 2e-05, "loss": 5.5309, "step": 1359 }, { "epoch": 0.09122312774591676, "grad_norm": 0.1507498943639077, "learning_rate": 2e-05, "loss": 5.4414, "step": 1360 }, { "epoch": 0.0912902035751417, "grad_norm": 0.1422688355936516, "learning_rate": 2e-05, "loss": 5.5128, "step": 1361 }, { "epoch": 0.09135727940436664, "grad_norm": 0.14632624161612934, "learning_rate": 2e-05, "loss": 5.483, "step": 1362 }, { "epoch": 0.09142435523359158, "grad_norm": 0.14134062101034955, "learning_rate": 2e-05, "loss": 5.4691, "step": 1363 }, { "epoch": 0.09149143106281651, "grad_norm": 0.14993726814192915, "learning_rate": 2e-05, "loss": 5.4476, "step": 1364 }, { "epoch": 0.09155850689204145, "grad_norm": 0.14520501059016772, "learning_rate": 2e-05, "loss": 5.3734, "step": 1365 }, { "epoch": 0.09162558272126639, "grad_norm": 0.14209286145426178, "learning_rate": 2e-05, "loss": 5.4849, "step": 1366 }, { "epoch": 0.09169265855049133, "grad_norm": 0.14197068080504954, "learning_rate": 2e-05, "loss": 5.3586, "step": 1367 }, { "epoch": 0.09175973437971627, "grad_norm": 0.14567417513884753, "learning_rate": 2e-05, "loss": 5.3951, "step": 1368 }, { "epoch": 0.0918268102089412, "grad_norm": 0.13846187603755725, "learning_rate": 2e-05, "loss": 5.4962, "step": 1369 }, { "epoch": 0.09189388603816614, "grad_norm": 0.13684859803714142, "learning_rate": 2e-05, "loss": 5.4262, "step": 1370 }, { "epoch": 0.09196096186739108, "grad_norm": 0.13547004176919145, "learning_rate": 2e-05, "loss": 5.4044, "step": 1371 }, { "epoch": 0.09202803769661602, "grad_norm": 0.13704379942294992, "learning_rate": 2e-05, "loss": 5.4251, "step": 1372 }, { "epoch": 0.09209511352584096, "grad_norm": 0.14658912197712248, "learning_rate": 2e-05, "loss": 5.4321, "step": 1373 }, { "epoch": 0.0921621893550659, "grad_norm": 0.14078754752370842, "learning_rate": 2e-05, "loss": 5.5168, "step": 1374 }, { "epoch": 0.09222926518429084, "grad_norm": 0.1373571887020972, "learning_rate": 2e-05, "loss": 5.3446, "step": 1375 }, { "epoch": 0.09229634101351578, "grad_norm": 0.13806212900216036, "learning_rate": 2e-05, "loss": 5.4385, "step": 1376 }, { "epoch": 0.09236341684274071, "grad_norm": 0.1441362791309543, "learning_rate": 2e-05, "loss": 5.5375, "step": 1377 }, { "epoch": 0.09243049267196565, "grad_norm": 0.14221368434281853, "learning_rate": 2e-05, "loss": 5.4707, "step": 1378 }, { "epoch": 0.09249756850119059, "grad_norm": 0.14251898868609914, "learning_rate": 2e-05, "loss": 5.5217, "step": 1379 }, { "epoch": 0.09256464433041553, "grad_norm": 0.13821790731105055, "learning_rate": 2e-05, "loss": 5.4537, "step": 1380 }, { "epoch": 0.09263172015964047, "grad_norm": 0.1420218417801707, "learning_rate": 2e-05, "loss": 5.4498, "step": 1381 }, { "epoch": 0.0926987959888654, "grad_norm": 0.1439368145498818, "learning_rate": 2e-05, "loss": 5.3649, "step": 1382 }, { "epoch": 0.09276587181809035, "grad_norm": 0.1478590258683936, "learning_rate": 2e-05, "loss": 5.4757, "step": 1383 }, { "epoch": 0.09283294764731528, "grad_norm": 0.14823345196361726, "learning_rate": 2e-05, "loss": 5.4178, "step": 1384 }, { "epoch": 0.09290002347654022, "grad_norm": 0.1438452571782001, "learning_rate": 2e-05, "loss": 5.3932, "step": 1385 }, { "epoch": 0.09296709930576516, "grad_norm": 0.1351761783974815, "learning_rate": 2e-05, "loss": 5.3545, "step": 1386 }, { "epoch": 0.09303417513499011, "grad_norm": 0.1440153626235563, "learning_rate": 2e-05, "loss": 5.4654, "step": 1387 }, { "epoch": 0.09310125096421505, "grad_norm": 0.15230903630949522, "learning_rate": 2e-05, "loss": 5.5669, "step": 1388 }, { "epoch": 0.09316832679343999, "grad_norm": 0.14587576696540575, "learning_rate": 2e-05, "loss": 5.3719, "step": 1389 }, { "epoch": 0.09323540262266493, "grad_norm": 0.1467190359080022, "learning_rate": 2e-05, "loss": 5.456, "step": 1390 }, { "epoch": 0.09330247845188987, "grad_norm": 0.15094003316282553, "learning_rate": 2e-05, "loss": 5.5228, "step": 1391 }, { "epoch": 0.0933695542811148, "grad_norm": 0.1436135415786532, "learning_rate": 2e-05, "loss": 5.4696, "step": 1392 }, { "epoch": 0.09343663011033974, "grad_norm": 0.1385222587434453, "learning_rate": 2e-05, "loss": 5.5118, "step": 1393 }, { "epoch": 0.09350370593956468, "grad_norm": 0.1470136271444273, "learning_rate": 2e-05, "loss": 5.4563, "step": 1394 }, { "epoch": 0.09357078176878962, "grad_norm": 0.1536992646908051, "learning_rate": 2e-05, "loss": 5.4609, "step": 1395 }, { "epoch": 0.09363785759801456, "grad_norm": 0.14172583532555863, "learning_rate": 2e-05, "loss": 5.5036, "step": 1396 }, { "epoch": 0.0937049334272395, "grad_norm": 0.13956270672432364, "learning_rate": 2e-05, "loss": 5.3792, "step": 1397 }, { "epoch": 0.09377200925646444, "grad_norm": 0.15048477968946694, "learning_rate": 2e-05, "loss": 5.5912, "step": 1398 }, { "epoch": 0.09383908508568937, "grad_norm": 0.14036206973798984, "learning_rate": 2e-05, "loss": 5.6266, "step": 1399 }, { "epoch": 0.09390616091491431, "grad_norm": 0.14789654039172342, "learning_rate": 2e-05, "loss": 5.4464, "step": 1400 }, { "epoch": 0.09397323674413925, "grad_norm": 0.14304027797456179, "learning_rate": 2e-05, "loss": 5.3957, "step": 1401 }, { "epoch": 0.09404031257336419, "grad_norm": 0.13693673616388563, "learning_rate": 2e-05, "loss": 5.4935, "step": 1402 }, { "epoch": 0.09410738840258913, "grad_norm": 0.1408985494160115, "learning_rate": 2e-05, "loss": 5.4413, "step": 1403 }, { "epoch": 0.09417446423181407, "grad_norm": 0.14469354787296979, "learning_rate": 2e-05, "loss": 5.5589, "step": 1404 }, { "epoch": 0.094241540061039, "grad_norm": 0.14906176205114333, "learning_rate": 2e-05, "loss": 5.5683, "step": 1405 }, { "epoch": 0.09430861589026394, "grad_norm": 0.13952881665570002, "learning_rate": 2e-05, "loss": 5.5546, "step": 1406 }, { "epoch": 0.09437569171948888, "grad_norm": 0.14160559105183865, "learning_rate": 2e-05, "loss": 5.4777, "step": 1407 }, { "epoch": 0.09444276754871382, "grad_norm": 0.1456131541734397, "learning_rate": 2e-05, "loss": 5.3941, "step": 1408 }, { "epoch": 0.09450984337793876, "grad_norm": 0.1436878539134071, "learning_rate": 2e-05, "loss": 5.4318, "step": 1409 }, { "epoch": 0.0945769192071637, "grad_norm": 0.14379645516385584, "learning_rate": 2e-05, "loss": 5.3143, "step": 1410 }, { "epoch": 0.09464399503638864, "grad_norm": 0.14616398315369994, "learning_rate": 2e-05, "loss": 5.4936, "step": 1411 }, { "epoch": 0.09471107086561358, "grad_norm": 0.14309080850810996, "learning_rate": 2e-05, "loss": 5.4587, "step": 1412 }, { "epoch": 0.09477814669483851, "grad_norm": 0.14277810194761908, "learning_rate": 2e-05, "loss": 5.5601, "step": 1413 }, { "epoch": 0.09484522252406345, "grad_norm": 0.14026133428598478, "learning_rate": 2e-05, "loss": 5.6291, "step": 1414 }, { "epoch": 0.09491229835328839, "grad_norm": 0.14347709978629164, "learning_rate": 2e-05, "loss": 5.46, "step": 1415 }, { "epoch": 0.09497937418251333, "grad_norm": 0.1372592112755479, "learning_rate": 2e-05, "loss": 5.4698, "step": 1416 }, { "epoch": 0.09504645001173827, "grad_norm": 0.1452696323322186, "learning_rate": 2e-05, "loss": 5.5036, "step": 1417 }, { "epoch": 0.0951135258409632, "grad_norm": 0.14146311632595904, "learning_rate": 2e-05, "loss": 5.4214, "step": 1418 }, { "epoch": 0.09518060167018814, "grad_norm": 0.13792383024976448, "learning_rate": 2e-05, "loss": 5.4614, "step": 1419 }, { "epoch": 0.09524767749941308, "grad_norm": 0.14556430157345474, "learning_rate": 2e-05, "loss": 5.4545, "step": 1420 }, { "epoch": 0.09531475332863802, "grad_norm": 0.1453958325506217, "learning_rate": 2e-05, "loss": 5.4486, "step": 1421 }, { "epoch": 0.09538182915786296, "grad_norm": 0.15123047878587004, "learning_rate": 2e-05, "loss": 5.424, "step": 1422 }, { "epoch": 0.0954489049870879, "grad_norm": 0.13693484174831866, "learning_rate": 2e-05, "loss": 5.4068, "step": 1423 }, { "epoch": 0.09551598081631284, "grad_norm": 0.15194491067390015, "learning_rate": 2e-05, "loss": 5.4902, "step": 1424 }, { "epoch": 0.09558305664553778, "grad_norm": 0.15595662181242484, "learning_rate": 2e-05, "loss": 5.5339, "step": 1425 }, { "epoch": 0.09565013247476271, "grad_norm": 0.14376313069922783, "learning_rate": 2e-05, "loss": 5.4155, "step": 1426 }, { "epoch": 0.09571720830398765, "grad_norm": 0.14570914540211619, "learning_rate": 2e-05, "loss": 5.6691, "step": 1427 }, { "epoch": 0.09578428413321259, "grad_norm": 0.1501387387113468, "learning_rate": 2e-05, "loss": 5.5234, "step": 1428 }, { "epoch": 0.09585135996243753, "grad_norm": 0.1417732097367562, "learning_rate": 2e-05, "loss": 5.4644, "step": 1429 }, { "epoch": 0.09591843579166247, "grad_norm": 0.1461061811047711, "learning_rate": 2e-05, "loss": 5.4655, "step": 1430 }, { "epoch": 0.0959855116208874, "grad_norm": 0.14521329841012698, "learning_rate": 2e-05, "loss": 5.5397, "step": 1431 }, { "epoch": 0.09605258745011236, "grad_norm": 0.14250003719317686, "learning_rate": 2e-05, "loss": 5.4671, "step": 1432 }, { "epoch": 0.0961196632793373, "grad_norm": 0.1444623618466229, "learning_rate": 2e-05, "loss": 5.5584, "step": 1433 }, { "epoch": 0.09618673910856224, "grad_norm": 0.1558963611723596, "learning_rate": 2e-05, "loss": 5.4584, "step": 1434 }, { "epoch": 0.09625381493778717, "grad_norm": 0.1425850999994635, "learning_rate": 2e-05, "loss": 5.4323, "step": 1435 }, { "epoch": 0.09632089076701211, "grad_norm": 0.1443912791393191, "learning_rate": 2e-05, "loss": 5.4698, "step": 1436 }, { "epoch": 0.09638796659623705, "grad_norm": 0.14682650254472662, "learning_rate": 2e-05, "loss": 5.5409, "step": 1437 }, { "epoch": 0.09645504242546199, "grad_norm": 0.15195608423093737, "learning_rate": 2e-05, "loss": 5.5503, "step": 1438 }, { "epoch": 0.09652211825468693, "grad_norm": 0.14121706913474383, "learning_rate": 2e-05, "loss": 5.4285, "step": 1439 }, { "epoch": 0.09658919408391187, "grad_norm": 0.14697444500133303, "learning_rate": 2e-05, "loss": 5.5413, "step": 1440 }, { "epoch": 0.0966562699131368, "grad_norm": 0.13942281678168458, "learning_rate": 2e-05, "loss": 5.4538, "step": 1441 }, { "epoch": 0.09672334574236174, "grad_norm": 0.14104285155413987, "learning_rate": 2e-05, "loss": 5.5466, "step": 1442 }, { "epoch": 0.09679042157158668, "grad_norm": 0.1427125486775428, "learning_rate": 2e-05, "loss": 5.5444, "step": 1443 }, { "epoch": 0.09685749740081162, "grad_norm": 0.15264755065161484, "learning_rate": 2e-05, "loss": 5.3982, "step": 1444 }, { "epoch": 0.09692457323003656, "grad_norm": 0.1448888528103812, "learning_rate": 2e-05, "loss": 5.4352, "step": 1445 }, { "epoch": 0.0969916490592615, "grad_norm": 0.14267306588088355, "learning_rate": 2e-05, "loss": 5.4476, "step": 1446 }, { "epoch": 0.09705872488848644, "grad_norm": 0.15051597184981824, "learning_rate": 2e-05, "loss": 5.4023, "step": 1447 }, { "epoch": 0.09712580071771137, "grad_norm": 0.14202500258740233, "learning_rate": 2e-05, "loss": 5.4592, "step": 1448 }, { "epoch": 0.09719287654693631, "grad_norm": 0.1463733107045511, "learning_rate": 2e-05, "loss": 5.5573, "step": 1449 }, { "epoch": 0.09725995237616125, "grad_norm": 0.13877765131952716, "learning_rate": 2e-05, "loss": 5.527, "step": 1450 }, { "epoch": 0.09732702820538619, "grad_norm": 0.14287613691378953, "learning_rate": 2e-05, "loss": 5.2484, "step": 1451 }, { "epoch": 0.09739410403461113, "grad_norm": 0.1402116935936757, "learning_rate": 2e-05, "loss": 5.4496, "step": 1452 }, { "epoch": 0.09746117986383607, "grad_norm": 0.13685702564068633, "learning_rate": 2e-05, "loss": 5.5175, "step": 1453 }, { "epoch": 0.097528255693061, "grad_norm": 0.14914382011286004, "learning_rate": 2e-05, "loss": 5.5026, "step": 1454 }, { "epoch": 0.09759533152228594, "grad_norm": 0.14328685861448667, "learning_rate": 2e-05, "loss": 5.5092, "step": 1455 }, { "epoch": 0.09766240735151088, "grad_norm": 0.13996318480142278, "learning_rate": 2e-05, "loss": 5.41, "step": 1456 }, { "epoch": 0.09772948318073582, "grad_norm": 0.14510732309808208, "learning_rate": 2e-05, "loss": 5.5072, "step": 1457 }, { "epoch": 0.09779655900996076, "grad_norm": 0.14638870773696838, "learning_rate": 2e-05, "loss": 5.6076, "step": 1458 }, { "epoch": 0.0978636348391857, "grad_norm": 0.1405136863450372, "learning_rate": 2e-05, "loss": 5.3722, "step": 1459 }, { "epoch": 0.09793071066841064, "grad_norm": 0.14090502202056135, "learning_rate": 2e-05, "loss": 5.5572, "step": 1460 }, { "epoch": 0.09799778649763558, "grad_norm": 0.14052988343236372, "learning_rate": 2e-05, "loss": 5.587, "step": 1461 }, { "epoch": 0.09806486232686051, "grad_norm": 0.1453986509363523, "learning_rate": 2e-05, "loss": 5.4771, "step": 1462 }, { "epoch": 0.09813193815608545, "grad_norm": 0.13637432305133412, "learning_rate": 2e-05, "loss": 5.4542, "step": 1463 }, { "epoch": 0.09819901398531039, "grad_norm": 0.14250382738529394, "learning_rate": 2e-05, "loss": 5.4672, "step": 1464 }, { "epoch": 0.09826608981453533, "grad_norm": 0.1378262893888623, "learning_rate": 2e-05, "loss": 5.3691, "step": 1465 }, { "epoch": 0.09833316564376027, "grad_norm": 0.13439725142698983, "learning_rate": 2e-05, "loss": 5.3776, "step": 1466 }, { "epoch": 0.0984002414729852, "grad_norm": 0.1471630990442746, "learning_rate": 2e-05, "loss": 5.5504, "step": 1467 }, { "epoch": 0.09846731730221014, "grad_norm": 0.14399568495256856, "learning_rate": 2e-05, "loss": 5.3869, "step": 1468 }, { "epoch": 0.09853439313143508, "grad_norm": 0.1477748217555435, "learning_rate": 2e-05, "loss": 5.4054, "step": 1469 }, { "epoch": 0.09860146896066002, "grad_norm": 0.1532820043239232, "learning_rate": 2e-05, "loss": 5.4448, "step": 1470 }, { "epoch": 0.09866854478988496, "grad_norm": 0.14055123999015937, "learning_rate": 2e-05, "loss": 5.5743, "step": 1471 }, { "epoch": 0.0987356206191099, "grad_norm": 0.14586513009013108, "learning_rate": 2e-05, "loss": 5.5328, "step": 1472 }, { "epoch": 0.09880269644833484, "grad_norm": 0.1481414525560901, "learning_rate": 2e-05, "loss": 5.376, "step": 1473 }, { "epoch": 0.09886977227755978, "grad_norm": 0.14133102583847396, "learning_rate": 2e-05, "loss": 5.5989, "step": 1474 }, { "epoch": 0.09893684810678471, "grad_norm": 0.14808957932939534, "learning_rate": 2e-05, "loss": 5.4285, "step": 1475 }, { "epoch": 0.09900392393600965, "grad_norm": 0.1499221993714861, "learning_rate": 2e-05, "loss": 5.4268, "step": 1476 }, { "epoch": 0.09907099976523459, "grad_norm": 0.14754184921942953, "learning_rate": 2e-05, "loss": 5.3247, "step": 1477 }, { "epoch": 0.09913807559445954, "grad_norm": 0.14500279027375446, "learning_rate": 2e-05, "loss": 5.5994, "step": 1478 }, { "epoch": 0.09920515142368448, "grad_norm": 0.14176833655694496, "learning_rate": 2e-05, "loss": 5.6478, "step": 1479 }, { "epoch": 0.09927222725290942, "grad_norm": 0.1394116809594869, "learning_rate": 2e-05, "loss": 5.6334, "step": 1480 }, { "epoch": 0.09933930308213436, "grad_norm": 0.13881772708575743, "learning_rate": 2e-05, "loss": 5.4758, "step": 1481 }, { "epoch": 0.0994063789113593, "grad_norm": 0.1437889938262072, "learning_rate": 2e-05, "loss": 5.4788, "step": 1482 }, { "epoch": 0.09947345474058424, "grad_norm": 0.1471051561515782, "learning_rate": 2e-05, "loss": 5.449, "step": 1483 }, { "epoch": 0.09954053056980917, "grad_norm": 0.13730453970681833, "learning_rate": 2e-05, "loss": 5.4207, "step": 1484 }, { "epoch": 0.09960760639903411, "grad_norm": 0.14443814260612675, "learning_rate": 2e-05, "loss": 5.4672, "step": 1485 }, { "epoch": 0.09967468222825905, "grad_norm": 0.15344638471921557, "learning_rate": 2e-05, "loss": 5.4765, "step": 1486 }, { "epoch": 0.09974175805748399, "grad_norm": 0.13818076111041894, "learning_rate": 2e-05, "loss": 5.4206, "step": 1487 }, { "epoch": 0.09980883388670893, "grad_norm": 0.14890323978630507, "learning_rate": 2e-05, "loss": 5.482, "step": 1488 }, { "epoch": 0.09987590971593387, "grad_norm": 0.14416808613099583, "learning_rate": 2e-05, "loss": 5.6296, "step": 1489 }, { "epoch": 0.0999429855451588, "grad_norm": 0.1353993174088175, "learning_rate": 2e-05, "loss": 5.4365, "step": 1490 }, { "epoch": 0.10001006137438374, "grad_norm": 0.1424246198847154, "learning_rate": 2e-05, "loss": 5.4142, "step": 1491 }, { "epoch": 0.10007713720360868, "grad_norm": 0.14695274609391934, "learning_rate": 2e-05, "loss": 5.5465, "step": 1492 }, { "epoch": 0.10014421303283362, "grad_norm": 0.1405966075130509, "learning_rate": 2e-05, "loss": 5.5469, "step": 1493 }, { "epoch": 0.10021128886205856, "grad_norm": 0.13802340853919748, "learning_rate": 2e-05, "loss": 5.4378, "step": 1494 }, { "epoch": 0.1002783646912835, "grad_norm": 0.1391301305923459, "learning_rate": 2e-05, "loss": 5.5521, "step": 1495 }, { "epoch": 0.10034544052050844, "grad_norm": 0.14376622390767146, "learning_rate": 2e-05, "loss": 5.4303, "step": 1496 }, { "epoch": 0.10041251634973337, "grad_norm": 0.13956750056836595, "learning_rate": 2e-05, "loss": 5.4598, "step": 1497 }, { "epoch": 0.10047959217895831, "grad_norm": 0.14870804120934086, "learning_rate": 2e-05, "loss": 5.4101, "step": 1498 }, { "epoch": 0.10054666800818325, "grad_norm": 0.1436567174772107, "learning_rate": 2e-05, "loss": 5.3582, "step": 1499 }, { "epoch": 0.10061374383740819, "grad_norm": 0.13488798243158245, "learning_rate": 2e-05, "loss": 5.418, "step": 1500 }, { "epoch": 0.10068081966663313, "grad_norm": 0.13321629375527355, "learning_rate": 2e-05, "loss": 5.505, "step": 1501 }, { "epoch": 0.10074789549585807, "grad_norm": 0.14931682206030206, "learning_rate": 2e-05, "loss": 5.3826, "step": 1502 }, { "epoch": 0.100814971325083, "grad_norm": 0.14365824907490743, "learning_rate": 2e-05, "loss": 5.6168, "step": 1503 }, { "epoch": 0.10088204715430794, "grad_norm": 0.14740139694631255, "learning_rate": 2e-05, "loss": 5.418, "step": 1504 }, { "epoch": 0.10094912298353288, "grad_norm": 0.14514302943048846, "learning_rate": 2e-05, "loss": 5.5008, "step": 1505 }, { "epoch": 0.10101619881275782, "grad_norm": 0.1431642248898501, "learning_rate": 2e-05, "loss": 5.4949, "step": 1506 }, { "epoch": 0.10108327464198276, "grad_norm": 0.14368899929172005, "learning_rate": 2e-05, "loss": 5.4035, "step": 1507 }, { "epoch": 0.1011503504712077, "grad_norm": 0.14077032725242883, "learning_rate": 2e-05, "loss": 5.4832, "step": 1508 }, { "epoch": 0.10121742630043264, "grad_norm": 0.1331346339592145, "learning_rate": 2e-05, "loss": 5.4048, "step": 1509 }, { "epoch": 0.10128450212965757, "grad_norm": 0.14817593911720636, "learning_rate": 2e-05, "loss": 5.4464, "step": 1510 }, { "epoch": 0.10135157795888251, "grad_norm": 0.13620501882606223, "learning_rate": 2e-05, "loss": 5.474, "step": 1511 }, { "epoch": 0.10141865378810745, "grad_norm": 0.1385327664836683, "learning_rate": 2e-05, "loss": 5.4458, "step": 1512 }, { "epoch": 0.10148572961733239, "grad_norm": 0.13838402522671625, "learning_rate": 2e-05, "loss": 5.4552, "step": 1513 }, { "epoch": 0.10155280544655733, "grad_norm": 0.1496067977321398, "learning_rate": 2e-05, "loss": 5.5641, "step": 1514 }, { "epoch": 0.10161988127578227, "grad_norm": 0.1460906635242363, "learning_rate": 2e-05, "loss": 5.3561, "step": 1515 }, { "epoch": 0.1016869571050072, "grad_norm": 0.14706094594030525, "learning_rate": 2e-05, "loss": 5.4643, "step": 1516 }, { "epoch": 0.10175403293423214, "grad_norm": 0.14243887042602202, "learning_rate": 2e-05, "loss": 5.5024, "step": 1517 }, { "epoch": 0.10182110876345708, "grad_norm": 0.14099101347442228, "learning_rate": 2e-05, "loss": 5.4459, "step": 1518 }, { "epoch": 0.10188818459268202, "grad_norm": 0.13656408160939715, "learning_rate": 2e-05, "loss": 5.4697, "step": 1519 }, { "epoch": 0.10195526042190696, "grad_norm": 0.15422594105675147, "learning_rate": 2e-05, "loss": 5.561, "step": 1520 }, { "epoch": 0.1020223362511319, "grad_norm": 0.145138258002595, "learning_rate": 2e-05, "loss": 5.405, "step": 1521 }, { "epoch": 0.10208941208035684, "grad_norm": 0.1354271071035837, "learning_rate": 2e-05, "loss": 5.469, "step": 1522 }, { "epoch": 0.10215648790958179, "grad_norm": 0.14356568361781288, "learning_rate": 2e-05, "loss": 5.4135, "step": 1523 }, { "epoch": 0.10222356373880673, "grad_norm": 0.14228170418508898, "learning_rate": 2e-05, "loss": 5.6452, "step": 1524 }, { "epoch": 0.10229063956803167, "grad_norm": 0.1407475056817347, "learning_rate": 2e-05, "loss": 5.5819, "step": 1525 }, { "epoch": 0.1023577153972566, "grad_norm": 0.1367415422805007, "learning_rate": 2e-05, "loss": 5.4516, "step": 1526 }, { "epoch": 0.10242479122648154, "grad_norm": 0.14452418013102258, "learning_rate": 2e-05, "loss": 5.4371, "step": 1527 }, { "epoch": 0.10249186705570648, "grad_norm": 0.14923818561048818, "learning_rate": 2e-05, "loss": 5.541, "step": 1528 }, { "epoch": 0.10255894288493142, "grad_norm": 0.13814565566242995, "learning_rate": 2e-05, "loss": 5.5311, "step": 1529 }, { "epoch": 0.10262601871415636, "grad_norm": 0.14219689525430027, "learning_rate": 2e-05, "loss": 5.5101, "step": 1530 }, { "epoch": 0.1026930945433813, "grad_norm": 0.14010004173628177, "learning_rate": 2e-05, "loss": 5.5177, "step": 1531 }, { "epoch": 0.10276017037260624, "grad_norm": 0.148023726161561, "learning_rate": 2e-05, "loss": 5.3813, "step": 1532 }, { "epoch": 0.10282724620183117, "grad_norm": 0.14288422052927535, "learning_rate": 2e-05, "loss": 5.5802, "step": 1533 }, { "epoch": 0.10289432203105611, "grad_norm": 0.13586985331286183, "learning_rate": 2e-05, "loss": 5.2623, "step": 1534 }, { "epoch": 0.10296139786028105, "grad_norm": 0.14666474655625442, "learning_rate": 2e-05, "loss": 5.5293, "step": 1535 }, { "epoch": 0.10302847368950599, "grad_norm": 0.1420314853632253, "learning_rate": 2e-05, "loss": 5.3172, "step": 1536 }, { "epoch": 0.10309554951873093, "grad_norm": 0.14226017170205124, "learning_rate": 2e-05, "loss": 5.2921, "step": 1537 }, { "epoch": 0.10316262534795587, "grad_norm": 0.13756992554008657, "learning_rate": 2e-05, "loss": 5.4588, "step": 1538 }, { "epoch": 0.1032297011771808, "grad_norm": 0.14012281621931746, "learning_rate": 2e-05, "loss": 5.5935, "step": 1539 }, { "epoch": 0.10329677700640574, "grad_norm": 0.14614080045313507, "learning_rate": 2e-05, "loss": 5.5484, "step": 1540 }, { "epoch": 0.10336385283563068, "grad_norm": 0.15049913958678665, "learning_rate": 2e-05, "loss": 5.39, "step": 1541 }, { "epoch": 0.10343092866485562, "grad_norm": 0.14398375990120477, "learning_rate": 2e-05, "loss": 5.5998, "step": 1542 }, { "epoch": 0.10349800449408056, "grad_norm": 0.14232048242289982, "learning_rate": 2e-05, "loss": 5.4925, "step": 1543 }, { "epoch": 0.1035650803233055, "grad_norm": 0.13749243783503928, "learning_rate": 2e-05, "loss": 5.5421, "step": 1544 }, { "epoch": 0.10363215615253044, "grad_norm": 0.1449515872877285, "learning_rate": 2e-05, "loss": 5.4696, "step": 1545 }, { "epoch": 0.10369923198175537, "grad_norm": 0.15096689226647395, "learning_rate": 2e-05, "loss": 5.3836, "step": 1546 }, { "epoch": 0.10376630781098031, "grad_norm": 0.14168366064977186, "learning_rate": 2e-05, "loss": 5.459, "step": 1547 }, { "epoch": 0.10383338364020525, "grad_norm": 0.15649980635578664, "learning_rate": 2e-05, "loss": 5.3599, "step": 1548 }, { "epoch": 0.10390045946943019, "grad_norm": 0.13776447492028498, "learning_rate": 2e-05, "loss": 5.41, "step": 1549 }, { "epoch": 0.10396753529865513, "grad_norm": 0.1364754513749412, "learning_rate": 2e-05, "loss": 5.3024, "step": 1550 }, { "epoch": 0.10403461112788007, "grad_norm": 0.13708255968508387, "learning_rate": 2e-05, "loss": 5.511, "step": 1551 }, { "epoch": 0.104101686957105, "grad_norm": 0.1446875922306525, "learning_rate": 2e-05, "loss": 5.4001, "step": 1552 }, { "epoch": 0.10416876278632994, "grad_norm": 0.1424665685699538, "learning_rate": 2e-05, "loss": 5.4592, "step": 1553 }, { "epoch": 0.10423583861555488, "grad_norm": 0.14503345132812664, "learning_rate": 2e-05, "loss": 5.5307, "step": 1554 }, { "epoch": 0.10430291444477982, "grad_norm": 0.1497884504962539, "learning_rate": 2e-05, "loss": 5.5048, "step": 1555 }, { "epoch": 0.10436999027400476, "grad_norm": 0.15605988268366836, "learning_rate": 2e-05, "loss": 5.6675, "step": 1556 }, { "epoch": 0.1044370661032297, "grad_norm": 0.14137419045183583, "learning_rate": 2e-05, "loss": 5.6229, "step": 1557 }, { "epoch": 0.10450414193245464, "grad_norm": 0.1436336763573979, "learning_rate": 2e-05, "loss": 5.5, "step": 1558 }, { "epoch": 0.10457121776167957, "grad_norm": 0.15059536305628135, "learning_rate": 2e-05, "loss": 5.5414, "step": 1559 }, { "epoch": 0.10463829359090451, "grad_norm": 0.13972773358580998, "learning_rate": 2e-05, "loss": 5.4575, "step": 1560 }, { "epoch": 0.10470536942012945, "grad_norm": 0.1416903880782502, "learning_rate": 2e-05, "loss": 5.4733, "step": 1561 }, { "epoch": 0.10477244524935439, "grad_norm": 0.14669716622132847, "learning_rate": 2e-05, "loss": 5.3905, "step": 1562 }, { "epoch": 0.10483952107857933, "grad_norm": 0.14481199075925796, "learning_rate": 2e-05, "loss": 5.4331, "step": 1563 }, { "epoch": 0.10490659690780427, "grad_norm": 0.1396169563659074, "learning_rate": 2e-05, "loss": 5.4084, "step": 1564 }, { "epoch": 0.1049736727370292, "grad_norm": 0.15186513923788264, "learning_rate": 2e-05, "loss": 5.3706, "step": 1565 }, { "epoch": 0.10504074856625414, "grad_norm": 0.14350223124207856, "learning_rate": 2e-05, "loss": 5.46, "step": 1566 }, { "epoch": 0.10510782439547908, "grad_norm": 0.14641523299354378, "learning_rate": 2e-05, "loss": 5.3888, "step": 1567 }, { "epoch": 0.10517490022470402, "grad_norm": 0.15058467741464682, "learning_rate": 2e-05, "loss": 5.5003, "step": 1568 }, { "epoch": 0.10524197605392897, "grad_norm": 0.14952971975024862, "learning_rate": 2e-05, "loss": 5.3552, "step": 1569 }, { "epoch": 0.10530905188315391, "grad_norm": 0.1447270362406306, "learning_rate": 2e-05, "loss": 5.3409, "step": 1570 }, { "epoch": 0.10537612771237885, "grad_norm": 0.14401496436437722, "learning_rate": 2e-05, "loss": 5.5279, "step": 1571 }, { "epoch": 0.10544320354160379, "grad_norm": 0.15791995478912157, "learning_rate": 2e-05, "loss": 5.4671, "step": 1572 }, { "epoch": 0.10551027937082873, "grad_norm": 0.14677332053639972, "learning_rate": 2e-05, "loss": 5.4675, "step": 1573 }, { "epoch": 0.10557735520005367, "grad_norm": 0.14809925522160325, "learning_rate": 2e-05, "loss": 5.5621, "step": 1574 }, { "epoch": 0.1056444310292786, "grad_norm": 0.14443655329215635, "learning_rate": 2e-05, "loss": 5.5806, "step": 1575 }, { "epoch": 0.10571150685850354, "grad_norm": 0.15187316508938864, "learning_rate": 2e-05, "loss": 5.5074, "step": 1576 }, { "epoch": 0.10577858268772848, "grad_norm": 0.14727212147372276, "learning_rate": 2e-05, "loss": 5.4041, "step": 1577 }, { "epoch": 0.10584565851695342, "grad_norm": 0.1389546512875751, "learning_rate": 2e-05, "loss": 5.4859, "step": 1578 }, { "epoch": 0.10591273434617836, "grad_norm": 0.14439034799852285, "learning_rate": 2e-05, "loss": 5.5473, "step": 1579 }, { "epoch": 0.1059798101754033, "grad_norm": 0.14507559965320502, "learning_rate": 2e-05, "loss": 5.5735, "step": 1580 }, { "epoch": 0.10604688600462824, "grad_norm": 0.14267571223864173, "learning_rate": 2e-05, "loss": 5.4555, "step": 1581 }, { "epoch": 0.10611396183385317, "grad_norm": 0.14210478643676563, "learning_rate": 2e-05, "loss": 5.3845, "step": 1582 }, { "epoch": 0.10618103766307811, "grad_norm": 0.13712446562584085, "learning_rate": 2e-05, "loss": 5.5484, "step": 1583 }, { "epoch": 0.10624811349230305, "grad_norm": 0.15361039319791728, "learning_rate": 2e-05, "loss": 5.3919, "step": 1584 }, { "epoch": 0.10631518932152799, "grad_norm": 0.14336898327100167, "learning_rate": 2e-05, "loss": 5.5685, "step": 1585 }, { "epoch": 0.10638226515075293, "grad_norm": 0.1377696405133565, "learning_rate": 2e-05, "loss": 5.2445, "step": 1586 }, { "epoch": 0.10644934097997787, "grad_norm": 0.14969591078211128, "learning_rate": 2e-05, "loss": 5.5519, "step": 1587 }, { "epoch": 0.1065164168092028, "grad_norm": 0.1445595093201612, "learning_rate": 2e-05, "loss": 5.4675, "step": 1588 }, { "epoch": 0.10658349263842774, "grad_norm": 0.1432980178585363, "learning_rate": 2e-05, "loss": 5.4389, "step": 1589 }, { "epoch": 0.10665056846765268, "grad_norm": 0.1415484202135085, "learning_rate": 2e-05, "loss": 5.3719, "step": 1590 }, { "epoch": 0.10671764429687762, "grad_norm": 0.14765808557992754, "learning_rate": 2e-05, "loss": 5.4103, "step": 1591 }, { "epoch": 0.10678472012610256, "grad_norm": 0.1573044394493838, "learning_rate": 2e-05, "loss": 5.3392, "step": 1592 }, { "epoch": 0.1068517959553275, "grad_norm": 0.13156425781974831, "learning_rate": 2e-05, "loss": 5.3795, "step": 1593 }, { "epoch": 0.10691887178455244, "grad_norm": 0.1399307448335487, "learning_rate": 2e-05, "loss": 5.6144, "step": 1594 }, { "epoch": 0.10698594761377737, "grad_norm": 0.16623993319067248, "learning_rate": 2e-05, "loss": 5.484, "step": 1595 }, { "epoch": 0.10705302344300231, "grad_norm": 0.1486480344029779, "learning_rate": 2e-05, "loss": 5.3755, "step": 1596 }, { "epoch": 0.10712009927222725, "grad_norm": 0.1470066652907782, "learning_rate": 2e-05, "loss": 5.4575, "step": 1597 }, { "epoch": 0.10718717510145219, "grad_norm": 0.14508658043720463, "learning_rate": 2e-05, "loss": 5.2721, "step": 1598 }, { "epoch": 0.10725425093067713, "grad_norm": 0.15204217421002456, "learning_rate": 2e-05, "loss": 5.6199, "step": 1599 }, { "epoch": 0.10732132675990207, "grad_norm": 0.1380977637575904, "learning_rate": 2e-05, "loss": 5.465, "step": 1600 }, { "epoch": 0.107388402589127, "grad_norm": 0.14082072021061, "learning_rate": 2e-05, "loss": 5.2922, "step": 1601 }, { "epoch": 0.10745547841835194, "grad_norm": 0.14366300546598018, "learning_rate": 2e-05, "loss": 5.4276, "step": 1602 }, { "epoch": 0.10752255424757688, "grad_norm": 0.14642812533400576, "learning_rate": 2e-05, "loss": 5.4281, "step": 1603 }, { "epoch": 0.10758963007680182, "grad_norm": 0.14608200514108957, "learning_rate": 2e-05, "loss": 5.2989, "step": 1604 }, { "epoch": 0.10765670590602676, "grad_norm": 0.14004022503958988, "learning_rate": 2e-05, "loss": 5.4821, "step": 1605 }, { "epoch": 0.1077237817352517, "grad_norm": 0.14733918204446922, "learning_rate": 2e-05, "loss": 5.4471, "step": 1606 }, { "epoch": 0.10779085756447664, "grad_norm": 0.1464914525330342, "learning_rate": 2e-05, "loss": 5.4009, "step": 1607 }, { "epoch": 0.10785793339370157, "grad_norm": 0.14675367350917357, "learning_rate": 2e-05, "loss": 5.3843, "step": 1608 }, { "epoch": 0.10792500922292651, "grad_norm": 0.15322463407308098, "learning_rate": 2e-05, "loss": 5.3873, "step": 1609 }, { "epoch": 0.10799208505215145, "grad_norm": 0.14673559051695673, "learning_rate": 2e-05, "loss": 5.5637, "step": 1610 }, { "epoch": 0.10805916088137639, "grad_norm": 0.1494705331046625, "learning_rate": 2e-05, "loss": 5.5287, "step": 1611 }, { "epoch": 0.10812623671060133, "grad_norm": 0.14411773039872902, "learning_rate": 2e-05, "loss": 5.4967, "step": 1612 }, { "epoch": 0.10819331253982627, "grad_norm": 0.14195931389543873, "learning_rate": 2e-05, "loss": 5.4672, "step": 1613 }, { "epoch": 0.10826038836905122, "grad_norm": 0.15472984288834787, "learning_rate": 2e-05, "loss": 5.4513, "step": 1614 }, { "epoch": 0.10832746419827616, "grad_norm": 0.1562775797511193, "learning_rate": 2e-05, "loss": 5.3802, "step": 1615 }, { "epoch": 0.1083945400275011, "grad_norm": 0.14585106005807363, "learning_rate": 2e-05, "loss": 5.6416, "step": 1616 }, { "epoch": 0.10846161585672603, "grad_norm": 0.15043701513070074, "learning_rate": 2e-05, "loss": 5.5746, "step": 1617 }, { "epoch": 0.10852869168595097, "grad_norm": 0.14931235041865532, "learning_rate": 2e-05, "loss": 5.4682, "step": 1618 }, { "epoch": 0.10859576751517591, "grad_norm": 0.1444564169913332, "learning_rate": 2e-05, "loss": 5.5137, "step": 1619 }, { "epoch": 0.10866284334440085, "grad_norm": 0.14341798887990245, "learning_rate": 2e-05, "loss": 5.3757, "step": 1620 }, { "epoch": 0.10872991917362579, "grad_norm": 0.13593163083512766, "learning_rate": 2e-05, "loss": 5.4316, "step": 1621 }, { "epoch": 0.10879699500285073, "grad_norm": 0.1462960234488021, "learning_rate": 2e-05, "loss": 5.5694, "step": 1622 }, { "epoch": 0.10886407083207567, "grad_norm": 0.1489646357403895, "learning_rate": 2e-05, "loss": 5.339, "step": 1623 }, { "epoch": 0.1089311466613006, "grad_norm": 0.13877426185852562, "learning_rate": 2e-05, "loss": 5.4126, "step": 1624 }, { "epoch": 0.10899822249052554, "grad_norm": 0.1393355934197879, "learning_rate": 2e-05, "loss": 5.4991, "step": 1625 }, { "epoch": 0.10906529831975048, "grad_norm": 0.13819675166658885, "learning_rate": 2e-05, "loss": 5.3163, "step": 1626 }, { "epoch": 0.10913237414897542, "grad_norm": 0.1432720616667487, "learning_rate": 2e-05, "loss": 5.4859, "step": 1627 }, { "epoch": 0.10919944997820036, "grad_norm": 0.15732635256014468, "learning_rate": 2e-05, "loss": 5.4684, "step": 1628 }, { "epoch": 0.1092665258074253, "grad_norm": 0.14271893851940626, "learning_rate": 2e-05, "loss": 5.4247, "step": 1629 }, { "epoch": 0.10933360163665024, "grad_norm": 0.13904309106942067, "learning_rate": 2e-05, "loss": 5.6436, "step": 1630 }, { "epoch": 0.10940067746587517, "grad_norm": 0.1458382128306477, "learning_rate": 2e-05, "loss": 5.3394, "step": 1631 }, { "epoch": 0.10946775329510011, "grad_norm": 0.14542384974558076, "learning_rate": 2e-05, "loss": 5.571, "step": 1632 }, { "epoch": 0.10953482912432505, "grad_norm": 0.1473579993103474, "learning_rate": 2e-05, "loss": 5.6455, "step": 1633 }, { "epoch": 0.10960190495354999, "grad_norm": 0.14803626971590494, "learning_rate": 2e-05, "loss": 5.4905, "step": 1634 }, { "epoch": 0.10966898078277493, "grad_norm": 0.1443703665206401, "learning_rate": 2e-05, "loss": 5.347, "step": 1635 }, { "epoch": 0.10973605661199987, "grad_norm": 0.14615683191088877, "learning_rate": 2e-05, "loss": 5.4399, "step": 1636 }, { "epoch": 0.1098031324412248, "grad_norm": 0.14542917194443583, "learning_rate": 2e-05, "loss": 5.4712, "step": 1637 }, { "epoch": 0.10987020827044974, "grad_norm": 0.14241383508357144, "learning_rate": 2e-05, "loss": 5.4413, "step": 1638 }, { "epoch": 0.10993728409967468, "grad_norm": 0.14099400607943796, "learning_rate": 2e-05, "loss": 5.3902, "step": 1639 }, { "epoch": 0.11000435992889962, "grad_norm": 0.14372499176572798, "learning_rate": 2e-05, "loss": 5.3919, "step": 1640 }, { "epoch": 0.11007143575812456, "grad_norm": 0.14484381290392995, "learning_rate": 2e-05, "loss": 5.5169, "step": 1641 }, { "epoch": 0.1101385115873495, "grad_norm": 0.13949303692361023, "learning_rate": 2e-05, "loss": 5.4244, "step": 1642 }, { "epoch": 0.11020558741657444, "grad_norm": 0.14020444405814161, "learning_rate": 2e-05, "loss": 5.4815, "step": 1643 }, { "epoch": 0.11027266324579937, "grad_norm": 0.14375005513463238, "learning_rate": 2e-05, "loss": 5.4014, "step": 1644 }, { "epoch": 0.11033973907502431, "grad_norm": 0.1434973797511504, "learning_rate": 2e-05, "loss": 5.4947, "step": 1645 }, { "epoch": 0.11040681490424925, "grad_norm": 0.14174880273486512, "learning_rate": 2e-05, "loss": 5.3112, "step": 1646 }, { "epoch": 0.11047389073347419, "grad_norm": 0.14138289550912594, "learning_rate": 2e-05, "loss": 5.5642, "step": 1647 }, { "epoch": 0.11054096656269913, "grad_norm": 0.14479067614998764, "learning_rate": 2e-05, "loss": 5.2692, "step": 1648 }, { "epoch": 0.11060804239192407, "grad_norm": 0.1478340134098868, "learning_rate": 2e-05, "loss": 5.459, "step": 1649 }, { "epoch": 0.110675118221149, "grad_norm": 0.13909230019320293, "learning_rate": 2e-05, "loss": 5.4042, "step": 1650 }, { "epoch": 0.11074219405037394, "grad_norm": 0.1474088816508054, "learning_rate": 2e-05, "loss": 5.3549, "step": 1651 }, { "epoch": 0.11080926987959888, "grad_norm": 0.14885096916718182, "learning_rate": 2e-05, "loss": 5.2909, "step": 1652 }, { "epoch": 0.11087634570882382, "grad_norm": 0.13788542139191265, "learning_rate": 2e-05, "loss": 5.4232, "step": 1653 }, { "epoch": 0.11094342153804876, "grad_norm": 0.14121594324857864, "learning_rate": 2e-05, "loss": 5.441, "step": 1654 }, { "epoch": 0.1110104973672737, "grad_norm": 0.15019807667517873, "learning_rate": 2e-05, "loss": 5.5792, "step": 1655 }, { "epoch": 0.11107757319649864, "grad_norm": 0.14842626129979514, "learning_rate": 2e-05, "loss": 5.4296, "step": 1656 }, { "epoch": 0.11114464902572357, "grad_norm": 0.14511358120898157, "learning_rate": 2e-05, "loss": 5.3396, "step": 1657 }, { "epoch": 0.11121172485494851, "grad_norm": 0.14389158901967103, "learning_rate": 2e-05, "loss": 5.4206, "step": 1658 }, { "epoch": 0.11127880068417345, "grad_norm": 0.14621390599378412, "learning_rate": 2e-05, "loss": 5.5877, "step": 1659 }, { "epoch": 0.1113458765133984, "grad_norm": 0.1367833181639195, "learning_rate": 2e-05, "loss": 5.5572, "step": 1660 }, { "epoch": 0.11141295234262334, "grad_norm": 0.1543871516887754, "learning_rate": 2e-05, "loss": 5.4674, "step": 1661 }, { "epoch": 0.11148002817184828, "grad_norm": 0.1532344016609872, "learning_rate": 2e-05, "loss": 5.3888, "step": 1662 }, { "epoch": 0.11154710400107322, "grad_norm": 0.15281589254277947, "learning_rate": 2e-05, "loss": 5.4768, "step": 1663 }, { "epoch": 0.11161417983029816, "grad_norm": 0.15642266862766188, "learning_rate": 2e-05, "loss": 5.4643, "step": 1664 }, { "epoch": 0.1116812556595231, "grad_norm": 0.15260758744362116, "learning_rate": 2e-05, "loss": 5.4734, "step": 1665 }, { "epoch": 0.11174833148874803, "grad_norm": 0.13960684956017316, "learning_rate": 2e-05, "loss": 5.5088, "step": 1666 }, { "epoch": 0.11181540731797297, "grad_norm": 0.14736728747289005, "learning_rate": 2e-05, "loss": 5.4526, "step": 1667 }, { "epoch": 0.11188248314719791, "grad_norm": 0.1586010165621013, "learning_rate": 2e-05, "loss": 5.4917, "step": 1668 }, { "epoch": 0.11194955897642285, "grad_norm": 0.1389775995023264, "learning_rate": 2e-05, "loss": 5.6105, "step": 1669 }, { "epoch": 0.11201663480564779, "grad_norm": 0.14776175175617412, "learning_rate": 2e-05, "loss": 5.4752, "step": 1670 }, { "epoch": 0.11208371063487273, "grad_norm": 0.1590314186160879, "learning_rate": 2e-05, "loss": 5.33, "step": 1671 }, { "epoch": 0.11215078646409767, "grad_norm": 0.13827655770008004, "learning_rate": 2e-05, "loss": 5.4445, "step": 1672 }, { "epoch": 0.1122178622933226, "grad_norm": 0.13986179021167408, "learning_rate": 2e-05, "loss": 5.5222, "step": 1673 }, { "epoch": 0.11228493812254754, "grad_norm": 0.15401166275088443, "learning_rate": 2e-05, "loss": 5.5212, "step": 1674 }, { "epoch": 0.11235201395177248, "grad_norm": 0.14920027591484544, "learning_rate": 2e-05, "loss": 5.5593, "step": 1675 }, { "epoch": 0.11241908978099742, "grad_norm": 0.13860587985135756, "learning_rate": 2e-05, "loss": 5.5041, "step": 1676 }, { "epoch": 0.11248616561022236, "grad_norm": 0.14374442973273263, "learning_rate": 2e-05, "loss": 5.3825, "step": 1677 }, { "epoch": 0.1125532414394473, "grad_norm": 0.15242830165851026, "learning_rate": 2e-05, "loss": 5.4534, "step": 1678 }, { "epoch": 0.11262031726867223, "grad_norm": 0.13789350788196741, "learning_rate": 2e-05, "loss": 5.4437, "step": 1679 }, { "epoch": 0.11268739309789717, "grad_norm": 0.14727969116791859, "learning_rate": 2e-05, "loss": 5.4111, "step": 1680 }, { "epoch": 0.11275446892712211, "grad_norm": 0.14214987824960282, "learning_rate": 2e-05, "loss": 5.4867, "step": 1681 }, { "epoch": 0.11282154475634705, "grad_norm": 0.14595970499774005, "learning_rate": 2e-05, "loss": 5.6435, "step": 1682 }, { "epoch": 0.11288862058557199, "grad_norm": 0.1434022753861315, "learning_rate": 2e-05, "loss": 5.5328, "step": 1683 }, { "epoch": 0.11295569641479693, "grad_norm": 0.1457297480536738, "learning_rate": 2e-05, "loss": 5.4568, "step": 1684 }, { "epoch": 0.11302277224402187, "grad_norm": 0.14987248755891258, "learning_rate": 2e-05, "loss": 5.4784, "step": 1685 }, { "epoch": 0.1130898480732468, "grad_norm": 0.13987902377617775, "learning_rate": 2e-05, "loss": 5.4271, "step": 1686 }, { "epoch": 0.11315692390247174, "grad_norm": 0.1475009358990396, "learning_rate": 2e-05, "loss": 5.5332, "step": 1687 }, { "epoch": 0.11322399973169668, "grad_norm": 0.13862240008295632, "learning_rate": 2e-05, "loss": 5.3133, "step": 1688 }, { "epoch": 0.11329107556092162, "grad_norm": 0.14104596315377577, "learning_rate": 2e-05, "loss": 5.5168, "step": 1689 }, { "epoch": 0.11335815139014656, "grad_norm": 0.13613384468398085, "learning_rate": 2e-05, "loss": 5.4714, "step": 1690 }, { "epoch": 0.1134252272193715, "grad_norm": 0.15111878256758637, "learning_rate": 2e-05, "loss": 5.4363, "step": 1691 }, { "epoch": 0.11349230304859644, "grad_norm": 0.15563268135133437, "learning_rate": 2e-05, "loss": 5.2485, "step": 1692 }, { "epoch": 0.11355937887782137, "grad_norm": 0.14284752130755368, "learning_rate": 2e-05, "loss": 5.5026, "step": 1693 }, { "epoch": 0.11362645470704631, "grad_norm": 0.14522601727915224, "learning_rate": 2e-05, "loss": 5.3625, "step": 1694 }, { "epoch": 0.11369353053627125, "grad_norm": 0.14841178320487514, "learning_rate": 2e-05, "loss": 5.5814, "step": 1695 }, { "epoch": 0.11376060636549619, "grad_norm": 0.15210437457114429, "learning_rate": 2e-05, "loss": 5.3907, "step": 1696 }, { "epoch": 0.11382768219472113, "grad_norm": 0.14710045648465214, "learning_rate": 2e-05, "loss": 5.5314, "step": 1697 }, { "epoch": 0.11389475802394607, "grad_norm": 0.14721650360640648, "learning_rate": 2e-05, "loss": 5.4857, "step": 1698 }, { "epoch": 0.113961833853171, "grad_norm": 0.14009601987820763, "learning_rate": 2e-05, "loss": 5.4213, "step": 1699 }, { "epoch": 0.11402890968239594, "grad_norm": 0.1426760604779711, "learning_rate": 2e-05, "loss": 5.4738, "step": 1700 }, { "epoch": 0.11409598551162088, "grad_norm": 0.14925255464763285, "learning_rate": 2e-05, "loss": 5.3759, "step": 1701 }, { "epoch": 0.11416306134084582, "grad_norm": 0.1413761049364667, "learning_rate": 2e-05, "loss": 5.386, "step": 1702 }, { "epoch": 0.11423013717007076, "grad_norm": 0.14321421076611238, "learning_rate": 2e-05, "loss": 5.5736, "step": 1703 }, { "epoch": 0.1142972129992957, "grad_norm": 0.16049628905342794, "learning_rate": 2e-05, "loss": 5.3788, "step": 1704 }, { "epoch": 0.11436428882852065, "grad_norm": 0.148256803816772, "learning_rate": 2e-05, "loss": 5.396, "step": 1705 }, { "epoch": 0.11443136465774559, "grad_norm": 0.14364586016983996, "learning_rate": 2e-05, "loss": 5.4314, "step": 1706 }, { "epoch": 0.11449844048697053, "grad_norm": 0.1467029419685999, "learning_rate": 2e-05, "loss": 5.3013, "step": 1707 }, { "epoch": 0.11456551631619546, "grad_norm": 0.15522267515990815, "learning_rate": 2e-05, "loss": 5.3878, "step": 1708 }, { "epoch": 0.1146325921454204, "grad_norm": 0.14234215779710443, "learning_rate": 2e-05, "loss": 5.468, "step": 1709 }, { "epoch": 0.11469966797464534, "grad_norm": 0.14477630312220202, "learning_rate": 2e-05, "loss": 5.4098, "step": 1710 }, { "epoch": 0.11476674380387028, "grad_norm": 0.1509622446590184, "learning_rate": 2e-05, "loss": 5.5572, "step": 1711 }, { "epoch": 0.11483381963309522, "grad_norm": 0.14139622715702943, "learning_rate": 2e-05, "loss": 5.4228, "step": 1712 }, { "epoch": 0.11490089546232016, "grad_norm": 0.13728283539232577, "learning_rate": 2e-05, "loss": 5.3704, "step": 1713 }, { "epoch": 0.1149679712915451, "grad_norm": 0.14385065722903867, "learning_rate": 2e-05, "loss": 5.5163, "step": 1714 }, { "epoch": 0.11503504712077003, "grad_norm": 0.1469554980532785, "learning_rate": 2e-05, "loss": 5.3983, "step": 1715 }, { "epoch": 0.11510212294999497, "grad_norm": 0.14154326326620378, "learning_rate": 2e-05, "loss": 5.5492, "step": 1716 }, { "epoch": 0.11516919877921991, "grad_norm": 0.14635400548053296, "learning_rate": 2e-05, "loss": 5.5457, "step": 1717 }, { "epoch": 0.11523627460844485, "grad_norm": 0.14685027467780085, "learning_rate": 2e-05, "loss": 5.6477, "step": 1718 }, { "epoch": 0.11530335043766979, "grad_norm": 0.1406667127728588, "learning_rate": 2e-05, "loss": 5.4419, "step": 1719 }, { "epoch": 0.11537042626689473, "grad_norm": 0.141575020626003, "learning_rate": 2e-05, "loss": 5.4964, "step": 1720 }, { "epoch": 0.11543750209611967, "grad_norm": 0.14438793775109762, "learning_rate": 2e-05, "loss": 5.5023, "step": 1721 }, { "epoch": 0.1155045779253446, "grad_norm": 0.13945321539802924, "learning_rate": 2e-05, "loss": 5.5138, "step": 1722 }, { "epoch": 0.11557165375456954, "grad_norm": 0.14060607284160484, "learning_rate": 2e-05, "loss": 5.4901, "step": 1723 }, { "epoch": 0.11563872958379448, "grad_norm": 0.1362904880946167, "learning_rate": 2e-05, "loss": 5.31, "step": 1724 }, { "epoch": 0.11570580541301942, "grad_norm": 0.14362229052016126, "learning_rate": 2e-05, "loss": 5.5493, "step": 1725 }, { "epoch": 0.11577288124224436, "grad_norm": 0.1417223455591491, "learning_rate": 2e-05, "loss": 5.4912, "step": 1726 }, { "epoch": 0.1158399570714693, "grad_norm": 0.13874346449573324, "learning_rate": 2e-05, "loss": 5.4304, "step": 1727 }, { "epoch": 0.11590703290069423, "grad_norm": 0.1402481347128242, "learning_rate": 2e-05, "loss": 5.4172, "step": 1728 }, { "epoch": 0.11597410872991917, "grad_norm": 0.14179166456469974, "learning_rate": 2e-05, "loss": 5.4687, "step": 1729 }, { "epoch": 0.11604118455914411, "grad_norm": 0.14653321047772336, "learning_rate": 2e-05, "loss": 5.3887, "step": 1730 }, { "epoch": 0.11610826038836905, "grad_norm": 0.13824177741203886, "learning_rate": 2e-05, "loss": 5.4771, "step": 1731 }, { "epoch": 0.11617533621759399, "grad_norm": 0.1342768519973022, "learning_rate": 2e-05, "loss": 5.4803, "step": 1732 }, { "epoch": 0.11624241204681893, "grad_norm": 0.1456812486907552, "learning_rate": 2e-05, "loss": 5.4328, "step": 1733 }, { "epoch": 0.11630948787604387, "grad_norm": 0.1513258729106233, "learning_rate": 2e-05, "loss": 5.4768, "step": 1734 }, { "epoch": 0.1163765637052688, "grad_norm": 0.1431748355942768, "learning_rate": 2e-05, "loss": 5.3471, "step": 1735 }, { "epoch": 0.11644363953449374, "grad_norm": 0.14893919113240348, "learning_rate": 2e-05, "loss": 5.3854, "step": 1736 }, { "epoch": 0.11651071536371868, "grad_norm": 0.14415972683589934, "learning_rate": 2e-05, "loss": 5.4222, "step": 1737 }, { "epoch": 0.11657779119294362, "grad_norm": 0.14288291873988201, "learning_rate": 2e-05, "loss": 5.4704, "step": 1738 }, { "epoch": 0.11664486702216856, "grad_norm": 0.1430572554290256, "learning_rate": 2e-05, "loss": 5.3307, "step": 1739 }, { "epoch": 0.1167119428513935, "grad_norm": 0.1408172983349332, "learning_rate": 2e-05, "loss": 5.5201, "step": 1740 }, { "epoch": 0.11677901868061843, "grad_norm": 0.15343006715211654, "learning_rate": 2e-05, "loss": 5.3665, "step": 1741 }, { "epoch": 0.11684609450984337, "grad_norm": 0.14408194940862395, "learning_rate": 2e-05, "loss": 5.4308, "step": 1742 }, { "epoch": 0.11691317033906831, "grad_norm": 0.14272070304441373, "learning_rate": 2e-05, "loss": 5.3602, "step": 1743 }, { "epoch": 0.11698024616829325, "grad_norm": 0.14756400979045037, "learning_rate": 2e-05, "loss": 5.4559, "step": 1744 }, { "epoch": 0.11704732199751819, "grad_norm": 0.15268672806362318, "learning_rate": 2e-05, "loss": 5.4401, "step": 1745 }, { "epoch": 0.11711439782674313, "grad_norm": 0.1444870186418168, "learning_rate": 2e-05, "loss": 5.485, "step": 1746 }, { "epoch": 0.11718147365596807, "grad_norm": 0.15958947824192685, "learning_rate": 2e-05, "loss": 5.3768, "step": 1747 }, { "epoch": 0.117248549485193, "grad_norm": 0.14276410168388082, "learning_rate": 2e-05, "loss": 5.4266, "step": 1748 }, { "epoch": 0.11731562531441794, "grad_norm": 0.14966506905791563, "learning_rate": 2e-05, "loss": 5.4444, "step": 1749 }, { "epoch": 0.1173827011436429, "grad_norm": 0.1570601019559566, "learning_rate": 2e-05, "loss": 5.6154, "step": 1750 }, { "epoch": 0.11744977697286783, "grad_norm": 0.14403242458640272, "learning_rate": 2e-05, "loss": 5.5484, "step": 1751 }, { "epoch": 0.11751685280209277, "grad_norm": 0.15491311043837644, "learning_rate": 2e-05, "loss": 5.5698, "step": 1752 }, { "epoch": 0.11758392863131771, "grad_norm": 0.15027091376162985, "learning_rate": 2e-05, "loss": 5.4104, "step": 1753 }, { "epoch": 0.11765100446054265, "grad_norm": 0.14855468085812534, "learning_rate": 2e-05, "loss": 5.4722, "step": 1754 }, { "epoch": 0.11771808028976759, "grad_norm": 0.14665593020355294, "learning_rate": 2e-05, "loss": 5.3527, "step": 1755 }, { "epoch": 0.11778515611899253, "grad_norm": 0.14758459949186548, "learning_rate": 2e-05, "loss": 5.4116, "step": 1756 }, { "epoch": 0.11785223194821746, "grad_norm": 0.14748622661312627, "learning_rate": 2e-05, "loss": 5.3467, "step": 1757 }, { "epoch": 0.1179193077774424, "grad_norm": 0.1513088952598225, "learning_rate": 2e-05, "loss": 5.5316, "step": 1758 }, { "epoch": 0.11798638360666734, "grad_norm": 0.1410334433970815, "learning_rate": 2e-05, "loss": 5.3608, "step": 1759 }, { "epoch": 0.11805345943589228, "grad_norm": 0.14319378127285576, "learning_rate": 2e-05, "loss": 5.5108, "step": 1760 }, { "epoch": 0.11812053526511722, "grad_norm": 0.1482146029818278, "learning_rate": 2e-05, "loss": 5.5355, "step": 1761 }, { "epoch": 0.11818761109434216, "grad_norm": 0.15346423024042347, "learning_rate": 2e-05, "loss": 5.4403, "step": 1762 }, { "epoch": 0.1182546869235671, "grad_norm": 0.1418742754596509, "learning_rate": 2e-05, "loss": 5.4136, "step": 1763 }, { "epoch": 0.11832176275279203, "grad_norm": 0.1469556641440429, "learning_rate": 2e-05, "loss": 5.5213, "step": 1764 }, { "epoch": 0.11838883858201697, "grad_norm": 0.15353016253842008, "learning_rate": 2e-05, "loss": 5.4088, "step": 1765 }, { "epoch": 0.11845591441124191, "grad_norm": 0.14449141948659278, "learning_rate": 2e-05, "loss": 5.4051, "step": 1766 }, { "epoch": 0.11852299024046685, "grad_norm": 0.14507206827471877, "learning_rate": 2e-05, "loss": 5.4058, "step": 1767 }, { "epoch": 0.11859006606969179, "grad_norm": 0.15068196251027213, "learning_rate": 2e-05, "loss": 5.6398, "step": 1768 }, { "epoch": 0.11865714189891673, "grad_norm": 0.1477465627195587, "learning_rate": 2e-05, "loss": 5.5693, "step": 1769 }, { "epoch": 0.11872421772814166, "grad_norm": 0.14848756675269678, "learning_rate": 2e-05, "loss": 5.4161, "step": 1770 }, { "epoch": 0.1187912935573666, "grad_norm": 0.14899502324690714, "learning_rate": 2e-05, "loss": 5.5583, "step": 1771 }, { "epoch": 0.11885836938659154, "grad_norm": 0.15169170494286424, "learning_rate": 2e-05, "loss": 5.4646, "step": 1772 }, { "epoch": 0.11892544521581648, "grad_norm": 0.14124669424193217, "learning_rate": 2e-05, "loss": 5.5158, "step": 1773 }, { "epoch": 0.11899252104504142, "grad_norm": 0.15739325085314027, "learning_rate": 2e-05, "loss": 5.4314, "step": 1774 }, { "epoch": 0.11905959687426636, "grad_norm": 0.15627831597940647, "learning_rate": 2e-05, "loss": 5.5581, "step": 1775 }, { "epoch": 0.1191266727034913, "grad_norm": 0.1521970702616037, "learning_rate": 2e-05, "loss": 5.3976, "step": 1776 }, { "epoch": 0.11919374853271623, "grad_norm": 0.1455824628738951, "learning_rate": 2e-05, "loss": 5.3878, "step": 1777 }, { "epoch": 0.11926082436194117, "grad_norm": 0.15565406336028872, "learning_rate": 2e-05, "loss": 5.6286, "step": 1778 }, { "epoch": 0.11932790019116611, "grad_norm": 0.1505384288164595, "learning_rate": 2e-05, "loss": 5.3285, "step": 1779 }, { "epoch": 0.11939497602039105, "grad_norm": 0.13975783166486241, "learning_rate": 2e-05, "loss": 5.4006, "step": 1780 }, { "epoch": 0.11946205184961599, "grad_norm": 0.14950799239837476, "learning_rate": 2e-05, "loss": 5.4227, "step": 1781 }, { "epoch": 0.11952912767884093, "grad_norm": 0.1426771400723545, "learning_rate": 2e-05, "loss": 5.4882, "step": 1782 }, { "epoch": 0.11959620350806587, "grad_norm": 0.13996186209425482, "learning_rate": 2e-05, "loss": 5.5467, "step": 1783 }, { "epoch": 0.1196632793372908, "grad_norm": 0.1485821056601315, "learning_rate": 2e-05, "loss": 5.6052, "step": 1784 }, { "epoch": 0.11973035516651574, "grad_norm": 0.14619350655453636, "learning_rate": 2e-05, "loss": 5.5082, "step": 1785 }, { "epoch": 0.11979743099574068, "grad_norm": 0.15114660555487425, "learning_rate": 2e-05, "loss": 5.4602, "step": 1786 }, { "epoch": 0.11986450682496562, "grad_norm": 0.14429797285235818, "learning_rate": 2e-05, "loss": 5.3817, "step": 1787 }, { "epoch": 0.11993158265419056, "grad_norm": 0.14385447232376583, "learning_rate": 2e-05, "loss": 5.4046, "step": 1788 }, { "epoch": 0.1199986584834155, "grad_norm": 0.1449642799706084, "learning_rate": 2e-05, "loss": 5.4803, "step": 1789 }, { "epoch": 0.12006573431264043, "grad_norm": 0.14700692297466353, "learning_rate": 2e-05, "loss": 5.4412, "step": 1790 }, { "epoch": 0.12013281014186537, "grad_norm": 0.14104283101532653, "learning_rate": 2e-05, "loss": 5.3852, "step": 1791 }, { "epoch": 0.12019988597109031, "grad_norm": 0.14367192373260235, "learning_rate": 2e-05, "loss": 5.5578, "step": 1792 }, { "epoch": 0.12026696180031525, "grad_norm": 0.14372272432025077, "learning_rate": 2e-05, "loss": 5.5144, "step": 1793 }, { "epoch": 0.12033403762954019, "grad_norm": 0.14473254828566526, "learning_rate": 2e-05, "loss": 5.517, "step": 1794 }, { "epoch": 0.12040111345876513, "grad_norm": 0.14677646426660745, "learning_rate": 2e-05, "loss": 5.5758, "step": 1795 }, { "epoch": 0.12046818928799008, "grad_norm": 0.14434640604466636, "learning_rate": 2e-05, "loss": 5.4673, "step": 1796 }, { "epoch": 0.12053526511721502, "grad_norm": 0.13746608510826885, "learning_rate": 2e-05, "loss": 5.5242, "step": 1797 }, { "epoch": 0.12060234094643996, "grad_norm": 0.13860857264334797, "learning_rate": 2e-05, "loss": 5.543, "step": 1798 }, { "epoch": 0.1206694167756649, "grad_norm": 0.14167408581935662, "learning_rate": 2e-05, "loss": 5.4349, "step": 1799 }, { "epoch": 0.12073649260488983, "grad_norm": 0.14362706352981583, "learning_rate": 2e-05, "loss": 5.4523, "step": 1800 }, { "epoch": 0.12080356843411477, "grad_norm": 0.14019906744931399, "learning_rate": 2e-05, "loss": 5.3003, "step": 1801 }, { "epoch": 0.12087064426333971, "grad_norm": 0.14997627086751703, "learning_rate": 2e-05, "loss": 5.4395, "step": 1802 }, { "epoch": 0.12093772009256465, "grad_norm": 0.1411805325612246, "learning_rate": 2e-05, "loss": 5.4087, "step": 1803 }, { "epoch": 0.12100479592178959, "grad_norm": 0.14429349603104452, "learning_rate": 2e-05, "loss": 5.5002, "step": 1804 }, { "epoch": 0.12107187175101453, "grad_norm": 0.14134480976193212, "learning_rate": 2e-05, "loss": 5.4201, "step": 1805 }, { "epoch": 0.12113894758023946, "grad_norm": 0.14400373537430505, "learning_rate": 2e-05, "loss": 5.4729, "step": 1806 }, { "epoch": 0.1212060234094644, "grad_norm": 0.13692019638223304, "learning_rate": 2e-05, "loss": 5.5117, "step": 1807 }, { "epoch": 0.12127309923868934, "grad_norm": 0.14467803600626575, "learning_rate": 2e-05, "loss": 5.4546, "step": 1808 }, { "epoch": 0.12134017506791428, "grad_norm": 0.14163202456653476, "learning_rate": 2e-05, "loss": 5.28, "step": 1809 }, { "epoch": 0.12140725089713922, "grad_norm": 0.14784062979483414, "learning_rate": 2e-05, "loss": 5.4748, "step": 1810 }, { "epoch": 0.12147432672636416, "grad_norm": 0.15147882453855518, "learning_rate": 2e-05, "loss": 5.3962, "step": 1811 }, { "epoch": 0.1215414025555891, "grad_norm": 0.1457760362401531, "learning_rate": 2e-05, "loss": 5.4332, "step": 1812 }, { "epoch": 0.12160847838481403, "grad_norm": 0.15215096305764989, "learning_rate": 2e-05, "loss": 5.4669, "step": 1813 }, { "epoch": 0.12167555421403897, "grad_norm": 0.146954077302628, "learning_rate": 2e-05, "loss": 5.4683, "step": 1814 }, { "epoch": 0.12174263004326391, "grad_norm": 0.1473960805957019, "learning_rate": 2e-05, "loss": 5.5006, "step": 1815 }, { "epoch": 0.12180970587248885, "grad_norm": 0.14899294016149073, "learning_rate": 2e-05, "loss": 5.4919, "step": 1816 }, { "epoch": 0.12187678170171379, "grad_norm": 0.15536722103927272, "learning_rate": 2e-05, "loss": 5.4115, "step": 1817 }, { "epoch": 0.12194385753093873, "grad_norm": 0.1457999949778778, "learning_rate": 2e-05, "loss": 5.415, "step": 1818 }, { "epoch": 0.12201093336016366, "grad_norm": 0.16759430394248373, "learning_rate": 2e-05, "loss": 5.4224, "step": 1819 }, { "epoch": 0.1220780091893886, "grad_norm": 0.14509672532067855, "learning_rate": 2e-05, "loss": 5.3685, "step": 1820 }, { "epoch": 0.12214508501861354, "grad_norm": 0.14006795050158746, "learning_rate": 2e-05, "loss": 5.5244, "step": 1821 }, { "epoch": 0.12221216084783848, "grad_norm": 0.14884161729066828, "learning_rate": 2e-05, "loss": 5.521, "step": 1822 }, { "epoch": 0.12227923667706342, "grad_norm": 0.1660048239175091, "learning_rate": 2e-05, "loss": 5.5241, "step": 1823 }, { "epoch": 0.12234631250628836, "grad_norm": 0.1451615853965432, "learning_rate": 2e-05, "loss": 5.2139, "step": 1824 }, { "epoch": 0.1224133883355133, "grad_norm": 0.14097635186848775, "learning_rate": 2e-05, "loss": 5.4372, "step": 1825 }, { "epoch": 0.12248046416473823, "grad_norm": 0.14200340832336866, "learning_rate": 2e-05, "loss": 5.3357, "step": 1826 }, { "epoch": 0.12254753999396317, "grad_norm": 0.14119726168484806, "learning_rate": 2e-05, "loss": 5.3946, "step": 1827 }, { "epoch": 0.12261461582318811, "grad_norm": 0.14972409064749148, "learning_rate": 2e-05, "loss": 5.3873, "step": 1828 }, { "epoch": 0.12268169165241305, "grad_norm": 0.1444109028534444, "learning_rate": 2e-05, "loss": 5.4474, "step": 1829 }, { "epoch": 0.12274876748163799, "grad_norm": 0.14667671317738692, "learning_rate": 2e-05, "loss": 5.4004, "step": 1830 }, { "epoch": 0.12281584331086293, "grad_norm": 0.15319668421207489, "learning_rate": 2e-05, "loss": 5.4554, "step": 1831 }, { "epoch": 0.12288291914008787, "grad_norm": 0.14859146718439736, "learning_rate": 2e-05, "loss": 5.4052, "step": 1832 }, { "epoch": 0.1229499949693128, "grad_norm": 0.15259370193275856, "learning_rate": 2e-05, "loss": 5.4208, "step": 1833 }, { "epoch": 0.12301707079853774, "grad_norm": 0.16701384899575464, "learning_rate": 2e-05, "loss": 5.3841, "step": 1834 }, { "epoch": 0.12308414662776268, "grad_norm": 0.15466878829413863, "learning_rate": 2e-05, "loss": 5.4822, "step": 1835 }, { "epoch": 0.12315122245698762, "grad_norm": 0.1412058327952156, "learning_rate": 2e-05, "loss": 5.4583, "step": 1836 }, { "epoch": 0.12321829828621256, "grad_norm": 0.1596037769834483, "learning_rate": 2e-05, "loss": 5.4654, "step": 1837 }, { "epoch": 0.1232853741154375, "grad_norm": 0.14664410190725172, "learning_rate": 2e-05, "loss": 5.3442, "step": 1838 }, { "epoch": 0.12335244994466243, "grad_norm": 0.15262053663577485, "learning_rate": 2e-05, "loss": 5.3854, "step": 1839 }, { "epoch": 0.12341952577388737, "grad_norm": 0.14092589111297743, "learning_rate": 2e-05, "loss": 5.5207, "step": 1840 }, { "epoch": 0.12348660160311233, "grad_norm": 0.15231160188741388, "learning_rate": 2e-05, "loss": 5.4059, "step": 1841 }, { "epoch": 0.12355367743233726, "grad_norm": 0.14521952531422674, "learning_rate": 2e-05, "loss": 5.4591, "step": 1842 }, { "epoch": 0.1236207532615622, "grad_norm": 0.1509501358463259, "learning_rate": 2e-05, "loss": 5.5053, "step": 1843 }, { "epoch": 0.12368782909078714, "grad_norm": 0.14549316231166712, "learning_rate": 2e-05, "loss": 5.3897, "step": 1844 }, { "epoch": 0.12375490492001208, "grad_norm": 0.14614218362454823, "learning_rate": 2e-05, "loss": 5.4183, "step": 1845 }, { "epoch": 0.12382198074923702, "grad_norm": 0.14321150185029133, "learning_rate": 2e-05, "loss": 5.2831, "step": 1846 }, { "epoch": 0.12388905657846196, "grad_norm": 0.1401953172109717, "learning_rate": 2e-05, "loss": 5.5607, "step": 1847 }, { "epoch": 0.1239561324076869, "grad_norm": 0.15283473959572416, "learning_rate": 2e-05, "loss": 5.5246, "step": 1848 }, { "epoch": 0.12402320823691183, "grad_norm": 0.14542488368869816, "learning_rate": 2e-05, "loss": 5.4721, "step": 1849 }, { "epoch": 0.12409028406613677, "grad_norm": 0.142788269645899, "learning_rate": 2e-05, "loss": 5.3877, "step": 1850 }, { "epoch": 0.12415735989536171, "grad_norm": 0.1596319510109966, "learning_rate": 2e-05, "loss": 5.3418, "step": 1851 }, { "epoch": 0.12422443572458665, "grad_norm": 0.14236119882169496, "learning_rate": 2e-05, "loss": 5.5382, "step": 1852 }, { "epoch": 0.12429151155381159, "grad_norm": 0.1450573742205188, "learning_rate": 2e-05, "loss": 5.4858, "step": 1853 }, { "epoch": 0.12435858738303653, "grad_norm": 0.1439699942350802, "learning_rate": 2e-05, "loss": 5.3036, "step": 1854 }, { "epoch": 0.12442566321226146, "grad_norm": 0.14623640847729258, "learning_rate": 2e-05, "loss": 5.4425, "step": 1855 }, { "epoch": 0.1244927390414864, "grad_norm": 0.14485557228166146, "learning_rate": 2e-05, "loss": 5.4867, "step": 1856 }, { "epoch": 0.12455981487071134, "grad_norm": 0.13982738752351312, "learning_rate": 2e-05, "loss": 5.4371, "step": 1857 }, { "epoch": 0.12462689069993628, "grad_norm": 0.14719835269097867, "learning_rate": 2e-05, "loss": 5.4933, "step": 1858 }, { "epoch": 0.12469396652916122, "grad_norm": 0.14078426163277716, "learning_rate": 2e-05, "loss": 5.4916, "step": 1859 }, { "epoch": 0.12476104235838616, "grad_norm": 0.14297307890878072, "learning_rate": 2e-05, "loss": 5.3307, "step": 1860 }, { "epoch": 0.1248281181876111, "grad_norm": 0.139571070028144, "learning_rate": 2e-05, "loss": 5.336, "step": 1861 }, { "epoch": 0.12489519401683603, "grad_norm": 0.14754626481044328, "learning_rate": 2e-05, "loss": 5.5069, "step": 1862 }, { "epoch": 0.12496226984606097, "grad_norm": 0.1476192120179093, "learning_rate": 2e-05, "loss": 5.4071, "step": 1863 }, { "epoch": 0.12502934567528592, "grad_norm": 0.13944848725842776, "learning_rate": 2e-05, "loss": 5.4003, "step": 1864 }, { "epoch": 0.12509642150451086, "grad_norm": 0.13628749788420894, "learning_rate": 2e-05, "loss": 5.5264, "step": 1865 }, { "epoch": 0.1251634973337358, "grad_norm": 0.1406361742064489, "learning_rate": 2e-05, "loss": 5.3506, "step": 1866 }, { "epoch": 0.12523057316296074, "grad_norm": 0.15379868404412883, "learning_rate": 2e-05, "loss": 5.5913, "step": 1867 }, { "epoch": 0.12529764899218568, "grad_norm": 0.14041029202813018, "learning_rate": 2e-05, "loss": 5.5097, "step": 1868 }, { "epoch": 0.12536472482141062, "grad_norm": 0.1375451448225345, "learning_rate": 2e-05, "loss": 5.4259, "step": 1869 }, { "epoch": 0.12543180065063556, "grad_norm": 0.14495931203673493, "learning_rate": 2e-05, "loss": 5.3572, "step": 1870 }, { "epoch": 0.1254988764798605, "grad_norm": 0.13947340564711863, "learning_rate": 2e-05, "loss": 5.6391, "step": 1871 }, { "epoch": 0.12556595230908543, "grad_norm": 0.14001171728829823, "learning_rate": 2e-05, "loss": 5.4373, "step": 1872 }, { "epoch": 0.12563302813831037, "grad_norm": 0.13996946780171685, "learning_rate": 2e-05, "loss": 5.4768, "step": 1873 }, { "epoch": 0.1257001039675353, "grad_norm": 0.15940514558606575, "learning_rate": 2e-05, "loss": 5.5038, "step": 1874 }, { "epoch": 0.12576717979676025, "grad_norm": 0.13998073533499764, "learning_rate": 2e-05, "loss": 5.3869, "step": 1875 }, { "epoch": 0.1258342556259852, "grad_norm": 0.14507551405040323, "learning_rate": 2e-05, "loss": 5.5977, "step": 1876 }, { "epoch": 0.12590133145521012, "grad_norm": 0.14912971550955875, "learning_rate": 2e-05, "loss": 5.5175, "step": 1877 }, { "epoch": 0.12596840728443506, "grad_norm": 0.1436517082454124, "learning_rate": 2e-05, "loss": 5.4746, "step": 1878 }, { "epoch": 0.12603548311366, "grad_norm": 0.14712974202744067, "learning_rate": 2e-05, "loss": 5.4649, "step": 1879 }, { "epoch": 0.12610255894288494, "grad_norm": 0.14958263073342865, "learning_rate": 2e-05, "loss": 5.5377, "step": 1880 }, { "epoch": 0.12616963477210988, "grad_norm": 0.13893508607020036, "learning_rate": 2e-05, "loss": 5.5317, "step": 1881 }, { "epoch": 0.12623671060133482, "grad_norm": 0.14249023315739473, "learning_rate": 2e-05, "loss": 5.5115, "step": 1882 }, { "epoch": 0.12630378643055976, "grad_norm": 0.1390799480003924, "learning_rate": 2e-05, "loss": 5.4715, "step": 1883 }, { "epoch": 0.1263708622597847, "grad_norm": 0.13853824550256427, "learning_rate": 2e-05, "loss": 5.7263, "step": 1884 }, { "epoch": 0.12643793808900963, "grad_norm": 0.1429390863825981, "learning_rate": 2e-05, "loss": 5.4034, "step": 1885 }, { "epoch": 0.12650501391823457, "grad_norm": 0.1358428277225231, "learning_rate": 2e-05, "loss": 5.4367, "step": 1886 }, { "epoch": 0.1265720897474595, "grad_norm": 0.13757380572600764, "learning_rate": 2e-05, "loss": 5.5579, "step": 1887 }, { "epoch": 0.12663916557668445, "grad_norm": 0.13883640479380413, "learning_rate": 2e-05, "loss": 5.5154, "step": 1888 }, { "epoch": 0.1267062414059094, "grad_norm": 0.14680222118438638, "learning_rate": 2e-05, "loss": 5.5166, "step": 1889 }, { "epoch": 0.12677331723513433, "grad_norm": 0.14522425770104957, "learning_rate": 2e-05, "loss": 5.4997, "step": 1890 }, { "epoch": 0.12684039306435926, "grad_norm": 0.1410063239950081, "learning_rate": 2e-05, "loss": 5.4814, "step": 1891 }, { "epoch": 0.1269074688935842, "grad_norm": 0.14273449715245337, "learning_rate": 2e-05, "loss": 5.3577, "step": 1892 }, { "epoch": 0.12697454472280914, "grad_norm": 0.14514641394216807, "learning_rate": 2e-05, "loss": 5.5355, "step": 1893 }, { "epoch": 0.12704162055203408, "grad_norm": 0.15444539024524154, "learning_rate": 2e-05, "loss": 5.4365, "step": 1894 }, { "epoch": 0.12710869638125902, "grad_norm": 0.1418333521815973, "learning_rate": 2e-05, "loss": 5.3972, "step": 1895 }, { "epoch": 0.12717577221048396, "grad_norm": 0.14020006919043362, "learning_rate": 2e-05, "loss": 5.4056, "step": 1896 }, { "epoch": 0.1272428480397089, "grad_norm": 0.14581041269686396, "learning_rate": 2e-05, "loss": 5.4439, "step": 1897 }, { "epoch": 0.12730992386893383, "grad_norm": 0.1494569391065688, "learning_rate": 2e-05, "loss": 5.4719, "step": 1898 }, { "epoch": 0.12737699969815877, "grad_norm": 0.14118852772518073, "learning_rate": 2e-05, "loss": 5.291, "step": 1899 }, { "epoch": 0.1274440755273837, "grad_norm": 0.1516878162001335, "learning_rate": 2e-05, "loss": 5.4671, "step": 1900 }, { "epoch": 0.12751115135660865, "grad_norm": 0.14412963651074975, "learning_rate": 2e-05, "loss": 5.4391, "step": 1901 }, { "epoch": 0.1275782271858336, "grad_norm": 0.15173290793838062, "learning_rate": 2e-05, "loss": 5.4234, "step": 1902 }, { "epoch": 0.12764530301505853, "grad_norm": 0.15498519094169122, "learning_rate": 2e-05, "loss": 5.6382, "step": 1903 }, { "epoch": 0.12771237884428346, "grad_norm": 0.14258458657910297, "learning_rate": 2e-05, "loss": 5.4708, "step": 1904 }, { "epoch": 0.1277794546735084, "grad_norm": 0.14485376639806907, "learning_rate": 2e-05, "loss": 5.6236, "step": 1905 }, { "epoch": 0.12784653050273334, "grad_norm": 0.1396741327894716, "learning_rate": 2e-05, "loss": 5.3889, "step": 1906 }, { "epoch": 0.12791360633195828, "grad_norm": 0.14389001065350449, "learning_rate": 2e-05, "loss": 5.5392, "step": 1907 }, { "epoch": 0.12798068216118322, "grad_norm": 0.1397066841512997, "learning_rate": 2e-05, "loss": 5.4658, "step": 1908 }, { "epoch": 0.12804775799040816, "grad_norm": 0.14517243869301452, "learning_rate": 2e-05, "loss": 5.4767, "step": 1909 }, { "epoch": 0.1281148338196331, "grad_norm": 0.1388457872976633, "learning_rate": 2e-05, "loss": 5.5761, "step": 1910 }, { "epoch": 0.12818190964885803, "grad_norm": 0.14866331702902877, "learning_rate": 2e-05, "loss": 5.5116, "step": 1911 }, { "epoch": 0.12824898547808297, "grad_norm": 0.14075411505799154, "learning_rate": 2e-05, "loss": 5.4761, "step": 1912 }, { "epoch": 0.1283160613073079, "grad_norm": 0.1432050551367564, "learning_rate": 2e-05, "loss": 5.5468, "step": 1913 }, { "epoch": 0.12838313713653285, "grad_norm": 0.13859470417222766, "learning_rate": 2e-05, "loss": 5.5306, "step": 1914 }, { "epoch": 0.1284502129657578, "grad_norm": 0.14500776706881147, "learning_rate": 2e-05, "loss": 5.3401, "step": 1915 }, { "epoch": 0.12851728879498273, "grad_norm": 0.1418737488216362, "learning_rate": 2e-05, "loss": 5.5752, "step": 1916 }, { "epoch": 0.12858436462420766, "grad_norm": 0.137981971877405, "learning_rate": 2e-05, "loss": 5.5088, "step": 1917 }, { "epoch": 0.1286514404534326, "grad_norm": 0.1462245767545126, "learning_rate": 2e-05, "loss": 5.4777, "step": 1918 }, { "epoch": 0.12871851628265754, "grad_norm": 0.1356981712344521, "learning_rate": 2e-05, "loss": 5.5119, "step": 1919 }, { "epoch": 0.12878559211188248, "grad_norm": 0.14204418232973992, "learning_rate": 2e-05, "loss": 5.6162, "step": 1920 }, { "epoch": 0.12885266794110742, "grad_norm": 0.15423285890345337, "learning_rate": 2e-05, "loss": 5.3968, "step": 1921 }, { "epoch": 0.12891974377033236, "grad_norm": 0.1424266148329285, "learning_rate": 2e-05, "loss": 5.4853, "step": 1922 }, { "epoch": 0.1289868195995573, "grad_norm": 0.1448449818573184, "learning_rate": 2e-05, "loss": 5.478, "step": 1923 }, { "epoch": 0.12905389542878223, "grad_norm": 0.14961858589307625, "learning_rate": 2e-05, "loss": 5.5355, "step": 1924 }, { "epoch": 0.12912097125800717, "grad_norm": 0.14587724104963767, "learning_rate": 2e-05, "loss": 5.3631, "step": 1925 }, { "epoch": 0.1291880470872321, "grad_norm": 0.1413589834181679, "learning_rate": 2e-05, "loss": 5.4963, "step": 1926 }, { "epoch": 0.12925512291645705, "grad_norm": 0.14994918456690476, "learning_rate": 2e-05, "loss": 5.639, "step": 1927 }, { "epoch": 0.129322198745682, "grad_norm": 0.1514793497508335, "learning_rate": 2e-05, "loss": 5.44, "step": 1928 }, { "epoch": 0.12938927457490693, "grad_norm": 0.13849240355975054, "learning_rate": 2e-05, "loss": 5.4278, "step": 1929 }, { "epoch": 0.12945635040413186, "grad_norm": 0.14574276114739315, "learning_rate": 2e-05, "loss": 5.3476, "step": 1930 }, { "epoch": 0.1295234262333568, "grad_norm": 0.14827970077252967, "learning_rate": 2e-05, "loss": 5.3418, "step": 1931 }, { "epoch": 0.12959050206258174, "grad_norm": 0.1504019455227756, "learning_rate": 2e-05, "loss": 5.4283, "step": 1932 }, { "epoch": 0.12965757789180668, "grad_norm": 0.14784610859339736, "learning_rate": 2e-05, "loss": 5.5211, "step": 1933 }, { "epoch": 0.12972465372103162, "grad_norm": 0.14782922214675503, "learning_rate": 2e-05, "loss": 5.4848, "step": 1934 }, { "epoch": 0.12979172955025656, "grad_norm": 0.14866727881423464, "learning_rate": 2e-05, "loss": 5.5655, "step": 1935 }, { "epoch": 0.1298588053794815, "grad_norm": 0.14702017211930346, "learning_rate": 2e-05, "loss": 5.4899, "step": 1936 }, { "epoch": 0.12992588120870643, "grad_norm": 0.14490956006807051, "learning_rate": 2e-05, "loss": 5.4319, "step": 1937 }, { "epoch": 0.12999295703793137, "grad_norm": 0.14604952746485914, "learning_rate": 2e-05, "loss": 5.3937, "step": 1938 }, { "epoch": 0.1300600328671563, "grad_norm": 0.14661868018948063, "learning_rate": 2e-05, "loss": 5.5462, "step": 1939 }, { "epoch": 0.13012710869638125, "grad_norm": 0.1412940904409667, "learning_rate": 2e-05, "loss": 5.4726, "step": 1940 }, { "epoch": 0.1301941845256062, "grad_norm": 0.1399941198613891, "learning_rate": 2e-05, "loss": 5.4317, "step": 1941 }, { "epoch": 0.13026126035483113, "grad_norm": 0.14209111357235482, "learning_rate": 2e-05, "loss": 5.413, "step": 1942 }, { "epoch": 0.13032833618405607, "grad_norm": 0.14345695700888858, "learning_rate": 2e-05, "loss": 5.4637, "step": 1943 }, { "epoch": 0.130395412013281, "grad_norm": 0.140390032319596, "learning_rate": 2e-05, "loss": 5.4833, "step": 1944 }, { "epoch": 0.13046248784250594, "grad_norm": 0.14957044459065189, "learning_rate": 2e-05, "loss": 5.4601, "step": 1945 }, { "epoch": 0.13052956367173088, "grad_norm": 0.15047774100179956, "learning_rate": 2e-05, "loss": 5.4188, "step": 1946 }, { "epoch": 0.13059663950095582, "grad_norm": 0.13661743945238827, "learning_rate": 2e-05, "loss": 5.4341, "step": 1947 }, { "epoch": 0.13066371533018076, "grad_norm": 0.1479856596852377, "learning_rate": 2e-05, "loss": 5.4485, "step": 1948 }, { "epoch": 0.1307307911594057, "grad_norm": 0.14849125874761002, "learning_rate": 2e-05, "loss": 5.3778, "step": 1949 }, { "epoch": 0.13079786698863063, "grad_norm": 0.138677944949231, "learning_rate": 2e-05, "loss": 5.5215, "step": 1950 }, { "epoch": 0.13086494281785557, "grad_norm": 0.14113947834098162, "learning_rate": 2e-05, "loss": 5.401, "step": 1951 }, { "epoch": 0.1309320186470805, "grad_norm": 0.1394489516464427, "learning_rate": 2e-05, "loss": 5.5037, "step": 1952 }, { "epoch": 0.13099909447630545, "grad_norm": 0.14306128135125692, "learning_rate": 2e-05, "loss": 5.4163, "step": 1953 }, { "epoch": 0.1310661703055304, "grad_norm": 0.13735841387593792, "learning_rate": 2e-05, "loss": 5.2974, "step": 1954 }, { "epoch": 0.13113324613475535, "grad_norm": 0.13723900171513598, "learning_rate": 2e-05, "loss": 5.5029, "step": 1955 }, { "epoch": 0.1312003219639803, "grad_norm": 0.1465056126284347, "learning_rate": 2e-05, "loss": 5.5137, "step": 1956 }, { "epoch": 0.13126739779320523, "grad_norm": 0.14029783326024664, "learning_rate": 2e-05, "loss": 5.7241, "step": 1957 }, { "epoch": 0.13133447362243017, "grad_norm": 0.13784125081570311, "learning_rate": 2e-05, "loss": 5.3664, "step": 1958 }, { "epoch": 0.1314015494516551, "grad_norm": 0.1409874070215786, "learning_rate": 2e-05, "loss": 5.4472, "step": 1959 }, { "epoch": 0.13146862528088005, "grad_norm": 0.14270340617250896, "learning_rate": 2e-05, "loss": 5.5299, "step": 1960 }, { "epoch": 0.13153570111010499, "grad_norm": 0.14912783971889138, "learning_rate": 2e-05, "loss": 5.4865, "step": 1961 }, { "epoch": 0.13160277693932992, "grad_norm": 0.1422516542171241, "learning_rate": 2e-05, "loss": 5.4486, "step": 1962 }, { "epoch": 0.13166985276855486, "grad_norm": 0.1453387064797123, "learning_rate": 2e-05, "loss": 5.3643, "step": 1963 }, { "epoch": 0.1317369285977798, "grad_norm": 0.14949059271812726, "learning_rate": 2e-05, "loss": 5.6873, "step": 1964 }, { "epoch": 0.13180400442700474, "grad_norm": 0.1404432059411551, "learning_rate": 2e-05, "loss": 5.5055, "step": 1965 }, { "epoch": 0.13187108025622968, "grad_norm": 0.1432290805989564, "learning_rate": 2e-05, "loss": 5.4086, "step": 1966 }, { "epoch": 0.13193815608545462, "grad_norm": 0.14123093013397076, "learning_rate": 2e-05, "loss": 5.4156, "step": 1967 }, { "epoch": 0.13200523191467955, "grad_norm": 0.14214029368687892, "learning_rate": 2e-05, "loss": 5.3892, "step": 1968 }, { "epoch": 0.1320723077439045, "grad_norm": 0.135519582306652, "learning_rate": 2e-05, "loss": 5.4769, "step": 1969 }, { "epoch": 0.13213938357312943, "grad_norm": 0.14237597309070363, "learning_rate": 2e-05, "loss": 5.5432, "step": 1970 }, { "epoch": 0.13220645940235437, "grad_norm": 0.13881577946563872, "learning_rate": 2e-05, "loss": 5.4897, "step": 1971 }, { "epoch": 0.1322735352315793, "grad_norm": 0.13825644313865254, "learning_rate": 2e-05, "loss": 5.4958, "step": 1972 }, { "epoch": 0.13234061106080425, "grad_norm": 0.14815681523144525, "learning_rate": 2e-05, "loss": 5.449, "step": 1973 }, { "epoch": 0.13240768689002919, "grad_norm": 0.14751084474456727, "learning_rate": 2e-05, "loss": 5.4038, "step": 1974 }, { "epoch": 0.13247476271925412, "grad_norm": 0.1414761258267842, "learning_rate": 2e-05, "loss": 5.4305, "step": 1975 }, { "epoch": 0.13254183854847906, "grad_norm": 0.14716181431497516, "learning_rate": 2e-05, "loss": 5.448, "step": 1976 }, { "epoch": 0.132608914377704, "grad_norm": 0.13806666511571658, "learning_rate": 2e-05, "loss": 5.5455, "step": 1977 }, { "epoch": 0.13267599020692894, "grad_norm": 0.13922469720910405, "learning_rate": 2e-05, "loss": 5.4677, "step": 1978 }, { "epoch": 0.13274306603615388, "grad_norm": 0.14865891412056084, "learning_rate": 2e-05, "loss": 5.3761, "step": 1979 }, { "epoch": 0.13281014186537882, "grad_norm": 0.1424763076427973, "learning_rate": 2e-05, "loss": 5.474, "step": 1980 }, { "epoch": 0.13287721769460376, "grad_norm": 0.1454696590822448, "learning_rate": 2e-05, "loss": 5.4709, "step": 1981 }, { "epoch": 0.1329442935238287, "grad_norm": 0.13865041053389177, "learning_rate": 2e-05, "loss": 5.3813, "step": 1982 }, { "epoch": 0.13301136935305363, "grad_norm": 0.14551415534948073, "learning_rate": 2e-05, "loss": 5.5256, "step": 1983 }, { "epoch": 0.13307844518227857, "grad_norm": 0.1474735984356603, "learning_rate": 2e-05, "loss": 5.4556, "step": 1984 }, { "epoch": 0.1331455210115035, "grad_norm": 0.14394084568015036, "learning_rate": 2e-05, "loss": 5.3924, "step": 1985 }, { "epoch": 0.13321259684072845, "grad_norm": 0.1461027510380752, "learning_rate": 2e-05, "loss": 5.4599, "step": 1986 }, { "epoch": 0.1332796726699534, "grad_norm": 0.1488483071654375, "learning_rate": 2e-05, "loss": 5.5081, "step": 1987 }, { "epoch": 0.13334674849917832, "grad_norm": 0.1396847864715195, "learning_rate": 2e-05, "loss": 5.369, "step": 1988 }, { "epoch": 0.13341382432840326, "grad_norm": 0.14002972632189548, "learning_rate": 2e-05, "loss": 5.329, "step": 1989 }, { "epoch": 0.1334809001576282, "grad_norm": 0.14073485541851943, "learning_rate": 2e-05, "loss": 5.4307, "step": 1990 }, { "epoch": 0.13354797598685314, "grad_norm": 0.14217330375588472, "learning_rate": 2e-05, "loss": 5.4872, "step": 1991 }, { "epoch": 0.13361505181607808, "grad_norm": 0.14052184462782014, "learning_rate": 2e-05, "loss": 5.4685, "step": 1992 }, { "epoch": 0.13368212764530302, "grad_norm": 0.14813376420323252, "learning_rate": 2e-05, "loss": 5.3759, "step": 1993 }, { "epoch": 0.13374920347452796, "grad_norm": 0.13646098275699603, "learning_rate": 2e-05, "loss": 5.6466, "step": 1994 }, { "epoch": 0.1338162793037529, "grad_norm": 0.13957628925526722, "learning_rate": 2e-05, "loss": 5.5453, "step": 1995 }, { "epoch": 0.13388335513297783, "grad_norm": 0.14932722857328756, "learning_rate": 2e-05, "loss": 5.3958, "step": 1996 }, { "epoch": 0.13395043096220277, "grad_norm": 0.15151667494292811, "learning_rate": 2e-05, "loss": 5.4424, "step": 1997 }, { "epoch": 0.1340175067914277, "grad_norm": 0.14609554326218485, "learning_rate": 2e-05, "loss": 5.3698, "step": 1998 }, { "epoch": 0.13408458262065265, "grad_norm": 0.14181228128269063, "learning_rate": 2e-05, "loss": 5.4951, "step": 1999 }, { "epoch": 0.1341516584498776, "grad_norm": 0.1494612490698301, "learning_rate": 2e-05, "loss": 5.5776, "step": 2000 }, { "epoch": 0.13421873427910253, "grad_norm": 0.1447508913663154, "learning_rate": 2e-05, "loss": 5.4347, "step": 2001 }, { "epoch": 0.13428581010832746, "grad_norm": 0.135354379403607, "learning_rate": 2e-05, "loss": 5.4453, "step": 2002 }, { "epoch": 0.1343528859375524, "grad_norm": 0.1374306260589667, "learning_rate": 2e-05, "loss": 5.4898, "step": 2003 }, { "epoch": 0.13441996176677734, "grad_norm": 0.13969995883588932, "learning_rate": 2e-05, "loss": 5.4929, "step": 2004 }, { "epoch": 0.13448703759600228, "grad_norm": 0.139586502232014, "learning_rate": 2e-05, "loss": 5.483, "step": 2005 }, { "epoch": 0.13455411342522722, "grad_norm": 0.1387304998321867, "learning_rate": 2e-05, "loss": 5.4568, "step": 2006 }, { "epoch": 0.13462118925445216, "grad_norm": 0.13693910305412965, "learning_rate": 2e-05, "loss": 5.4467, "step": 2007 }, { "epoch": 0.1346882650836771, "grad_norm": 0.1368695243524809, "learning_rate": 2e-05, "loss": 5.5192, "step": 2008 }, { "epoch": 0.13475534091290203, "grad_norm": 0.1438899579255941, "learning_rate": 2e-05, "loss": 5.3723, "step": 2009 }, { "epoch": 0.13482241674212697, "grad_norm": 0.14654282517301112, "learning_rate": 2e-05, "loss": 5.4737, "step": 2010 }, { "epoch": 0.1348894925713519, "grad_norm": 0.13720505402364921, "learning_rate": 2e-05, "loss": 5.5048, "step": 2011 }, { "epoch": 0.13495656840057685, "grad_norm": 0.1450592621289548, "learning_rate": 2e-05, "loss": 5.2893, "step": 2012 }, { "epoch": 0.1350236442298018, "grad_norm": 0.13857029719389655, "learning_rate": 2e-05, "loss": 5.3594, "step": 2013 }, { "epoch": 0.13509072005902673, "grad_norm": 0.147941972618289, "learning_rate": 2e-05, "loss": 5.3475, "step": 2014 }, { "epoch": 0.13515779588825166, "grad_norm": 0.14386229383674137, "learning_rate": 2e-05, "loss": 5.5088, "step": 2015 }, { "epoch": 0.1352248717174766, "grad_norm": 0.13832508383023687, "learning_rate": 2e-05, "loss": 5.4, "step": 2016 }, { "epoch": 0.13529194754670154, "grad_norm": 0.1410064007792989, "learning_rate": 2e-05, "loss": 5.4877, "step": 2017 }, { "epoch": 0.13535902337592648, "grad_norm": 0.1457767986959116, "learning_rate": 2e-05, "loss": 5.4947, "step": 2018 }, { "epoch": 0.13542609920515142, "grad_norm": 0.1432904923965809, "learning_rate": 2e-05, "loss": 5.5244, "step": 2019 }, { "epoch": 0.13549317503437636, "grad_norm": 0.13967503937241313, "learning_rate": 2e-05, "loss": 5.4017, "step": 2020 }, { "epoch": 0.1355602508636013, "grad_norm": 0.14837688305869343, "learning_rate": 2e-05, "loss": 5.529, "step": 2021 }, { "epoch": 0.13562732669282623, "grad_norm": 0.13737611932726357, "learning_rate": 2e-05, "loss": 5.3823, "step": 2022 }, { "epoch": 0.13569440252205117, "grad_norm": 0.14123374240617587, "learning_rate": 2e-05, "loss": 5.3857, "step": 2023 }, { "epoch": 0.1357614783512761, "grad_norm": 0.16900805642757474, "learning_rate": 2e-05, "loss": 5.4784, "step": 2024 }, { "epoch": 0.13582855418050105, "grad_norm": 0.14684777583518316, "learning_rate": 2e-05, "loss": 5.334, "step": 2025 }, { "epoch": 0.135895630009726, "grad_norm": 0.1446995972305585, "learning_rate": 2e-05, "loss": 5.4327, "step": 2026 }, { "epoch": 0.13596270583895093, "grad_norm": 0.14118202053839035, "learning_rate": 2e-05, "loss": 5.446, "step": 2027 }, { "epoch": 0.13602978166817586, "grad_norm": 0.14555215160068108, "learning_rate": 2e-05, "loss": 5.4693, "step": 2028 }, { "epoch": 0.1360968574974008, "grad_norm": 0.1401068715846395, "learning_rate": 2e-05, "loss": 5.3414, "step": 2029 }, { "epoch": 0.13616393332662574, "grad_norm": 0.14626878551584535, "learning_rate": 2e-05, "loss": 5.3572, "step": 2030 }, { "epoch": 0.13623100915585068, "grad_norm": 0.1423787095430719, "learning_rate": 2e-05, "loss": 5.3844, "step": 2031 }, { "epoch": 0.13629808498507562, "grad_norm": 0.14412293339296053, "learning_rate": 2e-05, "loss": 5.4014, "step": 2032 }, { "epoch": 0.13636516081430056, "grad_norm": 0.15175324440485932, "learning_rate": 2e-05, "loss": 5.389, "step": 2033 }, { "epoch": 0.1364322366435255, "grad_norm": 0.1397606777273193, "learning_rate": 2e-05, "loss": 5.4963, "step": 2034 }, { "epoch": 0.13649931247275043, "grad_norm": 0.14674533195408315, "learning_rate": 2e-05, "loss": 5.4667, "step": 2035 }, { "epoch": 0.13656638830197537, "grad_norm": 0.1447364283306658, "learning_rate": 2e-05, "loss": 5.2774, "step": 2036 }, { "epoch": 0.1366334641312003, "grad_norm": 0.14365806038075374, "learning_rate": 2e-05, "loss": 5.5085, "step": 2037 }, { "epoch": 0.13670053996042525, "grad_norm": 0.14616284778443692, "learning_rate": 2e-05, "loss": 5.4005, "step": 2038 }, { "epoch": 0.1367676157896502, "grad_norm": 0.149914301050013, "learning_rate": 2e-05, "loss": 5.5219, "step": 2039 }, { "epoch": 0.13683469161887513, "grad_norm": 0.15234679438903173, "learning_rate": 2e-05, "loss": 5.4839, "step": 2040 }, { "epoch": 0.13690176744810006, "grad_norm": 0.14143879390156558, "learning_rate": 2e-05, "loss": 5.3685, "step": 2041 }, { "epoch": 0.136968843277325, "grad_norm": 0.14990220753598701, "learning_rate": 2e-05, "loss": 5.503, "step": 2042 }, { "epoch": 0.13703591910654994, "grad_norm": 0.1449612956720609, "learning_rate": 2e-05, "loss": 5.5107, "step": 2043 }, { "epoch": 0.13710299493577488, "grad_norm": 0.16479745823575775, "learning_rate": 2e-05, "loss": 5.527, "step": 2044 }, { "epoch": 0.13717007076499982, "grad_norm": 0.1451770064850456, "learning_rate": 2e-05, "loss": 5.458, "step": 2045 }, { "epoch": 0.13723714659422478, "grad_norm": 0.14789898391209222, "learning_rate": 2e-05, "loss": 5.4245, "step": 2046 }, { "epoch": 0.13730422242344972, "grad_norm": 0.14693746529462, "learning_rate": 2e-05, "loss": 5.508, "step": 2047 }, { "epoch": 0.13737129825267466, "grad_norm": 0.14860532131671506, "learning_rate": 2e-05, "loss": 5.5726, "step": 2048 }, { "epoch": 0.1374383740818996, "grad_norm": 0.14814345311288496, "learning_rate": 2e-05, "loss": 5.4744, "step": 2049 }, { "epoch": 0.13750544991112454, "grad_norm": 0.14666424999576003, "learning_rate": 2e-05, "loss": 5.6521, "step": 2050 }, { "epoch": 0.13757252574034948, "grad_norm": 0.15168902537562473, "learning_rate": 2e-05, "loss": 5.4311, "step": 2051 }, { "epoch": 0.13763960156957442, "grad_norm": 0.14328150444215826, "learning_rate": 2e-05, "loss": 5.5479, "step": 2052 }, { "epoch": 0.13770667739879935, "grad_norm": 0.1425490431984674, "learning_rate": 2e-05, "loss": 5.612, "step": 2053 }, { "epoch": 0.1377737532280243, "grad_norm": 0.14956636970194537, "learning_rate": 2e-05, "loss": 5.5478, "step": 2054 }, { "epoch": 0.13784082905724923, "grad_norm": 0.14180943967009133, "learning_rate": 2e-05, "loss": 5.5183, "step": 2055 }, { "epoch": 0.13790790488647417, "grad_norm": 0.13570420702588037, "learning_rate": 2e-05, "loss": 5.5137, "step": 2056 }, { "epoch": 0.1379749807156991, "grad_norm": 0.1517526719084816, "learning_rate": 2e-05, "loss": 5.4302, "step": 2057 }, { "epoch": 0.13804205654492405, "grad_norm": 0.14640368761339154, "learning_rate": 2e-05, "loss": 5.4569, "step": 2058 }, { "epoch": 0.13810913237414899, "grad_norm": 0.1395307926984185, "learning_rate": 2e-05, "loss": 5.4405, "step": 2059 }, { "epoch": 0.13817620820337392, "grad_norm": 0.14198355180662323, "learning_rate": 2e-05, "loss": 5.4186, "step": 2060 }, { "epoch": 0.13824328403259886, "grad_norm": 0.1614154130269461, "learning_rate": 2e-05, "loss": 5.3232, "step": 2061 }, { "epoch": 0.1383103598618238, "grad_norm": 0.15129914459303087, "learning_rate": 2e-05, "loss": 5.5696, "step": 2062 }, { "epoch": 0.13837743569104874, "grad_norm": 0.13555279176585308, "learning_rate": 2e-05, "loss": 5.3998, "step": 2063 }, { "epoch": 0.13844451152027368, "grad_norm": 0.14959272535112905, "learning_rate": 2e-05, "loss": 5.4603, "step": 2064 }, { "epoch": 0.13851158734949862, "grad_norm": 0.1413531061216604, "learning_rate": 2e-05, "loss": 5.4323, "step": 2065 }, { "epoch": 0.13857866317872355, "grad_norm": 0.15615542950039502, "learning_rate": 2e-05, "loss": 5.4343, "step": 2066 }, { "epoch": 0.1386457390079485, "grad_norm": 0.14526522337447637, "learning_rate": 2e-05, "loss": 5.5183, "step": 2067 }, { "epoch": 0.13871281483717343, "grad_norm": 0.14011432675400964, "learning_rate": 2e-05, "loss": 5.4477, "step": 2068 }, { "epoch": 0.13877989066639837, "grad_norm": 0.1448029966268039, "learning_rate": 2e-05, "loss": 5.4613, "step": 2069 }, { "epoch": 0.1388469664956233, "grad_norm": 0.1497322643153707, "learning_rate": 2e-05, "loss": 5.2901, "step": 2070 }, { "epoch": 0.13891404232484825, "grad_norm": 0.14241235246061373, "learning_rate": 2e-05, "loss": 5.4274, "step": 2071 }, { "epoch": 0.13898111815407319, "grad_norm": 0.14007582098754184, "learning_rate": 2e-05, "loss": 5.2934, "step": 2072 }, { "epoch": 0.13904819398329812, "grad_norm": 0.1454765670991779, "learning_rate": 2e-05, "loss": 5.582, "step": 2073 }, { "epoch": 0.13911526981252306, "grad_norm": 0.14230513638145892, "learning_rate": 2e-05, "loss": 5.4706, "step": 2074 }, { "epoch": 0.139182345641748, "grad_norm": 0.14434933079648948, "learning_rate": 2e-05, "loss": 5.4723, "step": 2075 }, { "epoch": 0.13924942147097294, "grad_norm": 0.147421816105899, "learning_rate": 2e-05, "loss": 5.3296, "step": 2076 }, { "epoch": 0.13931649730019788, "grad_norm": 0.13653657684269718, "learning_rate": 2e-05, "loss": 5.516, "step": 2077 }, { "epoch": 0.13938357312942282, "grad_norm": 0.1374611697153059, "learning_rate": 2e-05, "loss": 5.506, "step": 2078 }, { "epoch": 0.13945064895864775, "grad_norm": 0.14054374810427406, "learning_rate": 2e-05, "loss": 5.3506, "step": 2079 }, { "epoch": 0.1395177247878727, "grad_norm": 0.14588591518712912, "learning_rate": 2e-05, "loss": 5.402, "step": 2080 }, { "epoch": 0.13958480061709763, "grad_norm": 0.14541747549107972, "learning_rate": 2e-05, "loss": 5.4473, "step": 2081 }, { "epoch": 0.13965187644632257, "grad_norm": 0.14735580456517158, "learning_rate": 2e-05, "loss": 5.4775, "step": 2082 }, { "epoch": 0.1397189522755475, "grad_norm": 0.14913060319894267, "learning_rate": 2e-05, "loss": 5.5262, "step": 2083 }, { "epoch": 0.13978602810477245, "grad_norm": 0.147982923460394, "learning_rate": 2e-05, "loss": 5.577, "step": 2084 }, { "epoch": 0.13985310393399739, "grad_norm": 0.14612828677525555, "learning_rate": 2e-05, "loss": 5.4546, "step": 2085 }, { "epoch": 0.13992017976322232, "grad_norm": 0.14380085827701397, "learning_rate": 2e-05, "loss": 5.3639, "step": 2086 }, { "epoch": 0.13998725559244726, "grad_norm": 0.14734830397347093, "learning_rate": 2e-05, "loss": 5.3922, "step": 2087 }, { "epoch": 0.1400543314216722, "grad_norm": 0.14435415307227997, "learning_rate": 2e-05, "loss": 5.566, "step": 2088 }, { "epoch": 0.14012140725089714, "grad_norm": 0.13674176147553868, "learning_rate": 2e-05, "loss": 5.5672, "step": 2089 }, { "epoch": 0.14018848308012208, "grad_norm": 0.1434648950600529, "learning_rate": 2e-05, "loss": 5.5865, "step": 2090 }, { "epoch": 0.14025555890934702, "grad_norm": 0.1433419822775209, "learning_rate": 2e-05, "loss": 5.4708, "step": 2091 }, { "epoch": 0.14032263473857196, "grad_norm": 0.145537044047533, "learning_rate": 2e-05, "loss": 5.3192, "step": 2092 }, { "epoch": 0.1403897105677969, "grad_norm": 0.14688936720125742, "learning_rate": 2e-05, "loss": 5.4323, "step": 2093 }, { "epoch": 0.14045678639702183, "grad_norm": 0.1516534831742794, "learning_rate": 2e-05, "loss": 5.5181, "step": 2094 }, { "epoch": 0.14052386222624677, "grad_norm": 0.14629229619602857, "learning_rate": 2e-05, "loss": 5.448, "step": 2095 }, { "epoch": 0.1405909380554717, "grad_norm": 0.1401692963256046, "learning_rate": 2e-05, "loss": 5.3799, "step": 2096 }, { "epoch": 0.14065801388469665, "grad_norm": 0.1539715245059927, "learning_rate": 2e-05, "loss": 5.534, "step": 2097 }, { "epoch": 0.1407250897139216, "grad_norm": 0.1415371240069618, "learning_rate": 2e-05, "loss": 5.4638, "step": 2098 }, { "epoch": 0.14079216554314652, "grad_norm": 0.14804672453359422, "learning_rate": 2e-05, "loss": 5.5047, "step": 2099 }, { "epoch": 0.14085924137237146, "grad_norm": 0.14262560952410744, "learning_rate": 2e-05, "loss": 5.4216, "step": 2100 }, { "epoch": 0.1409263172015964, "grad_norm": 0.14208317465400025, "learning_rate": 2e-05, "loss": 5.5134, "step": 2101 }, { "epoch": 0.14099339303082134, "grad_norm": 0.14946715975716882, "learning_rate": 2e-05, "loss": 5.3661, "step": 2102 }, { "epoch": 0.14106046886004628, "grad_norm": 0.14069190633653889, "learning_rate": 2e-05, "loss": 5.4868, "step": 2103 }, { "epoch": 0.14112754468927122, "grad_norm": 0.13571094887960813, "learning_rate": 2e-05, "loss": 5.3288, "step": 2104 }, { "epoch": 0.14119462051849616, "grad_norm": 0.14455144686027474, "learning_rate": 2e-05, "loss": 5.4554, "step": 2105 }, { "epoch": 0.1412616963477211, "grad_norm": 0.1459431448506834, "learning_rate": 2e-05, "loss": 5.3115, "step": 2106 }, { "epoch": 0.14132877217694603, "grad_norm": 0.1384018680219067, "learning_rate": 2e-05, "loss": 5.5212, "step": 2107 }, { "epoch": 0.14139584800617097, "grad_norm": 0.14976967076771533, "learning_rate": 2e-05, "loss": 5.3876, "step": 2108 }, { "epoch": 0.1414629238353959, "grad_norm": 0.14454852192919818, "learning_rate": 2e-05, "loss": 5.6078, "step": 2109 }, { "epoch": 0.14152999966462085, "grad_norm": 0.14163177259582685, "learning_rate": 2e-05, "loss": 5.4388, "step": 2110 }, { "epoch": 0.1415970754938458, "grad_norm": 0.14883238154839137, "learning_rate": 2e-05, "loss": 5.4799, "step": 2111 }, { "epoch": 0.14166415132307072, "grad_norm": 0.14911614115275804, "learning_rate": 2e-05, "loss": 5.5243, "step": 2112 }, { "epoch": 0.14173122715229566, "grad_norm": 0.13433864203980456, "learning_rate": 2e-05, "loss": 5.3935, "step": 2113 }, { "epoch": 0.1417983029815206, "grad_norm": 0.1465446417328623, "learning_rate": 2e-05, "loss": 5.4771, "step": 2114 }, { "epoch": 0.14186537881074554, "grad_norm": 0.14600045400918793, "learning_rate": 2e-05, "loss": 5.3702, "step": 2115 }, { "epoch": 0.14193245463997048, "grad_norm": 0.14406164889968967, "learning_rate": 2e-05, "loss": 5.4832, "step": 2116 }, { "epoch": 0.14199953046919542, "grad_norm": 0.14090250065905935, "learning_rate": 2e-05, "loss": 5.4051, "step": 2117 }, { "epoch": 0.14206660629842036, "grad_norm": 0.1430461520860423, "learning_rate": 2e-05, "loss": 5.4033, "step": 2118 }, { "epoch": 0.1421336821276453, "grad_norm": 0.14040505064416364, "learning_rate": 2e-05, "loss": 5.5316, "step": 2119 }, { "epoch": 0.14220075795687023, "grad_norm": 0.13844218925268514, "learning_rate": 2e-05, "loss": 5.6115, "step": 2120 }, { "epoch": 0.14226783378609517, "grad_norm": 0.15981607081188468, "learning_rate": 2e-05, "loss": 5.4412, "step": 2121 }, { "epoch": 0.1423349096153201, "grad_norm": 0.14497336791094118, "learning_rate": 2e-05, "loss": 5.4611, "step": 2122 }, { "epoch": 0.14240198544454505, "grad_norm": 0.14537369836705907, "learning_rate": 2e-05, "loss": 5.4546, "step": 2123 }, { "epoch": 0.14246906127377, "grad_norm": 0.15327627284900278, "learning_rate": 2e-05, "loss": 5.5293, "step": 2124 }, { "epoch": 0.14253613710299493, "grad_norm": 0.16332699981039317, "learning_rate": 2e-05, "loss": 5.4914, "step": 2125 }, { "epoch": 0.14260321293221986, "grad_norm": 0.14181143872765845, "learning_rate": 2e-05, "loss": 5.5039, "step": 2126 }, { "epoch": 0.1426702887614448, "grad_norm": 0.16042452925068149, "learning_rate": 2e-05, "loss": 5.3038, "step": 2127 }, { "epoch": 0.14273736459066974, "grad_norm": 0.15474470048925182, "learning_rate": 2e-05, "loss": 5.6319, "step": 2128 }, { "epoch": 0.14280444041989468, "grad_norm": 0.1473012960551814, "learning_rate": 2e-05, "loss": 5.4647, "step": 2129 }, { "epoch": 0.14287151624911962, "grad_norm": 0.14819107898481138, "learning_rate": 2e-05, "loss": 5.5904, "step": 2130 }, { "epoch": 0.14293859207834456, "grad_norm": 0.15625828449722654, "learning_rate": 2e-05, "loss": 5.3722, "step": 2131 }, { "epoch": 0.1430056679075695, "grad_norm": 0.1494386459669284, "learning_rate": 2e-05, "loss": 5.3977, "step": 2132 }, { "epoch": 0.14307274373679443, "grad_norm": 0.1425782423003123, "learning_rate": 2e-05, "loss": 5.4499, "step": 2133 }, { "epoch": 0.14313981956601937, "grad_norm": 0.15249503770800232, "learning_rate": 2e-05, "loss": 5.5472, "step": 2134 }, { "epoch": 0.1432068953952443, "grad_norm": 0.1520570401289044, "learning_rate": 2e-05, "loss": 5.4849, "step": 2135 }, { "epoch": 0.14327397122446925, "grad_norm": 0.14510983100042368, "learning_rate": 2e-05, "loss": 5.4872, "step": 2136 }, { "epoch": 0.14334104705369421, "grad_norm": 0.14342754556248508, "learning_rate": 2e-05, "loss": 5.5008, "step": 2137 }, { "epoch": 0.14340812288291915, "grad_norm": 0.1530994065118648, "learning_rate": 2e-05, "loss": 5.2658, "step": 2138 }, { "epoch": 0.1434751987121441, "grad_norm": 0.14587688386255726, "learning_rate": 2e-05, "loss": 5.5043, "step": 2139 }, { "epoch": 0.14354227454136903, "grad_norm": 0.14396024841486615, "learning_rate": 2e-05, "loss": 5.5005, "step": 2140 }, { "epoch": 0.14360935037059397, "grad_norm": 0.1497870886897319, "learning_rate": 2e-05, "loss": 5.5405, "step": 2141 }, { "epoch": 0.1436764261998189, "grad_norm": 0.15436407238643793, "learning_rate": 2e-05, "loss": 5.4933, "step": 2142 }, { "epoch": 0.14374350202904385, "grad_norm": 0.15210275575774748, "learning_rate": 2e-05, "loss": 5.6512, "step": 2143 }, { "epoch": 0.14381057785826878, "grad_norm": 0.14589209349364704, "learning_rate": 2e-05, "loss": 5.4501, "step": 2144 }, { "epoch": 0.14387765368749372, "grad_norm": 0.15336538657993987, "learning_rate": 2e-05, "loss": 5.5339, "step": 2145 }, { "epoch": 0.14394472951671866, "grad_norm": 0.14618768853695918, "learning_rate": 2e-05, "loss": 5.4508, "step": 2146 }, { "epoch": 0.1440118053459436, "grad_norm": 0.1420582185925039, "learning_rate": 2e-05, "loss": 5.3978, "step": 2147 }, { "epoch": 0.14407888117516854, "grad_norm": 0.14480051300594715, "learning_rate": 2e-05, "loss": 5.4336, "step": 2148 }, { "epoch": 0.14414595700439348, "grad_norm": 0.14757642621031064, "learning_rate": 2e-05, "loss": 5.3278, "step": 2149 }, { "epoch": 0.14421303283361842, "grad_norm": 0.14622554225796436, "learning_rate": 2e-05, "loss": 5.4272, "step": 2150 }, { "epoch": 0.14428010866284335, "grad_norm": 0.14664582831387782, "learning_rate": 2e-05, "loss": 5.5526, "step": 2151 }, { "epoch": 0.1443471844920683, "grad_norm": 0.14587066778078878, "learning_rate": 2e-05, "loss": 5.4913, "step": 2152 }, { "epoch": 0.14441426032129323, "grad_norm": 0.14987580868355235, "learning_rate": 2e-05, "loss": 5.4201, "step": 2153 }, { "epoch": 0.14448133615051817, "grad_norm": 0.14278796309540473, "learning_rate": 2e-05, "loss": 5.5058, "step": 2154 }, { "epoch": 0.1445484119797431, "grad_norm": 0.14535872377461978, "learning_rate": 2e-05, "loss": 5.5851, "step": 2155 }, { "epoch": 0.14461548780896805, "grad_norm": 0.15244676984563427, "learning_rate": 2e-05, "loss": 5.3563, "step": 2156 }, { "epoch": 0.14468256363819298, "grad_norm": 0.14305299912458483, "learning_rate": 2e-05, "loss": 5.5083, "step": 2157 }, { "epoch": 0.14474963946741792, "grad_norm": 0.14831822003093237, "learning_rate": 2e-05, "loss": 5.452, "step": 2158 }, { "epoch": 0.14481671529664286, "grad_norm": 0.14068281987142375, "learning_rate": 2e-05, "loss": 5.6524, "step": 2159 }, { "epoch": 0.1448837911258678, "grad_norm": 0.136001486156199, "learning_rate": 2e-05, "loss": 5.4686, "step": 2160 }, { "epoch": 0.14495086695509274, "grad_norm": 0.13927899315634554, "learning_rate": 2e-05, "loss": 5.5185, "step": 2161 }, { "epoch": 0.14501794278431768, "grad_norm": 0.14010794400150614, "learning_rate": 2e-05, "loss": 5.5248, "step": 2162 }, { "epoch": 0.14508501861354262, "grad_norm": 0.14554846241013303, "learning_rate": 2e-05, "loss": 5.4205, "step": 2163 }, { "epoch": 0.14515209444276755, "grad_norm": 0.1401670456559598, "learning_rate": 2e-05, "loss": 5.3287, "step": 2164 }, { "epoch": 0.1452191702719925, "grad_norm": 0.13695529979492535, "learning_rate": 2e-05, "loss": 5.3917, "step": 2165 }, { "epoch": 0.14528624610121743, "grad_norm": 0.13870956115657285, "learning_rate": 2e-05, "loss": 5.5754, "step": 2166 }, { "epoch": 0.14535332193044237, "grad_norm": 0.14419526180863027, "learning_rate": 2e-05, "loss": 5.3541, "step": 2167 }, { "epoch": 0.1454203977596673, "grad_norm": 0.14658600936191776, "learning_rate": 2e-05, "loss": 5.475, "step": 2168 }, { "epoch": 0.14548747358889225, "grad_norm": 0.14590146222600692, "learning_rate": 2e-05, "loss": 5.4832, "step": 2169 }, { "epoch": 0.14555454941811719, "grad_norm": 0.1439962269381841, "learning_rate": 2e-05, "loss": 5.3578, "step": 2170 }, { "epoch": 0.14562162524734212, "grad_norm": 0.14580884668233787, "learning_rate": 2e-05, "loss": 5.4147, "step": 2171 }, { "epoch": 0.14568870107656706, "grad_norm": 0.14242865571192384, "learning_rate": 2e-05, "loss": 5.4067, "step": 2172 }, { "epoch": 0.145755776905792, "grad_norm": 0.14735232844407223, "learning_rate": 2e-05, "loss": 5.4937, "step": 2173 }, { "epoch": 0.14582285273501694, "grad_norm": 0.1388656679383615, "learning_rate": 2e-05, "loss": 5.5155, "step": 2174 }, { "epoch": 0.14588992856424188, "grad_norm": 0.13938224549764658, "learning_rate": 2e-05, "loss": 5.4448, "step": 2175 }, { "epoch": 0.14595700439346682, "grad_norm": 0.14709734331036806, "learning_rate": 2e-05, "loss": 5.6044, "step": 2176 }, { "epoch": 0.14602408022269175, "grad_norm": 0.14439610456307594, "learning_rate": 2e-05, "loss": 5.5195, "step": 2177 }, { "epoch": 0.1460911560519167, "grad_norm": 0.13945714518718524, "learning_rate": 2e-05, "loss": 5.4022, "step": 2178 }, { "epoch": 0.14615823188114163, "grad_norm": 0.14918330422225048, "learning_rate": 2e-05, "loss": 5.5542, "step": 2179 }, { "epoch": 0.14622530771036657, "grad_norm": 0.14199905843355493, "learning_rate": 2e-05, "loss": 5.4567, "step": 2180 }, { "epoch": 0.1462923835395915, "grad_norm": 0.13958120837397936, "learning_rate": 2e-05, "loss": 5.4251, "step": 2181 }, { "epoch": 0.14635945936881645, "grad_norm": 0.1435304665307187, "learning_rate": 2e-05, "loss": 5.5456, "step": 2182 }, { "epoch": 0.14642653519804139, "grad_norm": 0.143505956709722, "learning_rate": 2e-05, "loss": 5.4328, "step": 2183 }, { "epoch": 0.14649361102726632, "grad_norm": 0.14519884727835403, "learning_rate": 2e-05, "loss": 5.3977, "step": 2184 }, { "epoch": 0.14656068685649126, "grad_norm": 0.1493951824550538, "learning_rate": 2e-05, "loss": 5.5119, "step": 2185 }, { "epoch": 0.1466277626857162, "grad_norm": 0.14894069244209124, "learning_rate": 2e-05, "loss": 5.5139, "step": 2186 }, { "epoch": 0.14669483851494114, "grad_norm": 0.14261482756850957, "learning_rate": 2e-05, "loss": 5.4404, "step": 2187 }, { "epoch": 0.14676191434416608, "grad_norm": 0.1405120190329673, "learning_rate": 2e-05, "loss": 5.3772, "step": 2188 }, { "epoch": 0.14682899017339102, "grad_norm": 0.15127998187901046, "learning_rate": 2e-05, "loss": 5.3881, "step": 2189 }, { "epoch": 0.14689606600261595, "grad_norm": 0.14465456808578442, "learning_rate": 2e-05, "loss": 5.5839, "step": 2190 }, { "epoch": 0.1469631418318409, "grad_norm": 0.14546482835722385, "learning_rate": 2e-05, "loss": 5.437, "step": 2191 }, { "epoch": 0.14703021766106583, "grad_norm": 0.15266618479632738, "learning_rate": 2e-05, "loss": 5.5806, "step": 2192 }, { "epoch": 0.14709729349029077, "grad_norm": 0.14043600101504575, "learning_rate": 2e-05, "loss": 5.5099, "step": 2193 }, { "epoch": 0.1471643693195157, "grad_norm": 0.1392318886014651, "learning_rate": 2e-05, "loss": 5.4123, "step": 2194 }, { "epoch": 0.14723144514874065, "grad_norm": 0.14988425749621023, "learning_rate": 2e-05, "loss": 5.4001, "step": 2195 }, { "epoch": 0.14729852097796559, "grad_norm": 0.14841366435953765, "learning_rate": 2e-05, "loss": 5.3973, "step": 2196 }, { "epoch": 0.14736559680719052, "grad_norm": 0.14146947574584215, "learning_rate": 2e-05, "loss": 5.556, "step": 2197 }, { "epoch": 0.14743267263641546, "grad_norm": 0.15176055246465603, "learning_rate": 2e-05, "loss": 5.3864, "step": 2198 }, { "epoch": 0.1474997484656404, "grad_norm": 0.15154767872970293, "learning_rate": 2e-05, "loss": 5.3568, "step": 2199 }, { "epoch": 0.14756682429486534, "grad_norm": 0.1356737335629346, "learning_rate": 2e-05, "loss": 5.4971, "step": 2200 }, { "epoch": 0.14763390012409028, "grad_norm": 0.14602027840593182, "learning_rate": 2e-05, "loss": 5.5186, "step": 2201 }, { "epoch": 0.14770097595331522, "grad_norm": 0.14476867460642287, "learning_rate": 2e-05, "loss": 5.4412, "step": 2202 }, { "epoch": 0.14776805178254016, "grad_norm": 0.14090129458077094, "learning_rate": 2e-05, "loss": 5.4852, "step": 2203 }, { "epoch": 0.1478351276117651, "grad_norm": 0.14932640146795353, "learning_rate": 2e-05, "loss": 5.4009, "step": 2204 }, { "epoch": 0.14790220344099003, "grad_norm": 0.1431406445598946, "learning_rate": 2e-05, "loss": 5.4374, "step": 2205 }, { "epoch": 0.14796927927021497, "grad_norm": 0.14582358722961314, "learning_rate": 2e-05, "loss": 5.3745, "step": 2206 }, { "epoch": 0.1480363550994399, "grad_norm": 0.1390614303786016, "learning_rate": 2e-05, "loss": 5.6172, "step": 2207 }, { "epoch": 0.14810343092866485, "grad_norm": 0.1400337280454805, "learning_rate": 2e-05, "loss": 5.4746, "step": 2208 }, { "epoch": 0.14817050675788979, "grad_norm": 0.14745711725274677, "learning_rate": 2e-05, "loss": 5.4337, "step": 2209 }, { "epoch": 0.14823758258711472, "grad_norm": 0.14250627037811864, "learning_rate": 2e-05, "loss": 5.4953, "step": 2210 }, { "epoch": 0.14830465841633966, "grad_norm": 0.14556539121841414, "learning_rate": 2e-05, "loss": 5.4818, "step": 2211 }, { "epoch": 0.1483717342455646, "grad_norm": 0.14712378306325027, "learning_rate": 2e-05, "loss": 5.6014, "step": 2212 }, { "epoch": 0.14843881007478954, "grad_norm": 0.1406234374043565, "learning_rate": 2e-05, "loss": 5.4194, "step": 2213 }, { "epoch": 0.14850588590401448, "grad_norm": 0.14274074721247146, "learning_rate": 2e-05, "loss": 5.4534, "step": 2214 }, { "epoch": 0.14857296173323942, "grad_norm": 0.14979598280292283, "learning_rate": 2e-05, "loss": 5.4035, "step": 2215 }, { "epoch": 0.14864003756246436, "grad_norm": 0.1453821618882125, "learning_rate": 2e-05, "loss": 5.4294, "step": 2216 }, { "epoch": 0.1487071133916893, "grad_norm": 0.14069108566684205, "learning_rate": 2e-05, "loss": 5.4717, "step": 2217 }, { "epoch": 0.14877418922091423, "grad_norm": 0.14182602725321994, "learning_rate": 2e-05, "loss": 5.3915, "step": 2218 }, { "epoch": 0.14884126505013917, "grad_norm": 0.14916687410903165, "learning_rate": 2e-05, "loss": 5.5567, "step": 2219 }, { "epoch": 0.1489083408793641, "grad_norm": 0.14884857125911571, "learning_rate": 2e-05, "loss": 5.4354, "step": 2220 }, { "epoch": 0.14897541670858905, "grad_norm": 0.14257903090820245, "learning_rate": 2e-05, "loss": 5.4405, "step": 2221 }, { "epoch": 0.149042492537814, "grad_norm": 0.15517829393693822, "learning_rate": 2e-05, "loss": 5.3558, "step": 2222 }, { "epoch": 0.14910956836703892, "grad_norm": 0.1451589564604694, "learning_rate": 2e-05, "loss": 5.4937, "step": 2223 }, { "epoch": 0.14917664419626386, "grad_norm": 0.14736625165276526, "learning_rate": 2e-05, "loss": 5.5075, "step": 2224 }, { "epoch": 0.1492437200254888, "grad_norm": 0.16117011334112802, "learning_rate": 2e-05, "loss": 5.4581, "step": 2225 }, { "epoch": 0.14931079585471374, "grad_norm": 0.1527624923482027, "learning_rate": 2e-05, "loss": 5.4298, "step": 2226 }, { "epoch": 0.14937787168393868, "grad_norm": 0.14502147403392052, "learning_rate": 2e-05, "loss": 5.5065, "step": 2227 }, { "epoch": 0.14944494751316365, "grad_norm": 0.15810757819337676, "learning_rate": 2e-05, "loss": 5.5084, "step": 2228 }, { "epoch": 0.14951202334238858, "grad_norm": 0.1571042219769501, "learning_rate": 2e-05, "loss": 5.5172, "step": 2229 }, { "epoch": 0.14957909917161352, "grad_norm": 0.14277814223132904, "learning_rate": 2e-05, "loss": 5.4234, "step": 2230 }, { "epoch": 0.14964617500083846, "grad_norm": 0.1488686070843933, "learning_rate": 2e-05, "loss": 5.4678, "step": 2231 }, { "epoch": 0.1497132508300634, "grad_norm": 0.15864424669060126, "learning_rate": 2e-05, "loss": 5.5101, "step": 2232 }, { "epoch": 0.14978032665928834, "grad_norm": 0.14838415723065443, "learning_rate": 2e-05, "loss": 5.4721, "step": 2233 }, { "epoch": 0.14984740248851328, "grad_norm": 0.14078026528329415, "learning_rate": 2e-05, "loss": 5.5701, "step": 2234 }, { "epoch": 0.14991447831773821, "grad_norm": 0.15356249959265666, "learning_rate": 2e-05, "loss": 5.3723, "step": 2235 }, { "epoch": 0.14998155414696315, "grad_norm": 0.1465234196894582, "learning_rate": 2e-05, "loss": 5.4654, "step": 2236 }, { "epoch": 0.1500486299761881, "grad_norm": 0.14998744357722843, "learning_rate": 2e-05, "loss": 5.4431, "step": 2237 }, { "epoch": 0.15011570580541303, "grad_norm": 0.14743260824118767, "learning_rate": 2e-05, "loss": 5.6873, "step": 2238 }, { "epoch": 0.15018278163463797, "grad_norm": 0.14209126148813192, "learning_rate": 2e-05, "loss": 5.4788, "step": 2239 }, { "epoch": 0.1502498574638629, "grad_norm": 0.1524338052146644, "learning_rate": 2e-05, "loss": 5.4811, "step": 2240 }, { "epoch": 0.15031693329308785, "grad_norm": 0.14701611645049883, "learning_rate": 2e-05, "loss": 5.4079, "step": 2241 }, { "epoch": 0.15038400912231278, "grad_norm": 0.1423477713808431, "learning_rate": 2e-05, "loss": 5.4371, "step": 2242 }, { "epoch": 0.15045108495153772, "grad_norm": 0.1412729839765959, "learning_rate": 2e-05, "loss": 5.5248, "step": 2243 }, { "epoch": 0.15051816078076266, "grad_norm": 0.15299669197471893, "learning_rate": 2e-05, "loss": 5.5234, "step": 2244 }, { "epoch": 0.1505852366099876, "grad_norm": 0.15097127108213343, "learning_rate": 2e-05, "loss": 5.5145, "step": 2245 }, { "epoch": 0.15065231243921254, "grad_norm": 0.15650572664124027, "learning_rate": 2e-05, "loss": 5.4188, "step": 2246 }, { "epoch": 0.15071938826843748, "grad_norm": 0.14165577413322508, "learning_rate": 2e-05, "loss": 5.6206, "step": 2247 }, { "epoch": 0.15078646409766241, "grad_norm": 0.14550932225739427, "learning_rate": 2e-05, "loss": 5.4602, "step": 2248 }, { "epoch": 0.15085353992688735, "grad_norm": 0.15161439769008564, "learning_rate": 2e-05, "loss": 5.5719, "step": 2249 }, { "epoch": 0.1509206157561123, "grad_norm": 0.1416963164733599, "learning_rate": 2e-05, "loss": 5.459, "step": 2250 }, { "epoch": 0.15098769158533723, "grad_norm": 0.1400122232410672, "learning_rate": 2e-05, "loss": 5.2783, "step": 2251 }, { "epoch": 0.15105476741456217, "grad_norm": 0.1429691044726434, "learning_rate": 2e-05, "loss": 5.5537, "step": 2252 }, { "epoch": 0.1511218432437871, "grad_norm": 0.1460582380069382, "learning_rate": 2e-05, "loss": 5.4785, "step": 2253 }, { "epoch": 0.15118891907301205, "grad_norm": 0.15753842120203146, "learning_rate": 2e-05, "loss": 5.4207, "step": 2254 }, { "epoch": 0.15125599490223698, "grad_norm": 0.14509997776657477, "learning_rate": 2e-05, "loss": 5.4169, "step": 2255 }, { "epoch": 0.15132307073146192, "grad_norm": 0.1415921386618362, "learning_rate": 2e-05, "loss": 5.4356, "step": 2256 }, { "epoch": 0.15139014656068686, "grad_norm": 0.14785146233164903, "learning_rate": 2e-05, "loss": 5.3496, "step": 2257 }, { "epoch": 0.1514572223899118, "grad_norm": 0.15086071810392968, "learning_rate": 2e-05, "loss": 5.4337, "step": 2258 }, { "epoch": 0.15152429821913674, "grad_norm": 0.14799069108598728, "learning_rate": 2e-05, "loss": 5.5839, "step": 2259 }, { "epoch": 0.15159137404836168, "grad_norm": 0.14899993294556388, "learning_rate": 2e-05, "loss": 5.4858, "step": 2260 }, { "epoch": 0.15165844987758662, "grad_norm": 0.16238576602059426, "learning_rate": 2e-05, "loss": 5.377, "step": 2261 }, { "epoch": 0.15172552570681155, "grad_norm": 0.14208003644090292, "learning_rate": 2e-05, "loss": 5.4137, "step": 2262 }, { "epoch": 0.1517926015360365, "grad_norm": 0.14011384053976045, "learning_rate": 2e-05, "loss": 5.4898, "step": 2263 }, { "epoch": 0.15185967736526143, "grad_norm": 0.14519204184100665, "learning_rate": 2e-05, "loss": 5.4516, "step": 2264 }, { "epoch": 0.15192675319448637, "grad_norm": 0.16102552409919463, "learning_rate": 2e-05, "loss": 5.3099, "step": 2265 }, { "epoch": 0.1519938290237113, "grad_norm": 0.1410919397614373, "learning_rate": 2e-05, "loss": 5.5681, "step": 2266 }, { "epoch": 0.15206090485293625, "grad_norm": 0.1502947123798291, "learning_rate": 2e-05, "loss": 5.3604, "step": 2267 }, { "epoch": 0.15212798068216118, "grad_norm": 0.14630220817539488, "learning_rate": 2e-05, "loss": 5.4826, "step": 2268 }, { "epoch": 0.15219505651138612, "grad_norm": 0.1538987882435936, "learning_rate": 2e-05, "loss": 5.3781, "step": 2269 }, { "epoch": 0.15226213234061106, "grad_norm": 0.15178125495389094, "learning_rate": 2e-05, "loss": 5.4667, "step": 2270 }, { "epoch": 0.152329208169836, "grad_norm": 0.13989645217075813, "learning_rate": 2e-05, "loss": 5.5333, "step": 2271 }, { "epoch": 0.15239628399906094, "grad_norm": 0.14339244285451885, "learning_rate": 2e-05, "loss": 5.5345, "step": 2272 }, { "epoch": 0.15246335982828588, "grad_norm": 0.1581101726326712, "learning_rate": 2e-05, "loss": 5.4492, "step": 2273 }, { "epoch": 0.15253043565751082, "grad_norm": 0.14044416841294247, "learning_rate": 2e-05, "loss": 5.4124, "step": 2274 }, { "epoch": 0.15259751148673575, "grad_norm": 0.15354680019139158, "learning_rate": 2e-05, "loss": 5.4627, "step": 2275 }, { "epoch": 0.1526645873159607, "grad_norm": 0.14965510974736163, "learning_rate": 2e-05, "loss": 5.6755, "step": 2276 }, { "epoch": 0.15273166314518563, "grad_norm": 0.14054227982067247, "learning_rate": 2e-05, "loss": 5.4601, "step": 2277 }, { "epoch": 0.15279873897441057, "grad_norm": 0.14763564405088236, "learning_rate": 2e-05, "loss": 5.5406, "step": 2278 }, { "epoch": 0.1528658148036355, "grad_norm": 0.14781745796692083, "learning_rate": 2e-05, "loss": 5.252, "step": 2279 }, { "epoch": 0.15293289063286045, "grad_norm": 0.13788657848427882, "learning_rate": 2e-05, "loss": 5.4278, "step": 2280 }, { "epoch": 0.15299996646208538, "grad_norm": 0.14235988307616068, "learning_rate": 2e-05, "loss": 5.398, "step": 2281 }, { "epoch": 0.15306704229131032, "grad_norm": 0.14761321434016464, "learning_rate": 2e-05, "loss": 5.3973, "step": 2282 }, { "epoch": 0.15313411812053526, "grad_norm": 0.1469045034281214, "learning_rate": 2e-05, "loss": 5.4509, "step": 2283 }, { "epoch": 0.1532011939497602, "grad_norm": 0.1395772060824934, "learning_rate": 2e-05, "loss": 5.4697, "step": 2284 }, { "epoch": 0.15326826977898514, "grad_norm": 0.14269735970812533, "learning_rate": 2e-05, "loss": 5.4874, "step": 2285 }, { "epoch": 0.15333534560821008, "grad_norm": 0.15165497629321506, "learning_rate": 2e-05, "loss": 5.5399, "step": 2286 }, { "epoch": 0.15340242143743502, "grad_norm": 0.14593553524768385, "learning_rate": 2e-05, "loss": 5.4496, "step": 2287 }, { "epoch": 0.15346949726665995, "grad_norm": 0.1495158109531524, "learning_rate": 2e-05, "loss": 5.4605, "step": 2288 }, { "epoch": 0.1535365730958849, "grad_norm": 0.14427889031991184, "learning_rate": 2e-05, "loss": 5.5328, "step": 2289 }, { "epoch": 0.15360364892510983, "grad_norm": 0.14293444331385835, "learning_rate": 2e-05, "loss": 5.4198, "step": 2290 }, { "epoch": 0.15367072475433477, "grad_norm": 0.14300788897448516, "learning_rate": 2e-05, "loss": 5.5457, "step": 2291 }, { "epoch": 0.1537378005835597, "grad_norm": 0.15839380993173519, "learning_rate": 2e-05, "loss": 5.4744, "step": 2292 }, { "epoch": 0.15380487641278465, "grad_norm": 0.14287159479701322, "learning_rate": 2e-05, "loss": 5.4212, "step": 2293 }, { "epoch": 0.15387195224200959, "grad_norm": 0.14656089975723957, "learning_rate": 2e-05, "loss": 5.3293, "step": 2294 }, { "epoch": 0.15393902807123452, "grad_norm": 0.14490025836650466, "learning_rate": 2e-05, "loss": 5.3642, "step": 2295 }, { "epoch": 0.15400610390045946, "grad_norm": 0.1443542456273993, "learning_rate": 2e-05, "loss": 5.5044, "step": 2296 }, { "epoch": 0.1540731797296844, "grad_norm": 0.1346579724718058, "learning_rate": 2e-05, "loss": 5.5314, "step": 2297 }, { "epoch": 0.15414025555890934, "grad_norm": 0.13990973010521732, "learning_rate": 2e-05, "loss": 5.3165, "step": 2298 }, { "epoch": 0.15420733138813428, "grad_norm": 0.1474743572771329, "learning_rate": 2e-05, "loss": 5.555, "step": 2299 }, { "epoch": 0.15427440721735922, "grad_norm": 0.13783941455984747, "learning_rate": 2e-05, "loss": 5.4094, "step": 2300 }, { "epoch": 0.15434148304658415, "grad_norm": 0.14385386809671397, "learning_rate": 2e-05, "loss": 5.3911, "step": 2301 }, { "epoch": 0.1544085588758091, "grad_norm": 0.15509563998690715, "learning_rate": 2e-05, "loss": 5.5374, "step": 2302 }, { "epoch": 0.15447563470503403, "grad_norm": 0.13849701367142336, "learning_rate": 2e-05, "loss": 5.4053, "step": 2303 }, { "epoch": 0.15454271053425897, "grad_norm": 0.1379753445525031, "learning_rate": 2e-05, "loss": 5.5365, "step": 2304 }, { "epoch": 0.1546097863634839, "grad_norm": 0.1463537822310483, "learning_rate": 2e-05, "loss": 5.6788, "step": 2305 }, { "epoch": 0.15467686219270885, "grad_norm": 0.14593362015315364, "learning_rate": 2e-05, "loss": 5.4789, "step": 2306 }, { "epoch": 0.15474393802193379, "grad_norm": 0.14128212457608336, "learning_rate": 2e-05, "loss": 5.3883, "step": 2307 }, { "epoch": 0.15481101385115872, "grad_norm": 0.14518735415621012, "learning_rate": 2e-05, "loss": 5.4495, "step": 2308 }, { "epoch": 0.15487808968038366, "grad_norm": 0.15297932827664257, "learning_rate": 2e-05, "loss": 5.4036, "step": 2309 }, { "epoch": 0.1549451655096086, "grad_norm": 0.14250968749806642, "learning_rate": 2e-05, "loss": 5.4629, "step": 2310 }, { "epoch": 0.15501224133883354, "grad_norm": 0.14176724327749027, "learning_rate": 2e-05, "loss": 5.5528, "step": 2311 }, { "epoch": 0.15507931716805848, "grad_norm": 0.15678985461689396, "learning_rate": 2e-05, "loss": 5.5408, "step": 2312 }, { "epoch": 0.15514639299728342, "grad_norm": 0.1438157660426479, "learning_rate": 2e-05, "loss": 5.3095, "step": 2313 }, { "epoch": 0.15521346882650836, "grad_norm": 0.14569620654338855, "learning_rate": 2e-05, "loss": 5.5339, "step": 2314 }, { "epoch": 0.1552805446557333, "grad_norm": 0.15447518775506502, "learning_rate": 2e-05, "loss": 5.3291, "step": 2315 }, { "epoch": 0.15534762048495823, "grad_norm": 0.14461796037760485, "learning_rate": 2e-05, "loss": 5.4785, "step": 2316 }, { "epoch": 0.15541469631418317, "grad_norm": 0.1460319735692185, "learning_rate": 2e-05, "loss": 5.4682, "step": 2317 }, { "epoch": 0.1554817721434081, "grad_norm": 0.15427368979106088, "learning_rate": 2e-05, "loss": 5.601, "step": 2318 }, { "epoch": 0.15554884797263308, "grad_norm": 0.14232126372826354, "learning_rate": 2e-05, "loss": 5.3739, "step": 2319 }, { "epoch": 0.155615923801858, "grad_norm": 0.14131855537227234, "learning_rate": 2e-05, "loss": 5.4119, "step": 2320 }, { "epoch": 0.15568299963108295, "grad_norm": 0.15281059650498724, "learning_rate": 2e-05, "loss": 5.4194, "step": 2321 }, { "epoch": 0.1557500754603079, "grad_norm": 0.1492611365731564, "learning_rate": 2e-05, "loss": 5.5631, "step": 2322 }, { "epoch": 0.15581715128953283, "grad_norm": 0.14218122528193228, "learning_rate": 2e-05, "loss": 5.4645, "step": 2323 }, { "epoch": 0.15588422711875777, "grad_norm": 0.14632708519101542, "learning_rate": 2e-05, "loss": 5.5426, "step": 2324 }, { "epoch": 0.1559513029479827, "grad_norm": 0.14857366712729925, "learning_rate": 2e-05, "loss": 5.3962, "step": 2325 }, { "epoch": 0.15601837877720764, "grad_norm": 0.1381742955875007, "learning_rate": 2e-05, "loss": 5.4067, "step": 2326 }, { "epoch": 0.15608545460643258, "grad_norm": 0.14093020594417033, "learning_rate": 2e-05, "loss": 5.3968, "step": 2327 }, { "epoch": 0.15615253043565752, "grad_norm": 0.15163084726916026, "learning_rate": 2e-05, "loss": 5.4639, "step": 2328 }, { "epoch": 0.15621960626488246, "grad_norm": 0.14052181429635524, "learning_rate": 2e-05, "loss": 5.3433, "step": 2329 }, { "epoch": 0.1562866820941074, "grad_norm": 0.14076437535567424, "learning_rate": 2e-05, "loss": 5.4577, "step": 2330 }, { "epoch": 0.15635375792333234, "grad_norm": 0.15994112976457536, "learning_rate": 2e-05, "loss": 5.5112, "step": 2331 }, { "epoch": 0.15642083375255728, "grad_norm": 0.14787053249484572, "learning_rate": 2e-05, "loss": 5.4783, "step": 2332 }, { "epoch": 0.15648790958178221, "grad_norm": 0.14379822738763856, "learning_rate": 2e-05, "loss": 5.4488, "step": 2333 }, { "epoch": 0.15655498541100715, "grad_norm": 0.14466737255612103, "learning_rate": 2e-05, "loss": 5.3279, "step": 2334 }, { "epoch": 0.1566220612402321, "grad_norm": 0.14234589272269482, "learning_rate": 2e-05, "loss": 5.337, "step": 2335 }, { "epoch": 0.15668913706945703, "grad_norm": 0.1474708564561655, "learning_rate": 2e-05, "loss": 5.4071, "step": 2336 }, { "epoch": 0.15675621289868197, "grad_norm": 0.15872848546892468, "learning_rate": 2e-05, "loss": 5.3319, "step": 2337 }, { "epoch": 0.1568232887279069, "grad_norm": 0.13906693290851507, "learning_rate": 2e-05, "loss": 5.4662, "step": 2338 }, { "epoch": 0.15689036455713185, "grad_norm": 0.146758494209616, "learning_rate": 2e-05, "loss": 5.5377, "step": 2339 }, { "epoch": 0.15695744038635678, "grad_norm": 0.14466956002255082, "learning_rate": 2e-05, "loss": 5.6068, "step": 2340 }, { "epoch": 0.15702451621558172, "grad_norm": 0.15101040715007039, "learning_rate": 2e-05, "loss": 5.4567, "step": 2341 }, { "epoch": 0.15709159204480666, "grad_norm": 0.1383889866808492, "learning_rate": 2e-05, "loss": 5.4893, "step": 2342 }, { "epoch": 0.1571586678740316, "grad_norm": 0.14576142055712432, "learning_rate": 2e-05, "loss": 5.4446, "step": 2343 }, { "epoch": 0.15722574370325654, "grad_norm": 0.14264499200962064, "learning_rate": 2e-05, "loss": 5.4257, "step": 2344 }, { "epoch": 0.15729281953248148, "grad_norm": 0.14047120332190827, "learning_rate": 2e-05, "loss": 5.423, "step": 2345 }, { "epoch": 0.15735989536170641, "grad_norm": 0.14000617612931074, "learning_rate": 2e-05, "loss": 5.4419, "step": 2346 }, { "epoch": 0.15742697119093135, "grad_norm": 0.13846066797179535, "learning_rate": 2e-05, "loss": 5.4984, "step": 2347 }, { "epoch": 0.1574940470201563, "grad_norm": 0.1452805748940401, "learning_rate": 2e-05, "loss": 5.4695, "step": 2348 }, { "epoch": 0.15756112284938123, "grad_norm": 0.14804840319473359, "learning_rate": 2e-05, "loss": 5.6158, "step": 2349 }, { "epoch": 0.15762819867860617, "grad_norm": 0.14148547182330085, "learning_rate": 2e-05, "loss": 5.5848, "step": 2350 }, { "epoch": 0.1576952745078311, "grad_norm": 0.15533229017682926, "learning_rate": 2e-05, "loss": 5.3411, "step": 2351 }, { "epoch": 0.15776235033705605, "grad_norm": 0.14671536178520045, "learning_rate": 2e-05, "loss": 5.4486, "step": 2352 }, { "epoch": 0.15782942616628098, "grad_norm": 0.14266904759102816, "learning_rate": 2e-05, "loss": 5.4372, "step": 2353 }, { "epoch": 0.15789650199550592, "grad_norm": 0.14255542211768477, "learning_rate": 2e-05, "loss": 5.3497, "step": 2354 }, { "epoch": 0.15796357782473086, "grad_norm": 0.14507958252532455, "learning_rate": 2e-05, "loss": 5.5019, "step": 2355 }, { "epoch": 0.1580306536539558, "grad_norm": 0.148113391721394, "learning_rate": 2e-05, "loss": 5.5866, "step": 2356 }, { "epoch": 0.15809772948318074, "grad_norm": 0.15192451042737692, "learning_rate": 2e-05, "loss": 5.2997, "step": 2357 }, { "epoch": 0.15816480531240568, "grad_norm": 0.14638840157250887, "learning_rate": 2e-05, "loss": 5.3291, "step": 2358 }, { "epoch": 0.15823188114163061, "grad_norm": 0.14401264352904009, "learning_rate": 2e-05, "loss": 5.4581, "step": 2359 }, { "epoch": 0.15829895697085555, "grad_norm": 0.14054968350110053, "learning_rate": 2e-05, "loss": 5.2867, "step": 2360 }, { "epoch": 0.1583660328000805, "grad_norm": 0.1442489339767662, "learning_rate": 2e-05, "loss": 5.5353, "step": 2361 }, { "epoch": 0.15843310862930543, "grad_norm": 0.14579198364257284, "learning_rate": 2e-05, "loss": 5.5005, "step": 2362 }, { "epoch": 0.15850018445853037, "grad_norm": 0.1447535931455502, "learning_rate": 2e-05, "loss": 5.4216, "step": 2363 }, { "epoch": 0.1585672602877553, "grad_norm": 0.14231432042940004, "learning_rate": 2e-05, "loss": 5.5694, "step": 2364 }, { "epoch": 0.15863433611698025, "grad_norm": 0.13863330570613475, "learning_rate": 2e-05, "loss": 5.5704, "step": 2365 }, { "epoch": 0.15870141194620518, "grad_norm": 0.14107277331954826, "learning_rate": 2e-05, "loss": 5.3608, "step": 2366 }, { "epoch": 0.15876848777543012, "grad_norm": 0.14620546600502357, "learning_rate": 2e-05, "loss": 5.4877, "step": 2367 }, { "epoch": 0.15883556360465506, "grad_norm": 0.1443168444763351, "learning_rate": 2e-05, "loss": 5.4214, "step": 2368 }, { "epoch": 0.15890263943388, "grad_norm": 0.15068525676391123, "learning_rate": 2e-05, "loss": 5.4563, "step": 2369 }, { "epoch": 0.15896971526310494, "grad_norm": 0.1451898049614518, "learning_rate": 2e-05, "loss": 5.4835, "step": 2370 }, { "epoch": 0.15903679109232988, "grad_norm": 0.14123767103545656, "learning_rate": 2e-05, "loss": 5.3891, "step": 2371 }, { "epoch": 0.15910386692155482, "grad_norm": 0.1418601006909018, "learning_rate": 2e-05, "loss": 5.5425, "step": 2372 }, { "epoch": 0.15917094275077975, "grad_norm": 0.1405122826624735, "learning_rate": 2e-05, "loss": 5.5063, "step": 2373 }, { "epoch": 0.1592380185800047, "grad_norm": 0.14601902513594311, "learning_rate": 2e-05, "loss": 5.3881, "step": 2374 }, { "epoch": 0.15930509440922963, "grad_norm": 0.14050065107798837, "learning_rate": 2e-05, "loss": 5.5875, "step": 2375 }, { "epoch": 0.15937217023845457, "grad_norm": 0.1472672005177831, "learning_rate": 2e-05, "loss": 5.556, "step": 2376 }, { "epoch": 0.1594392460676795, "grad_norm": 0.14121171956892806, "learning_rate": 2e-05, "loss": 5.466, "step": 2377 }, { "epoch": 0.15950632189690445, "grad_norm": 0.14198356157085337, "learning_rate": 2e-05, "loss": 5.5323, "step": 2378 }, { "epoch": 0.15957339772612938, "grad_norm": 0.14283160145559864, "learning_rate": 2e-05, "loss": 5.4047, "step": 2379 }, { "epoch": 0.15964047355535432, "grad_norm": 0.14424342998396564, "learning_rate": 2e-05, "loss": 5.3986, "step": 2380 }, { "epoch": 0.15970754938457926, "grad_norm": 0.14275374736912896, "learning_rate": 2e-05, "loss": 5.4472, "step": 2381 }, { "epoch": 0.1597746252138042, "grad_norm": 0.14055076114585294, "learning_rate": 2e-05, "loss": 5.4199, "step": 2382 }, { "epoch": 0.15984170104302914, "grad_norm": 0.14430590939194896, "learning_rate": 2e-05, "loss": 5.5807, "step": 2383 }, { "epoch": 0.15990877687225408, "grad_norm": 0.14568661492738738, "learning_rate": 2e-05, "loss": 5.4035, "step": 2384 }, { "epoch": 0.15997585270147902, "grad_norm": 0.14533866084154484, "learning_rate": 2e-05, "loss": 5.4999, "step": 2385 }, { "epoch": 0.16004292853070395, "grad_norm": 0.14165591007247472, "learning_rate": 2e-05, "loss": 5.4439, "step": 2386 }, { "epoch": 0.1601100043599289, "grad_norm": 0.15333755943418206, "learning_rate": 2e-05, "loss": 5.4862, "step": 2387 }, { "epoch": 0.16017708018915383, "grad_norm": 0.15097394370361303, "learning_rate": 2e-05, "loss": 5.463, "step": 2388 }, { "epoch": 0.16024415601837877, "grad_norm": 0.1437002248399569, "learning_rate": 2e-05, "loss": 5.3247, "step": 2389 }, { "epoch": 0.1603112318476037, "grad_norm": 0.15252247747533082, "learning_rate": 2e-05, "loss": 5.5615, "step": 2390 }, { "epoch": 0.16037830767682865, "grad_norm": 0.14718975005068968, "learning_rate": 2e-05, "loss": 5.4205, "step": 2391 }, { "epoch": 0.16044538350605358, "grad_norm": 0.1392312283211577, "learning_rate": 2e-05, "loss": 5.623, "step": 2392 }, { "epoch": 0.16051245933527852, "grad_norm": 0.1440021197958591, "learning_rate": 2e-05, "loss": 5.491, "step": 2393 }, { "epoch": 0.16057953516450346, "grad_norm": 0.14931590207611323, "learning_rate": 2e-05, "loss": 5.4724, "step": 2394 }, { "epoch": 0.1606466109937284, "grad_norm": 0.13793300310197534, "learning_rate": 2e-05, "loss": 5.5663, "step": 2395 }, { "epoch": 0.16071368682295334, "grad_norm": 0.14491600666368062, "learning_rate": 2e-05, "loss": 5.3338, "step": 2396 }, { "epoch": 0.16078076265217828, "grad_norm": 0.14624699069343033, "learning_rate": 2e-05, "loss": 5.4416, "step": 2397 }, { "epoch": 0.16084783848140322, "grad_norm": 0.14060469449896057, "learning_rate": 2e-05, "loss": 5.509, "step": 2398 }, { "epoch": 0.16091491431062815, "grad_norm": 0.14333667352401153, "learning_rate": 2e-05, "loss": 5.453, "step": 2399 }, { "epoch": 0.1609819901398531, "grad_norm": 0.1409207379905979, "learning_rate": 2e-05, "loss": 5.4672, "step": 2400 }, { "epoch": 0.16104906596907803, "grad_norm": 0.14727288087553977, "learning_rate": 2e-05, "loss": 5.3683, "step": 2401 }, { "epoch": 0.16111614179830297, "grad_norm": 0.1444922269327931, "learning_rate": 2e-05, "loss": 5.469, "step": 2402 }, { "epoch": 0.1611832176275279, "grad_norm": 0.14470715103006052, "learning_rate": 2e-05, "loss": 5.3595, "step": 2403 }, { "epoch": 0.16125029345675285, "grad_norm": 0.13843112904215, "learning_rate": 2e-05, "loss": 5.5286, "step": 2404 }, { "epoch": 0.16131736928597779, "grad_norm": 0.13596651182104355, "learning_rate": 2e-05, "loss": 5.4537, "step": 2405 }, { "epoch": 0.16138444511520272, "grad_norm": 0.1388494231211852, "learning_rate": 2e-05, "loss": 5.527, "step": 2406 }, { "epoch": 0.16145152094442766, "grad_norm": 0.1405488705765877, "learning_rate": 2e-05, "loss": 5.5408, "step": 2407 }, { "epoch": 0.1615185967736526, "grad_norm": 0.14393091950556156, "learning_rate": 2e-05, "loss": 5.3754, "step": 2408 }, { "epoch": 0.16158567260287757, "grad_norm": 0.1384426150831119, "learning_rate": 2e-05, "loss": 5.3615, "step": 2409 }, { "epoch": 0.1616527484321025, "grad_norm": 0.14487927622349348, "learning_rate": 2e-05, "loss": 5.4452, "step": 2410 }, { "epoch": 0.16171982426132744, "grad_norm": 0.14137470834390445, "learning_rate": 2e-05, "loss": 5.4825, "step": 2411 }, { "epoch": 0.16178690009055238, "grad_norm": 0.14263974961942927, "learning_rate": 2e-05, "loss": 5.4822, "step": 2412 }, { "epoch": 0.16185397591977732, "grad_norm": 0.14753571888811567, "learning_rate": 2e-05, "loss": 5.4323, "step": 2413 }, { "epoch": 0.16192105174900226, "grad_norm": 0.14181372368889528, "learning_rate": 2e-05, "loss": 5.4081, "step": 2414 }, { "epoch": 0.1619881275782272, "grad_norm": 0.14260603994467655, "learning_rate": 2e-05, "loss": 5.4608, "step": 2415 }, { "epoch": 0.16205520340745214, "grad_norm": 0.14392079443855477, "learning_rate": 2e-05, "loss": 5.4377, "step": 2416 }, { "epoch": 0.16212227923667707, "grad_norm": 0.14384362961327227, "learning_rate": 2e-05, "loss": 5.3727, "step": 2417 }, { "epoch": 0.162189355065902, "grad_norm": 0.13620456338263684, "learning_rate": 2e-05, "loss": 5.4398, "step": 2418 }, { "epoch": 0.16225643089512695, "grad_norm": 0.14453631292157756, "learning_rate": 2e-05, "loss": 5.3456, "step": 2419 }, { "epoch": 0.1623235067243519, "grad_norm": 0.148943054681391, "learning_rate": 2e-05, "loss": 5.4943, "step": 2420 }, { "epoch": 0.16239058255357683, "grad_norm": 0.14697834827808967, "learning_rate": 2e-05, "loss": 5.3806, "step": 2421 }, { "epoch": 0.16245765838280177, "grad_norm": 0.14614976762829437, "learning_rate": 2e-05, "loss": 5.4693, "step": 2422 }, { "epoch": 0.1625247342120267, "grad_norm": 0.1495011278735686, "learning_rate": 2e-05, "loss": 5.4211, "step": 2423 }, { "epoch": 0.16259181004125164, "grad_norm": 0.15181933473717937, "learning_rate": 2e-05, "loss": 5.5551, "step": 2424 }, { "epoch": 0.16265888587047658, "grad_norm": 0.1489569692650287, "learning_rate": 2e-05, "loss": 5.4797, "step": 2425 }, { "epoch": 0.16272596169970152, "grad_norm": 0.14704931376380545, "learning_rate": 2e-05, "loss": 5.4029, "step": 2426 }, { "epoch": 0.16279303752892646, "grad_norm": 0.15638987284030623, "learning_rate": 2e-05, "loss": 5.3929, "step": 2427 }, { "epoch": 0.1628601133581514, "grad_norm": 0.1415923895528077, "learning_rate": 2e-05, "loss": 5.5635, "step": 2428 }, { "epoch": 0.16292718918737634, "grad_norm": 0.14864587033960683, "learning_rate": 2e-05, "loss": 5.3037, "step": 2429 }, { "epoch": 0.16299426501660128, "grad_norm": 0.1540098858896406, "learning_rate": 2e-05, "loss": 5.3839, "step": 2430 }, { "epoch": 0.1630613408458262, "grad_norm": 0.14910669579702795, "learning_rate": 2e-05, "loss": 5.4609, "step": 2431 }, { "epoch": 0.16312841667505115, "grad_norm": 0.1537364439712487, "learning_rate": 2e-05, "loss": 5.4577, "step": 2432 }, { "epoch": 0.1631954925042761, "grad_norm": 0.14738337576787472, "learning_rate": 2e-05, "loss": 5.3937, "step": 2433 }, { "epoch": 0.16326256833350103, "grad_norm": 0.14695192696232734, "learning_rate": 2e-05, "loss": 5.4412, "step": 2434 }, { "epoch": 0.16332964416272597, "grad_norm": 0.13994657459932627, "learning_rate": 2e-05, "loss": 5.555, "step": 2435 }, { "epoch": 0.1633967199919509, "grad_norm": 0.15573085673056739, "learning_rate": 2e-05, "loss": 5.4626, "step": 2436 }, { "epoch": 0.16346379582117584, "grad_norm": 0.15569600994706778, "learning_rate": 2e-05, "loss": 5.6321, "step": 2437 }, { "epoch": 0.16353087165040078, "grad_norm": 0.14659400525148494, "learning_rate": 2e-05, "loss": 5.5168, "step": 2438 }, { "epoch": 0.16359794747962572, "grad_norm": 0.14710385566080444, "learning_rate": 2e-05, "loss": 5.6336, "step": 2439 }, { "epoch": 0.16366502330885066, "grad_norm": 0.14565712105286344, "learning_rate": 2e-05, "loss": 5.3387, "step": 2440 }, { "epoch": 0.1637320991380756, "grad_norm": 0.14829389956014913, "learning_rate": 2e-05, "loss": 5.456, "step": 2441 }, { "epoch": 0.16379917496730054, "grad_norm": 0.1483173395084134, "learning_rate": 2e-05, "loss": 5.3844, "step": 2442 }, { "epoch": 0.16386625079652548, "grad_norm": 0.14052437686159897, "learning_rate": 2e-05, "loss": 5.4255, "step": 2443 }, { "epoch": 0.16393332662575041, "grad_norm": 0.14391721692042328, "learning_rate": 2e-05, "loss": 5.5697, "step": 2444 }, { "epoch": 0.16400040245497535, "grad_norm": 0.1529104101336224, "learning_rate": 2e-05, "loss": 5.3822, "step": 2445 }, { "epoch": 0.1640674782842003, "grad_norm": 0.14719125296781704, "learning_rate": 2e-05, "loss": 5.415, "step": 2446 }, { "epoch": 0.16413455411342523, "grad_norm": 0.1374033884970539, "learning_rate": 2e-05, "loss": 5.518, "step": 2447 }, { "epoch": 0.16420162994265017, "grad_norm": 0.14612561144026992, "learning_rate": 2e-05, "loss": 5.4392, "step": 2448 }, { "epoch": 0.1642687057718751, "grad_norm": 0.1546508760281814, "learning_rate": 2e-05, "loss": 5.3576, "step": 2449 }, { "epoch": 0.16433578160110004, "grad_norm": 0.14718804437613583, "learning_rate": 2e-05, "loss": 5.3804, "step": 2450 }, { "epoch": 0.16440285743032498, "grad_norm": 0.14864599892015126, "learning_rate": 2e-05, "loss": 5.5916, "step": 2451 }, { "epoch": 0.16446993325954992, "grad_norm": 0.14704502735602804, "learning_rate": 2e-05, "loss": 5.4664, "step": 2452 }, { "epoch": 0.16453700908877486, "grad_norm": 0.14069335525647098, "learning_rate": 2e-05, "loss": 5.4435, "step": 2453 }, { "epoch": 0.1646040849179998, "grad_norm": 0.146616862770691, "learning_rate": 2e-05, "loss": 5.3489, "step": 2454 }, { "epoch": 0.16467116074722474, "grad_norm": 0.14104082784698949, "learning_rate": 2e-05, "loss": 5.43, "step": 2455 }, { "epoch": 0.16473823657644968, "grad_norm": 0.15559415172745028, "learning_rate": 2e-05, "loss": 5.5461, "step": 2456 }, { "epoch": 0.16480531240567461, "grad_norm": 0.14475506512930691, "learning_rate": 2e-05, "loss": 5.5084, "step": 2457 }, { "epoch": 0.16487238823489955, "grad_norm": 0.1395780744887203, "learning_rate": 2e-05, "loss": 5.4207, "step": 2458 }, { "epoch": 0.1649394640641245, "grad_norm": 0.1444394343684067, "learning_rate": 2e-05, "loss": 5.5522, "step": 2459 }, { "epoch": 0.16500653989334943, "grad_norm": 0.15230689604385875, "learning_rate": 2e-05, "loss": 5.5231, "step": 2460 }, { "epoch": 0.16507361572257437, "grad_norm": 0.14203627698267512, "learning_rate": 2e-05, "loss": 5.4083, "step": 2461 }, { "epoch": 0.1651406915517993, "grad_norm": 0.14622774936503205, "learning_rate": 2e-05, "loss": 5.5527, "step": 2462 }, { "epoch": 0.16520776738102425, "grad_norm": 0.13926649462720872, "learning_rate": 2e-05, "loss": 5.3688, "step": 2463 }, { "epoch": 0.16527484321024918, "grad_norm": 0.1468571309121751, "learning_rate": 2e-05, "loss": 5.5108, "step": 2464 }, { "epoch": 0.16534191903947412, "grad_norm": 0.1472747397156153, "learning_rate": 2e-05, "loss": 5.5592, "step": 2465 }, { "epoch": 0.16540899486869906, "grad_norm": 0.1479018245579689, "learning_rate": 2e-05, "loss": 5.4527, "step": 2466 }, { "epoch": 0.165476070697924, "grad_norm": 0.1435352654371062, "learning_rate": 2e-05, "loss": 5.4107, "step": 2467 }, { "epoch": 0.16554314652714894, "grad_norm": 0.14711805060894925, "learning_rate": 2e-05, "loss": 5.4536, "step": 2468 }, { "epoch": 0.16561022235637388, "grad_norm": 0.14226674667382594, "learning_rate": 2e-05, "loss": 5.4629, "step": 2469 }, { "epoch": 0.16567729818559881, "grad_norm": 0.14123014462152864, "learning_rate": 2e-05, "loss": 5.4486, "step": 2470 }, { "epoch": 0.16574437401482375, "grad_norm": 0.14037503317714725, "learning_rate": 2e-05, "loss": 5.5148, "step": 2471 }, { "epoch": 0.1658114498440487, "grad_norm": 0.14691746862422148, "learning_rate": 2e-05, "loss": 5.4775, "step": 2472 }, { "epoch": 0.16587852567327363, "grad_norm": 0.14011376358581162, "learning_rate": 2e-05, "loss": 5.5388, "step": 2473 }, { "epoch": 0.16594560150249857, "grad_norm": 0.14474927034015614, "learning_rate": 2e-05, "loss": 5.5722, "step": 2474 }, { "epoch": 0.1660126773317235, "grad_norm": 0.14983012045777636, "learning_rate": 2e-05, "loss": 5.4251, "step": 2475 }, { "epoch": 0.16607975316094845, "grad_norm": 0.14596881905416767, "learning_rate": 2e-05, "loss": 5.51, "step": 2476 }, { "epoch": 0.16614682899017338, "grad_norm": 0.1423328884456758, "learning_rate": 2e-05, "loss": 5.523, "step": 2477 }, { "epoch": 0.16621390481939832, "grad_norm": 0.14970430880858662, "learning_rate": 2e-05, "loss": 5.3283, "step": 2478 }, { "epoch": 0.16628098064862326, "grad_norm": 0.14071292896562543, "learning_rate": 2e-05, "loss": 5.4286, "step": 2479 }, { "epoch": 0.1663480564778482, "grad_norm": 0.1403332150501888, "learning_rate": 2e-05, "loss": 5.5011, "step": 2480 }, { "epoch": 0.16641513230707314, "grad_norm": 0.1488165086606482, "learning_rate": 2e-05, "loss": 5.529, "step": 2481 }, { "epoch": 0.16648220813629808, "grad_norm": 0.14863697784443577, "learning_rate": 2e-05, "loss": 5.3631, "step": 2482 }, { "epoch": 0.16654928396552302, "grad_norm": 0.1582226178701242, "learning_rate": 2e-05, "loss": 5.5396, "step": 2483 }, { "epoch": 0.16661635979474795, "grad_norm": 0.146555542941495, "learning_rate": 2e-05, "loss": 5.5681, "step": 2484 }, { "epoch": 0.1666834356239729, "grad_norm": 0.143759175618965, "learning_rate": 2e-05, "loss": 5.4657, "step": 2485 }, { "epoch": 0.16675051145319783, "grad_norm": 0.14591120245358957, "learning_rate": 2e-05, "loss": 5.3344, "step": 2486 }, { "epoch": 0.16681758728242277, "grad_norm": 0.1443768288311367, "learning_rate": 2e-05, "loss": 5.5123, "step": 2487 }, { "epoch": 0.1668846631116477, "grad_norm": 0.1517836606210499, "learning_rate": 2e-05, "loss": 5.4953, "step": 2488 }, { "epoch": 0.16695173894087265, "grad_norm": 0.15018255158157978, "learning_rate": 2e-05, "loss": 5.3909, "step": 2489 }, { "epoch": 0.16701881477009758, "grad_norm": 0.15408063460111734, "learning_rate": 2e-05, "loss": 5.4457, "step": 2490 }, { "epoch": 0.16708589059932252, "grad_norm": 0.15581964816728078, "learning_rate": 2e-05, "loss": 5.3865, "step": 2491 }, { "epoch": 0.16715296642854746, "grad_norm": 0.14446025666215612, "learning_rate": 2e-05, "loss": 5.5154, "step": 2492 }, { "epoch": 0.1672200422577724, "grad_norm": 0.15337181690999274, "learning_rate": 2e-05, "loss": 5.4878, "step": 2493 }, { "epoch": 0.16728711808699734, "grad_norm": 0.15383406617498016, "learning_rate": 2e-05, "loss": 5.4583, "step": 2494 }, { "epoch": 0.16735419391622228, "grad_norm": 0.14502013863342825, "learning_rate": 2e-05, "loss": 5.2352, "step": 2495 }, { "epoch": 0.16742126974544722, "grad_norm": 0.15086844840347172, "learning_rate": 2e-05, "loss": 5.3865, "step": 2496 }, { "epoch": 0.16748834557467215, "grad_norm": 0.15129741960051374, "learning_rate": 2e-05, "loss": 5.6155, "step": 2497 }, { "epoch": 0.1675554214038971, "grad_norm": 0.14769741057473393, "learning_rate": 2e-05, "loss": 5.3708, "step": 2498 }, { "epoch": 0.16762249723312203, "grad_norm": 0.1428227216032321, "learning_rate": 2e-05, "loss": 5.5101, "step": 2499 }, { "epoch": 0.167689573062347, "grad_norm": 0.153125230388444, "learning_rate": 2e-05, "loss": 5.3704, "step": 2500 }, { "epoch": 0.16775664889157194, "grad_norm": 0.14334216860909743, "learning_rate": 2e-05, "loss": 5.4267, "step": 2501 }, { "epoch": 0.16782372472079687, "grad_norm": 0.14051468999010686, "learning_rate": 2e-05, "loss": 5.505, "step": 2502 }, { "epoch": 0.1678908005500218, "grad_norm": 0.1463822354280018, "learning_rate": 2e-05, "loss": 5.5593, "step": 2503 }, { "epoch": 0.16795787637924675, "grad_norm": 0.15397087365010878, "learning_rate": 2e-05, "loss": 5.4199, "step": 2504 }, { "epoch": 0.1680249522084717, "grad_norm": 0.1503609189562958, "learning_rate": 2e-05, "loss": 5.489, "step": 2505 }, { "epoch": 0.16809202803769663, "grad_norm": 0.15221787536572576, "learning_rate": 2e-05, "loss": 5.4132, "step": 2506 }, { "epoch": 0.16815910386692157, "grad_norm": 0.14121505644195256, "learning_rate": 2e-05, "loss": 5.5514, "step": 2507 }, { "epoch": 0.1682261796961465, "grad_norm": 0.14247784496313234, "learning_rate": 2e-05, "loss": 5.3842, "step": 2508 }, { "epoch": 0.16829325552537144, "grad_norm": 0.13904329871335092, "learning_rate": 2e-05, "loss": 5.5479, "step": 2509 }, { "epoch": 0.16836033135459638, "grad_norm": 0.14258450757056296, "learning_rate": 2e-05, "loss": 5.5255, "step": 2510 }, { "epoch": 0.16842740718382132, "grad_norm": 0.14763348743247004, "learning_rate": 2e-05, "loss": 5.5388, "step": 2511 }, { "epoch": 0.16849448301304626, "grad_norm": 0.14246726380123792, "learning_rate": 2e-05, "loss": 5.4087, "step": 2512 }, { "epoch": 0.1685615588422712, "grad_norm": 0.14549953143961375, "learning_rate": 2e-05, "loss": 5.5256, "step": 2513 }, { "epoch": 0.16862863467149614, "grad_norm": 0.15488517306874744, "learning_rate": 2e-05, "loss": 5.4583, "step": 2514 }, { "epoch": 0.16869571050072107, "grad_norm": 0.14661708893609032, "learning_rate": 2e-05, "loss": 5.5287, "step": 2515 }, { "epoch": 0.168762786329946, "grad_norm": 0.14498820294071055, "learning_rate": 2e-05, "loss": 5.4559, "step": 2516 }, { "epoch": 0.16882986215917095, "grad_norm": 0.15534088412557862, "learning_rate": 2e-05, "loss": 5.4762, "step": 2517 }, { "epoch": 0.1688969379883959, "grad_norm": 0.14469022445763455, "learning_rate": 2e-05, "loss": 5.44, "step": 2518 }, { "epoch": 0.16896401381762083, "grad_norm": 0.1491309692918812, "learning_rate": 2e-05, "loss": 5.3983, "step": 2519 }, { "epoch": 0.16903108964684577, "grad_norm": 0.14412939240514347, "learning_rate": 2e-05, "loss": 5.3184, "step": 2520 }, { "epoch": 0.1690981654760707, "grad_norm": 0.14863584431136675, "learning_rate": 2e-05, "loss": 5.4974, "step": 2521 }, { "epoch": 0.16916524130529564, "grad_norm": 0.1457113867389181, "learning_rate": 2e-05, "loss": 5.4469, "step": 2522 }, { "epoch": 0.16923231713452058, "grad_norm": 0.14586032189747342, "learning_rate": 2e-05, "loss": 5.4538, "step": 2523 }, { "epoch": 0.16929939296374552, "grad_norm": 0.14436916075773082, "learning_rate": 2e-05, "loss": 5.3812, "step": 2524 }, { "epoch": 0.16936646879297046, "grad_norm": 0.1438460089824862, "learning_rate": 2e-05, "loss": 5.5459, "step": 2525 }, { "epoch": 0.1694335446221954, "grad_norm": 0.14858133505452512, "learning_rate": 2e-05, "loss": 5.2766, "step": 2526 }, { "epoch": 0.16950062045142034, "grad_norm": 0.14327387565957866, "learning_rate": 2e-05, "loss": 5.3842, "step": 2527 }, { "epoch": 0.16956769628064527, "grad_norm": 0.13766648705931764, "learning_rate": 2e-05, "loss": 5.495, "step": 2528 }, { "epoch": 0.1696347721098702, "grad_norm": 0.14375311309086336, "learning_rate": 2e-05, "loss": 5.4773, "step": 2529 }, { "epoch": 0.16970184793909515, "grad_norm": 0.15714557625651787, "learning_rate": 2e-05, "loss": 5.4038, "step": 2530 }, { "epoch": 0.1697689237683201, "grad_norm": 0.14878397673859023, "learning_rate": 2e-05, "loss": 5.4108, "step": 2531 }, { "epoch": 0.16983599959754503, "grad_norm": 0.14568128767722408, "learning_rate": 2e-05, "loss": 5.531, "step": 2532 }, { "epoch": 0.16990307542676997, "grad_norm": 0.14998663230601098, "learning_rate": 2e-05, "loss": 5.5074, "step": 2533 }, { "epoch": 0.1699701512559949, "grad_norm": 0.1532521644342136, "learning_rate": 2e-05, "loss": 5.603, "step": 2534 }, { "epoch": 0.17003722708521984, "grad_norm": 0.15190230926023732, "learning_rate": 2e-05, "loss": 5.4622, "step": 2535 }, { "epoch": 0.17010430291444478, "grad_norm": 0.14891375046742839, "learning_rate": 2e-05, "loss": 5.446, "step": 2536 }, { "epoch": 0.17017137874366972, "grad_norm": 0.14116277914526928, "learning_rate": 2e-05, "loss": 5.4213, "step": 2537 }, { "epoch": 0.17023845457289466, "grad_norm": 0.1597181887843393, "learning_rate": 2e-05, "loss": 5.5109, "step": 2538 }, { "epoch": 0.1703055304021196, "grad_norm": 0.14692313895599957, "learning_rate": 2e-05, "loss": 5.626, "step": 2539 }, { "epoch": 0.17037260623134454, "grad_norm": 0.13903869536577884, "learning_rate": 2e-05, "loss": 5.4246, "step": 2540 }, { "epoch": 0.17043968206056948, "grad_norm": 0.14981260472766256, "learning_rate": 2e-05, "loss": 5.2405, "step": 2541 }, { "epoch": 0.1705067578897944, "grad_norm": 0.15502904623142638, "learning_rate": 2e-05, "loss": 5.4533, "step": 2542 }, { "epoch": 0.17057383371901935, "grad_norm": 0.14405677478882048, "learning_rate": 2e-05, "loss": 5.3952, "step": 2543 }, { "epoch": 0.1706409095482443, "grad_norm": 0.14452201132290635, "learning_rate": 2e-05, "loss": 5.4525, "step": 2544 }, { "epoch": 0.17070798537746923, "grad_norm": 0.15316940108836102, "learning_rate": 2e-05, "loss": 5.4131, "step": 2545 }, { "epoch": 0.17077506120669417, "grad_norm": 0.14904761356248086, "learning_rate": 2e-05, "loss": 5.5189, "step": 2546 }, { "epoch": 0.1708421370359191, "grad_norm": 0.14516760933144324, "learning_rate": 2e-05, "loss": 5.4998, "step": 2547 }, { "epoch": 0.17090921286514404, "grad_norm": 0.15616887055689624, "learning_rate": 2e-05, "loss": 5.4678, "step": 2548 }, { "epoch": 0.17097628869436898, "grad_norm": 0.15939941201040944, "learning_rate": 2e-05, "loss": 5.2871, "step": 2549 }, { "epoch": 0.17104336452359392, "grad_norm": 0.13941306936917536, "learning_rate": 2e-05, "loss": 5.4881, "step": 2550 }, { "epoch": 0.17111044035281886, "grad_norm": 0.14942280679639994, "learning_rate": 2e-05, "loss": 5.6343, "step": 2551 }, { "epoch": 0.1711775161820438, "grad_norm": 0.14657035317538097, "learning_rate": 2e-05, "loss": 5.338, "step": 2552 }, { "epoch": 0.17124459201126874, "grad_norm": 0.14399463887162492, "learning_rate": 2e-05, "loss": 5.4352, "step": 2553 }, { "epoch": 0.17131166784049368, "grad_norm": 0.14313600762184503, "learning_rate": 2e-05, "loss": 5.3833, "step": 2554 }, { "epoch": 0.17137874366971861, "grad_norm": 0.13820762699174657, "learning_rate": 2e-05, "loss": 5.4736, "step": 2555 }, { "epoch": 0.17144581949894355, "grad_norm": 0.14277506653599614, "learning_rate": 2e-05, "loss": 5.5234, "step": 2556 }, { "epoch": 0.1715128953281685, "grad_norm": 0.13722279723212297, "learning_rate": 2e-05, "loss": 5.353, "step": 2557 }, { "epoch": 0.17157997115739343, "grad_norm": 0.14309688166331935, "learning_rate": 2e-05, "loss": 5.5954, "step": 2558 }, { "epoch": 0.17164704698661837, "grad_norm": 0.1431883071968044, "learning_rate": 2e-05, "loss": 5.3412, "step": 2559 }, { "epoch": 0.1717141228158433, "grad_norm": 0.1385769118567593, "learning_rate": 2e-05, "loss": 5.4145, "step": 2560 }, { "epoch": 0.17178119864506824, "grad_norm": 0.13683091479682571, "learning_rate": 2e-05, "loss": 5.3548, "step": 2561 }, { "epoch": 0.17184827447429318, "grad_norm": 0.1507851029788443, "learning_rate": 2e-05, "loss": 5.5468, "step": 2562 }, { "epoch": 0.17191535030351812, "grad_norm": 0.149524309959163, "learning_rate": 2e-05, "loss": 5.3498, "step": 2563 }, { "epoch": 0.17198242613274306, "grad_norm": 0.14025023771948, "learning_rate": 2e-05, "loss": 5.4778, "step": 2564 }, { "epoch": 0.172049501961968, "grad_norm": 0.14977864327059184, "learning_rate": 2e-05, "loss": 5.3942, "step": 2565 }, { "epoch": 0.17211657779119294, "grad_norm": 0.14670199837453518, "learning_rate": 2e-05, "loss": 5.5356, "step": 2566 }, { "epoch": 0.17218365362041788, "grad_norm": 0.14096757608258875, "learning_rate": 2e-05, "loss": 5.3267, "step": 2567 }, { "epoch": 0.17225072944964281, "grad_norm": 0.13820350684768012, "learning_rate": 2e-05, "loss": 5.5, "step": 2568 }, { "epoch": 0.17231780527886775, "grad_norm": 0.14178975332184338, "learning_rate": 2e-05, "loss": 5.3783, "step": 2569 }, { "epoch": 0.1723848811080927, "grad_norm": 0.14966146070522898, "learning_rate": 2e-05, "loss": 5.4972, "step": 2570 }, { "epoch": 0.17245195693731763, "grad_norm": 0.1447915902058335, "learning_rate": 2e-05, "loss": 5.4362, "step": 2571 }, { "epoch": 0.17251903276654257, "grad_norm": 0.14400967710915882, "learning_rate": 2e-05, "loss": 5.4193, "step": 2572 }, { "epoch": 0.1725861085957675, "grad_norm": 0.14221123064682598, "learning_rate": 2e-05, "loss": 5.5736, "step": 2573 }, { "epoch": 0.17265318442499245, "grad_norm": 0.14181898824140626, "learning_rate": 2e-05, "loss": 5.5926, "step": 2574 }, { "epoch": 0.17272026025421738, "grad_norm": 0.1379653535792752, "learning_rate": 2e-05, "loss": 5.4791, "step": 2575 }, { "epoch": 0.17278733608344232, "grad_norm": 0.14432015873467263, "learning_rate": 2e-05, "loss": 5.3382, "step": 2576 }, { "epoch": 0.17285441191266726, "grad_norm": 0.14133352979716302, "learning_rate": 2e-05, "loss": 5.5156, "step": 2577 }, { "epoch": 0.1729214877418922, "grad_norm": 0.1406533973839638, "learning_rate": 2e-05, "loss": 5.3921, "step": 2578 }, { "epoch": 0.17298856357111714, "grad_norm": 0.14107192407303223, "learning_rate": 2e-05, "loss": 5.5486, "step": 2579 }, { "epoch": 0.17305563940034208, "grad_norm": 0.1454510521294858, "learning_rate": 2e-05, "loss": 5.5314, "step": 2580 }, { "epoch": 0.17312271522956701, "grad_norm": 0.14265554324679652, "learning_rate": 2e-05, "loss": 5.3908, "step": 2581 }, { "epoch": 0.17318979105879195, "grad_norm": 0.1498309314122728, "learning_rate": 2e-05, "loss": 5.5536, "step": 2582 }, { "epoch": 0.1732568668880169, "grad_norm": 0.13908207804614398, "learning_rate": 2e-05, "loss": 5.4089, "step": 2583 }, { "epoch": 0.17332394271724183, "grad_norm": 0.14382631155236927, "learning_rate": 2e-05, "loss": 5.5428, "step": 2584 }, { "epoch": 0.17339101854646677, "grad_norm": 0.15081781863531626, "learning_rate": 2e-05, "loss": 5.4121, "step": 2585 }, { "epoch": 0.1734580943756917, "grad_norm": 0.14714714347337973, "learning_rate": 2e-05, "loss": 5.5223, "step": 2586 }, { "epoch": 0.17352517020491665, "grad_norm": 0.14297866772501977, "learning_rate": 2e-05, "loss": 5.4195, "step": 2587 }, { "epoch": 0.17359224603414158, "grad_norm": 0.1457451048375711, "learning_rate": 2e-05, "loss": 5.5022, "step": 2588 }, { "epoch": 0.17365932186336652, "grad_norm": 0.14653831179435475, "learning_rate": 2e-05, "loss": 5.4827, "step": 2589 }, { "epoch": 0.17372639769259146, "grad_norm": 0.14882892534755696, "learning_rate": 2e-05, "loss": 5.4405, "step": 2590 }, { "epoch": 0.17379347352181643, "grad_norm": 0.14080708104937065, "learning_rate": 2e-05, "loss": 5.3888, "step": 2591 }, { "epoch": 0.17386054935104137, "grad_norm": 0.14368760097482722, "learning_rate": 2e-05, "loss": 5.406, "step": 2592 }, { "epoch": 0.1739276251802663, "grad_norm": 0.14700209159404232, "learning_rate": 2e-05, "loss": 5.5815, "step": 2593 }, { "epoch": 0.17399470100949124, "grad_norm": 0.14441299524057935, "learning_rate": 2e-05, "loss": 5.531, "step": 2594 }, { "epoch": 0.17406177683871618, "grad_norm": 0.14195813061211418, "learning_rate": 2e-05, "loss": 5.42, "step": 2595 }, { "epoch": 0.17412885266794112, "grad_norm": 0.1385742418863064, "learning_rate": 2e-05, "loss": 5.4735, "step": 2596 }, { "epoch": 0.17419592849716606, "grad_norm": 0.13999079413236934, "learning_rate": 2e-05, "loss": 5.5407, "step": 2597 }, { "epoch": 0.174263004326391, "grad_norm": 0.1381939618223655, "learning_rate": 2e-05, "loss": 5.2716, "step": 2598 }, { "epoch": 0.17433008015561594, "grad_norm": 0.1435419586298622, "learning_rate": 2e-05, "loss": 5.4596, "step": 2599 }, { "epoch": 0.17439715598484087, "grad_norm": 0.14055597295911545, "learning_rate": 2e-05, "loss": 5.4539, "step": 2600 }, { "epoch": 0.1744642318140658, "grad_norm": 0.14763784106228464, "learning_rate": 2e-05, "loss": 5.5546, "step": 2601 }, { "epoch": 0.17453130764329075, "grad_norm": 0.14415645761204046, "learning_rate": 2e-05, "loss": 5.4906, "step": 2602 }, { "epoch": 0.1745983834725157, "grad_norm": 0.14195762727198927, "learning_rate": 2e-05, "loss": 5.5428, "step": 2603 }, { "epoch": 0.17466545930174063, "grad_norm": 0.1450020273484119, "learning_rate": 2e-05, "loss": 5.5559, "step": 2604 }, { "epoch": 0.17473253513096557, "grad_norm": 0.15141374075155653, "learning_rate": 2e-05, "loss": 5.4512, "step": 2605 }, { "epoch": 0.1747996109601905, "grad_norm": 0.14230648253803874, "learning_rate": 2e-05, "loss": 5.3241, "step": 2606 }, { "epoch": 0.17486668678941544, "grad_norm": 0.14864009509934972, "learning_rate": 2e-05, "loss": 5.3433, "step": 2607 }, { "epoch": 0.17493376261864038, "grad_norm": 0.1550631145366756, "learning_rate": 2e-05, "loss": 5.5168, "step": 2608 }, { "epoch": 0.17500083844786532, "grad_norm": 0.1438838288894027, "learning_rate": 2e-05, "loss": 5.5118, "step": 2609 }, { "epoch": 0.17506791427709026, "grad_norm": 0.14032889457994752, "learning_rate": 2e-05, "loss": 5.3631, "step": 2610 }, { "epoch": 0.1751349901063152, "grad_norm": 0.15628384122754785, "learning_rate": 2e-05, "loss": 5.5051, "step": 2611 }, { "epoch": 0.17520206593554014, "grad_norm": 0.15177421551967998, "learning_rate": 2e-05, "loss": 5.3739, "step": 2612 }, { "epoch": 0.17526914176476507, "grad_norm": 0.148069587949307, "learning_rate": 2e-05, "loss": 5.4031, "step": 2613 }, { "epoch": 0.17533621759399, "grad_norm": 0.1483591440474578, "learning_rate": 2e-05, "loss": 5.336, "step": 2614 }, { "epoch": 0.17540329342321495, "grad_norm": 0.14660471756854285, "learning_rate": 2e-05, "loss": 5.4489, "step": 2615 }, { "epoch": 0.1754703692524399, "grad_norm": 0.1463103057879146, "learning_rate": 2e-05, "loss": 5.3364, "step": 2616 }, { "epoch": 0.17553744508166483, "grad_norm": 0.1375757834191774, "learning_rate": 2e-05, "loss": 5.4093, "step": 2617 }, { "epoch": 0.17560452091088977, "grad_norm": 0.14765199128969533, "learning_rate": 2e-05, "loss": 5.4037, "step": 2618 }, { "epoch": 0.1756715967401147, "grad_norm": 0.14717473347746984, "learning_rate": 2e-05, "loss": 5.4089, "step": 2619 }, { "epoch": 0.17573867256933964, "grad_norm": 0.14380617346966226, "learning_rate": 2e-05, "loss": 5.4359, "step": 2620 }, { "epoch": 0.17580574839856458, "grad_norm": 0.1421633363916329, "learning_rate": 2e-05, "loss": 5.5549, "step": 2621 }, { "epoch": 0.17587282422778952, "grad_norm": 0.1442571869768685, "learning_rate": 2e-05, "loss": 5.3888, "step": 2622 }, { "epoch": 0.17593990005701446, "grad_norm": 0.14656364103136127, "learning_rate": 2e-05, "loss": 5.465, "step": 2623 }, { "epoch": 0.1760069758862394, "grad_norm": 0.1456528183909178, "learning_rate": 2e-05, "loss": 5.4578, "step": 2624 }, { "epoch": 0.17607405171546434, "grad_norm": 0.15195099716392566, "learning_rate": 2e-05, "loss": 5.5, "step": 2625 }, { "epoch": 0.17614112754468927, "grad_norm": 0.15006684856112534, "learning_rate": 2e-05, "loss": 5.4918, "step": 2626 }, { "epoch": 0.1762082033739142, "grad_norm": 0.1431274176488639, "learning_rate": 2e-05, "loss": 5.428, "step": 2627 }, { "epoch": 0.17627527920313915, "grad_norm": 0.14942317917910963, "learning_rate": 2e-05, "loss": 5.5498, "step": 2628 }, { "epoch": 0.1763423550323641, "grad_norm": 0.15221842826883383, "learning_rate": 2e-05, "loss": 5.3539, "step": 2629 }, { "epoch": 0.17640943086158903, "grad_norm": 0.14446214704974156, "learning_rate": 2e-05, "loss": 5.5342, "step": 2630 }, { "epoch": 0.17647650669081397, "grad_norm": 0.1414175728282391, "learning_rate": 2e-05, "loss": 5.597, "step": 2631 }, { "epoch": 0.1765435825200389, "grad_norm": 0.14161112375737342, "learning_rate": 2e-05, "loss": 5.4085, "step": 2632 }, { "epoch": 0.17661065834926384, "grad_norm": 0.15247717739353916, "learning_rate": 2e-05, "loss": 5.539, "step": 2633 }, { "epoch": 0.17667773417848878, "grad_norm": 0.14753757360803704, "learning_rate": 2e-05, "loss": 5.5711, "step": 2634 }, { "epoch": 0.17674481000771372, "grad_norm": 0.14763146888509768, "learning_rate": 2e-05, "loss": 5.397, "step": 2635 }, { "epoch": 0.17681188583693866, "grad_norm": 0.14798587618270526, "learning_rate": 2e-05, "loss": 5.4314, "step": 2636 }, { "epoch": 0.1768789616661636, "grad_norm": 0.1425484358944931, "learning_rate": 2e-05, "loss": 5.2556, "step": 2637 }, { "epoch": 0.17694603749538854, "grad_norm": 0.14269894422658494, "learning_rate": 2e-05, "loss": 5.3461, "step": 2638 }, { "epoch": 0.17701311332461347, "grad_norm": 0.14166497812989312, "learning_rate": 2e-05, "loss": 5.3771, "step": 2639 }, { "epoch": 0.1770801891538384, "grad_norm": 0.1391571415376557, "learning_rate": 2e-05, "loss": 5.4219, "step": 2640 }, { "epoch": 0.17714726498306335, "grad_norm": 0.14746512084361776, "learning_rate": 2e-05, "loss": 5.4347, "step": 2641 }, { "epoch": 0.1772143408122883, "grad_norm": 0.14116341505120372, "learning_rate": 2e-05, "loss": 5.457, "step": 2642 }, { "epoch": 0.17728141664151323, "grad_norm": 0.1384718884831778, "learning_rate": 2e-05, "loss": 5.5101, "step": 2643 }, { "epoch": 0.17734849247073817, "grad_norm": 0.1452093019498892, "learning_rate": 2e-05, "loss": 5.5889, "step": 2644 }, { "epoch": 0.1774155682999631, "grad_norm": 0.14323930671024596, "learning_rate": 2e-05, "loss": 5.4952, "step": 2645 }, { "epoch": 0.17748264412918804, "grad_norm": 0.1434597925335034, "learning_rate": 2e-05, "loss": 5.5873, "step": 2646 }, { "epoch": 0.17754971995841298, "grad_norm": 0.1472814790240145, "learning_rate": 2e-05, "loss": 5.4212, "step": 2647 }, { "epoch": 0.17761679578763792, "grad_norm": 0.14723157954616875, "learning_rate": 2e-05, "loss": 5.4142, "step": 2648 }, { "epoch": 0.17768387161686286, "grad_norm": 0.1424632648440163, "learning_rate": 2e-05, "loss": 5.6168, "step": 2649 }, { "epoch": 0.1777509474460878, "grad_norm": 0.1478986383009081, "learning_rate": 2e-05, "loss": 5.5999, "step": 2650 }, { "epoch": 0.17781802327531274, "grad_norm": 0.13960371808328045, "learning_rate": 2e-05, "loss": 5.4329, "step": 2651 }, { "epoch": 0.17788509910453768, "grad_norm": 0.1525053275354145, "learning_rate": 2e-05, "loss": 5.3542, "step": 2652 }, { "epoch": 0.1779521749337626, "grad_norm": 0.13802781553167734, "learning_rate": 2e-05, "loss": 5.5051, "step": 2653 }, { "epoch": 0.17801925076298755, "grad_norm": 0.14738663285153394, "learning_rate": 2e-05, "loss": 5.4788, "step": 2654 }, { "epoch": 0.1780863265922125, "grad_norm": 0.1479337981843923, "learning_rate": 2e-05, "loss": 5.3711, "step": 2655 }, { "epoch": 0.17815340242143743, "grad_norm": 0.15134046369634582, "learning_rate": 2e-05, "loss": 5.3958, "step": 2656 }, { "epoch": 0.17822047825066237, "grad_norm": 0.13970549209905672, "learning_rate": 2e-05, "loss": 5.6274, "step": 2657 }, { "epoch": 0.1782875540798873, "grad_norm": 0.14311049887555619, "learning_rate": 2e-05, "loss": 5.4381, "step": 2658 }, { "epoch": 0.17835462990911224, "grad_norm": 0.15183222374207075, "learning_rate": 2e-05, "loss": 5.5167, "step": 2659 }, { "epoch": 0.17842170573833718, "grad_norm": 0.15369356492098218, "learning_rate": 2e-05, "loss": 5.4665, "step": 2660 }, { "epoch": 0.17848878156756212, "grad_norm": 0.15517384157640332, "learning_rate": 2e-05, "loss": 5.3945, "step": 2661 }, { "epoch": 0.17855585739678706, "grad_norm": 0.1432847080804353, "learning_rate": 2e-05, "loss": 5.57, "step": 2662 }, { "epoch": 0.178622933226012, "grad_norm": 0.14467138511265848, "learning_rate": 2e-05, "loss": 5.5256, "step": 2663 }, { "epoch": 0.17869000905523694, "grad_norm": 0.14325072474701503, "learning_rate": 2e-05, "loss": 5.4998, "step": 2664 }, { "epoch": 0.17875708488446188, "grad_norm": 0.14083759093790876, "learning_rate": 2e-05, "loss": 5.512, "step": 2665 }, { "epoch": 0.1788241607136868, "grad_norm": 0.1418019764393983, "learning_rate": 2e-05, "loss": 5.4711, "step": 2666 }, { "epoch": 0.17889123654291175, "grad_norm": 0.1465686497629299, "learning_rate": 2e-05, "loss": 5.5252, "step": 2667 }, { "epoch": 0.1789583123721367, "grad_norm": 0.14881814625406883, "learning_rate": 2e-05, "loss": 5.4585, "step": 2668 }, { "epoch": 0.17902538820136163, "grad_norm": 0.14785337629111087, "learning_rate": 2e-05, "loss": 5.5233, "step": 2669 }, { "epoch": 0.17909246403058657, "grad_norm": 0.14261456501281292, "learning_rate": 2e-05, "loss": 5.4047, "step": 2670 }, { "epoch": 0.1791595398598115, "grad_norm": 0.14375018882959187, "learning_rate": 2e-05, "loss": 5.3515, "step": 2671 }, { "epoch": 0.17922661568903644, "grad_norm": 0.14038132868925005, "learning_rate": 2e-05, "loss": 5.4422, "step": 2672 }, { "epoch": 0.17929369151826138, "grad_norm": 0.13987256477233506, "learning_rate": 2e-05, "loss": 5.382, "step": 2673 }, { "epoch": 0.17936076734748632, "grad_norm": 0.14011649115468783, "learning_rate": 2e-05, "loss": 5.6314, "step": 2674 }, { "epoch": 0.17942784317671126, "grad_norm": 0.14884877473985555, "learning_rate": 2e-05, "loss": 5.4311, "step": 2675 }, { "epoch": 0.1794949190059362, "grad_norm": 0.14546404752440997, "learning_rate": 2e-05, "loss": 5.4192, "step": 2676 }, { "epoch": 0.17956199483516114, "grad_norm": 0.13967347764260432, "learning_rate": 2e-05, "loss": 5.5249, "step": 2677 }, { "epoch": 0.17962907066438608, "grad_norm": 0.14521520915931624, "learning_rate": 2e-05, "loss": 5.5246, "step": 2678 }, { "epoch": 0.17969614649361101, "grad_norm": 0.1447311625517521, "learning_rate": 2e-05, "loss": 5.5465, "step": 2679 }, { "epoch": 0.17976322232283595, "grad_norm": 0.14143206780291115, "learning_rate": 2e-05, "loss": 5.3928, "step": 2680 }, { "epoch": 0.1798302981520609, "grad_norm": 0.14334486130070173, "learning_rate": 2e-05, "loss": 5.3819, "step": 2681 }, { "epoch": 0.17989737398128586, "grad_norm": 0.15291732014122578, "learning_rate": 2e-05, "loss": 5.5048, "step": 2682 }, { "epoch": 0.1799644498105108, "grad_norm": 0.14466943312902764, "learning_rate": 2e-05, "loss": 5.5716, "step": 2683 }, { "epoch": 0.18003152563973573, "grad_norm": 0.14620871349027964, "learning_rate": 2e-05, "loss": 5.4146, "step": 2684 }, { "epoch": 0.18009860146896067, "grad_norm": 0.1451252652445664, "learning_rate": 2e-05, "loss": 5.4678, "step": 2685 }, { "epoch": 0.1801656772981856, "grad_norm": 0.14460873433451146, "learning_rate": 2e-05, "loss": 5.5468, "step": 2686 }, { "epoch": 0.18023275312741055, "grad_norm": 0.13963124286347603, "learning_rate": 2e-05, "loss": 5.4634, "step": 2687 }, { "epoch": 0.1802998289566355, "grad_norm": 0.1420901321803039, "learning_rate": 2e-05, "loss": 5.3788, "step": 2688 }, { "epoch": 0.18036690478586043, "grad_norm": 0.14343231481174518, "learning_rate": 2e-05, "loss": 5.4295, "step": 2689 }, { "epoch": 0.18043398061508537, "grad_norm": 0.13709606755888748, "learning_rate": 2e-05, "loss": 5.5792, "step": 2690 }, { "epoch": 0.1805010564443103, "grad_norm": 0.1468829691034455, "learning_rate": 2e-05, "loss": 5.4253, "step": 2691 }, { "epoch": 0.18056813227353524, "grad_norm": 0.14048648408284678, "learning_rate": 2e-05, "loss": 5.4853, "step": 2692 }, { "epoch": 0.18063520810276018, "grad_norm": 0.14213439859482596, "learning_rate": 2e-05, "loss": 5.4991, "step": 2693 }, { "epoch": 0.18070228393198512, "grad_norm": 0.1479588447526294, "learning_rate": 2e-05, "loss": 5.4792, "step": 2694 }, { "epoch": 0.18076935976121006, "grad_norm": 0.1446408909061203, "learning_rate": 2e-05, "loss": 5.6064, "step": 2695 }, { "epoch": 0.180836435590435, "grad_norm": 0.14184480954709416, "learning_rate": 2e-05, "loss": 5.481, "step": 2696 }, { "epoch": 0.18090351141965993, "grad_norm": 0.14481317391697998, "learning_rate": 2e-05, "loss": 5.5432, "step": 2697 }, { "epoch": 0.18097058724888487, "grad_norm": 0.14829224119551393, "learning_rate": 2e-05, "loss": 5.5077, "step": 2698 }, { "epoch": 0.1810376630781098, "grad_norm": 0.13742211598567433, "learning_rate": 2e-05, "loss": 5.3533, "step": 2699 }, { "epoch": 0.18110473890733475, "grad_norm": 0.13895532949840653, "learning_rate": 2e-05, "loss": 5.455, "step": 2700 }, { "epoch": 0.1811718147365597, "grad_norm": 0.14259474766112434, "learning_rate": 2e-05, "loss": 5.4728, "step": 2701 }, { "epoch": 0.18123889056578463, "grad_norm": 0.14779709666127674, "learning_rate": 2e-05, "loss": 5.4634, "step": 2702 }, { "epoch": 0.18130596639500957, "grad_norm": 0.1373766791159465, "learning_rate": 2e-05, "loss": 5.2521, "step": 2703 }, { "epoch": 0.1813730422242345, "grad_norm": 0.14163590753237487, "learning_rate": 2e-05, "loss": 5.4684, "step": 2704 }, { "epoch": 0.18144011805345944, "grad_norm": 0.14626805488372077, "learning_rate": 2e-05, "loss": 5.2864, "step": 2705 }, { "epoch": 0.18150719388268438, "grad_norm": 0.1425107728687988, "learning_rate": 2e-05, "loss": 5.542, "step": 2706 }, { "epoch": 0.18157426971190932, "grad_norm": 0.144817730661276, "learning_rate": 2e-05, "loss": 5.4461, "step": 2707 }, { "epoch": 0.18164134554113426, "grad_norm": 0.1493922397984341, "learning_rate": 2e-05, "loss": 5.5852, "step": 2708 }, { "epoch": 0.1817084213703592, "grad_norm": 0.14772091061664142, "learning_rate": 2e-05, "loss": 5.515, "step": 2709 }, { "epoch": 0.18177549719958414, "grad_norm": 0.14612150406129878, "learning_rate": 2e-05, "loss": 5.4428, "step": 2710 }, { "epoch": 0.18184257302880907, "grad_norm": 0.14354653922212127, "learning_rate": 2e-05, "loss": 5.4599, "step": 2711 }, { "epoch": 0.181909648858034, "grad_norm": 0.161969393548271, "learning_rate": 2e-05, "loss": 5.3918, "step": 2712 }, { "epoch": 0.18197672468725895, "grad_norm": 0.1425930264558271, "learning_rate": 2e-05, "loss": 5.469, "step": 2713 }, { "epoch": 0.1820438005164839, "grad_norm": 0.15342648392973557, "learning_rate": 2e-05, "loss": 5.6016, "step": 2714 }, { "epoch": 0.18211087634570883, "grad_norm": 0.14690007618081394, "learning_rate": 2e-05, "loss": 5.5322, "step": 2715 }, { "epoch": 0.18217795217493377, "grad_norm": 0.1526073653053425, "learning_rate": 2e-05, "loss": 5.4781, "step": 2716 }, { "epoch": 0.1822450280041587, "grad_norm": 0.14258527717093078, "learning_rate": 2e-05, "loss": 5.3875, "step": 2717 }, { "epoch": 0.18231210383338364, "grad_norm": 0.15120540034054075, "learning_rate": 2e-05, "loss": 5.4578, "step": 2718 }, { "epoch": 0.18237917966260858, "grad_norm": 0.15403161016922193, "learning_rate": 2e-05, "loss": 5.3631, "step": 2719 }, { "epoch": 0.18244625549183352, "grad_norm": 0.149735981742783, "learning_rate": 2e-05, "loss": 5.456, "step": 2720 }, { "epoch": 0.18251333132105846, "grad_norm": 0.1442048921818772, "learning_rate": 2e-05, "loss": 5.4762, "step": 2721 }, { "epoch": 0.1825804071502834, "grad_norm": 0.1580239085696565, "learning_rate": 2e-05, "loss": 5.5962, "step": 2722 }, { "epoch": 0.18264748297950834, "grad_norm": 0.15596444762442319, "learning_rate": 2e-05, "loss": 5.3992, "step": 2723 }, { "epoch": 0.18271455880873327, "grad_norm": 0.15152131974537503, "learning_rate": 2e-05, "loss": 5.4442, "step": 2724 }, { "epoch": 0.1827816346379582, "grad_norm": 0.16651044146488672, "learning_rate": 2e-05, "loss": 5.4338, "step": 2725 }, { "epoch": 0.18284871046718315, "grad_norm": 0.1566294463142734, "learning_rate": 2e-05, "loss": 5.4635, "step": 2726 }, { "epoch": 0.1829157862964081, "grad_norm": 0.15208251439897374, "learning_rate": 2e-05, "loss": 5.4638, "step": 2727 }, { "epoch": 0.18298286212563303, "grad_norm": 0.16130038926673448, "learning_rate": 2e-05, "loss": 5.5043, "step": 2728 }, { "epoch": 0.18304993795485797, "grad_norm": 0.15103364020010096, "learning_rate": 2e-05, "loss": 5.4099, "step": 2729 }, { "epoch": 0.1831170137840829, "grad_norm": 0.1407859004150815, "learning_rate": 2e-05, "loss": 5.3311, "step": 2730 }, { "epoch": 0.18318408961330784, "grad_norm": 0.15245416632274317, "learning_rate": 2e-05, "loss": 5.5932, "step": 2731 }, { "epoch": 0.18325116544253278, "grad_norm": 0.15435921761370103, "learning_rate": 2e-05, "loss": 5.3567, "step": 2732 }, { "epoch": 0.18331824127175772, "grad_norm": 0.14207736295263176, "learning_rate": 2e-05, "loss": 5.4004, "step": 2733 }, { "epoch": 0.18338531710098266, "grad_norm": 0.1541551064521531, "learning_rate": 2e-05, "loss": 5.373, "step": 2734 }, { "epoch": 0.1834523929302076, "grad_norm": 0.15438840326322978, "learning_rate": 2e-05, "loss": 5.428, "step": 2735 }, { "epoch": 0.18351946875943254, "grad_norm": 0.1508957888806164, "learning_rate": 2e-05, "loss": 5.3842, "step": 2736 }, { "epoch": 0.18358654458865747, "grad_norm": 0.15056652095232892, "learning_rate": 2e-05, "loss": 5.4896, "step": 2737 }, { "epoch": 0.1836536204178824, "grad_norm": 0.15683653732639452, "learning_rate": 2e-05, "loss": 5.4238, "step": 2738 }, { "epoch": 0.18372069624710735, "grad_norm": 0.14136064398771256, "learning_rate": 2e-05, "loss": 5.4592, "step": 2739 }, { "epoch": 0.1837877720763323, "grad_norm": 0.14530185931233602, "learning_rate": 2e-05, "loss": 5.4354, "step": 2740 }, { "epoch": 0.18385484790555723, "grad_norm": 0.1426834525108686, "learning_rate": 2e-05, "loss": 5.4332, "step": 2741 }, { "epoch": 0.18392192373478217, "grad_norm": 0.1408597167301315, "learning_rate": 2e-05, "loss": 5.4947, "step": 2742 }, { "epoch": 0.1839889995640071, "grad_norm": 0.15610807574360927, "learning_rate": 2e-05, "loss": 5.6129, "step": 2743 }, { "epoch": 0.18405607539323204, "grad_norm": 0.14602729654530727, "learning_rate": 2e-05, "loss": 5.438, "step": 2744 }, { "epoch": 0.18412315122245698, "grad_norm": 0.162189654013063, "learning_rate": 2e-05, "loss": 5.5383, "step": 2745 }, { "epoch": 0.18419022705168192, "grad_norm": 0.14376361851029346, "learning_rate": 2e-05, "loss": 5.3917, "step": 2746 }, { "epoch": 0.18425730288090686, "grad_norm": 0.14820936781411953, "learning_rate": 2e-05, "loss": 5.3879, "step": 2747 }, { "epoch": 0.1843243787101318, "grad_norm": 0.1509668959826662, "learning_rate": 2e-05, "loss": 5.5084, "step": 2748 }, { "epoch": 0.18439145453935674, "grad_norm": 0.14415063719527912, "learning_rate": 2e-05, "loss": 5.4013, "step": 2749 }, { "epoch": 0.18445853036858167, "grad_norm": 0.15394337679983203, "learning_rate": 2e-05, "loss": 5.3641, "step": 2750 }, { "epoch": 0.1845256061978066, "grad_norm": 0.14943830999848118, "learning_rate": 2e-05, "loss": 5.4878, "step": 2751 }, { "epoch": 0.18459268202703155, "grad_norm": 0.15306978949617306, "learning_rate": 2e-05, "loss": 5.4063, "step": 2752 }, { "epoch": 0.1846597578562565, "grad_norm": 0.14796552718289854, "learning_rate": 2e-05, "loss": 5.3559, "step": 2753 }, { "epoch": 0.18472683368548143, "grad_norm": 0.16068124344554538, "learning_rate": 2e-05, "loss": 5.5151, "step": 2754 }, { "epoch": 0.18479390951470637, "grad_norm": 0.14478781246575143, "learning_rate": 2e-05, "loss": 5.4016, "step": 2755 }, { "epoch": 0.1848609853439313, "grad_norm": 0.14595903385641057, "learning_rate": 2e-05, "loss": 5.5139, "step": 2756 }, { "epoch": 0.18492806117315624, "grad_norm": 0.14541072156839957, "learning_rate": 2e-05, "loss": 5.5161, "step": 2757 }, { "epoch": 0.18499513700238118, "grad_norm": 0.1544261597403794, "learning_rate": 2e-05, "loss": 5.4788, "step": 2758 }, { "epoch": 0.18506221283160612, "grad_norm": 0.14763974846137595, "learning_rate": 2e-05, "loss": 5.503, "step": 2759 }, { "epoch": 0.18512928866083106, "grad_norm": 0.14375436940507896, "learning_rate": 2e-05, "loss": 5.5288, "step": 2760 }, { "epoch": 0.185196364490056, "grad_norm": 0.15286340799644232, "learning_rate": 2e-05, "loss": 5.408, "step": 2761 }, { "epoch": 0.18526344031928094, "grad_norm": 0.14775311585695267, "learning_rate": 2e-05, "loss": 5.4461, "step": 2762 }, { "epoch": 0.18533051614850587, "grad_norm": 0.14522994453796517, "learning_rate": 2e-05, "loss": 5.4787, "step": 2763 }, { "epoch": 0.1853975919777308, "grad_norm": 0.15468972742722598, "learning_rate": 2e-05, "loss": 5.4885, "step": 2764 }, { "epoch": 0.18546466780695575, "grad_norm": 0.14762409806535293, "learning_rate": 2e-05, "loss": 5.2785, "step": 2765 }, { "epoch": 0.1855317436361807, "grad_norm": 0.14602240294651064, "learning_rate": 2e-05, "loss": 5.4999, "step": 2766 }, { "epoch": 0.18559881946540563, "grad_norm": 0.14988994705552325, "learning_rate": 2e-05, "loss": 5.3385, "step": 2767 }, { "epoch": 0.18566589529463057, "grad_norm": 0.1417013672882784, "learning_rate": 2e-05, "loss": 5.4869, "step": 2768 }, { "epoch": 0.1857329711238555, "grad_norm": 0.14465079861219646, "learning_rate": 2e-05, "loss": 5.4668, "step": 2769 }, { "epoch": 0.18580004695308044, "grad_norm": 0.1483081013498772, "learning_rate": 2e-05, "loss": 5.4942, "step": 2770 }, { "epoch": 0.18586712278230538, "grad_norm": 0.15386824332441423, "learning_rate": 2e-05, "loss": 5.5363, "step": 2771 }, { "epoch": 0.18593419861153032, "grad_norm": 0.140655535647225, "learning_rate": 2e-05, "loss": 5.5318, "step": 2772 }, { "epoch": 0.1860012744407553, "grad_norm": 0.14525645416409158, "learning_rate": 2e-05, "loss": 5.4724, "step": 2773 }, { "epoch": 0.18606835026998023, "grad_norm": 0.1453030410120523, "learning_rate": 2e-05, "loss": 5.3575, "step": 2774 }, { "epoch": 0.18613542609920516, "grad_norm": 0.15139970626429225, "learning_rate": 2e-05, "loss": 5.5578, "step": 2775 }, { "epoch": 0.1862025019284301, "grad_norm": 0.1380328181502607, "learning_rate": 2e-05, "loss": 5.3625, "step": 2776 }, { "epoch": 0.18626957775765504, "grad_norm": 0.14222643249375677, "learning_rate": 2e-05, "loss": 5.4745, "step": 2777 }, { "epoch": 0.18633665358687998, "grad_norm": 0.1518684449334853, "learning_rate": 2e-05, "loss": 5.4542, "step": 2778 }, { "epoch": 0.18640372941610492, "grad_norm": 0.15455306309884742, "learning_rate": 2e-05, "loss": 5.3815, "step": 2779 }, { "epoch": 0.18647080524532986, "grad_norm": 0.1368926311066186, "learning_rate": 2e-05, "loss": 5.487, "step": 2780 }, { "epoch": 0.1865378810745548, "grad_norm": 0.14749508507398076, "learning_rate": 2e-05, "loss": 5.6635, "step": 2781 }, { "epoch": 0.18660495690377973, "grad_norm": 0.1554580748416227, "learning_rate": 2e-05, "loss": 5.444, "step": 2782 }, { "epoch": 0.18667203273300467, "grad_norm": 0.1445286448880499, "learning_rate": 2e-05, "loss": 5.4118, "step": 2783 }, { "epoch": 0.1867391085622296, "grad_norm": 0.1570790952450586, "learning_rate": 2e-05, "loss": 5.549, "step": 2784 }, { "epoch": 0.18680618439145455, "grad_norm": 0.1406678087234311, "learning_rate": 2e-05, "loss": 5.4334, "step": 2785 }, { "epoch": 0.1868732602206795, "grad_norm": 0.14130881816708843, "learning_rate": 2e-05, "loss": 5.4972, "step": 2786 }, { "epoch": 0.18694033604990443, "grad_norm": 0.14912546607224975, "learning_rate": 2e-05, "loss": 5.4707, "step": 2787 }, { "epoch": 0.18700741187912936, "grad_norm": 0.14221206355037808, "learning_rate": 2e-05, "loss": 5.5247, "step": 2788 }, { "epoch": 0.1870744877083543, "grad_norm": 0.13620575288463382, "learning_rate": 2e-05, "loss": 5.3195, "step": 2789 }, { "epoch": 0.18714156353757924, "grad_norm": 0.13942055899930447, "learning_rate": 2e-05, "loss": 5.4656, "step": 2790 }, { "epoch": 0.18720863936680418, "grad_norm": 0.13762686775756489, "learning_rate": 2e-05, "loss": 5.3106, "step": 2791 }, { "epoch": 0.18727571519602912, "grad_norm": 0.14104708237963642, "learning_rate": 2e-05, "loss": 5.5166, "step": 2792 }, { "epoch": 0.18734279102525406, "grad_norm": 0.14387836108780838, "learning_rate": 2e-05, "loss": 5.4443, "step": 2793 }, { "epoch": 0.187409866854479, "grad_norm": 0.14852322677390425, "learning_rate": 2e-05, "loss": 5.6148, "step": 2794 }, { "epoch": 0.18747694268370393, "grad_norm": 0.1373032761849103, "learning_rate": 2e-05, "loss": 5.4469, "step": 2795 }, { "epoch": 0.18754401851292887, "grad_norm": 0.14733484001729938, "learning_rate": 2e-05, "loss": 5.4145, "step": 2796 }, { "epoch": 0.1876110943421538, "grad_norm": 0.1445911783593561, "learning_rate": 2e-05, "loss": 5.4494, "step": 2797 }, { "epoch": 0.18767817017137875, "grad_norm": 0.1504190609642747, "learning_rate": 2e-05, "loss": 5.4726, "step": 2798 }, { "epoch": 0.1877452460006037, "grad_norm": 0.1426187510550691, "learning_rate": 2e-05, "loss": 5.3889, "step": 2799 }, { "epoch": 0.18781232182982863, "grad_norm": 0.15500711248605933, "learning_rate": 2e-05, "loss": 5.4148, "step": 2800 }, { "epoch": 0.18787939765905357, "grad_norm": 0.1441784902913559, "learning_rate": 2e-05, "loss": 5.4567, "step": 2801 }, { "epoch": 0.1879464734882785, "grad_norm": 0.13962365403417273, "learning_rate": 2e-05, "loss": 5.302, "step": 2802 }, { "epoch": 0.18801354931750344, "grad_norm": 0.14860217834896863, "learning_rate": 2e-05, "loss": 5.4326, "step": 2803 }, { "epoch": 0.18808062514672838, "grad_norm": 0.14716173046589545, "learning_rate": 2e-05, "loss": 5.5374, "step": 2804 }, { "epoch": 0.18814770097595332, "grad_norm": 0.14649588594771532, "learning_rate": 2e-05, "loss": 5.474, "step": 2805 }, { "epoch": 0.18821477680517826, "grad_norm": 0.15367918569550446, "learning_rate": 2e-05, "loss": 5.4133, "step": 2806 }, { "epoch": 0.1882818526344032, "grad_norm": 0.15158986858518406, "learning_rate": 2e-05, "loss": 5.5573, "step": 2807 }, { "epoch": 0.18834892846362813, "grad_norm": 0.1431643320024629, "learning_rate": 2e-05, "loss": 5.4964, "step": 2808 }, { "epoch": 0.18841600429285307, "grad_norm": 0.15153110795528937, "learning_rate": 2e-05, "loss": 5.4612, "step": 2809 }, { "epoch": 0.188483080122078, "grad_norm": 0.14914957267185086, "learning_rate": 2e-05, "loss": 5.6928, "step": 2810 }, { "epoch": 0.18855015595130295, "grad_norm": 0.14804035514882688, "learning_rate": 2e-05, "loss": 5.4081, "step": 2811 }, { "epoch": 0.1886172317805279, "grad_norm": 0.1446475806562322, "learning_rate": 2e-05, "loss": 5.559, "step": 2812 }, { "epoch": 0.18868430760975283, "grad_norm": 0.14098515559135927, "learning_rate": 2e-05, "loss": 5.5323, "step": 2813 }, { "epoch": 0.18875138343897777, "grad_norm": 0.14644313606939166, "learning_rate": 2e-05, "loss": 5.5955, "step": 2814 }, { "epoch": 0.1888184592682027, "grad_norm": 0.15042458276289053, "learning_rate": 2e-05, "loss": 5.5086, "step": 2815 }, { "epoch": 0.18888553509742764, "grad_norm": 0.14558034065585312, "learning_rate": 2e-05, "loss": 5.2867, "step": 2816 }, { "epoch": 0.18895261092665258, "grad_norm": 0.13490574351485027, "learning_rate": 2e-05, "loss": 5.4483, "step": 2817 }, { "epoch": 0.18901968675587752, "grad_norm": 0.14399322630548952, "learning_rate": 2e-05, "loss": 5.4852, "step": 2818 }, { "epoch": 0.18908676258510246, "grad_norm": 0.14873050911469793, "learning_rate": 2e-05, "loss": 5.419, "step": 2819 }, { "epoch": 0.1891538384143274, "grad_norm": 0.14062363108697062, "learning_rate": 2e-05, "loss": 5.3979, "step": 2820 }, { "epoch": 0.18922091424355234, "grad_norm": 0.14625551336910175, "learning_rate": 2e-05, "loss": 5.5137, "step": 2821 }, { "epoch": 0.18928799007277727, "grad_norm": 0.14391127016380653, "learning_rate": 2e-05, "loss": 5.4028, "step": 2822 }, { "epoch": 0.1893550659020022, "grad_norm": 0.1460262753577528, "learning_rate": 2e-05, "loss": 5.5043, "step": 2823 }, { "epoch": 0.18942214173122715, "grad_norm": 0.1495346580748495, "learning_rate": 2e-05, "loss": 5.5205, "step": 2824 }, { "epoch": 0.1894892175604521, "grad_norm": 0.14365089700990966, "learning_rate": 2e-05, "loss": 5.4019, "step": 2825 }, { "epoch": 0.18955629338967703, "grad_norm": 0.1480401307073672, "learning_rate": 2e-05, "loss": 5.5275, "step": 2826 }, { "epoch": 0.18962336921890197, "grad_norm": 0.1432371967272493, "learning_rate": 2e-05, "loss": 5.2553, "step": 2827 }, { "epoch": 0.1896904450481269, "grad_norm": 0.14682541935574384, "learning_rate": 2e-05, "loss": 5.4161, "step": 2828 }, { "epoch": 0.18975752087735184, "grad_norm": 0.15127493187002242, "learning_rate": 2e-05, "loss": 5.5113, "step": 2829 }, { "epoch": 0.18982459670657678, "grad_norm": 0.14193655844692712, "learning_rate": 2e-05, "loss": 5.5175, "step": 2830 }, { "epoch": 0.18989167253580172, "grad_norm": 0.149888673996713, "learning_rate": 2e-05, "loss": 5.4137, "step": 2831 }, { "epoch": 0.18995874836502666, "grad_norm": 0.15261384859298927, "learning_rate": 2e-05, "loss": 5.6057, "step": 2832 }, { "epoch": 0.1900258241942516, "grad_norm": 0.14016270777498008, "learning_rate": 2e-05, "loss": 5.3879, "step": 2833 }, { "epoch": 0.19009290002347654, "grad_norm": 0.13795216577659897, "learning_rate": 2e-05, "loss": 5.5151, "step": 2834 }, { "epoch": 0.19015997585270147, "grad_norm": 0.14961518059548296, "learning_rate": 2e-05, "loss": 5.4497, "step": 2835 }, { "epoch": 0.1902270516819264, "grad_norm": 0.1503754769406176, "learning_rate": 2e-05, "loss": 5.5119, "step": 2836 }, { "epoch": 0.19029412751115135, "grad_norm": 0.1457004641868794, "learning_rate": 2e-05, "loss": 5.3622, "step": 2837 }, { "epoch": 0.1903612033403763, "grad_norm": 0.1468702368491807, "learning_rate": 2e-05, "loss": 5.4572, "step": 2838 }, { "epoch": 0.19042827916960123, "grad_norm": 0.14609720986799696, "learning_rate": 2e-05, "loss": 5.4388, "step": 2839 }, { "epoch": 0.19049535499882617, "grad_norm": 0.14414027482299493, "learning_rate": 2e-05, "loss": 5.469, "step": 2840 }, { "epoch": 0.1905624308280511, "grad_norm": 0.14670241542858473, "learning_rate": 2e-05, "loss": 5.2781, "step": 2841 }, { "epoch": 0.19062950665727604, "grad_norm": 0.14318237467451572, "learning_rate": 2e-05, "loss": 5.4333, "step": 2842 }, { "epoch": 0.19069658248650098, "grad_norm": 0.15338943257226637, "learning_rate": 2e-05, "loss": 5.3478, "step": 2843 }, { "epoch": 0.19076365831572592, "grad_norm": 0.14589096640450133, "learning_rate": 2e-05, "loss": 5.4255, "step": 2844 }, { "epoch": 0.19083073414495086, "grad_norm": 0.13895507997822087, "learning_rate": 2e-05, "loss": 5.4334, "step": 2845 }, { "epoch": 0.1908978099741758, "grad_norm": 0.14306723954204897, "learning_rate": 2e-05, "loss": 5.4905, "step": 2846 }, { "epoch": 0.19096488580340074, "grad_norm": 0.14519262260760044, "learning_rate": 2e-05, "loss": 5.3107, "step": 2847 }, { "epoch": 0.19103196163262567, "grad_norm": 0.1536035240889774, "learning_rate": 2e-05, "loss": 5.5027, "step": 2848 }, { "epoch": 0.1910990374618506, "grad_norm": 0.1461179990146056, "learning_rate": 2e-05, "loss": 5.4916, "step": 2849 }, { "epoch": 0.19116611329107555, "grad_norm": 0.1397937145348719, "learning_rate": 2e-05, "loss": 5.4947, "step": 2850 }, { "epoch": 0.1912331891203005, "grad_norm": 0.16158993138502456, "learning_rate": 2e-05, "loss": 5.4313, "step": 2851 }, { "epoch": 0.19130026494952543, "grad_norm": 0.15848794293139673, "learning_rate": 2e-05, "loss": 5.4933, "step": 2852 }, { "epoch": 0.19136734077875037, "grad_norm": 0.14298786795795257, "learning_rate": 2e-05, "loss": 5.3269, "step": 2853 }, { "epoch": 0.1914344166079753, "grad_norm": 0.15084784600941908, "learning_rate": 2e-05, "loss": 5.4492, "step": 2854 }, { "epoch": 0.19150149243720024, "grad_norm": 0.1497944368921153, "learning_rate": 2e-05, "loss": 5.447, "step": 2855 }, { "epoch": 0.19156856826642518, "grad_norm": 0.14554790478013255, "learning_rate": 2e-05, "loss": 5.6201, "step": 2856 }, { "epoch": 0.19163564409565012, "grad_norm": 0.14519836170893075, "learning_rate": 2e-05, "loss": 5.34, "step": 2857 }, { "epoch": 0.19170271992487506, "grad_norm": 0.1541332672491981, "learning_rate": 2e-05, "loss": 5.6442, "step": 2858 }, { "epoch": 0.1917697957541, "grad_norm": 0.143269012402466, "learning_rate": 2e-05, "loss": 5.4413, "step": 2859 }, { "epoch": 0.19183687158332494, "grad_norm": 0.1465888609781174, "learning_rate": 2e-05, "loss": 5.4873, "step": 2860 }, { "epoch": 0.19190394741254987, "grad_norm": 0.14463491684166604, "learning_rate": 2e-05, "loss": 5.5835, "step": 2861 }, { "epoch": 0.1919710232417748, "grad_norm": 0.15009420783234587, "learning_rate": 2e-05, "loss": 5.5894, "step": 2862 }, { "epoch": 0.19203809907099975, "grad_norm": 0.1432231677263341, "learning_rate": 2e-05, "loss": 5.5369, "step": 2863 }, { "epoch": 0.19210517490022472, "grad_norm": 0.14165033841451444, "learning_rate": 2e-05, "loss": 5.5392, "step": 2864 }, { "epoch": 0.19217225072944966, "grad_norm": 0.14803025931130132, "learning_rate": 2e-05, "loss": 5.5598, "step": 2865 }, { "epoch": 0.1922393265586746, "grad_norm": 0.15647560470016336, "learning_rate": 2e-05, "loss": 5.293, "step": 2866 }, { "epoch": 0.19230640238789953, "grad_norm": 0.14825904475899848, "learning_rate": 2e-05, "loss": 5.531, "step": 2867 }, { "epoch": 0.19237347821712447, "grad_norm": 0.15001004503178583, "learning_rate": 2e-05, "loss": 5.2631, "step": 2868 }, { "epoch": 0.1924405540463494, "grad_norm": 0.1391735848468873, "learning_rate": 2e-05, "loss": 5.5774, "step": 2869 }, { "epoch": 0.19250762987557435, "grad_norm": 0.14532375646203816, "learning_rate": 2e-05, "loss": 5.3736, "step": 2870 }, { "epoch": 0.1925747057047993, "grad_norm": 0.1468656291839792, "learning_rate": 2e-05, "loss": 5.4287, "step": 2871 }, { "epoch": 0.19264178153402423, "grad_norm": 0.1416055489028985, "learning_rate": 2e-05, "loss": 5.4278, "step": 2872 }, { "epoch": 0.19270885736324916, "grad_norm": 0.14627645099368602, "learning_rate": 2e-05, "loss": 5.3814, "step": 2873 }, { "epoch": 0.1927759331924741, "grad_norm": 0.13947088066865052, "learning_rate": 2e-05, "loss": 5.4594, "step": 2874 }, { "epoch": 0.19284300902169904, "grad_norm": 0.14330913600085518, "learning_rate": 2e-05, "loss": 5.3831, "step": 2875 }, { "epoch": 0.19291008485092398, "grad_norm": 0.14235222976570247, "learning_rate": 2e-05, "loss": 5.43, "step": 2876 }, { "epoch": 0.19297716068014892, "grad_norm": 0.1402779631021663, "learning_rate": 2e-05, "loss": 5.553, "step": 2877 }, { "epoch": 0.19304423650937386, "grad_norm": 0.16429993451400132, "learning_rate": 2e-05, "loss": 5.4997, "step": 2878 }, { "epoch": 0.1931113123385988, "grad_norm": 0.14157038787220258, "learning_rate": 2e-05, "loss": 5.421, "step": 2879 }, { "epoch": 0.19317838816782373, "grad_norm": 0.14648374932740207, "learning_rate": 2e-05, "loss": 5.3988, "step": 2880 }, { "epoch": 0.19324546399704867, "grad_norm": 0.14154215326838476, "learning_rate": 2e-05, "loss": 5.4902, "step": 2881 }, { "epoch": 0.1933125398262736, "grad_norm": 0.1424851202592815, "learning_rate": 2e-05, "loss": 5.366, "step": 2882 }, { "epoch": 0.19337961565549855, "grad_norm": 0.14059870584448472, "learning_rate": 2e-05, "loss": 5.5355, "step": 2883 }, { "epoch": 0.1934466914847235, "grad_norm": 0.14935950219158964, "learning_rate": 2e-05, "loss": 5.5553, "step": 2884 }, { "epoch": 0.19351376731394843, "grad_norm": 0.14275499011538914, "learning_rate": 2e-05, "loss": 5.4438, "step": 2885 }, { "epoch": 0.19358084314317336, "grad_norm": 0.14400243706243512, "learning_rate": 2e-05, "loss": 5.5797, "step": 2886 }, { "epoch": 0.1936479189723983, "grad_norm": 0.14670462175667487, "learning_rate": 2e-05, "loss": 5.6267, "step": 2887 }, { "epoch": 0.19371499480162324, "grad_norm": 0.1454304716232998, "learning_rate": 2e-05, "loss": 5.5018, "step": 2888 }, { "epoch": 0.19378207063084818, "grad_norm": 0.1409886488750574, "learning_rate": 2e-05, "loss": 5.3999, "step": 2889 }, { "epoch": 0.19384914646007312, "grad_norm": 0.14510571418571616, "learning_rate": 2e-05, "loss": 5.3707, "step": 2890 }, { "epoch": 0.19391622228929806, "grad_norm": 0.15303720996987785, "learning_rate": 2e-05, "loss": 5.5571, "step": 2891 }, { "epoch": 0.193983298118523, "grad_norm": 0.15239792851986214, "learning_rate": 2e-05, "loss": 5.3889, "step": 2892 }, { "epoch": 0.19405037394774793, "grad_norm": 0.14084363883606532, "learning_rate": 2e-05, "loss": 5.4495, "step": 2893 }, { "epoch": 0.19411744977697287, "grad_norm": 0.14285824642692535, "learning_rate": 2e-05, "loss": 5.3917, "step": 2894 }, { "epoch": 0.1941845256061978, "grad_norm": 0.1516624702427743, "learning_rate": 2e-05, "loss": 5.373, "step": 2895 }, { "epoch": 0.19425160143542275, "grad_norm": 0.14257036787884608, "learning_rate": 2e-05, "loss": 5.4641, "step": 2896 }, { "epoch": 0.1943186772646477, "grad_norm": 0.14697329044831953, "learning_rate": 2e-05, "loss": 5.4466, "step": 2897 }, { "epoch": 0.19438575309387263, "grad_norm": 0.1507798655710949, "learning_rate": 2e-05, "loss": 5.406, "step": 2898 }, { "epoch": 0.19445282892309756, "grad_norm": 0.14839607566750623, "learning_rate": 2e-05, "loss": 5.5276, "step": 2899 }, { "epoch": 0.1945199047523225, "grad_norm": 0.14504630689286738, "learning_rate": 2e-05, "loss": 5.3915, "step": 2900 }, { "epoch": 0.19458698058154744, "grad_norm": 0.1450465230432479, "learning_rate": 2e-05, "loss": 5.469, "step": 2901 }, { "epoch": 0.19465405641077238, "grad_norm": 0.147474843975319, "learning_rate": 2e-05, "loss": 5.5343, "step": 2902 }, { "epoch": 0.19472113223999732, "grad_norm": 0.14323788470312038, "learning_rate": 2e-05, "loss": 5.4055, "step": 2903 }, { "epoch": 0.19478820806922226, "grad_norm": 0.1416027150700193, "learning_rate": 2e-05, "loss": 5.5662, "step": 2904 }, { "epoch": 0.1948552838984472, "grad_norm": 0.1469141777885197, "learning_rate": 2e-05, "loss": 5.3496, "step": 2905 }, { "epoch": 0.19492235972767213, "grad_norm": 0.1393415885910724, "learning_rate": 2e-05, "loss": 5.5315, "step": 2906 }, { "epoch": 0.19498943555689707, "grad_norm": 0.13803519236915313, "learning_rate": 2e-05, "loss": 5.3177, "step": 2907 }, { "epoch": 0.195056511386122, "grad_norm": 0.15044764516446688, "learning_rate": 2e-05, "loss": 5.3923, "step": 2908 }, { "epoch": 0.19512358721534695, "grad_norm": 0.1465902076227777, "learning_rate": 2e-05, "loss": 5.5504, "step": 2909 }, { "epoch": 0.1951906630445719, "grad_norm": 0.14476497371196184, "learning_rate": 2e-05, "loss": 5.4916, "step": 2910 }, { "epoch": 0.19525773887379683, "grad_norm": 0.14067391054563042, "learning_rate": 2e-05, "loss": 5.551, "step": 2911 }, { "epoch": 0.19532481470302177, "grad_norm": 0.14310475035001582, "learning_rate": 2e-05, "loss": 5.4926, "step": 2912 }, { "epoch": 0.1953918905322467, "grad_norm": 0.14137499452177452, "learning_rate": 2e-05, "loss": 5.5288, "step": 2913 }, { "epoch": 0.19545896636147164, "grad_norm": 0.1422984321620052, "learning_rate": 2e-05, "loss": 5.3457, "step": 2914 }, { "epoch": 0.19552604219069658, "grad_norm": 0.14875434772696083, "learning_rate": 2e-05, "loss": 5.5561, "step": 2915 }, { "epoch": 0.19559311801992152, "grad_norm": 0.1494127224488448, "learning_rate": 2e-05, "loss": 5.5104, "step": 2916 }, { "epoch": 0.19566019384914646, "grad_norm": 0.15162618474343506, "learning_rate": 2e-05, "loss": 5.4282, "step": 2917 }, { "epoch": 0.1957272696783714, "grad_norm": 0.14019621118932557, "learning_rate": 2e-05, "loss": 5.3947, "step": 2918 }, { "epoch": 0.19579434550759633, "grad_norm": 0.14691067164752378, "learning_rate": 2e-05, "loss": 5.4656, "step": 2919 }, { "epoch": 0.19586142133682127, "grad_norm": 0.14490590862901187, "learning_rate": 2e-05, "loss": 5.4807, "step": 2920 }, { "epoch": 0.1959284971660462, "grad_norm": 0.14644723159612702, "learning_rate": 2e-05, "loss": 5.6153, "step": 2921 }, { "epoch": 0.19599557299527115, "grad_norm": 0.14171362047899058, "learning_rate": 2e-05, "loss": 5.3515, "step": 2922 }, { "epoch": 0.1960626488244961, "grad_norm": 0.15254886209315727, "learning_rate": 2e-05, "loss": 5.3314, "step": 2923 }, { "epoch": 0.19612972465372103, "grad_norm": 0.14397449922930072, "learning_rate": 2e-05, "loss": 5.4136, "step": 2924 }, { "epoch": 0.19619680048294597, "grad_norm": 0.14217260222584494, "learning_rate": 2e-05, "loss": 5.3712, "step": 2925 }, { "epoch": 0.1962638763121709, "grad_norm": 0.14396192623322784, "learning_rate": 2e-05, "loss": 5.3989, "step": 2926 }, { "epoch": 0.19633095214139584, "grad_norm": 0.14798556040184313, "learning_rate": 2e-05, "loss": 5.4361, "step": 2927 }, { "epoch": 0.19639802797062078, "grad_norm": 0.1596110501625947, "learning_rate": 2e-05, "loss": 5.4708, "step": 2928 }, { "epoch": 0.19646510379984572, "grad_norm": 0.13849286762911275, "learning_rate": 2e-05, "loss": 5.466, "step": 2929 }, { "epoch": 0.19653217962907066, "grad_norm": 0.14550873180838436, "learning_rate": 2e-05, "loss": 5.2974, "step": 2930 }, { "epoch": 0.1965992554582956, "grad_norm": 0.1452155635316248, "learning_rate": 2e-05, "loss": 5.3564, "step": 2931 }, { "epoch": 0.19666633128752053, "grad_norm": 0.14506895506157305, "learning_rate": 2e-05, "loss": 5.5959, "step": 2932 }, { "epoch": 0.19673340711674547, "grad_norm": 0.14291070006779216, "learning_rate": 2e-05, "loss": 5.4041, "step": 2933 }, { "epoch": 0.1968004829459704, "grad_norm": 0.1496124887368297, "learning_rate": 2e-05, "loss": 5.3981, "step": 2934 }, { "epoch": 0.19686755877519535, "grad_norm": 0.1477471892679978, "learning_rate": 2e-05, "loss": 5.5308, "step": 2935 }, { "epoch": 0.1969346346044203, "grad_norm": 0.1454020489397554, "learning_rate": 2e-05, "loss": 5.3662, "step": 2936 }, { "epoch": 0.19700171043364523, "grad_norm": 0.1440655944722557, "learning_rate": 2e-05, "loss": 5.4327, "step": 2937 }, { "epoch": 0.19706878626287017, "grad_norm": 0.14048519518114946, "learning_rate": 2e-05, "loss": 5.2793, "step": 2938 }, { "epoch": 0.1971358620920951, "grad_norm": 0.147431055952558, "learning_rate": 2e-05, "loss": 5.2396, "step": 2939 }, { "epoch": 0.19720293792132004, "grad_norm": 0.14135486565868316, "learning_rate": 2e-05, "loss": 5.4079, "step": 2940 }, { "epoch": 0.19727001375054498, "grad_norm": 0.1457632065633095, "learning_rate": 2e-05, "loss": 5.4476, "step": 2941 }, { "epoch": 0.19733708957976992, "grad_norm": 0.14423474478854234, "learning_rate": 2e-05, "loss": 5.3503, "step": 2942 }, { "epoch": 0.19740416540899486, "grad_norm": 0.13646219392576245, "learning_rate": 2e-05, "loss": 5.3473, "step": 2943 }, { "epoch": 0.1974712412382198, "grad_norm": 0.1452072373436208, "learning_rate": 2e-05, "loss": 5.4034, "step": 2944 }, { "epoch": 0.19753831706744474, "grad_norm": 0.13886950230872394, "learning_rate": 2e-05, "loss": 5.53, "step": 2945 }, { "epoch": 0.19760539289666967, "grad_norm": 0.15021817086696096, "learning_rate": 2e-05, "loss": 5.373, "step": 2946 }, { "epoch": 0.1976724687258946, "grad_norm": 0.14358627487370337, "learning_rate": 2e-05, "loss": 5.5446, "step": 2947 }, { "epoch": 0.19773954455511955, "grad_norm": 0.14006671558069284, "learning_rate": 2e-05, "loss": 5.3814, "step": 2948 }, { "epoch": 0.1978066203843445, "grad_norm": 0.14617541309244586, "learning_rate": 2e-05, "loss": 5.3645, "step": 2949 }, { "epoch": 0.19787369621356943, "grad_norm": 0.1408974284803663, "learning_rate": 2e-05, "loss": 5.4826, "step": 2950 }, { "epoch": 0.19794077204279437, "grad_norm": 0.14432837367587809, "learning_rate": 2e-05, "loss": 5.4906, "step": 2951 }, { "epoch": 0.1980078478720193, "grad_norm": 0.1419883461572055, "learning_rate": 2e-05, "loss": 5.4803, "step": 2952 }, { "epoch": 0.19807492370124424, "grad_norm": 0.13786954744113436, "learning_rate": 2e-05, "loss": 5.4306, "step": 2953 }, { "epoch": 0.19814199953046918, "grad_norm": 0.14977241792447413, "learning_rate": 2e-05, "loss": 5.3918, "step": 2954 }, { "epoch": 0.19820907535969415, "grad_norm": 0.1488793118886182, "learning_rate": 2e-05, "loss": 5.4931, "step": 2955 }, { "epoch": 0.1982761511889191, "grad_norm": 0.15066805631126257, "learning_rate": 2e-05, "loss": 5.3643, "step": 2956 }, { "epoch": 0.19834322701814402, "grad_norm": 0.14039294609730038, "learning_rate": 2e-05, "loss": 5.4024, "step": 2957 }, { "epoch": 0.19841030284736896, "grad_norm": 0.1491944935638163, "learning_rate": 2e-05, "loss": 5.4599, "step": 2958 }, { "epoch": 0.1984773786765939, "grad_norm": 0.1385960263272716, "learning_rate": 2e-05, "loss": 5.4046, "step": 2959 }, { "epoch": 0.19854445450581884, "grad_norm": 0.14011673784966122, "learning_rate": 2e-05, "loss": 5.5052, "step": 2960 }, { "epoch": 0.19861153033504378, "grad_norm": 0.14373254507816535, "learning_rate": 2e-05, "loss": 5.5055, "step": 2961 }, { "epoch": 0.19867860616426872, "grad_norm": 0.14907275217826996, "learning_rate": 2e-05, "loss": 5.3759, "step": 2962 }, { "epoch": 0.19874568199349366, "grad_norm": 0.14554415970519327, "learning_rate": 2e-05, "loss": 5.4974, "step": 2963 }, { "epoch": 0.1988127578227186, "grad_norm": 0.1412335780950371, "learning_rate": 2e-05, "loss": 5.4916, "step": 2964 }, { "epoch": 0.19887983365194353, "grad_norm": 0.1453733363268682, "learning_rate": 2e-05, "loss": 5.3742, "step": 2965 }, { "epoch": 0.19894690948116847, "grad_norm": 0.150013769308575, "learning_rate": 2e-05, "loss": 5.4442, "step": 2966 }, { "epoch": 0.1990139853103934, "grad_norm": 0.1447414116162861, "learning_rate": 2e-05, "loss": 5.439, "step": 2967 }, { "epoch": 0.19908106113961835, "grad_norm": 0.14440229339971944, "learning_rate": 2e-05, "loss": 5.3161, "step": 2968 }, { "epoch": 0.1991481369688433, "grad_norm": 0.14168709151750497, "learning_rate": 2e-05, "loss": 5.4734, "step": 2969 }, { "epoch": 0.19921521279806823, "grad_norm": 0.14581846115145042, "learning_rate": 2e-05, "loss": 5.508, "step": 2970 }, { "epoch": 0.19928228862729316, "grad_norm": 0.1553515211920712, "learning_rate": 2e-05, "loss": 5.4904, "step": 2971 }, { "epoch": 0.1993493644565181, "grad_norm": 0.14930143558295958, "learning_rate": 2e-05, "loss": 5.4535, "step": 2972 }, { "epoch": 0.19941644028574304, "grad_norm": 0.14424912912144242, "learning_rate": 2e-05, "loss": 5.406, "step": 2973 }, { "epoch": 0.19948351611496798, "grad_norm": 0.14928587791927395, "learning_rate": 2e-05, "loss": 5.4346, "step": 2974 }, { "epoch": 0.19955059194419292, "grad_norm": 0.1501448890260835, "learning_rate": 2e-05, "loss": 5.549, "step": 2975 }, { "epoch": 0.19961766777341786, "grad_norm": 0.15285281060663317, "learning_rate": 2e-05, "loss": 5.4986, "step": 2976 }, { "epoch": 0.1996847436026428, "grad_norm": 0.14894586563965428, "learning_rate": 2e-05, "loss": 5.4183, "step": 2977 }, { "epoch": 0.19975181943186773, "grad_norm": 0.14506213434268828, "learning_rate": 2e-05, "loss": 5.591, "step": 2978 }, { "epoch": 0.19981889526109267, "grad_norm": 0.1398618418000234, "learning_rate": 2e-05, "loss": 5.5728, "step": 2979 }, { "epoch": 0.1998859710903176, "grad_norm": 0.14548107968677967, "learning_rate": 2e-05, "loss": 5.361, "step": 2980 }, { "epoch": 0.19995304691954255, "grad_norm": 0.1527923048202449, "learning_rate": 2e-05, "loss": 5.4691, "step": 2981 }, { "epoch": 0.2000201227487675, "grad_norm": 0.13865889975026682, "learning_rate": 2e-05, "loss": 5.4724, "step": 2982 }, { "epoch": 0.20008719857799243, "grad_norm": 0.14406150591607975, "learning_rate": 2e-05, "loss": 5.512, "step": 2983 }, { "epoch": 0.20015427440721736, "grad_norm": 0.15273271941215824, "learning_rate": 2e-05, "loss": 5.4474, "step": 2984 }, { "epoch": 0.2002213502364423, "grad_norm": 0.15415823641395715, "learning_rate": 2e-05, "loss": 5.6198, "step": 2985 }, { "epoch": 0.20028842606566724, "grad_norm": 0.1425073688530836, "learning_rate": 2e-05, "loss": 5.4004, "step": 2986 }, { "epoch": 0.20035550189489218, "grad_norm": 0.14155754497040868, "learning_rate": 2e-05, "loss": 5.4924, "step": 2987 }, { "epoch": 0.20042257772411712, "grad_norm": 0.1514580387316104, "learning_rate": 2e-05, "loss": 5.4208, "step": 2988 }, { "epoch": 0.20048965355334206, "grad_norm": 0.1579241935874637, "learning_rate": 2e-05, "loss": 5.4915, "step": 2989 }, { "epoch": 0.200556729382567, "grad_norm": 0.15378931524097514, "learning_rate": 2e-05, "loss": 5.5408, "step": 2990 }, { "epoch": 0.20062380521179193, "grad_norm": 0.15092259684605497, "learning_rate": 2e-05, "loss": 5.3496, "step": 2991 }, { "epoch": 0.20069088104101687, "grad_norm": 0.14950536600708964, "learning_rate": 2e-05, "loss": 5.5204, "step": 2992 }, { "epoch": 0.2007579568702418, "grad_norm": 0.15792384311486418, "learning_rate": 2e-05, "loss": 5.4055, "step": 2993 }, { "epoch": 0.20082503269946675, "grad_norm": 0.1503961952033972, "learning_rate": 2e-05, "loss": 5.3813, "step": 2994 }, { "epoch": 0.2008921085286917, "grad_norm": 0.1470019024708196, "learning_rate": 2e-05, "loss": 5.4011, "step": 2995 }, { "epoch": 0.20095918435791663, "grad_norm": 0.15425820093858025, "learning_rate": 2e-05, "loss": 5.4303, "step": 2996 }, { "epoch": 0.20102626018714156, "grad_norm": 0.15902190633585256, "learning_rate": 2e-05, "loss": 5.4514, "step": 2997 }, { "epoch": 0.2010933360163665, "grad_norm": 0.1491845283942766, "learning_rate": 2e-05, "loss": 5.5094, "step": 2998 }, { "epoch": 0.20116041184559144, "grad_norm": 0.1449410759603749, "learning_rate": 2e-05, "loss": 5.4203, "step": 2999 }, { "epoch": 0.20122748767481638, "grad_norm": 0.15198180333884964, "learning_rate": 2e-05, "loss": 5.3732, "step": 3000 }, { "epoch": 0.20129456350404132, "grad_norm": 0.14307529685683662, "learning_rate": 2e-05, "loss": 5.3358, "step": 3001 }, { "epoch": 0.20136163933326626, "grad_norm": 0.14712041125910671, "learning_rate": 2e-05, "loss": 5.4459, "step": 3002 }, { "epoch": 0.2014287151624912, "grad_norm": 0.14934833205345135, "learning_rate": 2e-05, "loss": 5.3753, "step": 3003 }, { "epoch": 0.20149579099171613, "grad_norm": 0.14952202147035834, "learning_rate": 2e-05, "loss": 5.6429, "step": 3004 }, { "epoch": 0.20156286682094107, "grad_norm": 0.15032636294942786, "learning_rate": 2e-05, "loss": 5.3222, "step": 3005 }, { "epoch": 0.201629942650166, "grad_norm": 0.14471347402146728, "learning_rate": 2e-05, "loss": 5.5036, "step": 3006 }, { "epoch": 0.20169701847939095, "grad_norm": 0.14708795060029764, "learning_rate": 2e-05, "loss": 5.6083, "step": 3007 }, { "epoch": 0.2017640943086159, "grad_norm": 0.1547541836407129, "learning_rate": 2e-05, "loss": 5.3391, "step": 3008 }, { "epoch": 0.20183117013784083, "grad_norm": 0.1439416517725702, "learning_rate": 2e-05, "loss": 5.49, "step": 3009 }, { "epoch": 0.20189824596706576, "grad_norm": 0.14598872997078965, "learning_rate": 2e-05, "loss": 5.5713, "step": 3010 }, { "epoch": 0.2019653217962907, "grad_norm": 0.1406840286073035, "learning_rate": 2e-05, "loss": 5.5344, "step": 3011 }, { "epoch": 0.20203239762551564, "grad_norm": 0.14309123967604118, "learning_rate": 2e-05, "loss": 5.4291, "step": 3012 }, { "epoch": 0.20209947345474058, "grad_norm": 0.14753820072330392, "learning_rate": 2e-05, "loss": 5.3468, "step": 3013 }, { "epoch": 0.20216654928396552, "grad_norm": 0.14611787469283935, "learning_rate": 2e-05, "loss": 5.4188, "step": 3014 }, { "epoch": 0.20223362511319046, "grad_norm": 0.14751560672488104, "learning_rate": 2e-05, "loss": 5.4619, "step": 3015 }, { "epoch": 0.2023007009424154, "grad_norm": 0.1516185010812857, "learning_rate": 2e-05, "loss": 5.5085, "step": 3016 }, { "epoch": 0.20236777677164033, "grad_norm": 0.1406281677341113, "learning_rate": 2e-05, "loss": 5.3114, "step": 3017 }, { "epoch": 0.20243485260086527, "grad_norm": 0.1447394514764819, "learning_rate": 2e-05, "loss": 5.4112, "step": 3018 }, { "epoch": 0.2025019284300902, "grad_norm": 0.15826957399022767, "learning_rate": 2e-05, "loss": 5.3987, "step": 3019 }, { "epoch": 0.20256900425931515, "grad_norm": 0.14759104499065448, "learning_rate": 2e-05, "loss": 5.4559, "step": 3020 }, { "epoch": 0.2026360800885401, "grad_norm": 0.1463592915398451, "learning_rate": 2e-05, "loss": 5.4726, "step": 3021 }, { "epoch": 0.20270315591776503, "grad_norm": 0.15318663395955756, "learning_rate": 2e-05, "loss": 5.4005, "step": 3022 }, { "epoch": 0.20277023174698997, "grad_norm": 0.15285848570593338, "learning_rate": 2e-05, "loss": 5.602, "step": 3023 }, { "epoch": 0.2028373075762149, "grad_norm": 0.1444383521060343, "learning_rate": 2e-05, "loss": 5.4171, "step": 3024 }, { "epoch": 0.20290438340543984, "grad_norm": 0.15722692752490042, "learning_rate": 2e-05, "loss": 5.6045, "step": 3025 }, { "epoch": 0.20297145923466478, "grad_norm": 0.14455820122339122, "learning_rate": 2e-05, "loss": 5.4946, "step": 3026 }, { "epoch": 0.20303853506388972, "grad_norm": 0.1378644971484586, "learning_rate": 2e-05, "loss": 5.4347, "step": 3027 }, { "epoch": 0.20310561089311466, "grad_norm": 0.15561304764081713, "learning_rate": 2e-05, "loss": 5.3947, "step": 3028 }, { "epoch": 0.2031726867223396, "grad_norm": 0.1550419748531997, "learning_rate": 2e-05, "loss": 5.5441, "step": 3029 }, { "epoch": 0.20323976255156453, "grad_norm": 0.1469918152921543, "learning_rate": 2e-05, "loss": 5.4192, "step": 3030 }, { "epoch": 0.20330683838078947, "grad_norm": 0.14925571325921638, "learning_rate": 2e-05, "loss": 5.4191, "step": 3031 }, { "epoch": 0.2033739142100144, "grad_norm": 0.15580832083467977, "learning_rate": 2e-05, "loss": 5.5111, "step": 3032 }, { "epoch": 0.20344099003923935, "grad_norm": 0.1469633740790967, "learning_rate": 2e-05, "loss": 5.4672, "step": 3033 }, { "epoch": 0.2035080658684643, "grad_norm": 0.14541796164468618, "learning_rate": 2e-05, "loss": 5.3813, "step": 3034 }, { "epoch": 0.20357514169768923, "grad_norm": 0.1457480369224864, "learning_rate": 2e-05, "loss": 5.4848, "step": 3035 }, { "epoch": 0.20364221752691417, "grad_norm": 0.15139681659143542, "learning_rate": 2e-05, "loss": 5.3304, "step": 3036 }, { "epoch": 0.2037092933561391, "grad_norm": 0.14609842605462234, "learning_rate": 2e-05, "loss": 5.348, "step": 3037 }, { "epoch": 0.20377636918536404, "grad_norm": 0.1491384865468703, "learning_rate": 2e-05, "loss": 5.5582, "step": 3038 }, { "epoch": 0.20384344501458898, "grad_norm": 0.14477399941425576, "learning_rate": 2e-05, "loss": 5.4143, "step": 3039 }, { "epoch": 0.20391052084381392, "grad_norm": 0.14526841609338192, "learning_rate": 2e-05, "loss": 5.4806, "step": 3040 }, { "epoch": 0.20397759667303886, "grad_norm": 0.14665081328067248, "learning_rate": 2e-05, "loss": 5.4762, "step": 3041 }, { "epoch": 0.2040446725022638, "grad_norm": 0.14088013445111372, "learning_rate": 2e-05, "loss": 5.4019, "step": 3042 }, { "epoch": 0.20411174833148873, "grad_norm": 0.14067080565106252, "learning_rate": 2e-05, "loss": 5.461, "step": 3043 }, { "epoch": 0.20417882416071367, "grad_norm": 0.14203718642181404, "learning_rate": 2e-05, "loss": 5.3919, "step": 3044 }, { "epoch": 0.2042458999899386, "grad_norm": 0.1443076929440761, "learning_rate": 2e-05, "loss": 5.4189, "step": 3045 }, { "epoch": 0.20431297581916358, "grad_norm": 0.15702968043159682, "learning_rate": 2e-05, "loss": 5.524, "step": 3046 }, { "epoch": 0.20438005164838852, "grad_norm": 0.15510329913132856, "learning_rate": 2e-05, "loss": 5.5507, "step": 3047 }, { "epoch": 0.20444712747761346, "grad_norm": 0.15240369296619588, "learning_rate": 2e-05, "loss": 5.5217, "step": 3048 }, { "epoch": 0.2045142033068384, "grad_norm": 0.14794097646818508, "learning_rate": 2e-05, "loss": 5.4078, "step": 3049 }, { "epoch": 0.20458127913606333, "grad_norm": 0.14471544849454027, "learning_rate": 2e-05, "loss": 5.4842, "step": 3050 }, { "epoch": 0.20464835496528827, "grad_norm": 0.1477870596710543, "learning_rate": 2e-05, "loss": 5.3182, "step": 3051 }, { "epoch": 0.2047154307945132, "grad_norm": 0.1498538820148795, "learning_rate": 2e-05, "loss": 5.518, "step": 3052 }, { "epoch": 0.20478250662373815, "grad_norm": 0.14533719782339297, "learning_rate": 2e-05, "loss": 5.6222, "step": 3053 }, { "epoch": 0.20484958245296309, "grad_norm": 0.14485387796394072, "learning_rate": 2e-05, "loss": 5.4144, "step": 3054 }, { "epoch": 0.20491665828218802, "grad_norm": 0.14890287660451448, "learning_rate": 2e-05, "loss": 5.5016, "step": 3055 }, { "epoch": 0.20498373411141296, "grad_norm": 0.15389066140015165, "learning_rate": 2e-05, "loss": 5.3707, "step": 3056 }, { "epoch": 0.2050508099406379, "grad_norm": 0.13853450760140834, "learning_rate": 2e-05, "loss": 5.4202, "step": 3057 }, { "epoch": 0.20511788576986284, "grad_norm": 0.14586949257984608, "learning_rate": 2e-05, "loss": 5.5217, "step": 3058 }, { "epoch": 0.20518496159908778, "grad_norm": 0.1447376551167985, "learning_rate": 2e-05, "loss": 5.467, "step": 3059 }, { "epoch": 0.20525203742831272, "grad_norm": 0.14572140049185617, "learning_rate": 2e-05, "loss": 5.499, "step": 3060 }, { "epoch": 0.20531911325753766, "grad_norm": 0.1411607688031229, "learning_rate": 2e-05, "loss": 5.4357, "step": 3061 }, { "epoch": 0.2053861890867626, "grad_norm": 0.15111612573315109, "learning_rate": 2e-05, "loss": 5.4026, "step": 3062 }, { "epoch": 0.20545326491598753, "grad_norm": 0.1530701160224052, "learning_rate": 2e-05, "loss": 5.6171, "step": 3063 }, { "epoch": 0.20552034074521247, "grad_norm": 0.14180896945216864, "learning_rate": 2e-05, "loss": 5.5197, "step": 3064 }, { "epoch": 0.2055874165744374, "grad_norm": 0.150533677399395, "learning_rate": 2e-05, "loss": 5.6637, "step": 3065 }, { "epoch": 0.20565449240366235, "grad_norm": 0.16139257674162552, "learning_rate": 2e-05, "loss": 5.4199, "step": 3066 }, { "epoch": 0.2057215682328873, "grad_norm": 0.14819002880831822, "learning_rate": 2e-05, "loss": 5.3762, "step": 3067 }, { "epoch": 0.20578864406211222, "grad_norm": 0.14312912764192268, "learning_rate": 2e-05, "loss": 5.5971, "step": 3068 }, { "epoch": 0.20585571989133716, "grad_norm": 0.15011488469917977, "learning_rate": 2e-05, "loss": 5.5412, "step": 3069 }, { "epoch": 0.2059227957205621, "grad_norm": 0.1522955329626157, "learning_rate": 2e-05, "loss": 5.4292, "step": 3070 }, { "epoch": 0.20598987154978704, "grad_norm": 0.13825495118739617, "learning_rate": 2e-05, "loss": 5.4571, "step": 3071 }, { "epoch": 0.20605694737901198, "grad_norm": 0.1515500318919278, "learning_rate": 2e-05, "loss": 5.5153, "step": 3072 }, { "epoch": 0.20612402320823692, "grad_norm": 0.1512379040154233, "learning_rate": 2e-05, "loss": 5.4477, "step": 3073 }, { "epoch": 0.20619109903746186, "grad_norm": 0.14639059241416527, "learning_rate": 2e-05, "loss": 5.4339, "step": 3074 }, { "epoch": 0.2062581748666868, "grad_norm": 0.14300149376879287, "learning_rate": 2e-05, "loss": 5.3653, "step": 3075 }, { "epoch": 0.20632525069591173, "grad_norm": 0.13824641512010052, "learning_rate": 2e-05, "loss": 5.5345, "step": 3076 }, { "epoch": 0.20639232652513667, "grad_norm": 0.14349130958336473, "learning_rate": 2e-05, "loss": 5.4653, "step": 3077 }, { "epoch": 0.2064594023543616, "grad_norm": 0.14720363780776052, "learning_rate": 2e-05, "loss": 5.4328, "step": 3078 }, { "epoch": 0.20652647818358655, "grad_norm": 0.14495890879856804, "learning_rate": 2e-05, "loss": 5.4221, "step": 3079 }, { "epoch": 0.2065935540128115, "grad_norm": 0.14999245671107905, "learning_rate": 2e-05, "loss": 5.4082, "step": 3080 }, { "epoch": 0.20666062984203643, "grad_norm": 0.1423471871868468, "learning_rate": 2e-05, "loss": 5.3873, "step": 3081 }, { "epoch": 0.20672770567126136, "grad_norm": 0.14458321749817846, "learning_rate": 2e-05, "loss": 5.3577, "step": 3082 }, { "epoch": 0.2067947815004863, "grad_norm": 0.1487991137110896, "learning_rate": 2e-05, "loss": 5.504, "step": 3083 }, { "epoch": 0.20686185732971124, "grad_norm": 0.1489922212059875, "learning_rate": 2e-05, "loss": 5.5994, "step": 3084 }, { "epoch": 0.20692893315893618, "grad_norm": 0.14316286435275205, "learning_rate": 2e-05, "loss": 5.4223, "step": 3085 }, { "epoch": 0.20699600898816112, "grad_norm": 0.1437785535676628, "learning_rate": 2e-05, "loss": 5.5321, "step": 3086 }, { "epoch": 0.20706308481738606, "grad_norm": 0.14419866075194698, "learning_rate": 2e-05, "loss": 5.4459, "step": 3087 }, { "epoch": 0.207130160646611, "grad_norm": 0.14433626923338122, "learning_rate": 2e-05, "loss": 5.4169, "step": 3088 }, { "epoch": 0.20719723647583593, "grad_norm": 0.14101152975084455, "learning_rate": 2e-05, "loss": 5.4673, "step": 3089 }, { "epoch": 0.20726431230506087, "grad_norm": 0.14785642561144374, "learning_rate": 2e-05, "loss": 5.4537, "step": 3090 }, { "epoch": 0.2073313881342858, "grad_norm": 0.1485231640271306, "learning_rate": 2e-05, "loss": 5.4409, "step": 3091 }, { "epoch": 0.20739846396351075, "grad_norm": 0.1427861150447786, "learning_rate": 2e-05, "loss": 5.3397, "step": 3092 }, { "epoch": 0.2074655397927357, "grad_norm": 0.1455448676900824, "learning_rate": 2e-05, "loss": 5.478, "step": 3093 }, { "epoch": 0.20753261562196063, "grad_norm": 0.14138365405725, "learning_rate": 2e-05, "loss": 5.5633, "step": 3094 }, { "epoch": 0.20759969145118556, "grad_norm": 0.14179023979294056, "learning_rate": 2e-05, "loss": 5.4528, "step": 3095 }, { "epoch": 0.2076667672804105, "grad_norm": 0.1438254618429699, "learning_rate": 2e-05, "loss": 5.4525, "step": 3096 }, { "epoch": 0.20773384310963544, "grad_norm": 0.1423015275787241, "learning_rate": 2e-05, "loss": 5.4377, "step": 3097 }, { "epoch": 0.20780091893886038, "grad_norm": 0.14643741836625412, "learning_rate": 2e-05, "loss": 5.4445, "step": 3098 }, { "epoch": 0.20786799476808532, "grad_norm": 0.14535342766017958, "learning_rate": 2e-05, "loss": 5.513, "step": 3099 }, { "epoch": 0.20793507059731026, "grad_norm": 0.14292422123627008, "learning_rate": 2e-05, "loss": 5.5064, "step": 3100 }, { "epoch": 0.2080021464265352, "grad_norm": 0.14652997007372942, "learning_rate": 2e-05, "loss": 5.4605, "step": 3101 }, { "epoch": 0.20806922225576013, "grad_norm": 0.14108371346331539, "learning_rate": 2e-05, "loss": 5.4824, "step": 3102 }, { "epoch": 0.20813629808498507, "grad_norm": 0.15138135369770928, "learning_rate": 2e-05, "loss": 5.5671, "step": 3103 }, { "epoch": 0.20820337391421, "grad_norm": 0.15446227763580825, "learning_rate": 2e-05, "loss": 5.5476, "step": 3104 }, { "epoch": 0.20827044974343495, "grad_norm": 0.14455946589848603, "learning_rate": 2e-05, "loss": 5.4699, "step": 3105 }, { "epoch": 0.2083375255726599, "grad_norm": 0.15556147259144065, "learning_rate": 2e-05, "loss": 5.3669, "step": 3106 }, { "epoch": 0.20840460140188483, "grad_norm": 0.1473600234054407, "learning_rate": 2e-05, "loss": 5.495, "step": 3107 }, { "epoch": 0.20847167723110976, "grad_norm": 0.14546566514039044, "learning_rate": 2e-05, "loss": 5.5236, "step": 3108 }, { "epoch": 0.2085387530603347, "grad_norm": 0.15207936101904435, "learning_rate": 2e-05, "loss": 5.6357, "step": 3109 }, { "epoch": 0.20860582888955964, "grad_norm": 0.1683997431935039, "learning_rate": 2e-05, "loss": 5.4837, "step": 3110 }, { "epoch": 0.20867290471878458, "grad_norm": 0.14979660783303064, "learning_rate": 2e-05, "loss": 5.4416, "step": 3111 }, { "epoch": 0.20873998054800952, "grad_norm": 0.1397880593419318, "learning_rate": 2e-05, "loss": 5.4395, "step": 3112 }, { "epoch": 0.20880705637723446, "grad_norm": 0.14062837375466844, "learning_rate": 2e-05, "loss": 5.4333, "step": 3113 }, { "epoch": 0.2088741322064594, "grad_norm": 0.15061629227849166, "learning_rate": 2e-05, "loss": 5.516, "step": 3114 }, { "epoch": 0.20894120803568433, "grad_norm": 0.14680081895498812, "learning_rate": 2e-05, "loss": 5.3401, "step": 3115 }, { "epoch": 0.20900828386490927, "grad_norm": 0.14102022693714888, "learning_rate": 2e-05, "loss": 5.4155, "step": 3116 }, { "epoch": 0.2090753596941342, "grad_norm": 0.1399473133115436, "learning_rate": 2e-05, "loss": 5.3414, "step": 3117 }, { "epoch": 0.20914243552335915, "grad_norm": 0.1439648553301222, "learning_rate": 2e-05, "loss": 5.4817, "step": 3118 }, { "epoch": 0.2092095113525841, "grad_norm": 0.14056909046096888, "learning_rate": 2e-05, "loss": 5.403, "step": 3119 }, { "epoch": 0.20927658718180903, "grad_norm": 0.14105973042678108, "learning_rate": 2e-05, "loss": 5.4675, "step": 3120 }, { "epoch": 0.20934366301103396, "grad_norm": 0.1397450244096343, "learning_rate": 2e-05, "loss": 5.3693, "step": 3121 }, { "epoch": 0.2094107388402589, "grad_norm": 0.13733889778034322, "learning_rate": 2e-05, "loss": 5.6639, "step": 3122 }, { "epoch": 0.20947781466948384, "grad_norm": 0.15010955109128885, "learning_rate": 2e-05, "loss": 5.6688, "step": 3123 }, { "epoch": 0.20954489049870878, "grad_norm": 0.13893557795618847, "learning_rate": 2e-05, "loss": 5.3771, "step": 3124 }, { "epoch": 0.20961196632793372, "grad_norm": 0.14121483235751464, "learning_rate": 2e-05, "loss": 5.4867, "step": 3125 }, { "epoch": 0.20967904215715866, "grad_norm": 0.15358336776355982, "learning_rate": 2e-05, "loss": 5.3989, "step": 3126 }, { "epoch": 0.2097461179863836, "grad_norm": 0.14406810623991959, "learning_rate": 2e-05, "loss": 5.5001, "step": 3127 }, { "epoch": 0.20981319381560853, "grad_norm": 0.14197236869185342, "learning_rate": 2e-05, "loss": 5.5416, "step": 3128 }, { "epoch": 0.20988026964483347, "grad_norm": 0.1418805774394157, "learning_rate": 2e-05, "loss": 5.4431, "step": 3129 }, { "epoch": 0.2099473454740584, "grad_norm": 0.14411777774640497, "learning_rate": 2e-05, "loss": 5.4108, "step": 3130 }, { "epoch": 0.21001442130328335, "grad_norm": 0.1453863508997706, "learning_rate": 2e-05, "loss": 5.3509, "step": 3131 }, { "epoch": 0.2100814971325083, "grad_norm": 0.13690146130641365, "learning_rate": 2e-05, "loss": 5.4378, "step": 3132 }, { "epoch": 0.21014857296173323, "grad_norm": 0.14395788264883455, "learning_rate": 2e-05, "loss": 5.5103, "step": 3133 }, { "epoch": 0.21021564879095817, "grad_norm": 0.1468773331653128, "learning_rate": 2e-05, "loss": 5.3514, "step": 3134 }, { "epoch": 0.2102827246201831, "grad_norm": 0.13928174181323075, "learning_rate": 2e-05, "loss": 5.4697, "step": 3135 }, { "epoch": 0.21034980044940804, "grad_norm": 0.13813346516889752, "learning_rate": 2e-05, "loss": 5.4438, "step": 3136 }, { "epoch": 0.210416876278633, "grad_norm": 0.14097473210244377, "learning_rate": 2e-05, "loss": 5.6157, "step": 3137 }, { "epoch": 0.21048395210785795, "grad_norm": 0.1420229371175282, "learning_rate": 2e-05, "loss": 5.6865, "step": 3138 }, { "epoch": 0.21055102793708289, "grad_norm": 0.14541383268996122, "learning_rate": 2e-05, "loss": 5.3944, "step": 3139 }, { "epoch": 0.21061810376630782, "grad_norm": 0.14447513673296128, "learning_rate": 2e-05, "loss": 5.4197, "step": 3140 }, { "epoch": 0.21068517959553276, "grad_norm": 0.13951692048735007, "learning_rate": 2e-05, "loss": 5.44, "step": 3141 }, { "epoch": 0.2107522554247577, "grad_norm": 0.14601729616907377, "learning_rate": 2e-05, "loss": 5.3517, "step": 3142 }, { "epoch": 0.21081933125398264, "grad_norm": 0.14288211221162064, "learning_rate": 2e-05, "loss": 5.3477, "step": 3143 }, { "epoch": 0.21088640708320758, "grad_norm": 0.14340310440472676, "learning_rate": 2e-05, "loss": 5.5222, "step": 3144 }, { "epoch": 0.21095348291243252, "grad_norm": 0.15689416530214037, "learning_rate": 2e-05, "loss": 5.4915, "step": 3145 }, { "epoch": 0.21102055874165745, "grad_norm": 0.14978960066678307, "learning_rate": 2e-05, "loss": 5.5155, "step": 3146 }, { "epoch": 0.2110876345708824, "grad_norm": 0.14333937157852977, "learning_rate": 2e-05, "loss": 5.5935, "step": 3147 }, { "epoch": 0.21115471040010733, "grad_norm": 0.15003934278966846, "learning_rate": 2e-05, "loss": 5.4227, "step": 3148 }, { "epoch": 0.21122178622933227, "grad_norm": 0.1636410947478519, "learning_rate": 2e-05, "loss": 5.4248, "step": 3149 }, { "epoch": 0.2112888620585572, "grad_norm": 0.14607752283981512, "learning_rate": 2e-05, "loss": 5.422, "step": 3150 }, { "epoch": 0.21135593788778215, "grad_norm": 0.14513506994288355, "learning_rate": 2e-05, "loss": 5.472, "step": 3151 }, { "epoch": 0.21142301371700709, "grad_norm": 0.14452170337977938, "learning_rate": 2e-05, "loss": 5.5079, "step": 3152 }, { "epoch": 0.21149008954623202, "grad_norm": 0.13936625305994524, "learning_rate": 2e-05, "loss": 5.5671, "step": 3153 }, { "epoch": 0.21155716537545696, "grad_norm": 0.14761432001487632, "learning_rate": 2e-05, "loss": 5.4575, "step": 3154 }, { "epoch": 0.2116242412046819, "grad_norm": 0.14248112726470508, "learning_rate": 2e-05, "loss": 5.4651, "step": 3155 }, { "epoch": 0.21169131703390684, "grad_norm": 0.15024303380893234, "learning_rate": 2e-05, "loss": 5.4376, "step": 3156 }, { "epoch": 0.21175839286313178, "grad_norm": 0.13688357045495048, "learning_rate": 2e-05, "loss": 5.5145, "step": 3157 }, { "epoch": 0.21182546869235672, "grad_norm": 0.14113721288626668, "learning_rate": 2e-05, "loss": 5.5873, "step": 3158 }, { "epoch": 0.21189254452158165, "grad_norm": 0.15612672163791408, "learning_rate": 2e-05, "loss": 5.4972, "step": 3159 }, { "epoch": 0.2119596203508066, "grad_norm": 0.13889565587375519, "learning_rate": 2e-05, "loss": 5.3583, "step": 3160 }, { "epoch": 0.21202669618003153, "grad_norm": 0.14595303473167015, "learning_rate": 2e-05, "loss": 5.3332, "step": 3161 }, { "epoch": 0.21209377200925647, "grad_norm": 0.1557610289199363, "learning_rate": 2e-05, "loss": 5.3329, "step": 3162 }, { "epoch": 0.2121608478384814, "grad_norm": 0.1445801329059681, "learning_rate": 2e-05, "loss": 5.3537, "step": 3163 }, { "epoch": 0.21222792366770635, "grad_norm": 0.16015550030013584, "learning_rate": 2e-05, "loss": 5.5481, "step": 3164 }, { "epoch": 0.21229499949693129, "grad_norm": 0.14403213981203153, "learning_rate": 2e-05, "loss": 5.3959, "step": 3165 }, { "epoch": 0.21236207532615622, "grad_norm": 0.1468230070825452, "learning_rate": 2e-05, "loss": 5.2819, "step": 3166 }, { "epoch": 0.21242915115538116, "grad_norm": 0.1403777006966255, "learning_rate": 2e-05, "loss": 5.5679, "step": 3167 }, { "epoch": 0.2124962269846061, "grad_norm": 0.1551011245255942, "learning_rate": 2e-05, "loss": 5.562, "step": 3168 }, { "epoch": 0.21256330281383104, "grad_norm": 0.14890717538912843, "learning_rate": 2e-05, "loss": 5.4026, "step": 3169 }, { "epoch": 0.21263037864305598, "grad_norm": 0.14826186593089416, "learning_rate": 2e-05, "loss": 5.5025, "step": 3170 }, { "epoch": 0.21269745447228092, "grad_norm": 0.1446312561245324, "learning_rate": 2e-05, "loss": 5.2921, "step": 3171 }, { "epoch": 0.21276453030150586, "grad_norm": 0.1527081329037847, "learning_rate": 2e-05, "loss": 5.3111, "step": 3172 }, { "epoch": 0.2128316061307308, "grad_norm": 0.13974109410024763, "learning_rate": 2e-05, "loss": 5.4236, "step": 3173 }, { "epoch": 0.21289868195995573, "grad_norm": 0.14551930917950906, "learning_rate": 2e-05, "loss": 5.5298, "step": 3174 }, { "epoch": 0.21296575778918067, "grad_norm": 0.137250810046067, "learning_rate": 2e-05, "loss": 5.4215, "step": 3175 }, { "epoch": 0.2130328336184056, "grad_norm": 0.1407280997901509, "learning_rate": 2e-05, "loss": 5.5511, "step": 3176 }, { "epoch": 0.21309990944763055, "grad_norm": 0.1371370898914445, "learning_rate": 2e-05, "loss": 5.4351, "step": 3177 }, { "epoch": 0.2131669852768555, "grad_norm": 0.14419278137887057, "learning_rate": 2e-05, "loss": 5.4751, "step": 3178 }, { "epoch": 0.21323406110608042, "grad_norm": 0.1433636171299948, "learning_rate": 2e-05, "loss": 5.4056, "step": 3179 }, { "epoch": 0.21330113693530536, "grad_norm": 0.14472148554097586, "learning_rate": 2e-05, "loss": 5.2808, "step": 3180 }, { "epoch": 0.2133682127645303, "grad_norm": 0.14728076298555853, "learning_rate": 2e-05, "loss": 5.5075, "step": 3181 }, { "epoch": 0.21343528859375524, "grad_norm": 0.14789719435084364, "learning_rate": 2e-05, "loss": 5.4698, "step": 3182 }, { "epoch": 0.21350236442298018, "grad_norm": 0.14191005317421204, "learning_rate": 2e-05, "loss": 5.5851, "step": 3183 }, { "epoch": 0.21356944025220512, "grad_norm": 0.1549257820440864, "learning_rate": 2e-05, "loss": 5.4668, "step": 3184 }, { "epoch": 0.21363651608143006, "grad_norm": 0.1521058709323602, "learning_rate": 2e-05, "loss": 5.4621, "step": 3185 }, { "epoch": 0.213703591910655, "grad_norm": 0.1416611609507571, "learning_rate": 2e-05, "loss": 5.454, "step": 3186 }, { "epoch": 0.21377066773987993, "grad_norm": 0.14213355592181234, "learning_rate": 2e-05, "loss": 5.3991, "step": 3187 }, { "epoch": 0.21383774356910487, "grad_norm": 0.14764065001647575, "learning_rate": 2e-05, "loss": 5.5522, "step": 3188 }, { "epoch": 0.2139048193983298, "grad_norm": 0.14588074924878125, "learning_rate": 2e-05, "loss": 5.463, "step": 3189 }, { "epoch": 0.21397189522755475, "grad_norm": 0.14130905345234476, "learning_rate": 2e-05, "loss": 5.3555, "step": 3190 }, { "epoch": 0.2140389710567797, "grad_norm": 0.1467764646808757, "learning_rate": 2e-05, "loss": 5.5283, "step": 3191 }, { "epoch": 0.21410604688600463, "grad_norm": 0.14902790681130731, "learning_rate": 2e-05, "loss": 5.4571, "step": 3192 }, { "epoch": 0.21417312271522956, "grad_norm": 0.1481406915322595, "learning_rate": 2e-05, "loss": 5.5246, "step": 3193 }, { "epoch": 0.2142401985444545, "grad_norm": 0.14196425274046848, "learning_rate": 2e-05, "loss": 5.3809, "step": 3194 }, { "epoch": 0.21430727437367944, "grad_norm": 0.14749043729935088, "learning_rate": 2e-05, "loss": 5.5198, "step": 3195 }, { "epoch": 0.21437435020290438, "grad_norm": 0.14972169286494344, "learning_rate": 2e-05, "loss": 5.4541, "step": 3196 }, { "epoch": 0.21444142603212932, "grad_norm": 0.14476836684585181, "learning_rate": 2e-05, "loss": 5.591, "step": 3197 }, { "epoch": 0.21450850186135426, "grad_norm": 0.14204390727496452, "learning_rate": 2e-05, "loss": 5.4689, "step": 3198 }, { "epoch": 0.2145755776905792, "grad_norm": 0.15151050088300744, "learning_rate": 2e-05, "loss": 5.5278, "step": 3199 }, { "epoch": 0.21464265351980413, "grad_norm": 0.14227925152320525, "learning_rate": 2e-05, "loss": 5.387, "step": 3200 }, { "epoch": 0.21470972934902907, "grad_norm": 0.14558011965356218, "learning_rate": 2e-05, "loss": 5.5078, "step": 3201 }, { "epoch": 0.214776805178254, "grad_norm": 0.14756292718284436, "learning_rate": 2e-05, "loss": 5.4132, "step": 3202 }, { "epoch": 0.21484388100747895, "grad_norm": 0.14235864038439133, "learning_rate": 2e-05, "loss": 5.4844, "step": 3203 }, { "epoch": 0.2149109568367039, "grad_norm": 0.15624519886200725, "learning_rate": 2e-05, "loss": 5.3471, "step": 3204 }, { "epoch": 0.21497803266592883, "grad_norm": 0.15070229734644655, "learning_rate": 2e-05, "loss": 5.2519, "step": 3205 }, { "epoch": 0.21504510849515376, "grad_norm": 0.1450976489631152, "learning_rate": 2e-05, "loss": 5.5052, "step": 3206 }, { "epoch": 0.2151121843243787, "grad_norm": 0.1531313471246393, "learning_rate": 2e-05, "loss": 5.5375, "step": 3207 }, { "epoch": 0.21517926015360364, "grad_norm": 0.14150071520545918, "learning_rate": 2e-05, "loss": 5.3538, "step": 3208 }, { "epoch": 0.21524633598282858, "grad_norm": 0.14109334493976483, "learning_rate": 2e-05, "loss": 5.4656, "step": 3209 }, { "epoch": 0.21531341181205352, "grad_norm": 0.14654135730583567, "learning_rate": 2e-05, "loss": 5.4281, "step": 3210 }, { "epoch": 0.21538048764127846, "grad_norm": 0.1448929443754271, "learning_rate": 2e-05, "loss": 5.5809, "step": 3211 }, { "epoch": 0.2154475634705034, "grad_norm": 0.14222667671408426, "learning_rate": 2e-05, "loss": 5.6381, "step": 3212 }, { "epoch": 0.21551463929972833, "grad_norm": 0.14910683795322652, "learning_rate": 2e-05, "loss": 5.4656, "step": 3213 }, { "epoch": 0.21558171512895327, "grad_norm": 0.15093734562031114, "learning_rate": 2e-05, "loss": 5.3076, "step": 3214 }, { "epoch": 0.2156487909581782, "grad_norm": 0.1483919168014747, "learning_rate": 2e-05, "loss": 5.5474, "step": 3215 }, { "epoch": 0.21571586678740315, "grad_norm": 0.15535868975876266, "learning_rate": 2e-05, "loss": 5.4795, "step": 3216 }, { "epoch": 0.2157829426166281, "grad_norm": 0.14198821066288736, "learning_rate": 2e-05, "loss": 5.5111, "step": 3217 }, { "epoch": 0.21585001844585303, "grad_norm": 0.1447599838450322, "learning_rate": 2e-05, "loss": 5.4439, "step": 3218 }, { "epoch": 0.21591709427507796, "grad_norm": 0.15128707909766362, "learning_rate": 2e-05, "loss": 5.4865, "step": 3219 }, { "epoch": 0.2159841701043029, "grad_norm": 0.14264956653343508, "learning_rate": 2e-05, "loss": 5.3493, "step": 3220 }, { "epoch": 0.21605124593352784, "grad_norm": 0.1403301628615722, "learning_rate": 2e-05, "loss": 5.6286, "step": 3221 }, { "epoch": 0.21611832176275278, "grad_norm": 0.15528770396372726, "learning_rate": 2e-05, "loss": 5.5553, "step": 3222 }, { "epoch": 0.21618539759197772, "grad_norm": 0.14319346434747807, "learning_rate": 2e-05, "loss": 5.3659, "step": 3223 }, { "epoch": 0.21625247342120266, "grad_norm": 0.14091543787734176, "learning_rate": 2e-05, "loss": 5.3949, "step": 3224 }, { "epoch": 0.2163195492504276, "grad_norm": 0.1486425088198627, "learning_rate": 2e-05, "loss": 5.4431, "step": 3225 }, { "epoch": 0.21638662507965253, "grad_norm": 0.14397993746299345, "learning_rate": 2e-05, "loss": 5.4264, "step": 3226 }, { "epoch": 0.21645370090887747, "grad_norm": 0.1469011599629992, "learning_rate": 2e-05, "loss": 5.5253, "step": 3227 }, { "epoch": 0.21652077673810244, "grad_norm": 0.1531627977947837, "learning_rate": 2e-05, "loss": 5.3494, "step": 3228 }, { "epoch": 0.21658785256732738, "grad_norm": 0.14520325677009419, "learning_rate": 2e-05, "loss": 5.4708, "step": 3229 }, { "epoch": 0.21665492839655232, "grad_norm": 0.1407434117043489, "learning_rate": 2e-05, "loss": 5.4839, "step": 3230 }, { "epoch": 0.21672200422577725, "grad_norm": 0.14700419185765715, "learning_rate": 2e-05, "loss": 5.4021, "step": 3231 }, { "epoch": 0.2167890800550022, "grad_norm": 0.15434291636388722, "learning_rate": 2e-05, "loss": 5.3639, "step": 3232 }, { "epoch": 0.21685615588422713, "grad_norm": 0.14570134096286255, "learning_rate": 2e-05, "loss": 5.421, "step": 3233 }, { "epoch": 0.21692323171345207, "grad_norm": 0.1472617270373705, "learning_rate": 2e-05, "loss": 5.4665, "step": 3234 }, { "epoch": 0.216990307542677, "grad_norm": 0.14876830020010962, "learning_rate": 2e-05, "loss": 5.5196, "step": 3235 }, { "epoch": 0.21705738337190195, "grad_norm": 0.1534012752778758, "learning_rate": 2e-05, "loss": 5.3637, "step": 3236 }, { "epoch": 0.21712445920112688, "grad_norm": 0.1419744824114444, "learning_rate": 2e-05, "loss": 5.4377, "step": 3237 }, { "epoch": 0.21719153503035182, "grad_norm": 0.1498624838089467, "learning_rate": 2e-05, "loss": 5.6543, "step": 3238 }, { "epoch": 0.21725861085957676, "grad_norm": 0.15403356299412144, "learning_rate": 2e-05, "loss": 5.5744, "step": 3239 }, { "epoch": 0.2173256866888017, "grad_norm": 0.1433927376672018, "learning_rate": 2e-05, "loss": 5.5316, "step": 3240 }, { "epoch": 0.21739276251802664, "grad_norm": 0.14889277686589178, "learning_rate": 2e-05, "loss": 5.4039, "step": 3241 }, { "epoch": 0.21745983834725158, "grad_norm": 0.14310320150254194, "learning_rate": 2e-05, "loss": 5.595, "step": 3242 }, { "epoch": 0.21752691417647652, "grad_norm": 0.14054839129450325, "learning_rate": 2e-05, "loss": 5.5437, "step": 3243 }, { "epoch": 0.21759399000570145, "grad_norm": 0.14145450877128427, "learning_rate": 2e-05, "loss": 5.5252, "step": 3244 }, { "epoch": 0.2176610658349264, "grad_norm": 0.14823888110315733, "learning_rate": 2e-05, "loss": 5.4098, "step": 3245 }, { "epoch": 0.21772814166415133, "grad_norm": 0.14593728728926186, "learning_rate": 2e-05, "loss": 5.4083, "step": 3246 }, { "epoch": 0.21779521749337627, "grad_norm": 0.14625440803653394, "learning_rate": 2e-05, "loss": 5.5574, "step": 3247 }, { "epoch": 0.2178622933226012, "grad_norm": 0.14707612878696016, "learning_rate": 2e-05, "loss": 5.4345, "step": 3248 }, { "epoch": 0.21792936915182615, "grad_norm": 0.14025601940512689, "learning_rate": 2e-05, "loss": 5.537, "step": 3249 }, { "epoch": 0.21799644498105109, "grad_norm": 0.15137618728006727, "learning_rate": 2e-05, "loss": 5.5326, "step": 3250 }, { "epoch": 0.21806352081027602, "grad_norm": 0.1480765163879402, "learning_rate": 2e-05, "loss": 5.3968, "step": 3251 }, { "epoch": 0.21813059663950096, "grad_norm": 0.14618901214492772, "learning_rate": 2e-05, "loss": 5.4493, "step": 3252 }, { "epoch": 0.2181976724687259, "grad_norm": 0.14908071561772773, "learning_rate": 2e-05, "loss": 5.4404, "step": 3253 }, { "epoch": 0.21826474829795084, "grad_norm": 0.15103693742322666, "learning_rate": 2e-05, "loss": 5.4662, "step": 3254 }, { "epoch": 0.21833182412717578, "grad_norm": 0.13993391759080342, "learning_rate": 2e-05, "loss": 5.431, "step": 3255 }, { "epoch": 0.21839889995640072, "grad_norm": 0.14469112313820162, "learning_rate": 2e-05, "loss": 5.5377, "step": 3256 }, { "epoch": 0.21846597578562565, "grad_norm": 0.15549967815390786, "learning_rate": 2e-05, "loss": 5.477, "step": 3257 }, { "epoch": 0.2185330516148506, "grad_norm": 0.15013196977659907, "learning_rate": 2e-05, "loss": 5.4123, "step": 3258 }, { "epoch": 0.21860012744407553, "grad_norm": 0.15391835666807346, "learning_rate": 2e-05, "loss": 5.424, "step": 3259 }, { "epoch": 0.21866720327330047, "grad_norm": 0.15189234727666506, "learning_rate": 2e-05, "loss": 5.3853, "step": 3260 }, { "epoch": 0.2187342791025254, "grad_norm": 0.14316351491160523, "learning_rate": 2e-05, "loss": 5.522, "step": 3261 }, { "epoch": 0.21880135493175035, "grad_norm": 0.15071205847410488, "learning_rate": 2e-05, "loss": 5.5165, "step": 3262 }, { "epoch": 0.21886843076097529, "grad_norm": 0.14677867840987388, "learning_rate": 2e-05, "loss": 5.31, "step": 3263 }, { "epoch": 0.21893550659020022, "grad_norm": 0.14316265616788784, "learning_rate": 2e-05, "loss": 5.392, "step": 3264 }, { "epoch": 0.21900258241942516, "grad_norm": 0.14514472590572142, "learning_rate": 2e-05, "loss": 5.5157, "step": 3265 }, { "epoch": 0.2190696582486501, "grad_norm": 0.1436745732882428, "learning_rate": 2e-05, "loss": 5.5249, "step": 3266 }, { "epoch": 0.21913673407787504, "grad_norm": 0.14903705531823794, "learning_rate": 2e-05, "loss": 5.558, "step": 3267 }, { "epoch": 0.21920380990709998, "grad_norm": 0.14453176518241367, "learning_rate": 2e-05, "loss": 5.4199, "step": 3268 }, { "epoch": 0.21927088573632492, "grad_norm": 0.14253449044735628, "learning_rate": 2e-05, "loss": 5.6043, "step": 3269 }, { "epoch": 0.21933796156554985, "grad_norm": 0.1479703266613821, "learning_rate": 2e-05, "loss": 5.3884, "step": 3270 }, { "epoch": 0.2194050373947748, "grad_norm": 0.14400393451620722, "learning_rate": 2e-05, "loss": 5.4795, "step": 3271 }, { "epoch": 0.21947211322399973, "grad_norm": 0.1486525039917543, "learning_rate": 2e-05, "loss": 5.5454, "step": 3272 }, { "epoch": 0.21953918905322467, "grad_norm": 0.15274187946424048, "learning_rate": 2e-05, "loss": 5.5541, "step": 3273 }, { "epoch": 0.2196062648824496, "grad_norm": 0.1535018462194799, "learning_rate": 2e-05, "loss": 5.3389, "step": 3274 }, { "epoch": 0.21967334071167455, "grad_norm": 0.1420593685377505, "learning_rate": 2e-05, "loss": 5.5557, "step": 3275 }, { "epoch": 0.21974041654089949, "grad_norm": 0.1487048801258307, "learning_rate": 2e-05, "loss": 5.5296, "step": 3276 }, { "epoch": 0.21980749237012442, "grad_norm": 0.14747191990978456, "learning_rate": 2e-05, "loss": 5.527, "step": 3277 }, { "epoch": 0.21987456819934936, "grad_norm": 0.14125494959602256, "learning_rate": 2e-05, "loss": 5.3928, "step": 3278 }, { "epoch": 0.2199416440285743, "grad_norm": 0.14698309356986805, "learning_rate": 2e-05, "loss": 5.3914, "step": 3279 }, { "epoch": 0.22000871985779924, "grad_norm": 0.15091536411562856, "learning_rate": 2e-05, "loss": 5.5673, "step": 3280 }, { "epoch": 0.22007579568702418, "grad_norm": 0.14345997483532777, "learning_rate": 2e-05, "loss": 5.3973, "step": 3281 }, { "epoch": 0.22014287151624912, "grad_norm": 0.14016199289496584, "learning_rate": 2e-05, "loss": 5.5704, "step": 3282 }, { "epoch": 0.22020994734547406, "grad_norm": 0.14522198421909455, "learning_rate": 2e-05, "loss": 5.4374, "step": 3283 }, { "epoch": 0.220277023174699, "grad_norm": 0.14663207333966252, "learning_rate": 2e-05, "loss": 5.4705, "step": 3284 }, { "epoch": 0.22034409900392393, "grad_norm": 0.14839668137633485, "learning_rate": 2e-05, "loss": 5.4721, "step": 3285 }, { "epoch": 0.22041117483314887, "grad_norm": 0.14384895434823158, "learning_rate": 2e-05, "loss": 5.2787, "step": 3286 }, { "epoch": 0.2204782506623738, "grad_norm": 0.14323077201248935, "learning_rate": 2e-05, "loss": 5.3615, "step": 3287 }, { "epoch": 0.22054532649159875, "grad_norm": 0.15661144852426015, "learning_rate": 2e-05, "loss": 5.3908, "step": 3288 }, { "epoch": 0.2206124023208237, "grad_norm": 0.14969866329865555, "learning_rate": 2e-05, "loss": 5.4178, "step": 3289 }, { "epoch": 0.22067947815004862, "grad_norm": 0.14150137018848877, "learning_rate": 2e-05, "loss": 5.369, "step": 3290 }, { "epoch": 0.22074655397927356, "grad_norm": 0.14733168570098099, "learning_rate": 2e-05, "loss": 5.478, "step": 3291 }, { "epoch": 0.2208136298084985, "grad_norm": 0.14074849988433014, "learning_rate": 2e-05, "loss": 5.4357, "step": 3292 }, { "epoch": 0.22088070563772344, "grad_norm": 0.15360931466649588, "learning_rate": 2e-05, "loss": 5.4907, "step": 3293 }, { "epoch": 0.22094778146694838, "grad_norm": 0.1441457513637673, "learning_rate": 2e-05, "loss": 5.3295, "step": 3294 }, { "epoch": 0.22101485729617332, "grad_norm": 0.1454151361769673, "learning_rate": 2e-05, "loss": 5.4096, "step": 3295 }, { "epoch": 0.22108193312539826, "grad_norm": 0.1482526917231663, "learning_rate": 2e-05, "loss": 5.4018, "step": 3296 }, { "epoch": 0.2211490089546232, "grad_norm": 0.14779623260860175, "learning_rate": 2e-05, "loss": 5.4151, "step": 3297 }, { "epoch": 0.22121608478384813, "grad_norm": 0.14494605532190408, "learning_rate": 2e-05, "loss": 5.5063, "step": 3298 }, { "epoch": 0.22128316061307307, "grad_norm": 0.14773534375713246, "learning_rate": 2e-05, "loss": 5.3223, "step": 3299 }, { "epoch": 0.221350236442298, "grad_norm": 0.15442321120473268, "learning_rate": 2e-05, "loss": 5.403, "step": 3300 }, { "epoch": 0.22141731227152295, "grad_norm": 0.13754664208473438, "learning_rate": 2e-05, "loss": 5.5776, "step": 3301 }, { "epoch": 0.2214843881007479, "grad_norm": 0.1412273666297994, "learning_rate": 2e-05, "loss": 5.5048, "step": 3302 }, { "epoch": 0.22155146392997283, "grad_norm": 0.14711497291752107, "learning_rate": 2e-05, "loss": 5.503, "step": 3303 }, { "epoch": 0.22161853975919776, "grad_norm": 0.14899014956174633, "learning_rate": 2e-05, "loss": 5.418, "step": 3304 }, { "epoch": 0.2216856155884227, "grad_norm": 0.14121523671287264, "learning_rate": 2e-05, "loss": 5.4677, "step": 3305 }, { "epoch": 0.22175269141764764, "grad_norm": 0.15424168549465048, "learning_rate": 2e-05, "loss": 5.4395, "step": 3306 }, { "epoch": 0.22181976724687258, "grad_norm": 0.14163486181898038, "learning_rate": 2e-05, "loss": 5.4555, "step": 3307 }, { "epoch": 0.22188684307609752, "grad_norm": 0.14515407993835638, "learning_rate": 2e-05, "loss": 5.4077, "step": 3308 }, { "epoch": 0.22195391890532246, "grad_norm": 0.14571876901783734, "learning_rate": 2e-05, "loss": 5.5175, "step": 3309 }, { "epoch": 0.2220209947345474, "grad_norm": 0.14402978354917284, "learning_rate": 2e-05, "loss": 5.5138, "step": 3310 }, { "epoch": 0.22208807056377233, "grad_norm": 0.1417472126820956, "learning_rate": 2e-05, "loss": 5.4274, "step": 3311 }, { "epoch": 0.22215514639299727, "grad_norm": 0.1450794222434863, "learning_rate": 2e-05, "loss": 5.3783, "step": 3312 }, { "epoch": 0.2222222222222222, "grad_norm": 0.1431991683922062, "learning_rate": 2e-05, "loss": 5.3527, "step": 3313 }, { "epoch": 0.22228929805144715, "grad_norm": 0.13859018370680973, "learning_rate": 2e-05, "loss": 5.3462, "step": 3314 }, { "epoch": 0.2223563738806721, "grad_norm": 0.14407800033283474, "learning_rate": 2e-05, "loss": 5.5719, "step": 3315 }, { "epoch": 0.22242344970989703, "grad_norm": 0.15198461766798824, "learning_rate": 2e-05, "loss": 5.4305, "step": 3316 }, { "epoch": 0.22249052553912196, "grad_norm": 0.14830205883938186, "learning_rate": 2e-05, "loss": 5.5057, "step": 3317 }, { "epoch": 0.2225576013683469, "grad_norm": 0.1456247687418642, "learning_rate": 2e-05, "loss": 5.563, "step": 3318 }, { "epoch": 0.22262467719757187, "grad_norm": 0.148248463875476, "learning_rate": 2e-05, "loss": 5.512, "step": 3319 }, { "epoch": 0.2226917530267968, "grad_norm": 0.14897145918519974, "learning_rate": 2e-05, "loss": 5.4374, "step": 3320 }, { "epoch": 0.22275882885602175, "grad_norm": 0.13984777476904017, "learning_rate": 2e-05, "loss": 5.5934, "step": 3321 }, { "epoch": 0.22282590468524668, "grad_norm": 0.13851013485340352, "learning_rate": 2e-05, "loss": 5.409, "step": 3322 }, { "epoch": 0.22289298051447162, "grad_norm": 0.14887849420642546, "learning_rate": 2e-05, "loss": 5.3563, "step": 3323 }, { "epoch": 0.22296005634369656, "grad_norm": 0.1399338920570312, "learning_rate": 2e-05, "loss": 5.3355, "step": 3324 }, { "epoch": 0.2230271321729215, "grad_norm": 0.1437955602929798, "learning_rate": 2e-05, "loss": 5.4169, "step": 3325 }, { "epoch": 0.22309420800214644, "grad_norm": 0.14574781976642454, "learning_rate": 2e-05, "loss": 5.4015, "step": 3326 }, { "epoch": 0.22316128383137138, "grad_norm": 0.1452574798881683, "learning_rate": 2e-05, "loss": 5.4049, "step": 3327 }, { "epoch": 0.22322835966059631, "grad_norm": 0.14171525233435625, "learning_rate": 2e-05, "loss": 5.5121, "step": 3328 }, { "epoch": 0.22329543548982125, "grad_norm": 0.143561487256503, "learning_rate": 2e-05, "loss": 5.394, "step": 3329 }, { "epoch": 0.2233625113190462, "grad_norm": 0.14548669086288804, "learning_rate": 2e-05, "loss": 5.358, "step": 3330 }, { "epoch": 0.22342958714827113, "grad_norm": 0.1391528585107554, "learning_rate": 2e-05, "loss": 5.5248, "step": 3331 }, { "epoch": 0.22349666297749607, "grad_norm": 0.14379887452054305, "learning_rate": 2e-05, "loss": 5.3513, "step": 3332 }, { "epoch": 0.223563738806721, "grad_norm": 0.14445073766755653, "learning_rate": 2e-05, "loss": 5.4468, "step": 3333 }, { "epoch": 0.22363081463594595, "grad_norm": 0.14474146647118105, "learning_rate": 2e-05, "loss": 5.4753, "step": 3334 }, { "epoch": 0.22369789046517088, "grad_norm": 0.14160179207495563, "learning_rate": 2e-05, "loss": 5.63, "step": 3335 }, { "epoch": 0.22376496629439582, "grad_norm": 0.1359150134910531, "learning_rate": 2e-05, "loss": 5.3557, "step": 3336 }, { "epoch": 0.22383204212362076, "grad_norm": 0.13853732856451736, "learning_rate": 2e-05, "loss": 5.315, "step": 3337 }, { "epoch": 0.2238991179528457, "grad_norm": 0.1466469528710574, "learning_rate": 2e-05, "loss": 5.4195, "step": 3338 }, { "epoch": 0.22396619378207064, "grad_norm": 0.145500337621402, "learning_rate": 2e-05, "loss": 5.384, "step": 3339 }, { "epoch": 0.22403326961129558, "grad_norm": 0.14527607811757642, "learning_rate": 2e-05, "loss": 5.4743, "step": 3340 }, { "epoch": 0.22410034544052052, "grad_norm": 0.14417387607357432, "learning_rate": 2e-05, "loss": 5.5089, "step": 3341 }, { "epoch": 0.22416742126974545, "grad_norm": 0.14642908162842402, "learning_rate": 2e-05, "loss": 5.4497, "step": 3342 }, { "epoch": 0.2242344970989704, "grad_norm": 0.14648563762939465, "learning_rate": 2e-05, "loss": 5.4514, "step": 3343 }, { "epoch": 0.22430157292819533, "grad_norm": 0.14969092754863134, "learning_rate": 2e-05, "loss": 5.3663, "step": 3344 }, { "epoch": 0.22436864875742027, "grad_norm": 0.1453288398702135, "learning_rate": 2e-05, "loss": 5.4268, "step": 3345 }, { "epoch": 0.2244357245866452, "grad_norm": 0.1501449053626544, "learning_rate": 2e-05, "loss": 5.3492, "step": 3346 }, { "epoch": 0.22450280041587015, "grad_norm": 0.14118306940853848, "learning_rate": 2e-05, "loss": 5.3102, "step": 3347 }, { "epoch": 0.22456987624509508, "grad_norm": 0.1452119444311658, "learning_rate": 2e-05, "loss": 5.4148, "step": 3348 }, { "epoch": 0.22463695207432002, "grad_norm": 0.1462631968904061, "learning_rate": 2e-05, "loss": 5.5368, "step": 3349 }, { "epoch": 0.22470402790354496, "grad_norm": 0.14258471998355282, "learning_rate": 2e-05, "loss": 5.4189, "step": 3350 }, { "epoch": 0.2247711037327699, "grad_norm": 0.14053407163490214, "learning_rate": 2e-05, "loss": 5.4726, "step": 3351 }, { "epoch": 0.22483817956199484, "grad_norm": 0.14868010203872675, "learning_rate": 2e-05, "loss": 5.4498, "step": 3352 }, { "epoch": 0.22490525539121978, "grad_norm": 0.15479655157381114, "learning_rate": 2e-05, "loss": 5.5889, "step": 3353 }, { "epoch": 0.22497233122044472, "grad_norm": 0.14789543006263284, "learning_rate": 2e-05, "loss": 5.4545, "step": 3354 }, { "epoch": 0.22503940704966965, "grad_norm": 0.14540681134346461, "learning_rate": 2e-05, "loss": 5.4728, "step": 3355 }, { "epoch": 0.2251064828788946, "grad_norm": 0.1418552082768162, "learning_rate": 2e-05, "loss": 5.4496, "step": 3356 }, { "epoch": 0.22517355870811953, "grad_norm": 0.14996099255898973, "learning_rate": 2e-05, "loss": 5.4556, "step": 3357 }, { "epoch": 0.22524063453734447, "grad_norm": 0.1466803115494233, "learning_rate": 2e-05, "loss": 5.3674, "step": 3358 }, { "epoch": 0.2253077103665694, "grad_norm": 0.14245217870405116, "learning_rate": 2e-05, "loss": 5.3492, "step": 3359 }, { "epoch": 0.22537478619579435, "grad_norm": 0.14662140655960706, "learning_rate": 2e-05, "loss": 5.4699, "step": 3360 }, { "epoch": 0.22544186202501929, "grad_norm": 0.14690895233302784, "learning_rate": 2e-05, "loss": 5.3943, "step": 3361 }, { "epoch": 0.22550893785424422, "grad_norm": 0.14813828259902137, "learning_rate": 2e-05, "loss": 5.4247, "step": 3362 }, { "epoch": 0.22557601368346916, "grad_norm": 0.14560137512926544, "learning_rate": 2e-05, "loss": 5.4723, "step": 3363 }, { "epoch": 0.2256430895126941, "grad_norm": 0.13979113657917733, "learning_rate": 2e-05, "loss": 5.3748, "step": 3364 }, { "epoch": 0.22571016534191904, "grad_norm": 0.14708609837116485, "learning_rate": 2e-05, "loss": 5.4125, "step": 3365 }, { "epoch": 0.22577724117114398, "grad_norm": 0.14614842506070927, "learning_rate": 2e-05, "loss": 5.5455, "step": 3366 }, { "epoch": 0.22584431700036892, "grad_norm": 0.1474168667901843, "learning_rate": 2e-05, "loss": 5.4803, "step": 3367 }, { "epoch": 0.22591139282959385, "grad_norm": 0.15021074426409145, "learning_rate": 2e-05, "loss": 5.363, "step": 3368 }, { "epoch": 0.2259784686588188, "grad_norm": 0.14144366793432883, "learning_rate": 2e-05, "loss": 5.5611, "step": 3369 }, { "epoch": 0.22604554448804373, "grad_norm": 0.142355204104326, "learning_rate": 2e-05, "loss": 5.5282, "step": 3370 }, { "epoch": 0.22611262031726867, "grad_norm": 0.15224386056668945, "learning_rate": 2e-05, "loss": 5.4439, "step": 3371 }, { "epoch": 0.2261796961464936, "grad_norm": 0.14253957973109355, "learning_rate": 2e-05, "loss": 5.5983, "step": 3372 }, { "epoch": 0.22624677197571855, "grad_norm": 0.1469803824604715, "learning_rate": 2e-05, "loss": 5.5141, "step": 3373 }, { "epoch": 0.22631384780494349, "grad_norm": 0.14141834465343275, "learning_rate": 2e-05, "loss": 5.5093, "step": 3374 }, { "epoch": 0.22638092363416842, "grad_norm": 0.14481688844242438, "learning_rate": 2e-05, "loss": 5.3723, "step": 3375 }, { "epoch": 0.22644799946339336, "grad_norm": 0.15328909068071533, "learning_rate": 2e-05, "loss": 5.4328, "step": 3376 }, { "epoch": 0.2265150752926183, "grad_norm": 0.14125314777478323, "learning_rate": 2e-05, "loss": 5.4363, "step": 3377 }, { "epoch": 0.22658215112184324, "grad_norm": 0.14248935886426195, "learning_rate": 2e-05, "loss": 5.3558, "step": 3378 }, { "epoch": 0.22664922695106818, "grad_norm": 0.14040882023903561, "learning_rate": 2e-05, "loss": 5.4221, "step": 3379 }, { "epoch": 0.22671630278029312, "grad_norm": 0.14171605958872224, "learning_rate": 2e-05, "loss": 5.4552, "step": 3380 }, { "epoch": 0.22678337860951805, "grad_norm": 0.14645327192167784, "learning_rate": 2e-05, "loss": 5.4088, "step": 3381 }, { "epoch": 0.226850454438743, "grad_norm": 0.14040358696436314, "learning_rate": 2e-05, "loss": 5.3237, "step": 3382 }, { "epoch": 0.22691753026796793, "grad_norm": 0.1384265205732937, "learning_rate": 2e-05, "loss": 5.338, "step": 3383 }, { "epoch": 0.22698460609719287, "grad_norm": 0.14383877630117523, "learning_rate": 2e-05, "loss": 5.4695, "step": 3384 }, { "epoch": 0.2270516819264178, "grad_norm": 0.15100340295462444, "learning_rate": 2e-05, "loss": 5.3593, "step": 3385 }, { "epoch": 0.22711875775564275, "grad_norm": 0.14130549310510448, "learning_rate": 2e-05, "loss": 5.505, "step": 3386 }, { "epoch": 0.22718583358486769, "grad_norm": 0.14256663011015833, "learning_rate": 2e-05, "loss": 5.5269, "step": 3387 }, { "epoch": 0.22725290941409262, "grad_norm": 0.15085827804340995, "learning_rate": 2e-05, "loss": 5.4699, "step": 3388 }, { "epoch": 0.22731998524331756, "grad_norm": 0.14828126460966765, "learning_rate": 2e-05, "loss": 5.3036, "step": 3389 }, { "epoch": 0.2273870610725425, "grad_norm": 0.14498907927164847, "learning_rate": 2e-05, "loss": 5.447, "step": 3390 }, { "epoch": 0.22745413690176744, "grad_norm": 0.14413043156888655, "learning_rate": 2e-05, "loss": 5.6121, "step": 3391 }, { "epoch": 0.22752121273099238, "grad_norm": 0.14234778631638192, "learning_rate": 2e-05, "loss": 5.4039, "step": 3392 }, { "epoch": 0.22758828856021732, "grad_norm": 0.13753813639303925, "learning_rate": 2e-05, "loss": 5.2939, "step": 3393 }, { "epoch": 0.22765536438944226, "grad_norm": 0.1528255449976704, "learning_rate": 2e-05, "loss": 5.2939, "step": 3394 }, { "epoch": 0.2277224402186672, "grad_norm": 0.14421302592863017, "learning_rate": 2e-05, "loss": 5.3352, "step": 3395 }, { "epoch": 0.22778951604789213, "grad_norm": 0.15010412878968277, "learning_rate": 2e-05, "loss": 5.5838, "step": 3396 }, { "epoch": 0.22785659187711707, "grad_norm": 0.15464374175148057, "learning_rate": 2e-05, "loss": 5.4792, "step": 3397 }, { "epoch": 0.227923667706342, "grad_norm": 0.14797515090400598, "learning_rate": 2e-05, "loss": 5.4047, "step": 3398 }, { "epoch": 0.22799074353556695, "grad_norm": 0.1425283979324664, "learning_rate": 2e-05, "loss": 5.4424, "step": 3399 }, { "epoch": 0.22805781936479189, "grad_norm": 0.16006468965061685, "learning_rate": 2e-05, "loss": 5.4375, "step": 3400 }, { "epoch": 0.22812489519401682, "grad_norm": 0.14523925385595776, "learning_rate": 2e-05, "loss": 5.5093, "step": 3401 }, { "epoch": 0.22819197102324176, "grad_norm": 0.14726897613179657, "learning_rate": 2e-05, "loss": 5.4243, "step": 3402 }, { "epoch": 0.2282590468524667, "grad_norm": 0.14690783609994362, "learning_rate": 2e-05, "loss": 5.4418, "step": 3403 }, { "epoch": 0.22832612268169164, "grad_norm": 0.1565688716198834, "learning_rate": 2e-05, "loss": 5.3845, "step": 3404 }, { "epoch": 0.22839319851091658, "grad_norm": 0.14404877109519776, "learning_rate": 2e-05, "loss": 5.4298, "step": 3405 }, { "epoch": 0.22846027434014152, "grad_norm": 0.15211231795216418, "learning_rate": 2e-05, "loss": 5.4065, "step": 3406 }, { "epoch": 0.22852735016936646, "grad_norm": 0.15012913020479116, "learning_rate": 2e-05, "loss": 5.3704, "step": 3407 }, { "epoch": 0.2285944259985914, "grad_norm": 0.14492099842965925, "learning_rate": 2e-05, "loss": 5.407, "step": 3408 }, { "epoch": 0.22866150182781636, "grad_norm": 0.1468269201156155, "learning_rate": 2e-05, "loss": 5.416, "step": 3409 }, { "epoch": 0.2287285776570413, "grad_norm": 0.1464244650819511, "learning_rate": 2e-05, "loss": 5.4724, "step": 3410 }, { "epoch": 0.22879565348626624, "grad_norm": 0.14878296563401044, "learning_rate": 2e-05, "loss": 5.5534, "step": 3411 }, { "epoch": 0.22886272931549118, "grad_norm": 0.13959208750680374, "learning_rate": 2e-05, "loss": 5.4725, "step": 3412 }, { "epoch": 0.22892980514471611, "grad_norm": 0.15171536164152227, "learning_rate": 2e-05, "loss": 5.3108, "step": 3413 }, { "epoch": 0.22899688097394105, "grad_norm": 0.14533678396583066, "learning_rate": 2e-05, "loss": 5.3943, "step": 3414 }, { "epoch": 0.229063956803166, "grad_norm": 0.1450557298121004, "learning_rate": 2e-05, "loss": 5.5631, "step": 3415 }, { "epoch": 0.22913103263239093, "grad_norm": 0.1518773227471714, "learning_rate": 2e-05, "loss": 5.3694, "step": 3416 }, { "epoch": 0.22919810846161587, "grad_norm": 0.14291958947556643, "learning_rate": 2e-05, "loss": 5.4769, "step": 3417 }, { "epoch": 0.2292651842908408, "grad_norm": 0.15401083614957103, "learning_rate": 2e-05, "loss": 5.4287, "step": 3418 }, { "epoch": 0.22933226012006575, "grad_norm": 0.1571937733112811, "learning_rate": 2e-05, "loss": 5.507, "step": 3419 }, { "epoch": 0.22939933594929068, "grad_norm": 0.15682683820937696, "learning_rate": 2e-05, "loss": 5.6094, "step": 3420 }, { "epoch": 0.22946641177851562, "grad_norm": 0.14697385852311382, "learning_rate": 2e-05, "loss": 5.4774, "step": 3421 }, { "epoch": 0.22953348760774056, "grad_norm": 0.1622536003713561, "learning_rate": 2e-05, "loss": 5.4923, "step": 3422 }, { "epoch": 0.2296005634369655, "grad_norm": 0.14725023302156642, "learning_rate": 2e-05, "loss": 5.493, "step": 3423 }, { "epoch": 0.22966763926619044, "grad_norm": 0.14245512755067502, "learning_rate": 2e-05, "loss": 5.4466, "step": 3424 }, { "epoch": 0.22973471509541538, "grad_norm": 0.14632260849898043, "learning_rate": 2e-05, "loss": 5.4943, "step": 3425 }, { "epoch": 0.22980179092464031, "grad_norm": 0.163994804967215, "learning_rate": 2e-05, "loss": 5.4126, "step": 3426 }, { "epoch": 0.22986886675386525, "grad_norm": 0.1465502317429114, "learning_rate": 2e-05, "loss": 5.4666, "step": 3427 }, { "epoch": 0.2299359425830902, "grad_norm": 0.15241126838888572, "learning_rate": 2e-05, "loss": 5.5447, "step": 3428 }, { "epoch": 0.23000301841231513, "grad_norm": 0.1514294029175094, "learning_rate": 2e-05, "loss": 5.4466, "step": 3429 }, { "epoch": 0.23007009424154007, "grad_norm": 0.15463798545433138, "learning_rate": 2e-05, "loss": 5.4349, "step": 3430 }, { "epoch": 0.230137170070765, "grad_norm": 0.14434279594774765, "learning_rate": 2e-05, "loss": 5.3553, "step": 3431 }, { "epoch": 0.23020424589998995, "grad_norm": 0.15159219476294827, "learning_rate": 2e-05, "loss": 5.5237, "step": 3432 }, { "epoch": 0.23027132172921488, "grad_norm": 0.14740066096012147, "learning_rate": 2e-05, "loss": 5.448, "step": 3433 }, { "epoch": 0.23033839755843982, "grad_norm": 0.1439765915280828, "learning_rate": 2e-05, "loss": 5.2935, "step": 3434 }, { "epoch": 0.23040547338766476, "grad_norm": 0.1471021682882794, "learning_rate": 2e-05, "loss": 5.4327, "step": 3435 }, { "epoch": 0.2304725492168897, "grad_norm": 0.14297794671159805, "learning_rate": 2e-05, "loss": 5.4506, "step": 3436 }, { "epoch": 0.23053962504611464, "grad_norm": 0.14308505249073547, "learning_rate": 2e-05, "loss": 5.313, "step": 3437 }, { "epoch": 0.23060670087533958, "grad_norm": 0.1477452365182314, "learning_rate": 2e-05, "loss": 5.4021, "step": 3438 }, { "epoch": 0.23067377670456451, "grad_norm": 0.1407774898466553, "learning_rate": 2e-05, "loss": 5.4568, "step": 3439 }, { "epoch": 0.23074085253378945, "grad_norm": 0.1458398085465949, "learning_rate": 2e-05, "loss": 5.4968, "step": 3440 }, { "epoch": 0.2308079283630144, "grad_norm": 0.14672830791462918, "learning_rate": 2e-05, "loss": 5.6024, "step": 3441 }, { "epoch": 0.23087500419223933, "grad_norm": 0.1407893521094788, "learning_rate": 2e-05, "loss": 5.3723, "step": 3442 }, { "epoch": 0.23094208002146427, "grad_norm": 0.14272943978723976, "learning_rate": 2e-05, "loss": 5.4523, "step": 3443 }, { "epoch": 0.2310091558506892, "grad_norm": 0.15382186391746266, "learning_rate": 2e-05, "loss": 5.4774, "step": 3444 }, { "epoch": 0.23107623167991415, "grad_norm": 0.1457638253653574, "learning_rate": 2e-05, "loss": 5.5033, "step": 3445 }, { "epoch": 0.23114330750913908, "grad_norm": 0.14927022932748973, "learning_rate": 2e-05, "loss": 5.37, "step": 3446 }, { "epoch": 0.23121038333836402, "grad_norm": 0.13740387995035644, "learning_rate": 2e-05, "loss": 5.4411, "step": 3447 }, { "epoch": 0.23127745916758896, "grad_norm": 0.14247515026610486, "learning_rate": 2e-05, "loss": 5.4212, "step": 3448 }, { "epoch": 0.2313445349968139, "grad_norm": 0.1469279389738208, "learning_rate": 2e-05, "loss": 5.5468, "step": 3449 }, { "epoch": 0.23141161082603884, "grad_norm": 0.14226201634245214, "learning_rate": 2e-05, "loss": 5.4399, "step": 3450 }, { "epoch": 0.23147868665526378, "grad_norm": 0.14075024937949654, "learning_rate": 2e-05, "loss": 5.3123, "step": 3451 }, { "epoch": 0.23154576248448872, "grad_norm": 0.14596852944026004, "learning_rate": 2e-05, "loss": 5.3916, "step": 3452 }, { "epoch": 0.23161283831371365, "grad_norm": 0.1554609594807938, "learning_rate": 2e-05, "loss": 5.5661, "step": 3453 }, { "epoch": 0.2316799141429386, "grad_norm": 0.14429336364512899, "learning_rate": 2e-05, "loss": 5.4758, "step": 3454 }, { "epoch": 0.23174698997216353, "grad_norm": 0.14286153756938957, "learning_rate": 2e-05, "loss": 5.3602, "step": 3455 }, { "epoch": 0.23181406580138847, "grad_norm": 0.14364683734649591, "learning_rate": 2e-05, "loss": 5.4448, "step": 3456 }, { "epoch": 0.2318811416306134, "grad_norm": 0.1423188118060661, "learning_rate": 2e-05, "loss": 5.4563, "step": 3457 }, { "epoch": 0.23194821745983835, "grad_norm": 0.15434914701733232, "learning_rate": 2e-05, "loss": 5.4899, "step": 3458 }, { "epoch": 0.23201529328906328, "grad_norm": 0.15044817775695768, "learning_rate": 2e-05, "loss": 5.377, "step": 3459 }, { "epoch": 0.23208236911828822, "grad_norm": 0.1447980728131735, "learning_rate": 2e-05, "loss": 5.5556, "step": 3460 }, { "epoch": 0.23214944494751316, "grad_norm": 0.1473657646073293, "learning_rate": 2e-05, "loss": 5.513, "step": 3461 }, { "epoch": 0.2322165207767381, "grad_norm": 0.1540195375523279, "learning_rate": 2e-05, "loss": 5.3536, "step": 3462 }, { "epoch": 0.23228359660596304, "grad_norm": 0.15140455484196938, "learning_rate": 2e-05, "loss": 5.4865, "step": 3463 }, { "epoch": 0.23235067243518798, "grad_norm": 0.1425535170030364, "learning_rate": 2e-05, "loss": 5.3209, "step": 3464 }, { "epoch": 0.23241774826441292, "grad_norm": 0.14989365672785368, "learning_rate": 2e-05, "loss": 5.3769, "step": 3465 }, { "epoch": 0.23248482409363785, "grad_norm": 0.14819540122780983, "learning_rate": 2e-05, "loss": 5.2552, "step": 3466 }, { "epoch": 0.2325518999228628, "grad_norm": 0.14893145733154475, "learning_rate": 2e-05, "loss": 5.4109, "step": 3467 }, { "epoch": 0.23261897575208773, "grad_norm": 0.14504790903785594, "learning_rate": 2e-05, "loss": 5.4506, "step": 3468 }, { "epoch": 0.23268605158131267, "grad_norm": 0.15100237555233534, "learning_rate": 2e-05, "loss": 5.5081, "step": 3469 }, { "epoch": 0.2327531274105376, "grad_norm": 0.14828666784357752, "learning_rate": 2e-05, "loss": 5.4153, "step": 3470 }, { "epoch": 0.23282020323976255, "grad_norm": 0.14958997152927891, "learning_rate": 2e-05, "loss": 5.5048, "step": 3471 }, { "epoch": 0.23288727906898748, "grad_norm": 0.15029568920565664, "learning_rate": 2e-05, "loss": 5.4305, "step": 3472 }, { "epoch": 0.23295435489821242, "grad_norm": 0.15398294644031, "learning_rate": 2e-05, "loss": 5.4668, "step": 3473 }, { "epoch": 0.23302143072743736, "grad_norm": 0.13548851722105862, "learning_rate": 2e-05, "loss": 5.5548, "step": 3474 }, { "epoch": 0.2330885065566623, "grad_norm": 0.15430529302511342, "learning_rate": 2e-05, "loss": 5.3476, "step": 3475 }, { "epoch": 0.23315558238588724, "grad_norm": 0.14153151682452225, "learning_rate": 2e-05, "loss": 5.4333, "step": 3476 }, { "epoch": 0.23322265821511218, "grad_norm": 0.14232073483353452, "learning_rate": 2e-05, "loss": 5.5149, "step": 3477 }, { "epoch": 0.23328973404433712, "grad_norm": 0.1440756610156961, "learning_rate": 2e-05, "loss": 5.4249, "step": 3478 }, { "epoch": 0.23335680987356205, "grad_norm": 0.14911651047203564, "learning_rate": 2e-05, "loss": 5.4429, "step": 3479 }, { "epoch": 0.233423885702787, "grad_norm": 0.15000868980779755, "learning_rate": 2e-05, "loss": 5.4138, "step": 3480 }, { "epoch": 0.23349096153201193, "grad_norm": 0.14457725499352092, "learning_rate": 2e-05, "loss": 5.3754, "step": 3481 }, { "epoch": 0.23355803736123687, "grad_norm": 0.14157268385249638, "learning_rate": 2e-05, "loss": 5.3373, "step": 3482 }, { "epoch": 0.2336251131904618, "grad_norm": 0.15794916005534493, "learning_rate": 2e-05, "loss": 5.38, "step": 3483 }, { "epoch": 0.23369218901968675, "grad_norm": 0.1503698387383471, "learning_rate": 2e-05, "loss": 5.5343, "step": 3484 }, { "epoch": 0.23375926484891169, "grad_norm": 0.13872005015624286, "learning_rate": 2e-05, "loss": 5.4305, "step": 3485 }, { "epoch": 0.23382634067813662, "grad_norm": 0.14663081469979158, "learning_rate": 2e-05, "loss": 5.3847, "step": 3486 }, { "epoch": 0.23389341650736156, "grad_norm": 0.14664795209165352, "learning_rate": 2e-05, "loss": 5.4122, "step": 3487 }, { "epoch": 0.2339604923365865, "grad_norm": 0.13615857855746596, "learning_rate": 2e-05, "loss": 5.3387, "step": 3488 }, { "epoch": 0.23402756816581144, "grad_norm": 0.14138756527291793, "learning_rate": 2e-05, "loss": 5.3293, "step": 3489 }, { "epoch": 0.23409464399503638, "grad_norm": 0.1473045378295767, "learning_rate": 2e-05, "loss": 5.443, "step": 3490 }, { "epoch": 0.23416171982426132, "grad_norm": 0.1493431720275265, "learning_rate": 2e-05, "loss": 5.557, "step": 3491 }, { "epoch": 0.23422879565348625, "grad_norm": 0.1431735933763373, "learning_rate": 2e-05, "loss": 5.3499, "step": 3492 }, { "epoch": 0.2342958714827112, "grad_norm": 0.14248237496004026, "learning_rate": 2e-05, "loss": 5.365, "step": 3493 }, { "epoch": 0.23436294731193613, "grad_norm": 0.15630906030636985, "learning_rate": 2e-05, "loss": 5.3829, "step": 3494 }, { "epoch": 0.23443002314116107, "grad_norm": 0.1539356198777102, "learning_rate": 2e-05, "loss": 5.5334, "step": 3495 }, { "epoch": 0.234497098970386, "grad_norm": 0.14489593999433611, "learning_rate": 2e-05, "loss": 5.4182, "step": 3496 }, { "epoch": 0.23456417479961095, "grad_norm": 0.15279631393833318, "learning_rate": 2e-05, "loss": 5.5939, "step": 3497 }, { "epoch": 0.23463125062883589, "grad_norm": 0.15048770041683063, "learning_rate": 2e-05, "loss": 5.4113, "step": 3498 }, { "epoch": 0.23469832645806082, "grad_norm": 0.14277888894777233, "learning_rate": 2e-05, "loss": 5.4244, "step": 3499 }, { "epoch": 0.2347654022872858, "grad_norm": 0.14752158291177558, "learning_rate": 2e-05, "loss": 5.3852, "step": 3500 }, { "epoch": 0.23483247811651073, "grad_norm": 0.1475969242931102, "learning_rate": 2e-05, "loss": 5.389, "step": 3501 }, { "epoch": 0.23489955394573567, "grad_norm": 0.14429252207405874, "learning_rate": 2e-05, "loss": 5.5832, "step": 3502 }, { "epoch": 0.2349666297749606, "grad_norm": 0.1510452579535065, "learning_rate": 2e-05, "loss": 5.3752, "step": 3503 }, { "epoch": 0.23503370560418554, "grad_norm": 0.14280845807719614, "learning_rate": 2e-05, "loss": 5.528, "step": 3504 }, { "epoch": 0.23510078143341048, "grad_norm": 0.13904514598864562, "learning_rate": 2e-05, "loss": 5.4954, "step": 3505 }, { "epoch": 0.23516785726263542, "grad_norm": 0.14999033017336383, "learning_rate": 2e-05, "loss": 5.495, "step": 3506 }, { "epoch": 0.23523493309186036, "grad_norm": 0.14992065335260224, "learning_rate": 2e-05, "loss": 5.5619, "step": 3507 }, { "epoch": 0.2353020089210853, "grad_norm": 0.14270677132538787, "learning_rate": 2e-05, "loss": 5.5249, "step": 3508 }, { "epoch": 0.23536908475031024, "grad_norm": 0.14028672998724132, "learning_rate": 2e-05, "loss": 5.4181, "step": 3509 }, { "epoch": 0.23543616057953518, "grad_norm": 0.1438872156184744, "learning_rate": 2e-05, "loss": 5.509, "step": 3510 }, { "epoch": 0.2355032364087601, "grad_norm": 0.13445183921744125, "learning_rate": 2e-05, "loss": 5.3785, "step": 3511 }, { "epoch": 0.23557031223798505, "grad_norm": 0.14351326430539219, "learning_rate": 2e-05, "loss": 5.3795, "step": 3512 }, { "epoch": 0.23563738806721, "grad_norm": 0.1385464283831032, "learning_rate": 2e-05, "loss": 5.3699, "step": 3513 }, { "epoch": 0.23570446389643493, "grad_norm": 0.1423727695269095, "learning_rate": 2e-05, "loss": 5.5019, "step": 3514 }, { "epoch": 0.23577153972565987, "grad_norm": 0.14893616145305844, "learning_rate": 2e-05, "loss": 5.476, "step": 3515 }, { "epoch": 0.2358386155548848, "grad_norm": 0.14503258207101175, "learning_rate": 2e-05, "loss": 5.5196, "step": 3516 }, { "epoch": 0.23590569138410974, "grad_norm": 0.144262530989285, "learning_rate": 2e-05, "loss": 5.2734, "step": 3517 }, { "epoch": 0.23597276721333468, "grad_norm": 0.15243440179725587, "learning_rate": 2e-05, "loss": 5.6379, "step": 3518 }, { "epoch": 0.23603984304255962, "grad_norm": 0.14688829945791226, "learning_rate": 2e-05, "loss": 5.3312, "step": 3519 }, { "epoch": 0.23610691887178456, "grad_norm": 0.1408167230208686, "learning_rate": 2e-05, "loss": 5.5007, "step": 3520 }, { "epoch": 0.2361739947010095, "grad_norm": 0.14811484390425758, "learning_rate": 2e-05, "loss": 5.362, "step": 3521 }, { "epoch": 0.23624107053023444, "grad_norm": 0.14487484193208983, "learning_rate": 2e-05, "loss": 5.2847, "step": 3522 }, { "epoch": 0.23630814635945938, "grad_norm": 0.14225623478135174, "learning_rate": 2e-05, "loss": 5.524, "step": 3523 }, { "epoch": 0.23637522218868431, "grad_norm": 0.14417705268103714, "learning_rate": 2e-05, "loss": 5.3848, "step": 3524 }, { "epoch": 0.23644229801790925, "grad_norm": 0.15439279877724937, "learning_rate": 2e-05, "loss": 5.3358, "step": 3525 }, { "epoch": 0.2365093738471342, "grad_norm": 0.1520473111980663, "learning_rate": 2e-05, "loss": 5.3886, "step": 3526 }, { "epoch": 0.23657644967635913, "grad_norm": 0.1477856534585931, "learning_rate": 2e-05, "loss": 5.5556, "step": 3527 }, { "epoch": 0.23664352550558407, "grad_norm": 0.14843615579474798, "learning_rate": 2e-05, "loss": 5.4057, "step": 3528 }, { "epoch": 0.236710601334809, "grad_norm": 0.1415872776402702, "learning_rate": 2e-05, "loss": 5.4979, "step": 3529 }, { "epoch": 0.23677767716403395, "grad_norm": 0.14756182423521544, "learning_rate": 2e-05, "loss": 5.4503, "step": 3530 }, { "epoch": 0.23684475299325888, "grad_norm": 0.15226614404675812, "learning_rate": 2e-05, "loss": 5.408, "step": 3531 }, { "epoch": 0.23691182882248382, "grad_norm": 0.14807275574227344, "learning_rate": 2e-05, "loss": 5.4532, "step": 3532 }, { "epoch": 0.23697890465170876, "grad_norm": 0.1486057618220397, "learning_rate": 2e-05, "loss": 5.2641, "step": 3533 }, { "epoch": 0.2370459804809337, "grad_norm": 0.1436224862867708, "learning_rate": 2e-05, "loss": 5.365, "step": 3534 }, { "epoch": 0.23711305631015864, "grad_norm": 0.14886049371535018, "learning_rate": 2e-05, "loss": 5.4489, "step": 3535 }, { "epoch": 0.23718013213938358, "grad_norm": 0.14543112434322694, "learning_rate": 2e-05, "loss": 5.4001, "step": 3536 }, { "epoch": 0.23724720796860851, "grad_norm": 0.14177993838503603, "learning_rate": 2e-05, "loss": 5.5375, "step": 3537 }, { "epoch": 0.23731428379783345, "grad_norm": 0.1429583396398804, "learning_rate": 2e-05, "loss": 5.4175, "step": 3538 }, { "epoch": 0.2373813596270584, "grad_norm": 0.14872975530012988, "learning_rate": 2e-05, "loss": 5.4468, "step": 3539 }, { "epoch": 0.23744843545628333, "grad_norm": 0.1459126885075444, "learning_rate": 2e-05, "loss": 5.4484, "step": 3540 }, { "epoch": 0.23751551128550827, "grad_norm": 0.14597136972819438, "learning_rate": 2e-05, "loss": 5.4412, "step": 3541 }, { "epoch": 0.2375825871147332, "grad_norm": 0.1432984832948743, "learning_rate": 2e-05, "loss": 5.3988, "step": 3542 }, { "epoch": 0.23764966294395815, "grad_norm": 0.14140264500951494, "learning_rate": 2e-05, "loss": 5.4056, "step": 3543 }, { "epoch": 0.23771673877318308, "grad_norm": 0.1431937122299652, "learning_rate": 2e-05, "loss": 5.4013, "step": 3544 }, { "epoch": 0.23778381460240802, "grad_norm": 0.14400014551122753, "learning_rate": 2e-05, "loss": 5.5884, "step": 3545 }, { "epoch": 0.23785089043163296, "grad_norm": 0.14337834691045587, "learning_rate": 2e-05, "loss": 5.4924, "step": 3546 }, { "epoch": 0.2379179662608579, "grad_norm": 0.14423321464853942, "learning_rate": 2e-05, "loss": 5.2118, "step": 3547 }, { "epoch": 0.23798504209008284, "grad_norm": 0.1494603828070573, "learning_rate": 2e-05, "loss": 5.4425, "step": 3548 }, { "epoch": 0.23805211791930778, "grad_norm": 0.1468641727126983, "learning_rate": 2e-05, "loss": 5.3462, "step": 3549 }, { "epoch": 0.23811919374853271, "grad_norm": 0.14535791353807256, "learning_rate": 2e-05, "loss": 5.35, "step": 3550 }, { "epoch": 0.23818626957775765, "grad_norm": 0.14726389618685923, "learning_rate": 2e-05, "loss": 5.4449, "step": 3551 }, { "epoch": 0.2382533454069826, "grad_norm": 0.14886412298754007, "learning_rate": 2e-05, "loss": 5.3301, "step": 3552 }, { "epoch": 0.23832042123620753, "grad_norm": 0.1447393389411653, "learning_rate": 2e-05, "loss": 5.446, "step": 3553 }, { "epoch": 0.23838749706543247, "grad_norm": 0.15149331545970063, "learning_rate": 2e-05, "loss": 5.398, "step": 3554 }, { "epoch": 0.2384545728946574, "grad_norm": 0.14306428571320842, "learning_rate": 2e-05, "loss": 5.4073, "step": 3555 }, { "epoch": 0.23852164872388235, "grad_norm": 0.14645303842261187, "learning_rate": 2e-05, "loss": 5.4945, "step": 3556 }, { "epoch": 0.23858872455310728, "grad_norm": 0.15418742552398584, "learning_rate": 2e-05, "loss": 5.5669, "step": 3557 }, { "epoch": 0.23865580038233222, "grad_norm": 0.15006469042220186, "learning_rate": 2e-05, "loss": 5.3745, "step": 3558 }, { "epoch": 0.23872287621155716, "grad_norm": 0.1490717846176477, "learning_rate": 2e-05, "loss": 5.5104, "step": 3559 }, { "epoch": 0.2387899520407821, "grad_norm": 0.14782202119172577, "learning_rate": 2e-05, "loss": 5.6135, "step": 3560 }, { "epoch": 0.23885702787000704, "grad_norm": 0.13979660984578765, "learning_rate": 2e-05, "loss": 5.5384, "step": 3561 }, { "epoch": 0.23892410369923198, "grad_norm": 0.15194676048056097, "learning_rate": 2e-05, "loss": 5.5315, "step": 3562 }, { "epoch": 0.23899117952845692, "grad_norm": 0.15701889055919505, "learning_rate": 2e-05, "loss": 5.5611, "step": 3563 }, { "epoch": 0.23905825535768185, "grad_norm": 0.14897405207653808, "learning_rate": 2e-05, "loss": 5.4128, "step": 3564 }, { "epoch": 0.2391253311869068, "grad_norm": 0.14635998946054196, "learning_rate": 2e-05, "loss": 5.4536, "step": 3565 }, { "epoch": 0.23919240701613173, "grad_norm": 0.14590361784241354, "learning_rate": 2e-05, "loss": 5.4474, "step": 3566 }, { "epoch": 0.23925948284535667, "grad_norm": 0.1498224270900654, "learning_rate": 2e-05, "loss": 5.3986, "step": 3567 }, { "epoch": 0.2393265586745816, "grad_norm": 0.1407735369937029, "learning_rate": 2e-05, "loss": 5.4607, "step": 3568 }, { "epoch": 0.23939363450380655, "grad_norm": 0.1483895539546799, "learning_rate": 2e-05, "loss": 5.4781, "step": 3569 }, { "epoch": 0.23946071033303148, "grad_norm": 0.14651194215700625, "learning_rate": 2e-05, "loss": 5.365, "step": 3570 }, { "epoch": 0.23952778616225642, "grad_norm": 0.14731860878398914, "learning_rate": 2e-05, "loss": 5.3801, "step": 3571 }, { "epoch": 0.23959486199148136, "grad_norm": 0.14426093008705018, "learning_rate": 2e-05, "loss": 5.3618, "step": 3572 }, { "epoch": 0.2396619378207063, "grad_norm": 0.14651187314024125, "learning_rate": 2e-05, "loss": 5.4822, "step": 3573 }, { "epoch": 0.23972901364993124, "grad_norm": 0.1376884545853337, "learning_rate": 2e-05, "loss": 5.3039, "step": 3574 }, { "epoch": 0.23979608947915618, "grad_norm": 0.14828809284676006, "learning_rate": 2e-05, "loss": 5.557, "step": 3575 }, { "epoch": 0.23986316530838112, "grad_norm": 0.1508494931122979, "learning_rate": 2e-05, "loss": 5.4242, "step": 3576 }, { "epoch": 0.23993024113760605, "grad_norm": 0.14495512147559056, "learning_rate": 2e-05, "loss": 5.4805, "step": 3577 }, { "epoch": 0.239997316966831, "grad_norm": 0.13877851207772546, "learning_rate": 2e-05, "loss": 5.5121, "step": 3578 }, { "epoch": 0.24006439279605593, "grad_norm": 0.14814119844113935, "learning_rate": 2e-05, "loss": 5.4427, "step": 3579 }, { "epoch": 0.24013146862528087, "grad_norm": 0.15009500487518862, "learning_rate": 2e-05, "loss": 5.415, "step": 3580 }, { "epoch": 0.2401985444545058, "grad_norm": 0.14627368534510096, "learning_rate": 2e-05, "loss": 5.4022, "step": 3581 }, { "epoch": 0.24026562028373075, "grad_norm": 0.1474676260741314, "learning_rate": 2e-05, "loss": 5.4589, "step": 3582 }, { "epoch": 0.24033269611295568, "grad_norm": 0.14760527699882298, "learning_rate": 2e-05, "loss": 5.651, "step": 3583 }, { "epoch": 0.24039977194218062, "grad_norm": 0.13822640472121228, "learning_rate": 2e-05, "loss": 5.5158, "step": 3584 }, { "epoch": 0.24046684777140556, "grad_norm": 0.1476073540813708, "learning_rate": 2e-05, "loss": 5.5566, "step": 3585 }, { "epoch": 0.2405339236006305, "grad_norm": 0.15050068481690607, "learning_rate": 2e-05, "loss": 5.349, "step": 3586 }, { "epoch": 0.24060099942985544, "grad_norm": 0.15515377109165715, "learning_rate": 2e-05, "loss": 5.5146, "step": 3587 }, { "epoch": 0.24066807525908038, "grad_norm": 0.14670988659683643, "learning_rate": 2e-05, "loss": 5.3674, "step": 3588 }, { "epoch": 0.24073515108830532, "grad_norm": 0.14360318996635782, "learning_rate": 2e-05, "loss": 5.5265, "step": 3589 }, { "epoch": 0.24080222691753025, "grad_norm": 0.14920994153156833, "learning_rate": 2e-05, "loss": 5.3462, "step": 3590 }, { "epoch": 0.24086930274675522, "grad_norm": 0.15357917390173467, "learning_rate": 2e-05, "loss": 5.487, "step": 3591 }, { "epoch": 0.24093637857598016, "grad_norm": 0.13866932594504833, "learning_rate": 2e-05, "loss": 5.394, "step": 3592 }, { "epoch": 0.2410034544052051, "grad_norm": 0.13928740049290605, "learning_rate": 2e-05, "loss": 5.4935, "step": 3593 }, { "epoch": 0.24107053023443004, "grad_norm": 0.1559794287200337, "learning_rate": 2e-05, "loss": 5.4796, "step": 3594 }, { "epoch": 0.24113760606365497, "grad_norm": 0.15810769698501753, "learning_rate": 2e-05, "loss": 5.4507, "step": 3595 }, { "epoch": 0.2412046818928799, "grad_norm": 0.14015399892917801, "learning_rate": 2e-05, "loss": 5.4271, "step": 3596 }, { "epoch": 0.24127175772210485, "grad_norm": 0.14935159513319188, "learning_rate": 2e-05, "loss": 5.6543, "step": 3597 }, { "epoch": 0.2413388335513298, "grad_norm": 0.15259754328296427, "learning_rate": 2e-05, "loss": 5.3617, "step": 3598 }, { "epoch": 0.24140590938055473, "grad_norm": 0.14248153915199116, "learning_rate": 2e-05, "loss": 5.5191, "step": 3599 }, { "epoch": 0.24147298520977967, "grad_norm": 0.14719228140804352, "learning_rate": 2e-05, "loss": 5.4631, "step": 3600 }, { "epoch": 0.2415400610390046, "grad_norm": 0.150872142997005, "learning_rate": 2e-05, "loss": 5.1958, "step": 3601 }, { "epoch": 0.24160713686822954, "grad_norm": 0.15848387675462616, "learning_rate": 2e-05, "loss": 5.6504, "step": 3602 }, { "epoch": 0.24167421269745448, "grad_norm": 0.1535823111291965, "learning_rate": 2e-05, "loss": 5.3971, "step": 3603 }, { "epoch": 0.24174128852667942, "grad_norm": 0.15431040139636454, "learning_rate": 2e-05, "loss": 5.4069, "step": 3604 }, { "epoch": 0.24180836435590436, "grad_norm": 0.14845076148491315, "learning_rate": 2e-05, "loss": 5.3998, "step": 3605 }, { "epoch": 0.2418754401851293, "grad_norm": 0.1439580280563471, "learning_rate": 2e-05, "loss": 5.4982, "step": 3606 }, { "epoch": 0.24194251601435424, "grad_norm": 0.15751179764710962, "learning_rate": 2e-05, "loss": 5.5652, "step": 3607 }, { "epoch": 0.24200959184357917, "grad_norm": 0.16556794501088962, "learning_rate": 2e-05, "loss": 5.3973, "step": 3608 }, { "epoch": 0.2420766676728041, "grad_norm": 0.1467700363359307, "learning_rate": 2e-05, "loss": 5.4524, "step": 3609 }, { "epoch": 0.24214374350202905, "grad_norm": 0.14497654148201797, "learning_rate": 2e-05, "loss": 5.4067, "step": 3610 }, { "epoch": 0.242210819331254, "grad_norm": 0.1544073697127881, "learning_rate": 2e-05, "loss": 5.5565, "step": 3611 }, { "epoch": 0.24227789516047893, "grad_norm": 0.15412135702011834, "learning_rate": 2e-05, "loss": 5.5116, "step": 3612 }, { "epoch": 0.24234497098970387, "grad_norm": 0.15051854975464043, "learning_rate": 2e-05, "loss": 5.5159, "step": 3613 }, { "epoch": 0.2424120468189288, "grad_norm": 0.1526509858189395, "learning_rate": 2e-05, "loss": 5.4543, "step": 3614 }, { "epoch": 0.24247912264815374, "grad_norm": 0.15302755061037368, "learning_rate": 2e-05, "loss": 5.3998, "step": 3615 }, { "epoch": 0.24254619847737868, "grad_norm": 0.15757455898302683, "learning_rate": 2e-05, "loss": 5.4923, "step": 3616 }, { "epoch": 0.24261327430660362, "grad_norm": 0.14166734259692065, "learning_rate": 2e-05, "loss": 5.332, "step": 3617 }, { "epoch": 0.24268035013582856, "grad_norm": 0.1460331856112316, "learning_rate": 2e-05, "loss": 5.4391, "step": 3618 }, { "epoch": 0.2427474259650535, "grad_norm": 0.14436487902974549, "learning_rate": 2e-05, "loss": 5.3426, "step": 3619 }, { "epoch": 0.24281450179427844, "grad_norm": 0.14817355376869543, "learning_rate": 2e-05, "loss": 5.4375, "step": 3620 }, { "epoch": 0.24288157762350338, "grad_norm": 0.14938676055263458, "learning_rate": 2e-05, "loss": 5.4074, "step": 3621 }, { "epoch": 0.2429486534527283, "grad_norm": 0.14366101523528207, "learning_rate": 2e-05, "loss": 5.4859, "step": 3622 }, { "epoch": 0.24301572928195325, "grad_norm": 0.14507404526858197, "learning_rate": 2e-05, "loss": 5.3432, "step": 3623 }, { "epoch": 0.2430828051111782, "grad_norm": 0.14366351234079802, "learning_rate": 2e-05, "loss": 5.236, "step": 3624 }, { "epoch": 0.24314988094040313, "grad_norm": 0.14065453590913157, "learning_rate": 2e-05, "loss": 5.3985, "step": 3625 }, { "epoch": 0.24321695676962807, "grad_norm": 0.14310340429876883, "learning_rate": 2e-05, "loss": 5.5357, "step": 3626 }, { "epoch": 0.243284032598853, "grad_norm": 0.14561869461475388, "learning_rate": 2e-05, "loss": 5.4521, "step": 3627 }, { "epoch": 0.24335110842807794, "grad_norm": 0.14718491287197175, "learning_rate": 2e-05, "loss": 5.541, "step": 3628 }, { "epoch": 0.24341818425730288, "grad_norm": 0.15811075863944035, "learning_rate": 2e-05, "loss": 5.5721, "step": 3629 }, { "epoch": 0.24348526008652782, "grad_norm": 0.1437612483477177, "learning_rate": 2e-05, "loss": 5.4801, "step": 3630 }, { "epoch": 0.24355233591575276, "grad_norm": 0.14965258242721233, "learning_rate": 2e-05, "loss": 5.4442, "step": 3631 }, { "epoch": 0.2436194117449777, "grad_norm": 0.14638016257053468, "learning_rate": 2e-05, "loss": 5.4041, "step": 3632 }, { "epoch": 0.24368648757420264, "grad_norm": 0.14798879041810833, "learning_rate": 2e-05, "loss": 5.5178, "step": 3633 }, { "epoch": 0.24375356340342758, "grad_norm": 0.1456325835833653, "learning_rate": 2e-05, "loss": 5.4517, "step": 3634 }, { "epoch": 0.24382063923265251, "grad_norm": 0.1512598891344156, "learning_rate": 2e-05, "loss": 5.5248, "step": 3635 }, { "epoch": 0.24388771506187745, "grad_norm": 0.14296517565879271, "learning_rate": 2e-05, "loss": 5.2985, "step": 3636 }, { "epoch": 0.2439547908911024, "grad_norm": 0.14712023750578207, "learning_rate": 2e-05, "loss": 5.4917, "step": 3637 }, { "epoch": 0.24402186672032733, "grad_norm": 0.14756282521171962, "learning_rate": 2e-05, "loss": 5.4272, "step": 3638 }, { "epoch": 0.24408894254955227, "grad_norm": 0.14730008794004007, "learning_rate": 2e-05, "loss": 5.5766, "step": 3639 }, { "epoch": 0.2441560183787772, "grad_norm": 0.1396090166995398, "learning_rate": 2e-05, "loss": 5.4968, "step": 3640 }, { "epoch": 0.24422309420800214, "grad_norm": 0.14251020257537156, "learning_rate": 2e-05, "loss": 5.5977, "step": 3641 }, { "epoch": 0.24429017003722708, "grad_norm": 0.1464377714773839, "learning_rate": 2e-05, "loss": 5.5075, "step": 3642 }, { "epoch": 0.24435724586645202, "grad_norm": 0.14620689406947984, "learning_rate": 2e-05, "loss": 5.3781, "step": 3643 }, { "epoch": 0.24442432169567696, "grad_norm": 0.13980799334282612, "learning_rate": 2e-05, "loss": 5.4723, "step": 3644 }, { "epoch": 0.2444913975249019, "grad_norm": 0.15254023659266766, "learning_rate": 2e-05, "loss": 5.5534, "step": 3645 }, { "epoch": 0.24455847335412684, "grad_norm": 0.14857320394254767, "learning_rate": 2e-05, "loss": 5.5112, "step": 3646 }, { "epoch": 0.24462554918335178, "grad_norm": 0.14440664136098097, "learning_rate": 2e-05, "loss": 5.4414, "step": 3647 }, { "epoch": 0.24469262501257671, "grad_norm": 0.14974671831282674, "learning_rate": 2e-05, "loss": 5.3042, "step": 3648 }, { "epoch": 0.24475970084180165, "grad_norm": 0.149235735910126, "learning_rate": 2e-05, "loss": 5.4287, "step": 3649 }, { "epoch": 0.2448267766710266, "grad_norm": 0.14814151198799683, "learning_rate": 2e-05, "loss": 5.4355, "step": 3650 }, { "epoch": 0.24489385250025153, "grad_norm": 0.14496377003461322, "learning_rate": 2e-05, "loss": 5.4834, "step": 3651 }, { "epoch": 0.24496092832947647, "grad_norm": 0.16068906904224686, "learning_rate": 2e-05, "loss": 5.5942, "step": 3652 }, { "epoch": 0.2450280041587014, "grad_norm": 0.14587251339580518, "learning_rate": 2e-05, "loss": 5.5138, "step": 3653 }, { "epoch": 0.24509507998792635, "grad_norm": 0.14508859271347674, "learning_rate": 2e-05, "loss": 5.5362, "step": 3654 }, { "epoch": 0.24516215581715128, "grad_norm": 0.14269287334823746, "learning_rate": 2e-05, "loss": 5.4556, "step": 3655 }, { "epoch": 0.24522923164637622, "grad_norm": 0.14282063322235541, "learning_rate": 2e-05, "loss": 5.3507, "step": 3656 }, { "epoch": 0.24529630747560116, "grad_norm": 0.14213918266620293, "learning_rate": 2e-05, "loss": 5.4482, "step": 3657 }, { "epoch": 0.2453633833048261, "grad_norm": 0.1414017873713863, "learning_rate": 2e-05, "loss": 5.358, "step": 3658 }, { "epoch": 0.24543045913405104, "grad_norm": 0.14491051464040294, "learning_rate": 2e-05, "loss": 5.4104, "step": 3659 }, { "epoch": 0.24549753496327598, "grad_norm": 0.16307578897831285, "learning_rate": 2e-05, "loss": 5.332, "step": 3660 }, { "epoch": 0.24556461079250091, "grad_norm": 0.15323985260479694, "learning_rate": 2e-05, "loss": 5.645, "step": 3661 }, { "epoch": 0.24563168662172585, "grad_norm": 0.14618345101920524, "learning_rate": 2e-05, "loss": 5.5975, "step": 3662 }, { "epoch": 0.2456987624509508, "grad_norm": 0.15171265184919502, "learning_rate": 2e-05, "loss": 5.516, "step": 3663 }, { "epoch": 0.24576583828017573, "grad_norm": 0.1459931451054656, "learning_rate": 2e-05, "loss": 5.4694, "step": 3664 }, { "epoch": 0.24583291410940067, "grad_norm": 0.14177041467419058, "learning_rate": 2e-05, "loss": 5.5929, "step": 3665 }, { "epoch": 0.2458999899386256, "grad_norm": 0.14672452588147475, "learning_rate": 2e-05, "loss": 5.6084, "step": 3666 }, { "epoch": 0.24596706576785055, "grad_norm": 0.14533926188747898, "learning_rate": 2e-05, "loss": 5.4525, "step": 3667 }, { "epoch": 0.24603414159707548, "grad_norm": 0.14981871517150505, "learning_rate": 2e-05, "loss": 5.3662, "step": 3668 }, { "epoch": 0.24610121742630042, "grad_norm": 0.141950703064853, "learning_rate": 2e-05, "loss": 5.5151, "step": 3669 }, { "epoch": 0.24616829325552536, "grad_norm": 0.14813405072550195, "learning_rate": 2e-05, "loss": 5.407, "step": 3670 }, { "epoch": 0.2462353690847503, "grad_norm": 0.14502555308141019, "learning_rate": 2e-05, "loss": 5.4116, "step": 3671 }, { "epoch": 0.24630244491397524, "grad_norm": 0.13977201120380367, "learning_rate": 2e-05, "loss": 5.3089, "step": 3672 }, { "epoch": 0.24636952074320018, "grad_norm": 0.14439439972962667, "learning_rate": 2e-05, "loss": 5.4013, "step": 3673 }, { "epoch": 0.24643659657242512, "grad_norm": 0.155415093590654, "learning_rate": 2e-05, "loss": 5.52, "step": 3674 }, { "epoch": 0.24650367240165005, "grad_norm": 0.1443454422547581, "learning_rate": 2e-05, "loss": 5.4487, "step": 3675 }, { "epoch": 0.246570748230875, "grad_norm": 0.14617138806972998, "learning_rate": 2e-05, "loss": 5.3552, "step": 3676 }, { "epoch": 0.24663782406009993, "grad_norm": 0.1473838118538251, "learning_rate": 2e-05, "loss": 5.5768, "step": 3677 }, { "epoch": 0.24670489988932487, "grad_norm": 0.14007642205061163, "learning_rate": 2e-05, "loss": 5.5146, "step": 3678 }, { "epoch": 0.2467719757185498, "grad_norm": 0.14288371376793463, "learning_rate": 2e-05, "loss": 5.3388, "step": 3679 }, { "epoch": 0.24683905154777475, "grad_norm": 0.15348546011205694, "learning_rate": 2e-05, "loss": 5.546, "step": 3680 }, { "epoch": 0.24690612737699968, "grad_norm": 0.1487894866190546, "learning_rate": 2e-05, "loss": 5.4602, "step": 3681 }, { "epoch": 0.24697320320622465, "grad_norm": 0.15238782699231776, "learning_rate": 2e-05, "loss": 5.4395, "step": 3682 }, { "epoch": 0.2470402790354496, "grad_norm": 0.15212350324453344, "learning_rate": 2e-05, "loss": 5.4079, "step": 3683 }, { "epoch": 0.24710735486467453, "grad_norm": 0.1490143392798747, "learning_rate": 2e-05, "loss": 5.3618, "step": 3684 }, { "epoch": 0.24717443069389947, "grad_norm": 0.1492434181391777, "learning_rate": 2e-05, "loss": 5.3927, "step": 3685 }, { "epoch": 0.2472415065231244, "grad_norm": 0.14856207578599978, "learning_rate": 2e-05, "loss": 5.4936, "step": 3686 }, { "epoch": 0.24730858235234934, "grad_norm": 0.1433958767245302, "learning_rate": 2e-05, "loss": 5.3249, "step": 3687 }, { "epoch": 0.24737565818157428, "grad_norm": 0.1459598563999363, "learning_rate": 2e-05, "loss": 5.4155, "step": 3688 }, { "epoch": 0.24744273401079922, "grad_norm": 0.14893763962528928, "learning_rate": 2e-05, "loss": 5.3354, "step": 3689 }, { "epoch": 0.24750980984002416, "grad_norm": 0.14281587981729854, "learning_rate": 2e-05, "loss": 5.4495, "step": 3690 }, { "epoch": 0.2475768856692491, "grad_norm": 0.1454576184429072, "learning_rate": 2e-05, "loss": 5.4419, "step": 3691 }, { "epoch": 0.24764396149847404, "grad_norm": 0.14992064152839124, "learning_rate": 2e-05, "loss": 5.4173, "step": 3692 }, { "epoch": 0.24771103732769897, "grad_norm": 0.1457027203661565, "learning_rate": 2e-05, "loss": 5.4855, "step": 3693 }, { "epoch": 0.2477781131569239, "grad_norm": 0.13712480928021217, "learning_rate": 2e-05, "loss": 5.3965, "step": 3694 }, { "epoch": 0.24784518898614885, "grad_norm": 0.1506826082913375, "learning_rate": 2e-05, "loss": 5.4105, "step": 3695 }, { "epoch": 0.2479122648153738, "grad_norm": 0.14475299215034776, "learning_rate": 2e-05, "loss": 5.3777, "step": 3696 }, { "epoch": 0.24797934064459873, "grad_norm": 0.14241727883424193, "learning_rate": 2e-05, "loss": 5.4091, "step": 3697 }, { "epoch": 0.24804641647382367, "grad_norm": 0.15242285404685815, "learning_rate": 2e-05, "loss": 5.5338, "step": 3698 }, { "epoch": 0.2481134923030486, "grad_norm": 0.14355888793153426, "learning_rate": 2e-05, "loss": 5.3647, "step": 3699 }, { "epoch": 0.24818056813227354, "grad_norm": 0.14509671238841956, "learning_rate": 2e-05, "loss": 5.5661, "step": 3700 }, { "epoch": 0.24824764396149848, "grad_norm": 0.15105971295539494, "learning_rate": 2e-05, "loss": 5.4519, "step": 3701 }, { "epoch": 0.24831471979072342, "grad_norm": 0.14776018167086183, "learning_rate": 2e-05, "loss": 5.3371, "step": 3702 }, { "epoch": 0.24838179561994836, "grad_norm": 0.14908279479261377, "learning_rate": 2e-05, "loss": 5.4098, "step": 3703 }, { "epoch": 0.2484488714491733, "grad_norm": 0.14701820043350675, "learning_rate": 2e-05, "loss": 5.2194, "step": 3704 }, { "epoch": 0.24851594727839824, "grad_norm": 0.14137802709287903, "learning_rate": 2e-05, "loss": 5.3789, "step": 3705 }, { "epoch": 0.24858302310762317, "grad_norm": 0.14528206783243755, "learning_rate": 2e-05, "loss": 5.5396, "step": 3706 }, { "epoch": 0.2486500989368481, "grad_norm": 0.1530842988147788, "learning_rate": 2e-05, "loss": 5.3475, "step": 3707 }, { "epoch": 0.24871717476607305, "grad_norm": 0.14914422394800178, "learning_rate": 2e-05, "loss": 5.4432, "step": 3708 }, { "epoch": 0.248784250595298, "grad_norm": 0.14537689798870795, "learning_rate": 2e-05, "loss": 5.4776, "step": 3709 }, { "epoch": 0.24885132642452293, "grad_norm": 0.14557369011991786, "learning_rate": 2e-05, "loss": 5.4662, "step": 3710 }, { "epoch": 0.24891840225374787, "grad_norm": 0.15725903389508947, "learning_rate": 2e-05, "loss": 5.4533, "step": 3711 }, { "epoch": 0.2489854780829728, "grad_norm": 0.14321110469564327, "learning_rate": 2e-05, "loss": 5.3905, "step": 3712 }, { "epoch": 0.24905255391219774, "grad_norm": 0.14172155653544785, "learning_rate": 2e-05, "loss": 5.5528, "step": 3713 }, { "epoch": 0.24911962974142268, "grad_norm": 0.14778521722156662, "learning_rate": 2e-05, "loss": 5.4289, "step": 3714 }, { "epoch": 0.24918670557064762, "grad_norm": 0.14685067760891327, "learning_rate": 2e-05, "loss": 5.406, "step": 3715 }, { "epoch": 0.24925378139987256, "grad_norm": 0.14686093946460982, "learning_rate": 2e-05, "loss": 5.5111, "step": 3716 }, { "epoch": 0.2493208572290975, "grad_norm": 0.14921292697583893, "learning_rate": 2e-05, "loss": 5.3773, "step": 3717 }, { "epoch": 0.24938793305832244, "grad_norm": 0.14574756178581724, "learning_rate": 2e-05, "loss": 5.4568, "step": 3718 }, { "epoch": 0.24945500888754737, "grad_norm": 0.14294330856094223, "learning_rate": 2e-05, "loss": 5.4588, "step": 3719 }, { "epoch": 0.2495220847167723, "grad_norm": 0.14732073835469947, "learning_rate": 2e-05, "loss": 5.4592, "step": 3720 }, { "epoch": 0.24958916054599725, "grad_norm": 0.14214136761395885, "learning_rate": 2e-05, "loss": 5.5093, "step": 3721 }, { "epoch": 0.2496562363752222, "grad_norm": 0.15016760130954998, "learning_rate": 2e-05, "loss": 5.4977, "step": 3722 }, { "epoch": 0.24972331220444713, "grad_norm": 0.1449032185486146, "learning_rate": 2e-05, "loss": 5.2996, "step": 3723 }, { "epoch": 0.24979038803367207, "grad_norm": 0.14698770217095639, "learning_rate": 2e-05, "loss": 5.4727, "step": 3724 }, { "epoch": 0.249857463862897, "grad_norm": 0.14735703440715242, "learning_rate": 2e-05, "loss": 5.4362, "step": 3725 }, { "epoch": 0.24992453969212194, "grad_norm": 0.14584714101193985, "learning_rate": 2e-05, "loss": 5.4359, "step": 3726 }, { "epoch": 0.24999161552134688, "grad_norm": 0.14714183157184782, "learning_rate": 2e-05, "loss": 5.3559, "step": 3727 }, { "epoch": 0.25005869135057185, "grad_norm": 0.14185319776475563, "learning_rate": 2e-05, "loss": 5.6197, "step": 3728 }, { "epoch": 0.2501257671797968, "grad_norm": 0.14445518904789215, "learning_rate": 2e-05, "loss": 5.6117, "step": 3729 }, { "epoch": 0.2501928430090217, "grad_norm": 0.14256658378959786, "learning_rate": 2e-05, "loss": 5.5514, "step": 3730 }, { "epoch": 0.25025991883824666, "grad_norm": 0.1456064586389974, "learning_rate": 2e-05, "loss": 5.4543, "step": 3731 }, { "epoch": 0.2503269946674716, "grad_norm": 0.1469171011547121, "learning_rate": 2e-05, "loss": 5.4971, "step": 3732 }, { "epoch": 0.25039407049669654, "grad_norm": 0.15165530625035067, "learning_rate": 2e-05, "loss": 5.3415, "step": 3733 }, { "epoch": 0.2504611463259215, "grad_norm": 0.14581348188795631, "learning_rate": 2e-05, "loss": 5.4631, "step": 3734 }, { "epoch": 0.2505282221551464, "grad_norm": 0.14654471430777896, "learning_rate": 2e-05, "loss": 5.5605, "step": 3735 }, { "epoch": 0.25059529798437136, "grad_norm": 0.14291705656766163, "learning_rate": 2e-05, "loss": 5.5325, "step": 3736 }, { "epoch": 0.2506623738135963, "grad_norm": 0.1570339929073719, "learning_rate": 2e-05, "loss": 5.532, "step": 3737 }, { "epoch": 0.25072944964282123, "grad_norm": 0.1481393537747991, "learning_rate": 2e-05, "loss": 5.4221, "step": 3738 }, { "epoch": 0.2507965254720462, "grad_norm": 0.14942812086890742, "learning_rate": 2e-05, "loss": 5.5956, "step": 3739 }, { "epoch": 0.2508636013012711, "grad_norm": 0.15728490446844373, "learning_rate": 2e-05, "loss": 5.3462, "step": 3740 }, { "epoch": 0.25093067713049605, "grad_norm": 0.14820747106245247, "learning_rate": 2e-05, "loss": 5.5815, "step": 3741 }, { "epoch": 0.250997752959721, "grad_norm": 0.15508411824280052, "learning_rate": 2e-05, "loss": 5.4309, "step": 3742 }, { "epoch": 0.2510648287889459, "grad_norm": 0.1558609173490631, "learning_rate": 2e-05, "loss": 5.3829, "step": 3743 }, { "epoch": 0.25113190461817086, "grad_norm": 0.15560234145068735, "learning_rate": 2e-05, "loss": 5.3069, "step": 3744 }, { "epoch": 0.2511989804473958, "grad_norm": 0.1491312291805695, "learning_rate": 2e-05, "loss": 5.3313, "step": 3745 }, { "epoch": 0.25126605627662074, "grad_norm": 0.1630677209260026, "learning_rate": 2e-05, "loss": 5.4552, "step": 3746 }, { "epoch": 0.2513331321058457, "grad_norm": 0.1537602319057921, "learning_rate": 2e-05, "loss": 5.4446, "step": 3747 }, { "epoch": 0.2514002079350706, "grad_norm": 0.14658220678469497, "learning_rate": 2e-05, "loss": 5.3901, "step": 3748 }, { "epoch": 0.25146728376429556, "grad_norm": 0.14897090348839365, "learning_rate": 2e-05, "loss": 5.3604, "step": 3749 }, { "epoch": 0.2515343595935205, "grad_norm": 0.16215945255206385, "learning_rate": 2e-05, "loss": 5.4713, "step": 3750 }, { "epoch": 0.25160143542274543, "grad_norm": 0.1416117342497126, "learning_rate": 2e-05, "loss": 5.2477, "step": 3751 }, { "epoch": 0.2516685112519704, "grad_norm": 0.14825452416081117, "learning_rate": 2e-05, "loss": 5.5632, "step": 3752 }, { "epoch": 0.2517355870811953, "grad_norm": 0.1604543401766358, "learning_rate": 2e-05, "loss": 5.4536, "step": 3753 }, { "epoch": 0.25180266291042025, "grad_norm": 0.14660550400353697, "learning_rate": 2e-05, "loss": 5.561, "step": 3754 }, { "epoch": 0.2518697387396452, "grad_norm": 0.14069657923597087, "learning_rate": 2e-05, "loss": 5.4712, "step": 3755 }, { "epoch": 0.2519368145688701, "grad_norm": 0.1474505129365506, "learning_rate": 2e-05, "loss": 5.5361, "step": 3756 }, { "epoch": 0.25200389039809507, "grad_norm": 0.15000483023731537, "learning_rate": 2e-05, "loss": 5.4254, "step": 3757 }, { "epoch": 0.25207096622732, "grad_norm": 0.1510018599640942, "learning_rate": 2e-05, "loss": 5.3964, "step": 3758 }, { "epoch": 0.25213804205654494, "grad_norm": 0.14974894808130063, "learning_rate": 2e-05, "loss": 5.3482, "step": 3759 }, { "epoch": 0.2522051178857699, "grad_norm": 0.14453146224163974, "learning_rate": 2e-05, "loss": 5.4588, "step": 3760 }, { "epoch": 0.2522721937149948, "grad_norm": 0.15149553743198957, "learning_rate": 2e-05, "loss": 5.3518, "step": 3761 }, { "epoch": 0.25233926954421976, "grad_norm": 0.15140957033331257, "learning_rate": 2e-05, "loss": 5.4638, "step": 3762 }, { "epoch": 0.2524063453734447, "grad_norm": 0.15756766371317613, "learning_rate": 2e-05, "loss": 5.419, "step": 3763 }, { "epoch": 0.25247342120266963, "grad_norm": 0.1522980868889702, "learning_rate": 2e-05, "loss": 5.4874, "step": 3764 }, { "epoch": 0.2525404970318946, "grad_norm": 0.14788416174679844, "learning_rate": 2e-05, "loss": 5.4031, "step": 3765 }, { "epoch": 0.2526075728611195, "grad_norm": 0.14981441306040225, "learning_rate": 2e-05, "loss": 5.6343, "step": 3766 }, { "epoch": 0.25267464869034445, "grad_norm": 0.16060534549702618, "learning_rate": 2e-05, "loss": 5.4702, "step": 3767 }, { "epoch": 0.2527417245195694, "grad_norm": 0.14968009800683948, "learning_rate": 2e-05, "loss": 5.337, "step": 3768 }, { "epoch": 0.2528088003487943, "grad_norm": 0.1487903883699617, "learning_rate": 2e-05, "loss": 5.5062, "step": 3769 }, { "epoch": 0.25287587617801927, "grad_norm": 0.15088994900414518, "learning_rate": 2e-05, "loss": 5.4112, "step": 3770 }, { "epoch": 0.2529429520072442, "grad_norm": 0.17307729005810243, "learning_rate": 2e-05, "loss": 5.4044, "step": 3771 }, { "epoch": 0.25301002783646914, "grad_norm": 0.14467965448648515, "learning_rate": 2e-05, "loss": 5.4468, "step": 3772 }, { "epoch": 0.2530771036656941, "grad_norm": 0.1570833072890613, "learning_rate": 2e-05, "loss": 5.5496, "step": 3773 }, { "epoch": 0.253144179494919, "grad_norm": 0.15385072835908956, "learning_rate": 2e-05, "loss": 5.4976, "step": 3774 }, { "epoch": 0.25321125532414396, "grad_norm": 0.15863042549085676, "learning_rate": 2e-05, "loss": 5.4356, "step": 3775 }, { "epoch": 0.2532783311533689, "grad_norm": 0.1495700043424086, "learning_rate": 2e-05, "loss": 5.3646, "step": 3776 }, { "epoch": 0.25334540698259383, "grad_norm": 0.15822225013416757, "learning_rate": 2e-05, "loss": 5.3834, "step": 3777 }, { "epoch": 0.2534124828118188, "grad_norm": 0.151028509168438, "learning_rate": 2e-05, "loss": 5.5125, "step": 3778 }, { "epoch": 0.2534795586410437, "grad_norm": 0.1580718401617811, "learning_rate": 2e-05, "loss": 5.4318, "step": 3779 }, { "epoch": 0.25354663447026865, "grad_norm": 0.15241922542955663, "learning_rate": 2e-05, "loss": 5.5999, "step": 3780 }, { "epoch": 0.2536137102994936, "grad_norm": 0.15025556110340946, "learning_rate": 2e-05, "loss": 5.4019, "step": 3781 }, { "epoch": 0.2536807861287185, "grad_norm": 0.1597545137971037, "learning_rate": 2e-05, "loss": 5.4116, "step": 3782 }, { "epoch": 0.25374786195794347, "grad_norm": 0.14521459928213876, "learning_rate": 2e-05, "loss": 5.3941, "step": 3783 }, { "epoch": 0.2538149377871684, "grad_norm": 0.152505074989981, "learning_rate": 2e-05, "loss": 5.3728, "step": 3784 }, { "epoch": 0.25388201361639334, "grad_norm": 0.14091224865268126, "learning_rate": 2e-05, "loss": 5.4428, "step": 3785 }, { "epoch": 0.2539490894456183, "grad_norm": 0.14637375470173636, "learning_rate": 2e-05, "loss": 5.4399, "step": 3786 }, { "epoch": 0.2540161652748432, "grad_norm": 0.1469739070043092, "learning_rate": 2e-05, "loss": 5.5227, "step": 3787 }, { "epoch": 0.25408324110406816, "grad_norm": 0.14646275326756972, "learning_rate": 2e-05, "loss": 5.5411, "step": 3788 }, { "epoch": 0.2541503169332931, "grad_norm": 0.15032197446633735, "learning_rate": 2e-05, "loss": 5.3783, "step": 3789 }, { "epoch": 0.25421739276251804, "grad_norm": 0.14617054790420025, "learning_rate": 2e-05, "loss": 5.3097, "step": 3790 }, { "epoch": 0.254284468591743, "grad_norm": 0.1461232428379103, "learning_rate": 2e-05, "loss": 5.4596, "step": 3791 }, { "epoch": 0.2543515444209679, "grad_norm": 0.14750983523767622, "learning_rate": 2e-05, "loss": 5.3047, "step": 3792 }, { "epoch": 0.25441862025019285, "grad_norm": 0.14369089058307205, "learning_rate": 2e-05, "loss": 5.4326, "step": 3793 }, { "epoch": 0.2544856960794178, "grad_norm": 0.14370718070951505, "learning_rate": 2e-05, "loss": 5.4828, "step": 3794 }, { "epoch": 0.2545527719086427, "grad_norm": 0.15415262987685194, "learning_rate": 2e-05, "loss": 5.4204, "step": 3795 }, { "epoch": 0.25461984773786767, "grad_norm": 0.1418703521584847, "learning_rate": 2e-05, "loss": 5.4497, "step": 3796 }, { "epoch": 0.2546869235670926, "grad_norm": 0.14686268848231318, "learning_rate": 2e-05, "loss": 5.4446, "step": 3797 }, { "epoch": 0.25475399939631754, "grad_norm": 0.14763250521108806, "learning_rate": 2e-05, "loss": 5.3383, "step": 3798 }, { "epoch": 0.2548210752255425, "grad_norm": 0.14658773401178146, "learning_rate": 2e-05, "loss": 5.631, "step": 3799 }, { "epoch": 0.2548881510547674, "grad_norm": 0.13927429931640617, "learning_rate": 2e-05, "loss": 5.4475, "step": 3800 }, { "epoch": 0.25495522688399236, "grad_norm": 0.15900578915114108, "learning_rate": 2e-05, "loss": 5.4303, "step": 3801 }, { "epoch": 0.2550223027132173, "grad_norm": 0.1440663609132752, "learning_rate": 2e-05, "loss": 5.2744, "step": 3802 }, { "epoch": 0.25508937854244224, "grad_norm": 0.14750285643092637, "learning_rate": 2e-05, "loss": 5.3756, "step": 3803 }, { "epoch": 0.2551564543716672, "grad_norm": 0.15487437217489824, "learning_rate": 2e-05, "loss": 5.5675, "step": 3804 }, { "epoch": 0.2552235302008921, "grad_norm": 0.14333348006163538, "learning_rate": 2e-05, "loss": 5.3834, "step": 3805 }, { "epoch": 0.25529060603011705, "grad_norm": 0.15086835385834996, "learning_rate": 2e-05, "loss": 5.477, "step": 3806 }, { "epoch": 0.255357681859342, "grad_norm": 0.15251924229132585, "learning_rate": 2e-05, "loss": 5.4025, "step": 3807 }, { "epoch": 0.25542475768856693, "grad_norm": 0.1491293072520056, "learning_rate": 2e-05, "loss": 5.3184, "step": 3808 }, { "epoch": 0.25549183351779187, "grad_norm": 0.15171308682643084, "learning_rate": 2e-05, "loss": 5.4826, "step": 3809 }, { "epoch": 0.2555589093470168, "grad_norm": 0.14670873216509572, "learning_rate": 2e-05, "loss": 5.6444, "step": 3810 }, { "epoch": 0.25562598517624174, "grad_norm": 0.1461583902989626, "learning_rate": 2e-05, "loss": 5.5252, "step": 3811 }, { "epoch": 0.2556930610054667, "grad_norm": 0.1552298614685276, "learning_rate": 2e-05, "loss": 5.3742, "step": 3812 }, { "epoch": 0.2557601368346916, "grad_norm": 0.15263088244554182, "learning_rate": 2e-05, "loss": 5.5625, "step": 3813 }, { "epoch": 0.25582721266391656, "grad_norm": 0.14703879994773886, "learning_rate": 2e-05, "loss": 5.3791, "step": 3814 }, { "epoch": 0.2558942884931415, "grad_norm": 0.14592848941260114, "learning_rate": 2e-05, "loss": 5.5148, "step": 3815 }, { "epoch": 0.25596136432236644, "grad_norm": 0.155792664196072, "learning_rate": 2e-05, "loss": 5.4763, "step": 3816 }, { "epoch": 0.2560284401515914, "grad_norm": 0.14735033733964242, "learning_rate": 2e-05, "loss": 5.6019, "step": 3817 }, { "epoch": 0.2560955159808163, "grad_norm": 0.13735293431396386, "learning_rate": 2e-05, "loss": 5.4016, "step": 3818 }, { "epoch": 0.25616259181004125, "grad_norm": 0.15081874571023168, "learning_rate": 2e-05, "loss": 5.5326, "step": 3819 }, { "epoch": 0.2562296676392662, "grad_norm": 0.1447062918806684, "learning_rate": 2e-05, "loss": 5.6289, "step": 3820 }, { "epoch": 0.25629674346849113, "grad_norm": 0.14860696124792633, "learning_rate": 2e-05, "loss": 5.6646, "step": 3821 }, { "epoch": 0.25636381929771607, "grad_norm": 0.14705168204088293, "learning_rate": 2e-05, "loss": 5.3814, "step": 3822 }, { "epoch": 0.256430895126941, "grad_norm": 0.1504028860838788, "learning_rate": 2e-05, "loss": 5.5603, "step": 3823 }, { "epoch": 0.25649797095616594, "grad_norm": 0.1506198426992679, "learning_rate": 2e-05, "loss": 5.5622, "step": 3824 }, { "epoch": 0.2565650467853909, "grad_norm": 0.15015485439058282, "learning_rate": 2e-05, "loss": 5.4685, "step": 3825 }, { "epoch": 0.2566321226146158, "grad_norm": 0.14783799152041394, "learning_rate": 2e-05, "loss": 5.4901, "step": 3826 }, { "epoch": 0.25669919844384076, "grad_norm": 0.14556315217727664, "learning_rate": 2e-05, "loss": 5.5117, "step": 3827 }, { "epoch": 0.2567662742730657, "grad_norm": 0.14748817661083055, "learning_rate": 2e-05, "loss": 5.3964, "step": 3828 }, { "epoch": 0.25683335010229064, "grad_norm": 0.1463040750964665, "learning_rate": 2e-05, "loss": 5.4465, "step": 3829 }, { "epoch": 0.2569004259315156, "grad_norm": 0.15307181328508748, "learning_rate": 2e-05, "loss": 5.4208, "step": 3830 }, { "epoch": 0.2569675017607405, "grad_norm": 0.1454127697673739, "learning_rate": 2e-05, "loss": 5.3367, "step": 3831 }, { "epoch": 0.25703457758996545, "grad_norm": 0.14649840510161494, "learning_rate": 2e-05, "loss": 5.5103, "step": 3832 }, { "epoch": 0.2571016534191904, "grad_norm": 0.1548959519444856, "learning_rate": 2e-05, "loss": 5.4667, "step": 3833 }, { "epoch": 0.25716872924841533, "grad_norm": 0.1565744244466912, "learning_rate": 2e-05, "loss": 5.4358, "step": 3834 }, { "epoch": 0.25723580507764027, "grad_norm": 0.14936729323215645, "learning_rate": 2e-05, "loss": 5.4376, "step": 3835 }, { "epoch": 0.2573028809068652, "grad_norm": 0.1491608006987523, "learning_rate": 2e-05, "loss": 5.4414, "step": 3836 }, { "epoch": 0.25736995673609014, "grad_norm": 0.14811164173008498, "learning_rate": 2e-05, "loss": 5.4935, "step": 3837 }, { "epoch": 0.2574370325653151, "grad_norm": 0.14763781029501316, "learning_rate": 2e-05, "loss": 5.425, "step": 3838 }, { "epoch": 0.25750410839454, "grad_norm": 0.14470296608904837, "learning_rate": 2e-05, "loss": 5.4426, "step": 3839 }, { "epoch": 0.25757118422376496, "grad_norm": 0.1426215585856793, "learning_rate": 2e-05, "loss": 5.4508, "step": 3840 }, { "epoch": 0.2576382600529899, "grad_norm": 0.15617763765753245, "learning_rate": 2e-05, "loss": 5.4295, "step": 3841 }, { "epoch": 0.25770533588221484, "grad_norm": 0.1469652035429382, "learning_rate": 2e-05, "loss": 5.4878, "step": 3842 }, { "epoch": 0.2577724117114398, "grad_norm": 0.14025767234877837, "learning_rate": 2e-05, "loss": 5.4263, "step": 3843 }, { "epoch": 0.2578394875406647, "grad_norm": 0.14460337641806692, "learning_rate": 2e-05, "loss": 5.5263, "step": 3844 }, { "epoch": 0.25790656336988965, "grad_norm": 0.14428370616266806, "learning_rate": 2e-05, "loss": 5.6108, "step": 3845 }, { "epoch": 0.2579736391991146, "grad_norm": 0.14754051544332777, "learning_rate": 2e-05, "loss": 5.4248, "step": 3846 }, { "epoch": 0.25804071502833953, "grad_norm": 0.1417889974685112, "learning_rate": 2e-05, "loss": 5.4731, "step": 3847 }, { "epoch": 0.25810779085756447, "grad_norm": 0.1405041118169406, "learning_rate": 2e-05, "loss": 5.4093, "step": 3848 }, { "epoch": 0.2581748666867894, "grad_norm": 0.14858446476047155, "learning_rate": 2e-05, "loss": 5.5142, "step": 3849 }, { "epoch": 0.25824194251601434, "grad_norm": 0.14073322544613465, "learning_rate": 2e-05, "loss": 5.4722, "step": 3850 }, { "epoch": 0.2583090183452393, "grad_norm": 0.14826431672421808, "learning_rate": 2e-05, "loss": 5.4229, "step": 3851 }, { "epoch": 0.2583760941744642, "grad_norm": 0.14755538902732437, "learning_rate": 2e-05, "loss": 5.5777, "step": 3852 }, { "epoch": 0.25844317000368916, "grad_norm": 0.14771586463461778, "learning_rate": 2e-05, "loss": 5.5744, "step": 3853 }, { "epoch": 0.2585102458329141, "grad_norm": 0.14775919043476643, "learning_rate": 2e-05, "loss": 5.5132, "step": 3854 }, { "epoch": 0.25857732166213904, "grad_norm": 0.1410593424589937, "learning_rate": 2e-05, "loss": 5.5138, "step": 3855 }, { "epoch": 0.258644397491364, "grad_norm": 0.1505672524848943, "learning_rate": 2e-05, "loss": 5.2929, "step": 3856 }, { "epoch": 0.2587114733205889, "grad_norm": 0.15266885993994908, "learning_rate": 2e-05, "loss": 5.4722, "step": 3857 }, { "epoch": 0.25877854914981385, "grad_norm": 0.1595065505652384, "learning_rate": 2e-05, "loss": 5.4876, "step": 3858 }, { "epoch": 0.2588456249790388, "grad_norm": 0.1475495976306526, "learning_rate": 2e-05, "loss": 5.3795, "step": 3859 }, { "epoch": 0.25891270080826373, "grad_norm": 0.15039833089015914, "learning_rate": 2e-05, "loss": 5.5299, "step": 3860 }, { "epoch": 0.25897977663748867, "grad_norm": 0.15212458560309677, "learning_rate": 2e-05, "loss": 5.4493, "step": 3861 }, { "epoch": 0.2590468524667136, "grad_norm": 0.14830852614970091, "learning_rate": 2e-05, "loss": 5.4419, "step": 3862 }, { "epoch": 0.25911392829593854, "grad_norm": 0.1472576863894037, "learning_rate": 2e-05, "loss": 5.4509, "step": 3863 }, { "epoch": 0.2591810041251635, "grad_norm": 0.16413465281062362, "learning_rate": 2e-05, "loss": 5.3765, "step": 3864 }, { "epoch": 0.2592480799543884, "grad_norm": 0.15304029171526892, "learning_rate": 2e-05, "loss": 5.5081, "step": 3865 }, { "epoch": 0.25931515578361336, "grad_norm": 0.1490416272033554, "learning_rate": 2e-05, "loss": 5.4831, "step": 3866 }, { "epoch": 0.2593822316128383, "grad_norm": 0.15357769296085355, "learning_rate": 2e-05, "loss": 5.5006, "step": 3867 }, { "epoch": 0.25944930744206324, "grad_norm": 0.15082440023242194, "learning_rate": 2e-05, "loss": 5.3, "step": 3868 }, { "epoch": 0.2595163832712882, "grad_norm": 0.1406917571594882, "learning_rate": 2e-05, "loss": 5.4954, "step": 3869 }, { "epoch": 0.2595834591005131, "grad_norm": 0.1417999790171647, "learning_rate": 2e-05, "loss": 5.5183, "step": 3870 }, { "epoch": 0.25965053492973805, "grad_norm": 0.1576180265007791, "learning_rate": 2e-05, "loss": 5.3812, "step": 3871 }, { "epoch": 0.259717610758963, "grad_norm": 0.1435934273525419, "learning_rate": 2e-05, "loss": 5.5301, "step": 3872 }, { "epoch": 0.25978468658818793, "grad_norm": 0.14153321502770955, "learning_rate": 2e-05, "loss": 5.3181, "step": 3873 }, { "epoch": 0.25985176241741287, "grad_norm": 0.1484391906080592, "learning_rate": 2e-05, "loss": 5.3863, "step": 3874 }, { "epoch": 0.2599188382466378, "grad_norm": 0.14866640606694945, "learning_rate": 2e-05, "loss": 5.4691, "step": 3875 }, { "epoch": 0.25998591407586275, "grad_norm": 0.14430413833194114, "learning_rate": 2e-05, "loss": 5.4291, "step": 3876 }, { "epoch": 0.2600529899050877, "grad_norm": 0.15408431081341858, "learning_rate": 2e-05, "loss": 5.471, "step": 3877 }, { "epoch": 0.2601200657343126, "grad_norm": 0.156422461148273, "learning_rate": 2e-05, "loss": 5.4978, "step": 3878 }, { "epoch": 0.26018714156353756, "grad_norm": 0.1457714697220853, "learning_rate": 2e-05, "loss": 5.414, "step": 3879 }, { "epoch": 0.2602542173927625, "grad_norm": 0.1429113153140056, "learning_rate": 2e-05, "loss": 5.4492, "step": 3880 }, { "epoch": 0.26032129322198744, "grad_norm": 0.1520664443222717, "learning_rate": 2e-05, "loss": 5.4853, "step": 3881 }, { "epoch": 0.2603883690512124, "grad_norm": 0.14763482164581493, "learning_rate": 2e-05, "loss": 5.4926, "step": 3882 }, { "epoch": 0.2604554448804373, "grad_norm": 0.1437934131799041, "learning_rate": 2e-05, "loss": 5.4853, "step": 3883 }, { "epoch": 0.26052252070966225, "grad_norm": 0.14536924910016258, "learning_rate": 2e-05, "loss": 5.529, "step": 3884 }, { "epoch": 0.2605895965388872, "grad_norm": 0.14392646022379038, "learning_rate": 2e-05, "loss": 5.443, "step": 3885 }, { "epoch": 0.26065667236811213, "grad_norm": 0.1490503659274492, "learning_rate": 2e-05, "loss": 5.4564, "step": 3886 }, { "epoch": 0.26072374819733707, "grad_norm": 0.15401581298820416, "learning_rate": 2e-05, "loss": 5.5146, "step": 3887 }, { "epoch": 0.260790824026562, "grad_norm": 0.1451792881645716, "learning_rate": 2e-05, "loss": 5.4419, "step": 3888 }, { "epoch": 0.26085789985578695, "grad_norm": 0.14623125135775142, "learning_rate": 2e-05, "loss": 5.5564, "step": 3889 }, { "epoch": 0.2609249756850119, "grad_norm": 0.16040584855767342, "learning_rate": 2e-05, "loss": 5.4483, "step": 3890 }, { "epoch": 0.2609920515142368, "grad_norm": 0.14454050225788131, "learning_rate": 2e-05, "loss": 5.5406, "step": 3891 }, { "epoch": 0.26105912734346176, "grad_norm": 0.15719706931169644, "learning_rate": 2e-05, "loss": 5.5034, "step": 3892 }, { "epoch": 0.2611262031726867, "grad_norm": 0.1535580874405099, "learning_rate": 2e-05, "loss": 5.4852, "step": 3893 }, { "epoch": 0.26119327900191164, "grad_norm": 0.14711019299757844, "learning_rate": 2e-05, "loss": 5.3776, "step": 3894 }, { "epoch": 0.2612603548311366, "grad_norm": 0.15194779175487633, "learning_rate": 2e-05, "loss": 5.3385, "step": 3895 }, { "epoch": 0.2613274306603615, "grad_norm": 0.14930527392892204, "learning_rate": 2e-05, "loss": 5.4857, "step": 3896 }, { "epoch": 0.26139450648958645, "grad_norm": 0.1413100453519304, "learning_rate": 2e-05, "loss": 5.5, "step": 3897 }, { "epoch": 0.2614615823188114, "grad_norm": 0.15073416321804295, "learning_rate": 2e-05, "loss": 5.4392, "step": 3898 }, { "epoch": 0.26152865814803633, "grad_norm": 0.1419101693912533, "learning_rate": 2e-05, "loss": 5.3436, "step": 3899 }, { "epoch": 0.26159573397726127, "grad_norm": 0.1483493834825796, "learning_rate": 2e-05, "loss": 5.4655, "step": 3900 }, { "epoch": 0.2616628098064862, "grad_norm": 0.1416133825578569, "learning_rate": 2e-05, "loss": 5.3605, "step": 3901 }, { "epoch": 0.26172988563571115, "grad_norm": 0.14401923491992427, "learning_rate": 2e-05, "loss": 5.5828, "step": 3902 }, { "epoch": 0.2617969614649361, "grad_norm": 0.15144671099689408, "learning_rate": 2e-05, "loss": 5.478, "step": 3903 }, { "epoch": 0.261864037294161, "grad_norm": 0.14393306547585147, "learning_rate": 2e-05, "loss": 5.4686, "step": 3904 }, { "epoch": 0.26193111312338596, "grad_norm": 0.14233292188288868, "learning_rate": 2e-05, "loss": 5.3599, "step": 3905 }, { "epoch": 0.2619981889526109, "grad_norm": 0.14665231024257255, "learning_rate": 2e-05, "loss": 5.3773, "step": 3906 }, { "epoch": 0.26206526478183584, "grad_norm": 0.15419678874675444, "learning_rate": 2e-05, "loss": 5.4649, "step": 3907 }, { "epoch": 0.2621323406110608, "grad_norm": 0.1551897951298403, "learning_rate": 2e-05, "loss": 5.4885, "step": 3908 }, { "epoch": 0.26219941644028577, "grad_norm": 0.1504294017000213, "learning_rate": 2e-05, "loss": 5.5096, "step": 3909 }, { "epoch": 0.2622664922695107, "grad_norm": 0.1515171634727965, "learning_rate": 2e-05, "loss": 5.4202, "step": 3910 }, { "epoch": 0.26233356809873565, "grad_norm": 0.1485661533598932, "learning_rate": 2e-05, "loss": 5.3466, "step": 3911 }, { "epoch": 0.2624006439279606, "grad_norm": 0.15183025941885792, "learning_rate": 2e-05, "loss": 5.5035, "step": 3912 }, { "epoch": 0.2624677197571855, "grad_norm": 0.14732117833667932, "learning_rate": 2e-05, "loss": 5.1908, "step": 3913 }, { "epoch": 0.26253479558641046, "grad_norm": 0.14970898733761662, "learning_rate": 2e-05, "loss": 5.4572, "step": 3914 }, { "epoch": 0.2626018714156354, "grad_norm": 0.1570667863469998, "learning_rate": 2e-05, "loss": 5.4822, "step": 3915 }, { "epoch": 0.26266894724486034, "grad_norm": 0.15037231996967612, "learning_rate": 2e-05, "loss": 5.602, "step": 3916 }, { "epoch": 0.2627360230740853, "grad_norm": 0.14599062190341205, "learning_rate": 2e-05, "loss": 5.5927, "step": 3917 }, { "epoch": 0.2628030989033102, "grad_norm": 0.15241655082673747, "learning_rate": 2e-05, "loss": 5.3781, "step": 3918 }, { "epoch": 0.26287017473253516, "grad_norm": 0.14834906260284883, "learning_rate": 2e-05, "loss": 5.4393, "step": 3919 }, { "epoch": 0.2629372505617601, "grad_norm": 0.1388934989012939, "learning_rate": 2e-05, "loss": 5.4115, "step": 3920 }, { "epoch": 0.26300432639098503, "grad_norm": 0.14629063040826207, "learning_rate": 2e-05, "loss": 5.3671, "step": 3921 }, { "epoch": 0.26307140222020997, "grad_norm": 0.15262539854031368, "learning_rate": 2e-05, "loss": 5.4317, "step": 3922 }, { "epoch": 0.2631384780494349, "grad_norm": 0.13944660490305602, "learning_rate": 2e-05, "loss": 5.36, "step": 3923 }, { "epoch": 0.26320555387865985, "grad_norm": 0.14970038808776698, "learning_rate": 2e-05, "loss": 5.3034, "step": 3924 }, { "epoch": 0.2632726297078848, "grad_norm": 0.14666369191607267, "learning_rate": 2e-05, "loss": 5.358, "step": 3925 }, { "epoch": 0.2633397055371097, "grad_norm": 0.1464924921731778, "learning_rate": 2e-05, "loss": 5.3519, "step": 3926 }, { "epoch": 0.26340678136633466, "grad_norm": 0.14295580467681032, "learning_rate": 2e-05, "loss": 5.3948, "step": 3927 }, { "epoch": 0.2634738571955596, "grad_norm": 0.15315361360115567, "learning_rate": 2e-05, "loss": 5.5026, "step": 3928 }, { "epoch": 0.26354093302478454, "grad_norm": 0.1549928196158761, "learning_rate": 2e-05, "loss": 5.2922, "step": 3929 }, { "epoch": 0.2636080088540095, "grad_norm": 0.14831235448930838, "learning_rate": 2e-05, "loss": 5.4309, "step": 3930 }, { "epoch": 0.2636750846832344, "grad_norm": 0.14631020907619874, "learning_rate": 2e-05, "loss": 5.348, "step": 3931 }, { "epoch": 0.26374216051245936, "grad_norm": 0.14315035868996318, "learning_rate": 2e-05, "loss": 5.4006, "step": 3932 }, { "epoch": 0.2638092363416843, "grad_norm": 0.138619142752392, "learning_rate": 2e-05, "loss": 5.339, "step": 3933 }, { "epoch": 0.26387631217090923, "grad_norm": 0.14090254676230443, "learning_rate": 2e-05, "loss": 5.3838, "step": 3934 }, { "epoch": 0.26394338800013417, "grad_norm": 0.14146528358016539, "learning_rate": 2e-05, "loss": 5.5542, "step": 3935 }, { "epoch": 0.2640104638293591, "grad_norm": 0.15280248826125098, "learning_rate": 2e-05, "loss": 5.3512, "step": 3936 }, { "epoch": 0.26407753965858405, "grad_norm": 0.1365271342293026, "learning_rate": 2e-05, "loss": 5.4337, "step": 3937 }, { "epoch": 0.264144615487809, "grad_norm": 0.14160049577973796, "learning_rate": 2e-05, "loss": 5.6048, "step": 3938 }, { "epoch": 0.2642116913170339, "grad_norm": 0.14497702634602677, "learning_rate": 2e-05, "loss": 5.4811, "step": 3939 }, { "epoch": 0.26427876714625886, "grad_norm": 0.14378048039287575, "learning_rate": 2e-05, "loss": 5.5025, "step": 3940 }, { "epoch": 0.2643458429754838, "grad_norm": 0.15170355920336792, "learning_rate": 2e-05, "loss": 5.4393, "step": 3941 }, { "epoch": 0.26441291880470874, "grad_norm": 0.1491013017415287, "learning_rate": 2e-05, "loss": 5.3491, "step": 3942 }, { "epoch": 0.2644799946339337, "grad_norm": 0.15300722296795793, "learning_rate": 2e-05, "loss": 5.5461, "step": 3943 }, { "epoch": 0.2645470704631586, "grad_norm": 0.1422665084623843, "learning_rate": 2e-05, "loss": 5.4859, "step": 3944 }, { "epoch": 0.26461414629238356, "grad_norm": 0.14086613750178717, "learning_rate": 2e-05, "loss": 5.4694, "step": 3945 }, { "epoch": 0.2646812221216085, "grad_norm": 0.14246314511349153, "learning_rate": 2e-05, "loss": 5.4805, "step": 3946 }, { "epoch": 0.26474829795083343, "grad_norm": 0.1478285621139698, "learning_rate": 2e-05, "loss": 5.4613, "step": 3947 }, { "epoch": 0.26481537378005837, "grad_norm": 0.1381168605582325, "learning_rate": 2e-05, "loss": 5.4834, "step": 3948 }, { "epoch": 0.2648824496092833, "grad_norm": 0.14302195792472977, "learning_rate": 2e-05, "loss": 5.4121, "step": 3949 }, { "epoch": 0.26494952543850825, "grad_norm": 0.14662659215734974, "learning_rate": 2e-05, "loss": 5.3428, "step": 3950 }, { "epoch": 0.2650166012677332, "grad_norm": 0.14353767664312628, "learning_rate": 2e-05, "loss": 5.4539, "step": 3951 }, { "epoch": 0.2650836770969581, "grad_norm": 0.15503948120396469, "learning_rate": 2e-05, "loss": 5.372, "step": 3952 }, { "epoch": 0.26515075292618306, "grad_norm": 0.14106078704891806, "learning_rate": 2e-05, "loss": 5.4382, "step": 3953 }, { "epoch": 0.265217828755408, "grad_norm": 0.14534439590715068, "learning_rate": 2e-05, "loss": 5.5083, "step": 3954 }, { "epoch": 0.26528490458463294, "grad_norm": 0.14851161744696703, "learning_rate": 2e-05, "loss": 5.4736, "step": 3955 }, { "epoch": 0.2653519804138579, "grad_norm": 0.1409302765135972, "learning_rate": 2e-05, "loss": 5.3879, "step": 3956 }, { "epoch": 0.2654190562430828, "grad_norm": 0.1560010311825181, "learning_rate": 2e-05, "loss": 5.5792, "step": 3957 }, { "epoch": 0.26548613207230776, "grad_norm": 0.14469956025173653, "learning_rate": 2e-05, "loss": 5.6303, "step": 3958 }, { "epoch": 0.2655532079015327, "grad_norm": 0.15427136406346895, "learning_rate": 2e-05, "loss": 5.3819, "step": 3959 }, { "epoch": 0.26562028373075763, "grad_norm": 0.1419142390436936, "learning_rate": 2e-05, "loss": 5.4751, "step": 3960 }, { "epoch": 0.26568735955998257, "grad_norm": 0.14024886363030958, "learning_rate": 2e-05, "loss": 5.4081, "step": 3961 }, { "epoch": 0.2657544353892075, "grad_norm": 0.14106118556531205, "learning_rate": 2e-05, "loss": 5.4772, "step": 3962 }, { "epoch": 0.26582151121843245, "grad_norm": 0.14514251672518408, "learning_rate": 2e-05, "loss": 5.393, "step": 3963 }, { "epoch": 0.2658885870476574, "grad_norm": 0.1442537694326055, "learning_rate": 2e-05, "loss": 5.5645, "step": 3964 }, { "epoch": 0.2659556628768823, "grad_norm": 0.14791947101068886, "learning_rate": 2e-05, "loss": 5.329, "step": 3965 }, { "epoch": 0.26602273870610726, "grad_norm": 0.14291208790020654, "learning_rate": 2e-05, "loss": 5.5062, "step": 3966 }, { "epoch": 0.2660898145353322, "grad_norm": 0.15183833808373706, "learning_rate": 2e-05, "loss": 5.281, "step": 3967 }, { "epoch": 0.26615689036455714, "grad_norm": 0.15192532819502788, "learning_rate": 2e-05, "loss": 5.3464, "step": 3968 }, { "epoch": 0.2662239661937821, "grad_norm": 0.1465302899812399, "learning_rate": 2e-05, "loss": 5.5133, "step": 3969 }, { "epoch": 0.266291042023007, "grad_norm": 0.14724375900207798, "learning_rate": 2e-05, "loss": 5.244, "step": 3970 }, { "epoch": 0.26635811785223196, "grad_norm": 0.1589793481967423, "learning_rate": 2e-05, "loss": 5.4654, "step": 3971 }, { "epoch": 0.2664251936814569, "grad_norm": 0.14623702837254354, "learning_rate": 2e-05, "loss": 5.4248, "step": 3972 }, { "epoch": 0.26649226951068183, "grad_norm": 0.149889735227389, "learning_rate": 2e-05, "loss": 5.2071, "step": 3973 }, { "epoch": 0.2665593453399068, "grad_norm": 0.14827475231902176, "learning_rate": 2e-05, "loss": 5.5338, "step": 3974 }, { "epoch": 0.2666264211691317, "grad_norm": 0.15200149399781676, "learning_rate": 2e-05, "loss": 5.3729, "step": 3975 }, { "epoch": 0.26669349699835665, "grad_norm": 0.15610579812519548, "learning_rate": 2e-05, "loss": 5.3062, "step": 3976 }, { "epoch": 0.2667605728275816, "grad_norm": 0.14780158405639887, "learning_rate": 2e-05, "loss": 5.4289, "step": 3977 }, { "epoch": 0.2668276486568065, "grad_norm": 0.1485248240817027, "learning_rate": 2e-05, "loss": 5.5061, "step": 3978 }, { "epoch": 0.26689472448603146, "grad_norm": 0.15110609963248903, "learning_rate": 2e-05, "loss": 5.5296, "step": 3979 }, { "epoch": 0.2669618003152564, "grad_norm": 0.1546623604405714, "learning_rate": 2e-05, "loss": 5.4598, "step": 3980 }, { "epoch": 0.26702887614448134, "grad_norm": 0.1560529227763332, "learning_rate": 2e-05, "loss": 5.5236, "step": 3981 }, { "epoch": 0.2670959519737063, "grad_norm": 0.15160115450932635, "learning_rate": 2e-05, "loss": 5.3832, "step": 3982 }, { "epoch": 0.2671630278029312, "grad_norm": 0.14940625709504377, "learning_rate": 2e-05, "loss": 5.4199, "step": 3983 }, { "epoch": 0.26723010363215616, "grad_norm": 0.15089418420658865, "learning_rate": 2e-05, "loss": 5.4891, "step": 3984 }, { "epoch": 0.2672971794613811, "grad_norm": 0.15149544081078248, "learning_rate": 2e-05, "loss": 5.4743, "step": 3985 }, { "epoch": 0.26736425529060603, "grad_norm": 0.1590470388736261, "learning_rate": 2e-05, "loss": 5.3959, "step": 3986 }, { "epoch": 0.267431331119831, "grad_norm": 0.14569233425858072, "learning_rate": 2e-05, "loss": 5.421, "step": 3987 }, { "epoch": 0.2674984069490559, "grad_norm": 0.15299566876328138, "learning_rate": 2e-05, "loss": 5.3958, "step": 3988 }, { "epoch": 0.26756548277828085, "grad_norm": 0.1530500652312546, "learning_rate": 2e-05, "loss": 5.4839, "step": 3989 }, { "epoch": 0.2676325586075058, "grad_norm": 0.1493130861294051, "learning_rate": 2e-05, "loss": 5.433, "step": 3990 }, { "epoch": 0.2676996344367307, "grad_norm": 0.14637908964705038, "learning_rate": 2e-05, "loss": 5.5441, "step": 3991 }, { "epoch": 0.26776671026595567, "grad_norm": 0.14461814367437367, "learning_rate": 2e-05, "loss": 5.5222, "step": 3992 }, { "epoch": 0.2678337860951806, "grad_norm": 0.14941149992524735, "learning_rate": 2e-05, "loss": 5.5347, "step": 3993 }, { "epoch": 0.26790086192440554, "grad_norm": 0.15257382657383972, "learning_rate": 2e-05, "loss": 5.5434, "step": 3994 }, { "epoch": 0.2679679377536305, "grad_norm": 0.14264895502712277, "learning_rate": 2e-05, "loss": 5.4963, "step": 3995 }, { "epoch": 0.2680350135828554, "grad_norm": 0.1529453805687773, "learning_rate": 2e-05, "loss": 5.4391, "step": 3996 }, { "epoch": 0.26810208941208036, "grad_norm": 0.14387444150300663, "learning_rate": 2e-05, "loss": 5.4612, "step": 3997 }, { "epoch": 0.2681691652413053, "grad_norm": 0.1528660580646947, "learning_rate": 2e-05, "loss": 5.4257, "step": 3998 }, { "epoch": 0.26823624107053023, "grad_norm": 0.15016025250543324, "learning_rate": 2e-05, "loss": 5.4457, "step": 3999 }, { "epoch": 0.2683033168997552, "grad_norm": 0.1506383656572807, "learning_rate": 2e-05, "loss": 5.4627, "step": 4000 }, { "epoch": 0.2683703927289801, "grad_norm": 0.1491226530026307, "learning_rate": 2e-05, "loss": 5.3009, "step": 4001 }, { "epoch": 0.26843746855820505, "grad_norm": 0.1479862197980063, "learning_rate": 2e-05, "loss": 5.3096, "step": 4002 }, { "epoch": 0.26850454438743, "grad_norm": 0.15844295410647932, "learning_rate": 2e-05, "loss": 5.3801, "step": 4003 }, { "epoch": 0.2685716202166549, "grad_norm": 0.14948410190571407, "learning_rate": 2e-05, "loss": 5.4065, "step": 4004 }, { "epoch": 0.26863869604587987, "grad_norm": 0.15637300431842427, "learning_rate": 2e-05, "loss": 5.3617, "step": 4005 }, { "epoch": 0.2687057718751048, "grad_norm": 0.1651002846933649, "learning_rate": 2e-05, "loss": 5.3597, "step": 4006 }, { "epoch": 0.26877284770432974, "grad_norm": 0.16170135210059888, "learning_rate": 2e-05, "loss": 5.6067, "step": 4007 }, { "epoch": 0.2688399235335547, "grad_norm": 0.1533657588586712, "learning_rate": 2e-05, "loss": 5.4495, "step": 4008 }, { "epoch": 0.2689069993627796, "grad_norm": 0.15426636658665674, "learning_rate": 2e-05, "loss": 5.3782, "step": 4009 }, { "epoch": 0.26897407519200456, "grad_norm": 0.15897235335286908, "learning_rate": 2e-05, "loss": 5.4384, "step": 4010 }, { "epoch": 0.2690411510212295, "grad_norm": 0.1501870374393895, "learning_rate": 2e-05, "loss": 5.48, "step": 4011 }, { "epoch": 0.26910822685045444, "grad_norm": 0.14628722061409957, "learning_rate": 2e-05, "loss": 5.4806, "step": 4012 }, { "epoch": 0.2691753026796794, "grad_norm": 0.15770345305024938, "learning_rate": 2e-05, "loss": 5.5962, "step": 4013 }, { "epoch": 0.2692423785089043, "grad_norm": 0.15924108180482627, "learning_rate": 2e-05, "loss": 5.4535, "step": 4014 }, { "epoch": 0.26930945433812925, "grad_norm": 0.14698488926723588, "learning_rate": 2e-05, "loss": 5.5448, "step": 4015 }, { "epoch": 0.2693765301673542, "grad_norm": 0.15015298849145567, "learning_rate": 2e-05, "loss": 5.3155, "step": 4016 }, { "epoch": 0.2694436059965791, "grad_norm": 0.15647140162074277, "learning_rate": 2e-05, "loss": 5.3404, "step": 4017 }, { "epoch": 0.26951068182580407, "grad_norm": 0.14859888982245759, "learning_rate": 2e-05, "loss": 5.4315, "step": 4018 }, { "epoch": 0.269577757655029, "grad_norm": 0.1436600048186052, "learning_rate": 2e-05, "loss": 5.3825, "step": 4019 }, { "epoch": 0.26964483348425394, "grad_norm": 0.15560455067486184, "learning_rate": 2e-05, "loss": 5.5367, "step": 4020 }, { "epoch": 0.2697119093134789, "grad_norm": 0.15844949643341005, "learning_rate": 2e-05, "loss": 5.3786, "step": 4021 }, { "epoch": 0.2697789851427038, "grad_norm": 0.15122823814188055, "learning_rate": 2e-05, "loss": 5.4912, "step": 4022 }, { "epoch": 0.26984606097192876, "grad_norm": 0.15103928086612023, "learning_rate": 2e-05, "loss": 5.5401, "step": 4023 }, { "epoch": 0.2699131368011537, "grad_norm": 0.156539107872407, "learning_rate": 2e-05, "loss": 5.4523, "step": 4024 }, { "epoch": 0.26998021263037864, "grad_norm": 0.15130195428876642, "learning_rate": 2e-05, "loss": 5.4775, "step": 4025 }, { "epoch": 0.2700472884596036, "grad_norm": 0.14459042634177868, "learning_rate": 2e-05, "loss": 5.2902, "step": 4026 }, { "epoch": 0.2701143642888285, "grad_norm": 0.1491607686163095, "learning_rate": 2e-05, "loss": 5.4352, "step": 4027 }, { "epoch": 0.27018144011805345, "grad_norm": 0.1440402534633965, "learning_rate": 2e-05, "loss": 5.3895, "step": 4028 }, { "epoch": 0.2702485159472784, "grad_norm": 0.15550186736623625, "learning_rate": 2e-05, "loss": 5.3848, "step": 4029 }, { "epoch": 0.27031559177650333, "grad_norm": 0.1395105477728193, "learning_rate": 2e-05, "loss": 5.61, "step": 4030 }, { "epoch": 0.27038266760572827, "grad_norm": 0.14352264799104159, "learning_rate": 2e-05, "loss": 5.3972, "step": 4031 }, { "epoch": 0.2704497434349532, "grad_norm": 0.15306248515528173, "learning_rate": 2e-05, "loss": 5.4519, "step": 4032 }, { "epoch": 0.27051681926417814, "grad_norm": 0.14294084866275805, "learning_rate": 2e-05, "loss": 5.5591, "step": 4033 }, { "epoch": 0.2705838950934031, "grad_norm": 0.1510892460330588, "learning_rate": 2e-05, "loss": 5.4882, "step": 4034 }, { "epoch": 0.270650970922628, "grad_norm": 0.15467593086176307, "learning_rate": 2e-05, "loss": 5.4687, "step": 4035 }, { "epoch": 0.27071804675185296, "grad_norm": 0.1443802659692695, "learning_rate": 2e-05, "loss": 5.4275, "step": 4036 }, { "epoch": 0.2707851225810779, "grad_norm": 0.14109784694533045, "learning_rate": 2e-05, "loss": 5.4305, "step": 4037 }, { "epoch": 0.27085219841030284, "grad_norm": 0.1506743384497489, "learning_rate": 2e-05, "loss": 5.3074, "step": 4038 }, { "epoch": 0.2709192742395278, "grad_norm": 0.14412564676548184, "learning_rate": 2e-05, "loss": 5.5208, "step": 4039 }, { "epoch": 0.2709863500687527, "grad_norm": 0.14359260235394306, "learning_rate": 2e-05, "loss": 5.5417, "step": 4040 }, { "epoch": 0.27105342589797765, "grad_norm": 0.152271357844385, "learning_rate": 2e-05, "loss": 5.3863, "step": 4041 }, { "epoch": 0.2711205017272026, "grad_norm": 0.14231721872606426, "learning_rate": 2e-05, "loss": 5.4674, "step": 4042 }, { "epoch": 0.27118757755642753, "grad_norm": 0.14783462599573524, "learning_rate": 2e-05, "loss": 5.4661, "step": 4043 }, { "epoch": 0.27125465338565247, "grad_norm": 0.15470527384936894, "learning_rate": 2e-05, "loss": 5.5303, "step": 4044 }, { "epoch": 0.2713217292148774, "grad_norm": 0.14160153972229034, "learning_rate": 2e-05, "loss": 5.4565, "step": 4045 }, { "epoch": 0.27138880504410234, "grad_norm": 0.1458510318409033, "learning_rate": 2e-05, "loss": 5.3225, "step": 4046 }, { "epoch": 0.2714558808733273, "grad_norm": 0.14609365537346464, "learning_rate": 2e-05, "loss": 5.3759, "step": 4047 }, { "epoch": 0.2715229567025522, "grad_norm": 0.14356178478901552, "learning_rate": 2e-05, "loss": 5.4336, "step": 4048 }, { "epoch": 0.27159003253177716, "grad_norm": 0.144997692370976, "learning_rate": 2e-05, "loss": 5.4863, "step": 4049 }, { "epoch": 0.2716571083610021, "grad_norm": 0.14748758711363538, "learning_rate": 2e-05, "loss": 5.5075, "step": 4050 }, { "epoch": 0.27172418419022704, "grad_norm": 0.1491829699718845, "learning_rate": 2e-05, "loss": 5.3835, "step": 4051 }, { "epoch": 0.271791260019452, "grad_norm": 0.14877011809843038, "learning_rate": 2e-05, "loss": 5.4923, "step": 4052 }, { "epoch": 0.2718583358486769, "grad_norm": 0.14957604341918115, "learning_rate": 2e-05, "loss": 5.3951, "step": 4053 }, { "epoch": 0.27192541167790185, "grad_norm": 0.1552661196361312, "learning_rate": 2e-05, "loss": 5.5378, "step": 4054 }, { "epoch": 0.2719924875071268, "grad_norm": 0.15146516915767905, "learning_rate": 2e-05, "loss": 5.4808, "step": 4055 }, { "epoch": 0.27205956333635173, "grad_norm": 0.16191816221876068, "learning_rate": 2e-05, "loss": 5.5532, "step": 4056 }, { "epoch": 0.27212663916557667, "grad_norm": 0.1514337350992612, "learning_rate": 2e-05, "loss": 5.45, "step": 4057 }, { "epoch": 0.2721937149948016, "grad_norm": 0.16639592704904713, "learning_rate": 2e-05, "loss": 5.4002, "step": 4058 }, { "epoch": 0.27226079082402654, "grad_norm": 0.14756464890440504, "learning_rate": 2e-05, "loss": 5.4645, "step": 4059 }, { "epoch": 0.2723278666532515, "grad_norm": 0.15539069524109989, "learning_rate": 2e-05, "loss": 5.3893, "step": 4060 }, { "epoch": 0.2723949424824764, "grad_norm": 0.15018571363125266, "learning_rate": 2e-05, "loss": 5.3747, "step": 4061 }, { "epoch": 0.27246201831170136, "grad_norm": 0.16201294288413687, "learning_rate": 2e-05, "loss": 5.5854, "step": 4062 }, { "epoch": 0.2725290941409263, "grad_norm": 0.14649527033602472, "learning_rate": 2e-05, "loss": 5.4915, "step": 4063 }, { "epoch": 0.27259616997015124, "grad_norm": 0.1486480958896805, "learning_rate": 2e-05, "loss": 5.4531, "step": 4064 }, { "epoch": 0.2726632457993762, "grad_norm": 0.1522739555437533, "learning_rate": 2e-05, "loss": 5.4016, "step": 4065 }, { "epoch": 0.2727303216286011, "grad_norm": 0.15556159222965216, "learning_rate": 2e-05, "loss": 5.4615, "step": 4066 }, { "epoch": 0.27279739745782605, "grad_norm": 0.14766064252694125, "learning_rate": 2e-05, "loss": 5.4462, "step": 4067 }, { "epoch": 0.272864473287051, "grad_norm": 0.15345247497740444, "learning_rate": 2e-05, "loss": 5.3887, "step": 4068 }, { "epoch": 0.27293154911627593, "grad_norm": 0.16445379288701506, "learning_rate": 2e-05, "loss": 5.3135, "step": 4069 }, { "epoch": 0.27299862494550087, "grad_norm": 0.14867166578157512, "learning_rate": 2e-05, "loss": 5.4107, "step": 4070 }, { "epoch": 0.2730657007747258, "grad_norm": 0.14348702191429824, "learning_rate": 2e-05, "loss": 5.4058, "step": 4071 }, { "epoch": 0.27313277660395074, "grad_norm": 0.16109184780567992, "learning_rate": 2e-05, "loss": 5.4352, "step": 4072 }, { "epoch": 0.2731998524331757, "grad_norm": 0.15874077859733723, "learning_rate": 2e-05, "loss": 5.4482, "step": 4073 }, { "epoch": 0.2732669282624006, "grad_norm": 0.14926414454299927, "learning_rate": 2e-05, "loss": 5.6299, "step": 4074 }, { "epoch": 0.27333400409162556, "grad_norm": 0.15514703189121695, "learning_rate": 2e-05, "loss": 5.4423, "step": 4075 }, { "epoch": 0.2734010799208505, "grad_norm": 0.15289669340540735, "learning_rate": 2e-05, "loss": 5.496, "step": 4076 }, { "epoch": 0.27346815575007544, "grad_norm": 0.14784179912154802, "learning_rate": 2e-05, "loss": 5.4637, "step": 4077 }, { "epoch": 0.2735352315793004, "grad_norm": 0.15309393763101015, "learning_rate": 2e-05, "loss": 5.584, "step": 4078 }, { "epoch": 0.2736023074085253, "grad_norm": 0.15323639274083803, "learning_rate": 2e-05, "loss": 5.4334, "step": 4079 }, { "epoch": 0.27366938323775025, "grad_norm": 0.14694123643484733, "learning_rate": 2e-05, "loss": 5.4059, "step": 4080 }, { "epoch": 0.2737364590669752, "grad_norm": 0.14819336260368848, "learning_rate": 2e-05, "loss": 5.5959, "step": 4081 }, { "epoch": 0.27380353489620013, "grad_norm": 0.1428748049394505, "learning_rate": 2e-05, "loss": 5.3837, "step": 4082 }, { "epoch": 0.27387061072542507, "grad_norm": 0.15379129208287598, "learning_rate": 2e-05, "loss": 5.3461, "step": 4083 }, { "epoch": 0.27393768655465, "grad_norm": 0.14993411784787034, "learning_rate": 2e-05, "loss": 5.632, "step": 4084 }, { "epoch": 0.27400476238387494, "grad_norm": 0.14324043086721272, "learning_rate": 2e-05, "loss": 5.4773, "step": 4085 }, { "epoch": 0.2740718382130999, "grad_norm": 0.15412434909886946, "learning_rate": 2e-05, "loss": 5.5022, "step": 4086 }, { "epoch": 0.2741389140423248, "grad_norm": 0.14464645574447263, "learning_rate": 2e-05, "loss": 5.5289, "step": 4087 }, { "epoch": 0.27420598987154976, "grad_norm": 0.1525400868062185, "learning_rate": 2e-05, "loss": 5.3105, "step": 4088 }, { "epoch": 0.2742730657007747, "grad_norm": 0.1439768415401476, "learning_rate": 2e-05, "loss": 5.5135, "step": 4089 }, { "epoch": 0.27434014152999964, "grad_norm": 0.15081843220636237, "learning_rate": 2e-05, "loss": 5.3704, "step": 4090 }, { "epoch": 0.27440721735922463, "grad_norm": 0.14582595422154468, "learning_rate": 2e-05, "loss": 5.4494, "step": 4091 }, { "epoch": 0.27447429318844957, "grad_norm": 0.1520888535031454, "learning_rate": 2e-05, "loss": 5.4683, "step": 4092 }, { "epoch": 0.2745413690176745, "grad_norm": 0.1443313101156938, "learning_rate": 2e-05, "loss": 5.5678, "step": 4093 }, { "epoch": 0.27460844484689945, "grad_norm": 0.14869574062537894, "learning_rate": 2e-05, "loss": 5.537, "step": 4094 }, { "epoch": 0.2746755206761244, "grad_norm": 0.15064387718796454, "learning_rate": 2e-05, "loss": 5.4022, "step": 4095 }, { "epoch": 0.2747425965053493, "grad_norm": 0.1456645371340093, "learning_rate": 2e-05, "loss": 5.4509, "step": 4096 }, { "epoch": 0.27480967233457426, "grad_norm": 0.1486928354832347, "learning_rate": 2e-05, "loss": 5.4929, "step": 4097 }, { "epoch": 0.2748767481637992, "grad_norm": 0.1522355285410282, "learning_rate": 2e-05, "loss": 5.314, "step": 4098 }, { "epoch": 0.27494382399302414, "grad_norm": 0.15467220232952622, "learning_rate": 2e-05, "loss": 5.3424, "step": 4099 }, { "epoch": 0.2750108998222491, "grad_norm": 0.1501567214299044, "learning_rate": 2e-05, "loss": 5.455, "step": 4100 }, { "epoch": 0.275077975651474, "grad_norm": 0.15733114066295525, "learning_rate": 2e-05, "loss": 5.6115, "step": 4101 }, { "epoch": 0.27514505148069895, "grad_norm": 0.15180588695206082, "learning_rate": 2e-05, "loss": 5.2306, "step": 4102 }, { "epoch": 0.2752121273099239, "grad_norm": 0.15727496676538766, "learning_rate": 2e-05, "loss": 5.5279, "step": 4103 }, { "epoch": 0.27527920313914883, "grad_norm": 0.1544517405112958, "learning_rate": 2e-05, "loss": 5.4355, "step": 4104 }, { "epoch": 0.27534627896837377, "grad_norm": 0.1598482361541539, "learning_rate": 2e-05, "loss": 5.4472, "step": 4105 }, { "epoch": 0.2754133547975987, "grad_norm": 0.15582340920433344, "learning_rate": 2e-05, "loss": 5.369, "step": 4106 }, { "epoch": 0.27548043062682365, "grad_norm": 0.1475449891490308, "learning_rate": 2e-05, "loss": 5.4543, "step": 4107 }, { "epoch": 0.2755475064560486, "grad_norm": 0.16543471690675432, "learning_rate": 2e-05, "loss": 5.5459, "step": 4108 }, { "epoch": 0.2756145822852735, "grad_norm": 0.16244288708880583, "learning_rate": 2e-05, "loss": 5.3778, "step": 4109 }, { "epoch": 0.27568165811449846, "grad_norm": 0.15532901434566232, "learning_rate": 2e-05, "loss": 5.541, "step": 4110 }, { "epoch": 0.2757487339437234, "grad_norm": 0.15138743205016916, "learning_rate": 2e-05, "loss": 5.5883, "step": 4111 }, { "epoch": 0.27581580977294834, "grad_norm": 0.15812695668384347, "learning_rate": 2e-05, "loss": 5.3698, "step": 4112 }, { "epoch": 0.2758828856021733, "grad_norm": 0.15152618403766843, "learning_rate": 2e-05, "loss": 5.4902, "step": 4113 }, { "epoch": 0.2759499614313982, "grad_norm": 0.1589672420581012, "learning_rate": 2e-05, "loss": 5.5266, "step": 4114 }, { "epoch": 0.27601703726062315, "grad_norm": 0.16761580538270693, "learning_rate": 2e-05, "loss": 5.4443, "step": 4115 }, { "epoch": 0.2760841130898481, "grad_norm": 0.15982646374919612, "learning_rate": 2e-05, "loss": 5.624, "step": 4116 }, { "epoch": 0.27615118891907303, "grad_norm": 0.14601158824396357, "learning_rate": 2e-05, "loss": 5.4237, "step": 4117 }, { "epoch": 0.27621826474829797, "grad_norm": 0.15759836363620652, "learning_rate": 2e-05, "loss": 5.4242, "step": 4118 }, { "epoch": 0.2762853405775229, "grad_norm": 0.1628020328155809, "learning_rate": 2e-05, "loss": 5.3557, "step": 4119 }, { "epoch": 0.27635241640674785, "grad_norm": 0.1536032641970959, "learning_rate": 2e-05, "loss": 5.3027, "step": 4120 }, { "epoch": 0.2764194922359728, "grad_norm": 0.15605052472157843, "learning_rate": 2e-05, "loss": 5.4223, "step": 4121 }, { "epoch": 0.2764865680651977, "grad_norm": 0.1820969902861478, "learning_rate": 2e-05, "loss": 5.4095, "step": 4122 }, { "epoch": 0.27655364389442266, "grad_norm": 0.16397926952136221, "learning_rate": 2e-05, "loss": 5.5056, "step": 4123 }, { "epoch": 0.2766207197236476, "grad_norm": 0.14673198283871172, "learning_rate": 2e-05, "loss": 5.5883, "step": 4124 }, { "epoch": 0.27668779555287254, "grad_norm": 0.1640938474772526, "learning_rate": 2e-05, "loss": 5.5054, "step": 4125 }, { "epoch": 0.2767548713820975, "grad_norm": 0.16061381092949495, "learning_rate": 2e-05, "loss": 5.4159, "step": 4126 }, { "epoch": 0.2768219472113224, "grad_norm": 0.14342164030566945, "learning_rate": 2e-05, "loss": 5.4554, "step": 4127 }, { "epoch": 0.27688902304054736, "grad_norm": 0.1559434792775877, "learning_rate": 2e-05, "loss": 5.396, "step": 4128 }, { "epoch": 0.2769560988697723, "grad_norm": 0.15539676962788176, "learning_rate": 2e-05, "loss": 5.3616, "step": 4129 }, { "epoch": 0.27702317469899723, "grad_norm": 0.15433447065500333, "learning_rate": 2e-05, "loss": 5.4342, "step": 4130 }, { "epoch": 0.27709025052822217, "grad_norm": 0.14622631409558381, "learning_rate": 2e-05, "loss": 5.5679, "step": 4131 }, { "epoch": 0.2771573263574471, "grad_norm": 0.15956853347736275, "learning_rate": 2e-05, "loss": 5.4336, "step": 4132 }, { "epoch": 0.27722440218667205, "grad_norm": 0.1615426320250839, "learning_rate": 2e-05, "loss": 5.3634, "step": 4133 }, { "epoch": 0.277291478015897, "grad_norm": 0.15208850531064724, "learning_rate": 2e-05, "loss": 5.4281, "step": 4134 }, { "epoch": 0.2773585538451219, "grad_norm": 0.14830541811032244, "learning_rate": 2e-05, "loss": 5.3906, "step": 4135 }, { "epoch": 0.27742562967434686, "grad_norm": 0.15949689886700133, "learning_rate": 2e-05, "loss": 5.3592, "step": 4136 }, { "epoch": 0.2774927055035718, "grad_norm": 0.14721716640531549, "learning_rate": 2e-05, "loss": 5.4507, "step": 4137 }, { "epoch": 0.27755978133279674, "grad_norm": 0.1477403648455687, "learning_rate": 2e-05, "loss": 5.34, "step": 4138 }, { "epoch": 0.2776268571620217, "grad_norm": 0.1539807589677248, "learning_rate": 2e-05, "loss": 5.5452, "step": 4139 }, { "epoch": 0.2776939329912466, "grad_norm": 0.16099008450939825, "learning_rate": 2e-05, "loss": 5.6494, "step": 4140 }, { "epoch": 0.27776100882047156, "grad_norm": 0.14834040413891822, "learning_rate": 2e-05, "loss": 5.5821, "step": 4141 }, { "epoch": 0.2778280846496965, "grad_norm": 0.1545181555443981, "learning_rate": 2e-05, "loss": 5.4691, "step": 4142 }, { "epoch": 0.27789516047892143, "grad_norm": 0.14912546129805215, "learning_rate": 2e-05, "loss": 5.5659, "step": 4143 }, { "epoch": 0.27796223630814637, "grad_norm": 0.14288639505888356, "learning_rate": 2e-05, "loss": 5.4708, "step": 4144 }, { "epoch": 0.2780293121373713, "grad_norm": 0.15450336006430415, "learning_rate": 2e-05, "loss": 5.3666, "step": 4145 }, { "epoch": 0.27809638796659625, "grad_norm": 0.14996483340793465, "learning_rate": 2e-05, "loss": 5.4091, "step": 4146 }, { "epoch": 0.2781634637958212, "grad_norm": 0.14761261997075417, "learning_rate": 2e-05, "loss": 5.4636, "step": 4147 }, { "epoch": 0.2782305396250461, "grad_norm": 0.14381956413504757, "learning_rate": 2e-05, "loss": 5.4845, "step": 4148 }, { "epoch": 0.27829761545427106, "grad_norm": 0.14300703638995454, "learning_rate": 2e-05, "loss": 5.416, "step": 4149 }, { "epoch": 0.278364691283496, "grad_norm": 0.14622953558546262, "learning_rate": 2e-05, "loss": 5.5719, "step": 4150 }, { "epoch": 0.27843176711272094, "grad_norm": 0.14806251125801995, "learning_rate": 2e-05, "loss": 5.5814, "step": 4151 }, { "epoch": 0.2784988429419459, "grad_norm": 0.14259532621084495, "learning_rate": 2e-05, "loss": 5.4264, "step": 4152 }, { "epoch": 0.2785659187711708, "grad_norm": 0.14235747638672733, "learning_rate": 2e-05, "loss": 5.3471, "step": 4153 }, { "epoch": 0.27863299460039576, "grad_norm": 0.141832544719108, "learning_rate": 2e-05, "loss": 5.3878, "step": 4154 }, { "epoch": 0.2787000704296207, "grad_norm": 0.14408348193020845, "learning_rate": 2e-05, "loss": 5.4368, "step": 4155 }, { "epoch": 0.27876714625884563, "grad_norm": 0.14226990978082277, "learning_rate": 2e-05, "loss": 5.4957, "step": 4156 }, { "epoch": 0.27883422208807057, "grad_norm": 0.14218932570966877, "learning_rate": 2e-05, "loss": 5.5188, "step": 4157 }, { "epoch": 0.2789012979172955, "grad_norm": 0.14377661241677933, "learning_rate": 2e-05, "loss": 5.4219, "step": 4158 }, { "epoch": 0.27896837374652045, "grad_norm": 0.1456146730981462, "learning_rate": 2e-05, "loss": 5.4299, "step": 4159 }, { "epoch": 0.2790354495757454, "grad_norm": 0.14671007785539272, "learning_rate": 2e-05, "loss": 5.469, "step": 4160 }, { "epoch": 0.2791025254049703, "grad_norm": 0.14948237138971202, "learning_rate": 2e-05, "loss": 5.4916, "step": 4161 }, { "epoch": 0.27916960123419526, "grad_norm": 0.1468236459499631, "learning_rate": 2e-05, "loss": 5.4853, "step": 4162 }, { "epoch": 0.2792366770634202, "grad_norm": 0.15905936796499737, "learning_rate": 2e-05, "loss": 5.5655, "step": 4163 }, { "epoch": 0.27930375289264514, "grad_norm": 0.15467165982750114, "learning_rate": 2e-05, "loss": 5.3729, "step": 4164 }, { "epoch": 0.2793708287218701, "grad_norm": 0.15755806898519642, "learning_rate": 2e-05, "loss": 5.4365, "step": 4165 }, { "epoch": 0.279437904551095, "grad_norm": 0.15428913035791528, "learning_rate": 2e-05, "loss": 5.508, "step": 4166 }, { "epoch": 0.27950498038031996, "grad_norm": 0.14857360779200088, "learning_rate": 2e-05, "loss": 5.3467, "step": 4167 }, { "epoch": 0.2795720562095449, "grad_norm": 0.1564175276209204, "learning_rate": 2e-05, "loss": 5.4481, "step": 4168 }, { "epoch": 0.27963913203876983, "grad_norm": 0.15514158477316717, "learning_rate": 2e-05, "loss": 5.3002, "step": 4169 }, { "epoch": 0.27970620786799477, "grad_norm": 0.14790686312905987, "learning_rate": 2e-05, "loss": 5.4431, "step": 4170 }, { "epoch": 0.2797732836972197, "grad_norm": 0.15365544574792472, "learning_rate": 2e-05, "loss": 5.3262, "step": 4171 }, { "epoch": 0.27984035952644465, "grad_norm": 0.15290492268639044, "learning_rate": 2e-05, "loss": 5.4973, "step": 4172 }, { "epoch": 0.2799074353556696, "grad_norm": 0.14033138523962047, "learning_rate": 2e-05, "loss": 5.3542, "step": 4173 }, { "epoch": 0.2799745111848945, "grad_norm": 0.15712957055940224, "learning_rate": 2e-05, "loss": 5.3513, "step": 4174 }, { "epoch": 0.28004158701411946, "grad_norm": 0.14290999720345637, "learning_rate": 2e-05, "loss": 5.3529, "step": 4175 }, { "epoch": 0.2801086628433444, "grad_norm": 0.1488015984379451, "learning_rate": 2e-05, "loss": 5.606, "step": 4176 }, { "epoch": 0.28017573867256934, "grad_norm": 0.14622815311354515, "learning_rate": 2e-05, "loss": 5.3576, "step": 4177 }, { "epoch": 0.2802428145017943, "grad_norm": 0.1568594891759342, "learning_rate": 2e-05, "loss": 5.349, "step": 4178 }, { "epoch": 0.2803098903310192, "grad_norm": 0.14731535138858792, "learning_rate": 2e-05, "loss": 5.397, "step": 4179 }, { "epoch": 0.28037696616024416, "grad_norm": 0.1467484556578845, "learning_rate": 2e-05, "loss": 5.3449, "step": 4180 }, { "epoch": 0.2804440419894691, "grad_norm": 0.15275502248270334, "learning_rate": 2e-05, "loss": 5.4738, "step": 4181 }, { "epoch": 0.28051111781869403, "grad_norm": 0.14334141755914057, "learning_rate": 2e-05, "loss": 5.3698, "step": 4182 }, { "epoch": 0.28057819364791897, "grad_norm": 0.13882716663401373, "learning_rate": 2e-05, "loss": 5.4051, "step": 4183 }, { "epoch": 0.2806452694771439, "grad_norm": 0.15564568447183752, "learning_rate": 2e-05, "loss": 5.459, "step": 4184 }, { "epoch": 0.28071234530636885, "grad_norm": 0.1500014552308296, "learning_rate": 2e-05, "loss": 5.4165, "step": 4185 }, { "epoch": 0.2807794211355938, "grad_norm": 0.15189539813154015, "learning_rate": 2e-05, "loss": 5.3912, "step": 4186 }, { "epoch": 0.2808464969648187, "grad_norm": 0.14451676195304103, "learning_rate": 2e-05, "loss": 5.4254, "step": 4187 }, { "epoch": 0.28091357279404366, "grad_norm": 0.14034127385671746, "learning_rate": 2e-05, "loss": 5.4794, "step": 4188 }, { "epoch": 0.2809806486232686, "grad_norm": 0.14902262907837083, "learning_rate": 2e-05, "loss": 5.427, "step": 4189 }, { "epoch": 0.28104772445249354, "grad_norm": 0.14314775836058813, "learning_rate": 2e-05, "loss": 5.4628, "step": 4190 }, { "epoch": 0.2811148002817185, "grad_norm": 0.15074886905597892, "learning_rate": 2e-05, "loss": 5.535, "step": 4191 }, { "epoch": 0.2811818761109434, "grad_norm": 0.14782900024066162, "learning_rate": 2e-05, "loss": 5.3076, "step": 4192 }, { "epoch": 0.28124895194016836, "grad_norm": 0.14696032416589855, "learning_rate": 2e-05, "loss": 5.3914, "step": 4193 }, { "epoch": 0.2813160277693933, "grad_norm": 0.14215574163411482, "learning_rate": 2e-05, "loss": 5.514, "step": 4194 }, { "epoch": 0.28138310359861823, "grad_norm": 0.1474656141750625, "learning_rate": 2e-05, "loss": 5.3927, "step": 4195 }, { "epoch": 0.2814501794278432, "grad_norm": 0.14574274058110845, "learning_rate": 2e-05, "loss": 5.3956, "step": 4196 }, { "epoch": 0.2815172552570681, "grad_norm": 0.1401248331516252, "learning_rate": 2e-05, "loss": 5.5348, "step": 4197 }, { "epoch": 0.28158433108629305, "grad_norm": 0.14959442910966878, "learning_rate": 2e-05, "loss": 5.452, "step": 4198 }, { "epoch": 0.281651406915518, "grad_norm": 0.15403469901247946, "learning_rate": 2e-05, "loss": 5.5064, "step": 4199 }, { "epoch": 0.2817184827447429, "grad_norm": 0.14827839395139344, "learning_rate": 2e-05, "loss": 5.3541, "step": 4200 }, { "epoch": 0.28178555857396786, "grad_norm": 0.1383740848699618, "learning_rate": 2e-05, "loss": 5.3293, "step": 4201 }, { "epoch": 0.2818526344031928, "grad_norm": 0.1499252854927373, "learning_rate": 2e-05, "loss": 5.4775, "step": 4202 }, { "epoch": 0.28191971023241774, "grad_norm": 0.1554792443971762, "learning_rate": 2e-05, "loss": 5.3921, "step": 4203 }, { "epoch": 0.2819867860616427, "grad_norm": 0.14638100891734543, "learning_rate": 2e-05, "loss": 5.4626, "step": 4204 }, { "epoch": 0.2820538618908676, "grad_norm": 0.14666548034922644, "learning_rate": 2e-05, "loss": 5.328, "step": 4205 }, { "epoch": 0.28212093772009256, "grad_norm": 0.14884032752229057, "learning_rate": 2e-05, "loss": 5.2952, "step": 4206 }, { "epoch": 0.2821880135493175, "grad_norm": 0.14510826304858915, "learning_rate": 2e-05, "loss": 5.4527, "step": 4207 }, { "epoch": 0.28225508937854243, "grad_norm": 0.15595811184824168, "learning_rate": 2e-05, "loss": 5.618, "step": 4208 }, { "epoch": 0.2823221652077674, "grad_norm": 0.16191743973650896, "learning_rate": 2e-05, "loss": 5.3533, "step": 4209 }, { "epoch": 0.2823892410369923, "grad_norm": 0.15258227415808104, "learning_rate": 2e-05, "loss": 5.4655, "step": 4210 }, { "epoch": 0.28245631686621725, "grad_norm": 0.14851336250865804, "learning_rate": 2e-05, "loss": 5.3805, "step": 4211 }, { "epoch": 0.2825233926954422, "grad_norm": 0.1647521698813511, "learning_rate": 2e-05, "loss": 5.4848, "step": 4212 }, { "epoch": 0.2825904685246671, "grad_norm": 0.1517039055581649, "learning_rate": 2e-05, "loss": 5.3001, "step": 4213 }, { "epoch": 0.28265754435389207, "grad_norm": 0.1475924345944766, "learning_rate": 2e-05, "loss": 5.4462, "step": 4214 }, { "epoch": 0.282724620183117, "grad_norm": 0.15120550351234685, "learning_rate": 2e-05, "loss": 5.5405, "step": 4215 }, { "epoch": 0.28279169601234194, "grad_norm": 0.1690886397091709, "learning_rate": 2e-05, "loss": 5.4468, "step": 4216 }, { "epoch": 0.2828587718415669, "grad_norm": 0.14657669637677134, "learning_rate": 2e-05, "loss": 5.4542, "step": 4217 }, { "epoch": 0.2829258476707918, "grad_norm": 0.16140914921294114, "learning_rate": 2e-05, "loss": 5.293, "step": 4218 }, { "epoch": 0.28299292350001676, "grad_norm": 0.1528805094124935, "learning_rate": 2e-05, "loss": 5.6173, "step": 4219 }, { "epoch": 0.2830599993292417, "grad_norm": 0.1447067373184129, "learning_rate": 2e-05, "loss": 5.3825, "step": 4220 }, { "epoch": 0.28312707515846663, "grad_norm": 0.15762814207113207, "learning_rate": 2e-05, "loss": 5.2724, "step": 4221 }, { "epoch": 0.2831941509876916, "grad_norm": 0.1511915831229116, "learning_rate": 2e-05, "loss": 5.4543, "step": 4222 }, { "epoch": 0.2832612268169165, "grad_norm": 0.15398291091200844, "learning_rate": 2e-05, "loss": 5.4624, "step": 4223 }, { "epoch": 0.28332830264614145, "grad_norm": 0.15428083126602, "learning_rate": 2e-05, "loss": 5.3306, "step": 4224 }, { "epoch": 0.2833953784753664, "grad_norm": 0.15369427482719158, "learning_rate": 2e-05, "loss": 5.4152, "step": 4225 }, { "epoch": 0.2834624543045913, "grad_norm": 0.15025278925108435, "learning_rate": 2e-05, "loss": 5.428, "step": 4226 }, { "epoch": 0.28352953013381627, "grad_norm": 0.14402203189121326, "learning_rate": 2e-05, "loss": 5.4837, "step": 4227 }, { "epoch": 0.2835966059630412, "grad_norm": 0.15624450031401624, "learning_rate": 2e-05, "loss": 5.513, "step": 4228 }, { "epoch": 0.28366368179226614, "grad_norm": 0.15845192321540835, "learning_rate": 2e-05, "loss": 5.5336, "step": 4229 }, { "epoch": 0.2837307576214911, "grad_norm": 0.15069380865015877, "learning_rate": 2e-05, "loss": 5.522, "step": 4230 }, { "epoch": 0.283797833450716, "grad_norm": 0.15741295650484055, "learning_rate": 2e-05, "loss": 5.4564, "step": 4231 }, { "epoch": 0.28386490927994096, "grad_norm": 0.15550182829337533, "learning_rate": 2e-05, "loss": 5.4777, "step": 4232 }, { "epoch": 0.2839319851091659, "grad_norm": 0.1462831659853542, "learning_rate": 2e-05, "loss": 5.3358, "step": 4233 }, { "epoch": 0.28399906093839083, "grad_norm": 0.14953505529372607, "learning_rate": 2e-05, "loss": 5.4687, "step": 4234 }, { "epoch": 0.2840661367676158, "grad_norm": 0.14770102032657817, "learning_rate": 2e-05, "loss": 5.4371, "step": 4235 }, { "epoch": 0.2841332125968407, "grad_norm": 0.1460947997895811, "learning_rate": 2e-05, "loss": 5.4132, "step": 4236 }, { "epoch": 0.28420028842606565, "grad_norm": 0.14641119995317137, "learning_rate": 2e-05, "loss": 5.3903, "step": 4237 }, { "epoch": 0.2842673642552906, "grad_norm": 0.1465546840599459, "learning_rate": 2e-05, "loss": 5.3945, "step": 4238 }, { "epoch": 0.2843344400845155, "grad_norm": 0.14470492581411526, "learning_rate": 2e-05, "loss": 5.4289, "step": 4239 }, { "epoch": 0.28440151591374047, "grad_norm": 0.14902268564176963, "learning_rate": 2e-05, "loss": 5.4603, "step": 4240 }, { "epoch": 0.2844685917429654, "grad_norm": 0.13940256052779812, "learning_rate": 2e-05, "loss": 5.3546, "step": 4241 }, { "epoch": 0.28453566757219034, "grad_norm": 0.13963924675026573, "learning_rate": 2e-05, "loss": 5.423, "step": 4242 }, { "epoch": 0.2846027434014153, "grad_norm": 0.1398212103324495, "learning_rate": 2e-05, "loss": 5.4062, "step": 4243 }, { "epoch": 0.2846698192306402, "grad_norm": 0.1545992844861654, "learning_rate": 2e-05, "loss": 5.5085, "step": 4244 }, { "epoch": 0.28473689505986516, "grad_norm": 0.1431345672617432, "learning_rate": 2e-05, "loss": 5.4896, "step": 4245 }, { "epoch": 0.2848039708890901, "grad_norm": 0.1524796085236813, "learning_rate": 2e-05, "loss": 5.346, "step": 4246 }, { "epoch": 0.28487104671831504, "grad_norm": 0.15297949604255817, "learning_rate": 2e-05, "loss": 5.5322, "step": 4247 }, { "epoch": 0.28493812254754, "grad_norm": 0.14092259222754722, "learning_rate": 2e-05, "loss": 5.3634, "step": 4248 }, { "epoch": 0.2850051983767649, "grad_norm": 0.14555791032461388, "learning_rate": 2e-05, "loss": 5.5858, "step": 4249 }, { "epoch": 0.28507227420598985, "grad_norm": 0.14154241573589404, "learning_rate": 2e-05, "loss": 5.3594, "step": 4250 }, { "epoch": 0.2851393500352148, "grad_norm": 0.1466775146157266, "learning_rate": 2e-05, "loss": 5.5421, "step": 4251 }, { "epoch": 0.2852064258644397, "grad_norm": 0.14014761227278524, "learning_rate": 2e-05, "loss": 5.3744, "step": 4252 }, { "epoch": 0.28527350169366467, "grad_norm": 0.15173824257328689, "learning_rate": 2e-05, "loss": 5.279, "step": 4253 }, { "epoch": 0.2853405775228896, "grad_norm": 0.14251731130802656, "learning_rate": 2e-05, "loss": 5.4049, "step": 4254 }, { "epoch": 0.28540765335211454, "grad_norm": 0.1455381096664899, "learning_rate": 2e-05, "loss": 5.5024, "step": 4255 }, { "epoch": 0.2854747291813395, "grad_norm": 0.1427573540339378, "learning_rate": 2e-05, "loss": 5.4839, "step": 4256 }, { "epoch": 0.2855418050105644, "grad_norm": 0.15045251391984651, "learning_rate": 2e-05, "loss": 5.4157, "step": 4257 }, { "epoch": 0.28560888083978936, "grad_norm": 0.14435715512928363, "learning_rate": 2e-05, "loss": 5.4799, "step": 4258 }, { "epoch": 0.2856759566690143, "grad_norm": 0.1455162445726727, "learning_rate": 2e-05, "loss": 5.4887, "step": 4259 }, { "epoch": 0.28574303249823924, "grad_norm": 0.1424067586851983, "learning_rate": 2e-05, "loss": 5.4017, "step": 4260 }, { "epoch": 0.2858101083274642, "grad_norm": 0.15221837115703768, "learning_rate": 2e-05, "loss": 5.4975, "step": 4261 }, { "epoch": 0.2858771841566891, "grad_norm": 0.14696557163057142, "learning_rate": 2e-05, "loss": 5.4884, "step": 4262 }, { "epoch": 0.28594425998591405, "grad_norm": 0.14550119046362278, "learning_rate": 2e-05, "loss": 5.4121, "step": 4263 }, { "epoch": 0.286011335815139, "grad_norm": 0.14298297968219814, "learning_rate": 2e-05, "loss": 5.4657, "step": 4264 }, { "epoch": 0.28607841164436393, "grad_norm": 0.1532524102246561, "learning_rate": 2e-05, "loss": 5.3701, "step": 4265 }, { "epoch": 0.28614548747358887, "grad_norm": 0.14283410644476482, "learning_rate": 2e-05, "loss": 5.3352, "step": 4266 }, { "epoch": 0.2862125633028138, "grad_norm": 0.144203414744147, "learning_rate": 2e-05, "loss": 5.4735, "step": 4267 }, { "epoch": 0.28627963913203874, "grad_norm": 0.14729842141463267, "learning_rate": 2e-05, "loss": 5.4758, "step": 4268 }, { "epoch": 0.2863467149612637, "grad_norm": 0.14541994368856737, "learning_rate": 2e-05, "loss": 5.5883, "step": 4269 }, { "epoch": 0.2864137907904886, "grad_norm": 0.1437541539115959, "learning_rate": 2e-05, "loss": 5.2122, "step": 4270 }, { "epoch": 0.28648086661971356, "grad_norm": 0.14930893513950666, "learning_rate": 2e-05, "loss": 5.3754, "step": 4271 }, { "epoch": 0.2865479424489385, "grad_norm": 0.1494631918432986, "learning_rate": 2e-05, "loss": 5.4587, "step": 4272 }, { "epoch": 0.2866150182781635, "grad_norm": 0.15222266826343825, "learning_rate": 2e-05, "loss": 5.5045, "step": 4273 }, { "epoch": 0.28668209410738843, "grad_norm": 0.14718894360367596, "learning_rate": 2e-05, "loss": 5.6169, "step": 4274 }, { "epoch": 0.28674916993661337, "grad_norm": 0.14977024941949604, "learning_rate": 2e-05, "loss": 5.5523, "step": 4275 }, { "epoch": 0.2868162457658383, "grad_norm": 0.14617614944006752, "learning_rate": 2e-05, "loss": 5.5, "step": 4276 }, { "epoch": 0.28688332159506325, "grad_norm": 0.1396768816950971, "learning_rate": 2e-05, "loss": 5.3594, "step": 4277 }, { "epoch": 0.2869503974242882, "grad_norm": 0.1417514557075887, "learning_rate": 2e-05, "loss": 5.3836, "step": 4278 }, { "epoch": 0.2870174732535131, "grad_norm": 0.15002992399542392, "learning_rate": 2e-05, "loss": 5.4586, "step": 4279 }, { "epoch": 0.28708454908273806, "grad_norm": 0.1555813753189421, "learning_rate": 2e-05, "loss": 5.562, "step": 4280 }, { "epoch": 0.287151624911963, "grad_norm": 0.14252335442648353, "learning_rate": 2e-05, "loss": 5.4556, "step": 4281 }, { "epoch": 0.28721870074118794, "grad_norm": 0.14445060486795183, "learning_rate": 2e-05, "loss": 5.2581, "step": 4282 }, { "epoch": 0.2872857765704129, "grad_norm": 0.1520681897236542, "learning_rate": 2e-05, "loss": 5.4272, "step": 4283 }, { "epoch": 0.2873528523996378, "grad_norm": 0.14807937495954765, "learning_rate": 2e-05, "loss": 5.3824, "step": 4284 }, { "epoch": 0.28741992822886275, "grad_norm": 0.1449371621135098, "learning_rate": 2e-05, "loss": 5.4415, "step": 4285 }, { "epoch": 0.2874870040580877, "grad_norm": 0.1428422126491746, "learning_rate": 2e-05, "loss": 5.4085, "step": 4286 }, { "epoch": 0.28755407988731263, "grad_norm": 0.15344917760474994, "learning_rate": 2e-05, "loss": 5.3433, "step": 4287 }, { "epoch": 0.28762115571653757, "grad_norm": 0.15105761832320116, "learning_rate": 2e-05, "loss": 5.4843, "step": 4288 }, { "epoch": 0.2876882315457625, "grad_norm": 0.14724558691720097, "learning_rate": 2e-05, "loss": 5.3819, "step": 4289 }, { "epoch": 0.28775530737498745, "grad_norm": 0.14816770269809068, "learning_rate": 2e-05, "loss": 5.4391, "step": 4290 }, { "epoch": 0.2878223832042124, "grad_norm": 0.15337850201303138, "learning_rate": 2e-05, "loss": 5.4682, "step": 4291 }, { "epoch": 0.2878894590334373, "grad_norm": 0.14874407320366576, "learning_rate": 2e-05, "loss": 5.4108, "step": 4292 }, { "epoch": 0.28795653486266226, "grad_norm": 0.15238311601592341, "learning_rate": 2e-05, "loss": 5.4416, "step": 4293 }, { "epoch": 0.2880236106918872, "grad_norm": 0.1458882285462947, "learning_rate": 2e-05, "loss": 5.552, "step": 4294 }, { "epoch": 0.28809068652111214, "grad_norm": 0.1642624077157309, "learning_rate": 2e-05, "loss": 5.4825, "step": 4295 }, { "epoch": 0.2881577623503371, "grad_norm": 0.15529732770382726, "learning_rate": 2e-05, "loss": 5.5949, "step": 4296 }, { "epoch": 0.288224838179562, "grad_norm": 0.15024288125546403, "learning_rate": 2e-05, "loss": 5.3361, "step": 4297 }, { "epoch": 0.28829191400878695, "grad_norm": 0.15819722878698475, "learning_rate": 2e-05, "loss": 5.4869, "step": 4298 }, { "epoch": 0.2883589898380119, "grad_norm": 0.15345583034394156, "learning_rate": 2e-05, "loss": 5.5279, "step": 4299 }, { "epoch": 0.28842606566723683, "grad_norm": 0.14381233987527614, "learning_rate": 2e-05, "loss": 5.4576, "step": 4300 }, { "epoch": 0.28849314149646177, "grad_norm": 0.14615054076258546, "learning_rate": 2e-05, "loss": 5.4926, "step": 4301 }, { "epoch": 0.2885602173256867, "grad_norm": 0.14183682211588364, "learning_rate": 2e-05, "loss": 5.3402, "step": 4302 }, { "epoch": 0.28862729315491165, "grad_norm": 0.14717642058018157, "learning_rate": 2e-05, "loss": 5.4094, "step": 4303 }, { "epoch": 0.2886943689841366, "grad_norm": 0.14605156989785198, "learning_rate": 2e-05, "loss": 5.4023, "step": 4304 }, { "epoch": 0.2887614448133615, "grad_norm": 0.14247841949594403, "learning_rate": 2e-05, "loss": 5.4223, "step": 4305 }, { "epoch": 0.28882852064258646, "grad_norm": 0.15054617023011208, "learning_rate": 2e-05, "loss": 5.358, "step": 4306 }, { "epoch": 0.2888955964718114, "grad_norm": 0.14439270322498457, "learning_rate": 2e-05, "loss": 5.4692, "step": 4307 }, { "epoch": 0.28896267230103634, "grad_norm": 0.1449675409269972, "learning_rate": 2e-05, "loss": 5.3064, "step": 4308 }, { "epoch": 0.2890297481302613, "grad_norm": 0.14267879918924425, "learning_rate": 2e-05, "loss": 5.4579, "step": 4309 }, { "epoch": 0.2890968239594862, "grad_norm": 0.14485250689728393, "learning_rate": 2e-05, "loss": 5.4665, "step": 4310 }, { "epoch": 0.28916389978871115, "grad_norm": 0.1481834686438, "learning_rate": 2e-05, "loss": 5.5298, "step": 4311 }, { "epoch": 0.2892309756179361, "grad_norm": 0.14747414129523978, "learning_rate": 2e-05, "loss": 5.4527, "step": 4312 }, { "epoch": 0.28929805144716103, "grad_norm": 0.14801995445269878, "learning_rate": 2e-05, "loss": 5.4542, "step": 4313 }, { "epoch": 0.28936512727638597, "grad_norm": 0.15289396494609864, "learning_rate": 2e-05, "loss": 5.4983, "step": 4314 }, { "epoch": 0.2894322031056109, "grad_norm": 0.14445784161868983, "learning_rate": 2e-05, "loss": 5.4533, "step": 4315 }, { "epoch": 0.28949927893483585, "grad_norm": 0.155576578738933, "learning_rate": 2e-05, "loss": 5.3076, "step": 4316 }, { "epoch": 0.2895663547640608, "grad_norm": 0.14699644009880616, "learning_rate": 2e-05, "loss": 5.4229, "step": 4317 }, { "epoch": 0.2896334305932857, "grad_norm": 0.1488365524057276, "learning_rate": 2e-05, "loss": 5.4691, "step": 4318 }, { "epoch": 0.28970050642251066, "grad_norm": 0.15108991112734016, "learning_rate": 2e-05, "loss": 5.2401, "step": 4319 }, { "epoch": 0.2897675822517356, "grad_norm": 0.14426307248364983, "learning_rate": 2e-05, "loss": 5.6304, "step": 4320 }, { "epoch": 0.28983465808096054, "grad_norm": 0.15533068273241857, "learning_rate": 2e-05, "loss": 5.4293, "step": 4321 }, { "epoch": 0.2899017339101855, "grad_norm": 0.1427738767420857, "learning_rate": 2e-05, "loss": 5.4394, "step": 4322 }, { "epoch": 0.2899688097394104, "grad_norm": 0.14284822360424737, "learning_rate": 2e-05, "loss": 5.3493, "step": 4323 }, { "epoch": 0.29003588556863535, "grad_norm": 0.148833307756587, "learning_rate": 2e-05, "loss": 5.4584, "step": 4324 }, { "epoch": 0.2901029613978603, "grad_norm": 0.14503416008537315, "learning_rate": 2e-05, "loss": 5.4992, "step": 4325 }, { "epoch": 0.29017003722708523, "grad_norm": 0.14563257658197198, "learning_rate": 2e-05, "loss": 5.514, "step": 4326 }, { "epoch": 0.29023711305631017, "grad_norm": 0.14011796899937717, "learning_rate": 2e-05, "loss": 5.4515, "step": 4327 }, { "epoch": 0.2903041888855351, "grad_norm": 0.1488081468341025, "learning_rate": 2e-05, "loss": 5.5469, "step": 4328 }, { "epoch": 0.29037126471476005, "grad_norm": 0.14467475651196673, "learning_rate": 2e-05, "loss": 5.4835, "step": 4329 }, { "epoch": 0.290438340543985, "grad_norm": 0.14360370321116983, "learning_rate": 2e-05, "loss": 5.5184, "step": 4330 }, { "epoch": 0.2905054163732099, "grad_norm": 0.14624865996482275, "learning_rate": 2e-05, "loss": 5.5744, "step": 4331 }, { "epoch": 0.29057249220243486, "grad_norm": 0.15576076980633363, "learning_rate": 2e-05, "loss": 5.4778, "step": 4332 }, { "epoch": 0.2906395680316598, "grad_norm": 0.14864447880188478, "learning_rate": 2e-05, "loss": 5.4392, "step": 4333 }, { "epoch": 0.29070664386088474, "grad_norm": 0.14156268680209527, "learning_rate": 2e-05, "loss": 5.3782, "step": 4334 }, { "epoch": 0.2907737196901097, "grad_norm": 0.1490225963056267, "learning_rate": 2e-05, "loss": 5.3003, "step": 4335 }, { "epoch": 0.2908407955193346, "grad_norm": 0.1435082157957248, "learning_rate": 2e-05, "loss": 5.36, "step": 4336 }, { "epoch": 0.29090787134855955, "grad_norm": 0.14857276303867534, "learning_rate": 2e-05, "loss": 5.5576, "step": 4337 }, { "epoch": 0.2909749471777845, "grad_norm": 0.14476241511603105, "learning_rate": 2e-05, "loss": 5.433, "step": 4338 }, { "epoch": 0.29104202300700943, "grad_norm": 0.15483432577596914, "learning_rate": 2e-05, "loss": 5.5437, "step": 4339 }, { "epoch": 0.29110909883623437, "grad_norm": 0.14294267323724022, "learning_rate": 2e-05, "loss": 5.4895, "step": 4340 }, { "epoch": 0.2911761746654593, "grad_norm": 0.14182168303428214, "learning_rate": 2e-05, "loss": 5.4269, "step": 4341 }, { "epoch": 0.29124325049468425, "grad_norm": 0.14728199619845228, "learning_rate": 2e-05, "loss": 5.5103, "step": 4342 }, { "epoch": 0.2913103263239092, "grad_norm": 0.14780555936889528, "learning_rate": 2e-05, "loss": 5.3953, "step": 4343 }, { "epoch": 0.2913774021531341, "grad_norm": 0.15540606676039548, "learning_rate": 2e-05, "loss": 5.5326, "step": 4344 }, { "epoch": 0.29144447798235906, "grad_norm": 0.14321255713923634, "learning_rate": 2e-05, "loss": 5.4475, "step": 4345 }, { "epoch": 0.291511553811584, "grad_norm": 0.16422176130238858, "learning_rate": 2e-05, "loss": 5.4024, "step": 4346 }, { "epoch": 0.29157862964080894, "grad_norm": 0.1666693770309578, "learning_rate": 2e-05, "loss": 5.3495, "step": 4347 }, { "epoch": 0.2916457054700339, "grad_norm": 0.14132604925109069, "learning_rate": 2e-05, "loss": 5.4086, "step": 4348 }, { "epoch": 0.2917127812992588, "grad_norm": 0.15400337114408036, "learning_rate": 2e-05, "loss": 5.3539, "step": 4349 }, { "epoch": 0.29177985712848375, "grad_norm": 0.14979018276038622, "learning_rate": 2e-05, "loss": 5.4969, "step": 4350 }, { "epoch": 0.2918469329577087, "grad_norm": 0.15776750933257022, "learning_rate": 2e-05, "loss": 5.4548, "step": 4351 }, { "epoch": 0.29191400878693363, "grad_norm": 0.14681575721973045, "learning_rate": 2e-05, "loss": 5.417, "step": 4352 }, { "epoch": 0.29198108461615857, "grad_norm": 0.15785570703274107, "learning_rate": 2e-05, "loss": 5.5033, "step": 4353 }, { "epoch": 0.2920481604453835, "grad_norm": 0.14917910401799198, "learning_rate": 2e-05, "loss": 5.4511, "step": 4354 }, { "epoch": 0.29211523627460845, "grad_norm": 0.14274659393309294, "learning_rate": 2e-05, "loss": 5.4525, "step": 4355 }, { "epoch": 0.2921823121038334, "grad_norm": 0.15043793573792263, "learning_rate": 2e-05, "loss": 5.4095, "step": 4356 }, { "epoch": 0.2922493879330583, "grad_norm": 0.14459329603168142, "learning_rate": 2e-05, "loss": 5.4744, "step": 4357 }, { "epoch": 0.29231646376228326, "grad_norm": 0.1438452005111967, "learning_rate": 2e-05, "loss": 5.5961, "step": 4358 }, { "epoch": 0.2923835395915082, "grad_norm": 0.15267815843964683, "learning_rate": 2e-05, "loss": 5.4859, "step": 4359 }, { "epoch": 0.29245061542073314, "grad_norm": 0.15317532505140832, "learning_rate": 2e-05, "loss": 5.4174, "step": 4360 }, { "epoch": 0.2925176912499581, "grad_norm": 0.14951602274794937, "learning_rate": 2e-05, "loss": 5.4645, "step": 4361 }, { "epoch": 0.292584767079183, "grad_norm": 0.14418839642863918, "learning_rate": 2e-05, "loss": 5.4448, "step": 4362 }, { "epoch": 0.29265184290840796, "grad_norm": 0.14974391073500276, "learning_rate": 2e-05, "loss": 5.4212, "step": 4363 }, { "epoch": 0.2927189187376329, "grad_norm": 0.1505174566015882, "learning_rate": 2e-05, "loss": 5.4077, "step": 4364 }, { "epoch": 0.29278599456685783, "grad_norm": 0.16085270475530777, "learning_rate": 2e-05, "loss": 5.3752, "step": 4365 }, { "epoch": 0.29285307039608277, "grad_norm": 0.15495109361103707, "learning_rate": 2e-05, "loss": 5.4285, "step": 4366 }, { "epoch": 0.2929201462253077, "grad_norm": 0.14833805759782798, "learning_rate": 2e-05, "loss": 5.4404, "step": 4367 }, { "epoch": 0.29298722205453265, "grad_norm": 0.14115324921952613, "learning_rate": 2e-05, "loss": 5.3789, "step": 4368 }, { "epoch": 0.2930542978837576, "grad_norm": 0.15822603829061954, "learning_rate": 2e-05, "loss": 5.2587, "step": 4369 }, { "epoch": 0.2931213737129825, "grad_norm": 0.15678305989512015, "learning_rate": 2e-05, "loss": 5.4746, "step": 4370 }, { "epoch": 0.29318844954220746, "grad_norm": 0.15166846111621055, "learning_rate": 2e-05, "loss": 5.4235, "step": 4371 }, { "epoch": 0.2932555253714324, "grad_norm": 0.14472523301687565, "learning_rate": 2e-05, "loss": 5.4848, "step": 4372 }, { "epoch": 0.29332260120065734, "grad_norm": 0.16099566881847294, "learning_rate": 2e-05, "loss": 5.4336, "step": 4373 }, { "epoch": 0.2933896770298823, "grad_norm": 0.1401712186724716, "learning_rate": 2e-05, "loss": 5.3825, "step": 4374 }, { "epoch": 0.2934567528591072, "grad_norm": 0.14381719732024206, "learning_rate": 2e-05, "loss": 5.4695, "step": 4375 }, { "epoch": 0.29352382868833216, "grad_norm": 0.1539149795790917, "learning_rate": 2e-05, "loss": 5.3745, "step": 4376 }, { "epoch": 0.2935909045175571, "grad_norm": 0.14384358779099418, "learning_rate": 2e-05, "loss": 5.4185, "step": 4377 }, { "epoch": 0.29365798034678203, "grad_norm": 0.14980271897636543, "learning_rate": 2e-05, "loss": 5.3998, "step": 4378 }, { "epoch": 0.29372505617600697, "grad_norm": 0.14751700166753023, "learning_rate": 2e-05, "loss": 5.4583, "step": 4379 }, { "epoch": 0.2937921320052319, "grad_norm": 0.15025908167934107, "learning_rate": 2e-05, "loss": 5.2662, "step": 4380 }, { "epoch": 0.29385920783445685, "grad_norm": 0.13611355302971653, "learning_rate": 2e-05, "loss": 5.3662, "step": 4381 }, { "epoch": 0.2939262836636818, "grad_norm": 0.14494893343278173, "learning_rate": 2e-05, "loss": 5.3998, "step": 4382 }, { "epoch": 0.2939933594929067, "grad_norm": 0.15069572941396875, "learning_rate": 2e-05, "loss": 5.4282, "step": 4383 }, { "epoch": 0.29406043532213166, "grad_norm": 0.1491662264512494, "learning_rate": 2e-05, "loss": 5.5031, "step": 4384 }, { "epoch": 0.2941275111513566, "grad_norm": 0.14871270794708297, "learning_rate": 2e-05, "loss": 5.3856, "step": 4385 }, { "epoch": 0.29419458698058154, "grad_norm": 0.14020941658232328, "learning_rate": 2e-05, "loss": 5.4273, "step": 4386 }, { "epoch": 0.2942616628098065, "grad_norm": 0.14058482186589166, "learning_rate": 2e-05, "loss": 5.6088, "step": 4387 }, { "epoch": 0.2943287386390314, "grad_norm": 0.1448478448679813, "learning_rate": 2e-05, "loss": 5.3369, "step": 4388 }, { "epoch": 0.29439581446825636, "grad_norm": 0.13848112939753796, "learning_rate": 2e-05, "loss": 5.3978, "step": 4389 }, { "epoch": 0.2944628902974813, "grad_norm": 0.15421834380214994, "learning_rate": 2e-05, "loss": 5.4192, "step": 4390 }, { "epoch": 0.29452996612670623, "grad_norm": 0.14490901826190594, "learning_rate": 2e-05, "loss": 5.3661, "step": 4391 }, { "epoch": 0.29459704195593117, "grad_norm": 0.15202333469189552, "learning_rate": 2e-05, "loss": 5.4159, "step": 4392 }, { "epoch": 0.2946641177851561, "grad_norm": 0.14823784492761957, "learning_rate": 2e-05, "loss": 5.5003, "step": 4393 }, { "epoch": 0.29473119361438105, "grad_norm": 0.14246537939700812, "learning_rate": 2e-05, "loss": 5.5757, "step": 4394 }, { "epoch": 0.294798269443606, "grad_norm": 0.14293471968587468, "learning_rate": 2e-05, "loss": 5.4068, "step": 4395 }, { "epoch": 0.2948653452728309, "grad_norm": 0.14882017491081653, "learning_rate": 2e-05, "loss": 5.5194, "step": 4396 }, { "epoch": 0.29493242110205586, "grad_norm": 0.1508040710568239, "learning_rate": 2e-05, "loss": 5.3642, "step": 4397 }, { "epoch": 0.2949994969312808, "grad_norm": 0.1427744858949439, "learning_rate": 2e-05, "loss": 5.3706, "step": 4398 }, { "epoch": 0.29506657276050574, "grad_norm": 0.14986633544094705, "learning_rate": 2e-05, "loss": 5.5787, "step": 4399 }, { "epoch": 0.2951336485897307, "grad_norm": 0.14473535437439156, "learning_rate": 2e-05, "loss": 5.4453, "step": 4400 }, { "epoch": 0.2952007244189556, "grad_norm": 0.15058433889888256, "learning_rate": 2e-05, "loss": 5.491, "step": 4401 }, { "epoch": 0.29526780024818056, "grad_norm": 0.1409289391080362, "learning_rate": 2e-05, "loss": 5.3046, "step": 4402 }, { "epoch": 0.2953348760774055, "grad_norm": 0.14491970562493814, "learning_rate": 2e-05, "loss": 5.4534, "step": 4403 }, { "epoch": 0.29540195190663043, "grad_norm": 0.14717005845628495, "learning_rate": 2e-05, "loss": 5.3929, "step": 4404 }, { "epoch": 0.29546902773585537, "grad_norm": 0.14729994198806665, "learning_rate": 2e-05, "loss": 5.5103, "step": 4405 }, { "epoch": 0.2955361035650803, "grad_norm": 0.14434017108729597, "learning_rate": 2e-05, "loss": 5.4031, "step": 4406 }, { "epoch": 0.29560317939430525, "grad_norm": 0.14793860476063672, "learning_rate": 2e-05, "loss": 5.4988, "step": 4407 }, { "epoch": 0.2956702552235302, "grad_norm": 0.14876447857390662, "learning_rate": 2e-05, "loss": 5.412, "step": 4408 }, { "epoch": 0.2957373310527551, "grad_norm": 0.14569830718837296, "learning_rate": 2e-05, "loss": 5.3668, "step": 4409 }, { "epoch": 0.29580440688198006, "grad_norm": 0.14632030385002823, "learning_rate": 2e-05, "loss": 5.4831, "step": 4410 }, { "epoch": 0.295871482711205, "grad_norm": 0.1476153505678602, "learning_rate": 2e-05, "loss": 5.3409, "step": 4411 }, { "epoch": 0.29593855854042994, "grad_norm": 0.14777398583212123, "learning_rate": 2e-05, "loss": 5.549, "step": 4412 }, { "epoch": 0.2960056343696549, "grad_norm": 0.1448086725182846, "learning_rate": 2e-05, "loss": 5.3771, "step": 4413 }, { "epoch": 0.2960727101988798, "grad_norm": 0.14777133451904728, "learning_rate": 2e-05, "loss": 5.35, "step": 4414 }, { "epoch": 0.29613978602810476, "grad_norm": 0.14266071386095056, "learning_rate": 2e-05, "loss": 5.4658, "step": 4415 }, { "epoch": 0.2962068618573297, "grad_norm": 0.14299321872054224, "learning_rate": 2e-05, "loss": 5.432, "step": 4416 }, { "epoch": 0.29627393768655463, "grad_norm": 0.1447166550864562, "learning_rate": 2e-05, "loss": 5.4378, "step": 4417 }, { "epoch": 0.29634101351577957, "grad_norm": 0.14307158506394135, "learning_rate": 2e-05, "loss": 5.6019, "step": 4418 }, { "epoch": 0.2964080893450045, "grad_norm": 0.15423776942039816, "learning_rate": 2e-05, "loss": 5.3995, "step": 4419 }, { "epoch": 0.29647516517422945, "grad_norm": 0.14195525026085457, "learning_rate": 2e-05, "loss": 5.3615, "step": 4420 }, { "epoch": 0.2965422410034544, "grad_norm": 0.14826855831766986, "learning_rate": 2e-05, "loss": 5.3519, "step": 4421 }, { "epoch": 0.2966093168326793, "grad_norm": 0.14531877147399958, "learning_rate": 2e-05, "loss": 5.4244, "step": 4422 }, { "epoch": 0.29667639266190426, "grad_norm": 0.14083779448189743, "learning_rate": 2e-05, "loss": 5.2964, "step": 4423 }, { "epoch": 0.2967434684911292, "grad_norm": 0.1399890268274982, "learning_rate": 2e-05, "loss": 5.4173, "step": 4424 }, { "epoch": 0.29681054432035414, "grad_norm": 0.14924720137934283, "learning_rate": 2e-05, "loss": 5.362, "step": 4425 }, { "epoch": 0.2968776201495791, "grad_norm": 0.14965245515966227, "learning_rate": 2e-05, "loss": 5.6122, "step": 4426 }, { "epoch": 0.296944695978804, "grad_norm": 0.15673724511652062, "learning_rate": 2e-05, "loss": 5.4865, "step": 4427 }, { "epoch": 0.29701177180802896, "grad_norm": 0.14710387998951466, "learning_rate": 2e-05, "loss": 5.5013, "step": 4428 }, { "epoch": 0.2970788476372539, "grad_norm": 0.15048683130509707, "learning_rate": 2e-05, "loss": 5.4658, "step": 4429 }, { "epoch": 0.29714592346647883, "grad_norm": 0.1584824446267396, "learning_rate": 2e-05, "loss": 5.3736, "step": 4430 }, { "epoch": 0.2972129992957038, "grad_norm": 0.147278775604171, "learning_rate": 2e-05, "loss": 5.4097, "step": 4431 }, { "epoch": 0.2972800751249287, "grad_norm": 0.14205231536068444, "learning_rate": 2e-05, "loss": 5.5721, "step": 4432 }, { "epoch": 0.29734715095415365, "grad_norm": 0.1509313547195019, "learning_rate": 2e-05, "loss": 5.4198, "step": 4433 }, { "epoch": 0.2974142267833786, "grad_norm": 0.15131996830782682, "learning_rate": 2e-05, "loss": 5.4596, "step": 4434 }, { "epoch": 0.2974813026126035, "grad_norm": 0.14179402217895634, "learning_rate": 2e-05, "loss": 5.4686, "step": 4435 }, { "epoch": 0.29754837844182846, "grad_norm": 0.1485118189011419, "learning_rate": 2e-05, "loss": 5.4804, "step": 4436 }, { "epoch": 0.2976154542710534, "grad_norm": 0.1484314800655757, "learning_rate": 2e-05, "loss": 5.5409, "step": 4437 }, { "epoch": 0.29768253010027834, "grad_norm": 0.14255539655996666, "learning_rate": 2e-05, "loss": 5.3959, "step": 4438 }, { "epoch": 0.2977496059295033, "grad_norm": 0.1499389444958537, "learning_rate": 2e-05, "loss": 5.3837, "step": 4439 }, { "epoch": 0.2978166817587282, "grad_norm": 0.15009202824377305, "learning_rate": 2e-05, "loss": 5.3945, "step": 4440 }, { "epoch": 0.29788375758795316, "grad_norm": 0.14726187033022906, "learning_rate": 2e-05, "loss": 5.4124, "step": 4441 }, { "epoch": 0.2979508334171781, "grad_norm": 0.1526875965152604, "learning_rate": 2e-05, "loss": 5.6277, "step": 4442 }, { "epoch": 0.29801790924640303, "grad_norm": 0.1559547808493094, "learning_rate": 2e-05, "loss": 5.2882, "step": 4443 }, { "epoch": 0.298084985075628, "grad_norm": 0.150176601761596, "learning_rate": 2e-05, "loss": 5.4001, "step": 4444 }, { "epoch": 0.2981520609048529, "grad_norm": 0.1511907626233967, "learning_rate": 2e-05, "loss": 5.46, "step": 4445 }, { "epoch": 0.29821913673407785, "grad_norm": 0.14704256311815742, "learning_rate": 2e-05, "loss": 5.3864, "step": 4446 }, { "epoch": 0.2982862125633028, "grad_norm": 0.1564172063278242, "learning_rate": 2e-05, "loss": 5.475, "step": 4447 }, { "epoch": 0.2983532883925277, "grad_norm": 0.14431487494891698, "learning_rate": 2e-05, "loss": 5.3474, "step": 4448 }, { "epoch": 0.29842036422175267, "grad_norm": 0.15261969486484003, "learning_rate": 2e-05, "loss": 5.3892, "step": 4449 }, { "epoch": 0.2984874400509776, "grad_norm": 0.1572944377686291, "learning_rate": 2e-05, "loss": 5.5266, "step": 4450 }, { "epoch": 0.29855451588020254, "grad_norm": 0.1486465518305097, "learning_rate": 2e-05, "loss": 5.6022, "step": 4451 }, { "epoch": 0.2986215917094275, "grad_norm": 0.14185341573847346, "learning_rate": 2e-05, "loss": 5.415, "step": 4452 }, { "epoch": 0.2986886675386524, "grad_norm": 0.15008265859735462, "learning_rate": 2e-05, "loss": 5.3657, "step": 4453 }, { "epoch": 0.29875574336787736, "grad_norm": 0.15177590647888936, "learning_rate": 2e-05, "loss": 5.4348, "step": 4454 }, { "epoch": 0.29882281919710235, "grad_norm": 0.145296358062232, "learning_rate": 2e-05, "loss": 5.5344, "step": 4455 }, { "epoch": 0.2988898950263273, "grad_norm": 0.14393210131626882, "learning_rate": 2e-05, "loss": 5.3791, "step": 4456 }, { "epoch": 0.29895697085555223, "grad_norm": 0.15042910440636892, "learning_rate": 2e-05, "loss": 5.3565, "step": 4457 }, { "epoch": 0.29902404668477717, "grad_norm": 0.15163348213761865, "learning_rate": 2e-05, "loss": 5.3517, "step": 4458 }, { "epoch": 0.2990911225140021, "grad_norm": 0.14320934245938174, "learning_rate": 2e-05, "loss": 5.575, "step": 4459 }, { "epoch": 0.29915819834322704, "grad_norm": 0.14826574334609666, "learning_rate": 2e-05, "loss": 5.4004, "step": 4460 }, { "epoch": 0.299225274172452, "grad_norm": 0.15028397313407516, "learning_rate": 2e-05, "loss": 5.4854, "step": 4461 }, { "epoch": 0.2992923500016769, "grad_norm": 0.141091426544405, "learning_rate": 2e-05, "loss": 5.5127, "step": 4462 }, { "epoch": 0.29935942583090186, "grad_norm": 0.15684565249352467, "learning_rate": 2e-05, "loss": 5.3202, "step": 4463 }, { "epoch": 0.2994265016601268, "grad_norm": 0.14750241164647332, "learning_rate": 2e-05, "loss": 5.4648, "step": 4464 }, { "epoch": 0.29949357748935174, "grad_norm": 0.15276471799981303, "learning_rate": 2e-05, "loss": 5.4229, "step": 4465 }, { "epoch": 0.2995606533185767, "grad_norm": 0.15058568726440405, "learning_rate": 2e-05, "loss": 5.5332, "step": 4466 }, { "epoch": 0.2996277291478016, "grad_norm": 0.1377913711056494, "learning_rate": 2e-05, "loss": 5.4342, "step": 4467 }, { "epoch": 0.29969480497702655, "grad_norm": 0.14953139795101247, "learning_rate": 2e-05, "loss": 5.5087, "step": 4468 }, { "epoch": 0.2997618808062515, "grad_norm": 0.14765105117349295, "learning_rate": 2e-05, "loss": 5.521, "step": 4469 }, { "epoch": 0.29982895663547643, "grad_norm": 0.15276931099578667, "learning_rate": 2e-05, "loss": 5.5692, "step": 4470 }, { "epoch": 0.29989603246470137, "grad_norm": 0.14682924900176064, "learning_rate": 2e-05, "loss": 5.5785, "step": 4471 }, { "epoch": 0.2999631082939263, "grad_norm": 0.1486577590134233, "learning_rate": 2e-05, "loss": 5.3848, "step": 4472 }, { "epoch": 0.30003018412315124, "grad_norm": 0.1511372544516596, "learning_rate": 2e-05, "loss": 5.4533, "step": 4473 }, { "epoch": 0.3000972599523762, "grad_norm": 0.14873064689260387, "learning_rate": 2e-05, "loss": 5.4424, "step": 4474 }, { "epoch": 0.3001643357816011, "grad_norm": 0.15128949770112074, "learning_rate": 2e-05, "loss": 5.5394, "step": 4475 }, { "epoch": 0.30023141161082606, "grad_norm": 0.15151526690915973, "learning_rate": 2e-05, "loss": 5.5082, "step": 4476 }, { "epoch": 0.300298487440051, "grad_norm": 0.15006466407201843, "learning_rate": 2e-05, "loss": 5.3463, "step": 4477 }, { "epoch": 0.30036556326927594, "grad_norm": 0.15122503133048854, "learning_rate": 2e-05, "loss": 5.4863, "step": 4478 }, { "epoch": 0.3004326390985009, "grad_norm": 0.1475357603410436, "learning_rate": 2e-05, "loss": 5.51, "step": 4479 }, { "epoch": 0.3004997149277258, "grad_norm": 0.1446218730006559, "learning_rate": 2e-05, "loss": 5.4818, "step": 4480 }, { "epoch": 0.30056679075695075, "grad_norm": 0.14622607818349256, "learning_rate": 2e-05, "loss": 5.5227, "step": 4481 }, { "epoch": 0.3006338665861757, "grad_norm": 0.1494675085489273, "learning_rate": 2e-05, "loss": 5.4127, "step": 4482 }, { "epoch": 0.30070094241540063, "grad_norm": 0.15390793426753374, "learning_rate": 2e-05, "loss": 5.4985, "step": 4483 }, { "epoch": 0.30076801824462557, "grad_norm": 0.14558580248121492, "learning_rate": 2e-05, "loss": 5.4427, "step": 4484 }, { "epoch": 0.3008350940738505, "grad_norm": 0.15178572945715418, "learning_rate": 2e-05, "loss": 5.4088, "step": 4485 }, { "epoch": 0.30090216990307544, "grad_norm": 0.14795413195347523, "learning_rate": 2e-05, "loss": 5.4763, "step": 4486 }, { "epoch": 0.3009692457323004, "grad_norm": 0.15333680684886275, "learning_rate": 2e-05, "loss": 5.577, "step": 4487 }, { "epoch": 0.3010363215615253, "grad_norm": 0.16100091253288232, "learning_rate": 2e-05, "loss": 5.4974, "step": 4488 }, { "epoch": 0.30110339739075026, "grad_norm": 0.1463599293501929, "learning_rate": 2e-05, "loss": 5.4892, "step": 4489 }, { "epoch": 0.3011704732199752, "grad_norm": 0.15118176103732966, "learning_rate": 2e-05, "loss": 5.5019, "step": 4490 }, { "epoch": 0.30123754904920014, "grad_norm": 0.14889063309329517, "learning_rate": 2e-05, "loss": 5.4498, "step": 4491 }, { "epoch": 0.3013046248784251, "grad_norm": 0.14311443594143256, "learning_rate": 2e-05, "loss": 5.4457, "step": 4492 }, { "epoch": 0.30137170070765, "grad_norm": 0.14672927145288175, "learning_rate": 2e-05, "loss": 5.5225, "step": 4493 }, { "epoch": 0.30143877653687495, "grad_norm": 0.14672386982399532, "learning_rate": 2e-05, "loss": 5.3343, "step": 4494 }, { "epoch": 0.3015058523660999, "grad_norm": 0.1464856276896854, "learning_rate": 2e-05, "loss": 5.4957, "step": 4495 }, { "epoch": 0.30157292819532483, "grad_norm": 0.14516111755585517, "learning_rate": 2e-05, "loss": 5.4785, "step": 4496 }, { "epoch": 0.30164000402454977, "grad_norm": 0.14823654075427836, "learning_rate": 2e-05, "loss": 5.4374, "step": 4497 }, { "epoch": 0.3017070798537747, "grad_norm": 0.1532013366040041, "learning_rate": 2e-05, "loss": 5.3846, "step": 4498 }, { "epoch": 0.30177415568299965, "grad_norm": 0.1414635707436847, "learning_rate": 2e-05, "loss": 5.4443, "step": 4499 }, { "epoch": 0.3018412315122246, "grad_norm": 0.14434118253351771, "learning_rate": 2e-05, "loss": 5.3085, "step": 4500 }, { "epoch": 0.3019083073414495, "grad_norm": 0.1412344956648059, "learning_rate": 2e-05, "loss": 5.4884, "step": 4501 }, { "epoch": 0.30197538317067446, "grad_norm": 0.14170130000397244, "learning_rate": 2e-05, "loss": 5.5667, "step": 4502 }, { "epoch": 0.3020424589998994, "grad_norm": 0.1508414484737552, "learning_rate": 2e-05, "loss": 5.4676, "step": 4503 }, { "epoch": 0.30210953482912434, "grad_norm": 0.14430825440521997, "learning_rate": 2e-05, "loss": 5.4571, "step": 4504 }, { "epoch": 0.3021766106583493, "grad_norm": 0.14434438836071128, "learning_rate": 2e-05, "loss": 5.3389, "step": 4505 }, { "epoch": 0.3022436864875742, "grad_norm": 0.14636843486832962, "learning_rate": 2e-05, "loss": 5.457, "step": 4506 }, { "epoch": 0.30231076231679915, "grad_norm": 0.14597823111871303, "learning_rate": 2e-05, "loss": 5.3266, "step": 4507 }, { "epoch": 0.3023778381460241, "grad_norm": 0.1444290340959737, "learning_rate": 2e-05, "loss": 5.3679, "step": 4508 }, { "epoch": 0.30244491397524903, "grad_norm": 0.14435353593371408, "learning_rate": 2e-05, "loss": 5.4802, "step": 4509 }, { "epoch": 0.30251198980447397, "grad_norm": 0.14427640301724307, "learning_rate": 2e-05, "loss": 5.4297, "step": 4510 }, { "epoch": 0.3025790656336989, "grad_norm": 0.1493448607322142, "learning_rate": 2e-05, "loss": 5.3936, "step": 4511 }, { "epoch": 0.30264614146292385, "grad_norm": 0.1511831575627833, "learning_rate": 2e-05, "loss": 5.5371, "step": 4512 }, { "epoch": 0.3027132172921488, "grad_norm": 0.14267054118883607, "learning_rate": 2e-05, "loss": 5.4229, "step": 4513 }, { "epoch": 0.3027802931213737, "grad_norm": 0.14791303118277263, "learning_rate": 2e-05, "loss": 5.4798, "step": 4514 }, { "epoch": 0.30284736895059866, "grad_norm": 0.1489424701503919, "learning_rate": 2e-05, "loss": 5.5886, "step": 4515 }, { "epoch": 0.3029144447798236, "grad_norm": 0.15856959404029955, "learning_rate": 2e-05, "loss": 5.4192, "step": 4516 }, { "epoch": 0.30298152060904854, "grad_norm": 0.1468613996303402, "learning_rate": 2e-05, "loss": 5.3799, "step": 4517 }, { "epoch": 0.3030485964382735, "grad_norm": 0.14569601653461278, "learning_rate": 2e-05, "loss": 5.5053, "step": 4518 }, { "epoch": 0.3031156722674984, "grad_norm": 0.1437530933637486, "learning_rate": 2e-05, "loss": 5.4187, "step": 4519 }, { "epoch": 0.30318274809672335, "grad_norm": 0.1429483075687242, "learning_rate": 2e-05, "loss": 5.3659, "step": 4520 }, { "epoch": 0.3032498239259483, "grad_norm": 0.14823019972115709, "learning_rate": 2e-05, "loss": 5.4647, "step": 4521 }, { "epoch": 0.30331689975517323, "grad_norm": 0.14118602569998112, "learning_rate": 2e-05, "loss": 5.4246, "step": 4522 }, { "epoch": 0.30338397558439817, "grad_norm": 0.1367046027499298, "learning_rate": 2e-05, "loss": 5.4359, "step": 4523 }, { "epoch": 0.3034510514136231, "grad_norm": 0.14910539531423775, "learning_rate": 2e-05, "loss": 5.5252, "step": 4524 }, { "epoch": 0.30351812724284805, "grad_norm": 0.14127945636164846, "learning_rate": 2e-05, "loss": 5.4612, "step": 4525 }, { "epoch": 0.303585203072073, "grad_norm": 0.14298771721302891, "learning_rate": 2e-05, "loss": 5.4864, "step": 4526 }, { "epoch": 0.3036522789012979, "grad_norm": 0.14819389870927663, "learning_rate": 2e-05, "loss": 5.3822, "step": 4527 }, { "epoch": 0.30371935473052286, "grad_norm": 0.15159666488472187, "learning_rate": 2e-05, "loss": 5.5236, "step": 4528 }, { "epoch": 0.3037864305597478, "grad_norm": 0.1519828903230427, "learning_rate": 2e-05, "loss": 5.5077, "step": 4529 }, { "epoch": 0.30385350638897274, "grad_norm": 0.148337067669773, "learning_rate": 2e-05, "loss": 5.4608, "step": 4530 }, { "epoch": 0.3039205822181977, "grad_norm": 0.1539696762032026, "learning_rate": 2e-05, "loss": 5.5365, "step": 4531 }, { "epoch": 0.3039876580474226, "grad_norm": 0.1462703760903503, "learning_rate": 2e-05, "loss": 5.3581, "step": 4532 }, { "epoch": 0.30405473387664755, "grad_norm": 0.14747877777618507, "learning_rate": 2e-05, "loss": 5.4357, "step": 4533 }, { "epoch": 0.3041218097058725, "grad_norm": 0.14264505851686338, "learning_rate": 2e-05, "loss": 5.4042, "step": 4534 }, { "epoch": 0.30418888553509743, "grad_norm": 0.14234372042259075, "learning_rate": 2e-05, "loss": 5.4808, "step": 4535 }, { "epoch": 0.30425596136432237, "grad_norm": 0.14334791751630266, "learning_rate": 2e-05, "loss": 5.4058, "step": 4536 }, { "epoch": 0.3043230371935473, "grad_norm": 0.15336908681458847, "learning_rate": 2e-05, "loss": 5.3469, "step": 4537 }, { "epoch": 0.30439011302277225, "grad_norm": 0.15145148553399412, "learning_rate": 2e-05, "loss": 5.4359, "step": 4538 }, { "epoch": 0.3044571888519972, "grad_norm": 0.15233190838675115, "learning_rate": 2e-05, "loss": 5.3074, "step": 4539 }, { "epoch": 0.3045242646812221, "grad_norm": 0.14294687996039007, "learning_rate": 2e-05, "loss": 5.3601, "step": 4540 }, { "epoch": 0.30459134051044706, "grad_norm": 0.1481733919274403, "learning_rate": 2e-05, "loss": 5.354, "step": 4541 }, { "epoch": 0.304658416339672, "grad_norm": 0.15027888735659364, "learning_rate": 2e-05, "loss": 5.5038, "step": 4542 }, { "epoch": 0.30472549216889694, "grad_norm": 0.15709150889754572, "learning_rate": 2e-05, "loss": 5.4874, "step": 4543 }, { "epoch": 0.3047925679981219, "grad_norm": 0.14912549952516282, "learning_rate": 2e-05, "loss": 5.3845, "step": 4544 }, { "epoch": 0.3048596438273468, "grad_norm": 0.14223661685924016, "learning_rate": 2e-05, "loss": 5.4299, "step": 4545 }, { "epoch": 0.30492671965657175, "grad_norm": 0.14841725745071396, "learning_rate": 2e-05, "loss": 5.3177, "step": 4546 }, { "epoch": 0.3049937954857967, "grad_norm": 0.15274045397357197, "learning_rate": 2e-05, "loss": 5.69, "step": 4547 }, { "epoch": 0.30506087131502163, "grad_norm": 0.14604600946609206, "learning_rate": 2e-05, "loss": 5.4515, "step": 4548 }, { "epoch": 0.30512794714424657, "grad_norm": 0.1457269834699606, "learning_rate": 2e-05, "loss": 5.461, "step": 4549 }, { "epoch": 0.3051950229734715, "grad_norm": 0.15264609031276527, "learning_rate": 2e-05, "loss": 5.3926, "step": 4550 }, { "epoch": 0.30526209880269645, "grad_norm": 0.14868290403352563, "learning_rate": 2e-05, "loss": 5.5023, "step": 4551 }, { "epoch": 0.3053291746319214, "grad_norm": 0.1457399706390801, "learning_rate": 2e-05, "loss": 5.3697, "step": 4552 }, { "epoch": 0.3053962504611463, "grad_norm": 0.1546530429916773, "learning_rate": 2e-05, "loss": 5.5141, "step": 4553 }, { "epoch": 0.30546332629037126, "grad_norm": 0.15549605671735658, "learning_rate": 2e-05, "loss": 5.3758, "step": 4554 }, { "epoch": 0.3055304021195962, "grad_norm": 0.14831136184485863, "learning_rate": 2e-05, "loss": 5.5146, "step": 4555 }, { "epoch": 0.30559747794882114, "grad_norm": 0.14717603234018364, "learning_rate": 2e-05, "loss": 5.4754, "step": 4556 }, { "epoch": 0.3056645537780461, "grad_norm": 0.15738969161474375, "learning_rate": 2e-05, "loss": 5.4012, "step": 4557 }, { "epoch": 0.305731629607271, "grad_norm": 0.14801873205239519, "learning_rate": 2e-05, "loss": 5.4764, "step": 4558 }, { "epoch": 0.30579870543649595, "grad_norm": 0.14780681581374525, "learning_rate": 2e-05, "loss": 5.5604, "step": 4559 }, { "epoch": 0.3058657812657209, "grad_norm": 0.14617369869722138, "learning_rate": 2e-05, "loss": 5.4255, "step": 4560 }, { "epoch": 0.30593285709494583, "grad_norm": 0.15357989315622822, "learning_rate": 2e-05, "loss": 5.4598, "step": 4561 }, { "epoch": 0.30599993292417077, "grad_norm": 0.15368770081449487, "learning_rate": 2e-05, "loss": 5.3773, "step": 4562 }, { "epoch": 0.3060670087533957, "grad_norm": 0.1450983363320904, "learning_rate": 2e-05, "loss": 5.5133, "step": 4563 }, { "epoch": 0.30613408458262065, "grad_norm": 0.15652310656206642, "learning_rate": 2e-05, "loss": 5.4514, "step": 4564 }, { "epoch": 0.3062011604118456, "grad_norm": 0.15109419190851142, "learning_rate": 2e-05, "loss": 5.4071, "step": 4565 }, { "epoch": 0.3062682362410705, "grad_norm": 0.15211711780011758, "learning_rate": 2e-05, "loss": 5.3679, "step": 4566 }, { "epoch": 0.30633531207029546, "grad_norm": 0.14703112798997636, "learning_rate": 2e-05, "loss": 5.4159, "step": 4567 }, { "epoch": 0.3064023878995204, "grad_norm": 0.15022335343071944, "learning_rate": 2e-05, "loss": 5.3422, "step": 4568 }, { "epoch": 0.30646946372874534, "grad_norm": 0.14104542680138143, "learning_rate": 2e-05, "loss": 5.3225, "step": 4569 }, { "epoch": 0.3065365395579703, "grad_norm": 0.1486877583072616, "learning_rate": 2e-05, "loss": 5.51, "step": 4570 }, { "epoch": 0.3066036153871952, "grad_norm": 0.14443600241853174, "learning_rate": 2e-05, "loss": 5.3902, "step": 4571 }, { "epoch": 0.30667069121642015, "grad_norm": 0.14693169979282228, "learning_rate": 2e-05, "loss": 5.3677, "step": 4572 }, { "epoch": 0.3067377670456451, "grad_norm": 0.1413009762680042, "learning_rate": 2e-05, "loss": 5.3865, "step": 4573 }, { "epoch": 0.30680484287487003, "grad_norm": 0.14801137075445153, "learning_rate": 2e-05, "loss": 5.4652, "step": 4574 }, { "epoch": 0.30687191870409497, "grad_norm": 0.14248852367095532, "learning_rate": 2e-05, "loss": 5.3406, "step": 4575 }, { "epoch": 0.3069389945333199, "grad_norm": 0.13969273350202857, "learning_rate": 2e-05, "loss": 5.4499, "step": 4576 }, { "epoch": 0.30700607036254485, "grad_norm": 0.15590339112791815, "learning_rate": 2e-05, "loss": 5.3756, "step": 4577 }, { "epoch": 0.3070731461917698, "grad_norm": 0.15200230678681445, "learning_rate": 2e-05, "loss": 5.4808, "step": 4578 }, { "epoch": 0.3071402220209947, "grad_norm": 0.14363550205383782, "learning_rate": 2e-05, "loss": 5.3754, "step": 4579 }, { "epoch": 0.30720729785021966, "grad_norm": 0.1447628684834803, "learning_rate": 2e-05, "loss": 5.4548, "step": 4580 }, { "epoch": 0.3072743736794446, "grad_norm": 0.15753178554253797, "learning_rate": 2e-05, "loss": 5.3933, "step": 4581 }, { "epoch": 0.30734144950866954, "grad_norm": 0.14679519097220706, "learning_rate": 2e-05, "loss": 5.4626, "step": 4582 }, { "epoch": 0.3074085253378945, "grad_norm": 0.14363546232400354, "learning_rate": 2e-05, "loss": 5.4223, "step": 4583 }, { "epoch": 0.3074756011671194, "grad_norm": 0.15183068539389968, "learning_rate": 2e-05, "loss": 5.5223, "step": 4584 }, { "epoch": 0.30754267699634436, "grad_norm": 0.14038481092313318, "learning_rate": 2e-05, "loss": 5.3977, "step": 4585 }, { "epoch": 0.3076097528255693, "grad_norm": 0.1418424237792973, "learning_rate": 2e-05, "loss": 5.3276, "step": 4586 }, { "epoch": 0.30767682865479423, "grad_norm": 0.15383409663030792, "learning_rate": 2e-05, "loss": 5.444, "step": 4587 }, { "epoch": 0.30774390448401917, "grad_norm": 0.15967788261473903, "learning_rate": 2e-05, "loss": 5.4705, "step": 4588 }, { "epoch": 0.3078109803132441, "grad_norm": 0.14969094118129644, "learning_rate": 2e-05, "loss": 5.4152, "step": 4589 }, { "epoch": 0.30787805614246905, "grad_norm": 0.14745971048770765, "learning_rate": 2e-05, "loss": 5.3486, "step": 4590 }, { "epoch": 0.307945131971694, "grad_norm": 0.15745697954461246, "learning_rate": 2e-05, "loss": 5.6226, "step": 4591 }, { "epoch": 0.3080122078009189, "grad_norm": 0.15786906703880435, "learning_rate": 2e-05, "loss": 5.4317, "step": 4592 }, { "epoch": 0.30807928363014386, "grad_norm": 0.14501931005816235, "learning_rate": 2e-05, "loss": 5.4067, "step": 4593 }, { "epoch": 0.3081463594593688, "grad_norm": 0.14361598899551511, "learning_rate": 2e-05, "loss": 5.5272, "step": 4594 }, { "epoch": 0.30821343528859374, "grad_norm": 0.14792513235163723, "learning_rate": 2e-05, "loss": 5.4956, "step": 4595 }, { "epoch": 0.3082805111178187, "grad_norm": 0.15465535288458548, "learning_rate": 2e-05, "loss": 5.3506, "step": 4596 }, { "epoch": 0.3083475869470436, "grad_norm": 0.14495288651397753, "learning_rate": 2e-05, "loss": 5.4252, "step": 4597 }, { "epoch": 0.30841466277626856, "grad_norm": 0.15300487205158467, "learning_rate": 2e-05, "loss": 5.4056, "step": 4598 }, { "epoch": 0.3084817386054935, "grad_norm": 0.14876130399963258, "learning_rate": 2e-05, "loss": 5.4785, "step": 4599 }, { "epoch": 0.30854881443471843, "grad_norm": 0.1423464324413054, "learning_rate": 2e-05, "loss": 5.3857, "step": 4600 }, { "epoch": 0.30861589026394337, "grad_norm": 0.15731168224052566, "learning_rate": 2e-05, "loss": 5.5269, "step": 4601 }, { "epoch": 0.3086829660931683, "grad_norm": 0.1620010090212436, "learning_rate": 2e-05, "loss": 5.3271, "step": 4602 }, { "epoch": 0.30875004192239325, "grad_norm": 0.14991006628716916, "learning_rate": 2e-05, "loss": 5.4247, "step": 4603 }, { "epoch": 0.3088171177516182, "grad_norm": 0.14214095946765495, "learning_rate": 2e-05, "loss": 5.4, "step": 4604 }, { "epoch": 0.3088841935808431, "grad_norm": 0.16150230263866622, "learning_rate": 2e-05, "loss": 5.4253, "step": 4605 }, { "epoch": 0.30895126941006806, "grad_norm": 0.16026261125907118, "learning_rate": 2e-05, "loss": 5.5439, "step": 4606 }, { "epoch": 0.309018345239293, "grad_norm": 0.15536526973490877, "learning_rate": 2e-05, "loss": 5.5165, "step": 4607 }, { "epoch": 0.30908542106851794, "grad_norm": 0.15216469849732597, "learning_rate": 2e-05, "loss": 5.4482, "step": 4608 }, { "epoch": 0.3091524968977429, "grad_norm": 0.14824895901006058, "learning_rate": 2e-05, "loss": 5.407, "step": 4609 }, { "epoch": 0.3092195727269678, "grad_norm": 0.1522997881433914, "learning_rate": 2e-05, "loss": 5.3092, "step": 4610 }, { "epoch": 0.30928664855619276, "grad_norm": 0.15110638672672808, "learning_rate": 2e-05, "loss": 5.4401, "step": 4611 }, { "epoch": 0.3093537243854177, "grad_norm": 0.14537346530778378, "learning_rate": 2e-05, "loss": 5.4677, "step": 4612 }, { "epoch": 0.30942080021464263, "grad_norm": 0.15775746568947666, "learning_rate": 2e-05, "loss": 5.4528, "step": 4613 }, { "epoch": 0.30948787604386757, "grad_norm": 0.15533239438218802, "learning_rate": 2e-05, "loss": 5.3861, "step": 4614 }, { "epoch": 0.3095549518730925, "grad_norm": 0.14632138277532383, "learning_rate": 2e-05, "loss": 5.4081, "step": 4615 }, { "epoch": 0.30962202770231745, "grad_norm": 0.15123899816137057, "learning_rate": 2e-05, "loss": 5.5185, "step": 4616 }, { "epoch": 0.3096891035315424, "grad_norm": 0.14271568522137856, "learning_rate": 2e-05, "loss": 5.347, "step": 4617 }, { "epoch": 0.3097561793607673, "grad_norm": 0.14510994714522116, "learning_rate": 2e-05, "loss": 5.4973, "step": 4618 }, { "epoch": 0.30982325518999226, "grad_norm": 0.15017700552454685, "learning_rate": 2e-05, "loss": 5.5167, "step": 4619 }, { "epoch": 0.3098903310192172, "grad_norm": 0.15142894928214884, "learning_rate": 2e-05, "loss": 5.4095, "step": 4620 }, { "epoch": 0.30995740684844214, "grad_norm": 0.14225233338234333, "learning_rate": 2e-05, "loss": 5.3923, "step": 4621 }, { "epoch": 0.3100244826776671, "grad_norm": 0.1473848677660478, "learning_rate": 2e-05, "loss": 5.4573, "step": 4622 }, { "epoch": 0.310091558506892, "grad_norm": 0.14288511483486166, "learning_rate": 2e-05, "loss": 5.6104, "step": 4623 }, { "epoch": 0.31015863433611696, "grad_norm": 0.15089326267976996, "learning_rate": 2e-05, "loss": 5.4536, "step": 4624 }, { "epoch": 0.3102257101653419, "grad_norm": 0.14846862621841705, "learning_rate": 2e-05, "loss": 5.234, "step": 4625 }, { "epoch": 0.31029278599456683, "grad_norm": 0.15060386874764256, "learning_rate": 2e-05, "loss": 5.5201, "step": 4626 }, { "epoch": 0.31035986182379177, "grad_norm": 0.15090976330737346, "learning_rate": 2e-05, "loss": 5.4227, "step": 4627 }, { "epoch": 0.3104269376530167, "grad_norm": 0.14073314404816134, "learning_rate": 2e-05, "loss": 5.4055, "step": 4628 }, { "epoch": 0.31049401348224165, "grad_norm": 0.15323888058481783, "learning_rate": 2e-05, "loss": 5.38, "step": 4629 }, { "epoch": 0.3105610893114666, "grad_norm": 0.14716898982435206, "learning_rate": 2e-05, "loss": 5.4463, "step": 4630 }, { "epoch": 0.3106281651406915, "grad_norm": 0.14605328877294618, "learning_rate": 2e-05, "loss": 5.5132, "step": 4631 }, { "epoch": 0.31069524096991646, "grad_norm": 0.1425854716182849, "learning_rate": 2e-05, "loss": 5.585, "step": 4632 }, { "epoch": 0.3107623167991414, "grad_norm": 0.15604133264864842, "learning_rate": 2e-05, "loss": 5.4403, "step": 4633 }, { "epoch": 0.31082939262836634, "grad_norm": 0.15036415422329483, "learning_rate": 2e-05, "loss": 5.4621, "step": 4634 }, { "epoch": 0.3108964684575913, "grad_norm": 0.14424847348985592, "learning_rate": 2e-05, "loss": 5.4606, "step": 4635 }, { "epoch": 0.3109635442868162, "grad_norm": 0.1499394480389941, "learning_rate": 2e-05, "loss": 5.4863, "step": 4636 }, { "epoch": 0.3110306201160412, "grad_norm": 0.1407350789205912, "learning_rate": 2e-05, "loss": 5.3907, "step": 4637 }, { "epoch": 0.31109769594526615, "grad_norm": 0.14619002858065103, "learning_rate": 2e-05, "loss": 5.3948, "step": 4638 }, { "epoch": 0.3111647717744911, "grad_norm": 0.1522452265177975, "learning_rate": 2e-05, "loss": 5.5317, "step": 4639 }, { "epoch": 0.311231847603716, "grad_norm": 0.1431449930115255, "learning_rate": 2e-05, "loss": 5.4087, "step": 4640 }, { "epoch": 0.31129892343294097, "grad_norm": 0.14315969648919447, "learning_rate": 2e-05, "loss": 5.3976, "step": 4641 }, { "epoch": 0.3113659992621659, "grad_norm": 0.13943149983887937, "learning_rate": 2e-05, "loss": 5.5114, "step": 4642 }, { "epoch": 0.31143307509139084, "grad_norm": 0.14567244969960513, "learning_rate": 2e-05, "loss": 5.4642, "step": 4643 }, { "epoch": 0.3115001509206158, "grad_norm": 0.14120557048515864, "learning_rate": 2e-05, "loss": 5.5554, "step": 4644 }, { "epoch": 0.3115672267498407, "grad_norm": 0.15183074023545304, "learning_rate": 2e-05, "loss": 5.5392, "step": 4645 }, { "epoch": 0.31163430257906566, "grad_norm": 0.14946198426061172, "learning_rate": 2e-05, "loss": 5.5169, "step": 4646 }, { "epoch": 0.3117013784082906, "grad_norm": 0.14417283524310517, "learning_rate": 2e-05, "loss": 5.48, "step": 4647 }, { "epoch": 0.31176845423751554, "grad_norm": 0.14732824013239007, "learning_rate": 2e-05, "loss": 5.4588, "step": 4648 }, { "epoch": 0.3118355300667405, "grad_norm": 0.14412069077433934, "learning_rate": 2e-05, "loss": 5.3134, "step": 4649 }, { "epoch": 0.3119026058959654, "grad_norm": 0.14451732839962822, "learning_rate": 2e-05, "loss": 5.4197, "step": 4650 }, { "epoch": 0.31196968172519035, "grad_norm": 0.14373478845230606, "learning_rate": 2e-05, "loss": 5.4058, "step": 4651 }, { "epoch": 0.3120367575544153, "grad_norm": 0.1436839560481511, "learning_rate": 2e-05, "loss": 5.4031, "step": 4652 }, { "epoch": 0.31210383338364023, "grad_norm": 0.14591202190633557, "learning_rate": 2e-05, "loss": 5.3453, "step": 4653 }, { "epoch": 0.31217090921286517, "grad_norm": 0.14228471315288413, "learning_rate": 2e-05, "loss": 5.4395, "step": 4654 }, { "epoch": 0.3122379850420901, "grad_norm": 0.14237126606985576, "learning_rate": 2e-05, "loss": 5.2793, "step": 4655 }, { "epoch": 0.31230506087131504, "grad_norm": 0.1410917291965898, "learning_rate": 2e-05, "loss": 5.4637, "step": 4656 }, { "epoch": 0.31237213670054, "grad_norm": 0.14002631141178787, "learning_rate": 2e-05, "loss": 5.4526, "step": 4657 }, { "epoch": 0.3124392125297649, "grad_norm": 0.1411799617673968, "learning_rate": 2e-05, "loss": 5.5, "step": 4658 }, { "epoch": 0.31250628835898986, "grad_norm": 0.1463924059688603, "learning_rate": 2e-05, "loss": 5.4871, "step": 4659 }, { "epoch": 0.3125733641882148, "grad_norm": 0.15064391107286537, "learning_rate": 2e-05, "loss": 5.5593, "step": 4660 }, { "epoch": 0.31264044001743974, "grad_norm": 0.1461214882747298, "learning_rate": 2e-05, "loss": 5.448, "step": 4661 }, { "epoch": 0.3127075158466647, "grad_norm": 0.14840558265759152, "learning_rate": 2e-05, "loss": 5.3461, "step": 4662 }, { "epoch": 0.3127745916758896, "grad_norm": 0.1536926969546078, "learning_rate": 2e-05, "loss": 5.428, "step": 4663 }, { "epoch": 0.31284166750511455, "grad_norm": 0.1445110462687453, "learning_rate": 2e-05, "loss": 5.4134, "step": 4664 }, { "epoch": 0.3129087433343395, "grad_norm": 0.14946865419914923, "learning_rate": 2e-05, "loss": 5.2978, "step": 4665 }, { "epoch": 0.31297581916356443, "grad_norm": 0.15711096753468695, "learning_rate": 2e-05, "loss": 5.5948, "step": 4666 }, { "epoch": 0.31304289499278937, "grad_norm": 0.14275900869180216, "learning_rate": 2e-05, "loss": 5.5266, "step": 4667 }, { "epoch": 0.3131099708220143, "grad_norm": 0.1455105983809484, "learning_rate": 2e-05, "loss": 5.4734, "step": 4668 }, { "epoch": 0.31317704665123924, "grad_norm": 0.15330006678307637, "learning_rate": 2e-05, "loss": 5.3295, "step": 4669 }, { "epoch": 0.3132441224804642, "grad_norm": 0.15319858311364123, "learning_rate": 2e-05, "loss": 5.4088, "step": 4670 }, { "epoch": 0.3133111983096891, "grad_norm": 0.14349906790757497, "learning_rate": 2e-05, "loss": 5.5378, "step": 4671 }, { "epoch": 0.31337827413891406, "grad_norm": 0.150311702395463, "learning_rate": 2e-05, "loss": 5.5919, "step": 4672 }, { "epoch": 0.313445349968139, "grad_norm": 0.15396721973440974, "learning_rate": 2e-05, "loss": 5.4483, "step": 4673 }, { "epoch": 0.31351242579736394, "grad_norm": 0.15316658396002145, "learning_rate": 2e-05, "loss": 5.51, "step": 4674 }, { "epoch": 0.3135795016265889, "grad_norm": 0.1478044495152701, "learning_rate": 2e-05, "loss": 5.6538, "step": 4675 }, { "epoch": 0.3136465774558138, "grad_norm": 0.14960402787176316, "learning_rate": 2e-05, "loss": 5.5027, "step": 4676 }, { "epoch": 0.31371365328503875, "grad_norm": 0.15884806436096643, "learning_rate": 2e-05, "loss": 5.4894, "step": 4677 }, { "epoch": 0.3137807291142637, "grad_norm": 0.14946571708083475, "learning_rate": 2e-05, "loss": 5.4844, "step": 4678 }, { "epoch": 0.31384780494348863, "grad_norm": 0.1503777717971093, "learning_rate": 2e-05, "loss": 5.5498, "step": 4679 }, { "epoch": 0.31391488077271357, "grad_norm": 0.1440984086855228, "learning_rate": 2e-05, "loss": 5.3886, "step": 4680 }, { "epoch": 0.3139819566019385, "grad_norm": 0.14665558870279036, "learning_rate": 2e-05, "loss": 5.3958, "step": 4681 }, { "epoch": 0.31404903243116344, "grad_norm": 0.14933084318356385, "learning_rate": 2e-05, "loss": 5.4682, "step": 4682 }, { "epoch": 0.3141161082603884, "grad_norm": 0.14505875839129379, "learning_rate": 2e-05, "loss": 5.5749, "step": 4683 }, { "epoch": 0.3141831840896133, "grad_norm": 0.15484454505398815, "learning_rate": 2e-05, "loss": 5.3743, "step": 4684 }, { "epoch": 0.31425025991883826, "grad_norm": 0.1420258687287775, "learning_rate": 2e-05, "loss": 5.5391, "step": 4685 }, { "epoch": 0.3143173357480632, "grad_norm": 0.14407515317823788, "learning_rate": 2e-05, "loss": 5.3228, "step": 4686 }, { "epoch": 0.31438441157728814, "grad_norm": 0.15503544703983277, "learning_rate": 2e-05, "loss": 5.5453, "step": 4687 }, { "epoch": 0.3144514874065131, "grad_norm": 0.14749812950048496, "learning_rate": 2e-05, "loss": 5.3035, "step": 4688 }, { "epoch": 0.314518563235738, "grad_norm": 0.1437354107601083, "learning_rate": 2e-05, "loss": 5.4418, "step": 4689 }, { "epoch": 0.31458563906496295, "grad_norm": 0.14882058536314352, "learning_rate": 2e-05, "loss": 5.4456, "step": 4690 }, { "epoch": 0.3146527148941879, "grad_norm": 0.14624550176637166, "learning_rate": 2e-05, "loss": 5.4727, "step": 4691 }, { "epoch": 0.31471979072341283, "grad_norm": 0.1446039821704901, "learning_rate": 2e-05, "loss": 5.396, "step": 4692 }, { "epoch": 0.31478686655263777, "grad_norm": 0.14479325536998452, "learning_rate": 2e-05, "loss": 5.5231, "step": 4693 }, { "epoch": 0.3148539423818627, "grad_norm": 0.1579748244592186, "learning_rate": 2e-05, "loss": 5.4198, "step": 4694 }, { "epoch": 0.31492101821108764, "grad_norm": 0.15290371296186447, "learning_rate": 2e-05, "loss": 5.3809, "step": 4695 }, { "epoch": 0.3149880940403126, "grad_norm": 0.14725952420414185, "learning_rate": 2e-05, "loss": 5.4038, "step": 4696 }, { "epoch": 0.3150551698695375, "grad_norm": 0.15125156856875202, "learning_rate": 2e-05, "loss": 5.4824, "step": 4697 }, { "epoch": 0.31512224569876246, "grad_norm": 0.1531431360262526, "learning_rate": 2e-05, "loss": 5.4163, "step": 4698 }, { "epoch": 0.3151893215279874, "grad_norm": 0.1517734706241143, "learning_rate": 2e-05, "loss": 5.4601, "step": 4699 }, { "epoch": 0.31525639735721234, "grad_norm": 0.15012128768977626, "learning_rate": 2e-05, "loss": 5.5149, "step": 4700 }, { "epoch": 0.3153234731864373, "grad_norm": 0.1560666739708711, "learning_rate": 2e-05, "loss": 5.3988, "step": 4701 }, { "epoch": 0.3153905490156622, "grad_norm": 0.14615546409618727, "learning_rate": 2e-05, "loss": 5.4201, "step": 4702 }, { "epoch": 0.31545762484488715, "grad_norm": 0.14962648930632258, "learning_rate": 2e-05, "loss": 5.4502, "step": 4703 }, { "epoch": 0.3155247006741121, "grad_norm": 0.14319170306951273, "learning_rate": 2e-05, "loss": 5.3679, "step": 4704 }, { "epoch": 0.31559177650333703, "grad_norm": 0.14483713386430172, "learning_rate": 2e-05, "loss": 5.5286, "step": 4705 }, { "epoch": 0.31565885233256197, "grad_norm": 0.14592036567890643, "learning_rate": 2e-05, "loss": 5.4533, "step": 4706 }, { "epoch": 0.3157259281617869, "grad_norm": 0.14722402600169546, "learning_rate": 2e-05, "loss": 5.3208, "step": 4707 }, { "epoch": 0.31579300399101184, "grad_norm": 0.15117506411364792, "learning_rate": 2e-05, "loss": 5.4518, "step": 4708 }, { "epoch": 0.3158600798202368, "grad_norm": 0.14185274858092073, "learning_rate": 2e-05, "loss": 5.4207, "step": 4709 }, { "epoch": 0.3159271556494617, "grad_norm": 0.1534262341444003, "learning_rate": 2e-05, "loss": 5.3699, "step": 4710 }, { "epoch": 0.31599423147868666, "grad_norm": 0.15169831531489686, "learning_rate": 2e-05, "loss": 5.4915, "step": 4711 }, { "epoch": 0.3160613073079116, "grad_norm": 0.14446867376133682, "learning_rate": 2e-05, "loss": 5.3297, "step": 4712 }, { "epoch": 0.31612838313713654, "grad_norm": 0.15366022082024802, "learning_rate": 2e-05, "loss": 5.3355, "step": 4713 }, { "epoch": 0.3161954589663615, "grad_norm": 0.14335065283408488, "learning_rate": 2e-05, "loss": 5.4457, "step": 4714 }, { "epoch": 0.3162625347955864, "grad_norm": 0.15613503239847298, "learning_rate": 2e-05, "loss": 5.4607, "step": 4715 }, { "epoch": 0.31632961062481135, "grad_norm": 0.14484645828050904, "learning_rate": 2e-05, "loss": 5.361, "step": 4716 }, { "epoch": 0.3163966864540363, "grad_norm": 0.1510328354254125, "learning_rate": 2e-05, "loss": 5.4835, "step": 4717 }, { "epoch": 0.31646376228326123, "grad_norm": 0.14657485629072275, "learning_rate": 2e-05, "loss": 5.3565, "step": 4718 }, { "epoch": 0.31653083811248617, "grad_norm": 0.1479704743625473, "learning_rate": 2e-05, "loss": 5.4947, "step": 4719 }, { "epoch": 0.3165979139417111, "grad_norm": 0.1523568714420074, "learning_rate": 2e-05, "loss": 5.6355, "step": 4720 }, { "epoch": 0.31666498977093605, "grad_norm": 0.14934225100829807, "learning_rate": 2e-05, "loss": 5.4797, "step": 4721 }, { "epoch": 0.316732065600161, "grad_norm": 0.1470077641602299, "learning_rate": 2e-05, "loss": 5.3323, "step": 4722 }, { "epoch": 0.3167991414293859, "grad_norm": 0.1394436599797149, "learning_rate": 2e-05, "loss": 5.434, "step": 4723 }, { "epoch": 0.31686621725861086, "grad_norm": 0.15439278899327236, "learning_rate": 2e-05, "loss": 5.5446, "step": 4724 }, { "epoch": 0.3169332930878358, "grad_norm": 0.14980120464455324, "learning_rate": 2e-05, "loss": 5.462, "step": 4725 }, { "epoch": 0.31700036891706074, "grad_norm": 0.1493621236521407, "learning_rate": 2e-05, "loss": 5.4467, "step": 4726 }, { "epoch": 0.3170674447462857, "grad_norm": 0.14429911955617658, "learning_rate": 2e-05, "loss": 5.4529, "step": 4727 }, { "epoch": 0.3171345205755106, "grad_norm": 0.1421161568822683, "learning_rate": 2e-05, "loss": 5.4598, "step": 4728 }, { "epoch": 0.31720159640473555, "grad_norm": 0.14774180445775875, "learning_rate": 2e-05, "loss": 5.4473, "step": 4729 }, { "epoch": 0.3172686722339605, "grad_norm": 0.15995317087946379, "learning_rate": 2e-05, "loss": 5.4206, "step": 4730 }, { "epoch": 0.31733574806318543, "grad_norm": 0.14866129658292343, "learning_rate": 2e-05, "loss": 5.5888, "step": 4731 }, { "epoch": 0.31740282389241037, "grad_norm": 0.14985841669877367, "learning_rate": 2e-05, "loss": 5.6124, "step": 4732 }, { "epoch": 0.3174698997216353, "grad_norm": 0.16094435505092466, "learning_rate": 2e-05, "loss": 5.3304, "step": 4733 }, { "epoch": 0.31753697555086025, "grad_norm": 0.15362408602840025, "learning_rate": 2e-05, "loss": 5.4647, "step": 4734 }, { "epoch": 0.3176040513800852, "grad_norm": 0.143677071327506, "learning_rate": 2e-05, "loss": 5.3371, "step": 4735 }, { "epoch": 0.3176711272093101, "grad_norm": 0.14602987194825723, "learning_rate": 2e-05, "loss": 5.6483, "step": 4736 }, { "epoch": 0.31773820303853506, "grad_norm": 0.15237480914998594, "learning_rate": 2e-05, "loss": 5.4591, "step": 4737 }, { "epoch": 0.31780527886776, "grad_norm": 0.14824345319282412, "learning_rate": 2e-05, "loss": 5.376, "step": 4738 }, { "epoch": 0.31787235469698494, "grad_norm": 0.15883711706339843, "learning_rate": 2e-05, "loss": 5.477, "step": 4739 }, { "epoch": 0.3179394305262099, "grad_norm": 0.1454450550875837, "learning_rate": 2e-05, "loss": 5.6264, "step": 4740 }, { "epoch": 0.3180065063554348, "grad_norm": 0.14704749231694028, "learning_rate": 2e-05, "loss": 5.3835, "step": 4741 }, { "epoch": 0.31807358218465975, "grad_norm": 0.14554647604095527, "learning_rate": 2e-05, "loss": 5.4988, "step": 4742 }, { "epoch": 0.3181406580138847, "grad_norm": 0.1511932394140102, "learning_rate": 2e-05, "loss": 5.4916, "step": 4743 }, { "epoch": 0.31820773384310963, "grad_norm": 0.14707892299115866, "learning_rate": 2e-05, "loss": 5.4415, "step": 4744 }, { "epoch": 0.31827480967233457, "grad_norm": 0.14467393200453876, "learning_rate": 2e-05, "loss": 5.5599, "step": 4745 }, { "epoch": 0.3183418855015595, "grad_norm": 0.15975680007363485, "learning_rate": 2e-05, "loss": 5.5861, "step": 4746 }, { "epoch": 0.31840896133078445, "grad_norm": 0.15137620633119026, "learning_rate": 2e-05, "loss": 5.4424, "step": 4747 }, { "epoch": 0.3184760371600094, "grad_norm": 0.15268109377579053, "learning_rate": 2e-05, "loss": 5.5097, "step": 4748 }, { "epoch": 0.3185431129892343, "grad_norm": 0.15046006544523788, "learning_rate": 2e-05, "loss": 5.4022, "step": 4749 }, { "epoch": 0.31861018881845926, "grad_norm": 0.1658988526593255, "learning_rate": 2e-05, "loss": 5.5396, "step": 4750 }, { "epoch": 0.3186772646476842, "grad_norm": 0.14491326978578156, "learning_rate": 2e-05, "loss": 5.3779, "step": 4751 }, { "epoch": 0.31874434047690914, "grad_norm": 0.15260638749243158, "learning_rate": 2e-05, "loss": 5.4634, "step": 4752 }, { "epoch": 0.3188114163061341, "grad_norm": 0.14933540160204478, "learning_rate": 2e-05, "loss": 5.5724, "step": 4753 }, { "epoch": 0.318878492135359, "grad_norm": 0.14962636880566121, "learning_rate": 2e-05, "loss": 5.4717, "step": 4754 }, { "epoch": 0.31894556796458395, "grad_norm": 0.14644842386667636, "learning_rate": 2e-05, "loss": 5.3352, "step": 4755 }, { "epoch": 0.3190126437938089, "grad_norm": 0.15026614217265347, "learning_rate": 2e-05, "loss": 5.4511, "step": 4756 }, { "epoch": 0.31907971962303383, "grad_norm": 0.14692957287857503, "learning_rate": 2e-05, "loss": 5.419, "step": 4757 }, { "epoch": 0.31914679545225877, "grad_norm": 0.14471908533492484, "learning_rate": 2e-05, "loss": 5.5519, "step": 4758 }, { "epoch": 0.3192138712814837, "grad_norm": 0.1491594873713398, "learning_rate": 2e-05, "loss": 5.399, "step": 4759 }, { "epoch": 0.31928094711070865, "grad_norm": 0.16606533319754455, "learning_rate": 2e-05, "loss": 5.4168, "step": 4760 }, { "epoch": 0.3193480229399336, "grad_norm": 0.14607620328453913, "learning_rate": 2e-05, "loss": 5.586, "step": 4761 }, { "epoch": 0.3194150987691585, "grad_norm": 0.14586120858903015, "learning_rate": 2e-05, "loss": 5.4502, "step": 4762 }, { "epoch": 0.31948217459838346, "grad_norm": 0.15819008715948688, "learning_rate": 2e-05, "loss": 5.4789, "step": 4763 }, { "epoch": 0.3195492504276084, "grad_norm": 0.14949134406021342, "learning_rate": 2e-05, "loss": 5.5142, "step": 4764 }, { "epoch": 0.31961632625683334, "grad_norm": 0.1466001625886673, "learning_rate": 2e-05, "loss": 5.3461, "step": 4765 }, { "epoch": 0.3196834020860583, "grad_norm": 0.1477185343348984, "learning_rate": 2e-05, "loss": 5.4547, "step": 4766 }, { "epoch": 0.3197504779152832, "grad_norm": 0.14969009617074489, "learning_rate": 2e-05, "loss": 5.3961, "step": 4767 }, { "epoch": 0.31981755374450815, "grad_norm": 0.1553312400981821, "learning_rate": 2e-05, "loss": 5.4109, "step": 4768 }, { "epoch": 0.3198846295737331, "grad_norm": 0.14465315410274657, "learning_rate": 2e-05, "loss": 5.4955, "step": 4769 }, { "epoch": 0.31995170540295803, "grad_norm": 0.16184405939010724, "learning_rate": 2e-05, "loss": 5.4794, "step": 4770 }, { "epoch": 0.32001878123218297, "grad_norm": 0.15251150252344112, "learning_rate": 2e-05, "loss": 5.2896, "step": 4771 }, { "epoch": 0.3200858570614079, "grad_norm": 0.15109039183776096, "learning_rate": 2e-05, "loss": 5.5683, "step": 4772 }, { "epoch": 0.32015293289063285, "grad_norm": 0.1449767853838985, "learning_rate": 2e-05, "loss": 5.4546, "step": 4773 }, { "epoch": 0.3202200087198578, "grad_norm": 0.14694110909330158, "learning_rate": 2e-05, "loss": 5.3911, "step": 4774 }, { "epoch": 0.3202870845490827, "grad_norm": 0.15683261424371475, "learning_rate": 2e-05, "loss": 5.5406, "step": 4775 }, { "epoch": 0.32035416037830766, "grad_norm": 0.14622637076989373, "learning_rate": 2e-05, "loss": 5.4904, "step": 4776 }, { "epoch": 0.3204212362075326, "grad_norm": 0.14315566670875357, "learning_rate": 2e-05, "loss": 5.5556, "step": 4777 }, { "epoch": 0.32048831203675754, "grad_norm": 0.1474608907668105, "learning_rate": 2e-05, "loss": 5.3365, "step": 4778 }, { "epoch": 0.3205553878659825, "grad_norm": 0.14853789818630225, "learning_rate": 2e-05, "loss": 5.5032, "step": 4779 }, { "epoch": 0.3206224636952074, "grad_norm": 0.15266675751804593, "learning_rate": 2e-05, "loss": 5.3744, "step": 4780 }, { "epoch": 0.32068953952443235, "grad_norm": 0.14667154761382814, "learning_rate": 2e-05, "loss": 5.4037, "step": 4781 }, { "epoch": 0.3207566153536573, "grad_norm": 0.14929532404162063, "learning_rate": 2e-05, "loss": 5.5752, "step": 4782 }, { "epoch": 0.32082369118288223, "grad_norm": 0.1486821931201904, "learning_rate": 2e-05, "loss": 5.3801, "step": 4783 }, { "epoch": 0.32089076701210717, "grad_norm": 0.14062531498184336, "learning_rate": 2e-05, "loss": 5.5166, "step": 4784 }, { "epoch": 0.3209578428413321, "grad_norm": 0.14236755053261024, "learning_rate": 2e-05, "loss": 5.4559, "step": 4785 }, { "epoch": 0.32102491867055705, "grad_norm": 0.14567562011579227, "learning_rate": 2e-05, "loss": 5.4372, "step": 4786 }, { "epoch": 0.321091994499782, "grad_norm": 0.14268656111256953, "learning_rate": 2e-05, "loss": 5.5666, "step": 4787 }, { "epoch": 0.3211590703290069, "grad_norm": 0.1482719639394982, "learning_rate": 2e-05, "loss": 5.4333, "step": 4788 }, { "epoch": 0.32122614615823186, "grad_norm": 0.14691233222852676, "learning_rate": 2e-05, "loss": 5.4586, "step": 4789 }, { "epoch": 0.3212932219874568, "grad_norm": 0.14033597358985855, "learning_rate": 2e-05, "loss": 5.3343, "step": 4790 }, { "epoch": 0.32136029781668174, "grad_norm": 0.14363998407166223, "learning_rate": 2e-05, "loss": 5.4079, "step": 4791 }, { "epoch": 0.3214273736459067, "grad_norm": 0.1432881892323226, "learning_rate": 2e-05, "loss": 5.4977, "step": 4792 }, { "epoch": 0.3214944494751316, "grad_norm": 0.15048344149161572, "learning_rate": 2e-05, "loss": 5.6211, "step": 4793 }, { "epoch": 0.32156152530435655, "grad_norm": 0.1421620678582059, "learning_rate": 2e-05, "loss": 5.4104, "step": 4794 }, { "epoch": 0.3216286011335815, "grad_norm": 0.14581668119721064, "learning_rate": 2e-05, "loss": 5.4768, "step": 4795 }, { "epoch": 0.32169567696280643, "grad_norm": 0.14136199042445885, "learning_rate": 2e-05, "loss": 5.5212, "step": 4796 }, { "epoch": 0.32176275279203137, "grad_norm": 0.1454030708012873, "learning_rate": 2e-05, "loss": 5.5339, "step": 4797 }, { "epoch": 0.3218298286212563, "grad_norm": 0.15106903827201995, "learning_rate": 2e-05, "loss": 5.4554, "step": 4798 }, { "epoch": 0.32189690445048125, "grad_norm": 0.1455465547486163, "learning_rate": 2e-05, "loss": 5.4455, "step": 4799 }, { "epoch": 0.3219639802797062, "grad_norm": 0.14988136149476178, "learning_rate": 2e-05, "loss": 5.3967, "step": 4800 }, { "epoch": 0.3220310561089311, "grad_norm": 0.15126855183560264, "learning_rate": 2e-05, "loss": 5.4331, "step": 4801 }, { "epoch": 0.32209813193815606, "grad_norm": 0.14620652027081413, "learning_rate": 2e-05, "loss": 5.4578, "step": 4802 }, { "epoch": 0.322165207767381, "grad_norm": 0.14730289169786262, "learning_rate": 2e-05, "loss": 5.3637, "step": 4803 }, { "epoch": 0.32223228359660594, "grad_norm": 0.15582217012477784, "learning_rate": 2e-05, "loss": 5.5836, "step": 4804 }, { "epoch": 0.3222993594258309, "grad_norm": 0.1487633828624673, "learning_rate": 2e-05, "loss": 5.3773, "step": 4805 }, { "epoch": 0.3223664352550558, "grad_norm": 0.14510647203747284, "learning_rate": 2e-05, "loss": 5.4049, "step": 4806 }, { "epoch": 0.32243351108428076, "grad_norm": 0.1496282338027695, "learning_rate": 2e-05, "loss": 5.3154, "step": 4807 }, { "epoch": 0.3225005869135057, "grad_norm": 0.14914789233926865, "learning_rate": 2e-05, "loss": 5.4238, "step": 4808 }, { "epoch": 0.32256766274273063, "grad_norm": 0.14940360591559043, "learning_rate": 2e-05, "loss": 5.4358, "step": 4809 }, { "epoch": 0.32263473857195557, "grad_norm": 0.15335938049795061, "learning_rate": 2e-05, "loss": 5.4302, "step": 4810 }, { "epoch": 0.3227018144011805, "grad_norm": 0.14506045573241302, "learning_rate": 2e-05, "loss": 5.4565, "step": 4811 }, { "epoch": 0.32276889023040545, "grad_norm": 0.14871601714412644, "learning_rate": 2e-05, "loss": 5.4624, "step": 4812 }, { "epoch": 0.3228359660596304, "grad_norm": 0.14765537786976948, "learning_rate": 2e-05, "loss": 5.5435, "step": 4813 }, { "epoch": 0.3229030418888553, "grad_norm": 0.14971927213614827, "learning_rate": 2e-05, "loss": 5.4723, "step": 4814 }, { "epoch": 0.32297011771808026, "grad_norm": 0.1429795603115765, "learning_rate": 2e-05, "loss": 5.3743, "step": 4815 }, { "epoch": 0.3230371935473052, "grad_norm": 0.14007582103198876, "learning_rate": 2e-05, "loss": 5.469, "step": 4816 }, { "epoch": 0.32310426937653014, "grad_norm": 0.152811926415283, "learning_rate": 2e-05, "loss": 5.6733, "step": 4817 }, { "epoch": 0.32317134520575513, "grad_norm": 0.15154827132119447, "learning_rate": 2e-05, "loss": 5.5133, "step": 4818 }, { "epoch": 0.3232384210349801, "grad_norm": 0.16652475018066876, "learning_rate": 2e-05, "loss": 5.5609, "step": 4819 }, { "epoch": 0.323305496864205, "grad_norm": 0.1625925691765137, "learning_rate": 2e-05, "loss": 5.5272, "step": 4820 }, { "epoch": 0.32337257269342995, "grad_norm": 0.15142228053819187, "learning_rate": 2e-05, "loss": 5.3194, "step": 4821 }, { "epoch": 0.3234396485226549, "grad_norm": 0.16102270190331786, "learning_rate": 2e-05, "loss": 5.4048, "step": 4822 }, { "epoch": 0.3235067243518798, "grad_norm": 0.16367187994916846, "learning_rate": 2e-05, "loss": 5.3721, "step": 4823 }, { "epoch": 0.32357380018110476, "grad_norm": 0.14454282646153518, "learning_rate": 2e-05, "loss": 5.4466, "step": 4824 }, { "epoch": 0.3236408760103297, "grad_norm": 0.15310675745523536, "learning_rate": 2e-05, "loss": 5.4402, "step": 4825 }, { "epoch": 0.32370795183955464, "grad_norm": 0.16102427002304626, "learning_rate": 2e-05, "loss": 5.4738, "step": 4826 }, { "epoch": 0.3237750276687796, "grad_norm": 0.15855364961893342, "learning_rate": 2e-05, "loss": 5.3506, "step": 4827 }, { "epoch": 0.3238421034980045, "grad_norm": 0.15253864334315728, "learning_rate": 2e-05, "loss": 5.343, "step": 4828 }, { "epoch": 0.32390917932722946, "grad_norm": 0.1553936645859567, "learning_rate": 2e-05, "loss": 5.4517, "step": 4829 }, { "epoch": 0.3239762551564544, "grad_norm": 0.15916161233210682, "learning_rate": 2e-05, "loss": 5.5743, "step": 4830 }, { "epoch": 0.32404333098567933, "grad_norm": 0.15115005205816967, "learning_rate": 2e-05, "loss": 5.488, "step": 4831 }, { "epoch": 0.3241104068149043, "grad_norm": 0.1554158188105201, "learning_rate": 2e-05, "loss": 5.4197, "step": 4832 }, { "epoch": 0.3241774826441292, "grad_norm": 0.15303457957679378, "learning_rate": 2e-05, "loss": 5.3729, "step": 4833 }, { "epoch": 0.32424455847335415, "grad_norm": 0.14623170214985606, "learning_rate": 2e-05, "loss": 5.3064, "step": 4834 }, { "epoch": 0.3243116343025791, "grad_norm": 0.1466786378747016, "learning_rate": 2e-05, "loss": 5.3937, "step": 4835 }, { "epoch": 0.324378710131804, "grad_norm": 0.1559167415693833, "learning_rate": 2e-05, "loss": 5.472, "step": 4836 }, { "epoch": 0.32444578596102897, "grad_norm": 0.1464503423821888, "learning_rate": 2e-05, "loss": 5.3334, "step": 4837 }, { "epoch": 0.3245128617902539, "grad_norm": 0.15580966582765746, "learning_rate": 2e-05, "loss": 5.6105, "step": 4838 }, { "epoch": 0.32457993761947884, "grad_norm": 0.15107655181583143, "learning_rate": 2e-05, "loss": 5.5347, "step": 4839 }, { "epoch": 0.3246470134487038, "grad_norm": 0.14920950325154939, "learning_rate": 2e-05, "loss": 5.397, "step": 4840 }, { "epoch": 0.3247140892779287, "grad_norm": 0.1521811313005385, "learning_rate": 2e-05, "loss": 5.4813, "step": 4841 }, { "epoch": 0.32478116510715366, "grad_norm": 0.15313905499000402, "learning_rate": 2e-05, "loss": 5.4505, "step": 4842 }, { "epoch": 0.3248482409363786, "grad_norm": 0.1465709951650294, "learning_rate": 2e-05, "loss": 5.5851, "step": 4843 }, { "epoch": 0.32491531676560353, "grad_norm": 0.14911324354493985, "learning_rate": 2e-05, "loss": 5.2937, "step": 4844 }, { "epoch": 0.3249823925948285, "grad_norm": 0.14973730755077483, "learning_rate": 2e-05, "loss": 5.539, "step": 4845 }, { "epoch": 0.3250494684240534, "grad_norm": 0.14632297264620814, "learning_rate": 2e-05, "loss": 5.486, "step": 4846 }, { "epoch": 0.32511654425327835, "grad_norm": 0.15194585941087044, "learning_rate": 2e-05, "loss": 5.4217, "step": 4847 }, { "epoch": 0.3251836200825033, "grad_norm": 0.15241222918971428, "learning_rate": 2e-05, "loss": 5.5148, "step": 4848 }, { "epoch": 0.3252506959117282, "grad_norm": 0.14737861749362985, "learning_rate": 2e-05, "loss": 5.4619, "step": 4849 }, { "epoch": 0.32531777174095317, "grad_norm": 0.14699167338104402, "learning_rate": 2e-05, "loss": 5.3436, "step": 4850 }, { "epoch": 0.3253848475701781, "grad_norm": 0.14921530344209905, "learning_rate": 2e-05, "loss": 5.3577, "step": 4851 }, { "epoch": 0.32545192339940304, "grad_norm": 0.14718435528823598, "learning_rate": 2e-05, "loss": 5.6717, "step": 4852 }, { "epoch": 0.325518999228628, "grad_norm": 0.15499913816501207, "learning_rate": 2e-05, "loss": 5.376, "step": 4853 }, { "epoch": 0.3255860750578529, "grad_norm": 0.15253677496831808, "learning_rate": 2e-05, "loss": 5.373, "step": 4854 }, { "epoch": 0.32565315088707786, "grad_norm": 0.16627481275503478, "learning_rate": 2e-05, "loss": 5.3885, "step": 4855 }, { "epoch": 0.3257202267163028, "grad_norm": 0.1520583471337725, "learning_rate": 2e-05, "loss": 5.5676, "step": 4856 }, { "epoch": 0.32578730254552773, "grad_norm": 0.14597748512585304, "learning_rate": 2e-05, "loss": 5.2946, "step": 4857 }, { "epoch": 0.3258543783747527, "grad_norm": 0.15442009027535508, "learning_rate": 2e-05, "loss": 5.456, "step": 4858 }, { "epoch": 0.3259214542039776, "grad_norm": 0.1570651840626258, "learning_rate": 2e-05, "loss": 5.3472, "step": 4859 }, { "epoch": 0.32598853003320255, "grad_norm": 0.14789565018701195, "learning_rate": 2e-05, "loss": 5.3773, "step": 4860 }, { "epoch": 0.3260556058624275, "grad_norm": 0.15644992158727256, "learning_rate": 2e-05, "loss": 5.4177, "step": 4861 }, { "epoch": 0.3261226816916524, "grad_norm": 0.1608760691965378, "learning_rate": 2e-05, "loss": 5.5442, "step": 4862 }, { "epoch": 0.32618975752087737, "grad_norm": 0.15134519174996106, "learning_rate": 2e-05, "loss": 5.4859, "step": 4863 }, { "epoch": 0.3262568333501023, "grad_norm": 0.1477824585246446, "learning_rate": 2e-05, "loss": 5.4099, "step": 4864 }, { "epoch": 0.32632390917932724, "grad_norm": 0.16272921804413215, "learning_rate": 2e-05, "loss": 5.413, "step": 4865 }, { "epoch": 0.3263909850085522, "grad_norm": 0.15041546324645658, "learning_rate": 2e-05, "loss": 5.3017, "step": 4866 }, { "epoch": 0.3264580608377771, "grad_norm": 0.1479048988365385, "learning_rate": 2e-05, "loss": 5.5009, "step": 4867 }, { "epoch": 0.32652513666700206, "grad_norm": 0.16496502703298468, "learning_rate": 2e-05, "loss": 5.4904, "step": 4868 }, { "epoch": 0.326592212496227, "grad_norm": 0.1623687571407885, "learning_rate": 2e-05, "loss": 5.4951, "step": 4869 }, { "epoch": 0.32665928832545194, "grad_norm": 0.15266573526881952, "learning_rate": 2e-05, "loss": 5.3608, "step": 4870 }, { "epoch": 0.3267263641546769, "grad_norm": 0.15024551658641463, "learning_rate": 2e-05, "loss": 5.499, "step": 4871 }, { "epoch": 0.3267934399839018, "grad_norm": 0.15796720590133523, "learning_rate": 2e-05, "loss": 5.4481, "step": 4872 }, { "epoch": 0.32686051581312675, "grad_norm": 0.15298261698983362, "learning_rate": 2e-05, "loss": 5.3727, "step": 4873 }, { "epoch": 0.3269275916423517, "grad_norm": 0.14747903140178706, "learning_rate": 2e-05, "loss": 5.375, "step": 4874 }, { "epoch": 0.3269946674715766, "grad_norm": 0.1549224130383679, "learning_rate": 2e-05, "loss": 5.4119, "step": 4875 }, { "epoch": 0.32706174330080157, "grad_norm": 0.15378800804499815, "learning_rate": 2e-05, "loss": 5.3748, "step": 4876 }, { "epoch": 0.3271288191300265, "grad_norm": 0.1464331346652091, "learning_rate": 2e-05, "loss": 5.534, "step": 4877 }, { "epoch": 0.32719589495925144, "grad_norm": 0.1603634574953315, "learning_rate": 2e-05, "loss": 5.3508, "step": 4878 }, { "epoch": 0.3272629707884764, "grad_norm": 0.1601791384508407, "learning_rate": 2e-05, "loss": 5.3508, "step": 4879 }, { "epoch": 0.3273300466177013, "grad_norm": 0.15149161469502975, "learning_rate": 2e-05, "loss": 5.4547, "step": 4880 }, { "epoch": 0.32739712244692626, "grad_norm": 0.15526259976898338, "learning_rate": 2e-05, "loss": 5.3564, "step": 4881 }, { "epoch": 0.3274641982761512, "grad_norm": 0.1437210514469127, "learning_rate": 2e-05, "loss": 5.5054, "step": 4882 }, { "epoch": 0.32753127410537614, "grad_norm": 0.14867695342662923, "learning_rate": 2e-05, "loss": 5.5627, "step": 4883 }, { "epoch": 0.3275983499346011, "grad_norm": 0.15084129484228181, "learning_rate": 2e-05, "loss": 5.4474, "step": 4884 }, { "epoch": 0.327665425763826, "grad_norm": 0.14889543516038425, "learning_rate": 2e-05, "loss": 5.4551, "step": 4885 }, { "epoch": 0.32773250159305095, "grad_norm": 0.15551330624893006, "learning_rate": 2e-05, "loss": 5.4261, "step": 4886 }, { "epoch": 0.3277995774222759, "grad_norm": 0.1447478114061947, "learning_rate": 2e-05, "loss": 5.4297, "step": 4887 }, { "epoch": 0.32786665325150083, "grad_norm": 0.15084496870106934, "learning_rate": 2e-05, "loss": 5.4294, "step": 4888 }, { "epoch": 0.32793372908072577, "grad_norm": 0.15374740906040166, "learning_rate": 2e-05, "loss": 5.3296, "step": 4889 }, { "epoch": 0.3280008049099507, "grad_norm": 0.1493727587292466, "learning_rate": 2e-05, "loss": 5.5519, "step": 4890 }, { "epoch": 0.32806788073917564, "grad_norm": 0.14955456600521946, "learning_rate": 2e-05, "loss": 5.3655, "step": 4891 }, { "epoch": 0.3281349565684006, "grad_norm": 0.15804807604157378, "learning_rate": 2e-05, "loss": 5.5491, "step": 4892 }, { "epoch": 0.3282020323976255, "grad_norm": 0.15204583204744546, "learning_rate": 2e-05, "loss": 5.3161, "step": 4893 }, { "epoch": 0.32826910822685046, "grad_norm": 0.1603535604499304, "learning_rate": 2e-05, "loss": 5.45, "step": 4894 }, { "epoch": 0.3283361840560754, "grad_norm": 0.1537690131588415, "learning_rate": 2e-05, "loss": 5.3966, "step": 4895 }, { "epoch": 0.32840325988530034, "grad_norm": 0.15015266451826612, "learning_rate": 2e-05, "loss": 5.3603, "step": 4896 }, { "epoch": 0.3284703357145253, "grad_norm": 0.16490466382144775, "learning_rate": 2e-05, "loss": 5.4114, "step": 4897 }, { "epoch": 0.3285374115437502, "grad_norm": 0.1487291331062549, "learning_rate": 2e-05, "loss": 5.4621, "step": 4898 }, { "epoch": 0.32860448737297515, "grad_norm": 0.15158323681699107, "learning_rate": 2e-05, "loss": 5.4702, "step": 4899 }, { "epoch": 0.3286715632022001, "grad_norm": 0.1527243872167189, "learning_rate": 2e-05, "loss": 5.3573, "step": 4900 }, { "epoch": 0.32873863903142503, "grad_norm": 0.1586032242564745, "learning_rate": 2e-05, "loss": 5.3158, "step": 4901 }, { "epoch": 0.32880571486064997, "grad_norm": 0.15546112987709979, "learning_rate": 2e-05, "loss": 5.4445, "step": 4902 }, { "epoch": 0.3288727906898749, "grad_norm": 0.15208789535982745, "learning_rate": 2e-05, "loss": 5.4297, "step": 4903 }, { "epoch": 0.32893986651909984, "grad_norm": 0.15344555987248673, "learning_rate": 2e-05, "loss": 5.3794, "step": 4904 }, { "epoch": 0.3290069423483248, "grad_norm": 0.1447745800852566, "learning_rate": 2e-05, "loss": 5.4708, "step": 4905 }, { "epoch": 0.3290740181775497, "grad_norm": 0.16700366368168737, "learning_rate": 2e-05, "loss": 5.5159, "step": 4906 }, { "epoch": 0.32914109400677466, "grad_norm": 0.13672254896418126, "learning_rate": 2e-05, "loss": 5.3821, "step": 4907 }, { "epoch": 0.3292081698359996, "grad_norm": 0.14452504643997985, "learning_rate": 2e-05, "loss": 5.4799, "step": 4908 }, { "epoch": 0.32927524566522454, "grad_norm": 0.1504504370855213, "learning_rate": 2e-05, "loss": 5.5415, "step": 4909 }, { "epoch": 0.3293423214944495, "grad_norm": 0.14947634586000694, "learning_rate": 2e-05, "loss": 5.3722, "step": 4910 }, { "epoch": 0.3294093973236744, "grad_norm": 0.1438055020821984, "learning_rate": 2e-05, "loss": 5.4617, "step": 4911 }, { "epoch": 0.32947647315289935, "grad_norm": 0.15095805781385754, "learning_rate": 2e-05, "loss": 5.3854, "step": 4912 }, { "epoch": 0.3295435489821243, "grad_norm": 0.1524013112972752, "learning_rate": 2e-05, "loss": 5.515, "step": 4913 }, { "epoch": 0.32961062481134923, "grad_norm": 0.15092124662112139, "learning_rate": 2e-05, "loss": 5.2374, "step": 4914 }, { "epoch": 0.32967770064057417, "grad_norm": 0.15149304660865387, "learning_rate": 2e-05, "loss": 5.5233, "step": 4915 }, { "epoch": 0.3297447764697991, "grad_norm": 0.14506842477953535, "learning_rate": 2e-05, "loss": 5.313, "step": 4916 }, { "epoch": 0.32981185229902404, "grad_norm": 0.14642296784713754, "learning_rate": 2e-05, "loss": 5.6227, "step": 4917 }, { "epoch": 0.329878928128249, "grad_norm": 0.1581759764406734, "learning_rate": 2e-05, "loss": 5.3536, "step": 4918 }, { "epoch": 0.3299460039574739, "grad_norm": 0.1466015280249873, "learning_rate": 2e-05, "loss": 5.7307, "step": 4919 }, { "epoch": 0.33001307978669886, "grad_norm": 0.14369858767757251, "learning_rate": 2e-05, "loss": 5.3318, "step": 4920 }, { "epoch": 0.3300801556159238, "grad_norm": 0.14950512662522442, "learning_rate": 2e-05, "loss": 5.4532, "step": 4921 }, { "epoch": 0.33014723144514874, "grad_norm": 0.15335723440485768, "learning_rate": 2e-05, "loss": 5.5117, "step": 4922 }, { "epoch": 0.3302143072743737, "grad_norm": 0.14728242672890066, "learning_rate": 2e-05, "loss": 5.4316, "step": 4923 }, { "epoch": 0.3302813831035986, "grad_norm": 0.15779964111075048, "learning_rate": 2e-05, "loss": 5.368, "step": 4924 }, { "epoch": 0.33034845893282355, "grad_norm": 0.15917094856818972, "learning_rate": 2e-05, "loss": 5.3708, "step": 4925 }, { "epoch": 0.3304155347620485, "grad_norm": 0.1570397447852906, "learning_rate": 2e-05, "loss": 5.5082, "step": 4926 }, { "epoch": 0.33048261059127343, "grad_norm": 0.15361510983801951, "learning_rate": 2e-05, "loss": 5.2906, "step": 4927 }, { "epoch": 0.33054968642049837, "grad_norm": 0.14771106042281923, "learning_rate": 2e-05, "loss": 5.4131, "step": 4928 }, { "epoch": 0.3306167622497233, "grad_norm": 0.14288778698684046, "learning_rate": 2e-05, "loss": 5.3912, "step": 4929 }, { "epoch": 0.33068383807894824, "grad_norm": 0.16217206589541752, "learning_rate": 2e-05, "loss": 5.346, "step": 4930 }, { "epoch": 0.3307509139081732, "grad_norm": 0.1532795907629775, "learning_rate": 2e-05, "loss": 5.5483, "step": 4931 }, { "epoch": 0.3308179897373981, "grad_norm": 0.14755566209017612, "learning_rate": 2e-05, "loss": 5.3428, "step": 4932 }, { "epoch": 0.33088506556662306, "grad_norm": 0.15302190426729573, "learning_rate": 2e-05, "loss": 5.411, "step": 4933 }, { "epoch": 0.330952141395848, "grad_norm": 0.15829028014942445, "learning_rate": 2e-05, "loss": 5.2906, "step": 4934 }, { "epoch": 0.33101921722507294, "grad_norm": 0.15157103143597975, "learning_rate": 2e-05, "loss": 5.5845, "step": 4935 }, { "epoch": 0.3310862930542979, "grad_norm": 0.15162863692490666, "learning_rate": 2e-05, "loss": 5.3532, "step": 4936 }, { "epoch": 0.3311533688835228, "grad_norm": 0.1532964949218231, "learning_rate": 2e-05, "loss": 5.5429, "step": 4937 }, { "epoch": 0.33122044471274775, "grad_norm": 0.14313243243182644, "learning_rate": 2e-05, "loss": 5.3481, "step": 4938 }, { "epoch": 0.3312875205419727, "grad_norm": 0.15005477530623978, "learning_rate": 2e-05, "loss": 5.4408, "step": 4939 }, { "epoch": 0.33135459637119763, "grad_norm": 0.15320504116142544, "learning_rate": 2e-05, "loss": 5.3913, "step": 4940 }, { "epoch": 0.33142167220042257, "grad_norm": 0.15112906135479812, "learning_rate": 2e-05, "loss": 5.314, "step": 4941 }, { "epoch": 0.3314887480296475, "grad_norm": 0.15181364599303251, "learning_rate": 2e-05, "loss": 5.5866, "step": 4942 }, { "epoch": 0.33155582385887244, "grad_norm": 0.16604673590594168, "learning_rate": 2e-05, "loss": 5.4114, "step": 4943 }, { "epoch": 0.3316228996880974, "grad_norm": 0.15145363424491115, "learning_rate": 2e-05, "loss": 5.4415, "step": 4944 }, { "epoch": 0.3316899755173223, "grad_norm": 0.13881088136974254, "learning_rate": 2e-05, "loss": 5.3896, "step": 4945 }, { "epoch": 0.33175705134654726, "grad_norm": 0.15263046131887917, "learning_rate": 2e-05, "loss": 5.4884, "step": 4946 }, { "epoch": 0.3318241271757722, "grad_norm": 0.15458347920415602, "learning_rate": 2e-05, "loss": 5.335, "step": 4947 }, { "epoch": 0.33189120300499714, "grad_norm": 0.15161263376761253, "learning_rate": 2e-05, "loss": 5.3876, "step": 4948 }, { "epoch": 0.3319582788342221, "grad_norm": 0.16150234977754466, "learning_rate": 2e-05, "loss": 5.2274, "step": 4949 }, { "epoch": 0.332025354663447, "grad_norm": 0.1576377745233706, "learning_rate": 2e-05, "loss": 5.4598, "step": 4950 }, { "epoch": 0.33209243049267195, "grad_norm": 0.14322849080906128, "learning_rate": 2e-05, "loss": 5.3984, "step": 4951 }, { "epoch": 0.3321595063218969, "grad_norm": 0.1490021641208481, "learning_rate": 2e-05, "loss": 5.3603, "step": 4952 }, { "epoch": 0.33222658215112183, "grad_norm": 0.15354156082442474, "learning_rate": 2e-05, "loss": 5.4984, "step": 4953 }, { "epoch": 0.33229365798034677, "grad_norm": 0.1536383356974661, "learning_rate": 2e-05, "loss": 5.323, "step": 4954 }, { "epoch": 0.3323607338095717, "grad_norm": 0.14808021268218938, "learning_rate": 2e-05, "loss": 5.3162, "step": 4955 }, { "epoch": 0.33242780963879665, "grad_norm": 0.14900601076435924, "learning_rate": 2e-05, "loss": 5.349, "step": 4956 }, { "epoch": 0.3324948854680216, "grad_norm": 0.15024403248768345, "learning_rate": 2e-05, "loss": 5.5121, "step": 4957 }, { "epoch": 0.3325619612972465, "grad_norm": 0.1461424633271232, "learning_rate": 2e-05, "loss": 5.4206, "step": 4958 }, { "epoch": 0.33262903712647146, "grad_norm": 0.15650437885931862, "learning_rate": 2e-05, "loss": 5.4179, "step": 4959 }, { "epoch": 0.3326961129556964, "grad_norm": 0.15007645104289513, "learning_rate": 2e-05, "loss": 5.4365, "step": 4960 }, { "epoch": 0.33276318878492134, "grad_norm": 0.14683110143038222, "learning_rate": 2e-05, "loss": 5.4525, "step": 4961 }, { "epoch": 0.3328302646141463, "grad_norm": 0.15661969774289697, "learning_rate": 2e-05, "loss": 5.3209, "step": 4962 }, { "epoch": 0.3328973404433712, "grad_norm": 0.1583038759251261, "learning_rate": 2e-05, "loss": 5.4796, "step": 4963 }, { "epoch": 0.33296441627259615, "grad_norm": 0.14664745805358984, "learning_rate": 2e-05, "loss": 5.4342, "step": 4964 }, { "epoch": 0.3330314921018211, "grad_norm": 0.1489718735058706, "learning_rate": 2e-05, "loss": 5.404, "step": 4965 }, { "epoch": 0.33309856793104603, "grad_norm": 0.14126125186740965, "learning_rate": 2e-05, "loss": 5.5438, "step": 4966 }, { "epoch": 0.33316564376027097, "grad_norm": 0.15321073295001209, "learning_rate": 2e-05, "loss": 5.4212, "step": 4967 }, { "epoch": 0.3332327195894959, "grad_norm": 0.14393108242375746, "learning_rate": 2e-05, "loss": 5.4771, "step": 4968 }, { "epoch": 0.33329979541872085, "grad_norm": 0.14909762890709685, "learning_rate": 2e-05, "loss": 5.476, "step": 4969 }, { "epoch": 0.3333668712479458, "grad_norm": 0.14495631012481533, "learning_rate": 2e-05, "loss": 5.3794, "step": 4970 }, { "epoch": 0.3334339470771707, "grad_norm": 0.15022708991161957, "learning_rate": 2e-05, "loss": 5.5411, "step": 4971 }, { "epoch": 0.33350102290639566, "grad_norm": 0.14458705809847766, "learning_rate": 2e-05, "loss": 5.3376, "step": 4972 }, { "epoch": 0.3335680987356206, "grad_norm": 0.14172222981178004, "learning_rate": 2e-05, "loss": 5.4787, "step": 4973 }, { "epoch": 0.33363517456484554, "grad_norm": 0.14428962914230242, "learning_rate": 2e-05, "loss": 5.2374, "step": 4974 }, { "epoch": 0.3337022503940705, "grad_norm": 0.14515079274135811, "learning_rate": 2e-05, "loss": 5.4342, "step": 4975 }, { "epoch": 0.3337693262232954, "grad_norm": 0.14319913897522119, "learning_rate": 2e-05, "loss": 5.3968, "step": 4976 }, { "epoch": 0.33383640205252035, "grad_norm": 0.14474456420284734, "learning_rate": 2e-05, "loss": 5.4133, "step": 4977 }, { "epoch": 0.3339034778817453, "grad_norm": 0.1491796631745555, "learning_rate": 2e-05, "loss": 5.3199, "step": 4978 }, { "epoch": 0.33397055371097023, "grad_norm": 0.14527875385390626, "learning_rate": 2e-05, "loss": 5.3612, "step": 4979 }, { "epoch": 0.33403762954019517, "grad_norm": 0.14267925447638172, "learning_rate": 2e-05, "loss": 5.3851, "step": 4980 }, { "epoch": 0.3341047053694201, "grad_norm": 0.14504510686271152, "learning_rate": 2e-05, "loss": 5.4632, "step": 4981 }, { "epoch": 0.33417178119864505, "grad_norm": 0.14579766709189299, "learning_rate": 2e-05, "loss": 5.5059, "step": 4982 }, { "epoch": 0.33423885702787, "grad_norm": 0.1418748241572961, "learning_rate": 2e-05, "loss": 5.5389, "step": 4983 }, { "epoch": 0.3343059328570949, "grad_norm": 0.14652261344353687, "learning_rate": 2e-05, "loss": 5.5008, "step": 4984 }, { "epoch": 0.33437300868631986, "grad_norm": 0.15187027888096688, "learning_rate": 2e-05, "loss": 5.3831, "step": 4985 }, { "epoch": 0.3344400845155448, "grad_norm": 0.148567254921888, "learning_rate": 2e-05, "loss": 5.5144, "step": 4986 }, { "epoch": 0.33450716034476974, "grad_norm": 0.1387293717880951, "learning_rate": 2e-05, "loss": 5.4138, "step": 4987 }, { "epoch": 0.3345742361739947, "grad_norm": 0.13972123444970388, "learning_rate": 2e-05, "loss": 5.6487, "step": 4988 }, { "epoch": 0.3346413120032196, "grad_norm": 0.1549756640709451, "learning_rate": 2e-05, "loss": 5.3428, "step": 4989 }, { "epoch": 0.33470838783244455, "grad_norm": 0.14460314065961194, "learning_rate": 2e-05, "loss": 5.4511, "step": 4990 }, { "epoch": 0.3347754636616695, "grad_norm": 0.14568392348800133, "learning_rate": 2e-05, "loss": 5.3661, "step": 4991 }, { "epoch": 0.33484253949089443, "grad_norm": 0.15775530389517214, "learning_rate": 2e-05, "loss": 5.5294, "step": 4992 }, { "epoch": 0.33490961532011937, "grad_norm": 0.14623930618416095, "learning_rate": 2e-05, "loss": 5.563, "step": 4993 }, { "epoch": 0.3349766911493443, "grad_norm": 0.14861483843638745, "learning_rate": 2e-05, "loss": 5.4286, "step": 4994 }, { "epoch": 0.33504376697856925, "grad_norm": 0.14132549979065503, "learning_rate": 2e-05, "loss": 5.5107, "step": 4995 }, { "epoch": 0.3351108428077942, "grad_norm": 0.15705301663691884, "learning_rate": 2e-05, "loss": 5.5543, "step": 4996 }, { "epoch": 0.3351779186370191, "grad_norm": 0.14995392048119274, "learning_rate": 2e-05, "loss": 5.39, "step": 4997 }, { "epoch": 0.33524499446624406, "grad_norm": 0.14673390784312393, "learning_rate": 2e-05, "loss": 5.4117, "step": 4998 }, { "epoch": 0.335312070295469, "grad_norm": 0.1521567390380464, "learning_rate": 2e-05, "loss": 5.6023, "step": 4999 }, { "epoch": 0.335379146124694, "grad_norm": 0.14767372840167964, "learning_rate": 2e-05, "loss": 5.4305, "step": 5000 }, { "epoch": 0.33544622195391893, "grad_norm": 0.14824962194192806, "learning_rate": 2e-05, "loss": 5.4797, "step": 5001 }, { "epoch": 0.33551329778314387, "grad_norm": 0.14877878744279055, "learning_rate": 2e-05, "loss": 5.378, "step": 5002 }, { "epoch": 0.3355803736123688, "grad_norm": 0.15415716757625247, "learning_rate": 2e-05, "loss": 5.4589, "step": 5003 }, { "epoch": 0.33564744944159375, "grad_norm": 0.14554973182504086, "learning_rate": 2e-05, "loss": 5.456, "step": 5004 }, { "epoch": 0.3357145252708187, "grad_norm": 0.1493642836268805, "learning_rate": 2e-05, "loss": 5.3014, "step": 5005 }, { "epoch": 0.3357816011000436, "grad_norm": 0.1485094523771149, "learning_rate": 2e-05, "loss": 5.4269, "step": 5006 }, { "epoch": 0.33584867692926856, "grad_norm": 0.14556565883056555, "learning_rate": 2e-05, "loss": 5.3716, "step": 5007 }, { "epoch": 0.3359157527584935, "grad_norm": 0.1431052067349577, "learning_rate": 2e-05, "loss": 5.4625, "step": 5008 }, { "epoch": 0.33598282858771844, "grad_norm": 0.1492047053114808, "learning_rate": 2e-05, "loss": 5.4874, "step": 5009 }, { "epoch": 0.3360499044169434, "grad_norm": 0.14514087373012474, "learning_rate": 2e-05, "loss": 5.3163, "step": 5010 }, { "epoch": 0.3361169802461683, "grad_norm": 0.15200541244578164, "learning_rate": 2e-05, "loss": 5.4649, "step": 5011 }, { "epoch": 0.33618405607539326, "grad_norm": 0.13969286625557828, "learning_rate": 2e-05, "loss": 5.4167, "step": 5012 }, { "epoch": 0.3362511319046182, "grad_norm": 0.14555934913253313, "learning_rate": 2e-05, "loss": 5.3767, "step": 5013 }, { "epoch": 0.33631820773384313, "grad_norm": 0.14367843444935469, "learning_rate": 2e-05, "loss": 5.4789, "step": 5014 }, { "epoch": 0.33638528356306807, "grad_norm": 0.14357279506947693, "learning_rate": 2e-05, "loss": 5.3924, "step": 5015 }, { "epoch": 0.336452359392293, "grad_norm": 0.1365281811335518, "learning_rate": 2e-05, "loss": 5.48, "step": 5016 }, { "epoch": 0.33651943522151795, "grad_norm": 0.14741406147724712, "learning_rate": 2e-05, "loss": 5.4845, "step": 5017 }, { "epoch": 0.3365865110507429, "grad_norm": 0.14189689386419613, "learning_rate": 2e-05, "loss": 5.5267, "step": 5018 }, { "epoch": 0.3366535868799678, "grad_norm": 0.14681574712704917, "learning_rate": 2e-05, "loss": 5.4156, "step": 5019 }, { "epoch": 0.33672066270919276, "grad_norm": 0.1477096521882693, "learning_rate": 2e-05, "loss": 5.5242, "step": 5020 }, { "epoch": 0.3367877385384177, "grad_norm": 0.1505791829080233, "learning_rate": 2e-05, "loss": 5.4237, "step": 5021 }, { "epoch": 0.33685481436764264, "grad_norm": 0.14989309802479092, "learning_rate": 2e-05, "loss": 5.4241, "step": 5022 }, { "epoch": 0.3369218901968676, "grad_norm": 0.14229141917951757, "learning_rate": 2e-05, "loss": 5.4733, "step": 5023 }, { "epoch": 0.3369889660260925, "grad_norm": 0.14543049873946765, "learning_rate": 2e-05, "loss": 5.3328, "step": 5024 }, { "epoch": 0.33705604185531746, "grad_norm": 0.15101729833307725, "learning_rate": 2e-05, "loss": 5.5538, "step": 5025 }, { "epoch": 0.3371231176845424, "grad_norm": 0.14748389433395945, "learning_rate": 2e-05, "loss": 5.3984, "step": 5026 }, { "epoch": 0.33719019351376733, "grad_norm": 0.14136112746819238, "learning_rate": 2e-05, "loss": 5.3058, "step": 5027 }, { "epoch": 0.33725726934299227, "grad_norm": 0.14540530396121973, "learning_rate": 2e-05, "loss": 5.4253, "step": 5028 }, { "epoch": 0.3373243451722172, "grad_norm": 0.15291602326456266, "learning_rate": 2e-05, "loss": 5.3431, "step": 5029 }, { "epoch": 0.33739142100144215, "grad_norm": 0.14313508984520887, "learning_rate": 2e-05, "loss": 5.4755, "step": 5030 }, { "epoch": 0.3374584968306671, "grad_norm": 0.14927963314633153, "learning_rate": 2e-05, "loss": 5.185, "step": 5031 }, { "epoch": 0.337525572659892, "grad_norm": 0.15513212878281346, "learning_rate": 2e-05, "loss": 5.3124, "step": 5032 }, { "epoch": 0.33759264848911696, "grad_norm": 0.14636638004395433, "learning_rate": 2e-05, "loss": 5.3366, "step": 5033 }, { "epoch": 0.3376597243183419, "grad_norm": 0.14909610356472933, "learning_rate": 2e-05, "loss": 5.5983, "step": 5034 }, { "epoch": 0.33772680014756684, "grad_norm": 0.14992893648937955, "learning_rate": 2e-05, "loss": 5.3666, "step": 5035 }, { "epoch": 0.3377938759767918, "grad_norm": 0.14821226747375255, "learning_rate": 2e-05, "loss": 5.3006, "step": 5036 }, { "epoch": 0.3378609518060167, "grad_norm": 0.1483688164402614, "learning_rate": 2e-05, "loss": 5.4773, "step": 5037 }, { "epoch": 0.33792802763524166, "grad_norm": 0.15691095802720081, "learning_rate": 2e-05, "loss": 5.4189, "step": 5038 }, { "epoch": 0.3379951034644666, "grad_norm": 0.14644764213583966, "learning_rate": 2e-05, "loss": 5.399, "step": 5039 }, { "epoch": 0.33806217929369153, "grad_norm": 0.14503174818408496, "learning_rate": 2e-05, "loss": 5.4705, "step": 5040 }, { "epoch": 0.33812925512291647, "grad_norm": 0.15205580924210266, "learning_rate": 2e-05, "loss": 5.49, "step": 5041 }, { "epoch": 0.3381963309521414, "grad_norm": 0.15309680595084088, "learning_rate": 2e-05, "loss": 5.5097, "step": 5042 }, { "epoch": 0.33826340678136635, "grad_norm": 0.15107317583840355, "learning_rate": 2e-05, "loss": 5.4355, "step": 5043 }, { "epoch": 0.3383304826105913, "grad_norm": 0.15677018526578623, "learning_rate": 2e-05, "loss": 5.4304, "step": 5044 }, { "epoch": 0.3383975584398162, "grad_norm": 0.14535874965084616, "learning_rate": 2e-05, "loss": 5.449, "step": 5045 }, { "epoch": 0.33846463426904116, "grad_norm": 0.15678572080709954, "learning_rate": 2e-05, "loss": 5.3081, "step": 5046 }, { "epoch": 0.3385317100982661, "grad_norm": 0.14317364867309867, "learning_rate": 2e-05, "loss": 5.4476, "step": 5047 }, { "epoch": 0.33859878592749104, "grad_norm": 0.15384037235000783, "learning_rate": 2e-05, "loss": 5.5361, "step": 5048 }, { "epoch": 0.338665861756716, "grad_norm": 0.15514704007007674, "learning_rate": 2e-05, "loss": 5.4407, "step": 5049 }, { "epoch": 0.3387329375859409, "grad_norm": 0.1537363737958236, "learning_rate": 2e-05, "loss": 5.4759, "step": 5050 }, { "epoch": 0.33880001341516586, "grad_norm": 0.14866399911132988, "learning_rate": 2e-05, "loss": 5.4886, "step": 5051 }, { "epoch": 0.3388670892443908, "grad_norm": 0.14764895178477416, "learning_rate": 2e-05, "loss": 5.4269, "step": 5052 }, { "epoch": 0.33893416507361573, "grad_norm": 0.1528396259523453, "learning_rate": 2e-05, "loss": 5.5595, "step": 5053 }, { "epoch": 0.3390012409028407, "grad_norm": 0.15076720498602397, "learning_rate": 2e-05, "loss": 5.4098, "step": 5054 }, { "epoch": 0.3390683167320656, "grad_norm": 0.14771899224803817, "learning_rate": 2e-05, "loss": 5.4248, "step": 5055 }, { "epoch": 0.33913539256129055, "grad_norm": 0.14164189609670205, "learning_rate": 2e-05, "loss": 5.4372, "step": 5056 }, { "epoch": 0.3392024683905155, "grad_norm": 0.1465670950394505, "learning_rate": 2e-05, "loss": 5.4566, "step": 5057 }, { "epoch": 0.3392695442197404, "grad_norm": 0.16751985006456613, "learning_rate": 2e-05, "loss": 5.3737, "step": 5058 }, { "epoch": 0.33933662004896537, "grad_norm": 0.15219993090641884, "learning_rate": 2e-05, "loss": 5.5595, "step": 5059 }, { "epoch": 0.3394036958781903, "grad_norm": 0.14815377771092478, "learning_rate": 2e-05, "loss": 5.462, "step": 5060 }, { "epoch": 0.33947077170741524, "grad_norm": 0.14071685835906506, "learning_rate": 2e-05, "loss": 5.333, "step": 5061 }, { "epoch": 0.3395378475366402, "grad_norm": 0.15225623560302795, "learning_rate": 2e-05, "loss": 5.5012, "step": 5062 }, { "epoch": 0.3396049233658651, "grad_norm": 0.14522381742278026, "learning_rate": 2e-05, "loss": 5.386, "step": 5063 }, { "epoch": 0.33967199919509006, "grad_norm": 0.14571425962341106, "learning_rate": 2e-05, "loss": 5.3564, "step": 5064 }, { "epoch": 0.339739075024315, "grad_norm": 0.14425833531598134, "learning_rate": 2e-05, "loss": 5.4685, "step": 5065 }, { "epoch": 0.33980615085353993, "grad_norm": 0.14899106045045124, "learning_rate": 2e-05, "loss": 5.3626, "step": 5066 }, { "epoch": 0.3398732266827649, "grad_norm": 0.1455809637009877, "learning_rate": 2e-05, "loss": 5.3617, "step": 5067 }, { "epoch": 0.3399403025119898, "grad_norm": 0.14641716696758614, "learning_rate": 2e-05, "loss": 5.4307, "step": 5068 }, { "epoch": 0.34000737834121475, "grad_norm": 0.160679892603039, "learning_rate": 2e-05, "loss": 5.3794, "step": 5069 }, { "epoch": 0.3400744541704397, "grad_norm": 0.15284986917423876, "learning_rate": 2e-05, "loss": 5.4404, "step": 5070 }, { "epoch": 0.3401415299996646, "grad_norm": 0.14810192522655632, "learning_rate": 2e-05, "loss": 5.4329, "step": 5071 }, { "epoch": 0.34020860582888957, "grad_norm": 0.14716895700969587, "learning_rate": 2e-05, "loss": 5.4477, "step": 5072 }, { "epoch": 0.3402756816581145, "grad_norm": 0.14654399690091047, "learning_rate": 2e-05, "loss": 5.4667, "step": 5073 }, { "epoch": 0.34034275748733944, "grad_norm": 0.1482924774762828, "learning_rate": 2e-05, "loss": 5.5026, "step": 5074 }, { "epoch": 0.3404098333165644, "grad_norm": 0.1442974232296339, "learning_rate": 2e-05, "loss": 5.4355, "step": 5075 }, { "epoch": 0.3404769091457893, "grad_norm": 0.15169316940290978, "learning_rate": 2e-05, "loss": 5.3812, "step": 5076 }, { "epoch": 0.34054398497501426, "grad_norm": 0.14832829417408497, "learning_rate": 2e-05, "loss": 5.4387, "step": 5077 }, { "epoch": 0.3406110608042392, "grad_norm": 0.14883942028416625, "learning_rate": 2e-05, "loss": 5.3984, "step": 5078 }, { "epoch": 0.34067813663346413, "grad_norm": 0.1417248409493068, "learning_rate": 2e-05, "loss": 5.3005, "step": 5079 }, { "epoch": 0.3407452124626891, "grad_norm": 0.14230087649820689, "learning_rate": 2e-05, "loss": 5.4334, "step": 5080 }, { "epoch": 0.340812288291914, "grad_norm": 0.15299695140911382, "learning_rate": 2e-05, "loss": 5.3335, "step": 5081 }, { "epoch": 0.34087936412113895, "grad_norm": 0.14569532061635596, "learning_rate": 2e-05, "loss": 5.4136, "step": 5082 }, { "epoch": 0.3409464399503639, "grad_norm": 0.1511845084960759, "learning_rate": 2e-05, "loss": 5.53, "step": 5083 }, { "epoch": 0.3410135157795888, "grad_norm": 0.15180553823244108, "learning_rate": 2e-05, "loss": 5.4317, "step": 5084 }, { "epoch": 0.34108059160881377, "grad_norm": 0.14377068642447766, "learning_rate": 2e-05, "loss": 5.5213, "step": 5085 }, { "epoch": 0.3411476674380387, "grad_norm": 0.15102515874733421, "learning_rate": 2e-05, "loss": 5.346, "step": 5086 }, { "epoch": 0.34121474326726364, "grad_norm": 0.1532780254807879, "learning_rate": 2e-05, "loss": 5.3399, "step": 5087 }, { "epoch": 0.3412818190964886, "grad_norm": 0.14282291657302737, "learning_rate": 2e-05, "loss": 5.3364, "step": 5088 }, { "epoch": 0.3413488949257135, "grad_norm": 0.15601539436224252, "learning_rate": 2e-05, "loss": 5.476, "step": 5089 }, { "epoch": 0.34141597075493846, "grad_norm": 0.1460241733026985, "learning_rate": 2e-05, "loss": 5.479, "step": 5090 }, { "epoch": 0.3414830465841634, "grad_norm": 0.13995626865829525, "learning_rate": 2e-05, "loss": 5.4734, "step": 5091 }, { "epoch": 0.34155012241338834, "grad_norm": 0.15413332374449182, "learning_rate": 2e-05, "loss": 5.422, "step": 5092 }, { "epoch": 0.3416171982426133, "grad_norm": 0.14663505786806075, "learning_rate": 2e-05, "loss": 5.4044, "step": 5093 }, { "epoch": 0.3416842740718382, "grad_norm": 0.15039794221837058, "learning_rate": 2e-05, "loss": 5.3436, "step": 5094 }, { "epoch": 0.34175134990106315, "grad_norm": 0.15232111935168902, "learning_rate": 2e-05, "loss": 5.6115, "step": 5095 }, { "epoch": 0.3418184257302881, "grad_norm": 0.15779900541038544, "learning_rate": 2e-05, "loss": 5.3535, "step": 5096 }, { "epoch": 0.341885501559513, "grad_norm": 0.14906803140086322, "learning_rate": 2e-05, "loss": 5.4533, "step": 5097 }, { "epoch": 0.34195257738873797, "grad_norm": 0.14552574898172513, "learning_rate": 2e-05, "loss": 5.4968, "step": 5098 }, { "epoch": 0.3420196532179629, "grad_norm": 0.15545552641999075, "learning_rate": 2e-05, "loss": 5.3566, "step": 5099 }, { "epoch": 0.34208672904718784, "grad_norm": 0.15216406205343933, "learning_rate": 2e-05, "loss": 5.5429, "step": 5100 }, { "epoch": 0.3421538048764128, "grad_norm": 0.14947469466902283, "learning_rate": 2e-05, "loss": 5.4013, "step": 5101 }, { "epoch": 0.3422208807056377, "grad_norm": 0.14792125148436158, "learning_rate": 2e-05, "loss": 5.5185, "step": 5102 }, { "epoch": 0.34228795653486266, "grad_norm": 0.14894503062645478, "learning_rate": 2e-05, "loss": 5.3764, "step": 5103 }, { "epoch": 0.3423550323640876, "grad_norm": 0.1518954923295519, "learning_rate": 2e-05, "loss": 5.5866, "step": 5104 }, { "epoch": 0.34242210819331254, "grad_norm": 0.16041759447284476, "learning_rate": 2e-05, "loss": 5.367, "step": 5105 }, { "epoch": 0.3424891840225375, "grad_norm": 0.14865956128812252, "learning_rate": 2e-05, "loss": 5.4374, "step": 5106 }, { "epoch": 0.3425562598517624, "grad_norm": 0.1440020644400577, "learning_rate": 2e-05, "loss": 5.3714, "step": 5107 }, { "epoch": 0.34262333568098735, "grad_norm": 0.14913045975732875, "learning_rate": 2e-05, "loss": 5.4837, "step": 5108 }, { "epoch": 0.3426904115102123, "grad_norm": 0.14733692277525778, "learning_rate": 2e-05, "loss": 5.4565, "step": 5109 }, { "epoch": 0.34275748733943723, "grad_norm": 0.14721510119234313, "learning_rate": 2e-05, "loss": 5.3829, "step": 5110 }, { "epoch": 0.34282456316866217, "grad_norm": 0.1476498774636057, "learning_rate": 2e-05, "loss": 5.3947, "step": 5111 }, { "epoch": 0.3428916389978871, "grad_norm": 0.14555368426337806, "learning_rate": 2e-05, "loss": 5.3047, "step": 5112 }, { "epoch": 0.34295871482711204, "grad_norm": 0.15102031664610527, "learning_rate": 2e-05, "loss": 5.417, "step": 5113 }, { "epoch": 0.343025790656337, "grad_norm": 0.14419373006420494, "learning_rate": 2e-05, "loss": 5.3179, "step": 5114 }, { "epoch": 0.3430928664855619, "grad_norm": 0.1429482412966286, "learning_rate": 2e-05, "loss": 5.5208, "step": 5115 }, { "epoch": 0.34315994231478686, "grad_norm": 0.15451069493650213, "learning_rate": 2e-05, "loss": 5.548, "step": 5116 }, { "epoch": 0.3432270181440118, "grad_norm": 0.14527823466699522, "learning_rate": 2e-05, "loss": 5.3694, "step": 5117 }, { "epoch": 0.34329409397323674, "grad_norm": 0.15257992533036024, "learning_rate": 2e-05, "loss": 5.5369, "step": 5118 }, { "epoch": 0.3433611698024617, "grad_norm": 0.14754850308439013, "learning_rate": 2e-05, "loss": 5.423, "step": 5119 }, { "epoch": 0.3434282456316866, "grad_norm": 0.14535355419151572, "learning_rate": 2e-05, "loss": 5.52, "step": 5120 }, { "epoch": 0.34349532146091155, "grad_norm": 0.16173474665370913, "learning_rate": 2e-05, "loss": 5.6329, "step": 5121 }, { "epoch": 0.3435623972901365, "grad_norm": 0.15208462046423007, "learning_rate": 2e-05, "loss": 5.5284, "step": 5122 }, { "epoch": 0.34362947311936143, "grad_norm": 0.1402745032975412, "learning_rate": 2e-05, "loss": 5.3445, "step": 5123 }, { "epoch": 0.34369654894858637, "grad_norm": 0.15316425227719213, "learning_rate": 2e-05, "loss": 5.4263, "step": 5124 }, { "epoch": 0.3437636247778113, "grad_norm": 0.149886773350697, "learning_rate": 2e-05, "loss": 5.4182, "step": 5125 }, { "epoch": 0.34383070060703624, "grad_norm": 0.15009277834640156, "learning_rate": 2e-05, "loss": 5.3994, "step": 5126 }, { "epoch": 0.3438977764362612, "grad_norm": 0.144365614773633, "learning_rate": 2e-05, "loss": 5.6116, "step": 5127 }, { "epoch": 0.3439648522654861, "grad_norm": 0.16046795731004523, "learning_rate": 2e-05, "loss": 5.4369, "step": 5128 }, { "epoch": 0.34403192809471106, "grad_norm": 0.1483597888309453, "learning_rate": 2e-05, "loss": 5.4992, "step": 5129 }, { "epoch": 0.344099003923936, "grad_norm": 0.1503877305963891, "learning_rate": 2e-05, "loss": 5.4582, "step": 5130 }, { "epoch": 0.34416607975316094, "grad_norm": 0.14869324559247787, "learning_rate": 2e-05, "loss": 5.4273, "step": 5131 }, { "epoch": 0.3442331555823859, "grad_norm": 0.1526167963468802, "learning_rate": 2e-05, "loss": 5.4793, "step": 5132 }, { "epoch": 0.3443002314116108, "grad_norm": 0.1452060920370571, "learning_rate": 2e-05, "loss": 5.4089, "step": 5133 }, { "epoch": 0.34436730724083575, "grad_norm": 0.14308101700513118, "learning_rate": 2e-05, "loss": 5.5038, "step": 5134 }, { "epoch": 0.3444343830700607, "grad_norm": 0.16303349045104953, "learning_rate": 2e-05, "loss": 5.4334, "step": 5135 }, { "epoch": 0.34450145889928563, "grad_norm": 0.15378575175382575, "learning_rate": 2e-05, "loss": 5.386, "step": 5136 }, { "epoch": 0.34456853472851057, "grad_norm": 0.1466039791767733, "learning_rate": 2e-05, "loss": 5.4247, "step": 5137 }, { "epoch": 0.3446356105577355, "grad_norm": 0.1476463273896999, "learning_rate": 2e-05, "loss": 5.4311, "step": 5138 }, { "epoch": 0.34470268638696044, "grad_norm": 0.14776687845951914, "learning_rate": 2e-05, "loss": 5.3965, "step": 5139 }, { "epoch": 0.3447697622161854, "grad_norm": 0.14661487356212113, "learning_rate": 2e-05, "loss": 5.4841, "step": 5140 }, { "epoch": 0.3448368380454103, "grad_norm": 0.15093893171586908, "learning_rate": 2e-05, "loss": 5.3159, "step": 5141 }, { "epoch": 0.34490391387463526, "grad_norm": 0.15007865346381366, "learning_rate": 2e-05, "loss": 5.3043, "step": 5142 }, { "epoch": 0.3449709897038602, "grad_norm": 0.14445797477309952, "learning_rate": 2e-05, "loss": 5.4487, "step": 5143 }, { "epoch": 0.34503806553308514, "grad_norm": 0.14729481011770787, "learning_rate": 2e-05, "loss": 5.3301, "step": 5144 }, { "epoch": 0.3451051413623101, "grad_norm": 0.15073863319676423, "learning_rate": 2e-05, "loss": 5.469, "step": 5145 }, { "epoch": 0.345172217191535, "grad_norm": 0.15076011306452702, "learning_rate": 2e-05, "loss": 5.3446, "step": 5146 }, { "epoch": 0.34523929302075995, "grad_norm": 0.14581200851203543, "learning_rate": 2e-05, "loss": 5.392, "step": 5147 }, { "epoch": 0.3453063688499849, "grad_norm": 0.14669439325753705, "learning_rate": 2e-05, "loss": 5.4645, "step": 5148 }, { "epoch": 0.34537344467920983, "grad_norm": 0.14904762507125538, "learning_rate": 2e-05, "loss": 5.4631, "step": 5149 }, { "epoch": 0.34544052050843477, "grad_norm": 0.14697435863911823, "learning_rate": 2e-05, "loss": 5.4587, "step": 5150 }, { "epoch": 0.3455075963376597, "grad_norm": 0.14037329701695794, "learning_rate": 2e-05, "loss": 5.4196, "step": 5151 }, { "epoch": 0.34557467216688464, "grad_norm": 0.14301935901310492, "learning_rate": 2e-05, "loss": 5.384, "step": 5152 }, { "epoch": 0.3456417479961096, "grad_norm": 0.14697510037264214, "learning_rate": 2e-05, "loss": 5.6395, "step": 5153 }, { "epoch": 0.3457088238253345, "grad_norm": 0.14847370739450716, "learning_rate": 2e-05, "loss": 5.395, "step": 5154 }, { "epoch": 0.34577589965455946, "grad_norm": 0.14069037252726643, "learning_rate": 2e-05, "loss": 5.3712, "step": 5155 }, { "epoch": 0.3458429754837844, "grad_norm": 0.14759606877900136, "learning_rate": 2e-05, "loss": 5.4374, "step": 5156 }, { "epoch": 0.34591005131300934, "grad_norm": 0.14305307269265016, "learning_rate": 2e-05, "loss": 5.3977, "step": 5157 }, { "epoch": 0.3459771271422343, "grad_norm": 0.15026908769664155, "learning_rate": 2e-05, "loss": 5.4241, "step": 5158 }, { "epoch": 0.3460442029714592, "grad_norm": 0.1444880807410971, "learning_rate": 2e-05, "loss": 5.3959, "step": 5159 }, { "epoch": 0.34611127880068415, "grad_norm": 0.1481152810532922, "learning_rate": 2e-05, "loss": 5.4217, "step": 5160 }, { "epoch": 0.3461783546299091, "grad_norm": 0.1458058337060952, "learning_rate": 2e-05, "loss": 5.333, "step": 5161 }, { "epoch": 0.34624543045913403, "grad_norm": 0.14887701424791697, "learning_rate": 2e-05, "loss": 5.3154, "step": 5162 }, { "epoch": 0.34631250628835897, "grad_norm": 0.15108812533369428, "learning_rate": 2e-05, "loss": 5.4646, "step": 5163 }, { "epoch": 0.3463795821175839, "grad_norm": 0.14739915154505717, "learning_rate": 2e-05, "loss": 5.5419, "step": 5164 }, { "epoch": 0.34644665794680884, "grad_norm": 0.15874554200389152, "learning_rate": 2e-05, "loss": 5.4369, "step": 5165 }, { "epoch": 0.3465137337760338, "grad_norm": 0.142606831432432, "learning_rate": 2e-05, "loss": 5.4613, "step": 5166 }, { "epoch": 0.3465808096052587, "grad_norm": 0.15030030939323338, "learning_rate": 2e-05, "loss": 5.5152, "step": 5167 }, { "epoch": 0.34664788543448366, "grad_norm": 0.14559824052826326, "learning_rate": 2e-05, "loss": 5.5129, "step": 5168 }, { "epoch": 0.3467149612637086, "grad_norm": 0.14322059123210432, "learning_rate": 2e-05, "loss": 5.2889, "step": 5169 }, { "epoch": 0.34678203709293354, "grad_norm": 0.14349016301057502, "learning_rate": 2e-05, "loss": 5.3897, "step": 5170 }, { "epoch": 0.3468491129221585, "grad_norm": 0.15468662524943422, "learning_rate": 2e-05, "loss": 5.3195, "step": 5171 }, { "epoch": 0.3469161887513834, "grad_norm": 0.14714625895185038, "learning_rate": 2e-05, "loss": 5.4657, "step": 5172 }, { "epoch": 0.34698326458060835, "grad_norm": 0.15362850127407718, "learning_rate": 2e-05, "loss": 5.592, "step": 5173 }, { "epoch": 0.3470503404098333, "grad_norm": 0.15295827249797345, "learning_rate": 2e-05, "loss": 5.502, "step": 5174 }, { "epoch": 0.34711741623905823, "grad_norm": 0.15919253104373307, "learning_rate": 2e-05, "loss": 5.4479, "step": 5175 }, { "epoch": 0.34718449206828317, "grad_norm": 0.14936731328691719, "learning_rate": 2e-05, "loss": 5.4146, "step": 5176 }, { "epoch": 0.3472515678975081, "grad_norm": 0.1508454914915626, "learning_rate": 2e-05, "loss": 5.4266, "step": 5177 }, { "epoch": 0.34731864372673305, "grad_norm": 0.15419537860105784, "learning_rate": 2e-05, "loss": 5.4526, "step": 5178 }, { "epoch": 0.347385719555958, "grad_norm": 0.15046529066226447, "learning_rate": 2e-05, "loss": 5.3727, "step": 5179 }, { "epoch": 0.3474527953851829, "grad_norm": 0.15291162974212805, "learning_rate": 2e-05, "loss": 5.3986, "step": 5180 }, { "epoch": 0.34751987121440786, "grad_norm": 0.155532902207235, "learning_rate": 2e-05, "loss": 5.3823, "step": 5181 }, { "epoch": 0.34758694704363285, "grad_norm": 0.14503590324608562, "learning_rate": 2e-05, "loss": 5.4725, "step": 5182 }, { "epoch": 0.3476540228728578, "grad_norm": 0.14787679680568916, "learning_rate": 2e-05, "loss": 5.3663, "step": 5183 }, { "epoch": 0.34772109870208273, "grad_norm": 0.1504502574308599, "learning_rate": 2e-05, "loss": 5.3704, "step": 5184 }, { "epoch": 0.34778817453130767, "grad_norm": 0.15058335066354758, "learning_rate": 2e-05, "loss": 5.3845, "step": 5185 }, { "epoch": 0.3478552503605326, "grad_norm": 0.15073542193278786, "learning_rate": 2e-05, "loss": 5.5058, "step": 5186 }, { "epoch": 0.34792232618975755, "grad_norm": 0.15314935676932923, "learning_rate": 2e-05, "loss": 5.4658, "step": 5187 }, { "epoch": 0.3479894020189825, "grad_norm": 0.14886798336462964, "learning_rate": 2e-05, "loss": 5.4823, "step": 5188 }, { "epoch": 0.3480564778482074, "grad_norm": 0.14869454585379094, "learning_rate": 2e-05, "loss": 5.5709, "step": 5189 }, { "epoch": 0.34812355367743236, "grad_norm": 0.1410557695369779, "learning_rate": 2e-05, "loss": 5.3984, "step": 5190 }, { "epoch": 0.3481906295066573, "grad_norm": 0.14811737964250485, "learning_rate": 2e-05, "loss": 5.4348, "step": 5191 }, { "epoch": 0.34825770533588224, "grad_norm": 0.1434071792216402, "learning_rate": 2e-05, "loss": 5.4756, "step": 5192 }, { "epoch": 0.3483247811651072, "grad_norm": 0.1489233346729354, "learning_rate": 2e-05, "loss": 5.4931, "step": 5193 }, { "epoch": 0.3483918569943321, "grad_norm": 0.14805127229397902, "learning_rate": 2e-05, "loss": 5.5233, "step": 5194 }, { "epoch": 0.34845893282355705, "grad_norm": 0.14244104141022254, "learning_rate": 2e-05, "loss": 5.4813, "step": 5195 }, { "epoch": 0.348526008652782, "grad_norm": 0.14648226921756205, "learning_rate": 2e-05, "loss": 5.2961, "step": 5196 }, { "epoch": 0.34859308448200693, "grad_norm": 0.15149624342804527, "learning_rate": 2e-05, "loss": 5.3099, "step": 5197 }, { "epoch": 0.34866016031123187, "grad_norm": 0.14244434000572836, "learning_rate": 2e-05, "loss": 5.3768, "step": 5198 }, { "epoch": 0.3487272361404568, "grad_norm": 0.1499318000304649, "learning_rate": 2e-05, "loss": 5.3787, "step": 5199 }, { "epoch": 0.34879431196968175, "grad_norm": 0.15587393974263983, "learning_rate": 2e-05, "loss": 5.4867, "step": 5200 }, { "epoch": 0.3488613877989067, "grad_norm": 0.1460650093387309, "learning_rate": 2e-05, "loss": 5.3641, "step": 5201 }, { "epoch": 0.3489284636281316, "grad_norm": 0.1524897863101171, "learning_rate": 2e-05, "loss": 5.4592, "step": 5202 }, { "epoch": 0.34899553945735656, "grad_norm": 0.16377656855769968, "learning_rate": 2e-05, "loss": 5.4518, "step": 5203 }, { "epoch": 0.3490626152865815, "grad_norm": 0.14848294017923336, "learning_rate": 2e-05, "loss": 5.3847, "step": 5204 }, { "epoch": 0.34912969111580644, "grad_norm": 0.15062476247109152, "learning_rate": 2e-05, "loss": 5.386, "step": 5205 }, { "epoch": 0.3491967669450314, "grad_norm": 0.15473687546002152, "learning_rate": 2e-05, "loss": 5.5323, "step": 5206 }, { "epoch": 0.3492638427742563, "grad_norm": 0.1423327004362045, "learning_rate": 2e-05, "loss": 5.3541, "step": 5207 }, { "epoch": 0.34933091860348126, "grad_norm": 0.1438742541550418, "learning_rate": 2e-05, "loss": 5.3359, "step": 5208 }, { "epoch": 0.3493979944327062, "grad_norm": 0.146295309597274, "learning_rate": 2e-05, "loss": 5.3723, "step": 5209 }, { "epoch": 0.34946507026193113, "grad_norm": 0.14936614206500587, "learning_rate": 2e-05, "loss": 5.3761, "step": 5210 }, { "epoch": 0.34953214609115607, "grad_norm": 0.14602333132587048, "learning_rate": 2e-05, "loss": 5.4269, "step": 5211 }, { "epoch": 0.349599221920381, "grad_norm": 0.14889872404205431, "learning_rate": 2e-05, "loss": 5.4273, "step": 5212 }, { "epoch": 0.34966629774960595, "grad_norm": 0.15449491200237794, "learning_rate": 2e-05, "loss": 5.2685, "step": 5213 }, { "epoch": 0.3497333735788309, "grad_norm": 0.14976721474677326, "learning_rate": 2e-05, "loss": 5.5031, "step": 5214 }, { "epoch": 0.3498004494080558, "grad_norm": 0.14063616649617897, "learning_rate": 2e-05, "loss": 5.318, "step": 5215 }, { "epoch": 0.34986752523728076, "grad_norm": 0.1434515057512941, "learning_rate": 2e-05, "loss": 5.5416, "step": 5216 }, { "epoch": 0.3499346010665057, "grad_norm": 0.14927318510527998, "learning_rate": 2e-05, "loss": 5.3272, "step": 5217 }, { "epoch": 0.35000167689573064, "grad_norm": 0.14895709984487637, "learning_rate": 2e-05, "loss": 5.3602, "step": 5218 }, { "epoch": 0.3500687527249556, "grad_norm": 0.1510104159538387, "learning_rate": 2e-05, "loss": 5.3817, "step": 5219 }, { "epoch": 0.3501358285541805, "grad_norm": 0.15491433242463407, "learning_rate": 2e-05, "loss": 5.465, "step": 5220 }, { "epoch": 0.35020290438340546, "grad_norm": 0.14335719372174233, "learning_rate": 2e-05, "loss": 5.3551, "step": 5221 }, { "epoch": 0.3502699802126304, "grad_norm": 0.14766262764803334, "learning_rate": 2e-05, "loss": 5.5219, "step": 5222 }, { "epoch": 0.35033705604185533, "grad_norm": 0.14373299802737988, "learning_rate": 2e-05, "loss": 5.6778, "step": 5223 }, { "epoch": 0.35040413187108027, "grad_norm": 0.1486580266553973, "learning_rate": 2e-05, "loss": 5.5063, "step": 5224 }, { "epoch": 0.3504712077003052, "grad_norm": 0.15251037334339454, "learning_rate": 2e-05, "loss": 5.4811, "step": 5225 }, { "epoch": 0.35053828352953015, "grad_norm": 0.15153215903599207, "learning_rate": 2e-05, "loss": 5.4778, "step": 5226 }, { "epoch": 0.3506053593587551, "grad_norm": 0.14881816388196678, "learning_rate": 2e-05, "loss": 5.5016, "step": 5227 }, { "epoch": 0.35067243518798, "grad_norm": 0.14068144698684956, "learning_rate": 2e-05, "loss": 5.3374, "step": 5228 }, { "epoch": 0.35073951101720496, "grad_norm": 0.14902852359246077, "learning_rate": 2e-05, "loss": 5.4672, "step": 5229 }, { "epoch": 0.3508065868464299, "grad_norm": 0.14621913159536207, "learning_rate": 2e-05, "loss": 5.4966, "step": 5230 }, { "epoch": 0.35087366267565484, "grad_norm": 0.15361224348853364, "learning_rate": 2e-05, "loss": 5.5021, "step": 5231 }, { "epoch": 0.3509407385048798, "grad_norm": 0.14957295026917647, "learning_rate": 2e-05, "loss": 5.5107, "step": 5232 }, { "epoch": 0.3510078143341047, "grad_norm": 0.14907651719146167, "learning_rate": 2e-05, "loss": 5.5201, "step": 5233 }, { "epoch": 0.35107489016332966, "grad_norm": 0.14736008225005953, "learning_rate": 2e-05, "loss": 5.3595, "step": 5234 }, { "epoch": 0.3511419659925546, "grad_norm": 0.14751305942518353, "learning_rate": 2e-05, "loss": 5.3125, "step": 5235 }, { "epoch": 0.35120904182177953, "grad_norm": 0.1378198137665972, "learning_rate": 2e-05, "loss": 5.4597, "step": 5236 }, { "epoch": 0.35127611765100447, "grad_norm": 0.14764281010984415, "learning_rate": 2e-05, "loss": 5.3325, "step": 5237 }, { "epoch": 0.3513431934802294, "grad_norm": 0.14602186175612378, "learning_rate": 2e-05, "loss": 5.3474, "step": 5238 }, { "epoch": 0.35141026930945435, "grad_norm": 0.14590927229163556, "learning_rate": 2e-05, "loss": 5.3926, "step": 5239 }, { "epoch": 0.3514773451386793, "grad_norm": 0.14789009444541937, "learning_rate": 2e-05, "loss": 5.3425, "step": 5240 }, { "epoch": 0.3515444209679042, "grad_norm": 0.15219684082113613, "learning_rate": 2e-05, "loss": 5.4379, "step": 5241 }, { "epoch": 0.35161149679712916, "grad_norm": 0.15561737320847224, "learning_rate": 2e-05, "loss": 5.4796, "step": 5242 }, { "epoch": 0.3516785726263541, "grad_norm": 0.15514816719554278, "learning_rate": 2e-05, "loss": 5.3938, "step": 5243 }, { "epoch": 0.35174564845557904, "grad_norm": 0.1454694189921067, "learning_rate": 2e-05, "loss": 5.4823, "step": 5244 }, { "epoch": 0.351812724284804, "grad_norm": 0.15181493966246498, "learning_rate": 2e-05, "loss": 5.6088, "step": 5245 }, { "epoch": 0.3518798001140289, "grad_norm": 0.1630078214754584, "learning_rate": 2e-05, "loss": 5.5013, "step": 5246 }, { "epoch": 0.35194687594325386, "grad_norm": 0.14609829414048703, "learning_rate": 2e-05, "loss": 5.4895, "step": 5247 }, { "epoch": 0.3520139517724788, "grad_norm": 0.1521682942782033, "learning_rate": 2e-05, "loss": 5.5515, "step": 5248 }, { "epoch": 0.35208102760170373, "grad_norm": 0.1555422012177205, "learning_rate": 2e-05, "loss": 5.5736, "step": 5249 }, { "epoch": 0.35214810343092867, "grad_norm": 0.14917210171013387, "learning_rate": 2e-05, "loss": 5.4361, "step": 5250 }, { "epoch": 0.3522151792601536, "grad_norm": 0.1505486066241177, "learning_rate": 2e-05, "loss": 5.336, "step": 5251 }, { "epoch": 0.35228225508937855, "grad_norm": 0.14512335494369497, "learning_rate": 2e-05, "loss": 5.3559, "step": 5252 }, { "epoch": 0.3523493309186035, "grad_norm": 0.15670852378946906, "learning_rate": 2e-05, "loss": 5.2959, "step": 5253 }, { "epoch": 0.3524164067478284, "grad_norm": 0.14834890924122335, "learning_rate": 2e-05, "loss": 5.4883, "step": 5254 }, { "epoch": 0.35248348257705336, "grad_norm": 0.15265941510568792, "learning_rate": 2e-05, "loss": 5.4249, "step": 5255 }, { "epoch": 0.3525505584062783, "grad_norm": 0.14563361833170285, "learning_rate": 2e-05, "loss": 5.4364, "step": 5256 }, { "epoch": 0.35261763423550324, "grad_norm": 0.14913907856542713, "learning_rate": 2e-05, "loss": 5.5352, "step": 5257 }, { "epoch": 0.3526847100647282, "grad_norm": 0.15518222921332991, "learning_rate": 2e-05, "loss": 5.3833, "step": 5258 }, { "epoch": 0.3527517858939531, "grad_norm": 0.15050071411139485, "learning_rate": 2e-05, "loss": 5.3821, "step": 5259 }, { "epoch": 0.35281886172317806, "grad_norm": 0.14513971252592228, "learning_rate": 2e-05, "loss": 5.4534, "step": 5260 }, { "epoch": 0.352885937552403, "grad_norm": 0.15031788218016473, "learning_rate": 2e-05, "loss": 5.371, "step": 5261 }, { "epoch": 0.35295301338162793, "grad_norm": 0.15881602051917124, "learning_rate": 2e-05, "loss": 5.4628, "step": 5262 }, { "epoch": 0.35302008921085287, "grad_norm": 0.15122978427203018, "learning_rate": 2e-05, "loss": 5.5502, "step": 5263 }, { "epoch": 0.3530871650400778, "grad_norm": 0.15265258557886197, "learning_rate": 2e-05, "loss": 5.5005, "step": 5264 }, { "epoch": 0.35315424086930275, "grad_norm": 0.16096587586023706, "learning_rate": 2e-05, "loss": 5.3849, "step": 5265 }, { "epoch": 0.3532213166985277, "grad_norm": 0.14911735109177193, "learning_rate": 2e-05, "loss": 5.57, "step": 5266 }, { "epoch": 0.3532883925277526, "grad_norm": 0.16077662025712217, "learning_rate": 2e-05, "loss": 5.3453, "step": 5267 }, { "epoch": 0.35335546835697756, "grad_norm": 0.14981137340515602, "learning_rate": 2e-05, "loss": 5.4684, "step": 5268 }, { "epoch": 0.3534225441862025, "grad_norm": 0.1485026233643727, "learning_rate": 2e-05, "loss": 5.4631, "step": 5269 }, { "epoch": 0.35348962001542744, "grad_norm": 0.15250737323010907, "learning_rate": 2e-05, "loss": 5.3803, "step": 5270 }, { "epoch": 0.3535566958446524, "grad_norm": 0.1439786701090003, "learning_rate": 2e-05, "loss": 5.4159, "step": 5271 }, { "epoch": 0.3536237716738773, "grad_norm": 0.14816280189115036, "learning_rate": 2e-05, "loss": 5.4049, "step": 5272 }, { "epoch": 0.35369084750310226, "grad_norm": 0.15256926817735894, "learning_rate": 2e-05, "loss": 5.5311, "step": 5273 }, { "epoch": 0.3537579233323272, "grad_norm": 0.15337196486874283, "learning_rate": 2e-05, "loss": 5.3014, "step": 5274 }, { "epoch": 0.35382499916155213, "grad_norm": 0.1545734788996869, "learning_rate": 2e-05, "loss": 5.3558, "step": 5275 }, { "epoch": 0.3538920749907771, "grad_norm": 0.16148267909567288, "learning_rate": 2e-05, "loss": 5.4534, "step": 5276 }, { "epoch": 0.353959150820002, "grad_norm": 0.14766623248719096, "learning_rate": 2e-05, "loss": 5.4285, "step": 5277 }, { "epoch": 0.35402622664922695, "grad_norm": 0.14473777313014374, "learning_rate": 2e-05, "loss": 5.5388, "step": 5278 }, { "epoch": 0.3540933024784519, "grad_norm": 0.15306247346805718, "learning_rate": 2e-05, "loss": 5.4105, "step": 5279 }, { "epoch": 0.3541603783076768, "grad_norm": 0.15725064282936788, "learning_rate": 2e-05, "loss": 5.3614, "step": 5280 }, { "epoch": 0.35422745413690176, "grad_norm": 0.16076948380186573, "learning_rate": 2e-05, "loss": 5.4023, "step": 5281 }, { "epoch": 0.3542945299661267, "grad_norm": 0.14621678588949044, "learning_rate": 2e-05, "loss": 5.3842, "step": 5282 }, { "epoch": 0.35436160579535164, "grad_norm": 0.15770720871043084, "learning_rate": 2e-05, "loss": 5.2895, "step": 5283 }, { "epoch": 0.3544286816245766, "grad_norm": 0.1456885552617241, "learning_rate": 2e-05, "loss": 5.4052, "step": 5284 }, { "epoch": 0.3544957574538015, "grad_norm": 0.14696114398476084, "learning_rate": 2e-05, "loss": 5.4904, "step": 5285 }, { "epoch": 0.35456283328302646, "grad_norm": 0.15091160431927883, "learning_rate": 2e-05, "loss": 5.5007, "step": 5286 }, { "epoch": 0.3546299091122514, "grad_norm": 0.15974368305000927, "learning_rate": 2e-05, "loss": 5.2927, "step": 5287 }, { "epoch": 0.35469698494147633, "grad_norm": 0.1456612519839073, "learning_rate": 2e-05, "loss": 5.5063, "step": 5288 }, { "epoch": 0.3547640607707013, "grad_norm": 0.14599105203998, "learning_rate": 2e-05, "loss": 5.4737, "step": 5289 }, { "epoch": 0.3548311365999262, "grad_norm": 0.14776374158847905, "learning_rate": 2e-05, "loss": 5.4719, "step": 5290 }, { "epoch": 0.35489821242915115, "grad_norm": 0.15106502005936834, "learning_rate": 2e-05, "loss": 5.312, "step": 5291 }, { "epoch": 0.3549652882583761, "grad_norm": 0.15885376766109824, "learning_rate": 2e-05, "loss": 5.316, "step": 5292 }, { "epoch": 0.355032364087601, "grad_norm": 0.15661223744453795, "learning_rate": 2e-05, "loss": 5.3554, "step": 5293 }, { "epoch": 0.35509943991682597, "grad_norm": 0.1540948577644022, "learning_rate": 2e-05, "loss": 5.5166, "step": 5294 }, { "epoch": 0.3551665157460509, "grad_norm": 0.15323924790843024, "learning_rate": 2e-05, "loss": 5.31, "step": 5295 }, { "epoch": 0.35523359157527584, "grad_norm": 0.15044656238938336, "learning_rate": 2e-05, "loss": 5.5043, "step": 5296 }, { "epoch": 0.3553006674045008, "grad_norm": 0.15970305640259166, "learning_rate": 2e-05, "loss": 5.323, "step": 5297 }, { "epoch": 0.3553677432337257, "grad_norm": 0.1465505615516917, "learning_rate": 2e-05, "loss": 5.4317, "step": 5298 }, { "epoch": 0.35543481906295066, "grad_norm": 0.15189756429167792, "learning_rate": 2e-05, "loss": 5.5519, "step": 5299 }, { "epoch": 0.3555018948921756, "grad_norm": 0.14859591267904004, "learning_rate": 2e-05, "loss": 5.4759, "step": 5300 }, { "epoch": 0.35556897072140053, "grad_norm": 0.15517661718147013, "learning_rate": 2e-05, "loss": 5.5167, "step": 5301 }, { "epoch": 0.3556360465506255, "grad_norm": 0.14705188080798362, "learning_rate": 2e-05, "loss": 5.4043, "step": 5302 }, { "epoch": 0.3557031223798504, "grad_norm": 0.14556540002271984, "learning_rate": 2e-05, "loss": 5.4187, "step": 5303 }, { "epoch": 0.35577019820907535, "grad_norm": 0.15165302983892465, "learning_rate": 2e-05, "loss": 5.5175, "step": 5304 }, { "epoch": 0.3558372740383003, "grad_norm": 0.1440266313796821, "learning_rate": 2e-05, "loss": 5.4385, "step": 5305 }, { "epoch": 0.3559043498675252, "grad_norm": 0.15664477010070635, "learning_rate": 2e-05, "loss": 5.4896, "step": 5306 }, { "epoch": 0.35597142569675017, "grad_norm": 0.1440972793144366, "learning_rate": 2e-05, "loss": 5.3342, "step": 5307 }, { "epoch": 0.3560385015259751, "grad_norm": 0.14655709096450653, "learning_rate": 2e-05, "loss": 5.4406, "step": 5308 }, { "epoch": 0.35610557735520004, "grad_norm": 0.1520733910085585, "learning_rate": 2e-05, "loss": 5.4547, "step": 5309 }, { "epoch": 0.356172653184425, "grad_norm": 0.1480650319562172, "learning_rate": 2e-05, "loss": 5.5771, "step": 5310 }, { "epoch": 0.3562397290136499, "grad_norm": 0.1509816259325245, "learning_rate": 2e-05, "loss": 5.3344, "step": 5311 }, { "epoch": 0.35630680484287486, "grad_norm": 0.15392105406832088, "learning_rate": 2e-05, "loss": 5.5355, "step": 5312 }, { "epoch": 0.3563738806720998, "grad_norm": 0.14958227756520753, "learning_rate": 2e-05, "loss": 5.5027, "step": 5313 }, { "epoch": 0.35644095650132473, "grad_norm": 0.1461636057737527, "learning_rate": 2e-05, "loss": 5.5272, "step": 5314 }, { "epoch": 0.3565080323305497, "grad_norm": 0.15169033686389002, "learning_rate": 2e-05, "loss": 5.343, "step": 5315 }, { "epoch": 0.3565751081597746, "grad_norm": 0.14895563326329986, "learning_rate": 2e-05, "loss": 5.3513, "step": 5316 }, { "epoch": 0.35664218398899955, "grad_norm": 0.14344043019087224, "learning_rate": 2e-05, "loss": 5.4075, "step": 5317 }, { "epoch": 0.3567092598182245, "grad_norm": 0.14332848355280742, "learning_rate": 2e-05, "loss": 5.4406, "step": 5318 }, { "epoch": 0.3567763356474494, "grad_norm": 0.14675192360663347, "learning_rate": 2e-05, "loss": 5.351, "step": 5319 }, { "epoch": 0.35684341147667437, "grad_norm": 0.14824646454914178, "learning_rate": 2e-05, "loss": 5.5659, "step": 5320 }, { "epoch": 0.3569104873058993, "grad_norm": 0.15278185545125283, "learning_rate": 2e-05, "loss": 5.4191, "step": 5321 }, { "epoch": 0.35697756313512424, "grad_norm": 0.15316388777417353, "learning_rate": 2e-05, "loss": 5.3931, "step": 5322 }, { "epoch": 0.3570446389643492, "grad_norm": 0.1525510686770255, "learning_rate": 2e-05, "loss": 5.4805, "step": 5323 }, { "epoch": 0.3571117147935741, "grad_norm": 0.14151410897521827, "learning_rate": 2e-05, "loss": 5.4694, "step": 5324 }, { "epoch": 0.35717879062279906, "grad_norm": 0.15162587097774832, "learning_rate": 2e-05, "loss": 5.4309, "step": 5325 }, { "epoch": 0.357245866452024, "grad_norm": 0.14809147603578782, "learning_rate": 2e-05, "loss": 5.4279, "step": 5326 }, { "epoch": 0.35731294228124894, "grad_norm": 0.14911161831117317, "learning_rate": 2e-05, "loss": 5.5772, "step": 5327 }, { "epoch": 0.3573800181104739, "grad_norm": 0.14839891753194492, "learning_rate": 2e-05, "loss": 5.3983, "step": 5328 }, { "epoch": 0.3574470939396988, "grad_norm": 0.14365223923274434, "learning_rate": 2e-05, "loss": 5.5121, "step": 5329 }, { "epoch": 0.35751416976892375, "grad_norm": 0.14560999660712617, "learning_rate": 2e-05, "loss": 5.3283, "step": 5330 }, { "epoch": 0.3575812455981487, "grad_norm": 0.15044343282976283, "learning_rate": 2e-05, "loss": 5.408, "step": 5331 }, { "epoch": 0.3576483214273736, "grad_norm": 0.1500135478602676, "learning_rate": 2e-05, "loss": 5.3144, "step": 5332 }, { "epoch": 0.35771539725659857, "grad_norm": 0.15199188495596877, "learning_rate": 2e-05, "loss": 5.4011, "step": 5333 }, { "epoch": 0.3577824730858235, "grad_norm": 0.1507397772219753, "learning_rate": 2e-05, "loss": 5.4381, "step": 5334 }, { "epoch": 0.35784954891504844, "grad_norm": 0.14739607229388757, "learning_rate": 2e-05, "loss": 5.4397, "step": 5335 }, { "epoch": 0.3579166247442734, "grad_norm": 0.14968085721209112, "learning_rate": 2e-05, "loss": 5.4775, "step": 5336 }, { "epoch": 0.3579837005734983, "grad_norm": 0.14663965083012026, "learning_rate": 2e-05, "loss": 5.4913, "step": 5337 }, { "epoch": 0.35805077640272326, "grad_norm": 0.1481863173808244, "learning_rate": 2e-05, "loss": 5.2868, "step": 5338 }, { "epoch": 0.3581178522319482, "grad_norm": 0.1555077147956612, "learning_rate": 2e-05, "loss": 5.4665, "step": 5339 }, { "epoch": 0.35818492806117314, "grad_norm": 0.1515380133496339, "learning_rate": 2e-05, "loss": 5.5339, "step": 5340 }, { "epoch": 0.3582520038903981, "grad_norm": 0.14918812740302537, "learning_rate": 2e-05, "loss": 5.6145, "step": 5341 }, { "epoch": 0.358319079719623, "grad_norm": 0.1608628773390164, "learning_rate": 2e-05, "loss": 5.3109, "step": 5342 }, { "epoch": 0.35838615554884795, "grad_norm": 0.15362708313248413, "learning_rate": 2e-05, "loss": 5.386, "step": 5343 }, { "epoch": 0.3584532313780729, "grad_norm": 0.14345462382621876, "learning_rate": 2e-05, "loss": 5.4882, "step": 5344 }, { "epoch": 0.35852030720729783, "grad_norm": 0.1470299667578743, "learning_rate": 2e-05, "loss": 5.4041, "step": 5345 }, { "epoch": 0.35858738303652277, "grad_norm": 0.15783927682631868, "learning_rate": 2e-05, "loss": 5.5419, "step": 5346 }, { "epoch": 0.3586544588657477, "grad_norm": 0.14550541028779748, "learning_rate": 2e-05, "loss": 5.3654, "step": 5347 }, { "epoch": 0.35872153469497264, "grad_norm": 0.150669071069935, "learning_rate": 2e-05, "loss": 5.6414, "step": 5348 }, { "epoch": 0.3587886105241976, "grad_norm": 0.15218967312984985, "learning_rate": 2e-05, "loss": 5.316, "step": 5349 }, { "epoch": 0.3588556863534225, "grad_norm": 0.1543267459541231, "learning_rate": 2e-05, "loss": 5.4654, "step": 5350 }, { "epoch": 0.35892276218264746, "grad_norm": 0.14608692041068513, "learning_rate": 2e-05, "loss": 5.3546, "step": 5351 }, { "epoch": 0.3589898380118724, "grad_norm": 0.1595680367871526, "learning_rate": 2e-05, "loss": 5.5048, "step": 5352 }, { "epoch": 0.35905691384109734, "grad_norm": 0.1464542929530747, "learning_rate": 2e-05, "loss": 5.3966, "step": 5353 }, { "epoch": 0.3591239896703223, "grad_norm": 0.14641171280390386, "learning_rate": 2e-05, "loss": 5.3183, "step": 5354 }, { "epoch": 0.3591910654995472, "grad_norm": 0.15953381045293194, "learning_rate": 2e-05, "loss": 5.4017, "step": 5355 }, { "epoch": 0.35925814132877215, "grad_norm": 0.15792074369276402, "learning_rate": 2e-05, "loss": 5.5662, "step": 5356 }, { "epoch": 0.3593252171579971, "grad_norm": 0.14699273830162823, "learning_rate": 2e-05, "loss": 5.482, "step": 5357 }, { "epoch": 0.35939229298722203, "grad_norm": 0.15850176461242924, "learning_rate": 2e-05, "loss": 5.4331, "step": 5358 }, { "epoch": 0.35945936881644697, "grad_norm": 0.1512322786961055, "learning_rate": 2e-05, "loss": 5.5048, "step": 5359 }, { "epoch": 0.3595264446456719, "grad_norm": 0.15394108059472913, "learning_rate": 2e-05, "loss": 5.3828, "step": 5360 }, { "epoch": 0.35959352047489684, "grad_norm": 0.1493203874650817, "learning_rate": 2e-05, "loss": 5.3924, "step": 5361 }, { "epoch": 0.3596605963041218, "grad_norm": 0.14607735687094936, "learning_rate": 2e-05, "loss": 5.4747, "step": 5362 }, { "epoch": 0.3597276721333467, "grad_norm": 0.15386593293453207, "learning_rate": 2e-05, "loss": 5.2804, "step": 5363 }, { "epoch": 0.3597947479625717, "grad_norm": 0.14414136763097873, "learning_rate": 2e-05, "loss": 5.3001, "step": 5364 }, { "epoch": 0.35986182379179665, "grad_norm": 0.1532534020382846, "learning_rate": 2e-05, "loss": 5.5059, "step": 5365 }, { "epoch": 0.3599288996210216, "grad_norm": 0.14743252011124053, "learning_rate": 2e-05, "loss": 5.4088, "step": 5366 }, { "epoch": 0.35999597545024653, "grad_norm": 0.16100725862688164, "learning_rate": 2e-05, "loss": 5.4163, "step": 5367 }, { "epoch": 0.36006305127947147, "grad_norm": 0.150416245308718, "learning_rate": 2e-05, "loss": 5.525, "step": 5368 }, { "epoch": 0.3601301271086964, "grad_norm": 0.15840724586876206, "learning_rate": 2e-05, "loss": 5.3995, "step": 5369 }, { "epoch": 0.36019720293792135, "grad_norm": 0.14870273044192034, "learning_rate": 2e-05, "loss": 5.4945, "step": 5370 }, { "epoch": 0.3602642787671463, "grad_norm": 0.16254098778521134, "learning_rate": 2e-05, "loss": 5.4029, "step": 5371 }, { "epoch": 0.3603313545963712, "grad_norm": 0.1703353486986829, "learning_rate": 2e-05, "loss": 5.3934, "step": 5372 }, { "epoch": 0.36039843042559616, "grad_norm": 0.15009792424180876, "learning_rate": 2e-05, "loss": 5.5125, "step": 5373 }, { "epoch": 0.3604655062548211, "grad_norm": 0.15857616372053626, "learning_rate": 2e-05, "loss": 5.4008, "step": 5374 }, { "epoch": 0.36053258208404604, "grad_norm": 0.16660772089713893, "learning_rate": 2e-05, "loss": 5.4904, "step": 5375 }, { "epoch": 0.360599657913271, "grad_norm": 0.14924306836895682, "learning_rate": 2e-05, "loss": 5.4461, "step": 5376 }, { "epoch": 0.3606667337424959, "grad_norm": 0.15081122806904998, "learning_rate": 2e-05, "loss": 5.5376, "step": 5377 }, { "epoch": 0.36073380957172085, "grad_norm": 0.15518068334042262, "learning_rate": 2e-05, "loss": 5.4373, "step": 5378 }, { "epoch": 0.3608008854009458, "grad_norm": 0.1522546673898249, "learning_rate": 2e-05, "loss": 5.2947, "step": 5379 }, { "epoch": 0.36086796123017073, "grad_norm": 0.1433819861182562, "learning_rate": 2e-05, "loss": 5.4921, "step": 5380 }, { "epoch": 0.36093503705939567, "grad_norm": 0.15306982730261015, "learning_rate": 2e-05, "loss": 5.5207, "step": 5381 }, { "epoch": 0.3610021128886206, "grad_norm": 0.1458032464278277, "learning_rate": 2e-05, "loss": 5.5621, "step": 5382 }, { "epoch": 0.36106918871784555, "grad_norm": 0.14792544767278443, "learning_rate": 2e-05, "loss": 5.4118, "step": 5383 }, { "epoch": 0.3611362645470705, "grad_norm": 0.14310542516044272, "learning_rate": 2e-05, "loss": 5.3099, "step": 5384 }, { "epoch": 0.3612033403762954, "grad_norm": 0.14775757125420266, "learning_rate": 2e-05, "loss": 5.4373, "step": 5385 }, { "epoch": 0.36127041620552036, "grad_norm": 0.14833978727553715, "learning_rate": 2e-05, "loss": 5.4484, "step": 5386 }, { "epoch": 0.3613374920347453, "grad_norm": 0.1399412860996333, "learning_rate": 2e-05, "loss": 5.3783, "step": 5387 }, { "epoch": 0.36140456786397024, "grad_norm": 0.145026817938437, "learning_rate": 2e-05, "loss": 5.4682, "step": 5388 }, { "epoch": 0.3614716436931952, "grad_norm": 0.14282268215657698, "learning_rate": 2e-05, "loss": 5.3638, "step": 5389 }, { "epoch": 0.3615387195224201, "grad_norm": 0.14714781494314022, "learning_rate": 2e-05, "loss": 5.4545, "step": 5390 }, { "epoch": 0.36160579535164505, "grad_norm": 0.1470453706252815, "learning_rate": 2e-05, "loss": 5.4694, "step": 5391 }, { "epoch": 0.36167287118087, "grad_norm": 0.1489155368109233, "learning_rate": 2e-05, "loss": 5.4785, "step": 5392 }, { "epoch": 0.36173994701009493, "grad_norm": 0.1444646203724267, "learning_rate": 2e-05, "loss": 5.3012, "step": 5393 }, { "epoch": 0.36180702283931987, "grad_norm": 0.1564003168923802, "learning_rate": 2e-05, "loss": 5.4064, "step": 5394 }, { "epoch": 0.3618740986685448, "grad_norm": 0.1398891599984906, "learning_rate": 2e-05, "loss": 5.5355, "step": 5395 }, { "epoch": 0.36194117449776975, "grad_norm": 0.150794726486874, "learning_rate": 2e-05, "loss": 5.5206, "step": 5396 }, { "epoch": 0.3620082503269947, "grad_norm": 0.14903895088634517, "learning_rate": 2e-05, "loss": 5.321, "step": 5397 }, { "epoch": 0.3620753261562196, "grad_norm": 0.14358319098527464, "learning_rate": 2e-05, "loss": 5.4861, "step": 5398 }, { "epoch": 0.36214240198544456, "grad_norm": 0.14777369226457143, "learning_rate": 2e-05, "loss": 5.4696, "step": 5399 }, { "epoch": 0.3622094778146695, "grad_norm": 0.1542796190059665, "learning_rate": 2e-05, "loss": 5.3696, "step": 5400 }, { "epoch": 0.36227655364389444, "grad_norm": 0.14224087005603966, "learning_rate": 2e-05, "loss": 5.4716, "step": 5401 }, { "epoch": 0.3623436294731194, "grad_norm": 0.1500075249906703, "learning_rate": 2e-05, "loss": 5.5063, "step": 5402 }, { "epoch": 0.3624107053023443, "grad_norm": 0.15462757929106122, "learning_rate": 2e-05, "loss": 5.291, "step": 5403 }, { "epoch": 0.36247778113156925, "grad_norm": 0.15360988145903695, "learning_rate": 2e-05, "loss": 5.4209, "step": 5404 }, { "epoch": 0.3625448569607942, "grad_norm": 0.15086114281030705, "learning_rate": 2e-05, "loss": 5.3927, "step": 5405 }, { "epoch": 0.36261193279001913, "grad_norm": 0.1436206275871481, "learning_rate": 2e-05, "loss": 5.3536, "step": 5406 }, { "epoch": 0.36267900861924407, "grad_norm": 0.14762145551076325, "learning_rate": 2e-05, "loss": 5.4394, "step": 5407 }, { "epoch": 0.362746084448469, "grad_norm": 0.15879624935606546, "learning_rate": 2e-05, "loss": 5.4471, "step": 5408 }, { "epoch": 0.36281316027769395, "grad_norm": 0.1508966526036397, "learning_rate": 2e-05, "loss": 5.3826, "step": 5409 }, { "epoch": 0.3628802361069189, "grad_norm": 0.15068304705657742, "learning_rate": 2e-05, "loss": 5.4649, "step": 5410 }, { "epoch": 0.3629473119361438, "grad_norm": 0.15227407450639333, "learning_rate": 2e-05, "loss": 5.4062, "step": 5411 }, { "epoch": 0.36301438776536876, "grad_norm": 0.1590870686472759, "learning_rate": 2e-05, "loss": 5.5064, "step": 5412 }, { "epoch": 0.3630814635945937, "grad_norm": 0.1509126420242703, "learning_rate": 2e-05, "loss": 5.466, "step": 5413 }, { "epoch": 0.36314853942381864, "grad_norm": 0.1624335134070017, "learning_rate": 2e-05, "loss": 5.5075, "step": 5414 }, { "epoch": 0.3632156152530436, "grad_norm": 0.14629461673886343, "learning_rate": 2e-05, "loss": 5.5089, "step": 5415 }, { "epoch": 0.3632826910822685, "grad_norm": 0.14870930156941423, "learning_rate": 2e-05, "loss": 5.3471, "step": 5416 }, { "epoch": 0.36334976691149345, "grad_norm": 0.1558993057785215, "learning_rate": 2e-05, "loss": 5.5399, "step": 5417 }, { "epoch": 0.3634168427407184, "grad_norm": 0.14767319969966913, "learning_rate": 2e-05, "loss": 5.4371, "step": 5418 }, { "epoch": 0.36348391856994333, "grad_norm": 0.1605675770343836, "learning_rate": 2e-05, "loss": 5.5605, "step": 5419 }, { "epoch": 0.36355099439916827, "grad_norm": 0.15380803120823688, "learning_rate": 2e-05, "loss": 5.4387, "step": 5420 }, { "epoch": 0.3636180702283932, "grad_norm": 0.16145630284462967, "learning_rate": 2e-05, "loss": 5.425, "step": 5421 }, { "epoch": 0.36368514605761815, "grad_norm": 0.15643798809631754, "learning_rate": 2e-05, "loss": 5.3775, "step": 5422 }, { "epoch": 0.3637522218868431, "grad_norm": 0.15595359961352345, "learning_rate": 2e-05, "loss": 5.5683, "step": 5423 }, { "epoch": 0.363819297716068, "grad_norm": 0.1505847330340958, "learning_rate": 2e-05, "loss": 5.3621, "step": 5424 }, { "epoch": 0.36388637354529296, "grad_norm": 0.15936828445546947, "learning_rate": 2e-05, "loss": 5.5236, "step": 5425 }, { "epoch": 0.3639534493745179, "grad_norm": 0.1478977494049453, "learning_rate": 2e-05, "loss": 5.4496, "step": 5426 }, { "epoch": 0.36402052520374284, "grad_norm": 0.15198246059231868, "learning_rate": 2e-05, "loss": 5.4751, "step": 5427 }, { "epoch": 0.3640876010329678, "grad_norm": 0.15321934417824123, "learning_rate": 2e-05, "loss": 5.5062, "step": 5428 }, { "epoch": 0.3641546768621927, "grad_norm": 0.14967420688506428, "learning_rate": 2e-05, "loss": 5.4504, "step": 5429 }, { "epoch": 0.36422175269141766, "grad_norm": 0.15067380060033395, "learning_rate": 2e-05, "loss": 5.3571, "step": 5430 }, { "epoch": 0.3642888285206426, "grad_norm": 0.1505380840716092, "learning_rate": 2e-05, "loss": 5.3145, "step": 5431 }, { "epoch": 0.36435590434986753, "grad_norm": 0.15551059745141374, "learning_rate": 2e-05, "loss": 5.4843, "step": 5432 }, { "epoch": 0.36442298017909247, "grad_norm": 0.1466184925030557, "learning_rate": 2e-05, "loss": 5.5376, "step": 5433 }, { "epoch": 0.3644900560083174, "grad_norm": 0.16632733730193522, "learning_rate": 2e-05, "loss": 5.5279, "step": 5434 }, { "epoch": 0.36455713183754235, "grad_norm": 0.1497931854305357, "learning_rate": 2e-05, "loss": 5.4419, "step": 5435 }, { "epoch": 0.3646242076667673, "grad_norm": 0.15148278981268298, "learning_rate": 2e-05, "loss": 5.5459, "step": 5436 }, { "epoch": 0.3646912834959922, "grad_norm": 0.15290009328449256, "learning_rate": 2e-05, "loss": 5.5255, "step": 5437 }, { "epoch": 0.36475835932521716, "grad_norm": 0.15196666914335688, "learning_rate": 2e-05, "loss": 5.4012, "step": 5438 }, { "epoch": 0.3648254351544421, "grad_norm": 0.14977187589971255, "learning_rate": 2e-05, "loss": 5.6478, "step": 5439 }, { "epoch": 0.36489251098366704, "grad_norm": 0.14462871132650695, "learning_rate": 2e-05, "loss": 5.4296, "step": 5440 }, { "epoch": 0.364959586812892, "grad_norm": 0.1495789238067986, "learning_rate": 2e-05, "loss": 5.4062, "step": 5441 }, { "epoch": 0.3650266626421169, "grad_norm": 0.1511770121431091, "learning_rate": 2e-05, "loss": 5.5374, "step": 5442 }, { "epoch": 0.36509373847134186, "grad_norm": 0.14173752704836637, "learning_rate": 2e-05, "loss": 5.3286, "step": 5443 }, { "epoch": 0.3651608143005668, "grad_norm": 0.15472037603693517, "learning_rate": 2e-05, "loss": 5.3888, "step": 5444 }, { "epoch": 0.36522789012979173, "grad_norm": 0.15621966570559556, "learning_rate": 2e-05, "loss": 5.4552, "step": 5445 }, { "epoch": 0.36529496595901667, "grad_norm": 0.1419386137286694, "learning_rate": 2e-05, "loss": 5.4771, "step": 5446 }, { "epoch": 0.3653620417882416, "grad_norm": 0.14884053371368822, "learning_rate": 2e-05, "loss": 5.4176, "step": 5447 }, { "epoch": 0.36542911761746655, "grad_norm": 0.1416963671957745, "learning_rate": 2e-05, "loss": 5.3556, "step": 5448 }, { "epoch": 0.3654961934466915, "grad_norm": 0.1440863624056914, "learning_rate": 2e-05, "loss": 5.3743, "step": 5449 }, { "epoch": 0.3655632692759164, "grad_norm": 0.15269570296734628, "learning_rate": 2e-05, "loss": 5.3617, "step": 5450 }, { "epoch": 0.36563034510514136, "grad_norm": 0.15025591423455026, "learning_rate": 2e-05, "loss": 5.3857, "step": 5451 }, { "epoch": 0.3656974209343663, "grad_norm": 0.15290732430573467, "learning_rate": 2e-05, "loss": 5.5678, "step": 5452 }, { "epoch": 0.36576449676359124, "grad_norm": 0.14807363916804586, "learning_rate": 2e-05, "loss": 5.499, "step": 5453 }, { "epoch": 0.3658315725928162, "grad_norm": 0.14822621951617507, "learning_rate": 2e-05, "loss": 5.3129, "step": 5454 }, { "epoch": 0.3658986484220411, "grad_norm": 0.14739698964636905, "learning_rate": 2e-05, "loss": 5.4631, "step": 5455 }, { "epoch": 0.36596572425126606, "grad_norm": 0.1443745378684563, "learning_rate": 2e-05, "loss": 5.6007, "step": 5456 }, { "epoch": 0.366032800080491, "grad_norm": 0.1443358224374689, "learning_rate": 2e-05, "loss": 5.4637, "step": 5457 }, { "epoch": 0.36609987590971593, "grad_norm": 0.15250601301458852, "learning_rate": 2e-05, "loss": 5.2992, "step": 5458 }, { "epoch": 0.36616695173894087, "grad_norm": 0.14200029904806816, "learning_rate": 2e-05, "loss": 5.411, "step": 5459 }, { "epoch": 0.3662340275681658, "grad_norm": 0.14385578709457708, "learning_rate": 2e-05, "loss": 5.4756, "step": 5460 }, { "epoch": 0.36630110339739075, "grad_norm": 0.15190135863093124, "learning_rate": 2e-05, "loss": 5.3828, "step": 5461 }, { "epoch": 0.3663681792266157, "grad_norm": 0.1539195873319875, "learning_rate": 2e-05, "loss": 5.4161, "step": 5462 }, { "epoch": 0.3664352550558406, "grad_norm": 0.15307638184453828, "learning_rate": 2e-05, "loss": 5.4611, "step": 5463 }, { "epoch": 0.36650233088506556, "grad_norm": 0.15440825635557992, "learning_rate": 2e-05, "loss": 5.4932, "step": 5464 }, { "epoch": 0.3665694067142905, "grad_norm": 0.14173168835634256, "learning_rate": 2e-05, "loss": 5.3495, "step": 5465 }, { "epoch": 0.36663648254351544, "grad_norm": 0.15348950288780439, "learning_rate": 2e-05, "loss": 5.537, "step": 5466 }, { "epoch": 0.3667035583727404, "grad_norm": 0.15214400051550275, "learning_rate": 2e-05, "loss": 5.4101, "step": 5467 }, { "epoch": 0.3667706342019653, "grad_norm": 0.14922806364617922, "learning_rate": 2e-05, "loss": 5.3831, "step": 5468 }, { "epoch": 0.36683771003119026, "grad_norm": 0.15546918498452425, "learning_rate": 2e-05, "loss": 5.4034, "step": 5469 }, { "epoch": 0.3669047858604152, "grad_norm": 0.15168429633956157, "learning_rate": 2e-05, "loss": 5.4877, "step": 5470 }, { "epoch": 0.36697186168964013, "grad_norm": 0.14584836474609678, "learning_rate": 2e-05, "loss": 5.2908, "step": 5471 }, { "epoch": 0.36703893751886507, "grad_norm": 0.14941324786022872, "learning_rate": 2e-05, "loss": 5.4125, "step": 5472 }, { "epoch": 0.36710601334809, "grad_norm": 0.15120453299744185, "learning_rate": 2e-05, "loss": 5.3455, "step": 5473 }, { "epoch": 0.36717308917731495, "grad_norm": 0.15076480378099286, "learning_rate": 2e-05, "loss": 5.4192, "step": 5474 }, { "epoch": 0.3672401650065399, "grad_norm": 0.16239397512264894, "learning_rate": 2e-05, "loss": 5.5679, "step": 5475 }, { "epoch": 0.3673072408357648, "grad_norm": 0.14975349405691835, "learning_rate": 2e-05, "loss": 5.4699, "step": 5476 }, { "epoch": 0.36737431666498976, "grad_norm": 0.15323943585424546, "learning_rate": 2e-05, "loss": 5.4453, "step": 5477 }, { "epoch": 0.3674413924942147, "grad_norm": 0.15344945400186666, "learning_rate": 2e-05, "loss": 5.519, "step": 5478 }, { "epoch": 0.36750846832343964, "grad_norm": 0.16044572615055286, "learning_rate": 2e-05, "loss": 5.4974, "step": 5479 }, { "epoch": 0.3675755441526646, "grad_norm": 0.15781973054004575, "learning_rate": 2e-05, "loss": 5.5384, "step": 5480 }, { "epoch": 0.3676426199818895, "grad_norm": 0.15150732087222665, "learning_rate": 2e-05, "loss": 5.467, "step": 5481 }, { "epoch": 0.36770969581111446, "grad_norm": 0.1688274154571226, "learning_rate": 2e-05, "loss": 5.5013, "step": 5482 }, { "epoch": 0.3677767716403394, "grad_norm": 0.15034195157754587, "learning_rate": 2e-05, "loss": 5.3461, "step": 5483 }, { "epoch": 0.36784384746956433, "grad_norm": 0.14405254322538597, "learning_rate": 2e-05, "loss": 5.235, "step": 5484 }, { "epoch": 0.36791092329878927, "grad_norm": 0.14182213398551044, "learning_rate": 2e-05, "loss": 5.4434, "step": 5485 }, { "epoch": 0.3679779991280142, "grad_norm": 0.14781041887249036, "learning_rate": 2e-05, "loss": 5.4737, "step": 5486 }, { "epoch": 0.36804507495723915, "grad_norm": 0.16550221601479634, "learning_rate": 2e-05, "loss": 5.343, "step": 5487 }, { "epoch": 0.3681121507864641, "grad_norm": 0.1437688995900029, "learning_rate": 2e-05, "loss": 5.3888, "step": 5488 }, { "epoch": 0.368179226615689, "grad_norm": 0.14678792282682487, "learning_rate": 2e-05, "loss": 5.3134, "step": 5489 }, { "epoch": 0.36824630244491396, "grad_norm": 0.14432726211253152, "learning_rate": 2e-05, "loss": 5.479, "step": 5490 }, { "epoch": 0.3683133782741389, "grad_norm": 0.1473166720103478, "learning_rate": 2e-05, "loss": 5.3963, "step": 5491 }, { "epoch": 0.36838045410336384, "grad_norm": 0.15363349100641463, "learning_rate": 2e-05, "loss": 5.4514, "step": 5492 }, { "epoch": 0.3684475299325888, "grad_norm": 0.14725497298077556, "learning_rate": 2e-05, "loss": 5.3065, "step": 5493 }, { "epoch": 0.3685146057618137, "grad_norm": 0.1441947550795249, "learning_rate": 2e-05, "loss": 5.4094, "step": 5494 }, { "epoch": 0.36858168159103866, "grad_norm": 0.14864274887202222, "learning_rate": 2e-05, "loss": 5.4553, "step": 5495 }, { "epoch": 0.3686487574202636, "grad_norm": 0.14916415961213877, "learning_rate": 2e-05, "loss": 5.3625, "step": 5496 }, { "epoch": 0.36871583324948853, "grad_norm": 0.15043473156459355, "learning_rate": 2e-05, "loss": 5.5156, "step": 5497 }, { "epoch": 0.3687829090787135, "grad_norm": 0.14777345471517014, "learning_rate": 2e-05, "loss": 5.5154, "step": 5498 }, { "epoch": 0.3688499849079384, "grad_norm": 0.15852555066778976, "learning_rate": 2e-05, "loss": 5.4629, "step": 5499 }, { "epoch": 0.36891706073716335, "grad_norm": 0.15841599653649124, "learning_rate": 2e-05, "loss": 5.3811, "step": 5500 }, { "epoch": 0.3689841365663883, "grad_norm": 0.14537778019808395, "learning_rate": 2e-05, "loss": 5.3768, "step": 5501 }, { "epoch": 0.3690512123956132, "grad_norm": 0.14727291697818978, "learning_rate": 2e-05, "loss": 5.3023, "step": 5502 }, { "epoch": 0.36911828822483816, "grad_norm": 0.16372196379078102, "learning_rate": 2e-05, "loss": 5.3893, "step": 5503 }, { "epoch": 0.3691853640540631, "grad_norm": 0.15305358002299205, "learning_rate": 2e-05, "loss": 5.3671, "step": 5504 }, { "epoch": 0.36925243988328804, "grad_norm": 0.1519634368839271, "learning_rate": 2e-05, "loss": 5.4341, "step": 5505 }, { "epoch": 0.369319515712513, "grad_norm": 0.1534503443413111, "learning_rate": 2e-05, "loss": 5.4795, "step": 5506 }, { "epoch": 0.3693865915417379, "grad_norm": 0.1530998253206066, "learning_rate": 2e-05, "loss": 5.4396, "step": 5507 }, { "epoch": 0.36945366737096286, "grad_norm": 0.14408014426841553, "learning_rate": 2e-05, "loss": 5.2619, "step": 5508 }, { "epoch": 0.3695207432001878, "grad_norm": 0.15259618736667727, "learning_rate": 2e-05, "loss": 5.4282, "step": 5509 }, { "epoch": 0.36958781902941273, "grad_norm": 0.150064079456455, "learning_rate": 2e-05, "loss": 5.3441, "step": 5510 }, { "epoch": 0.3696548948586377, "grad_norm": 0.15020856718325531, "learning_rate": 2e-05, "loss": 5.3731, "step": 5511 }, { "epoch": 0.3697219706878626, "grad_norm": 0.15269552182986748, "learning_rate": 2e-05, "loss": 5.2683, "step": 5512 }, { "epoch": 0.36978904651708755, "grad_norm": 0.14814813382933187, "learning_rate": 2e-05, "loss": 5.5531, "step": 5513 }, { "epoch": 0.3698561223463125, "grad_norm": 0.1452003217550247, "learning_rate": 2e-05, "loss": 5.4592, "step": 5514 }, { "epoch": 0.3699231981755374, "grad_norm": 0.14937634031664626, "learning_rate": 2e-05, "loss": 5.3561, "step": 5515 }, { "epoch": 0.36999027400476237, "grad_norm": 0.1479573666261167, "learning_rate": 2e-05, "loss": 5.3849, "step": 5516 }, { "epoch": 0.3700573498339873, "grad_norm": 0.14112520005164267, "learning_rate": 2e-05, "loss": 5.3403, "step": 5517 }, { "epoch": 0.37012442566321224, "grad_norm": 0.1420855371835133, "learning_rate": 2e-05, "loss": 5.428, "step": 5518 }, { "epoch": 0.3701915014924372, "grad_norm": 0.1419970950961018, "learning_rate": 2e-05, "loss": 5.39, "step": 5519 }, { "epoch": 0.3702585773216621, "grad_norm": 0.14304120751869323, "learning_rate": 2e-05, "loss": 5.5319, "step": 5520 }, { "epoch": 0.37032565315088706, "grad_norm": 0.14352641900913535, "learning_rate": 2e-05, "loss": 5.4827, "step": 5521 }, { "epoch": 0.370392728980112, "grad_norm": 0.14780231835756938, "learning_rate": 2e-05, "loss": 5.4637, "step": 5522 }, { "epoch": 0.37045980480933693, "grad_norm": 0.13928018768756434, "learning_rate": 2e-05, "loss": 5.4516, "step": 5523 }, { "epoch": 0.3705268806385619, "grad_norm": 0.15120962542831176, "learning_rate": 2e-05, "loss": 5.2851, "step": 5524 }, { "epoch": 0.3705939564677868, "grad_norm": 0.14324955944748685, "learning_rate": 2e-05, "loss": 5.5173, "step": 5525 }, { "epoch": 0.37066103229701175, "grad_norm": 0.1396918276681697, "learning_rate": 2e-05, "loss": 5.3201, "step": 5526 }, { "epoch": 0.3707281081262367, "grad_norm": 0.15215704720028017, "learning_rate": 2e-05, "loss": 5.4143, "step": 5527 }, { "epoch": 0.3707951839554616, "grad_norm": 0.1491225738479851, "learning_rate": 2e-05, "loss": 5.5254, "step": 5528 }, { "epoch": 0.37086225978468657, "grad_norm": 0.14476099405627413, "learning_rate": 2e-05, "loss": 5.5344, "step": 5529 }, { "epoch": 0.3709293356139115, "grad_norm": 0.16166656373773916, "learning_rate": 2e-05, "loss": 5.4337, "step": 5530 }, { "epoch": 0.37099641144313644, "grad_norm": 0.14891293706509834, "learning_rate": 2e-05, "loss": 5.4113, "step": 5531 }, { "epoch": 0.3710634872723614, "grad_norm": 0.15803682294318247, "learning_rate": 2e-05, "loss": 5.5941, "step": 5532 }, { "epoch": 0.3711305631015863, "grad_norm": 0.15554981965507686, "learning_rate": 2e-05, "loss": 5.4206, "step": 5533 }, { "epoch": 0.37119763893081126, "grad_norm": 0.14352288001119287, "learning_rate": 2e-05, "loss": 5.4445, "step": 5534 }, { "epoch": 0.3712647147600362, "grad_norm": 0.1499274501060584, "learning_rate": 2e-05, "loss": 5.5961, "step": 5535 }, { "epoch": 0.37133179058926113, "grad_norm": 0.14810230421430595, "learning_rate": 2e-05, "loss": 5.3082, "step": 5536 }, { "epoch": 0.3713988664184861, "grad_norm": 0.14721825291792134, "learning_rate": 2e-05, "loss": 5.4266, "step": 5537 }, { "epoch": 0.371465942247711, "grad_norm": 0.15224724138266346, "learning_rate": 2e-05, "loss": 5.4565, "step": 5538 }, { "epoch": 0.37153301807693595, "grad_norm": 0.14669044198908834, "learning_rate": 2e-05, "loss": 5.4404, "step": 5539 }, { "epoch": 0.3716000939061609, "grad_norm": 0.1427712981154258, "learning_rate": 2e-05, "loss": 5.3259, "step": 5540 }, { "epoch": 0.3716671697353858, "grad_norm": 0.1540407373946393, "learning_rate": 2e-05, "loss": 5.4656, "step": 5541 }, { "epoch": 0.37173424556461077, "grad_norm": 0.14843591557650118, "learning_rate": 2e-05, "loss": 5.5033, "step": 5542 }, { "epoch": 0.3718013213938357, "grad_norm": 0.1474571034865597, "learning_rate": 2e-05, "loss": 5.3717, "step": 5543 }, { "epoch": 0.37186839722306064, "grad_norm": 0.15227443277218533, "learning_rate": 2e-05, "loss": 5.4016, "step": 5544 }, { "epoch": 0.3719354730522856, "grad_norm": 0.14123862346260413, "learning_rate": 2e-05, "loss": 5.3334, "step": 5545 }, { "epoch": 0.3720025488815106, "grad_norm": 0.14796602051134664, "learning_rate": 2e-05, "loss": 5.4211, "step": 5546 }, { "epoch": 0.3720696247107355, "grad_norm": 0.14072525105488323, "learning_rate": 2e-05, "loss": 5.3926, "step": 5547 }, { "epoch": 0.37213670053996045, "grad_norm": 0.14607779296206377, "learning_rate": 2e-05, "loss": 5.5267, "step": 5548 }, { "epoch": 0.3722037763691854, "grad_norm": 0.14266174746074073, "learning_rate": 2e-05, "loss": 5.4163, "step": 5549 }, { "epoch": 0.37227085219841033, "grad_norm": 0.14416203884262846, "learning_rate": 2e-05, "loss": 5.633, "step": 5550 }, { "epoch": 0.37233792802763527, "grad_norm": 0.14559050176652955, "learning_rate": 2e-05, "loss": 5.3644, "step": 5551 }, { "epoch": 0.3724050038568602, "grad_norm": 0.15064941292615847, "learning_rate": 2e-05, "loss": 5.4317, "step": 5552 }, { "epoch": 0.37247207968608514, "grad_norm": 0.15191189493947688, "learning_rate": 2e-05, "loss": 5.4844, "step": 5553 }, { "epoch": 0.3725391555153101, "grad_norm": 0.14656529109820923, "learning_rate": 2e-05, "loss": 5.5203, "step": 5554 }, { "epoch": 0.372606231344535, "grad_norm": 0.14839020812231873, "learning_rate": 2e-05, "loss": 5.3811, "step": 5555 }, { "epoch": 0.37267330717375996, "grad_norm": 0.14054673116525768, "learning_rate": 2e-05, "loss": 5.6181, "step": 5556 }, { "epoch": 0.3727403830029849, "grad_norm": 0.154988402156191, "learning_rate": 2e-05, "loss": 5.3881, "step": 5557 }, { "epoch": 0.37280745883220984, "grad_norm": 0.1484114432266767, "learning_rate": 2e-05, "loss": 5.3575, "step": 5558 }, { "epoch": 0.3728745346614348, "grad_norm": 0.1521287072490179, "learning_rate": 2e-05, "loss": 5.4707, "step": 5559 }, { "epoch": 0.3729416104906597, "grad_norm": 0.15287835659323368, "learning_rate": 2e-05, "loss": 5.2856, "step": 5560 }, { "epoch": 0.37300868631988465, "grad_norm": 0.15370088302341264, "learning_rate": 2e-05, "loss": 5.5718, "step": 5561 }, { "epoch": 0.3730757621491096, "grad_norm": 0.1668478097540375, "learning_rate": 2e-05, "loss": 5.4343, "step": 5562 }, { "epoch": 0.37314283797833453, "grad_norm": 0.1574447959381691, "learning_rate": 2e-05, "loss": 5.3504, "step": 5563 }, { "epoch": 0.37320991380755947, "grad_norm": 0.14594117683507551, "learning_rate": 2e-05, "loss": 5.444, "step": 5564 }, { "epoch": 0.3732769896367844, "grad_norm": 0.1510638839218263, "learning_rate": 2e-05, "loss": 5.3343, "step": 5565 }, { "epoch": 0.37334406546600934, "grad_norm": 0.15916342192380262, "learning_rate": 2e-05, "loss": 5.499, "step": 5566 }, { "epoch": 0.3734111412952343, "grad_norm": 0.16071424671918755, "learning_rate": 2e-05, "loss": 5.4411, "step": 5567 }, { "epoch": 0.3734782171244592, "grad_norm": 0.14609988837184734, "learning_rate": 2e-05, "loss": 5.2535, "step": 5568 }, { "epoch": 0.37354529295368416, "grad_norm": 0.156520907572076, "learning_rate": 2e-05, "loss": 5.2823, "step": 5569 }, { "epoch": 0.3736123687829091, "grad_norm": 0.15498779068862176, "learning_rate": 2e-05, "loss": 5.5263, "step": 5570 }, { "epoch": 0.37367944461213404, "grad_norm": 0.14710194112710948, "learning_rate": 2e-05, "loss": 5.3935, "step": 5571 }, { "epoch": 0.373746520441359, "grad_norm": 0.15671377966416863, "learning_rate": 2e-05, "loss": 5.3925, "step": 5572 }, { "epoch": 0.3738135962705839, "grad_norm": 0.15139907147728582, "learning_rate": 2e-05, "loss": 5.4074, "step": 5573 }, { "epoch": 0.37388067209980885, "grad_norm": 0.14450880234482671, "learning_rate": 2e-05, "loss": 5.3829, "step": 5574 }, { "epoch": 0.3739477479290338, "grad_norm": 0.14864949500504998, "learning_rate": 2e-05, "loss": 5.4772, "step": 5575 }, { "epoch": 0.37401482375825873, "grad_norm": 0.13850890009342715, "learning_rate": 2e-05, "loss": 5.4902, "step": 5576 }, { "epoch": 0.37408189958748367, "grad_norm": 0.14917010126674998, "learning_rate": 2e-05, "loss": 5.5175, "step": 5577 }, { "epoch": 0.3741489754167086, "grad_norm": 0.15160585090757833, "learning_rate": 2e-05, "loss": 5.354, "step": 5578 }, { "epoch": 0.37421605124593355, "grad_norm": 0.15124008302812078, "learning_rate": 2e-05, "loss": 5.3575, "step": 5579 }, { "epoch": 0.3742831270751585, "grad_norm": 0.14741318550399948, "learning_rate": 2e-05, "loss": 5.4706, "step": 5580 }, { "epoch": 0.3743502029043834, "grad_norm": 0.14668569054791308, "learning_rate": 2e-05, "loss": 5.3874, "step": 5581 }, { "epoch": 0.37441727873360836, "grad_norm": 0.15186573618789523, "learning_rate": 2e-05, "loss": 5.4287, "step": 5582 }, { "epoch": 0.3744843545628333, "grad_norm": 0.15094078552355342, "learning_rate": 2e-05, "loss": 5.4831, "step": 5583 }, { "epoch": 0.37455143039205824, "grad_norm": 0.14354134814834418, "learning_rate": 2e-05, "loss": 5.3419, "step": 5584 }, { "epoch": 0.3746185062212832, "grad_norm": 0.14454810641850804, "learning_rate": 2e-05, "loss": 5.4407, "step": 5585 }, { "epoch": 0.3746855820505081, "grad_norm": 0.15097254138078867, "learning_rate": 2e-05, "loss": 5.3702, "step": 5586 }, { "epoch": 0.37475265787973305, "grad_norm": 0.1477994122020231, "learning_rate": 2e-05, "loss": 5.5111, "step": 5587 }, { "epoch": 0.374819733708958, "grad_norm": 0.15447008820843638, "learning_rate": 2e-05, "loss": 5.5062, "step": 5588 }, { "epoch": 0.37488680953818293, "grad_norm": 0.15077334713700072, "learning_rate": 2e-05, "loss": 5.5075, "step": 5589 }, { "epoch": 0.37495388536740787, "grad_norm": 0.15414863951257665, "learning_rate": 2e-05, "loss": 5.5018, "step": 5590 }, { "epoch": 0.3750209611966328, "grad_norm": 0.15460815325086488, "learning_rate": 2e-05, "loss": 5.6081, "step": 5591 }, { "epoch": 0.37508803702585775, "grad_norm": 0.15461685778274153, "learning_rate": 2e-05, "loss": 5.4638, "step": 5592 }, { "epoch": 0.3751551128550827, "grad_norm": 0.14842581322187065, "learning_rate": 2e-05, "loss": 5.3438, "step": 5593 }, { "epoch": 0.3752221886843076, "grad_norm": 0.151392575111644, "learning_rate": 2e-05, "loss": 5.3428, "step": 5594 }, { "epoch": 0.37528926451353256, "grad_norm": 0.1434304955963111, "learning_rate": 2e-05, "loss": 5.4565, "step": 5595 }, { "epoch": 0.3753563403427575, "grad_norm": 0.1501258694477132, "learning_rate": 2e-05, "loss": 5.4388, "step": 5596 }, { "epoch": 0.37542341617198244, "grad_norm": 0.15456145768057547, "learning_rate": 2e-05, "loss": 5.441, "step": 5597 }, { "epoch": 0.3754904920012074, "grad_norm": 0.14233773086637688, "learning_rate": 2e-05, "loss": 5.4888, "step": 5598 }, { "epoch": 0.3755575678304323, "grad_norm": 0.14721919463821326, "learning_rate": 2e-05, "loss": 5.3096, "step": 5599 }, { "epoch": 0.37562464365965725, "grad_norm": 0.1536187722150757, "learning_rate": 2e-05, "loss": 5.559, "step": 5600 }, { "epoch": 0.3756917194888822, "grad_norm": 0.1489342866086286, "learning_rate": 2e-05, "loss": 5.2914, "step": 5601 }, { "epoch": 0.37575879531810713, "grad_norm": 0.1433655519035909, "learning_rate": 2e-05, "loss": 5.4682, "step": 5602 }, { "epoch": 0.37582587114733207, "grad_norm": 0.1466145011373058, "learning_rate": 2e-05, "loss": 5.3028, "step": 5603 }, { "epoch": 0.375892946976557, "grad_norm": 0.14402930264128616, "learning_rate": 2e-05, "loss": 5.4552, "step": 5604 }, { "epoch": 0.37596002280578195, "grad_norm": 0.1542693291420134, "learning_rate": 2e-05, "loss": 5.5299, "step": 5605 }, { "epoch": 0.3760270986350069, "grad_norm": 0.14201318792822237, "learning_rate": 2e-05, "loss": 5.4328, "step": 5606 }, { "epoch": 0.3760941744642318, "grad_norm": 0.1535874154397052, "learning_rate": 2e-05, "loss": 5.5668, "step": 5607 }, { "epoch": 0.37616125029345676, "grad_norm": 0.15188263496620896, "learning_rate": 2e-05, "loss": 5.4539, "step": 5608 }, { "epoch": 0.3762283261226817, "grad_norm": 0.1468698805824067, "learning_rate": 2e-05, "loss": 5.5188, "step": 5609 }, { "epoch": 0.37629540195190664, "grad_norm": 0.14428681036956412, "learning_rate": 2e-05, "loss": 5.5626, "step": 5610 }, { "epoch": 0.3763624777811316, "grad_norm": 0.1478433486706171, "learning_rate": 2e-05, "loss": 5.3745, "step": 5611 }, { "epoch": 0.3764295536103565, "grad_norm": 0.14071286527197796, "learning_rate": 2e-05, "loss": 5.3916, "step": 5612 }, { "epoch": 0.37649662943958145, "grad_norm": 0.1474376572648173, "learning_rate": 2e-05, "loss": 5.5427, "step": 5613 }, { "epoch": 0.3765637052688064, "grad_norm": 0.1552786033378589, "learning_rate": 2e-05, "loss": 5.5162, "step": 5614 }, { "epoch": 0.37663078109803133, "grad_norm": 0.1457005390957894, "learning_rate": 2e-05, "loss": 5.3894, "step": 5615 }, { "epoch": 0.37669785692725627, "grad_norm": 0.14954680571033063, "learning_rate": 2e-05, "loss": 5.4366, "step": 5616 }, { "epoch": 0.3767649327564812, "grad_norm": 0.15264090241256678, "learning_rate": 2e-05, "loss": 5.3803, "step": 5617 }, { "epoch": 0.37683200858570615, "grad_norm": 0.148066593143168, "learning_rate": 2e-05, "loss": 5.356, "step": 5618 }, { "epoch": 0.3768990844149311, "grad_norm": 0.15190660855045435, "learning_rate": 2e-05, "loss": 5.3864, "step": 5619 }, { "epoch": 0.376966160244156, "grad_norm": 0.1464637384228485, "learning_rate": 2e-05, "loss": 5.6101, "step": 5620 }, { "epoch": 0.37703323607338096, "grad_norm": 0.1521534079648647, "learning_rate": 2e-05, "loss": 5.44, "step": 5621 }, { "epoch": 0.3771003119026059, "grad_norm": 0.15571326185839346, "learning_rate": 2e-05, "loss": 5.4491, "step": 5622 }, { "epoch": 0.37716738773183084, "grad_norm": 0.14753796488181944, "learning_rate": 2e-05, "loss": 5.4489, "step": 5623 }, { "epoch": 0.3772344635610558, "grad_norm": 0.14379622495281016, "learning_rate": 2e-05, "loss": 5.4624, "step": 5624 }, { "epoch": 0.3773015393902807, "grad_norm": 0.15049692803299444, "learning_rate": 2e-05, "loss": 5.3839, "step": 5625 }, { "epoch": 0.37736861521950565, "grad_norm": 0.14867128586019063, "learning_rate": 2e-05, "loss": 5.3128, "step": 5626 }, { "epoch": 0.3774356910487306, "grad_norm": 0.14085668815504895, "learning_rate": 2e-05, "loss": 5.4422, "step": 5627 }, { "epoch": 0.37750276687795553, "grad_norm": 0.15811497814131273, "learning_rate": 2e-05, "loss": 5.3583, "step": 5628 }, { "epoch": 0.37756984270718047, "grad_norm": 0.15050840673456664, "learning_rate": 2e-05, "loss": 5.4481, "step": 5629 }, { "epoch": 0.3776369185364054, "grad_norm": 0.14464895717444495, "learning_rate": 2e-05, "loss": 5.485, "step": 5630 }, { "epoch": 0.37770399436563035, "grad_norm": 0.14537872128695847, "learning_rate": 2e-05, "loss": 5.4772, "step": 5631 }, { "epoch": 0.3777710701948553, "grad_norm": 0.14929018457211307, "learning_rate": 2e-05, "loss": 5.4757, "step": 5632 }, { "epoch": 0.3778381460240802, "grad_norm": 0.143446398956801, "learning_rate": 2e-05, "loss": 5.4993, "step": 5633 }, { "epoch": 0.37790522185330516, "grad_norm": 0.14230341964755744, "learning_rate": 2e-05, "loss": 5.4279, "step": 5634 }, { "epoch": 0.3779722976825301, "grad_norm": 0.14735019389697465, "learning_rate": 2e-05, "loss": 5.3356, "step": 5635 }, { "epoch": 0.37803937351175504, "grad_norm": 0.14242151603472458, "learning_rate": 2e-05, "loss": 5.3347, "step": 5636 }, { "epoch": 0.37810644934098, "grad_norm": 0.14187749570962258, "learning_rate": 2e-05, "loss": 5.4624, "step": 5637 }, { "epoch": 0.3781735251702049, "grad_norm": 0.14714986016908446, "learning_rate": 2e-05, "loss": 5.5736, "step": 5638 }, { "epoch": 0.37824060099942985, "grad_norm": 0.15659338169965129, "learning_rate": 2e-05, "loss": 5.388, "step": 5639 }, { "epoch": 0.3783076768286548, "grad_norm": 0.15024832631959611, "learning_rate": 2e-05, "loss": 5.3646, "step": 5640 }, { "epoch": 0.37837475265787973, "grad_norm": 0.14398377667448664, "learning_rate": 2e-05, "loss": 5.5238, "step": 5641 }, { "epoch": 0.37844182848710467, "grad_norm": 0.1549214381118798, "learning_rate": 2e-05, "loss": 5.5697, "step": 5642 }, { "epoch": 0.3785089043163296, "grad_norm": 0.1455794045705954, "learning_rate": 2e-05, "loss": 5.5641, "step": 5643 }, { "epoch": 0.37857598014555455, "grad_norm": 0.1450838024664113, "learning_rate": 2e-05, "loss": 5.3759, "step": 5644 }, { "epoch": 0.3786430559747795, "grad_norm": 0.1435940082297661, "learning_rate": 2e-05, "loss": 5.3042, "step": 5645 }, { "epoch": 0.3787101318040044, "grad_norm": 0.14534365034970545, "learning_rate": 2e-05, "loss": 5.4543, "step": 5646 }, { "epoch": 0.37877720763322936, "grad_norm": 0.15246764940722343, "learning_rate": 2e-05, "loss": 5.4133, "step": 5647 }, { "epoch": 0.3788442834624543, "grad_norm": 0.15168053306963955, "learning_rate": 2e-05, "loss": 5.5226, "step": 5648 }, { "epoch": 0.37891135929167924, "grad_norm": 0.14991256578405002, "learning_rate": 2e-05, "loss": 5.5371, "step": 5649 }, { "epoch": 0.3789784351209042, "grad_norm": 0.14811622140820938, "learning_rate": 2e-05, "loss": 5.4283, "step": 5650 }, { "epoch": 0.3790455109501291, "grad_norm": 0.14884936514944025, "learning_rate": 2e-05, "loss": 5.3068, "step": 5651 }, { "epoch": 0.37911258677935405, "grad_norm": 0.14754440824666046, "learning_rate": 2e-05, "loss": 5.4715, "step": 5652 }, { "epoch": 0.379179662608579, "grad_norm": 0.14091289456339712, "learning_rate": 2e-05, "loss": 5.4343, "step": 5653 }, { "epoch": 0.37924673843780393, "grad_norm": 0.14257657239024757, "learning_rate": 2e-05, "loss": 5.4038, "step": 5654 }, { "epoch": 0.37931381426702887, "grad_norm": 0.14558516288391837, "learning_rate": 2e-05, "loss": 5.3652, "step": 5655 }, { "epoch": 0.3793808900962538, "grad_norm": 0.14703277495103326, "learning_rate": 2e-05, "loss": 5.3626, "step": 5656 }, { "epoch": 0.37944796592547875, "grad_norm": 0.14658116734706209, "learning_rate": 2e-05, "loss": 5.2893, "step": 5657 }, { "epoch": 0.3795150417547037, "grad_norm": 0.14189107681409643, "learning_rate": 2e-05, "loss": 5.4771, "step": 5658 }, { "epoch": 0.3795821175839286, "grad_norm": 0.1438752452153265, "learning_rate": 2e-05, "loss": 5.4249, "step": 5659 }, { "epoch": 0.37964919341315356, "grad_norm": 0.14903073496653785, "learning_rate": 2e-05, "loss": 5.4643, "step": 5660 }, { "epoch": 0.3797162692423785, "grad_norm": 0.14403824052363287, "learning_rate": 2e-05, "loss": 5.5573, "step": 5661 }, { "epoch": 0.37978334507160344, "grad_norm": 0.14064920027906896, "learning_rate": 2e-05, "loss": 5.4385, "step": 5662 }, { "epoch": 0.3798504209008284, "grad_norm": 0.1439063417224582, "learning_rate": 2e-05, "loss": 5.3619, "step": 5663 }, { "epoch": 0.3799174967300533, "grad_norm": 0.14853065497675222, "learning_rate": 2e-05, "loss": 5.4511, "step": 5664 }, { "epoch": 0.37998457255927826, "grad_norm": 0.1449137590231032, "learning_rate": 2e-05, "loss": 5.4696, "step": 5665 }, { "epoch": 0.3800516483885032, "grad_norm": 0.15270187462220255, "learning_rate": 2e-05, "loss": 5.3982, "step": 5666 }, { "epoch": 0.38011872421772813, "grad_norm": 0.1519795213197557, "learning_rate": 2e-05, "loss": 5.3699, "step": 5667 }, { "epoch": 0.38018580004695307, "grad_norm": 0.14917106869611083, "learning_rate": 2e-05, "loss": 5.4342, "step": 5668 }, { "epoch": 0.380252875876178, "grad_norm": 0.1532996875820362, "learning_rate": 2e-05, "loss": 5.3641, "step": 5669 }, { "epoch": 0.38031995170540295, "grad_norm": 0.1574437664426534, "learning_rate": 2e-05, "loss": 5.4853, "step": 5670 }, { "epoch": 0.3803870275346279, "grad_norm": 0.15080333735405138, "learning_rate": 2e-05, "loss": 5.3798, "step": 5671 }, { "epoch": 0.3804541033638528, "grad_norm": 0.1484597728671174, "learning_rate": 2e-05, "loss": 5.4924, "step": 5672 }, { "epoch": 0.38052117919307776, "grad_norm": 0.16006431831640297, "learning_rate": 2e-05, "loss": 5.4482, "step": 5673 }, { "epoch": 0.3805882550223027, "grad_norm": 0.14630625317965001, "learning_rate": 2e-05, "loss": 5.4643, "step": 5674 }, { "epoch": 0.38065533085152764, "grad_norm": 0.1521513921257831, "learning_rate": 2e-05, "loss": 5.4455, "step": 5675 }, { "epoch": 0.3807224066807526, "grad_norm": 0.1564945510547526, "learning_rate": 2e-05, "loss": 5.5749, "step": 5676 }, { "epoch": 0.3807894825099775, "grad_norm": 0.16097006498441713, "learning_rate": 2e-05, "loss": 5.5165, "step": 5677 }, { "epoch": 0.38085655833920246, "grad_norm": 0.14333959791860254, "learning_rate": 2e-05, "loss": 5.4762, "step": 5678 }, { "epoch": 0.3809236341684274, "grad_norm": 0.14800044537478288, "learning_rate": 2e-05, "loss": 5.4103, "step": 5679 }, { "epoch": 0.38099070999765233, "grad_norm": 0.15605846115922317, "learning_rate": 2e-05, "loss": 5.4704, "step": 5680 }, { "epoch": 0.38105778582687727, "grad_norm": 0.14679593402373675, "learning_rate": 2e-05, "loss": 5.3302, "step": 5681 }, { "epoch": 0.3811248616561022, "grad_norm": 0.1448909781964578, "learning_rate": 2e-05, "loss": 5.5888, "step": 5682 }, { "epoch": 0.38119193748532715, "grad_norm": 0.14623760113700685, "learning_rate": 2e-05, "loss": 5.4726, "step": 5683 }, { "epoch": 0.3812590133145521, "grad_norm": 0.15143568485298878, "learning_rate": 2e-05, "loss": 5.4996, "step": 5684 }, { "epoch": 0.381326089143777, "grad_norm": 0.14353407874719562, "learning_rate": 2e-05, "loss": 5.4701, "step": 5685 }, { "epoch": 0.38139316497300196, "grad_norm": 0.15022663469564407, "learning_rate": 2e-05, "loss": 5.3713, "step": 5686 }, { "epoch": 0.3814602408022269, "grad_norm": 0.15614605463027664, "learning_rate": 2e-05, "loss": 5.4697, "step": 5687 }, { "epoch": 0.38152731663145184, "grad_norm": 0.16275272368386842, "learning_rate": 2e-05, "loss": 5.4579, "step": 5688 }, { "epoch": 0.3815943924606768, "grad_norm": 0.14376849861535926, "learning_rate": 2e-05, "loss": 5.3594, "step": 5689 }, { "epoch": 0.3816614682899017, "grad_norm": 0.1570172844027234, "learning_rate": 2e-05, "loss": 5.4402, "step": 5690 }, { "epoch": 0.38172854411912666, "grad_norm": 0.15652575741503774, "learning_rate": 2e-05, "loss": 5.41, "step": 5691 }, { "epoch": 0.3817956199483516, "grad_norm": 0.15601793407937498, "learning_rate": 2e-05, "loss": 5.4781, "step": 5692 }, { "epoch": 0.38186269577757653, "grad_norm": 0.14965045503480381, "learning_rate": 2e-05, "loss": 5.428, "step": 5693 }, { "epoch": 0.38192977160680147, "grad_norm": 0.16159814389069738, "learning_rate": 2e-05, "loss": 5.4623, "step": 5694 }, { "epoch": 0.3819968474360264, "grad_norm": 0.16346474939585, "learning_rate": 2e-05, "loss": 5.4909, "step": 5695 }, { "epoch": 0.38206392326525135, "grad_norm": 0.1456984278186836, "learning_rate": 2e-05, "loss": 5.3662, "step": 5696 }, { "epoch": 0.3821309990944763, "grad_norm": 0.16095060096330588, "learning_rate": 2e-05, "loss": 5.6356, "step": 5697 }, { "epoch": 0.3821980749237012, "grad_norm": 0.15258029562712908, "learning_rate": 2e-05, "loss": 5.4117, "step": 5698 }, { "epoch": 0.38226515075292616, "grad_norm": 0.14498546407750915, "learning_rate": 2e-05, "loss": 5.4709, "step": 5699 }, { "epoch": 0.3823322265821511, "grad_norm": 0.142385901424097, "learning_rate": 2e-05, "loss": 5.3983, "step": 5700 }, { "epoch": 0.38239930241137604, "grad_norm": 0.1517974455756827, "learning_rate": 2e-05, "loss": 5.4273, "step": 5701 }, { "epoch": 0.382466378240601, "grad_norm": 0.1509436184821043, "learning_rate": 2e-05, "loss": 5.4724, "step": 5702 }, { "epoch": 0.3825334540698259, "grad_norm": 0.15461911059893751, "learning_rate": 2e-05, "loss": 5.4528, "step": 5703 }, { "epoch": 0.38260052989905086, "grad_norm": 0.1571875701235248, "learning_rate": 2e-05, "loss": 5.3931, "step": 5704 }, { "epoch": 0.3826676057282758, "grad_norm": 0.1586439368063672, "learning_rate": 2e-05, "loss": 5.336, "step": 5705 }, { "epoch": 0.38273468155750073, "grad_norm": 0.1542788368779236, "learning_rate": 2e-05, "loss": 5.386, "step": 5706 }, { "epoch": 0.38280175738672567, "grad_norm": 0.1711005333485567, "learning_rate": 2e-05, "loss": 5.4486, "step": 5707 }, { "epoch": 0.3828688332159506, "grad_norm": 0.15487664183397837, "learning_rate": 2e-05, "loss": 5.4729, "step": 5708 }, { "epoch": 0.38293590904517555, "grad_norm": 0.15636687783978193, "learning_rate": 2e-05, "loss": 5.5162, "step": 5709 }, { "epoch": 0.3830029848744005, "grad_norm": 0.1564644942005319, "learning_rate": 2e-05, "loss": 5.2824, "step": 5710 }, { "epoch": 0.3830700607036254, "grad_norm": 0.1551127600198856, "learning_rate": 2e-05, "loss": 5.4194, "step": 5711 }, { "epoch": 0.38313713653285036, "grad_norm": 0.15365072501782398, "learning_rate": 2e-05, "loss": 5.3954, "step": 5712 }, { "epoch": 0.3832042123620753, "grad_norm": 0.15454466869657202, "learning_rate": 2e-05, "loss": 5.5393, "step": 5713 }, { "epoch": 0.38327128819130024, "grad_norm": 0.16584922791907644, "learning_rate": 2e-05, "loss": 5.4398, "step": 5714 }, { "epoch": 0.3833383640205252, "grad_norm": 0.1510082370082647, "learning_rate": 2e-05, "loss": 5.4851, "step": 5715 }, { "epoch": 0.3834054398497501, "grad_norm": 0.14796427203702556, "learning_rate": 2e-05, "loss": 5.3059, "step": 5716 }, { "epoch": 0.38347251567897506, "grad_norm": 0.16349384370009598, "learning_rate": 2e-05, "loss": 5.4024, "step": 5717 }, { "epoch": 0.3835395915082, "grad_norm": 0.14752524127202854, "learning_rate": 2e-05, "loss": 5.543, "step": 5718 }, { "epoch": 0.38360666733742493, "grad_norm": 0.15256596198128272, "learning_rate": 2e-05, "loss": 5.4807, "step": 5719 }, { "epoch": 0.38367374316664987, "grad_norm": 0.15816991975309536, "learning_rate": 2e-05, "loss": 5.3645, "step": 5720 }, { "epoch": 0.3837408189958748, "grad_norm": 0.1411437855310054, "learning_rate": 2e-05, "loss": 5.4019, "step": 5721 }, { "epoch": 0.38380789482509975, "grad_norm": 0.16252922503005524, "learning_rate": 2e-05, "loss": 5.4664, "step": 5722 }, { "epoch": 0.3838749706543247, "grad_norm": 0.15784878041507622, "learning_rate": 2e-05, "loss": 5.3616, "step": 5723 }, { "epoch": 0.3839420464835496, "grad_norm": 0.1531011345695869, "learning_rate": 2e-05, "loss": 5.3384, "step": 5724 }, { "epoch": 0.38400912231277456, "grad_norm": 0.14909718419093362, "learning_rate": 2e-05, "loss": 5.5092, "step": 5725 }, { "epoch": 0.3840761981419995, "grad_norm": 0.15896620395054176, "learning_rate": 2e-05, "loss": 5.4489, "step": 5726 }, { "epoch": 0.38414327397122444, "grad_norm": 0.1563974475413855, "learning_rate": 2e-05, "loss": 5.5092, "step": 5727 }, { "epoch": 0.38421034980044944, "grad_norm": 0.15720141549015046, "learning_rate": 2e-05, "loss": 5.5211, "step": 5728 }, { "epoch": 0.3842774256296744, "grad_norm": 0.14847943955483478, "learning_rate": 2e-05, "loss": 5.3704, "step": 5729 }, { "epoch": 0.3843445014588993, "grad_norm": 0.15235343021192485, "learning_rate": 2e-05, "loss": 5.2857, "step": 5730 }, { "epoch": 0.38441157728812425, "grad_norm": 0.15042910767708836, "learning_rate": 2e-05, "loss": 5.3759, "step": 5731 }, { "epoch": 0.3844786531173492, "grad_norm": 0.148202929430328, "learning_rate": 2e-05, "loss": 5.5483, "step": 5732 }, { "epoch": 0.38454572894657413, "grad_norm": 0.14470823721825973, "learning_rate": 2e-05, "loss": 5.4887, "step": 5733 }, { "epoch": 0.38461280477579907, "grad_norm": 0.15656849397332648, "learning_rate": 2e-05, "loss": 5.5195, "step": 5734 }, { "epoch": 0.384679880605024, "grad_norm": 0.1467692187052694, "learning_rate": 2e-05, "loss": 5.3929, "step": 5735 }, { "epoch": 0.38474695643424894, "grad_norm": 0.1473241433039828, "learning_rate": 2e-05, "loss": 5.374, "step": 5736 }, { "epoch": 0.3848140322634739, "grad_norm": 0.1470127234729622, "learning_rate": 2e-05, "loss": 5.4792, "step": 5737 }, { "epoch": 0.3848811080926988, "grad_norm": 0.1550364845840921, "learning_rate": 2e-05, "loss": 5.569, "step": 5738 }, { "epoch": 0.38494818392192376, "grad_norm": 0.14720413973134358, "learning_rate": 2e-05, "loss": 5.3792, "step": 5739 }, { "epoch": 0.3850152597511487, "grad_norm": 0.14855027141189991, "learning_rate": 2e-05, "loss": 5.4781, "step": 5740 }, { "epoch": 0.38508233558037364, "grad_norm": 0.15840216420667838, "learning_rate": 2e-05, "loss": 5.6191, "step": 5741 }, { "epoch": 0.3851494114095986, "grad_norm": 0.15391712522459483, "learning_rate": 2e-05, "loss": 5.4976, "step": 5742 }, { "epoch": 0.3852164872388235, "grad_norm": 0.1420166719611395, "learning_rate": 2e-05, "loss": 5.1992, "step": 5743 }, { "epoch": 0.38528356306804845, "grad_norm": 0.1496049195570291, "learning_rate": 2e-05, "loss": 5.4821, "step": 5744 }, { "epoch": 0.3853506388972734, "grad_norm": 0.1534824380901906, "learning_rate": 2e-05, "loss": 5.4404, "step": 5745 }, { "epoch": 0.38541771472649833, "grad_norm": 0.1494685385901173, "learning_rate": 2e-05, "loss": 5.4107, "step": 5746 }, { "epoch": 0.38548479055572327, "grad_norm": 0.15050772958602043, "learning_rate": 2e-05, "loss": 5.462, "step": 5747 }, { "epoch": 0.3855518663849482, "grad_norm": 0.1438705252563661, "learning_rate": 2e-05, "loss": 5.4682, "step": 5748 }, { "epoch": 0.38561894221417314, "grad_norm": 0.1525319704529148, "learning_rate": 2e-05, "loss": 5.3879, "step": 5749 }, { "epoch": 0.3856860180433981, "grad_norm": 0.15788115560852764, "learning_rate": 2e-05, "loss": 5.4025, "step": 5750 }, { "epoch": 0.385753093872623, "grad_norm": 0.14117548226093063, "learning_rate": 2e-05, "loss": 5.3824, "step": 5751 }, { "epoch": 0.38582016970184796, "grad_norm": 0.14697515097674868, "learning_rate": 2e-05, "loss": 5.4763, "step": 5752 }, { "epoch": 0.3858872455310729, "grad_norm": 0.1438792041875919, "learning_rate": 2e-05, "loss": 5.5323, "step": 5753 }, { "epoch": 0.38595432136029784, "grad_norm": 0.1487794927224431, "learning_rate": 2e-05, "loss": 5.4429, "step": 5754 }, { "epoch": 0.3860213971895228, "grad_norm": 0.1448073346110761, "learning_rate": 2e-05, "loss": 5.5125, "step": 5755 }, { "epoch": 0.3860884730187477, "grad_norm": 0.1408077581019159, "learning_rate": 2e-05, "loss": 5.4161, "step": 5756 }, { "epoch": 0.38615554884797265, "grad_norm": 0.14679152330572764, "learning_rate": 2e-05, "loss": 5.4588, "step": 5757 }, { "epoch": 0.3862226246771976, "grad_norm": 0.15291326219680593, "learning_rate": 2e-05, "loss": 5.4375, "step": 5758 }, { "epoch": 0.38628970050642253, "grad_norm": 0.1458944869191814, "learning_rate": 2e-05, "loss": 5.5791, "step": 5759 }, { "epoch": 0.38635677633564747, "grad_norm": 0.15233042403233657, "learning_rate": 2e-05, "loss": 5.4575, "step": 5760 }, { "epoch": 0.3864238521648724, "grad_norm": 0.15606303618977047, "learning_rate": 2e-05, "loss": 5.2747, "step": 5761 }, { "epoch": 0.38649092799409734, "grad_norm": 0.14921354907674902, "learning_rate": 2e-05, "loss": 5.5192, "step": 5762 }, { "epoch": 0.3865580038233223, "grad_norm": 0.15098907198049402, "learning_rate": 2e-05, "loss": 5.3491, "step": 5763 }, { "epoch": 0.3866250796525472, "grad_norm": 0.17200953241576947, "learning_rate": 2e-05, "loss": 5.4268, "step": 5764 }, { "epoch": 0.38669215548177216, "grad_norm": 0.14997870724062318, "learning_rate": 2e-05, "loss": 5.3868, "step": 5765 }, { "epoch": 0.3867592313109971, "grad_norm": 0.1627316920003786, "learning_rate": 2e-05, "loss": 5.3421, "step": 5766 }, { "epoch": 0.38682630714022204, "grad_norm": 0.15829288915484274, "learning_rate": 2e-05, "loss": 5.3309, "step": 5767 }, { "epoch": 0.386893382969447, "grad_norm": 0.15506305779774382, "learning_rate": 2e-05, "loss": 5.3094, "step": 5768 }, { "epoch": 0.3869604587986719, "grad_norm": 0.15238213721438468, "learning_rate": 2e-05, "loss": 5.4083, "step": 5769 }, { "epoch": 0.38702753462789685, "grad_norm": 0.1472984929465323, "learning_rate": 2e-05, "loss": 5.4418, "step": 5770 }, { "epoch": 0.3870946104571218, "grad_norm": 0.1645741855048162, "learning_rate": 2e-05, "loss": 5.3464, "step": 5771 }, { "epoch": 0.38716168628634673, "grad_norm": 0.16197113427904658, "learning_rate": 2e-05, "loss": 5.452, "step": 5772 }, { "epoch": 0.38722876211557167, "grad_norm": 0.16307425069815754, "learning_rate": 2e-05, "loss": 5.3956, "step": 5773 }, { "epoch": 0.3872958379447966, "grad_norm": 0.14666860758345238, "learning_rate": 2e-05, "loss": 5.4078, "step": 5774 }, { "epoch": 0.38736291377402154, "grad_norm": 0.1559956699809593, "learning_rate": 2e-05, "loss": 5.3764, "step": 5775 }, { "epoch": 0.3874299896032465, "grad_norm": 0.1505131884595001, "learning_rate": 2e-05, "loss": 5.5274, "step": 5776 }, { "epoch": 0.3874970654324714, "grad_norm": 0.1608953465500763, "learning_rate": 2e-05, "loss": 5.4966, "step": 5777 }, { "epoch": 0.38756414126169636, "grad_norm": 0.16026293246168155, "learning_rate": 2e-05, "loss": 5.4104, "step": 5778 }, { "epoch": 0.3876312170909213, "grad_norm": 0.1514043498248274, "learning_rate": 2e-05, "loss": 5.4426, "step": 5779 }, { "epoch": 0.38769829292014624, "grad_norm": 0.15251188741441846, "learning_rate": 2e-05, "loss": 5.4336, "step": 5780 }, { "epoch": 0.3877653687493712, "grad_norm": 0.15189685578113293, "learning_rate": 2e-05, "loss": 5.4664, "step": 5781 }, { "epoch": 0.3878324445785961, "grad_norm": 0.14565848766337064, "learning_rate": 2e-05, "loss": 5.2978, "step": 5782 }, { "epoch": 0.38789952040782105, "grad_norm": 0.1541200326792232, "learning_rate": 2e-05, "loss": 5.426, "step": 5783 }, { "epoch": 0.387966596237046, "grad_norm": 0.1470771222969767, "learning_rate": 2e-05, "loss": 5.414, "step": 5784 }, { "epoch": 0.38803367206627093, "grad_norm": 0.14215110031266326, "learning_rate": 2e-05, "loss": 5.4322, "step": 5785 }, { "epoch": 0.38810074789549587, "grad_norm": 0.1467245333970438, "learning_rate": 2e-05, "loss": 5.3082, "step": 5786 }, { "epoch": 0.3881678237247208, "grad_norm": 0.14995550201238386, "learning_rate": 2e-05, "loss": 5.4666, "step": 5787 }, { "epoch": 0.38823489955394574, "grad_norm": 0.14821301929964303, "learning_rate": 2e-05, "loss": 5.4591, "step": 5788 }, { "epoch": 0.3883019753831707, "grad_norm": 0.14506341417876575, "learning_rate": 2e-05, "loss": 5.4522, "step": 5789 }, { "epoch": 0.3883690512123956, "grad_norm": 0.15474739733267345, "learning_rate": 2e-05, "loss": 5.5266, "step": 5790 }, { "epoch": 0.38843612704162056, "grad_norm": 0.14848442985121812, "learning_rate": 2e-05, "loss": 5.5167, "step": 5791 }, { "epoch": 0.3885032028708455, "grad_norm": 0.152070366352867, "learning_rate": 2e-05, "loss": 5.4312, "step": 5792 }, { "epoch": 0.38857027870007044, "grad_norm": 0.14417811567412508, "learning_rate": 2e-05, "loss": 5.438, "step": 5793 }, { "epoch": 0.3886373545292954, "grad_norm": 0.1511343063681683, "learning_rate": 2e-05, "loss": 5.4834, "step": 5794 }, { "epoch": 0.3887044303585203, "grad_norm": 0.15159324006010275, "learning_rate": 2e-05, "loss": 5.415, "step": 5795 }, { "epoch": 0.38877150618774525, "grad_norm": 0.14816401371673357, "learning_rate": 2e-05, "loss": 5.3282, "step": 5796 }, { "epoch": 0.3888385820169702, "grad_norm": 0.14531865875849878, "learning_rate": 2e-05, "loss": 5.4478, "step": 5797 }, { "epoch": 0.38890565784619513, "grad_norm": 0.15292925069954352, "learning_rate": 2e-05, "loss": 5.3936, "step": 5798 }, { "epoch": 0.38897273367542007, "grad_norm": 0.14952620997963448, "learning_rate": 2e-05, "loss": 5.2906, "step": 5799 }, { "epoch": 0.389039809504645, "grad_norm": 0.14727831874981087, "learning_rate": 2e-05, "loss": 5.4362, "step": 5800 }, { "epoch": 0.38910688533386995, "grad_norm": 0.15525239130754134, "learning_rate": 2e-05, "loss": 5.4701, "step": 5801 }, { "epoch": 0.3891739611630949, "grad_norm": 0.14341059362473732, "learning_rate": 2e-05, "loss": 5.6545, "step": 5802 }, { "epoch": 0.3892410369923198, "grad_norm": 0.15048413618361994, "learning_rate": 2e-05, "loss": 5.4475, "step": 5803 }, { "epoch": 0.38930811282154476, "grad_norm": 0.14394208697221159, "learning_rate": 2e-05, "loss": 5.4728, "step": 5804 }, { "epoch": 0.3893751886507697, "grad_norm": 0.15132828534446185, "learning_rate": 2e-05, "loss": 5.4395, "step": 5805 }, { "epoch": 0.38944226447999464, "grad_norm": 0.14587403127016033, "learning_rate": 2e-05, "loss": 5.486, "step": 5806 }, { "epoch": 0.3895093403092196, "grad_norm": 0.14911332405043554, "learning_rate": 2e-05, "loss": 5.4372, "step": 5807 }, { "epoch": 0.3895764161384445, "grad_norm": 0.14405820374580666, "learning_rate": 2e-05, "loss": 5.3667, "step": 5808 }, { "epoch": 0.38964349196766945, "grad_norm": 0.14570594554687002, "learning_rate": 2e-05, "loss": 5.4491, "step": 5809 }, { "epoch": 0.3897105677968944, "grad_norm": 0.14082092912148184, "learning_rate": 2e-05, "loss": 5.6008, "step": 5810 }, { "epoch": 0.38977764362611933, "grad_norm": 0.1536444396204144, "learning_rate": 2e-05, "loss": 5.514, "step": 5811 }, { "epoch": 0.38984471945534427, "grad_norm": 0.14320647192058983, "learning_rate": 2e-05, "loss": 5.4099, "step": 5812 }, { "epoch": 0.3899117952845692, "grad_norm": 0.14633644126544296, "learning_rate": 2e-05, "loss": 5.4566, "step": 5813 }, { "epoch": 0.38997887111379415, "grad_norm": 0.14607862508416297, "learning_rate": 2e-05, "loss": 5.3603, "step": 5814 }, { "epoch": 0.3900459469430191, "grad_norm": 0.1543896778461844, "learning_rate": 2e-05, "loss": 5.385, "step": 5815 }, { "epoch": 0.390113022772244, "grad_norm": 0.14770766013001516, "learning_rate": 2e-05, "loss": 5.2307, "step": 5816 }, { "epoch": 0.39018009860146896, "grad_norm": 0.1465830124752459, "learning_rate": 2e-05, "loss": 5.3346, "step": 5817 }, { "epoch": 0.3902471744306939, "grad_norm": 0.1467139878006663, "learning_rate": 2e-05, "loss": 5.4702, "step": 5818 }, { "epoch": 0.39031425025991884, "grad_norm": 0.15251057024865108, "learning_rate": 2e-05, "loss": 5.4972, "step": 5819 }, { "epoch": 0.3903813260891438, "grad_norm": 0.14980022891124575, "learning_rate": 2e-05, "loss": 5.504, "step": 5820 }, { "epoch": 0.3904484019183687, "grad_norm": 0.14578889858537006, "learning_rate": 2e-05, "loss": 5.3714, "step": 5821 }, { "epoch": 0.39051547774759365, "grad_norm": 0.15746604341929926, "learning_rate": 2e-05, "loss": 5.4043, "step": 5822 }, { "epoch": 0.3905825535768186, "grad_norm": 0.14763029429451663, "learning_rate": 2e-05, "loss": 5.4789, "step": 5823 }, { "epoch": 0.39064962940604353, "grad_norm": 0.14453762727861416, "learning_rate": 2e-05, "loss": 5.4434, "step": 5824 }, { "epoch": 0.39071670523526847, "grad_norm": 0.1482714117806355, "learning_rate": 2e-05, "loss": 5.3103, "step": 5825 }, { "epoch": 0.3907837810644934, "grad_norm": 0.14277396511915819, "learning_rate": 2e-05, "loss": 5.5002, "step": 5826 }, { "epoch": 0.39085085689371835, "grad_norm": 0.14031335417959032, "learning_rate": 2e-05, "loss": 5.2838, "step": 5827 }, { "epoch": 0.3909179327229433, "grad_norm": 0.15033184572344638, "learning_rate": 2e-05, "loss": 5.3506, "step": 5828 }, { "epoch": 0.3909850085521682, "grad_norm": 0.14949829543893822, "learning_rate": 2e-05, "loss": 5.4066, "step": 5829 }, { "epoch": 0.39105208438139316, "grad_norm": 0.14406053932317042, "learning_rate": 2e-05, "loss": 5.3856, "step": 5830 }, { "epoch": 0.3911191602106181, "grad_norm": 0.14901425167444243, "learning_rate": 2e-05, "loss": 5.5368, "step": 5831 }, { "epoch": 0.39118623603984304, "grad_norm": 0.147227249338175, "learning_rate": 2e-05, "loss": 5.4054, "step": 5832 }, { "epoch": 0.391253311869068, "grad_norm": 0.14394813686051822, "learning_rate": 2e-05, "loss": 5.2778, "step": 5833 }, { "epoch": 0.3913203876982929, "grad_norm": 0.14337469739320138, "learning_rate": 2e-05, "loss": 5.5388, "step": 5834 }, { "epoch": 0.39138746352751785, "grad_norm": 0.14108897499139236, "learning_rate": 2e-05, "loss": 5.525, "step": 5835 }, { "epoch": 0.3914545393567428, "grad_norm": 0.1505485702488886, "learning_rate": 2e-05, "loss": 5.5474, "step": 5836 }, { "epoch": 0.39152161518596773, "grad_norm": 0.14798527418835022, "learning_rate": 2e-05, "loss": 5.4191, "step": 5837 }, { "epoch": 0.39158869101519267, "grad_norm": 0.1473902958249018, "learning_rate": 2e-05, "loss": 5.386, "step": 5838 }, { "epoch": 0.3916557668444176, "grad_norm": 0.1466486655459091, "learning_rate": 2e-05, "loss": 5.5092, "step": 5839 }, { "epoch": 0.39172284267364255, "grad_norm": 0.14960641071607422, "learning_rate": 2e-05, "loss": 5.4205, "step": 5840 }, { "epoch": 0.3917899185028675, "grad_norm": 0.14857934054427546, "learning_rate": 2e-05, "loss": 5.2852, "step": 5841 }, { "epoch": 0.3918569943320924, "grad_norm": 0.15083213291660186, "learning_rate": 2e-05, "loss": 5.3686, "step": 5842 }, { "epoch": 0.39192407016131736, "grad_norm": 0.14442706168103567, "learning_rate": 2e-05, "loss": 5.4237, "step": 5843 }, { "epoch": 0.3919911459905423, "grad_norm": 0.1420124008956615, "learning_rate": 2e-05, "loss": 5.3617, "step": 5844 }, { "epoch": 0.39205822181976724, "grad_norm": 0.15042761987372208, "learning_rate": 2e-05, "loss": 5.338, "step": 5845 }, { "epoch": 0.3921252976489922, "grad_norm": 0.14532655998388777, "learning_rate": 2e-05, "loss": 5.5238, "step": 5846 }, { "epoch": 0.3921923734782171, "grad_norm": 0.1506725701683815, "learning_rate": 2e-05, "loss": 5.5372, "step": 5847 }, { "epoch": 0.39225944930744205, "grad_norm": 0.14950196532591545, "learning_rate": 2e-05, "loss": 5.5097, "step": 5848 }, { "epoch": 0.392326525136667, "grad_norm": 0.1544108596795565, "learning_rate": 2e-05, "loss": 5.38, "step": 5849 }, { "epoch": 0.39239360096589193, "grad_norm": 0.14869836247070767, "learning_rate": 2e-05, "loss": 5.5782, "step": 5850 }, { "epoch": 0.39246067679511687, "grad_norm": 0.1529561095614913, "learning_rate": 2e-05, "loss": 5.468, "step": 5851 }, { "epoch": 0.3925277526243418, "grad_norm": 0.15041447037843858, "learning_rate": 2e-05, "loss": 5.3211, "step": 5852 }, { "epoch": 0.39259482845356675, "grad_norm": 0.15204153751880556, "learning_rate": 2e-05, "loss": 5.4607, "step": 5853 }, { "epoch": 0.3926619042827917, "grad_norm": 0.15440588993360396, "learning_rate": 2e-05, "loss": 5.4773, "step": 5854 }, { "epoch": 0.3927289801120166, "grad_norm": 0.15605505745468676, "learning_rate": 2e-05, "loss": 5.4669, "step": 5855 }, { "epoch": 0.39279605594124156, "grad_norm": 0.14216839607768803, "learning_rate": 2e-05, "loss": 5.4061, "step": 5856 }, { "epoch": 0.3928631317704665, "grad_norm": 0.14545635650789174, "learning_rate": 2e-05, "loss": 5.474, "step": 5857 }, { "epoch": 0.39293020759969144, "grad_norm": 0.16329896297836766, "learning_rate": 2e-05, "loss": 5.6048, "step": 5858 }, { "epoch": 0.3929972834289164, "grad_norm": 0.15041615828376692, "learning_rate": 2e-05, "loss": 5.4713, "step": 5859 }, { "epoch": 0.3930643592581413, "grad_norm": 0.14195131104651246, "learning_rate": 2e-05, "loss": 5.4492, "step": 5860 }, { "epoch": 0.39313143508736625, "grad_norm": 0.15789515463820297, "learning_rate": 2e-05, "loss": 5.3867, "step": 5861 }, { "epoch": 0.3931985109165912, "grad_norm": 0.14681535617021604, "learning_rate": 2e-05, "loss": 5.4039, "step": 5862 }, { "epoch": 0.39326558674581613, "grad_norm": 0.14581865040491213, "learning_rate": 2e-05, "loss": 5.5053, "step": 5863 }, { "epoch": 0.39333266257504107, "grad_norm": 0.15761384304150605, "learning_rate": 2e-05, "loss": 5.2644, "step": 5864 }, { "epoch": 0.393399738404266, "grad_norm": 0.1459885046920075, "learning_rate": 2e-05, "loss": 5.3983, "step": 5865 }, { "epoch": 0.39346681423349095, "grad_norm": 0.15497000026868515, "learning_rate": 2e-05, "loss": 5.4197, "step": 5866 }, { "epoch": 0.3935338900627159, "grad_norm": 0.15118018149901188, "learning_rate": 2e-05, "loss": 5.3361, "step": 5867 }, { "epoch": 0.3936009658919408, "grad_norm": 0.151844572609273, "learning_rate": 2e-05, "loss": 5.4782, "step": 5868 }, { "epoch": 0.39366804172116576, "grad_norm": 0.1572812820599554, "learning_rate": 2e-05, "loss": 5.4731, "step": 5869 }, { "epoch": 0.3937351175503907, "grad_norm": 0.1470762124958615, "learning_rate": 2e-05, "loss": 5.3743, "step": 5870 }, { "epoch": 0.39380219337961564, "grad_norm": 0.15127325905130617, "learning_rate": 2e-05, "loss": 5.4487, "step": 5871 }, { "epoch": 0.3938692692088406, "grad_norm": 0.15123197393215188, "learning_rate": 2e-05, "loss": 5.524, "step": 5872 }, { "epoch": 0.3939363450380655, "grad_norm": 0.1539080030887222, "learning_rate": 2e-05, "loss": 5.3228, "step": 5873 }, { "epoch": 0.39400342086729045, "grad_norm": 0.15331816121825945, "learning_rate": 2e-05, "loss": 5.5709, "step": 5874 }, { "epoch": 0.3940704966965154, "grad_norm": 0.15446404531313307, "learning_rate": 2e-05, "loss": 5.4255, "step": 5875 }, { "epoch": 0.39413757252574033, "grad_norm": 0.14911869554800308, "learning_rate": 2e-05, "loss": 5.3263, "step": 5876 }, { "epoch": 0.39420464835496527, "grad_norm": 0.14649057994646156, "learning_rate": 2e-05, "loss": 5.4565, "step": 5877 }, { "epoch": 0.3942717241841902, "grad_norm": 0.15676194385574097, "learning_rate": 2e-05, "loss": 5.4417, "step": 5878 }, { "epoch": 0.39433880001341515, "grad_norm": 0.15387481551009605, "learning_rate": 2e-05, "loss": 5.2368, "step": 5879 }, { "epoch": 0.3944058758426401, "grad_norm": 0.14892873339514628, "learning_rate": 2e-05, "loss": 5.3044, "step": 5880 }, { "epoch": 0.394472951671865, "grad_norm": 0.1469748621388426, "learning_rate": 2e-05, "loss": 5.3817, "step": 5881 }, { "epoch": 0.39454002750108996, "grad_norm": 0.1584177163221102, "learning_rate": 2e-05, "loss": 5.4134, "step": 5882 }, { "epoch": 0.3946071033303149, "grad_norm": 0.15940586393223796, "learning_rate": 2e-05, "loss": 5.3838, "step": 5883 }, { "epoch": 0.39467417915953984, "grad_norm": 0.14695627370702188, "learning_rate": 2e-05, "loss": 5.5238, "step": 5884 }, { "epoch": 0.3947412549887648, "grad_norm": 0.15578020267473816, "learning_rate": 2e-05, "loss": 5.4212, "step": 5885 }, { "epoch": 0.3948083308179897, "grad_norm": 0.15271411276089863, "learning_rate": 2e-05, "loss": 5.454, "step": 5886 }, { "epoch": 0.39487540664721466, "grad_norm": 0.14702760504436627, "learning_rate": 2e-05, "loss": 5.4571, "step": 5887 }, { "epoch": 0.3949424824764396, "grad_norm": 0.15330898358937073, "learning_rate": 2e-05, "loss": 5.4305, "step": 5888 }, { "epoch": 0.39500955830566453, "grad_norm": 0.16088087387838515, "learning_rate": 2e-05, "loss": 5.4522, "step": 5889 }, { "epoch": 0.39507663413488947, "grad_norm": 0.1472815247733883, "learning_rate": 2e-05, "loss": 5.5046, "step": 5890 }, { "epoch": 0.3951437099641144, "grad_norm": 0.14426069406292338, "learning_rate": 2e-05, "loss": 5.2891, "step": 5891 }, { "epoch": 0.39521078579333935, "grad_norm": 0.16311130732212054, "learning_rate": 2e-05, "loss": 5.489, "step": 5892 }, { "epoch": 0.3952778616225643, "grad_norm": 0.154712861052737, "learning_rate": 2e-05, "loss": 5.4121, "step": 5893 }, { "epoch": 0.3953449374517892, "grad_norm": 0.15025477551754537, "learning_rate": 2e-05, "loss": 5.4913, "step": 5894 }, { "epoch": 0.39541201328101416, "grad_norm": 0.14803051416651397, "learning_rate": 2e-05, "loss": 5.2498, "step": 5895 }, { "epoch": 0.3954790891102391, "grad_norm": 0.16080372268882145, "learning_rate": 2e-05, "loss": 5.4435, "step": 5896 }, { "epoch": 0.39554616493946404, "grad_norm": 0.15706826761860182, "learning_rate": 2e-05, "loss": 5.4329, "step": 5897 }, { "epoch": 0.395613240768689, "grad_norm": 0.16270738593117287, "learning_rate": 2e-05, "loss": 5.5277, "step": 5898 }, { "epoch": 0.3956803165979139, "grad_norm": 0.14919863538352876, "learning_rate": 2e-05, "loss": 5.3161, "step": 5899 }, { "epoch": 0.39574739242713886, "grad_norm": 0.15746024032806694, "learning_rate": 2e-05, "loss": 5.3774, "step": 5900 }, { "epoch": 0.3958144682563638, "grad_norm": 0.15285489980845465, "learning_rate": 2e-05, "loss": 5.4144, "step": 5901 }, { "epoch": 0.39588154408558873, "grad_norm": 0.15249777193890982, "learning_rate": 2e-05, "loss": 5.3758, "step": 5902 }, { "epoch": 0.39594861991481367, "grad_norm": 0.15013348964677142, "learning_rate": 2e-05, "loss": 5.4919, "step": 5903 }, { "epoch": 0.3960156957440386, "grad_norm": 0.14916448559579706, "learning_rate": 2e-05, "loss": 5.3973, "step": 5904 }, { "epoch": 0.39608277157326355, "grad_norm": 0.14584303489138212, "learning_rate": 2e-05, "loss": 5.4287, "step": 5905 }, { "epoch": 0.3961498474024885, "grad_norm": 0.14647074535356697, "learning_rate": 2e-05, "loss": 5.4515, "step": 5906 }, { "epoch": 0.3962169232317134, "grad_norm": 0.15266346090206342, "learning_rate": 2e-05, "loss": 5.5534, "step": 5907 }, { "epoch": 0.39628399906093836, "grad_norm": 0.1488024725750587, "learning_rate": 2e-05, "loss": 5.5299, "step": 5908 }, { "epoch": 0.39635107489016336, "grad_norm": 0.14912862642532618, "learning_rate": 2e-05, "loss": 5.4844, "step": 5909 }, { "epoch": 0.3964181507193883, "grad_norm": 0.15443940992326785, "learning_rate": 2e-05, "loss": 5.5133, "step": 5910 }, { "epoch": 0.39648522654861323, "grad_norm": 0.14633777271339263, "learning_rate": 2e-05, "loss": 5.4356, "step": 5911 }, { "epoch": 0.3965523023778382, "grad_norm": 0.15088324330558475, "learning_rate": 2e-05, "loss": 5.4363, "step": 5912 }, { "epoch": 0.3966193782070631, "grad_norm": 0.15398689821084152, "learning_rate": 2e-05, "loss": 5.396, "step": 5913 }, { "epoch": 0.39668645403628805, "grad_norm": 0.15037335404171037, "learning_rate": 2e-05, "loss": 5.4467, "step": 5914 }, { "epoch": 0.396753529865513, "grad_norm": 0.15470222329142982, "learning_rate": 2e-05, "loss": 5.4111, "step": 5915 }, { "epoch": 0.3968206056947379, "grad_norm": 0.1534803105244773, "learning_rate": 2e-05, "loss": 5.4656, "step": 5916 }, { "epoch": 0.39688768152396287, "grad_norm": 0.15381044530912338, "learning_rate": 2e-05, "loss": 5.3416, "step": 5917 }, { "epoch": 0.3969547573531878, "grad_norm": 0.1614030520270631, "learning_rate": 2e-05, "loss": 5.4099, "step": 5918 }, { "epoch": 0.39702183318241274, "grad_norm": 0.1611834406810516, "learning_rate": 2e-05, "loss": 5.5282, "step": 5919 }, { "epoch": 0.3970889090116377, "grad_norm": 0.1675946964905524, "learning_rate": 2e-05, "loss": 5.5445, "step": 5920 }, { "epoch": 0.3971559848408626, "grad_norm": 0.1527152525581282, "learning_rate": 2e-05, "loss": 5.4223, "step": 5921 }, { "epoch": 0.39722306067008756, "grad_norm": 0.1524762159041056, "learning_rate": 2e-05, "loss": 5.519, "step": 5922 }, { "epoch": 0.3972901364993125, "grad_norm": 0.1631318010120498, "learning_rate": 2e-05, "loss": 5.4225, "step": 5923 }, { "epoch": 0.39735721232853743, "grad_norm": 0.1467361516176354, "learning_rate": 2e-05, "loss": 5.5318, "step": 5924 }, { "epoch": 0.3974242881577624, "grad_norm": 0.15252256919287552, "learning_rate": 2e-05, "loss": 5.5398, "step": 5925 }, { "epoch": 0.3974913639869873, "grad_norm": 0.14448913482194106, "learning_rate": 2e-05, "loss": 5.3063, "step": 5926 }, { "epoch": 0.39755843981621225, "grad_norm": 0.14529512100080058, "learning_rate": 2e-05, "loss": 5.581, "step": 5927 }, { "epoch": 0.3976255156454372, "grad_norm": 0.15256322503581018, "learning_rate": 2e-05, "loss": 5.5163, "step": 5928 }, { "epoch": 0.3976925914746621, "grad_norm": 0.14893159733161832, "learning_rate": 2e-05, "loss": 5.3897, "step": 5929 }, { "epoch": 0.39775966730388707, "grad_norm": 0.15580480659438692, "learning_rate": 2e-05, "loss": 5.4428, "step": 5930 }, { "epoch": 0.397826743133112, "grad_norm": 0.14780760967655887, "learning_rate": 2e-05, "loss": 5.4809, "step": 5931 }, { "epoch": 0.39789381896233694, "grad_norm": 0.14510909445715506, "learning_rate": 2e-05, "loss": 5.347, "step": 5932 }, { "epoch": 0.3979608947915619, "grad_norm": 0.14370781476981787, "learning_rate": 2e-05, "loss": 5.2568, "step": 5933 }, { "epoch": 0.3980279706207868, "grad_norm": 0.14292931180412027, "learning_rate": 2e-05, "loss": 5.5042, "step": 5934 }, { "epoch": 0.39809504645001176, "grad_norm": 0.15030992316036293, "learning_rate": 2e-05, "loss": 5.5282, "step": 5935 }, { "epoch": 0.3981621222792367, "grad_norm": 0.14887999047050238, "learning_rate": 2e-05, "loss": 5.3641, "step": 5936 }, { "epoch": 0.39822919810846164, "grad_norm": 0.14644776344215313, "learning_rate": 2e-05, "loss": 5.5784, "step": 5937 }, { "epoch": 0.3982962739376866, "grad_norm": 0.14610512744194495, "learning_rate": 2e-05, "loss": 5.4894, "step": 5938 }, { "epoch": 0.3983633497669115, "grad_norm": 0.14738273914162306, "learning_rate": 2e-05, "loss": 5.3288, "step": 5939 }, { "epoch": 0.39843042559613645, "grad_norm": 0.14322379972702287, "learning_rate": 2e-05, "loss": 5.4571, "step": 5940 }, { "epoch": 0.3984975014253614, "grad_norm": 0.14731376169643412, "learning_rate": 2e-05, "loss": 5.3314, "step": 5941 }, { "epoch": 0.3985645772545863, "grad_norm": 0.15307151395920085, "learning_rate": 2e-05, "loss": 5.5605, "step": 5942 }, { "epoch": 0.39863165308381127, "grad_norm": 0.1470009280172678, "learning_rate": 2e-05, "loss": 5.4596, "step": 5943 }, { "epoch": 0.3986987289130362, "grad_norm": 0.14758255697515557, "learning_rate": 2e-05, "loss": 5.3098, "step": 5944 }, { "epoch": 0.39876580474226114, "grad_norm": 0.15022116546064915, "learning_rate": 2e-05, "loss": 5.5179, "step": 5945 }, { "epoch": 0.3988328805714861, "grad_norm": 0.1457235433148265, "learning_rate": 2e-05, "loss": 5.2951, "step": 5946 }, { "epoch": 0.398899956400711, "grad_norm": 0.14833353760049367, "learning_rate": 2e-05, "loss": 5.4332, "step": 5947 }, { "epoch": 0.39896703222993596, "grad_norm": 0.149166510993386, "learning_rate": 2e-05, "loss": 5.4068, "step": 5948 }, { "epoch": 0.3990341080591609, "grad_norm": 0.15680672811473656, "learning_rate": 2e-05, "loss": 5.5172, "step": 5949 }, { "epoch": 0.39910118388838584, "grad_norm": 0.1450015466693006, "learning_rate": 2e-05, "loss": 5.5007, "step": 5950 }, { "epoch": 0.3991682597176108, "grad_norm": 0.14599289201203672, "learning_rate": 2e-05, "loss": 5.4715, "step": 5951 }, { "epoch": 0.3992353355468357, "grad_norm": 0.15817724896534816, "learning_rate": 2e-05, "loss": 5.5397, "step": 5952 }, { "epoch": 0.39930241137606065, "grad_norm": 0.14423517782586429, "learning_rate": 2e-05, "loss": 5.3355, "step": 5953 }, { "epoch": 0.3993694872052856, "grad_norm": 0.15015891395980277, "learning_rate": 2e-05, "loss": 5.4285, "step": 5954 }, { "epoch": 0.39943656303451053, "grad_norm": 0.15910903161894122, "learning_rate": 2e-05, "loss": 5.4983, "step": 5955 }, { "epoch": 0.39950363886373547, "grad_norm": 0.1446941819929459, "learning_rate": 2e-05, "loss": 5.3558, "step": 5956 }, { "epoch": 0.3995707146929604, "grad_norm": 0.14999820687559004, "learning_rate": 2e-05, "loss": 5.544, "step": 5957 }, { "epoch": 0.39963779052218534, "grad_norm": 0.1549669433363154, "learning_rate": 2e-05, "loss": 5.3761, "step": 5958 }, { "epoch": 0.3997048663514103, "grad_norm": 0.14458153614935193, "learning_rate": 2e-05, "loss": 5.4589, "step": 5959 }, { "epoch": 0.3997719421806352, "grad_norm": 0.14452673981854028, "learning_rate": 2e-05, "loss": 5.355, "step": 5960 }, { "epoch": 0.39983901800986016, "grad_norm": 0.15211697981582967, "learning_rate": 2e-05, "loss": 5.3976, "step": 5961 }, { "epoch": 0.3999060938390851, "grad_norm": 0.15274857465846034, "learning_rate": 2e-05, "loss": 5.3967, "step": 5962 }, { "epoch": 0.39997316966831004, "grad_norm": 0.15072527679708161, "learning_rate": 2e-05, "loss": 5.4074, "step": 5963 }, { "epoch": 0.400040245497535, "grad_norm": 0.15427341397887093, "learning_rate": 2e-05, "loss": 5.5552, "step": 5964 }, { "epoch": 0.4001073213267599, "grad_norm": 0.15524144569325887, "learning_rate": 2e-05, "loss": 5.4621, "step": 5965 }, { "epoch": 0.40017439715598485, "grad_norm": 0.15974259740772193, "learning_rate": 2e-05, "loss": 5.4067, "step": 5966 }, { "epoch": 0.4002414729852098, "grad_norm": 0.15414255761401013, "learning_rate": 2e-05, "loss": 5.4926, "step": 5967 }, { "epoch": 0.40030854881443473, "grad_norm": 0.16015912465443163, "learning_rate": 2e-05, "loss": 5.4105, "step": 5968 }, { "epoch": 0.40037562464365967, "grad_norm": 0.1464117026057262, "learning_rate": 2e-05, "loss": 5.5139, "step": 5969 }, { "epoch": 0.4004427004728846, "grad_norm": 0.1457975488459741, "learning_rate": 2e-05, "loss": 5.431, "step": 5970 }, { "epoch": 0.40050977630210954, "grad_norm": 0.14849575474811771, "learning_rate": 2e-05, "loss": 5.4711, "step": 5971 }, { "epoch": 0.4005768521313345, "grad_norm": 0.15138786328699524, "learning_rate": 2e-05, "loss": 5.5419, "step": 5972 }, { "epoch": 0.4006439279605594, "grad_norm": 0.15515794784077275, "learning_rate": 2e-05, "loss": 5.3223, "step": 5973 }, { "epoch": 0.40071100378978436, "grad_norm": 0.14931956508386474, "learning_rate": 2e-05, "loss": 5.319, "step": 5974 }, { "epoch": 0.4007780796190093, "grad_norm": 0.1523360570959072, "learning_rate": 2e-05, "loss": 5.3647, "step": 5975 }, { "epoch": 0.40084515544823424, "grad_norm": 0.15030047581861997, "learning_rate": 2e-05, "loss": 5.3425, "step": 5976 }, { "epoch": 0.4009122312774592, "grad_norm": 0.1566461279408005, "learning_rate": 2e-05, "loss": 5.6564, "step": 5977 }, { "epoch": 0.4009793071066841, "grad_norm": 0.16793460818724498, "learning_rate": 2e-05, "loss": 5.5158, "step": 5978 }, { "epoch": 0.40104638293590905, "grad_norm": 0.1480784151958493, "learning_rate": 2e-05, "loss": 5.3516, "step": 5979 }, { "epoch": 0.401113458765134, "grad_norm": 0.15780079254317383, "learning_rate": 2e-05, "loss": 5.6116, "step": 5980 }, { "epoch": 0.40118053459435893, "grad_norm": 0.15974484794590924, "learning_rate": 2e-05, "loss": 5.5555, "step": 5981 }, { "epoch": 0.40124761042358387, "grad_norm": 0.15533366773400992, "learning_rate": 2e-05, "loss": 5.5404, "step": 5982 }, { "epoch": 0.4013146862528088, "grad_norm": 0.14960791855137717, "learning_rate": 2e-05, "loss": 5.3608, "step": 5983 }, { "epoch": 0.40138176208203374, "grad_norm": 0.15606271060187718, "learning_rate": 2e-05, "loss": 5.4299, "step": 5984 }, { "epoch": 0.4014488379112587, "grad_norm": 0.15213952142674927, "learning_rate": 2e-05, "loss": 5.5152, "step": 5985 }, { "epoch": 0.4015159137404836, "grad_norm": 0.15492798596919966, "learning_rate": 2e-05, "loss": 5.4458, "step": 5986 }, { "epoch": 0.40158298956970856, "grad_norm": 0.1502859466266917, "learning_rate": 2e-05, "loss": 5.305, "step": 5987 }, { "epoch": 0.4016500653989335, "grad_norm": 0.15053162582835142, "learning_rate": 2e-05, "loss": 5.472, "step": 5988 }, { "epoch": 0.40171714122815844, "grad_norm": 0.1560287783529022, "learning_rate": 2e-05, "loss": 5.4753, "step": 5989 }, { "epoch": 0.4017842170573834, "grad_norm": 0.14237230547652055, "learning_rate": 2e-05, "loss": 5.2587, "step": 5990 }, { "epoch": 0.4018512928866083, "grad_norm": 0.15172545088934214, "learning_rate": 2e-05, "loss": 5.5097, "step": 5991 }, { "epoch": 0.40191836871583325, "grad_norm": 0.16324104770353742, "learning_rate": 2e-05, "loss": 5.644, "step": 5992 }, { "epoch": 0.4019854445450582, "grad_norm": 0.15443232929486764, "learning_rate": 2e-05, "loss": 5.5166, "step": 5993 }, { "epoch": 0.40205252037428313, "grad_norm": 0.14998236761273226, "learning_rate": 2e-05, "loss": 5.3467, "step": 5994 }, { "epoch": 0.40211959620350807, "grad_norm": 0.15949096548765676, "learning_rate": 2e-05, "loss": 5.3965, "step": 5995 }, { "epoch": 0.402186672032733, "grad_norm": 0.15393847386347603, "learning_rate": 2e-05, "loss": 5.4736, "step": 5996 }, { "epoch": 0.40225374786195794, "grad_norm": 0.14873881419982177, "learning_rate": 2e-05, "loss": 5.536, "step": 5997 }, { "epoch": 0.4023208236911829, "grad_norm": 0.15832075068230408, "learning_rate": 2e-05, "loss": 5.5771, "step": 5998 }, { "epoch": 0.4023878995204078, "grad_norm": 0.1651991586554004, "learning_rate": 2e-05, "loss": 5.2404, "step": 5999 }, { "epoch": 0.40245497534963276, "grad_norm": 0.15260305276965905, "learning_rate": 2e-05, "loss": 5.4635, "step": 6000 }, { "epoch": 0.4025220511788577, "grad_norm": 0.15845256948084602, "learning_rate": 2e-05, "loss": 5.4391, "step": 6001 }, { "epoch": 0.40258912700808264, "grad_norm": 0.1533038381959413, "learning_rate": 2e-05, "loss": 5.5904, "step": 6002 }, { "epoch": 0.4026562028373076, "grad_norm": 0.1476470063009068, "learning_rate": 2e-05, "loss": 5.3976, "step": 6003 }, { "epoch": 0.4027232786665325, "grad_norm": 0.1524396060890464, "learning_rate": 2e-05, "loss": 5.3979, "step": 6004 }, { "epoch": 0.40279035449575745, "grad_norm": 0.1698079006375316, "learning_rate": 2e-05, "loss": 5.5146, "step": 6005 }, { "epoch": 0.4028574303249824, "grad_norm": 0.15222545116242248, "learning_rate": 2e-05, "loss": 5.3234, "step": 6006 }, { "epoch": 0.40292450615420733, "grad_norm": 0.15095382845030694, "learning_rate": 2e-05, "loss": 5.4241, "step": 6007 }, { "epoch": 0.40299158198343227, "grad_norm": 0.16410280022962706, "learning_rate": 2e-05, "loss": 5.5087, "step": 6008 }, { "epoch": 0.4030586578126572, "grad_norm": 0.15946479861421245, "learning_rate": 2e-05, "loss": 5.4589, "step": 6009 }, { "epoch": 0.40312573364188214, "grad_norm": 0.1601739879462838, "learning_rate": 2e-05, "loss": 5.4183, "step": 6010 }, { "epoch": 0.4031928094711071, "grad_norm": 0.15039068286143933, "learning_rate": 2e-05, "loss": 5.5256, "step": 6011 }, { "epoch": 0.403259885300332, "grad_norm": 0.15624060887507452, "learning_rate": 2e-05, "loss": 5.5165, "step": 6012 }, { "epoch": 0.40332696112955696, "grad_norm": 0.1539690397156179, "learning_rate": 2e-05, "loss": 5.4258, "step": 6013 }, { "epoch": 0.4033940369587819, "grad_norm": 0.1515976927373858, "learning_rate": 2e-05, "loss": 5.4026, "step": 6014 }, { "epoch": 0.40346111278800684, "grad_norm": 0.15704954616036437, "learning_rate": 2e-05, "loss": 5.4097, "step": 6015 }, { "epoch": 0.4035281886172318, "grad_norm": 0.16024113471010473, "learning_rate": 2e-05, "loss": 5.4373, "step": 6016 }, { "epoch": 0.4035952644464567, "grad_norm": 0.15420382377101463, "learning_rate": 2e-05, "loss": 5.4629, "step": 6017 }, { "epoch": 0.40366234027568165, "grad_norm": 0.15664716558324795, "learning_rate": 2e-05, "loss": 5.4098, "step": 6018 }, { "epoch": 0.4037294161049066, "grad_norm": 0.15180064671749896, "learning_rate": 2e-05, "loss": 5.4129, "step": 6019 }, { "epoch": 0.40379649193413153, "grad_norm": 0.1547786024366051, "learning_rate": 2e-05, "loss": 5.456, "step": 6020 }, { "epoch": 0.40386356776335647, "grad_norm": 0.1587669350642931, "learning_rate": 2e-05, "loss": 5.4224, "step": 6021 }, { "epoch": 0.4039306435925814, "grad_norm": 0.15693967334666328, "learning_rate": 2e-05, "loss": 5.6413, "step": 6022 }, { "epoch": 0.40399771942180634, "grad_norm": 0.16285352493060998, "learning_rate": 2e-05, "loss": 5.448, "step": 6023 }, { "epoch": 0.4040647952510313, "grad_norm": 0.15292817468719166, "learning_rate": 2e-05, "loss": 5.3556, "step": 6024 }, { "epoch": 0.4041318710802562, "grad_norm": 0.14462371822268005, "learning_rate": 2e-05, "loss": 5.4783, "step": 6025 }, { "epoch": 0.40419894690948116, "grad_norm": 0.14739639699681178, "learning_rate": 2e-05, "loss": 5.4944, "step": 6026 }, { "epoch": 0.4042660227387061, "grad_norm": 0.16108022105434244, "learning_rate": 2e-05, "loss": 5.3953, "step": 6027 }, { "epoch": 0.40433309856793104, "grad_norm": 0.1474509768870367, "learning_rate": 2e-05, "loss": 5.5286, "step": 6028 }, { "epoch": 0.404400174397156, "grad_norm": 0.15079750264331543, "learning_rate": 2e-05, "loss": 5.4186, "step": 6029 }, { "epoch": 0.4044672502263809, "grad_norm": 0.1458719980847305, "learning_rate": 2e-05, "loss": 5.3563, "step": 6030 }, { "epoch": 0.40453432605560585, "grad_norm": 0.15304975621717112, "learning_rate": 2e-05, "loss": 5.5925, "step": 6031 }, { "epoch": 0.4046014018848308, "grad_norm": 0.1520346560029922, "learning_rate": 2e-05, "loss": 5.4501, "step": 6032 }, { "epoch": 0.40466847771405573, "grad_norm": 0.15064836907560086, "learning_rate": 2e-05, "loss": 5.4288, "step": 6033 }, { "epoch": 0.40473555354328067, "grad_norm": 0.14603597324586737, "learning_rate": 2e-05, "loss": 5.4439, "step": 6034 }, { "epoch": 0.4048026293725056, "grad_norm": 0.15385398407709727, "learning_rate": 2e-05, "loss": 5.386, "step": 6035 }, { "epoch": 0.40486970520173055, "grad_norm": 0.14137539672449997, "learning_rate": 2e-05, "loss": 5.376, "step": 6036 }, { "epoch": 0.4049367810309555, "grad_norm": 0.14316446522327167, "learning_rate": 2e-05, "loss": 5.4896, "step": 6037 }, { "epoch": 0.4050038568601804, "grad_norm": 0.14543957958051615, "learning_rate": 2e-05, "loss": 5.3809, "step": 6038 }, { "epoch": 0.40507093268940536, "grad_norm": 0.14384966970502341, "learning_rate": 2e-05, "loss": 5.4852, "step": 6039 }, { "epoch": 0.4051380085186303, "grad_norm": 0.14261973756452165, "learning_rate": 2e-05, "loss": 5.399, "step": 6040 }, { "epoch": 0.40520508434785524, "grad_norm": 0.14594225185179877, "learning_rate": 2e-05, "loss": 5.3694, "step": 6041 }, { "epoch": 0.4052721601770802, "grad_norm": 0.14238136018724745, "learning_rate": 2e-05, "loss": 5.4951, "step": 6042 }, { "epoch": 0.4053392360063051, "grad_norm": 0.14718572165026994, "learning_rate": 2e-05, "loss": 5.4058, "step": 6043 }, { "epoch": 0.40540631183553005, "grad_norm": 0.14363938442517427, "learning_rate": 2e-05, "loss": 5.3782, "step": 6044 }, { "epoch": 0.405473387664755, "grad_norm": 0.1479353910642996, "learning_rate": 2e-05, "loss": 5.5865, "step": 6045 }, { "epoch": 0.40554046349397993, "grad_norm": 0.14623340023390166, "learning_rate": 2e-05, "loss": 5.4653, "step": 6046 }, { "epoch": 0.40560753932320487, "grad_norm": 0.14461855324385894, "learning_rate": 2e-05, "loss": 5.4816, "step": 6047 }, { "epoch": 0.4056746151524298, "grad_norm": 0.1527639109903678, "learning_rate": 2e-05, "loss": 5.3776, "step": 6048 }, { "epoch": 0.40574169098165475, "grad_norm": 0.15501073851554598, "learning_rate": 2e-05, "loss": 5.4025, "step": 6049 }, { "epoch": 0.4058087668108797, "grad_norm": 0.14969192504580944, "learning_rate": 2e-05, "loss": 5.3329, "step": 6050 }, { "epoch": 0.4058758426401046, "grad_norm": 0.14699245386539642, "learning_rate": 2e-05, "loss": 5.4822, "step": 6051 }, { "epoch": 0.40594291846932956, "grad_norm": 0.15232413374637474, "learning_rate": 2e-05, "loss": 5.5125, "step": 6052 }, { "epoch": 0.4060099942985545, "grad_norm": 0.15442690656464464, "learning_rate": 2e-05, "loss": 5.3806, "step": 6053 }, { "epoch": 0.40607707012777944, "grad_norm": 0.16098324783117463, "learning_rate": 2e-05, "loss": 5.4392, "step": 6054 }, { "epoch": 0.4061441459570044, "grad_norm": 0.1513771087140424, "learning_rate": 2e-05, "loss": 5.5473, "step": 6055 }, { "epoch": 0.4062112217862293, "grad_norm": 0.16258286685946519, "learning_rate": 2e-05, "loss": 5.4633, "step": 6056 }, { "epoch": 0.40627829761545425, "grad_norm": 0.1457785852182703, "learning_rate": 2e-05, "loss": 5.3305, "step": 6057 }, { "epoch": 0.4063453734446792, "grad_norm": 0.1446502311723748, "learning_rate": 2e-05, "loss": 5.3584, "step": 6058 }, { "epoch": 0.40641244927390413, "grad_norm": 0.15360605323060894, "learning_rate": 2e-05, "loss": 5.4209, "step": 6059 }, { "epoch": 0.40647952510312907, "grad_norm": 0.15331275633886143, "learning_rate": 2e-05, "loss": 5.4356, "step": 6060 }, { "epoch": 0.406546600932354, "grad_norm": 0.153156307319625, "learning_rate": 2e-05, "loss": 5.3788, "step": 6061 }, { "epoch": 0.40661367676157895, "grad_norm": 0.15708553491669636, "learning_rate": 2e-05, "loss": 5.4223, "step": 6062 }, { "epoch": 0.4066807525908039, "grad_norm": 0.1479300552202045, "learning_rate": 2e-05, "loss": 5.3916, "step": 6063 }, { "epoch": 0.4067478284200288, "grad_norm": 0.15279132418782454, "learning_rate": 2e-05, "loss": 5.4119, "step": 6064 }, { "epoch": 0.40681490424925376, "grad_norm": 0.14962557499541565, "learning_rate": 2e-05, "loss": 5.4328, "step": 6065 }, { "epoch": 0.4068819800784787, "grad_norm": 0.1540267964870355, "learning_rate": 2e-05, "loss": 5.2877, "step": 6066 }, { "epoch": 0.40694905590770364, "grad_norm": 0.15368758941890037, "learning_rate": 2e-05, "loss": 5.3639, "step": 6067 }, { "epoch": 0.4070161317369286, "grad_norm": 0.15500344012336434, "learning_rate": 2e-05, "loss": 5.4851, "step": 6068 }, { "epoch": 0.4070832075661535, "grad_norm": 0.14754607150670423, "learning_rate": 2e-05, "loss": 5.5639, "step": 6069 }, { "epoch": 0.40715028339537845, "grad_norm": 0.15337883625537224, "learning_rate": 2e-05, "loss": 5.3378, "step": 6070 }, { "epoch": 0.4072173592246034, "grad_norm": 0.15227212821048294, "learning_rate": 2e-05, "loss": 5.3013, "step": 6071 }, { "epoch": 0.40728443505382833, "grad_norm": 0.14900960291244386, "learning_rate": 2e-05, "loss": 5.3618, "step": 6072 }, { "epoch": 0.40735151088305327, "grad_norm": 0.14201703042706224, "learning_rate": 2e-05, "loss": 5.3742, "step": 6073 }, { "epoch": 0.4074185867122782, "grad_norm": 0.1464483912732817, "learning_rate": 2e-05, "loss": 5.5129, "step": 6074 }, { "epoch": 0.40748566254150315, "grad_norm": 0.15308299310567888, "learning_rate": 2e-05, "loss": 5.3051, "step": 6075 }, { "epoch": 0.4075527383707281, "grad_norm": 0.1462770854150524, "learning_rate": 2e-05, "loss": 5.4861, "step": 6076 }, { "epoch": 0.407619814199953, "grad_norm": 0.15044370225557685, "learning_rate": 2e-05, "loss": 5.5122, "step": 6077 }, { "epoch": 0.40768689002917796, "grad_norm": 0.1676513113476112, "learning_rate": 2e-05, "loss": 5.4518, "step": 6078 }, { "epoch": 0.4077539658584029, "grad_norm": 0.14575690015912224, "learning_rate": 2e-05, "loss": 5.4962, "step": 6079 }, { "epoch": 0.40782104168762784, "grad_norm": 0.15726576245724802, "learning_rate": 2e-05, "loss": 5.3551, "step": 6080 }, { "epoch": 0.4078881175168528, "grad_norm": 0.14541743146383224, "learning_rate": 2e-05, "loss": 5.4716, "step": 6081 }, { "epoch": 0.4079551933460777, "grad_norm": 0.16319908595976848, "learning_rate": 2e-05, "loss": 5.5158, "step": 6082 }, { "epoch": 0.40802226917530265, "grad_norm": 0.15575388296546167, "learning_rate": 2e-05, "loss": 5.5094, "step": 6083 }, { "epoch": 0.4080893450045276, "grad_norm": 0.14871726357723505, "learning_rate": 2e-05, "loss": 5.4675, "step": 6084 }, { "epoch": 0.40815642083375253, "grad_norm": 0.1518768114233469, "learning_rate": 2e-05, "loss": 5.3644, "step": 6085 }, { "epoch": 0.40822349666297747, "grad_norm": 0.15589791501222594, "learning_rate": 2e-05, "loss": 5.4819, "step": 6086 }, { "epoch": 0.4082905724922024, "grad_norm": 0.14531584804929493, "learning_rate": 2e-05, "loss": 5.2746, "step": 6087 }, { "epoch": 0.40835764832142735, "grad_norm": 0.14744504757742968, "learning_rate": 2e-05, "loss": 5.4029, "step": 6088 }, { "epoch": 0.4084247241506523, "grad_norm": 0.1485517928386306, "learning_rate": 2e-05, "loss": 5.4761, "step": 6089 }, { "epoch": 0.4084917999798772, "grad_norm": 0.14919780234425425, "learning_rate": 2e-05, "loss": 5.399, "step": 6090 }, { "epoch": 0.4085588758091022, "grad_norm": 0.14446332847796833, "learning_rate": 2e-05, "loss": 5.4434, "step": 6091 }, { "epoch": 0.40862595163832716, "grad_norm": 0.15123167708792787, "learning_rate": 2e-05, "loss": 5.3837, "step": 6092 }, { "epoch": 0.4086930274675521, "grad_norm": 0.14717028567044393, "learning_rate": 2e-05, "loss": 5.4316, "step": 6093 }, { "epoch": 0.40876010329677703, "grad_norm": 0.15042712068782174, "learning_rate": 2e-05, "loss": 5.4027, "step": 6094 }, { "epoch": 0.40882717912600197, "grad_norm": 0.14375038162597945, "learning_rate": 2e-05, "loss": 5.3387, "step": 6095 }, { "epoch": 0.4088942549552269, "grad_norm": 0.15291742270012196, "learning_rate": 2e-05, "loss": 5.5601, "step": 6096 }, { "epoch": 0.40896133078445185, "grad_norm": 0.14806975628068123, "learning_rate": 2e-05, "loss": 5.5391, "step": 6097 }, { "epoch": 0.4090284066136768, "grad_norm": 0.14601110429422573, "learning_rate": 2e-05, "loss": 5.6242, "step": 6098 }, { "epoch": 0.4090954824429017, "grad_norm": 0.14263892148998944, "learning_rate": 2e-05, "loss": 5.4961, "step": 6099 }, { "epoch": 0.40916255827212666, "grad_norm": 0.15065958473053842, "learning_rate": 2e-05, "loss": 5.3485, "step": 6100 }, { "epoch": 0.4092296341013516, "grad_norm": 0.14694193541917297, "learning_rate": 2e-05, "loss": 5.3852, "step": 6101 }, { "epoch": 0.40929670993057654, "grad_norm": 0.15278120479286747, "learning_rate": 2e-05, "loss": 5.5714, "step": 6102 }, { "epoch": 0.4093637857598015, "grad_norm": 0.16054130302940997, "learning_rate": 2e-05, "loss": 5.208, "step": 6103 }, { "epoch": 0.4094308615890264, "grad_norm": 0.1446162996874019, "learning_rate": 2e-05, "loss": 5.4623, "step": 6104 }, { "epoch": 0.40949793741825136, "grad_norm": 0.14882105241487117, "learning_rate": 2e-05, "loss": 5.3525, "step": 6105 }, { "epoch": 0.4095650132474763, "grad_norm": 0.15989120155100828, "learning_rate": 2e-05, "loss": 5.378, "step": 6106 }, { "epoch": 0.40963208907670123, "grad_norm": 0.14458361666581818, "learning_rate": 2e-05, "loss": 5.4872, "step": 6107 }, { "epoch": 0.40969916490592617, "grad_norm": 0.14737736523841655, "learning_rate": 2e-05, "loss": 5.4322, "step": 6108 }, { "epoch": 0.4097662407351511, "grad_norm": 0.16387172711224748, "learning_rate": 2e-05, "loss": 5.5087, "step": 6109 }, { "epoch": 0.40983331656437605, "grad_norm": 0.1480723348815157, "learning_rate": 2e-05, "loss": 5.4056, "step": 6110 }, { "epoch": 0.409900392393601, "grad_norm": 0.15257662510780653, "learning_rate": 2e-05, "loss": 5.5466, "step": 6111 }, { "epoch": 0.4099674682228259, "grad_norm": 0.15904679556609308, "learning_rate": 2e-05, "loss": 5.3305, "step": 6112 }, { "epoch": 0.41003454405205086, "grad_norm": 0.15154523568578862, "learning_rate": 2e-05, "loss": 5.4702, "step": 6113 }, { "epoch": 0.4101016198812758, "grad_norm": 0.15098305445598653, "learning_rate": 2e-05, "loss": 5.5554, "step": 6114 }, { "epoch": 0.41016869571050074, "grad_norm": 0.1589589698493392, "learning_rate": 2e-05, "loss": 5.3591, "step": 6115 }, { "epoch": 0.4102357715397257, "grad_norm": 0.15656133212438542, "learning_rate": 2e-05, "loss": 5.5106, "step": 6116 }, { "epoch": 0.4103028473689506, "grad_norm": 0.15140365208840909, "learning_rate": 2e-05, "loss": 5.3766, "step": 6117 }, { "epoch": 0.41036992319817556, "grad_norm": 0.1434327408736165, "learning_rate": 2e-05, "loss": 5.4013, "step": 6118 }, { "epoch": 0.4104369990274005, "grad_norm": 0.15115410891386852, "learning_rate": 2e-05, "loss": 5.4348, "step": 6119 }, { "epoch": 0.41050407485662543, "grad_norm": 0.1497631913037576, "learning_rate": 2e-05, "loss": 5.4394, "step": 6120 }, { "epoch": 0.4105711506858504, "grad_norm": 0.14568586051385393, "learning_rate": 2e-05, "loss": 5.4176, "step": 6121 }, { "epoch": 0.4106382265150753, "grad_norm": 0.15127203960472932, "learning_rate": 2e-05, "loss": 5.3339, "step": 6122 }, { "epoch": 0.41070530234430025, "grad_norm": 0.15076954556867875, "learning_rate": 2e-05, "loss": 5.4308, "step": 6123 }, { "epoch": 0.4107723781735252, "grad_norm": 0.14902824739542336, "learning_rate": 2e-05, "loss": 5.3892, "step": 6124 }, { "epoch": 0.4108394540027501, "grad_norm": 0.15369660370591312, "learning_rate": 2e-05, "loss": 5.4532, "step": 6125 }, { "epoch": 0.41090652983197506, "grad_norm": 0.1517700878127306, "learning_rate": 2e-05, "loss": 5.54, "step": 6126 }, { "epoch": 0.4109736056612, "grad_norm": 0.1478424008628996, "learning_rate": 2e-05, "loss": 5.4344, "step": 6127 }, { "epoch": 0.41104068149042494, "grad_norm": 0.15971026450019005, "learning_rate": 2e-05, "loss": 5.3187, "step": 6128 }, { "epoch": 0.4111077573196499, "grad_norm": 0.14988302029310308, "learning_rate": 2e-05, "loss": 5.4468, "step": 6129 }, { "epoch": 0.4111748331488748, "grad_norm": 0.149698365106905, "learning_rate": 2e-05, "loss": 5.5568, "step": 6130 }, { "epoch": 0.41124190897809976, "grad_norm": 0.15212818817153279, "learning_rate": 2e-05, "loss": 5.4725, "step": 6131 }, { "epoch": 0.4113089848073247, "grad_norm": 0.15848372340059896, "learning_rate": 2e-05, "loss": 5.5815, "step": 6132 }, { "epoch": 0.41137606063654963, "grad_norm": 0.15423699786438205, "learning_rate": 2e-05, "loss": 5.4055, "step": 6133 }, { "epoch": 0.4114431364657746, "grad_norm": 0.14893908040288306, "learning_rate": 2e-05, "loss": 5.3825, "step": 6134 }, { "epoch": 0.4115102122949995, "grad_norm": 0.15242625319854541, "learning_rate": 2e-05, "loss": 5.338, "step": 6135 }, { "epoch": 0.41157728812422445, "grad_norm": 0.15771477847133705, "learning_rate": 2e-05, "loss": 5.4753, "step": 6136 }, { "epoch": 0.4116443639534494, "grad_norm": 0.15334869569459353, "learning_rate": 2e-05, "loss": 5.393, "step": 6137 }, { "epoch": 0.4117114397826743, "grad_norm": 0.14610029309748182, "learning_rate": 2e-05, "loss": 5.3249, "step": 6138 }, { "epoch": 0.41177851561189927, "grad_norm": 0.15157673023211954, "learning_rate": 2e-05, "loss": 5.4579, "step": 6139 }, { "epoch": 0.4118455914411242, "grad_norm": 0.14346479193707015, "learning_rate": 2e-05, "loss": 5.4402, "step": 6140 }, { "epoch": 0.41191266727034914, "grad_norm": 0.14838402000327952, "learning_rate": 2e-05, "loss": 5.4502, "step": 6141 }, { "epoch": 0.4119797430995741, "grad_norm": 0.1430616001646572, "learning_rate": 2e-05, "loss": 5.3267, "step": 6142 }, { "epoch": 0.412046818928799, "grad_norm": 0.15980469581124987, "learning_rate": 2e-05, "loss": 5.3938, "step": 6143 }, { "epoch": 0.41211389475802396, "grad_norm": 0.14205378270823804, "learning_rate": 2e-05, "loss": 5.4707, "step": 6144 }, { "epoch": 0.4121809705872489, "grad_norm": 0.14997561769203577, "learning_rate": 2e-05, "loss": 5.3378, "step": 6145 }, { "epoch": 0.41224804641647383, "grad_norm": 0.15366498258968467, "learning_rate": 2e-05, "loss": 5.5612, "step": 6146 }, { "epoch": 0.4123151222456988, "grad_norm": 0.15688349912975239, "learning_rate": 2e-05, "loss": 5.4789, "step": 6147 }, { "epoch": 0.4123821980749237, "grad_norm": 0.14456526939338343, "learning_rate": 2e-05, "loss": 5.3515, "step": 6148 }, { "epoch": 0.41244927390414865, "grad_norm": 0.15172809385312647, "learning_rate": 2e-05, "loss": 5.5482, "step": 6149 }, { "epoch": 0.4125163497333736, "grad_norm": 0.1496753254493908, "learning_rate": 2e-05, "loss": 5.3616, "step": 6150 }, { "epoch": 0.4125834255625985, "grad_norm": 0.14683100707445412, "learning_rate": 2e-05, "loss": 5.4585, "step": 6151 }, { "epoch": 0.41265050139182347, "grad_norm": 0.15170101686392787, "learning_rate": 2e-05, "loss": 5.4207, "step": 6152 }, { "epoch": 0.4127175772210484, "grad_norm": 0.15250098212703309, "learning_rate": 2e-05, "loss": 5.5805, "step": 6153 }, { "epoch": 0.41278465305027334, "grad_norm": 0.15117191472511607, "learning_rate": 2e-05, "loss": 5.4357, "step": 6154 }, { "epoch": 0.4128517288794983, "grad_norm": 0.14958652625255425, "learning_rate": 2e-05, "loss": 5.5265, "step": 6155 }, { "epoch": 0.4129188047087232, "grad_norm": 0.1549552328992079, "learning_rate": 2e-05, "loss": 5.4175, "step": 6156 }, { "epoch": 0.41298588053794816, "grad_norm": 0.14840966341952094, "learning_rate": 2e-05, "loss": 5.5041, "step": 6157 }, { "epoch": 0.4130529563671731, "grad_norm": 0.14837594762594064, "learning_rate": 2e-05, "loss": 5.4823, "step": 6158 }, { "epoch": 0.41312003219639803, "grad_norm": 0.14633692723179006, "learning_rate": 2e-05, "loss": 5.4563, "step": 6159 }, { "epoch": 0.413187108025623, "grad_norm": 0.15219694866476308, "learning_rate": 2e-05, "loss": 5.4581, "step": 6160 }, { "epoch": 0.4132541838548479, "grad_norm": 0.146770936016127, "learning_rate": 2e-05, "loss": 5.426, "step": 6161 }, { "epoch": 0.41332125968407285, "grad_norm": 0.14531835629164536, "learning_rate": 2e-05, "loss": 5.2618, "step": 6162 }, { "epoch": 0.4133883355132978, "grad_norm": 0.1486287444434929, "learning_rate": 2e-05, "loss": 5.4312, "step": 6163 }, { "epoch": 0.4134554113425227, "grad_norm": 0.14921852784556688, "learning_rate": 2e-05, "loss": 5.4557, "step": 6164 }, { "epoch": 0.41352248717174767, "grad_norm": 0.14637180480236725, "learning_rate": 2e-05, "loss": 5.4711, "step": 6165 }, { "epoch": 0.4135895630009726, "grad_norm": 0.1724012154011378, "learning_rate": 2e-05, "loss": 5.3923, "step": 6166 }, { "epoch": 0.41365663883019754, "grad_norm": 0.15004133266552003, "learning_rate": 2e-05, "loss": 5.3885, "step": 6167 }, { "epoch": 0.4137237146594225, "grad_norm": 0.15316988103669055, "learning_rate": 2e-05, "loss": 5.4615, "step": 6168 }, { "epoch": 0.4137907904886474, "grad_norm": 0.14623395463643032, "learning_rate": 2e-05, "loss": 5.5389, "step": 6169 }, { "epoch": 0.41385786631787236, "grad_norm": 0.14808101918085212, "learning_rate": 2e-05, "loss": 5.4192, "step": 6170 }, { "epoch": 0.4139249421470973, "grad_norm": 0.149877247337902, "learning_rate": 2e-05, "loss": 5.3277, "step": 6171 }, { "epoch": 0.41399201797632224, "grad_norm": 0.14189501524026232, "learning_rate": 2e-05, "loss": 5.4958, "step": 6172 }, { "epoch": 0.4140590938055472, "grad_norm": 0.14489508681495938, "learning_rate": 2e-05, "loss": 5.2224, "step": 6173 }, { "epoch": 0.4141261696347721, "grad_norm": 0.14271955104483994, "learning_rate": 2e-05, "loss": 5.3324, "step": 6174 }, { "epoch": 0.41419324546399705, "grad_norm": 0.1432449732709902, "learning_rate": 2e-05, "loss": 5.4105, "step": 6175 }, { "epoch": 0.414260321293222, "grad_norm": 0.1429603245203277, "learning_rate": 2e-05, "loss": 5.3307, "step": 6176 }, { "epoch": 0.4143273971224469, "grad_norm": 0.1434477456316279, "learning_rate": 2e-05, "loss": 5.3888, "step": 6177 }, { "epoch": 0.41439447295167187, "grad_norm": 0.15518142713920297, "learning_rate": 2e-05, "loss": 5.352, "step": 6178 }, { "epoch": 0.4144615487808968, "grad_norm": 0.14401172798104242, "learning_rate": 2e-05, "loss": 5.4644, "step": 6179 }, { "epoch": 0.41452862461012174, "grad_norm": 0.15729530516313117, "learning_rate": 2e-05, "loss": 5.4151, "step": 6180 }, { "epoch": 0.4145957004393467, "grad_norm": 0.14732355971864336, "learning_rate": 2e-05, "loss": 5.3683, "step": 6181 }, { "epoch": 0.4146627762685716, "grad_norm": 0.14565955506072412, "learning_rate": 2e-05, "loss": 5.5106, "step": 6182 }, { "epoch": 0.41472985209779656, "grad_norm": 0.15234533745162324, "learning_rate": 2e-05, "loss": 5.4525, "step": 6183 }, { "epoch": 0.4147969279270215, "grad_norm": 0.15354433454569177, "learning_rate": 2e-05, "loss": 5.3758, "step": 6184 }, { "epoch": 0.41486400375624644, "grad_norm": 0.15406927654932262, "learning_rate": 2e-05, "loss": 5.4858, "step": 6185 }, { "epoch": 0.4149310795854714, "grad_norm": 0.14512594791563746, "learning_rate": 2e-05, "loss": 5.4833, "step": 6186 }, { "epoch": 0.4149981554146963, "grad_norm": 0.14833918876614743, "learning_rate": 2e-05, "loss": 5.3592, "step": 6187 }, { "epoch": 0.41506523124392125, "grad_norm": 0.162823890699379, "learning_rate": 2e-05, "loss": 5.6159, "step": 6188 }, { "epoch": 0.4151323070731462, "grad_norm": 0.15248920966437957, "learning_rate": 2e-05, "loss": 5.4514, "step": 6189 }, { "epoch": 0.41519938290237113, "grad_norm": 0.14752179454132905, "learning_rate": 2e-05, "loss": 5.5288, "step": 6190 }, { "epoch": 0.41526645873159607, "grad_norm": 0.15382748152958348, "learning_rate": 2e-05, "loss": 5.3071, "step": 6191 }, { "epoch": 0.415333534560821, "grad_norm": 0.15206752100770493, "learning_rate": 2e-05, "loss": 5.2151, "step": 6192 }, { "epoch": 0.41540061039004594, "grad_norm": 0.15166292563124945, "learning_rate": 2e-05, "loss": 5.3951, "step": 6193 }, { "epoch": 0.4154676862192709, "grad_norm": 0.14353771090088854, "learning_rate": 2e-05, "loss": 5.3598, "step": 6194 }, { "epoch": 0.4155347620484958, "grad_norm": 0.15607206891195802, "learning_rate": 2e-05, "loss": 5.4813, "step": 6195 }, { "epoch": 0.41560183787772076, "grad_norm": 0.15240955471116122, "learning_rate": 2e-05, "loss": 5.3344, "step": 6196 }, { "epoch": 0.4156689137069457, "grad_norm": 0.14220299492229682, "learning_rate": 2e-05, "loss": 5.4989, "step": 6197 }, { "epoch": 0.41573598953617064, "grad_norm": 0.14448273926202573, "learning_rate": 2e-05, "loss": 5.4169, "step": 6198 }, { "epoch": 0.4158030653653956, "grad_norm": 0.14752277696179278, "learning_rate": 2e-05, "loss": 5.3598, "step": 6199 }, { "epoch": 0.4158701411946205, "grad_norm": 0.1487673169243866, "learning_rate": 2e-05, "loss": 5.4494, "step": 6200 }, { "epoch": 0.41593721702384545, "grad_norm": 0.15423482820114895, "learning_rate": 2e-05, "loss": 5.369, "step": 6201 }, { "epoch": 0.4160042928530704, "grad_norm": 0.15224545886837434, "learning_rate": 2e-05, "loss": 5.3478, "step": 6202 }, { "epoch": 0.41607136868229533, "grad_norm": 0.15111854897418345, "learning_rate": 2e-05, "loss": 5.4217, "step": 6203 }, { "epoch": 0.41613844451152027, "grad_norm": 0.14513002352064175, "learning_rate": 2e-05, "loss": 5.4812, "step": 6204 }, { "epoch": 0.4162055203407452, "grad_norm": 0.15278045129940115, "learning_rate": 2e-05, "loss": 5.4462, "step": 6205 }, { "epoch": 0.41627259616997014, "grad_norm": 0.15353011611301318, "learning_rate": 2e-05, "loss": 5.4453, "step": 6206 }, { "epoch": 0.4163396719991951, "grad_norm": 0.14586770030273707, "learning_rate": 2e-05, "loss": 5.4109, "step": 6207 }, { "epoch": 0.41640674782842, "grad_norm": 0.1423842447594983, "learning_rate": 2e-05, "loss": 5.3681, "step": 6208 }, { "epoch": 0.41647382365764496, "grad_norm": 0.14882019391754242, "learning_rate": 2e-05, "loss": 5.539, "step": 6209 }, { "epoch": 0.4165408994868699, "grad_norm": 0.14348557963834951, "learning_rate": 2e-05, "loss": 5.4452, "step": 6210 }, { "epoch": 0.41660797531609484, "grad_norm": 0.15039619927816222, "learning_rate": 2e-05, "loss": 5.4336, "step": 6211 }, { "epoch": 0.4166750511453198, "grad_norm": 0.13939458139050415, "learning_rate": 2e-05, "loss": 5.4613, "step": 6212 }, { "epoch": 0.4167421269745447, "grad_norm": 0.14689257277495957, "learning_rate": 2e-05, "loss": 5.223, "step": 6213 }, { "epoch": 0.41680920280376965, "grad_norm": 0.150487088584561, "learning_rate": 2e-05, "loss": 5.5122, "step": 6214 }, { "epoch": 0.4168762786329946, "grad_norm": 0.15026701198479003, "learning_rate": 2e-05, "loss": 5.4937, "step": 6215 }, { "epoch": 0.41694335446221953, "grad_norm": 0.14851473736825993, "learning_rate": 2e-05, "loss": 5.4574, "step": 6216 }, { "epoch": 0.41701043029144447, "grad_norm": 0.1438750385177889, "learning_rate": 2e-05, "loss": 5.4049, "step": 6217 }, { "epoch": 0.4170775061206694, "grad_norm": 0.1482205165061603, "learning_rate": 2e-05, "loss": 5.3618, "step": 6218 }, { "epoch": 0.41714458194989434, "grad_norm": 0.14799252323494966, "learning_rate": 2e-05, "loss": 5.3691, "step": 6219 }, { "epoch": 0.4172116577791193, "grad_norm": 0.14614882601745888, "learning_rate": 2e-05, "loss": 5.4587, "step": 6220 }, { "epoch": 0.4172787336083442, "grad_norm": 0.1457305357527663, "learning_rate": 2e-05, "loss": 5.3378, "step": 6221 }, { "epoch": 0.41734580943756916, "grad_norm": 0.15300223575820496, "learning_rate": 2e-05, "loss": 5.5145, "step": 6222 }, { "epoch": 0.4174128852667941, "grad_norm": 0.1478922301907365, "learning_rate": 2e-05, "loss": 5.4099, "step": 6223 }, { "epoch": 0.41747996109601904, "grad_norm": 0.14879945433933275, "learning_rate": 2e-05, "loss": 5.3496, "step": 6224 }, { "epoch": 0.417547036925244, "grad_norm": 0.1507505739326256, "learning_rate": 2e-05, "loss": 5.3937, "step": 6225 }, { "epoch": 0.4176141127544689, "grad_norm": 0.1507091153310175, "learning_rate": 2e-05, "loss": 5.2893, "step": 6226 }, { "epoch": 0.41768118858369385, "grad_norm": 0.15565932321239265, "learning_rate": 2e-05, "loss": 5.5619, "step": 6227 }, { "epoch": 0.4177482644129188, "grad_norm": 0.153089756977703, "learning_rate": 2e-05, "loss": 5.5483, "step": 6228 }, { "epoch": 0.41781534024214373, "grad_norm": 0.14653417287645107, "learning_rate": 2e-05, "loss": 5.3846, "step": 6229 }, { "epoch": 0.41788241607136867, "grad_norm": 0.16677507039010597, "learning_rate": 2e-05, "loss": 5.4395, "step": 6230 }, { "epoch": 0.4179494919005936, "grad_norm": 0.15151076827398888, "learning_rate": 2e-05, "loss": 5.4421, "step": 6231 }, { "epoch": 0.41801656772981854, "grad_norm": 0.151191684336144, "learning_rate": 2e-05, "loss": 5.3537, "step": 6232 }, { "epoch": 0.4180836435590435, "grad_norm": 0.14520346000136614, "learning_rate": 2e-05, "loss": 5.3477, "step": 6233 }, { "epoch": 0.4181507193882684, "grad_norm": 0.15630497739757923, "learning_rate": 2e-05, "loss": 5.3903, "step": 6234 }, { "epoch": 0.41821779521749336, "grad_norm": 0.14745242313948076, "learning_rate": 2e-05, "loss": 5.3193, "step": 6235 }, { "epoch": 0.4182848710467183, "grad_norm": 0.15475620807369375, "learning_rate": 2e-05, "loss": 5.4467, "step": 6236 }, { "epoch": 0.41835194687594324, "grad_norm": 0.15597631423108876, "learning_rate": 2e-05, "loss": 5.4333, "step": 6237 }, { "epoch": 0.4184190227051682, "grad_norm": 0.1454715923665536, "learning_rate": 2e-05, "loss": 5.3532, "step": 6238 }, { "epoch": 0.4184860985343931, "grad_norm": 0.15405110337621403, "learning_rate": 2e-05, "loss": 5.4375, "step": 6239 }, { "epoch": 0.41855317436361805, "grad_norm": 0.14510767484656545, "learning_rate": 2e-05, "loss": 5.4406, "step": 6240 }, { "epoch": 0.418620250192843, "grad_norm": 0.14327506672282547, "learning_rate": 2e-05, "loss": 5.3759, "step": 6241 }, { "epoch": 0.41868732602206793, "grad_norm": 0.1436085215241924, "learning_rate": 2e-05, "loss": 5.4895, "step": 6242 }, { "epoch": 0.41875440185129287, "grad_norm": 0.16043325284090895, "learning_rate": 2e-05, "loss": 5.5273, "step": 6243 }, { "epoch": 0.4188214776805178, "grad_norm": 0.1471908179961017, "learning_rate": 2e-05, "loss": 5.5075, "step": 6244 }, { "epoch": 0.41888855350974274, "grad_norm": 0.15056358505816222, "learning_rate": 2e-05, "loss": 5.4601, "step": 6245 }, { "epoch": 0.4189556293389677, "grad_norm": 0.14354892536811004, "learning_rate": 2e-05, "loss": 5.2549, "step": 6246 }, { "epoch": 0.4190227051681926, "grad_norm": 0.15385620857295468, "learning_rate": 2e-05, "loss": 5.4049, "step": 6247 }, { "epoch": 0.41908978099741756, "grad_norm": 0.1424732070591553, "learning_rate": 2e-05, "loss": 5.4353, "step": 6248 }, { "epoch": 0.4191568568266425, "grad_norm": 0.15001966269652894, "learning_rate": 2e-05, "loss": 5.4707, "step": 6249 }, { "epoch": 0.41922393265586744, "grad_norm": 0.14770740112158803, "learning_rate": 2e-05, "loss": 5.5145, "step": 6250 }, { "epoch": 0.4192910084850924, "grad_norm": 0.15044719622659666, "learning_rate": 2e-05, "loss": 5.3205, "step": 6251 }, { "epoch": 0.4193580843143173, "grad_norm": 0.1527564091363946, "learning_rate": 2e-05, "loss": 5.5283, "step": 6252 }, { "epoch": 0.41942516014354225, "grad_norm": 0.15188573785871098, "learning_rate": 2e-05, "loss": 5.4013, "step": 6253 }, { "epoch": 0.4194922359727672, "grad_norm": 0.15294602291747922, "learning_rate": 2e-05, "loss": 5.522, "step": 6254 }, { "epoch": 0.41955931180199213, "grad_norm": 0.15843716597399227, "learning_rate": 2e-05, "loss": 5.3152, "step": 6255 }, { "epoch": 0.41962638763121707, "grad_norm": 0.15179939584651417, "learning_rate": 2e-05, "loss": 5.4645, "step": 6256 }, { "epoch": 0.419693463460442, "grad_norm": 0.14471588666040858, "learning_rate": 2e-05, "loss": 5.5224, "step": 6257 }, { "epoch": 0.41976053928966695, "grad_norm": 0.15220375224074542, "learning_rate": 2e-05, "loss": 5.3866, "step": 6258 }, { "epoch": 0.4198276151188919, "grad_norm": 0.15891180059895374, "learning_rate": 2e-05, "loss": 5.4016, "step": 6259 }, { "epoch": 0.4198946909481168, "grad_norm": 0.15281374239137188, "learning_rate": 2e-05, "loss": 5.4259, "step": 6260 }, { "epoch": 0.41996176677734176, "grad_norm": 0.1447042177596159, "learning_rate": 2e-05, "loss": 5.4588, "step": 6261 }, { "epoch": 0.4200288426065667, "grad_norm": 0.14936128720877714, "learning_rate": 2e-05, "loss": 5.4394, "step": 6262 }, { "epoch": 0.42009591843579164, "grad_norm": 0.1480513829830632, "learning_rate": 2e-05, "loss": 5.4874, "step": 6263 }, { "epoch": 0.4201629942650166, "grad_norm": 0.14375693722623836, "learning_rate": 2e-05, "loss": 5.3675, "step": 6264 }, { "epoch": 0.4202300700942415, "grad_norm": 0.14299636149433792, "learning_rate": 2e-05, "loss": 5.5178, "step": 6265 }, { "epoch": 0.42029714592346645, "grad_norm": 0.15514933069794587, "learning_rate": 2e-05, "loss": 5.5361, "step": 6266 }, { "epoch": 0.4203642217526914, "grad_norm": 0.14467812185900947, "learning_rate": 2e-05, "loss": 5.5949, "step": 6267 }, { "epoch": 0.42043129758191633, "grad_norm": 0.1521273815934375, "learning_rate": 2e-05, "loss": 5.3192, "step": 6268 }, { "epoch": 0.42049837341114127, "grad_norm": 0.1464862777116028, "learning_rate": 2e-05, "loss": 5.4545, "step": 6269 }, { "epoch": 0.4205654492403662, "grad_norm": 0.16111925823418224, "learning_rate": 2e-05, "loss": 5.6047, "step": 6270 }, { "epoch": 0.42063252506959115, "grad_norm": 0.14796975252678068, "learning_rate": 2e-05, "loss": 5.3861, "step": 6271 }, { "epoch": 0.4206996008988161, "grad_norm": 0.14952287798702274, "learning_rate": 2e-05, "loss": 5.3915, "step": 6272 }, { "epoch": 0.4207666767280411, "grad_norm": 0.15282892046203028, "learning_rate": 2e-05, "loss": 5.4174, "step": 6273 }, { "epoch": 0.420833752557266, "grad_norm": 0.1589997171380678, "learning_rate": 2e-05, "loss": 5.4139, "step": 6274 }, { "epoch": 0.42090082838649095, "grad_norm": 0.14862137958017682, "learning_rate": 2e-05, "loss": 5.3871, "step": 6275 }, { "epoch": 0.4209679042157159, "grad_norm": 0.14549293419163686, "learning_rate": 2e-05, "loss": 5.4168, "step": 6276 }, { "epoch": 0.42103498004494083, "grad_norm": 0.15642185184478385, "learning_rate": 2e-05, "loss": 5.4128, "step": 6277 }, { "epoch": 0.42110205587416577, "grad_norm": 0.14301914804485114, "learning_rate": 2e-05, "loss": 5.2758, "step": 6278 }, { "epoch": 0.4211691317033907, "grad_norm": 0.14699058275200158, "learning_rate": 2e-05, "loss": 5.4027, "step": 6279 }, { "epoch": 0.42123620753261565, "grad_norm": 0.14616535676223494, "learning_rate": 2e-05, "loss": 5.4022, "step": 6280 }, { "epoch": 0.4213032833618406, "grad_norm": 0.15220548597395242, "learning_rate": 2e-05, "loss": 5.5928, "step": 6281 }, { "epoch": 0.4213703591910655, "grad_norm": 0.15010775257668782, "learning_rate": 2e-05, "loss": 5.3152, "step": 6282 }, { "epoch": 0.42143743502029046, "grad_norm": 0.1518526357259836, "learning_rate": 2e-05, "loss": 5.4481, "step": 6283 }, { "epoch": 0.4215045108495154, "grad_norm": 0.14904138457613592, "learning_rate": 2e-05, "loss": 5.4421, "step": 6284 }, { "epoch": 0.42157158667874034, "grad_norm": 0.15080750347040855, "learning_rate": 2e-05, "loss": 5.4271, "step": 6285 }, { "epoch": 0.4216386625079653, "grad_norm": 0.1440761690030597, "learning_rate": 2e-05, "loss": 5.5277, "step": 6286 }, { "epoch": 0.4217057383371902, "grad_norm": 0.14424318522665128, "learning_rate": 2e-05, "loss": 5.5545, "step": 6287 }, { "epoch": 0.42177281416641516, "grad_norm": 0.1472559350447186, "learning_rate": 2e-05, "loss": 5.4581, "step": 6288 }, { "epoch": 0.4218398899956401, "grad_norm": 0.14790661005189126, "learning_rate": 2e-05, "loss": 5.3892, "step": 6289 }, { "epoch": 0.42190696582486503, "grad_norm": 0.15631683783403477, "learning_rate": 2e-05, "loss": 5.4418, "step": 6290 }, { "epoch": 0.42197404165408997, "grad_norm": 0.1538317415095551, "learning_rate": 2e-05, "loss": 5.529, "step": 6291 }, { "epoch": 0.4220411174833149, "grad_norm": 0.1465802683346149, "learning_rate": 2e-05, "loss": 5.3616, "step": 6292 }, { "epoch": 0.42210819331253985, "grad_norm": 0.14910748102731003, "learning_rate": 2e-05, "loss": 5.4102, "step": 6293 }, { "epoch": 0.4221752691417648, "grad_norm": 0.16212135060554408, "learning_rate": 2e-05, "loss": 5.5169, "step": 6294 }, { "epoch": 0.4222423449709897, "grad_norm": 0.1582649965838994, "learning_rate": 2e-05, "loss": 5.3175, "step": 6295 }, { "epoch": 0.42230942080021466, "grad_norm": 0.1468493767731153, "learning_rate": 2e-05, "loss": 5.4917, "step": 6296 }, { "epoch": 0.4223764966294396, "grad_norm": 0.15839304377719646, "learning_rate": 2e-05, "loss": 5.502, "step": 6297 }, { "epoch": 0.42244357245866454, "grad_norm": 0.16364533413533558, "learning_rate": 2e-05, "loss": 5.5592, "step": 6298 }, { "epoch": 0.4225106482878895, "grad_norm": 0.15234920820946346, "learning_rate": 2e-05, "loss": 5.3324, "step": 6299 }, { "epoch": 0.4225777241171144, "grad_norm": 0.1615345449521551, "learning_rate": 2e-05, "loss": 5.4654, "step": 6300 }, { "epoch": 0.42264479994633936, "grad_norm": 0.15130417365066476, "learning_rate": 2e-05, "loss": 5.3677, "step": 6301 }, { "epoch": 0.4227118757755643, "grad_norm": 0.15068306036917925, "learning_rate": 2e-05, "loss": 5.4435, "step": 6302 }, { "epoch": 0.42277895160478923, "grad_norm": 0.147663147686365, "learning_rate": 2e-05, "loss": 5.339, "step": 6303 }, { "epoch": 0.42284602743401417, "grad_norm": 0.13883749752196478, "learning_rate": 2e-05, "loss": 5.4882, "step": 6304 }, { "epoch": 0.4229131032632391, "grad_norm": 0.14375279316401002, "learning_rate": 2e-05, "loss": 5.46, "step": 6305 }, { "epoch": 0.42298017909246405, "grad_norm": 0.14555707459421724, "learning_rate": 2e-05, "loss": 5.4463, "step": 6306 }, { "epoch": 0.423047254921689, "grad_norm": 0.15391036158667443, "learning_rate": 2e-05, "loss": 5.3035, "step": 6307 }, { "epoch": 0.4231143307509139, "grad_norm": 0.1457817489900408, "learning_rate": 2e-05, "loss": 5.5036, "step": 6308 }, { "epoch": 0.42318140658013886, "grad_norm": 0.14328876678178493, "learning_rate": 2e-05, "loss": 5.3407, "step": 6309 }, { "epoch": 0.4232484824093638, "grad_norm": 0.1477254926824025, "learning_rate": 2e-05, "loss": 5.453, "step": 6310 }, { "epoch": 0.42331555823858874, "grad_norm": 0.1481214922280087, "learning_rate": 2e-05, "loss": 5.5024, "step": 6311 }, { "epoch": 0.4233826340678137, "grad_norm": 0.15426413393993568, "learning_rate": 2e-05, "loss": 5.4413, "step": 6312 }, { "epoch": 0.4234497098970386, "grad_norm": 0.1525589847575938, "learning_rate": 2e-05, "loss": 5.4712, "step": 6313 }, { "epoch": 0.42351678572626356, "grad_norm": 0.15218552624249482, "learning_rate": 2e-05, "loss": 5.4856, "step": 6314 }, { "epoch": 0.4235838615554885, "grad_norm": 0.14912497423905594, "learning_rate": 2e-05, "loss": 5.4413, "step": 6315 }, { "epoch": 0.42365093738471343, "grad_norm": 0.1499585820884491, "learning_rate": 2e-05, "loss": 5.3771, "step": 6316 }, { "epoch": 0.42371801321393837, "grad_norm": 0.1502630969709438, "learning_rate": 2e-05, "loss": 5.297, "step": 6317 }, { "epoch": 0.4237850890431633, "grad_norm": 0.1525721676751632, "learning_rate": 2e-05, "loss": 5.473, "step": 6318 }, { "epoch": 0.42385216487238825, "grad_norm": 0.15229626044829908, "learning_rate": 2e-05, "loss": 5.4626, "step": 6319 }, { "epoch": 0.4239192407016132, "grad_norm": 0.15255199292726954, "learning_rate": 2e-05, "loss": 5.3864, "step": 6320 }, { "epoch": 0.4239863165308381, "grad_norm": 0.14961846356886088, "learning_rate": 2e-05, "loss": 5.4497, "step": 6321 }, { "epoch": 0.42405339236006306, "grad_norm": 0.14489946545823082, "learning_rate": 2e-05, "loss": 5.5249, "step": 6322 }, { "epoch": 0.424120468189288, "grad_norm": 0.15234288818410083, "learning_rate": 2e-05, "loss": 5.2633, "step": 6323 }, { "epoch": 0.42418754401851294, "grad_norm": 0.14812149276654887, "learning_rate": 2e-05, "loss": 5.4852, "step": 6324 }, { "epoch": 0.4242546198477379, "grad_norm": 0.14841254149381464, "learning_rate": 2e-05, "loss": 5.5512, "step": 6325 }, { "epoch": 0.4243216956769628, "grad_norm": 0.15082652186283776, "learning_rate": 2e-05, "loss": 5.5245, "step": 6326 }, { "epoch": 0.42438877150618776, "grad_norm": 0.14353582523216818, "learning_rate": 2e-05, "loss": 5.4316, "step": 6327 }, { "epoch": 0.4244558473354127, "grad_norm": 0.14934034716833278, "learning_rate": 2e-05, "loss": 5.4023, "step": 6328 }, { "epoch": 0.42452292316463763, "grad_norm": 0.15915198119848054, "learning_rate": 2e-05, "loss": 5.2098, "step": 6329 }, { "epoch": 0.42458999899386257, "grad_norm": 0.14782800402427604, "learning_rate": 2e-05, "loss": 5.5379, "step": 6330 }, { "epoch": 0.4246570748230875, "grad_norm": 0.14846977375180034, "learning_rate": 2e-05, "loss": 5.4008, "step": 6331 }, { "epoch": 0.42472415065231245, "grad_norm": 0.1500486209479468, "learning_rate": 2e-05, "loss": 5.5086, "step": 6332 }, { "epoch": 0.4247912264815374, "grad_norm": 0.15060898056889266, "learning_rate": 2e-05, "loss": 5.309, "step": 6333 }, { "epoch": 0.4248583023107623, "grad_norm": 0.1508606600946992, "learning_rate": 2e-05, "loss": 5.5166, "step": 6334 }, { "epoch": 0.42492537813998726, "grad_norm": 0.14921555929636063, "learning_rate": 2e-05, "loss": 5.5702, "step": 6335 }, { "epoch": 0.4249924539692122, "grad_norm": 0.15041149736880607, "learning_rate": 2e-05, "loss": 5.5617, "step": 6336 }, { "epoch": 0.42505952979843714, "grad_norm": 0.1451681716019452, "learning_rate": 2e-05, "loss": 5.3624, "step": 6337 }, { "epoch": 0.4251266056276621, "grad_norm": 0.14880848314998352, "learning_rate": 2e-05, "loss": 5.3402, "step": 6338 }, { "epoch": 0.425193681456887, "grad_norm": 0.14524561265530886, "learning_rate": 2e-05, "loss": 5.3628, "step": 6339 }, { "epoch": 0.42526075728611196, "grad_norm": 0.14800925243553942, "learning_rate": 2e-05, "loss": 5.422, "step": 6340 }, { "epoch": 0.4253278331153369, "grad_norm": 0.14181305057171026, "learning_rate": 2e-05, "loss": 5.27, "step": 6341 }, { "epoch": 0.42539490894456183, "grad_norm": 0.14544460065508572, "learning_rate": 2e-05, "loss": 5.4706, "step": 6342 }, { "epoch": 0.42546198477378677, "grad_norm": 0.14885509482560755, "learning_rate": 2e-05, "loss": 5.4048, "step": 6343 }, { "epoch": 0.4255290606030117, "grad_norm": 0.15540881001470022, "learning_rate": 2e-05, "loss": 5.3827, "step": 6344 }, { "epoch": 0.42559613643223665, "grad_norm": 0.14196113215334985, "learning_rate": 2e-05, "loss": 5.558, "step": 6345 }, { "epoch": 0.4256632122614616, "grad_norm": 0.15090964564556247, "learning_rate": 2e-05, "loss": 5.5721, "step": 6346 }, { "epoch": 0.4257302880906865, "grad_norm": 0.15310971759885308, "learning_rate": 2e-05, "loss": 5.2969, "step": 6347 }, { "epoch": 0.42579736391991146, "grad_norm": 0.14690489069101612, "learning_rate": 2e-05, "loss": 5.3421, "step": 6348 }, { "epoch": 0.4258644397491364, "grad_norm": 0.14299142792452332, "learning_rate": 2e-05, "loss": 5.5065, "step": 6349 }, { "epoch": 0.42593151557836134, "grad_norm": 0.14402663086109949, "learning_rate": 2e-05, "loss": 5.3764, "step": 6350 }, { "epoch": 0.4259985914075863, "grad_norm": 0.14384231402536837, "learning_rate": 2e-05, "loss": 5.2908, "step": 6351 }, { "epoch": 0.4260656672368112, "grad_norm": 0.14189841708225695, "learning_rate": 2e-05, "loss": 5.4331, "step": 6352 }, { "epoch": 0.42613274306603616, "grad_norm": 0.14909073763349645, "learning_rate": 2e-05, "loss": 5.5054, "step": 6353 }, { "epoch": 0.4261998188952611, "grad_norm": 0.14853813067716531, "learning_rate": 2e-05, "loss": 5.5366, "step": 6354 }, { "epoch": 0.42626689472448603, "grad_norm": 0.14873100129395245, "learning_rate": 2e-05, "loss": 5.5537, "step": 6355 }, { "epoch": 0.426333970553711, "grad_norm": 0.15875127778007253, "learning_rate": 2e-05, "loss": 5.4858, "step": 6356 }, { "epoch": 0.4264010463829359, "grad_norm": 0.1501812051895804, "learning_rate": 2e-05, "loss": 5.362, "step": 6357 }, { "epoch": 0.42646812221216085, "grad_norm": 0.14884330491326195, "learning_rate": 2e-05, "loss": 5.3117, "step": 6358 }, { "epoch": 0.4265351980413858, "grad_norm": 0.16006967063380614, "learning_rate": 2e-05, "loss": 5.4563, "step": 6359 }, { "epoch": 0.4266022738706107, "grad_norm": 0.14776442218168975, "learning_rate": 2e-05, "loss": 5.3439, "step": 6360 }, { "epoch": 0.42666934969983566, "grad_norm": 0.15011998717111025, "learning_rate": 2e-05, "loss": 5.5161, "step": 6361 }, { "epoch": 0.4267364255290606, "grad_norm": 0.15000862265307194, "learning_rate": 2e-05, "loss": 5.4363, "step": 6362 }, { "epoch": 0.42680350135828554, "grad_norm": 0.14806097712621338, "learning_rate": 2e-05, "loss": 5.3629, "step": 6363 }, { "epoch": 0.4268705771875105, "grad_norm": 0.15530771936686952, "learning_rate": 2e-05, "loss": 5.3554, "step": 6364 }, { "epoch": 0.4269376530167354, "grad_norm": 0.15988492115346004, "learning_rate": 2e-05, "loss": 5.3779, "step": 6365 }, { "epoch": 0.42700472884596036, "grad_norm": 0.15049775642801386, "learning_rate": 2e-05, "loss": 5.4482, "step": 6366 }, { "epoch": 0.4270718046751853, "grad_norm": 0.14977308658474148, "learning_rate": 2e-05, "loss": 5.4555, "step": 6367 }, { "epoch": 0.42713888050441023, "grad_norm": 0.15419666328961787, "learning_rate": 2e-05, "loss": 5.3701, "step": 6368 }, { "epoch": 0.4272059563336352, "grad_norm": 0.14984688694792614, "learning_rate": 2e-05, "loss": 5.3434, "step": 6369 }, { "epoch": 0.4272730321628601, "grad_norm": 0.1489388949148364, "learning_rate": 2e-05, "loss": 5.3495, "step": 6370 }, { "epoch": 0.42734010799208505, "grad_norm": 0.1531576232434007, "learning_rate": 2e-05, "loss": 5.4124, "step": 6371 }, { "epoch": 0.42740718382131, "grad_norm": 0.14872504331147854, "learning_rate": 2e-05, "loss": 5.4013, "step": 6372 }, { "epoch": 0.4274742596505349, "grad_norm": 0.14672099809144326, "learning_rate": 2e-05, "loss": 5.5647, "step": 6373 }, { "epoch": 0.42754133547975987, "grad_norm": 0.1454822892726034, "learning_rate": 2e-05, "loss": 5.3571, "step": 6374 }, { "epoch": 0.4276084113089848, "grad_norm": 0.15603740321447734, "learning_rate": 2e-05, "loss": 5.3858, "step": 6375 }, { "epoch": 0.42767548713820974, "grad_norm": 0.15098213421693266, "learning_rate": 2e-05, "loss": 5.4812, "step": 6376 }, { "epoch": 0.4277425629674347, "grad_norm": 0.154305484759387, "learning_rate": 2e-05, "loss": 5.2736, "step": 6377 }, { "epoch": 0.4278096387966596, "grad_norm": 0.15745747840784957, "learning_rate": 2e-05, "loss": 5.355, "step": 6378 }, { "epoch": 0.42787671462588456, "grad_norm": 0.14742988821197034, "learning_rate": 2e-05, "loss": 5.3308, "step": 6379 }, { "epoch": 0.4279437904551095, "grad_norm": 0.152115945607217, "learning_rate": 2e-05, "loss": 5.3687, "step": 6380 }, { "epoch": 0.42801086628433443, "grad_norm": 0.1479827086120508, "learning_rate": 2e-05, "loss": 5.4318, "step": 6381 }, { "epoch": 0.4280779421135594, "grad_norm": 0.1553773220878037, "learning_rate": 2e-05, "loss": 5.427, "step": 6382 }, { "epoch": 0.4281450179427843, "grad_norm": 0.145888337533544, "learning_rate": 2e-05, "loss": 5.2282, "step": 6383 }, { "epoch": 0.42821209377200925, "grad_norm": 0.1545810029462627, "learning_rate": 2e-05, "loss": 5.5377, "step": 6384 }, { "epoch": 0.4282791696012342, "grad_norm": 0.14737752906559962, "learning_rate": 2e-05, "loss": 5.5683, "step": 6385 }, { "epoch": 0.4283462454304591, "grad_norm": 0.15292685924566551, "learning_rate": 2e-05, "loss": 5.4204, "step": 6386 }, { "epoch": 0.42841332125968407, "grad_norm": 0.1433130993420583, "learning_rate": 2e-05, "loss": 5.501, "step": 6387 }, { "epoch": 0.428480397088909, "grad_norm": 0.14312607300289157, "learning_rate": 2e-05, "loss": 5.3602, "step": 6388 }, { "epoch": 0.42854747291813394, "grad_norm": 0.14680712931886405, "learning_rate": 2e-05, "loss": 5.2793, "step": 6389 }, { "epoch": 0.4286145487473589, "grad_norm": 0.14981783086726577, "learning_rate": 2e-05, "loss": 5.4758, "step": 6390 }, { "epoch": 0.4286816245765838, "grad_norm": 0.14593466764486915, "learning_rate": 2e-05, "loss": 5.5443, "step": 6391 }, { "epoch": 0.42874870040580876, "grad_norm": 0.1493379664286977, "learning_rate": 2e-05, "loss": 5.3108, "step": 6392 }, { "epoch": 0.4288157762350337, "grad_norm": 0.15134819385202816, "learning_rate": 2e-05, "loss": 5.465, "step": 6393 }, { "epoch": 0.42888285206425864, "grad_norm": 0.15297423734525215, "learning_rate": 2e-05, "loss": 5.372, "step": 6394 }, { "epoch": 0.4289499278934836, "grad_norm": 0.1425429299323518, "learning_rate": 2e-05, "loss": 5.5074, "step": 6395 }, { "epoch": 0.4290170037227085, "grad_norm": 0.15897539228962473, "learning_rate": 2e-05, "loss": 5.5658, "step": 6396 }, { "epoch": 0.42908407955193345, "grad_norm": 0.1490652384228625, "learning_rate": 2e-05, "loss": 5.4268, "step": 6397 }, { "epoch": 0.4291511553811584, "grad_norm": 0.1547315814636295, "learning_rate": 2e-05, "loss": 5.4819, "step": 6398 }, { "epoch": 0.4292182312103833, "grad_norm": 0.15763984885845436, "learning_rate": 2e-05, "loss": 5.4269, "step": 6399 }, { "epoch": 0.42928530703960827, "grad_norm": 0.14781203099497478, "learning_rate": 2e-05, "loss": 5.5324, "step": 6400 }, { "epoch": 0.4293523828688332, "grad_norm": 0.14572788008268758, "learning_rate": 2e-05, "loss": 5.3654, "step": 6401 }, { "epoch": 0.42941945869805814, "grad_norm": 0.1458755520601082, "learning_rate": 2e-05, "loss": 5.2775, "step": 6402 }, { "epoch": 0.4294865345272831, "grad_norm": 0.1529328654873612, "learning_rate": 2e-05, "loss": 5.4064, "step": 6403 }, { "epoch": 0.429553610356508, "grad_norm": 0.15261991308240883, "learning_rate": 2e-05, "loss": 5.4322, "step": 6404 }, { "epoch": 0.42962068618573296, "grad_norm": 0.15905577783875505, "learning_rate": 2e-05, "loss": 5.5258, "step": 6405 }, { "epoch": 0.4296877620149579, "grad_norm": 0.15064357757502875, "learning_rate": 2e-05, "loss": 5.3624, "step": 6406 }, { "epoch": 0.42975483784418284, "grad_norm": 0.14840854283968974, "learning_rate": 2e-05, "loss": 5.4442, "step": 6407 }, { "epoch": 0.4298219136734078, "grad_norm": 0.14504654214928808, "learning_rate": 2e-05, "loss": 5.2371, "step": 6408 }, { "epoch": 0.4298889895026327, "grad_norm": 0.14911189443394368, "learning_rate": 2e-05, "loss": 5.4908, "step": 6409 }, { "epoch": 0.42995606533185765, "grad_norm": 0.14773236405709883, "learning_rate": 2e-05, "loss": 5.4223, "step": 6410 }, { "epoch": 0.4300231411610826, "grad_norm": 0.14157823409930834, "learning_rate": 2e-05, "loss": 5.2504, "step": 6411 }, { "epoch": 0.43009021699030753, "grad_norm": 0.14853204921675014, "learning_rate": 2e-05, "loss": 5.4086, "step": 6412 }, { "epoch": 0.43015729281953247, "grad_norm": 0.15265822388382758, "learning_rate": 2e-05, "loss": 5.5443, "step": 6413 }, { "epoch": 0.4302243686487574, "grad_norm": 0.1480659069604244, "learning_rate": 2e-05, "loss": 5.4428, "step": 6414 }, { "epoch": 0.43029144447798234, "grad_norm": 0.15424386731286038, "learning_rate": 2e-05, "loss": 5.5073, "step": 6415 }, { "epoch": 0.4303585203072073, "grad_norm": 0.1510670254529116, "learning_rate": 2e-05, "loss": 5.3653, "step": 6416 }, { "epoch": 0.4304255961364322, "grad_norm": 0.14643493882806216, "learning_rate": 2e-05, "loss": 5.3166, "step": 6417 }, { "epoch": 0.43049267196565716, "grad_norm": 0.14985285621780203, "learning_rate": 2e-05, "loss": 5.3932, "step": 6418 }, { "epoch": 0.4305597477948821, "grad_norm": 0.1467991038780593, "learning_rate": 2e-05, "loss": 5.4269, "step": 6419 }, { "epoch": 0.43062682362410704, "grad_norm": 0.15460907464710014, "learning_rate": 2e-05, "loss": 5.4842, "step": 6420 }, { "epoch": 0.430693899453332, "grad_norm": 0.15710265971778103, "learning_rate": 2e-05, "loss": 5.339, "step": 6421 }, { "epoch": 0.4307609752825569, "grad_norm": 0.15701538159094788, "learning_rate": 2e-05, "loss": 5.367, "step": 6422 }, { "epoch": 0.43082805111178185, "grad_norm": 0.15376326424912343, "learning_rate": 2e-05, "loss": 5.4265, "step": 6423 }, { "epoch": 0.4308951269410068, "grad_norm": 0.15355941175582652, "learning_rate": 2e-05, "loss": 5.3635, "step": 6424 }, { "epoch": 0.43096220277023173, "grad_norm": 0.15126633900874148, "learning_rate": 2e-05, "loss": 5.3284, "step": 6425 }, { "epoch": 0.43102927859945667, "grad_norm": 0.15409141857616335, "learning_rate": 2e-05, "loss": 5.3698, "step": 6426 }, { "epoch": 0.4310963544286816, "grad_norm": 0.14770617826253574, "learning_rate": 2e-05, "loss": 5.4317, "step": 6427 }, { "epoch": 0.43116343025790654, "grad_norm": 0.1622167039033906, "learning_rate": 2e-05, "loss": 5.4944, "step": 6428 }, { "epoch": 0.4312305060871315, "grad_norm": 0.14485107421766014, "learning_rate": 2e-05, "loss": 5.5541, "step": 6429 }, { "epoch": 0.4312975819163564, "grad_norm": 0.14596453018935882, "learning_rate": 2e-05, "loss": 5.4006, "step": 6430 }, { "epoch": 0.43136465774558136, "grad_norm": 0.15839891096282108, "learning_rate": 2e-05, "loss": 5.416, "step": 6431 }, { "epoch": 0.4314317335748063, "grad_norm": 0.1527165643426115, "learning_rate": 2e-05, "loss": 5.2916, "step": 6432 }, { "epoch": 0.43149880940403124, "grad_norm": 0.15332935619037338, "learning_rate": 2e-05, "loss": 5.348, "step": 6433 }, { "epoch": 0.4315658852332562, "grad_norm": 0.14979485726620326, "learning_rate": 2e-05, "loss": 5.3565, "step": 6434 }, { "epoch": 0.4316329610624811, "grad_norm": 0.14712689426092052, "learning_rate": 2e-05, "loss": 5.4517, "step": 6435 }, { "epoch": 0.43170003689170605, "grad_norm": 0.15328383600397746, "learning_rate": 2e-05, "loss": 5.417, "step": 6436 }, { "epoch": 0.431767112720931, "grad_norm": 0.1489991525094696, "learning_rate": 2e-05, "loss": 5.5447, "step": 6437 }, { "epoch": 0.43183418855015593, "grad_norm": 0.14363200920344524, "learning_rate": 2e-05, "loss": 5.366, "step": 6438 }, { "epoch": 0.43190126437938087, "grad_norm": 0.14443407148890078, "learning_rate": 2e-05, "loss": 5.3599, "step": 6439 }, { "epoch": 0.4319683402086058, "grad_norm": 0.144891659748065, "learning_rate": 2e-05, "loss": 5.5854, "step": 6440 }, { "epoch": 0.43203541603783074, "grad_norm": 0.1485065639946896, "learning_rate": 2e-05, "loss": 5.4249, "step": 6441 }, { "epoch": 0.4321024918670557, "grad_norm": 0.14810432321507336, "learning_rate": 2e-05, "loss": 5.415, "step": 6442 }, { "epoch": 0.4321695676962806, "grad_norm": 0.1466022937421589, "learning_rate": 2e-05, "loss": 5.602, "step": 6443 }, { "epoch": 0.43223664352550556, "grad_norm": 0.14297251774357253, "learning_rate": 2e-05, "loss": 5.6131, "step": 6444 }, { "epoch": 0.4323037193547305, "grad_norm": 0.1518150357426711, "learning_rate": 2e-05, "loss": 5.4819, "step": 6445 }, { "epoch": 0.43237079518395544, "grad_norm": 0.14803697992867357, "learning_rate": 2e-05, "loss": 5.5374, "step": 6446 }, { "epoch": 0.4324378710131804, "grad_norm": 0.14926622405614468, "learning_rate": 2e-05, "loss": 5.3633, "step": 6447 }, { "epoch": 0.4325049468424053, "grad_norm": 0.14268681145376752, "learning_rate": 2e-05, "loss": 5.3373, "step": 6448 }, { "epoch": 0.43257202267163025, "grad_norm": 0.14327260077481244, "learning_rate": 2e-05, "loss": 5.4831, "step": 6449 }, { "epoch": 0.4326390985008552, "grad_norm": 0.1471321941779846, "learning_rate": 2e-05, "loss": 5.3186, "step": 6450 }, { "epoch": 0.43270617433008013, "grad_norm": 0.15236364389787693, "learning_rate": 2e-05, "loss": 5.3837, "step": 6451 }, { "epoch": 0.43277325015930507, "grad_norm": 0.1484765755628171, "learning_rate": 2e-05, "loss": 5.5478, "step": 6452 }, { "epoch": 0.43284032598853, "grad_norm": 0.15094969861886232, "learning_rate": 2e-05, "loss": 5.4776, "step": 6453 }, { "epoch": 0.43290740181775494, "grad_norm": 0.15007158354212266, "learning_rate": 2e-05, "loss": 5.3285, "step": 6454 }, { "epoch": 0.43297447764697994, "grad_norm": 0.1482849425774349, "learning_rate": 2e-05, "loss": 5.3695, "step": 6455 }, { "epoch": 0.4330415534762049, "grad_norm": 0.14078213246126908, "learning_rate": 2e-05, "loss": 5.4401, "step": 6456 }, { "epoch": 0.4331086293054298, "grad_norm": 0.1492265619889194, "learning_rate": 2e-05, "loss": 5.431, "step": 6457 }, { "epoch": 0.43317570513465475, "grad_norm": 0.15498551491931645, "learning_rate": 2e-05, "loss": 5.3471, "step": 6458 }, { "epoch": 0.4332427809638797, "grad_norm": 0.15429732876846353, "learning_rate": 2e-05, "loss": 5.4166, "step": 6459 }, { "epoch": 0.43330985679310463, "grad_norm": 0.1541213168498063, "learning_rate": 2e-05, "loss": 5.483, "step": 6460 }, { "epoch": 0.43337693262232957, "grad_norm": 0.14344646607348122, "learning_rate": 2e-05, "loss": 5.2714, "step": 6461 }, { "epoch": 0.4334440084515545, "grad_norm": 0.14830674169560729, "learning_rate": 2e-05, "loss": 5.6067, "step": 6462 }, { "epoch": 0.43351108428077945, "grad_norm": 0.1523386618868509, "learning_rate": 2e-05, "loss": 5.4715, "step": 6463 }, { "epoch": 0.4335781601100044, "grad_norm": 0.17719785006734695, "learning_rate": 2e-05, "loss": 5.4352, "step": 6464 }, { "epoch": 0.4336452359392293, "grad_norm": 0.15559283243082037, "learning_rate": 2e-05, "loss": 5.4168, "step": 6465 }, { "epoch": 0.43371231176845426, "grad_norm": 0.14910494517060077, "learning_rate": 2e-05, "loss": 5.3749, "step": 6466 }, { "epoch": 0.4337793875976792, "grad_norm": 0.14835677456319674, "learning_rate": 2e-05, "loss": 5.4997, "step": 6467 }, { "epoch": 0.43384646342690414, "grad_norm": 0.1504196016445764, "learning_rate": 2e-05, "loss": 5.4627, "step": 6468 }, { "epoch": 0.4339135392561291, "grad_norm": 0.1473813709397406, "learning_rate": 2e-05, "loss": 5.4569, "step": 6469 }, { "epoch": 0.433980615085354, "grad_norm": 0.1511866008539804, "learning_rate": 2e-05, "loss": 5.3579, "step": 6470 }, { "epoch": 0.43404769091457895, "grad_norm": 0.1495947706842585, "learning_rate": 2e-05, "loss": 5.3498, "step": 6471 }, { "epoch": 0.4341147667438039, "grad_norm": 0.14203377167250106, "learning_rate": 2e-05, "loss": 5.2866, "step": 6472 }, { "epoch": 0.43418184257302883, "grad_norm": 0.14415821735177195, "learning_rate": 2e-05, "loss": 5.4387, "step": 6473 }, { "epoch": 0.43424891840225377, "grad_norm": 0.1431905738346033, "learning_rate": 2e-05, "loss": 5.4473, "step": 6474 }, { "epoch": 0.4343159942314787, "grad_norm": 0.15292637277025203, "learning_rate": 2e-05, "loss": 5.4572, "step": 6475 }, { "epoch": 0.43438307006070365, "grad_norm": 0.15599992981061672, "learning_rate": 2e-05, "loss": 5.4559, "step": 6476 }, { "epoch": 0.4344501458899286, "grad_norm": 0.15095865002659434, "learning_rate": 2e-05, "loss": 5.4943, "step": 6477 }, { "epoch": 0.4345172217191535, "grad_norm": 0.1486092603320759, "learning_rate": 2e-05, "loss": 5.5389, "step": 6478 }, { "epoch": 0.43458429754837846, "grad_norm": 0.15130782165134798, "learning_rate": 2e-05, "loss": 5.4693, "step": 6479 }, { "epoch": 0.4346513733776034, "grad_norm": 0.15282293421995943, "learning_rate": 2e-05, "loss": 5.4994, "step": 6480 }, { "epoch": 0.43471844920682834, "grad_norm": 0.14666742082905437, "learning_rate": 2e-05, "loss": 5.4695, "step": 6481 }, { "epoch": 0.4347855250360533, "grad_norm": 0.1550133982274994, "learning_rate": 2e-05, "loss": 5.4567, "step": 6482 }, { "epoch": 0.4348526008652782, "grad_norm": 0.14962044434609417, "learning_rate": 2e-05, "loss": 5.4674, "step": 6483 }, { "epoch": 0.43491967669450315, "grad_norm": 0.14995713809178896, "learning_rate": 2e-05, "loss": 5.3438, "step": 6484 }, { "epoch": 0.4349867525237281, "grad_norm": 0.1462576779334566, "learning_rate": 2e-05, "loss": 5.3963, "step": 6485 }, { "epoch": 0.43505382835295303, "grad_norm": 0.14641371939113967, "learning_rate": 2e-05, "loss": 5.636, "step": 6486 }, { "epoch": 0.43512090418217797, "grad_norm": 0.14976000061783576, "learning_rate": 2e-05, "loss": 5.4595, "step": 6487 }, { "epoch": 0.4351879800114029, "grad_norm": 0.1465188810384411, "learning_rate": 2e-05, "loss": 5.3589, "step": 6488 }, { "epoch": 0.43525505584062785, "grad_norm": 0.1467777269924352, "learning_rate": 2e-05, "loss": 5.563, "step": 6489 }, { "epoch": 0.4353221316698528, "grad_norm": 0.14719069998299456, "learning_rate": 2e-05, "loss": 5.4876, "step": 6490 }, { "epoch": 0.4353892074990777, "grad_norm": 0.14905333933902756, "learning_rate": 2e-05, "loss": 5.51, "step": 6491 }, { "epoch": 0.43545628332830266, "grad_norm": 0.16115701880204109, "learning_rate": 2e-05, "loss": 5.4643, "step": 6492 }, { "epoch": 0.4355233591575276, "grad_norm": 0.15439904773209534, "learning_rate": 2e-05, "loss": 5.4303, "step": 6493 }, { "epoch": 0.43559043498675254, "grad_norm": 0.15388884718054893, "learning_rate": 2e-05, "loss": 5.4861, "step": 6494 }, { "epoch": 0.4356575108159775, "grad_norm": 0.14936544203231644, "learning_rate": 2e-05, "loss": 5.5318, "step": 6495 }, { "epoch": 0.4357245866452024, "grad_norm": 0.14723547816319466, "learning_rate": 2e-05, "loss": 5.4432, "step": 6496 }, { "epoch": 0.43579166247442735, "grad_norm": 0.15809724177393877, "learning_rate": 2e-05, "loss": 5.48, "step": 6497 }, { "epoch": 0.4358587383036523, "grad_norm": 0.1452806178436295, "learning_rate": 2e-05, "loss": 5.3799, "step": 6498 }, { "epoch": 0.43592581413287723, "grad_norm": 0.1409622576039092, "learning_rate": 2e-05, "loss": 5.306, "step": 6499 }, { "epoch": 0.43599288996210217, "grad_norm": 0.1463196957721756, "learning_rate": 2e-05, "loss": 5.4365, "step": 6500 }, { "epoch": 0.4360599657913271, "grad_norm": 0.14430325126467639, "learning_rate": 2e-05, "loss": 5.313, "step": 6501 }, { "epoch": 0.43612704162055205, "grad_norm": 0.15007517797316197, "learning_rate": 2e-05, "loss": 5.3587, "step": 6502 }, { "epoch": 0.436194117449777, "grad_norm": 0.14310658126585693, "learning_rate": 2e-05, "loss": 5.4414, "step": 6503 }, { "epoch": 0.4362611932790019, "grad_norm": 0.15042199815199803, "learning_rate": 2e-05, "loss": 5.3219, "step": 6504 }, { "epoch": 0.43632826910822686, "grad_norm": 0.15125545114388883, "learning_rate": 2e-05, "loss": 5.5031, "step": 6505 }, { "epoch": 0.4363953449374518, "grad_norm": 0.14299969768165352, "learning_rate": 2e-05, "loss": 5.3501, "step": 6506 }, { "epoch": 0.43646242076667674, "grad_norm": 0.1452451071863212, "learning_rate": 2e-05, "loss": 5.525, "step": 6507 }, { "epoch": 0.4365294965959017, "grad_norm": 0.14930959899317486, "learning_rate": 2e-05, "loss": 5.4911, "step": 6508 }, { "epoch": 0.4365965724251266, "grad_norm": 0.14047390647584607, "learning_rate": 2e-05, "loss": 5.4448, "step": 6509 }, { "epoch": 0.43666364825435156, "grad_norm": 0.14901258047951998, "learning_rate": 2e-05, "loss": 5.464, "step": 6510 }, { "epoch": 0.4367307240835765, "grad_norm": 0.15298619110651454, "learning_rate": 2e-05, "loss": 5.4782, "step": 6511 }, { "epoch": 0.43679779991280143, "grad_norm": 0.14570068922637158, "learning_rate": 2e-05, "loss": 5.4523, "step": 6512 }, { "epoch": 0.43686487574202637, "grad_norm": 0.1523595927264857, "learning_rate": 2e-05, "loss": 5.3926, "step": 6513 }, { "epoch": 0.4369319515712513, "grad_norm": 0.15238609487331148, "learning_rate": 2e-05, "loss": 5.3042, "step": 6514 }, { "epoch": 0.43699902740047625, "grad_norm": 0.1488463714195997, "learning_rate": 2e-05, "loss": 5.4695, "step": 6515 }, { "epoch": 0.4370661032297012, "grad_norm": 0.14770437396759872, "learning_rate": 2e-05, "loss": 5.423, "step": 6516 }, { "epoch": 0.4371331790589261, "grad_norm": 0.14454179724666164, "learning_rate": 2e-05, "loss": 5.3732, "step": 6517 }, { "epoch": 0.43720025488815106, "grad_norm": 0.14851305582091523, "learning_rate": 2e-05, "loss": 5.3884, "step": 6518 }, { "epoch": 0.437267330717376, "grad_norm": 0.14038262755243677, "learning_rate": 2e-05, "loss": 5.5154, "step": 6519 }, { "epoch": 0.43733440654660094, "grad_norm": 0.14600285379994998, "learning_rate": 2e-05, "loss": 5.5234, "step": 6520 }, { "epoch": 0.4374014823758259, "grad_norm": 0.1662547744434856, "learning_rate": 2e-05, "loss": 5.3796, "step": 6521 }, { "epoch": 0.4374685582050508, "grad_norm": 0.15084473245905944, "learning_rate": 2e-05, "loss": 5.3938, "step": 6522 }, { "epoch": 0.43753563403427576, "grad_norm": 0.1470581665265498, "learning_rate": 2e-05, "loss": 5.4536, "step": 6523 }, { "epoch": 0.4376027098635007, "grad_norm": 0.15348952143952527, "learning_rate": 2e-05, "loss": 5.4175, "step": 6524 }, { "epoch": 0.43766978569272563, "grad_norm": 0.1499339267554661, "learning_rate": 2e-05, "loss": 5.2976, "step": 6525 }, { "epoch": 0.43773686152195057, "grad_norm": 0.14902037168569038, "learning_rate": 2e-05, "loss": 5.5762, "step": 6526 }, { "epoch": 0.4378039373511755, "grad_norm": 0.15127749371146954, "learning_rate": 2e-05, "loss": 5.2917, "step": 6527 }, { "epoch": 0.43787101318040045, "grad_norm": 0.15370515151762248, "learning_rate": 2e-05, "loss": 5.2111, "step": 6528 }, { "epoch": 0.4379380890096254, "grad_norm": 0.1427975743863519, "learning_rate": 2e-05, "loss": 5.3715, "step": 6529 }, { "epoch": 0.4380051648388503, "grad_norm": 0.1468642762682309, "learning_rate": 2e-05, "loss": 5.4783, "step": 6530 }, { "epoch": 0.43807224066807526, "grad_norm": 0.14758104624157203, "learning_rate": 2e-05, "loss": 5.3457, "step": 6531 }, { "epoch": 0.4381393164973002, "grad_norm": 0.16034040238928493, "learning_rate": 2e-05, "loss": 5.4666, "step": 6532 }, { "epoch": 0.43820639232652514, "grad_norm": 0.1613649878780952, "learning_rate": 2e-05, "loss": 5.5423, "step": 6533 }, { "epoch": 0.4382734681557501, "grad_norm": 0.15086251310094687, "learning_rate": 2e-05, "loss": 5.4065, "step": 6534 }, { "epoch": 0.438340543984975, "grad_norm": 0.14440208073266078, "learning_rate": 2e-05, "loss": 5.3114, "step": 6535 }, { "epoch": 0.43840761981419996, "grad_norm": 0.1477984132465928, "learning_rate": 2e-05, "loss": 5.4468, "step": 6536 }, { "epoch": 0.4384746956434249, "grad_norm": 0.1574625039631118, "learning_rate": 2e-05, "loss": 5.5576, "step": 6537 }, { "epoch": 0.43854177147264983, "grad_norm": 0.14927344219827374, "learning_rate": 2e-05, "loss": 5.4314, "step": 6538 }, { "epoch": 0.43860884730187477, "grad_norm": 0.15343294613825711, "learning_rate": 2e-05, "loss": 5.4566, "step": 6539 }, { "epoch": 0.4386759231310997, "grad_norm": 0.15106177547644953, "learning_rate": 2e-05, "loss": 5.268, "step": 6540 }, { "epoch": 0.43874299896032465, "grad_norm": 0.14839898937449458, "learning_rate": 2e-05, "loss": 5.3612, "step": 6541 }, { "epoch": 0.4388100747895496, "grad_norm": 0.1452975566917359, "learning_rate": 2e-05, "loss": 5.378, "step": 6542 }, { "epoch": 0.4388771506187745, "grad_norm": 0.1528872497564754, "learning_rate": 2e-05, "loss": 5.5507, "step": 6543 }, { "epoch": 0.43894422644799946, "grad_norm": 0.14814977167338475, "learning_rate": 2e-05, "loss": 5.4942, "step": 6544 }, { "epoch": 0.4390113022772244, "grad_norm": 0.15630813117447975, "learning_rate": 2e-05, "loss": 5.4068, "step": 6545 }, { "epoch": 0.43907837810644934, "grad_norm": 0.14504560674121356, "learning_rate": 2e-05, "loss": 5.4526, "step": 6546 }, { "epoch": 0.4391454539356743, "grad_norm": 0.1537792325928432, "learning_rate": 2e-05, "loss": 5.4435, "step": 6547 }, { "epoch": 0.4392125297648992, "grad_norm": 0.14606777762464698, "learning_rate": 2e-05, "loss": 5.39, "step": 6548 }, { "epoch": 0.43927960559412416, "grad_norm": 0.15548643503207374, "learning_rate": 2e-05, "loss": 5.439, "step": 6549 }, { "epoch": 0.4393466814233491, "grad_norm": 0.15103014263965145, "learning_rate": 2e-05, "loss": 5.4321, "step": 6550 }, { "epoch": 0.43941375725257403, "grad_norm": 0.15093954075195562, "learning_rate": 2e-05, "loss": 5.3862, "step": 6551 }, { "epoch": 0.43948083308179897, "grad_norm": 0.15037246276553526, "learning_rate": 2e-05, "loss": 5.3707, "step": 6552 }, { "epoch": 0.4395479089110239, "grad_norm": 0.15247950946008182, "learning_rate": 2e-05, "loss": 5.4235, "step": 6553 }, { "epoch": 0.43961498474024885, "grad_norm": 0.15038146258967283, "learning_rate": 2e-05, "loss": 5.3753, "step": 6554 }, { "epoch": 0.4396820605694738, "grad_norm": 0.14980913944491045, "learning_rate": 2e-05, "loss": 5.3934, "step": 6555 }, { "epoch": 0.4397491363986987, "grad_norm": 0.1491539124173332, "learning_rate": 2e-05, "loss": 5.4644, "step": 6556 }, { "epoch": 0.43981621222792366, "grad_norm": 0.14781749003329037, "learning_rate": 2e-05, "loss": 5.3387, "step": 6557 }, { "epoch": 0.4398832880571486, "grad_norm": 0.1580412696537096, "learning_rate": 2e-05, "loss": 5.5622, "step": 6558 }, { "epoch": 0.43995036388637354, "grad_norm": 0.15187060333007285, "learning_rate": 2e-05, "loss": 5.3755, "step": 6559 }, { "epoch": 0.4400174397155985, "grad_norm": 0.1432836723540337, "learning_rate": 2e-05, "loss": 5.5426, "step": 6560 }, { "epoch": 0.4400845155448234, "grad_norm": 0.15986158637794615, "learning_rate": 2e-05, "loss": 5.3889, "step": 6561 }, { "epoch": 0.44015159137404836, "grad_norm": 0.15896925265072182, "learning_rate": 2e-05, "loss": 5.4009, "step": 6562 }, { "epoch": 0.4402186672032733, "grad_norm": 0.15357300265078128, "learning_rate": 2e-05, "loss": 5.4508, "step": 6563 }, { "epoch": 0.44028574303249823, "grad_norm": 0.152656001905032, "learning_rate": 2e-05, "loss": 5.2975, "step": 6564 }, { "epoch": 0.44035281886172317, "grad_norm": 0.1608679636651188, "learning_rate": 2e-05, "loss": 5.4545, "step": 6565 }, { "epoch": 0.4404198946909481, "grad_norm": 0.152795132149424, "learning_rate": 2e-05, "loss": 5.4073, "step": 6566 }, { "epoch": 0.44048697052017305, "grad_norm": 0.1653384127840587, "learning_rate": 2e-05, "loss": 5.5434, "step": 6567 }, { "epoch": 0.440554046349398, "grad_norm": 0.1539568031670306, "learning_rate": 2e-05, "loss": 5.404, "step": 6568 }, { "epoch": 0.4406211221786229, "grad_norm": 0.15488489013475615, "learning_rate": 2e-05, "loss": 5.4034, "step": 6569 }, { "epoch": 0.44068819800784786, "grad_norm": 0.14798659625317692, "learning_rate": 2e-05, "loss": 5.4393, "step": 6570 }, { "epoch": 0.4407552738370728, "grad_norm": 0.14602038084829724, "learning_rate": 2e-05, "loss": 5.3848, "step": 6571 }, { "epoch": 0.44082234966629774, "grad_norm": 0.15183956923687641, "learning_rate": 2e-05, "loss": 5.4806, "step": 6572 }, { "epoch": 0.4408894254955227, "grad_norm": 0.15177138316041358, "learning_rate": 2e-05, "loss": 5.469, "step": 6573 }, { "epoch": 0.4409565013247476, "grad_norm": 0.14919998202441334, "learning_rate": 2e-05, "loss": 5.4998, "step": 6574 }, { "epoch": 0.44102357715397256, "grad_norm": 0.15182148395386746, "learning_rate": 2e-05, "loss": 5.4422, "step": 6575 }, { "epoch": 0.4410906529831975, "grad_norm": 0.15089528183703504, "learning_rate": 2e-05, "loss": 5.4463, "step": 6576 }, { "epoch": 0.44115772881242243, "grad_norm": 0.1517086275742126, "learning_rate": 2e-05, "loss": 5.4344, "step": 6577 }, { "epoch": 0.4412248046416474, "grad_norm": 0.16172123307532343, "learning_rate": 2e-05, "loss": 5.4671, "step": 6578 }, { "epoch": 0.4412918804708723, "grad_norm": 0.15396792642323331, "learning_rate": 2e-05, "loss": 5.4733, "step": 6579 }, { "epoch": 0.44135895630009725, "grad_norm": 0.15277016028578194, "learning_rate": 2e-05, "loss": 5.357, "step": 6580 }, { "epoch": 0.4414260321293222, "grad_norm": 0.1463870273372572, "learning_rate": 2e-05, "loss": 5.4811, "step": 6581 }, { "epoch": 0.4414931079585471, "grad_norm": 0.15159104671233314, "learning_rate": 2e-05, "loss": 5.3661, "step": 6582 }, { "epoch": 0.44156018378777206, "grad_norm": 0.14567407904338778, "learning_rate": 2e-05, "loss": 5.4093, "step": 6583 }, { "epoch": 0.441627259616997, "grad_norm": 0.15287392129191243, "learning_rate": 2e-05, "loss": 5.4542, "step": 6584 }, { "epoch": 0.44169433544622194, "grad_norm": 0.14788633372542784, "learning_rate": 2e-05, "loss": 5.4876, "step": 6585 }, { "epoch": 0.4417614112754469, "grad_norm": 0.148144864133421, "learning_rate": 2e-05, "loss": 5.6115, "step": 6586 }, { "epoch": 0.4418284871046718, "grad_norm": 0.15444238012652672, "learning_rate": 2e-05, "loss": 5.5204, "step": 6587 }, { "epoch": 0.44189556293389676, "grad_norm": 0.1571012716824601, "learning_rate": 2e-05, "loss": 5.5957, "step": 6588 }, { "epoch": 0.4419626387631217, "grad_norm": 0.14195006591498083, "learning_rate": 2e-05, "loss": 5.4698, "step": 6589 }, { "epoch": 0.44202971459234663, "grad_norm": 0.15175842101533307, "learning_rate": 2e-05, "loss": 5.4294, "step": 6590 }, { "epoch": 0.4420967904215716, "grad_norm": 0.1493847203135792, "learning_rate": 2e-05, "loss": 5.468, "step": 6591 }, { "epoch": 0.4421638662507965, "grad_norm": 0.14766716104716443, "learning_rate": 2e-05, "loss": 5.2206, "step": 6592 }, { "epoch": 0.44223094208002145, "grad_norm": 0.14628343335823263, "learning_rate": 2e-05, "loss": 5.432, "step": 6593 }, { "epoch": 0.4422980179092464, "grad_norm": 0.148206000410834, "learning_rate": 2e-05, "loss": 5.4537, "step": 6594 }, { "epoch": 0.4423650937384713, "grad_norm": 0.1533235544995044, "learning_rate": 2e-05, "loss": 5.3916, "step": 6595 }, { "epoch": 0.44243216956769627, "grad_norm": 0.14423944046304926, "learning_rate": 2e-05, "loss": 5.4586, "step": 6596 }, { "epoch": 0.4424992453969212, "grad_norm": 0.15122814754896363, "learning_rate": 2e-05, "loss": 5.4319, "step": 6597 }, { "epoch": 0.44256632122614614, "grad_norm": 0.15347044874275947, "learning_rate": 2e-05, "loss": 5.4954, "step": 6598 }, { "epoch": 0.4426333970553711, "grad_norm": 0.1552555881488444, "learning_rate": 2e-05, "loss": 5.2656, "step": 6599 }, { "epoch": 0.442700472884596, "grad_norm": 0.14903007994141543, "learning_rate": 2e-05, "loss": 5.3637, "step": 6600 }, { "epoch": 0.44276754871382096, "grad_norm": 0.15301671759935032, "learning_rate": 2e-05, "loss": 5.5157, "step": 6601 }, { "epoch": 0.4428346245430459, "grad_norm": 0.16217529028592492, "learning_rate": 2e-05, "loss": 5.3721, "step": 6602 }, { "epoch": 0.44290170037227083, "grad_norm": 0.1492554611312957, "learning_rate": 2e-05, "loss": 5.4199, "step": 6603 }, { "epoch": 0.4429687762014958, "grad_norm": 0.14724820169561914, "learning_rate": 2e-05, "loss": 5.4923, "step": 6604 }, { "epoch": 0.4430358520307207, "grad_norm": 0.15722310116303093, "learning_rate": 2e-05, "loss": 5.4769, "step": 6605 }, { "epoch": 0.44310292785994565, "grad_norm": 0.15177971493888537, "learning_rate": 2e-05, "loss": 5.4179, "step": 6606 }, { "epoch": 0.4431700036891706, "grad_norm": 0.1486480076807384, "learning_rate": 2e-05, "loss": 5.4301, "step": 6607 }, { "epoch": 0.4432370795183955, "grad_norm": 0.1544667587987344, "learning_rate": 2e-05, "loss": 5.4747, "step": 6608 }, { "epoch": 0.44330415534762047, "grad_norm": 0.14577529758454774, "learning_rate": 2e-05, "loss": 5.4974, "step": 6609 }, { "epoch": 0.4433712311768454, "grad_norm": 0.15151826044760752, "learning_rate": 2e-05, "loss": 5.5422, "step": 6610 }, { "epoch": 0.44343830700607034, "grad_norm": 0.14732449594468477, "learning_rate": 2e-05, "loss": 5.3737, "step": 6611 }, { "epoch": 0.4435053828352953, "grad_norm": 0.1451523064773278, "learning_rate": 2e-05, "loss": 5.5987, "step": 6612 }, { "epoch": 0.4435724586645202, "grad_norm": 0.15571006021049275, "learning_rate": 2e-05, "loss": 5.4156, "step": 6613 }, { "epoch": 0.44363953449374516, "grad_norm": 0.14665291146215828, "learning_rate": 2e-05, "loss": 5.196, "step": 6614 }, { "epoch": 0.4437066103229701, "grad_norm": 0.14406411710919398, "learning_rate": 2e-05, "loss": 5.5187, "step": 6615 }, { "epoch": 0.44377368615219503, "grad_norm": 0.15753104141748633, "learning_rate": 2e-05, "loss": 5.53, "step": 6616 }, { "epoch": 0.44384076198142, "grad_norm": 0.15194025773441955, "learning_rate": 2e-05, "loss": 5.4815, "step": 6617 }, { "epoch": 0.4439078378106449, "grad_norm": 0.1562071577079159, "learning_rate": 2e-05, "loss": 5.3143, "step": 6618 }, { "epoch": 0.44397491363986985, "grad_norm": 0.15128631934668688, "learning_rate": 2e-05, "loss": 5.5328, "step": 6619 }, { "epoch": 0.4440419894690948, "grad_norm": 0.15632108888487933, "learning_rate": 2e-05, "loss": 5.3735, "step": 6620 }, { "epoch": 0.4441090652983197, "grad_norm": 0.15336313212275132, "learning_rate": 2e-05, "loss": 5.3664, "step": 6621 }, { "epoch": 0.44417614112754467, "grad_norm": 0.1561996519035676, "learning_rate": 2e-05, "loss": 5.2853, "step": 6622 }, { "epoch": 0.4442432169567696, "grad_norm": 0.14862237098363446, "learning_rate": 2e-05, "loss": 5.3588, "step": 6623 }, { "epoch": 0.44431029278599454, "grad_norm": 0.15443687795325156, "learning_rate": 2e-05, "loss": 5.5403, "step": 6624 }, { "epoch": 0.4443773686152195, "grad_norm": 0.15622178675636322, "learning_rate": 2e-05, "loss": 5.5337, "step": 6625 }, { "epoch": 0.4444444444444444, "grad_norm": 0.14773188331480097, "learning_rate": 2e-05, "loss": 5.4149, "step": 6626 }, { "epoch": 0.44451152027366936, "grad_norm": 0.14742614398500345, "learning_rate": 2e-05, "loss": 5.5661, "step": 6627 }, { "epoch": 0.4445785961028943, "grad_norm": 0.14851724833893878, "learning_rate": 2e-05, "loss": 5.3201, "step": 6628 }, { "epoch": 0.44464567193211924, "grad_norm": 0.15060657430155902, "learning_rate": 2e-05, "loss": 5.4829, "step": 6629 }, { "epoch": 0.4447127477613442, "grad_norm": 0.14709491409047068, "learning_rate": 2e-05, "loss": 5.3822, "step": 6630 }, { "epoch": 0.4447798235905691, "grad_norm": 0.1459893070829904, "learning_rate": 2e-05, "loss": 5.412, "step": 6631 }, { "epoch": 0.44484689941979405, "grad_norm": 0.14381043355326667, "learning_rate": 2e-05, "loss": 5.4599, "step": 6632 }, { "epoch": 0.444913975249019, "grad_norm": 0.14698452418498303, "learning_rate": 2e-05, "loss": 5.4428, "step": 6633 }, { "epoch": 0.4449810510782439, "grad_norm": 0.14356264084865447, "learning_rate": 2e-05, "loss": 5.2334, "step": 6634 }, { "epoch": 0.44504812690746887, "grad_norm": 0.154447934251591, "learning_rate": 2e-05, "loss": 5.3944, "step": 6635 }, { "epoch": 0.4451152027366938, "grad_norm": 0.15239359236927882, "learning_rate": 2e-05, "loss": 5.327, "step": 6636 }, { "epoch": 0.4451822785659188, "grad_norm": 0.14594201532369264, "learning_rate": 2e-05, "loss": 5.4264, "step": 6637 }, { "epoch": 0.44524935439514374, "grad_norm": 0.1506752930741795, "learning_rate": 2e-05, "loss": 5.3287, "step": 6638 }, { "epoch": 0.4453164302243687, "grad_norm": 0.14816198753858636, "learning_rate": 2e-05, "loss": 5.4084, "step": 6639 }, { "epoch": 0.4453835060535936, "grad_norm": 0.14410547625150685, "learning_rate": 2e-05, "loss": 5.4348, "step": 6640 }, { "epoch": 0.44545058188281855, "grad_norm": 0.14613228580684476, "learning_rate": 2e-05, "loss": 5.4997, "step": 6641 }, { "epoch": 0.4455176577120435, "grad_norm": 0.1481184147095058, "learning_rate": 2e-05, "loss": 5.3997, "step": 6642 }, { "epoch": 0.44558473354126843, "grad_norm": 0.15588938931119636, "learning_rate": 2e-05, "loss": 5.3531, "step": 6643 }, { "epoch": 0.44565180937049337, "grad_norm": 0.149203954360798, "learning_rate": 2e-05, "loss": 5.478, "step": 6644 }, { "epoch": 0.4457188851997183, "grad_norm": 0.1516249028898083, "learning_rate": 2e-05, "loss": 5.5, "step": 6645 }, { "epoch": 0.44578596102894325, "grad_norm": 0.14662033084046575, "learning_rate": 2e-05, "loss": 5.4291, "step": 6646 }, { "epoch": 0.4458530368581682, "grad_norm": 0.144564878101825, "learning_rate": 2e-05, "loss": 5.4444, "step": 6647 }, { "epoch": 0.4459201126873931, "grad_norm": 0.14591729091901104, "learning_rate": 2e-05, "loss": 5.5002, "step": 6648 }, { "epoch": 0.44598718851661806, "grad_norm": 0.1566092777577826, "learning_rate": 2e-05, "loss": 5.3468, "step": 6649 }, { "epoch": 0.446054264345843, "grad_norm": 0.1490968430319641, "learning_rate": 2e-05, "loss": 5.3737, "step": 6650 }, { "epoch": 0.44612134017506794, "grad_norm": 0.1490571968351877, "learning_rate": 2e-05, "loss": 5.3728, "step": 6651 }, { "epoch": 0.4461884160042929, "grad_norm": 0.15226288731932097, "learning_rate": 2e-05, "loss": 5.4961, "step": 6652 }, { "epoch": 0.4462554918335178, "grad_norm": 0.15069540839649254, "learning_rate": 2e-05, "loss": 5.3484, "step": 6653 }, { "epoch": 0.44632256766274275, "grad_norm": 0.15312319666116453, "learning_rate": 2e-05, "loss": 5.5559, "step": 6654 }, { "epoch": 0.4463896434919677, "grad_norm": 0.1482282393806249, "learning_rate": 2e-05, "loss": 5.4519, "step": 6655 }, { "epoch": 0.44645671932119263, "grad_norm": 0.14707636960585865, "learning_rate": 2e-05, "loss": 5.4523, "step": 6656 }, { "epoch": 0.44652379515041757, "grad_norm": 0.16539437872407012, "learning_rate": 2e-05, "loss": 5.4542, "step": 6657 }, { "epoch": 0.4465908709796425, "grad_norm": 0.15399100925060719, "learning_rate": 2e-05, "loss": 5.3626, "step": 6658 }, { "epoch": 0.44665794680886745, "grad_norm": 0.14583619880442414, "learning_rate": 2e-05, "loss": 5.5018, "step": 6659 }, { "epoch": 0.4467250226380924, "grad_norm": 0.15921102018043792, "learning_rate": 2e-05, "loss": 5.4614, "step": 6660 }, { "epoch": 0.4467920984673173, "grad_norm": 0.14617146686480678, "learning_rate": 2e-05, "loss": 5.4322, "step": 6661 }, { "epoch": 0.44685917429654226, "grad_norm": 0.14846827221132186, "learning_rate": 2e-05, "loss": 5.4321, "step": 6662 }, { "epoch": 0.4469262501257672, "grad_norm": 0.14977002561281297, "learning_rate": 2e-05, "loss": 5.2613, "step": 6663 }, { "epoch": 0.44699332595499214, "grad_norm": 0.1533965718609225, "learning_rate": 2e-05, "loss": 5.4142, "step": 6664 }, { "epoch": 0.4470604017842171, "grad_norm": 0.1543869201892017, "learning_rate": 2e-05, "loss": 5.6953, "step": 6665 }, { "epoch": 0.447127477613442, "grad_norm": 0.15190651132875177, "learning_rate": 2e-05, "loss": 5.4241, "step": 6666 }, { "epoch": 0.44719455344266695, "grad_norm": 0.15244172044299725, "learning_rate": 2e-05, "loss": 5.4277, "step": 6667 }, { "epoch": 0.4472616292718919, "grad_norm": 0.14882740896208693, "learning_rate": 2e-05, "loss": 5.6013, "step": 6668 }, { "epoch": 0.44732870510111683, "grad_norm": 0.149972826343418, "learning_rate": 2e-05, "loss": 5.4807, "step": 6669 }, { "epoch": 0.44739578093034177, "grad_norm": 0.15139272906864057, "learning_rate": 2e-05, "loss": 5.5085, "step": 6670 }, { "epoch": 0.4474628567595667, "grad_norm": 0.14986785548922218, "learning_rate": 2e-05, "loss": 5.4266, "step": 6671 }, { "epoch": 0.44752993258879165, "grad_norm": 0.14820078598781897, "learning_rate": 2e-05, "loss": 5.4291, "step": 6672 }, { "epoch": 0.4475970084180166, "grad_norm": 0.15623121689510372, "learning_rate": 2e-05, "loss": 5.4235, "step": 6673 }, { "epoch": 0.4476640842472415, "grad_norm": 0.14960149526230757, "learning_rate": 2e-05, "loss": 5.5171, "step": 6674 }, { "epoch": 0.44773116007646646, "grad_norm": 0.157126873968632, "learning_rate": 2e-05, "loss": 5.3985, "step": 6675 }, { "epoch": 0.4477982359056914, "grad_norm": 0.15025003725263988, "learning_rate": 2e-05, "loss": 5.3939, "step": 6676 }, { "epoch": 0.44786531173491634, "grad_norm": 0.15522741177620403, "learning_rate": 2e-05, "loss": 5.6191, "step": 6677 }, { "epoch": 0.4479323875641413, "grad_norm": 0.1604623257892669, "learning_rate": 2e-05, "loss": 5.3341, "step": 6678 }, { "epoch": 0.4479994633933662, "grad_norm": 0.15533784618387583, "learning_rate": 2e-05, "loss": 5.3595, "step": 6679 }, { "epoch": 0.44806653922259115, "grad_norm": 0.1587687647845349, "learning_rate": 2e-05, "loss": 5.3895, "step": 6680 }, { "epoch": 0.4481336150518161, "grad_norm": 0.15805917726926447, "learning_rate": 2e-05, "loss": 5.3696, "step": 6681 }, { "epoch": 0.44820069088104103, "grad_norm": 0.16132127236066854, "learning_rate": 2e-05, "loss": 5.4513, "step": 6682 }, { "epoch": 0.44826776671026597, "grad_norm": 0.16199038202555024, "learning_rate": 2e-05, "loss": 5.3482, "step": 6683 }, { "epoch": 0.4483348425394909, "grad_norm": 0.15773989836509403, "learning_rate": 2e-05, "loss": 5.4224, "step": 6684 }, { "epoch": 0.44840191836871585, "grad_norm": 0.16537203932462535, "learning_rate": 2e-05, "loss": 5.5128, "step": 6685 }, { "epoch": 0.4484689941979408, "grad_norm": 0.15473641644344394, "learning_rate": 2e-05, "loss": 5.4736, "step": 6686 }, { "epoch": 0.4485360700271657, "grad_norm": 0.15995678927274165, "learning_rate": 2e-05, "loss": 5.3176, "step": 6687 }, { "epoch": 0.44860314585639066, "grad_norm": 0.14992014956088429, "learning_rate": 2e-05, "loss": 5.3749, "step": 6688 }, { "epoch": 0.4486702216856156, "grad_norm": 0.15060858864674606, "learning_rate": 2e-05, "loss": 5.2935, "step": 6689 }, { "epoch": 0.44873729751484054, "grad_norm": 0.14958871032634782, "learning_rate": 2e-05, "loss": 5.365, "step": 6690 }, { "epoch": 0.4488043733440655, "grad_norm": 0.14305401649297447, "learning_rate": 2e-05, "loss": 5.3295, "step": 6691 }, { "epoch": 0.4488714491732904, "grad_norm": 0.1477895959811945, "learning_rate": 2e-05, "loss": 5.4475, "step": 6692 }, { "epoch": 0.44893852500251535, "grad_norm": 0.1524956871448201, "learning_rate": 2e-05, "loss": 5.4578, "step": 6693 }, { "epoch": 0.4490056008317403, "grad_norm": 0.14719777161321448, "learning_rate": 2e-05, "loss": 5.482, "step": 6694 }, { "epoch": 0.44907267666096523, "grad_norm": 0.15556864685203514, "learning_rate": 2e-05, "loss": 5.5769, "step": 6695 }, { "epoch": 0.44913975249019017, "grad_norm": 0.1645124007062323, "learning_rate": 2e-05, "loss": 5.4926, "step": 6696 }, { "epoch": 0.4492068283194151, "grad_norm": 0.1518412650160944, "learning_rate": 2e-05, "loss": 5.3714, "step": 6697 }, { "epoch": 0.44927390414864005, "grad_norm": 0.15197246649475754, "learning_rate": 2e-05, "loss": 5.4547, "step": 6698 }, { "epoch": 0.449340979977865, "grad_norm": 0.1595081676304039, "learning_rate": 2e-05, "loss": 5.3386, "step": 6699 }, { "epoch": 0.4494080558070899, "grad_norm": 0.1526204590875003, "learning_rate": 2e-05, "loss": 5.5002, "step": 6700 }, { "epoch": 0.44947513163631486, "grad_norm": 0.1551256988403826, "learning_rate": 2e-05, "loss": 5.4193, "step": 6701 }, { "epoch": 0.4495422074655398, "grad_norm": 0.14460995143112626, "learning_rate": 2e-05, "loss": 5.4147, "step": 6702 }, { "epoch": 0.44960928329476474, "grad_norm": 0.1458574211779147, "learning_rate": 2e-05, "loss": 5.4886, "step": 6703 }, { "epoch": 0.4496763591239897, "grad_norm": 0.1483865999453952, "learning_rate": 2e-05, "loss": 5.3956, "step": 6704 }, { "epoch": 0.4497434349532146, "grad_norm": 0.15139650193523052, "learning_rate": 2e-05, "loss": 5.4573, "step": 6705 }, { "epoch": 0.44981051078243955, "grad_norm": 0.15623143453998375, "learning_rate": 2e-05, "loss": 5.5624, "step": 6706 }, { "epoch": 0.4498775866116645, "grad_norm": 0.15032086072233322, "learning_rate": 2e-05, "loss": 5.3925, "step": 6707 }, { "epoch": 0.44994466244088943, "grad_norm": 0.15340291619810498, "learning_rate": 2e-05, "loss": 5.4293, "step": 6708 }, { "epoch": 0.45001173827011437, "grad_norm": 0.15540585247498404, "learning_rate": 2e-05, "loss": 5.472, "step": 6709 }, { "epoch": 0.4500788140993393, "grad_norm": 0.15093565922251084, "learning_rate": 2e-05, "loss": 5.4898, "step": 6710 }, { "epoch": 0.45014588992856425, "grad_norm": 0.15303216862799726, "learning_rate": 2e-05, "loss": 5.4442, "step": 6711 }, { "epoch": 0.4502129657577892, "grad_norm": 0.14651228911259123, "learning_rate": 2e-05, "loss": 5.5089, "step": 6712 }, { "epoch": 0.4502800415870141, "grad_norm": 0.14262593862720402, "learning_rate": 2e-05, "loss": 5.4632, "step": 6713 }, { "epoch": 0.45034711741623906, "grad_norm": 0.14605982034033693, "learning_rate": 2e-05, "loss": 5.4776, "step": 6714 }, { "epoch": 0.450414193245464, "grad_norm": 0.14674471624171198, "learning_rate": 2e-05, "loss": 5.4052, "step": 6715 }, { "epoch": 0.45048126907468894, "grad_norm": 0.14475982916311003, "learning_rate": 2e-05, "loss": 5.3348, "step": 6716 }, { "epoch": 0.4505483449039139, "grad_norm": 0.14258355146235574, "learning_rate": 2e-05, "loss": 5.4356, "step": 6717 }, { "epoch": 0.4506154207331388, "grad_norm": 0.15603985907555126, "learning_rate": 2e-05, "loss": 5.498, "step": 6718 }, { "epoch": 0.45068249656236375, "grad_norm": 0.15225058433383828, "learning_rate": 2e-05, "loss": 5.3479, "step": 6719 }, { "epoch": 0.4507495723915887, "grad_norm": 0.15314594114711044, "learning_rate": 2e-05, "loss": 5.3656, "step": 6720 }, { "epoch": 0.45081664822081363, "grad_norm": 0.14940584863351314, "learning_rate": 2e-05, "loss": 5.3748, "step": 6721 }, { "epoch": 0.45088372405003857, "grad_norm": 0.1489712657962272, "learning_rate": 2e-05, "loss": 5.3171, "step": 6722 }, { "epoch": 0.4509507998792635, "grad_norm": 0.14869446175442633, "learning_rate": 2e-05, "loss": 5.3705, "step": 6723 }, { "epoch": 0.45101787570848845, "grad_norm": 0.1665371875529824, "learning_rate": 2e-05, "loss": 5.5524, "step": 6724 }, { "epoch": 0.4510849515377134, "grad_norm": 0.15795641471037727, "learning_rate": 2e-05, "loss": 5.42, "step": 6725 }, { "epoch": 0.4511520273669383, "grad_norm": 0.1484251275736474, "learning_rate": 2e-05, "loss": 5.4447, "step": 6726 }, { "epoch": 0.45121910319616326, "grad_norm": 0.15490746345234377, "learning_rate": 2e-05, "loss": 5.4625, "step": 6727 }, { "epoch": 0.4512861790253882, "grad_norm": 0.15040446963836424, "learning_rate": 2e-05, "loss": 5.4584, "step": 6728 }, { "epoch": 0.45135325485461314, "grad_norm": 0.14891769234703464, "learning_rate": 2e-05, "loss": 5.4338, "step": 6729 }, { "epoch": 0.4514203306838381, "grad_norm": 0.15191405531174987, "learning_rate": 2e-05, "loss": 5.4062, "step": 6730 }, { "epoch": 0.451487406513063, "grad_norm": 0.15469071893892103, "learning_rate": 2e-05, "loss": 5.4065, "step": 6731 }, { "epoch": 0.45155448234228796, "grad_norm": 0.1506459573573634, "learning_rate": 2e-05, "loss": 5.4145, "step": 6732 }, { "epoch": 0.4516215581715129, "grad_norm": 0.15841825134238746, "learning_rate": 2e-05, "loss": 5.4135, "step": 6733 }, { "epoch": 0.45168863400073783, "grad_norm": 0.15244260472909607, "learning_rate": 2e-05, "loss": 5.427, "step": 6734 }, { "epoch": 0.45175570982996277, "grad_norm": 0.15578113130632093, "learning_rate": 2e-05, "loss": 5.5414, "step": 6735 }, { "epoch": 0.4518227856591877, "grad_norm": 0.15184408795210916, "learning_rate": 2e-05, "loss": 5.4309, "step": 6736 }, { "epoch": 0.45188986148841265, "grad_norm": 0.15974802711725725, "learning_rate": 2e-05, "loss": 5.3543, "step": 6737 }, { "epoch": 0.4519569373176376, "grad_norm": 0.1518013073962905, "learning_rate": 2e-05, "loss": 5.3919, "step": 6738 }, { "epoch": 0.4520240131468625, "grad_norm": 0.1506694285647593, "learning_rate": 2e-05, "loss": 5.3523, "step": 6739 }, { "epoch": 0.45209108897608746, "grad_norm": 0.15241078006350828, "learning_rate": 2e-05, "loss": 5.5658, "step": 6740 }, { "epoch": 0.4521581648053124, "grad_norm": 0.15886447918685906, "learning_rate": 2e-05, "loss": 5.4841, "step": 6741 }, { "epoch": 0.45222524063453734, "grad_norm": 0.14747066942844014, "learning_rate": 2e-05, "loss": 5.4412, "step": 6742 }, { "epoch": 0.4522923164637623, "grad_norm": 0.1441903088604782, "learning_rate": 2e-05, "loss": 5.4629, "step": 6743 }, { "epoch": 0.4523593922929872, "grad_norm": 0.15416502428577003, "learning_rate": 2e-05, "loss": 5.431, "step": 6744 }, { "epoch": 0.45242646812221216, "grad_norm": 0.15596868958194618, "learning_rate": 2e-05, "loss": 5.3459, "step": 6745 }, { "epoch": 0.4524935439514371, "grad_norm": 0.14449522692098185, "learning_rate": 2e-05, "loss": 5.2653, "step": 6746 }, { "epoch": 0.45256061978066203, "grad_norm": 0.1436775697201396, "learning_rate": 2e-05, "loss": 5.497, "step": 6747 }, { "epoch": 0.45262769560988697, "grad_norm": 0.15880022317882891, "learning_rate": 2e-05, "loss": 5.5393, "step": 6748 }, { "epoch": 0.4526947714391119, "grad_norm": 0.14361066667199573, "learning_rate": 2e-05, "loss": 5.4315, "step": 6749 }, { "epoch": 0.45276184726833685, "grad_norm": 0.14894503374532814, "learning_rate": 2e-05, "loss": 5.4768, "step": 6750 }, { "epoch": 0.4528289230975618, "grad_norm": 0.14499630710687358, "learning_rate": 2e-05, "loss": 5.5352, "step": 6751 }, { "epoch": 0.4528959989267867, "grad_norm": 0.1493419188654647, "learning_rate": 2e-05, "loss": 5.4632, "step": 6752 }, { "epoch": 0.45296307475601166, "grad_norm": 0.14700421925374885, "learning_rate": 2e-05, "loss": 5.4791, "step": 6753 }, { "epoch": 0.4530301505852366, "grad_norm": 0.15355700882945647, "learning_rate": 2e-05, "loss": 5.4685, "step": 6754 }, { "epoch": 0.45309722641446154, "grad_norm": 0.15006343098444855, "learning_rate": 2e-05, "loss": 5.2803, "step": 6755 }, { "epoch": 0.4531643022436865, "grad_norm": 0.15568878795923285, "learning_rate": 2e-05, "loss": 5.6067, "step": 6756 }, { "epoch": 0.4532313780729114, "grad_norm": 0.15151165694055962, "learning_rate": 2e-05, "loss": 5.4049, "step": 6757 }, { "epoch": 0.45329845390213636, "grad_norm": 0.1490430211160049, "learning_rate": 2e-05, "loss": 5.3991, "step": 6758 }, { "epoch": 0.4533655297313613, "grad_norm": 0.1473519034352782, "learning_rate": 2e-05, "loss": 5.3893, "step": 6759 }, { "epoch": 0.45343260556058623, "grad_norm": 0.15555970069085348, "learning_rate": 2e-05, "loss": 5.3927, "step": 6760 }, { "epoch": 0.45349968138981117, "grad_norm": 0.14868522304892845, "learning_rate": 2e-05, "loss": 5.5196, "step": 6761 }, { "epoch": 0.4535667572190361, "grad_norm": 0.1453499895823372, "learning_rate": 2e-05, "loss": 5.2662, "step": 6762 }, { "epoch": 0.45363383304826105, "grad_norm": 0.15195613885814688, "learning_rate": 2e-05, "loss": 5.5273, "step": 6763 }, { "epoch": 0.453700908877486, "grad_norm": 0.1572684460117502, "learning_rate": 2e-05, "loss": 5.3928, "step": 6764 }, { "epoch": 0.4537679847067109, "grad_norm": 0.14908801833054158, "learning_rate": 2e-05, "loss": 5.6312, "step": 6765 }, { "epoch": 0.45383506053593586, "grad_norm": 0.15087374879702167, "learning_rate": 2e-05, "loss": 5.4087, "step": 6766 }, { "epoch": 0.4539021363651608, "grad_norm": 0.14241983601471628, "learning_rate": 2e-05, "loss": 5.5734, "step": 6767 }, { "epoch": 0.45396921219438574, "grad_norm": 0.1495772741834931, "learning_rate": 2e-05, "loss": 5.3459, "step": 6768 }, { "epoch": 0.4540362880236107, "grad_norm": 0.14309825889399053, "learning_rate": 2e-05, "loss": 5.4752, "step": 6769 }, { "epoch": 0.4541033638528356, "grad_norm": 0.1471018627262288, "learning_rate": 2e-05, "loss": 5.252, "step": 6770 }, { "epoch": 0.45417043968206056, "grad_norm": 0.14697599348999096, "learning_rate": 2e-05, "loss": 5.4274, "step": 6771 }, { "epoch": 0.4542375155112855, "grad_norm": 0.1468864888267532, "learning_rate": 2e-05, "loss": 5.3886, "step": 6772 }, { "epoch": 0.45430459134051043, "grad_norm": 0.14896204709292452, "learning_rate": 2e-05, "loss": 5.5465, "step": 6773 }, { "epoch": 0.45437166716973537, "grad_norm": 0.1416643042434383, "learning_rate": 2e-05, "loss": 5.3848, "step": 6774 }, { "epoch": 0.4544387429989603, "grad_norm": 0.14482135846457422, "learning_rate": 2e-05, "loss": 5.4253, "step": 6775 }, { "epoch": 0.45450581882818525, "grad_norm": 0.14889897420253156, "learning_rate": 2e-05, "loss": 5.3665, "step": 6776 }, { "epoch": 0.4545728946574102, "grad_norm": 0.14840040106952188, "learning_rate": 2e-05, "loss": 5.4337, "step": 6777 }, { "epoch": 0.4546399704866351, "grad_norm": 0.15272561991420458, "learning_rate": 2e-05, "loss": 5.4809, "step": 6778 }, { "epoch": 0.45470704631586006, "grad_norm": 0.16162495644330718, "learning_rate": 2e-05, "loss": 5.4434, "step": 6779 }, { "epoch": 0.454774122145085, "grad_norm": 0.14529100969601305, "learning_rate": 2e-05, "loss": 5.3613, "step": 6780 }, { "epoch": 0.45484119797430994, "grad_norm": 0.14945675857808685, "learning_rate": 2e-05, "loss": 5.4117, "step": 6781 }, { "epoch": 0.4549082738035349, "grad_norm": 0.15159926655540495, "learning_rate": 2e-05, "loss": 5.4658, "step": 6782 }, { "epoch": 0.4549753496327598, "grad_norm": 0.153422925138813, "learning_rate": 2e-05, "loss": 5.3999, "step": 6783 }, { "epoch": 0.45504242546198476, "grad_norm": 0.15508182220241562, "learning_rate": 2e-05, "loss": 5.4197, "step": 6784 }, { "epoch": 0.4551095012912097, "grad_norm": 0.15153719394643492, "learning_rate": 2e-05, "loss": 5.3045, "step": 6785 }, { "epoch": 0.45517657712043463, "grad_norm": 0.15580567264626596, "learning_rate": 2e-05, "loss": 5.4171, "step": 6786 }, { "epoch": 0.45524365294965957, "grad_norm": 0.1473647873169069, "learning_rate": 2e-05, "loss": 5.319, "step": 6787 }, { "epoch": 0.4553107287788845, "grad_norm": 0.15272026070819308, "learning_rate": 2e-05, "loss": 5.4639, "step": 6788 }, { "epoch": 0.45537780460810945, "grad_norm": 0.153414290462464, "learning_rate": 2e-05, "loss": 5.3883, "step": 6789 }, { "epoch": 0.4554448804373344, "grad_norm": 0.15090595890991929, "learning_rate": 2e-05, "loss": 5.3126, "step": 6790 }, { "epoch": 0.4555119562665593, "grad_norm": 0.15281037121437713, "learning_rate": 2e-05, "loss": 5.3654, "step": 6791 }, { "epoch": 0.45557903209578426, "grad_norm": 0.1486893429212238, "learning_rate": 2e-05, "loss": 5.3925, "step": 6792 }, { "epoch": 0.4556461079250092, "grad_norm": 0.15197232490173232, "learning_rate": 2e-05, "loss": 5.3909, "step": 6793 }, { "epoch": 0.45571318375423414, "grad_norm": 0.14574549099874146, "learning_rate": 2e-05, "loss": 5.2675, "step": 6794 }, { "epoch": 0.4557802595834591, "grad_norm": 0.140932002100928, "learning_rate": 2e-05, "loss": 5.4156, "step": 6795 }, { "epoch": 0.455847335412684, "grad_norm": 0.1497684036686364, "learning_rate": 2e-05, "loss": 5.5309, "step": 6796 }, { "epoch": 0.45591441124190896, "grad_norm": 0.1504370141736865, "learning_rate": 2e-05, "loss": 5.419, "step": 6797 }, { "epoch": 0.4559814870711339, "grad_norm": 0.1481266171180037, "learning_rate": 2e-05, "loss": 5.5354, "step": 6798 }, { "epoch": 0.45604856290035883, "grad_norm": 0.15065593803871136, "learning_rate": 2e-05, "loss": 5.5571, "step": 6799 }, { "epoch": 0.45611563872958377, "grad_norm": 0.1427567008983785, "learning_rate": 2e-05, "loss": 5.5115, "step": 6800 }, { "epoch": 0.4561827145588087, "grad_norm": 0.14413870741633147, "learning_rate": 2e-05, "loss": 5.298, "step": 6801 }, { "epoch": 0.45624979038803365, "grad_norm": 0.14613927866435575, "learning_rate": 2e-05, "loss": 5.5427, "step": 6802 }, { "epoch": 0.4563168662172586, "grad_norm": 0.1572618437385827, "learning_rate": 2e-05, "loss": 5.4561, "step": 6803 }, { "epoch": 0.4563839420464835, "grad_norm": 0.15392001653236845, "learning_rate": 2e-05, "loss": 5.4402, "step": 6804 }, { "epoch": 0.45645101787570846, "grad_norm": 0.15513710660861638, "learning_rate": 2e-05, "loss": 5.5121, "step": 6805 }, { "epoch": 0.4565180937049334, "grad_norm": 0.1531837537968394, "learning_rate": 2e-05, "loss": 5.4079, "step": 6806 }, { "epoch": 0.45658516953415834, "grad_norm": 0.14549312799348676, "learning_rate": 2e-05, "loss": 5.4178, "step": 6807 }, { "epoch": 0.4566522453633833, "grad_norm": 0.1515604646861352, "learning_rate": 2e-05, "loss": 5.5358, "step": 6808 }, { "epoch": 0.4567193211926082, "grad_norm": 0.15554387475075712, "learning_rate": 2e-05, "loss": 5.4377, "step": 6809 }, { "epoch": 0.45678639702183316, "grad_norm": 0.14248033371355506, "learning_rate": 2e-05, "loss": 5.4612, "step": 6810 }, { "epoch": 0.4568534728510581, "grad_norm": 0.14552718053370287, "learning_rate": 2e-05, "loss": 5.4726, "step": 6811 }, { "epoch": 0.45692054868028303, "grad_norm": 0.1584713522793728, "learning_rate": 2e-05, "loss": 5.4128, "step": 6812 }, { "epoch": 0.456987624509508, "grad_norm": 0.15189221423506102, "learning_rate": 2e-05, "loss": 5.3784, "step": 6813 }, { "epoch": 0.4570547003387329, "grad_norm": 0.15098210163678075, "learning_rate": 2e-05, "loss": 5.5663, "step": 6814 }, { "epoch": 0.45712177616795785, "grad_norm": 0.14998442816389243, "learning_rate": 2e-05, "loss": 5.412, "step": 6815 }, { "epoch": 0.4571888519971828, "grad_norm": 0.15148875580852159, "learning_rate": 2e-05, "loss": 5.4968, "step": 6816 }, { "epoch": 0.4572559278264077, "grad_norm": 0.1488532936339803, "learning_rate": 2e-05, "loss": 5.3634, "step": 6817 }, { "epoch": 0.4573230036556327, "grad_norm": 0.14864038668991458, "learning_rate": 2e-05, "loss": 5.443, "step": 6818 }, { "epoch": 0.45739007948485766, "grad_norm": 0.14905748875340388, "learning_rate": 2e-05, "loss": 5.3715, "step": 6819 }, { "epoch": 0.4574571553140826, "grad_norm": 0.1542489156970249, "learning_rate": 2e-05, "loss": 5.2481, "step": 6820 }, { "epoch": 0.45752423114330754, "grad_norm": 0.14465655347565348, "learning_rate": 2e-05, "loss": 5.4645, "step": 6821 }, { "epoch": 0.4575913069725325, "grad_norm": 0.14383478993836624, "learning_rate": 2e-05, "loss": 5.3609, "step": 6822 }, { "epoch": 0.4576583828017574, "grad_norm": 0.14920575818076662, "learning_rate": 2e-05, "loss": 5.5277, "step": 6823 }, { "epoch": 0.45772545863098235, "grad_norm": 0.15398943892602626, "learning_rate": 2e-05, "loss": 5.443, "step": 6824 }, { "epoch": 0.4577925344602073, "grad_norm": 0.15903410715419328, "learning_rate": 2e-05, "loss": 5.4196, "step": 6825 }, { "epoch": 0.45785961028943223, "grad_norm": 0.14944428611257876, "learning_rate": 2e-05, "loss": 5.4037, "step": 6826 }, { "epoch": 0.45792668611865717, "grad_norm": 0.14609668423029729, "learning_rate": 2e-05, "loss": 5.4424, "step": 6827 }, { "epoch": 0.4579937619478821, "grad_norm": 0.1490239788627957, "learning_rate": 2e-05, "loss": 5.4466, "step": 6828 }, { "epoch": 0.45806083777710704, "grad_norm": 0.1560167274124761, "learning_rate": 2e-05, "loss": 5.3888, "step": 6829 }, { "epoch": 0.458127913606332, "grad_norm": 0.1484792875021223, "learning_rate": 2e-05, "loss": 5.4563, "step": 6830 }, { "epoch": 0.4581949894355569, "grad_norm": 0.14644493131324363, "learning_rate": 2e-05, "loss": 5.4493, "step": 6831 }, { "epoch": 0.45826206526478186, "grad_norm": 0.14963182525052476, "learning_rate": 2e-05, "loss": 5.5202, "step": 6832 }, { "epoch": 0.4583291410940068, "grad_norm": 0.1492016315959773, "learning_rate": 2e-05, "loss": 5.4887, "step": 6833 }, { "epoch": 0.45839621692323174, "grad_norm": 0.1606279012582248, "learning_rate": 2e-05, "loss": 5.4685, "step": 6834 }, { "epoch": 0.4584632927524567, "grad_norm": 0.15341594066844494, "learning_rate": 2e-05, "loss": 5.4207, "step": 6835 }, { "epoch": 0.4585303685816816, "grad_norm": 0.1486740356555224, "learning_rate": 2e-05, "loss": 5.5475, "step": 6836 }, { "epoch": 0.45859744441090655, "grad_norm": 0.14941081289396368, "learning_rate": 2e-05, "loss": 5.4845, "step": 6837 }, { "epoch": 0.4586645202401315, "grad_norm": 0.1437072389361915, "learning_rate": 2e-05, "loss": 5.3068, "step": 6838 }, { "epoch": 0.45873159606935643, "grad_norm": 0.16230741934824738, "learning_rate": 2e-05, "loss": 5.3965, "step": 6839 }, { "epoch": 0.45879867189858137, "grad_norm": 0.1509260401817456, "learning_rate": 2e-05, "loss": 5.2249, "step": 6840 }, { "epoch": 0.4588657477278063, "grad_norm": 0.1480138557745186, "learning_rate": 2e-05, "loss": 5.5437, "step": 6841 }, { "epoch": 0.45893282355703124, "grad_norm": 0.1573583391060196, "learning_rate": 2e-05, "loss": 5.5945, "step": 6842 }, { "epoch": 0.4589998993862562, "grad_norm": 0.1563945714113123, "learning_rate": 2e-05, "loss": 5.2916, "step": 6843 }, { "epoch": 0.4590669752154811, "grad_norm": 0.15197606726096863, "learning_rate": 2e-05, "loss": 5.3326, "step": 6844 }, { "epoch": 0.45913405104470606, "grad_norm": 0.1475515685179669, "learning_rate": 2e-05, "loss": 5.5122, "step": 6845 }, { "epoch": 0.459201126873931, "grad_norm": 0.1586890013726817, "learning_rate": 2e-05, "loss": 5.3445, "step": 6846 }, { "epoch": 0.45926820270315594, "grad_norm": 0.16522729500786631, "learning_rate": 2e-05, "loss": 5.556, "step": 6847 }, { "epoch": 0.4593352785323809, "grad_norm": 0.15332365514919233, "learning_rate": 2e-05, "loss": 5.4569, "step": 6848 }, { "epoch": 0.4594023543616058, "grad_norm": 0.14468835222295723, "learning_rate": 2e-05, "loss": 5.1447, "step": 6849 }, { "epoch": 0.45946943019083075, "grad_norm": 0.1633879437538502, "learning_rate": 2e-05, "loss": 5.4097, "step": 6850 }, { "epoch": 0.4595365060200557, "grad_norm": 0.16367654628024456, "learning_rate": 2e-05, "loss": 5.5638, "step": 6851 }, { "epoch": 0.45960358184928063, "grad_norm": 0.14792256133534445, "learning_rate": 2e-05, "loss": 5.5333, "step": 6852 }, { "epoch": 0.45967065767850557, "grad_norm": 0.155761577735702, "learning_rate": 2e-05, "loss": 5.4167, "step": 6853 }, { "epoch": 0.4597377335077305, "grad_norm": 0.17104952415285546, "learning_rate": 2e-05, "loss": 5.2985, "step": 6854 }, { "epoch": 0.45980480933695544, "grad_norm": 0.16979817934350178, "learning_rate": 2e-05, "loss": 5.4223, "step": 6855 }, { "epoch": 0.4598718851661804, "grad_norm": 0.15138516126144502, "learning_rate": 2e-05, "loss": 5.478, "step": 6856 }, { "epoch": 0.4599389609954053, "grad_norm": 0.17166294857898953, "learning_rate": 2e-05, "loss": 5.3935, "step": 6857 }, { "epoch": 0.46000603682463026, "grad_norm": 0.16126027210637386, "learning_rate": 2e-05, "loss": 5.4556, "step": 6858 }, { "epoch": 0.4600731126538552, "grad_norm": 0.16306187911632683, "learning_rate": 2e-05, "loss": 5.3972, "step": 6859 }, { "epoch": 0.46014018848308014, "grad_norm": 0.1540449653355481, "learning_rate": 2e-05, "loss": 5.4879, "step": 6860 }, { "epoch": 0.4602072643123051, "grad_norm": 0.15980252006265108, "learning_rate": 2e-05, "loss": 5.3491, "step": 6861 }, { "epoch": 0.46027434014153, "grad_norm": 0.15988193402297754, "learning_rate": 2e-05, "loss": 5.5026, "step": 6862 }, { "epoch": 0.46034141597075495, "grad_norm": 0.16255255547307942, "learning_rate": 2e-05, "loss": 5.4825, "step": 6863 }, { "epoch": 0.4604084917999799, "grad_norm": 0.15395051994633366, "learning_rate": 2e-05, "loss": 5.4932, "step": 6864 }, { "epoch": 0.46047556762920483, "grad_norm": 0.15954079484248598, "learning_rate": 2e-05, "loss": 5.4023, "step": 6865 }, { "epoch": 0.46054264345842977, "grad_norm": 0.15368879107035383, "learning_rate": 2e-05, "loss": 5.4606, "step": 6866 }, { "epoch": 0.4606097192876547, "grad_norm": 0.1596556019262668, "learning_rate": 2e-05, "loss": 5.5666, "step": 6867 }, { "epoch": 0.46067679511687964, "grad_norm": 0.15638060299038178, "learning_rate": 2e-05, "loss": 5.3795, "step": 6868 }, { "epoch": 0.4607438709461046, "grad_norm": 0.1543171123634257, "learning_rate": 2e-05, "loss": 5.4698, "step": 6869 }, { "epoch": 0.4608109467753295, "grad_norm": 0.1620174065511477, "learning_rate": 2e-05, "loss": 5.4394, "step": 6870 }, { "epoch": 0.46087802260455446, "grad_norm": 0.15923071950255707, "learning_rate": 2e-05, "loss": 5.383, "step": 6871 }, { "epoch": 0.4609450984337794, "grad_norm": 0.1560591044547252, "learning_rate": 2e-05, "loss": 5.3896, "step": 6872 }, { "epoch": 0.46101217426300434, "grad_norm": 0.16056990164153642, "learning_rate": 2e-05, "loss": 5.2891, "step": 6873 }, { "epoch": 0.4610792500922293, "grad_norm": 0.15345297141644507, "learning_rate": 2e-05, "loss": 5.4526, "step": 6874 }, { "epoch": 0.4611463259214542, "grad_norm": 0.15042588598379872, "learning_rate": 2e-05, "loss": 5.5809, "step": 6875 }, { "epoch": 0.46121340175067915, "grad_norm": 0.15313454691390052, "learning_rate": 2e-05, "loss": 5.3473, "step": 6876 }, { "epoch": 0.4612804775799041, "grad_norm": 0.15215817610877422, "learning_rate": 2e-05, "loss": 5.447, "step": 6877 }, { "epoch": 0.46134755340912903, "grad_norm": 0.14576484478761279, "learning_rate": 2e-05, "loss": 5.4818, "step": 6878 }, { "epoch": 0.46141462923835397, "grad_norm": 0.15770568272415206, "learning_rate": 2e-05, "loss": 5.3461, "step": 6879 }, { "epoch": 0.4614817050675789, "grad_norm": 0.15302481145260463, "learning_rate": 2e-05, "loss": 5.4615, "step": 6880 }, { "epoch": 0.46154878089680385, "grad_norm": 0.16030093390564054, "learning_rate": 2e-05, "loss": 5.4299, "step": 6881 }, { "epoch": 0.4616158567260288, "grad_norm": 0.16396075011348726, "learning_rate": 2e-05, "loss": 5.3917, "step": 6882 }, { "epoch": 0.4616829325552537, "grad_norm": 0.14585112902347835, "learning_rate": 2e-05, "loss": 5.3386, "step": 6883 }, { "epoch": 0.46175000838447866, "grad_norm": 0.14606060603619322, "learning_rate": 2e-05, "loss": 5.4251, "step": 6884 }, { "epoch": 0.4618170842137036, "grad_norm": 0.151404840348132, "learning_rate": 2e-05, "loss": 5.4219, "step": 6885 }, { "epoch": 0.46188416004292854, "grad_norm": 0.15324013379395765, "learning_rate": 2e-05, "loss": 5.3451, "step": 6886 }, { "epoch": 0.4619512358721535, "grad_norm": 0.15543860580247973, "learning_rate": 2e-05, "loss": 5.4014, "step": 6887 }, { "epoch": 0.4620183117013784, "grad_norm": 0.15281911805965062, "learning_rate": 2e-05, "loss": 5.5368, "step": 6888 }, { "epoch": 0.46208538753060335, "grad_norm": 0.14666811658672133, "learning_rate": 2e-05, "loss": 5.5135, "step": 6889 }, { "epoch": 0.4621524633598283, "grad_norm": 0.16017857389825435, "learning_rate": 2e-05, "loss": 5.3202, "step": 6890 }, { "epoch": 0.46221953918905323, "grad_norm": 0.14742991055372673, "learning_rate": 2e-05, "loss": 5.3528, "step": 6891 }, { "epoch": 0.46228661501827817, "grad_norm": 0.1516833967947273, "learning_rate": 2e-05, "loss": 5.3472, "step": 6892 }, { "epoch": 0.4623536908475031, "grad_norm": 0.15554706278451133, "learning_rate": 2e-05, "loss": 5.418, "step": 6893 }, { "epoch": 0.46242076667672805, "grad_norm": 0.15292364999403638, "learning_rate": 2e-05, "loss": 5.5843, "step": 6894 }, { "epoch": 0.462487842505953, "grad_norm": 0.1456045806223181, "learning_rate": 2e-05, "loss": 5.482, "step": 6895 }, { "epoch": 0.4625549183351779, "grad_norm": 0.14744795452139045, "learning_rate": 2e-05, "loss": 5.3165, "step": 6896 }, { "epoch": 0.46262199416440286, "grad_norm": 0.15987486523007366, "learning_rate": 2e-05, "loss": 5.5192, "step": 6897 }, { "epoch": 0.4626890699936278, "grad_norm": 0.14634283140843818, "learning_rate": 2e-05, "loss": 5.4634, "step": 6898 }, { "epoch": 0.46275614582285274, "grad_norm": 0.15461747961660474, "learning_rate": 2e-05, "loss": 5.3416, "step": 6899 }, { "epoch": 0.4628232216520777, "grad_norm": 0.14885018183181015, "learning_rate": 2e-05, "loss": 5.5183, "step": 6900 }, { "epoch": 0.4628902974813026, "grad_norm": 0.15677463588083337, "learning_rate": 2e-05, "loss": 5.4841, "step": 6901 }, { "epoch": 0.46295737331052755, "grad_norm": 0.15884081096055716, "learning_rate": 2e-05, "loss": 5.4438, "step": 6902 }, { "epoch": 0.4630244491397525, "grad_norm": 0.15696451354859234, "learning_rate": 2e-05, "loss": 5.5286, "step": 6903 }, { "epoch": 0.46309152496897743, "grad_norm": 0.15193108496692742, "learning_rate": 2e-05, "loss": 5.4049, "step": 6904 }, { "epoch": 0.46315860079820237, "grad_norm": 0.15623535823596021, "learning_rate": 2e-05, "loss": 5.2706, "step": 6905 }, { "epoch": 0.4632256766274273, "grad_norm": 0.15077800595437188, "learning_rate": 2e-05, "loss": 5.377, "step": 6906 }, { "epoch": 0.46329275245665225, "grad_norm": 0.14441539537996173, "learning_rate": 2e-05, "loss": 5.3369, "step": 6907 }, { "epoch": 0.4633598282858772, "grad_norm": 0.15836694628089862, "learning_rate": 2e-05, "loss": 5.5044, "step": 6908 }, { "epoch": 0.4634269041151021, "grad_norm": 0.15390534497418698, "learning_rate": 2e-05, "loss": 5.383, "step": 6909 }, { "epoch": 0.46349397994432706, "grad_norm": 0.1487593422830048, "learning_rate": 2e-05, "loss": 5.4491, "step": 6910 }, { "epoch": 0.463561055773552, "grad_norm": 0.14991572565228262, "learning_rate": 2e-05, "loss": 5.515, "step": 6911 }, { "epoch": 0.46362813160277694, "grad_norm": 0.16462358333259483, "learning_rate": 2e-05, "loss": 5.3525, "step": 6912 }, { "epoch": 0.4636952074320019, "grad_norm": 0.15021014157892695, "learning_rate": 2e-05, "loss": 5.4491, "step": 6913 }, { "epoch": 0.4637622832612268, "grad_norm": 0.1503320038624078, "learning_rate": 2e-05, "loss": 5.3151, "step": 6914 }, { "epoch": 0.46382935909045175, "grad_norm": 0.15281805741785356, "learning_rate": 2e-05, "loss": 5.5246, "step": 6915 }, { "epoch": 0.4638964349196767, "grad_norm": 0.14528153041482786, "learning_rate": 2e-05, "loss": 5.4037, "step": 6916 }, { "epoch": 0.46396351074890163, "grad_norm": 0.14586281185449368, "learning_rate": 2e-05, "loss": 5.4463, "step": 6917 }, { "epoch": 0.46403058657812657, "grad_norm": 0.15134114694695414, "learning_rate": 2e-05, "loss": 5.5017, "step": 6918 }, { "epoch": 0.4640976624073515, "grad_norm": 0.14987264986425503, "learning_rate": 2e-05, "loss": 5.36, "step": 6919 }, { "epoch": 0.46416473823657645, "grad_norm": 0.150838808903471, "learning_rate": 2e-05, "loss": 5.4782, "step": 6920 }, { "epoch": 0.4642318140658014, "grad_norm": 0.16607860069060745, "learning_rate": 2e-05, "loss": 5.3559, "step": 6921 }, { "epoch": 0.4642988898950263, "grad_norm": 0.15087429925567544, "learning_rate": 2e-05, "loss": 5.3145, "step": 6922 }, { "epoch": 0.46436596572425126, "grad_norm": 0.1498953318541709, "learning_rate": 2e-05, "loss": 5.5187, "step": 6923 }, { "epoch": 0.4644330415534762, "grad_norm": 0.15463250418527355, "learning_rate": 2e-05, "loss": 5.2846, "step": 6924 }, { "epoch": 0.46450011738270114, "grad_norm": 0.15255499592922106, "learning_rate": 2e-05, "loss": 5.4978, "step": 6925 }, { "epoch": 0.4645671932119261, "grad_norm": 0.1493142687296615, "learning_rate": 2e-05, "loss": 5.4225, "step": 6926 }, { "epoch": 0.464634269041151, "grad_norm": 0.15243735812981327, "learning_rate": 2e-05, "loss": 5.4747, "step": 6927 }, { "epoch": 0.46470134487037595, "grad_norm": 0.15202042563306756, "learning_rate": 2e-05, "loss": 5.5682, "step": 6928 }, { "epoch": 0.4647684206996009, "grad_norm": 0.15083950388730794, "learning_rate": 2e-05, "loss": 5.4143, "step": 6929 }, { "epoch": 0.46483549652882583, "grad_norm": 0.15756573244834723, "learning_rate": 2e-05, "loss": 5.3373, "step": 6930 }, { "epoch": 0.46490257235805077, "grad_norm": 0.14565159462507912, "learning_rate": 2e-05, "loss": 5.365, "step": 6931 }, { "epoch": 0.4649696481872757, "grad_norm": 0.15538131716712208, "learning_rate": 2e-05, "loss": 5.4973, "step": 6932 }, { "epoch": 0.46503672401650065, "grad_norm": 0.14658396990056732, "learning_rate": 2e-05, "loss": 5.3672, "step": 6933 }, { "epoch": 0.4651037998457256, "grad_norm": 0.14895246555611186, "learning_rate": 2e-05, "loss": 5.5306, "step": 6934 }, { "epoch": 0.4651708756749505, "grad_norm": 0.14576682536311208, "learning_rate": 2e-05, "loss": 5.4516, "step": 6935 }, { "epoch": 0.46523795150417546, "grad_norm": 0.15044826716769136, "learning_rate": 2e-05, "loss": 5.352, "step": 6936 }, { "epoch": 0.4653050273334004, "grad_norm": 0.1499369976870163, "learning_rate": 2e-05, "loss": 5.4599, "step": 6937 }, { "epoch": 0.46537210316262534, "grad_norm": 0.14619066504401848, "learning_rate": 2e-05, "loss": 5.4802, "step": 6938 }, { "epoch": 0.4654391789918503, "grad_norm": 0.14499564909946372, "learning_rate": 2e-05, "loss": 5.4248, "step": 6939 }, { "epoch": 0.4655062548210752, "grad_norm": 0.1450752512136105, "learning_rate": 2e-05, "loss": 5.428, "step": 6940 }, { "epoch": 0.46557333065030015, "grad_norm": 0.1521223880857344, "learning_rate": 2e-05, "loss": 5.3318, "step": 6941 }, { "epoch": 0.4656404064795251, "grad_norm": 0.14509791375908196, "learning_rate": 2e-05, "loss": 5.5075, "step": 6942 }, { "epoch": 0.46570748230875003, "grad_norm": 0.15544054107190958, "learning_rate": 2e-05, "loss": 5.4559, "step": 6943 }, { "epoch": 0.46577455813797497, "grad_norm": 0.14575167730400948, "learning_rate": 2e-05, "loss": 5.3916, "step": 6944 }, { "epoch": 0.4658416339671999, "grad_norm": 0.14807596060419498, "learning_rate": 2e-05, "loss": 5.4131, "step": 6945 }, { "epoch": 0.46590870979642485, "grad_norm": 0.14322391638655688, "learning_rate": 2e-05, "loss": 5.3715, "step": 6946 }, { "epoch": 0.4659757856256498, "grad_norm": 0.15063873837732356, "learning_rate": 2e-05, "loss": 5.5099, "step": 6947 }, { "epoch": 0.4660428614548747, "grad_norm": 0.15105829587822217, "learning_rate": 2e-05, "loss": 5.2705, "step": 6948 }, { "epoch": 0.46610993728409966, "grad_norm": 0.14459596212435813, "learning_rate": 2e-05, "loss": 5.3792, "step": 6949 }, { "epoch": 0.4661770131133246, "grad_norm": 0.16080605490714095, "learning_rate": 2e-05, "loss": 5.4508, "step": 6950 }, { "epoch": 0.46624408894254954, "grad_norm": 0.16460932887660304, "learning_rate": 2e-05, "loss": 5.3322, "step": 6951 }, { "epoch": 0.4663111647717745, "grad_norm": 0.15574547189581986, "learning_rate": 2e-05, "loss": 5.4011, "step": 6952 }, { "epoch": 0.4663782406009994, "grad_norm": 0.1577607376432456, "learning_rate": 2e-05, "loss": 5.4805, "step": 6953 }, { "epoch": 0.46644531643022435, "grad_norm": 0.17555296342978122, "learning_rate": 2e-05, "loss": 5.2703, "step": 6954 }, { "epoch": 0.4665123922594493, "grad_norm": 0.16838851330235774, "learning_rate": 2e-05, "loss": 5.2932, "step": 6955 }, { "epoch": 0.46657946808867423, "grad_norm": 0.15901917232692345, "learning_rate": 2e-05, "loss": 5.5105, "step": 6956 }, { "epoch": 0.46664654391789917, "grad_norm": 0.16861780933168086, "learning_rate": 2e-05, "loss": 5.3871, "step": 6957 }, { "epoch": 0.4667136197471241, "grad_norm": 0.16326064206530153, "learning_rate": 2e-05, "loss": 5.4557, "step": 6958 }, { "epoch": 0.46678069557634905, "grad_norm": 0.1633761599671977, "learning_rate": 2e-05, "loss": 5.3838, "step": 6959 }, { "epoch": 0.466847771405574, "grad_norm": 0.1679975380080666, "learning_rate": 2e-05, "loss": 5.3934, "step": 6960 }, { "epoch": 0.4669148472347989, "grad_norm": 0.158663602604193, "learning_rate": 2e-05, "loss": 5.4511, "step": 6961 }, { "epoch": 0.46698192306402386, "grad_norm": 0.15678210495018338, "learning_rate": 2e-05, "loss": 5.388, "step": 6962 }, { "epoch": 0.4670489988932488, "grad_norm": 0.17086768274088696, "learning_rate": 2e-05, "loss": 5.3663, "step": 6963 }, { "epoch": 0.46711607472247374, "grad_norm": 0.162827416041489, "learning_rate": 2e-05, "loss": 5.4393, "step": 6964 }, { "epoch": 0.4671831505516987, "grad_norm": 0.1550178347049582, "learning_rate": 2e-05, "loss": 5.4802, "step": 6965 }, { "epoch": 0.4672502263809236, "grad_norm": 0.15052288379174217, "learning_rate": 2e-05, "loss": 5.4163, "step": 6966 }, { "epoch": 0.46731730221014856, "grad_norm": 0.1643149266747628, "learning_rate": 2e-05, "loss": 5.4085, "step": 6967 }, { "epoch": 0.4673843780393735, "grad_norm": 0.15428797244819617, "learning_rate": 2e-05, "loss": 5.4965, "step": 6968 }, { "epoch": 0.46745145386859843, "grad_norm": 0.15680098529623573, "learning_rate": 2e-05, "loss": 5.4042, "step": 6969 }, { "epoch": 0.46751852969782337, "grad_norm": 0.15456396020306393, "learning_rate": 2e-05, "loss": 5.4657, "step": 6970 }, { "epoch": 0.4675856055270483, "grad_norm": 0.15908333199389801, "learning_rate": 2e-05, "loss": 5.4843, "step": 6971 }, { "epoch": 0.46765268135627325, "grad_norm": 0.15096196002979345, "learning_rate": 2e-05, "loss": 5.477, "step": 6972 }, { "epoch": 0.4677197571854982, "grad_norm": 0.14517318762685819, "learning_rate": 2e-05, "loss": 5.4378, "step": 6973 }, { "epoch": 0.4677868330147231, "grad_norm": 0.14526678749809008, "learning_rate": 2e-05, "loss": 5.5, "step": 6974 }, { "epoch": 0.46785390884394806, "grad_norm": 0.14838438429484202, "learning_rate": 2e-05, "loss": 5.4366, "step": 6975 }, { "epoch": 0.467920984673173, "grad_norm": 0.15083819579342492, "learning_rate": 2e-05, "loss": 5.4487, "step": 6976 }, { "epoch": 0.46798806050239794, "grad_norm": 0.15078236004040363, "learning_rate": 2e-05, "loss": 5.3287, "step": 6977 }, { "epoch": 0.4680551363316229, "grad_norm": 0.14476192290056245, "learning_rate": 2e-05, "loss": 5.4115, "step": 6978 }, { "epoch": 0.4681222121608478, "grad_norm": 0.1524476741219239, "learning_rate": 2e-05, "loss": 5.3283, "step": 6979 }, { "epoch": 0.46818928799007276, "grad_norm": 0.15244660659648973, "learning_rate": 2e-05, "loss": 5.522, "step": 6980 }, { "epoch": 0.4682563638192977, "grad_norm": 0.14850735971232454, "learning_rate": 2e-05, "loss": 5.4675, "step": 6981 }, { "epoch": 0.46832343964852263, "grad_norm": 0.14867585902672484, "learning_rate": 2e-05, "loss": 5.5048, "step": 6982 }, { "epoch": 0.46839051547774757, "grad_norm": 0.15229088513187325, "learning_rate": 2e-05, "loss": 5.5209, "step": 6983 }, { "epoch": 0.4684575913069725, "grad_norm": 0.15368260048624632, "learning_rate": 2e-05, "loss": 5.3887, "step": 6984 }, { "epoch": 0.46852466713619745, "grad_norm": 0.14816642439735356, "learning_rate": 2e-05, "loss": 5.4227, "step": 6985 }, { "epoch": 0.4685917429654224, "grad_norm": 0.15552532520012194, "learning_rate": 2e-05, "loss": 5.3546, "step": 6986 }, { "epoch": 0.4686588187946473, "grad_norm": 0.15127746452766766, "learning_rate": 2e-05, "loss": 5.4511, "step": 6987 }, { "epoch": 0.46872589462387226, "grad_norm": 0.1458201309497989, "learning_rate": 2e-05, "loss": 5.4824, "step": 6988 }, { "epoch": 0.4687929704530972, "grad_norm": 0.142185068088717, "learning_rate": 2e-05, "loss": 5.3291, "step": 6989 }, { "epoch": 0.46886004628232214, "grad_norm": 0.1528851349212613, "learning_rate": 2e-05, "loss": 5.6121, "step": 6990 }, { "epoch": 0.4689271221115471, "grad_norm": 0.1587520248158114, "learning_rate": 2e-05, "loss": 5.4673, "step": 6991 }, { "epoch": 0.468994197940772, "grad_norm": 0.15525343762500834, "learning_rate": 2e-05, "loss": 5.5033, "step": 6992 }, { "epoch": 0.46906127376999696, "grad_norm": 0.15060961543726586, "learning_rate": 2e-05, "loss": 5.3683, "step": 6993 }, { "epoch": 0.4691283495992219, "grad_norm": 0.1536314619817886, "learning_rate": 2e-05, "loss": 5.3527, "step": 6994 }, { "epoch": 0.46919542542844683, "grad_norm": 0.15283819098722823, "learning_rate": 2e-05, "loss": 5.4251, "step": 6995 }, { "epoch": 0.46926250125767177, "grad_norm": 0.14469738123727, "learning_rate": 2e-05, "loss": 5.2665, "step": 6996 }, { "epoch": 0.4693295770868967, "grad_norm": 0.15031851550471959, "learning_rate": 2e-05, "loss": 5.3827, "step": 6997 }, { "epoch": 0.46939665291612165, "grad_norm": 0.15325799241283403, "learning_rate": 2e-05, "loss": 5.4197, "step": 6998 }, { "epoch": 0.4694637287453466, "grad_norm": 0.1444738715388843, "learning_rate": 2e-05, "loss": 5.4171, "step": 6999 }, { "epoch": 0.4695308045745716, "grad_norm": 0.15026752290282694, "learning_rate": 2e-05, "loss": 5.2907, "step": 7000 }, { "epoch": 0.4695978804037965, "grad_norm": 0.15124675083582748, "learning_rate": 2e-05, "loss": 5.3846, "step": 7001 }, { "epoch": 0.46966495623302146, "grad_norm": 0.15140967716987938, "learning_rate": 2e-05, "loss": 5.5107, "step": 7002 }, { "epoch": 0.4697320320622464, "grad_norm": 0.1580748981080875, "learning_rate": 2e-05, "loss": 5.4361, "step": 7003 }, { "epoch": 0.46979910789147133, "grad_norm": 0.1523201867589836, "learning_rate": 2e-05, "loss": 5.349, "step": 7004 }, { "epoch": 0.4698661837206963, "grad_norm": 0.14733243919103625, "learning_rate": 2e-05, "loss": 5.3569, "step": 7005 }, { "epoch": 0.4699332595499212, "grad_norm": 0.14837503589564116, "learning_rate": 2e-05, "loss": 5.3707, "step": 7006 }, { "epoch": 0.47000033537914615, "grad_norm": 0.1457263705325594, "learning_rate": 2e-05, "loss": 5.3917, "step": 7007 }, { "epoch": 0.4700674112083711, "grad_norm": 0.14606040745015228, "learning_rate": 2e-05, "loss": 5.4696, "step": 7008 }, { "epoch": 0.470134487037596, "grad_norm": 0.15692607254318305, "learning_rate": 2e-05, "loss": 5.4654, "step": 7009 }, { "epoch": 0.47020156286682097, "grad_norm": 0.15575720115298333, "learning_rate": 2e-05, "loss": 5.462, "step": 7010 }, { "epoch": 0.4702686386960459, "grad_norm": 0.14326998630258567, "learning_rate": 2e-05, "loss": 5.4348, "step": 7011 }, { "epoch": 0.47033571452527084, "grad_norm": 0.144821808548869, "learning_rate": 2e-05, "loss": 5.419, "step": 7012 }, { "epoch": 0.4704027903544958, "grad_norm": 0.15347878031280804, "learning_rate": 2e-05, "loss": 5.339, "step": 7013 }, { "epoch": 0.4704698661837207, "grad_norm": 0.15847225713464538, "learning_rate": 2e-05, "loss": 5.4024, "step": 7014 }, { "epoch": 0.47053694201294566, "grad_norm": 0.1455759737754067, "learning_rate": 2e-05, "loss": 5.4692, "step": 7015 }, { "epoch": 0.4706040178421706, "grad_norm": 0.15040515834481022, "learning_rate": 2e-05, "loss": 5.4443, "step": 7016 }, { "epoch": 0.47067109367139554, "grad_norm": 0.15125768525968966, "learning_rate": 2e-05, "loss": 5.4246, "step": 7017 }, { "epoch": 0.4707381695006205, "grad_norm": 0.15180455601271675, "learning_rate": 2e-05, "loss": 5.4035, "step": 7018 }, { "epoch": 0.4708052453298454, "grad_norm": 0.15198463968325512, "learning_rate": 2e-05, "loss": 5.5136, "step": 7019 }, { "epoch": 0.47087232115907035, "grad_norm": 0.16574946399870077, "learning_rate": 2e-05, "loss": 5.4779, "step": 7020 }, { "epoch": 0.4709393969882953, "grad_norm": 0.14894287388374852, "learning_rate": 2e-05, "loss": 5.3972, "step": 7021 }, { "epoch": 0.4710064728175202, "grad_norm": 0.15069343913289585, "learning_rate": 2e-05, "loss": 5.3528, "step": 7022 }, { "epoch": 0.47107354864674517, "grad_norm": 0.16120201934583922, "learning_rate": 2e-05, "loss": 5.4352, "step": 7023 }, { "epoch": 0.4711406244759701, "grad_norm": 0.14591869113627476, "learning_rate": 2e-05, "loss": 5.4248, "step": 7024 }, { "epoch": 0.47120770030519504, "grad_norm": 0.14396289683946004, "learning_rate": 2e-05, "loss": 5.4075, "step": 7025 }, { "epoch": 0.47127477613442, "grad_norm": 0.1549584987498031, "learning_rate": 2e-05, "loss": 5.4564, "step": 7026 }, { "epoch": 0.4713418519636449, "grad_norm": 0.14930450162423045, "learning_rate": 2e-05, "loss": 5.5726, "step": 7027 }, { "epoch": 0.47140892779286986, "grad_norm": 0.14999222937203896, "learning_rate": 2e-05, "loss": 5.5259, "step": 7028 }, { "epoch": 0.4714760036220948, "grad_norm": 0.14997826572325013, "learning_rate": 2e-05, "loss": 5.3575, "step": 7029 }, { "epoch": 0.47154307945131974, "grad_norm": 0.15548288073005398, "learning_rate": 2e-05, "loss": 5.4515, "step": 7030 }, { "epoch": 0.4716101552805447, "grad_norm": 0.14886411229145416, "learning_rate": 2e-05, "loss": 5.4284, "step": 7031 }, { "epoch": 0.4716772311097696, "grad_norm": 0.15337719805985794, "learning_rate": 2e-05, "loss": 5.5198, "step": 7032 }, { "epoch": 0.47174430693899455, "grad_norm": 0.1502071705543121, "learning_rate": 2e-05, "loss": 5.545, "step": 7033 }, { "epoch": 0.4718113827682195, "grad_norm": 0.1476980118117674, "learning_rate": 2e-05, "loss": 5.3544, "step": 7034 }, { "epoch": 0.47187845859744443, "grad_norm": 0.1457508672342308, "learning_rate": 2e-05, "loss": 5.5156, "step": 7035 }, { "epoch": 0.47194553442666937, "grad_norm": 0.14546539959259364, "learning_rate": 2e-05, "loss": 5.4381, "step": 7036 }, { "epoch": 0.4720126102558943, "grad_norm": 0.14592919466654572, "learning_rate": 2e-05, "loss": 5.4729, "step": 7037 }, { "epoch": 0.47207968608511924, "grad_norm": 0.14557976831231637, "learning_rate": 2e-05, "loss": 5.3494, "step": 7038 }, { "epoch": 0.4721467619143442, "grad_norm": 0.15221546779516065, "learning_rate": 2e-05, "loss": 5.2691, "step": 7039 }, { "epoch": 0.4722138377435691, "grad_norm": 0.15445237215868854, "learning_rate": 2e-05, "loss": 5.2303, "step": 7040 }, { "epoch": 0.47228091357279406, "grad_norm": 0.1482400080966502, "learning_rate": 2e-05, "loss": 5.3332, "step": 7041 }, { "epoch": 0.472347989402019, "grad_norm": 0.1453974738415693, "learning_rate": 2e-05, "loss": 5.5634, "step": 7042 }, { "epoch": 0.47241506523124394, "grad_norm": 0.14333494847416883, "learning_rate": 2e-05, "loss": 5.5008, "step": 7043 }, { "epoch": 0.4724821410604689, "grad_norm": 0.15001001145412718, "learning_rate": 2e-05, "loss": 5.3821, "step": 7044 }, { "epoch": 0.4725492168896938, "grad_norm": 0.1476244881701856, "learning_rate": 2e-05, "loss": 5.3958, "step": 7045 }, { "epoch": 0.47261629271891875, "grad_norm": 0.1545180779495855, "learning_rate": 2e-05, "loss": 5.4396, "step": 7046 }, { "epoch": 0.4726833685481437, "grad_norm": 0.14568541040636881, "learning_rate": 2e-05, "loss": 5.4488, "step": 7047 }, { "epoch": 0.47275044437736863, "grad_norm": 0.15233894199417186, "learning_rate": 2e-05, "loss": 5.376, "step": 7048 }, { "epoch": 0.47281752020659357, "grad_norm": 0.1525453208961433, "learning_rate": 2e-05, "loss": 5.4635, "step": 7049 }, { "epoch": 0.4728845960358185, "grad_norm": 0.1501492999166353, "learning_rate": 2e-05, "loss": 5.4358, "step": 7050 }, { "epoch": 0.47295167186504344, "grad_norm": 0.15407883866179395, "learning_rate": 2e-05, "loss": 5.5374, "step": 7051 }, { "epoch": 0.4730187476942684, "grad_norm": 0.15416804151531227, "learning_rate": 2e-05, "loss": 5.3813, "step": 7052 }, { "epoch": 0.4730858235234933, "grad_norm": 0.15495579803064236, "learning_rate": 2e-05, "loss": 5.4438, "step": 7053 }, { "epoch": 0.47315289935271826, "grad_norm": 0.16426293962551494, "learning_rate": 2e-05, "loss": 5.3869, "step": 7054 }, { "epoch": 0.4732199751819432, "grad_norm": 0.15300237598355898, "learning_rate": 2e-05, "loss": 5.3235, "step": 7055 }, { "epoch": 0.47328705101116814, "grad_norm": 0.15259624293121657, "learning_rate": 2e-05, "loss": 5.3646, "step": 7056 }, { "epoch": 0.4733541268403931, "grad_norm": 0.15888804921462327, "learning_rate": 2e-05, "loss": 5.334, "step": 7057 }, { "epoch": 0.473421202669618, "grad_norm": 0.17105387336145111, "learning_rate": 2e-05, "loss": 5.4187, "step": 7058 }, { "epoch": 0.47348827849884295, "grad_norm": 0.16155212181007028, "learning_rate": 2e-05, "loss": 5.5105, "step": 7059 }, { "epoch": 0.4735553543280679, "grad_norm": 0.15102749878527363, "learning_rate": 2e-05, "loss": 5.4294, "step": 7060 }, { "epoch": 0.47362243015729283, "grad_norm": 0.1496486713186681, "learning_rate": 2e-05, "loss": 5.2933, "step": 7061 }, { "epoch": 0.47368950598651777, "grad_norm": 0.15676027513442467, "learning_rate": 2e-05, "loss": 5.5596, "step": 7062 }, { "epoch": 0.4737565818157427, "grad_norm": 0.15441913640555863, "learning_rate": 2e-05, "loss": 5.3985, "step": 7063 }, { "epoch": 0.47382365764496764, "grad_norm": 0.15608366308836696, "learning_rate": 2e-05, "loss": 5.3727, "step": 7064 }, { "epoch": 0.4738907334741926, "grad_norm": 0.15554032002952767, "learning_rate": 2e-05, "loss": 5.417, "step": 7065 }, { "epoch": 0.4739578093034175, "grad_norm": 0.1565772864772116, "learning_rate": 2e-05, "loss": 5.4268, "step": 7066 }, { "epoch": 0.47402488513264246, "grad_norm": 0.15530276000923693, "learning_rate": 2e-05, "loss": 5.3737, "step": 7067 }, { "epoch": 0.4740919609618674, "grad_norm": 0.14587650640771035, "learning_rate": 2e-05, "loss": 5.4018, "step": 7068 }, { "epoch": 0.47415903679109234, "grad_norm": 0.14556864064802366, "learning_rate": 2e-05, "loss": 5.3493, "step": 7069 }, { "epoch": 0.4742261126203173, "grad_norm": 0.1563257736899565, "learning_rate": 2e-05, "loss": 5.4527, "step": 7070 }, { "epoch": 0.4742931884495422, "grad_norm": 0.1482914558110726, "learning_rate": 2e-05, "loss": 5.3431, "step": 7071 }, { "epoch": 0.47436026427876715, "grad_norm": 0.1465860892842665, "learning_rate": 2e-05, "loss": 5.5619, "step": 7072 }, { "epoch": 0.4744273401079921, "grad_norm": 0.1512103598922994, "learning_rate": 2e-05, "loss": 5.3591, "step": 7073 }, { "epoch": 0.47449441593721703, "grad_norm": 0.1454430740876668, "learning_rate": 2e-05, "loss": 5.4539, "step": 7074 }, { "epoch": 0.47456149176644197, "grad_norm": 0.1518782647031839, "learning_rate": 2e-05, "loss": 5.525, "step": 7075 }, { "epoch": 0.4746285675956669, "grad_norm": 0.1589013008251621, "learning_rate": 2e-05, "loss": 5.4673, "step": 7076 }, { "epoch": 0.47469564342489184, "grad_norm": 0.15092694733649542, "learning_rate": 2e-05, "loss": 5.4905, "step": 7077 }, { "epoch": 0.4747627192541168, "grad_norm": 0.14860782855098018, "learning_rate": 2e-05, "loss": 5.5149, "step": 7078 }, { "epoch": 0.4748297950833417, "grad_norm": 0.14835289670659169, "learning_rate": 2e-05, "loss": 5.4625, "step": 7079 }, { "epoch": 0.47489687091256666, "grad_norm": 0.15095604646665606, "learning_rate": 2e-05, "loss": 5.4848, "step": 7080 }, { "epoch": 0.4749639467417916, "grad_norm": 0.15081421556807773, "learning_rate": 2e-05, "loss": 5.4097, "step": 7081 }, { "epoch": 0.47503102257101654, "grad_norm": 0.15639563614816515, "learning_rate": 2e-05, "loss": 5.4587, "step": 7082 }, { "epoch": 0.4750980984002415, "grad_norm": 0.14450567767434483, "learning_rate": 2e-05, "loss": 5.4118, "step": 7083 }, { "epoch": 0.4751651742294664, "grad_norm": 0.15075090309577313, "learning_rate": 2e-05, "loss": 5.5447, "step": 7084 }, { "epoch": 0.47523225005869135, "grad_norm": 0.17026457186549246, "learning_rate": 2e-05, "loss": 5.4593, "step": 7085 }, { "epoch": 0.4752993258879163, "grad_norm": 0.151831800897495, "learning_rate": 2e-05, "loss": 5.3146, "step": 7086 }, { "epoch": 0.47536640171714123, "grad_norm": 0.16474019473008378, "learning_rate": 2e-05, "loss": 5.3182, "step": 7087 }, { "epoch": 0.47543347754636617, "grad_norm": 0.15141010858655685, "learning_rate": 2e-05, "loss": 5.452, "step": 7088 }, { "epoch": 0.4755005533755911, "grad_norm": 0.1495741489809151, "learning_rate": 2e-05, "loss": 5.5541, "step": 7089 }, { "epoch": 0.47556762920481604, "grad_norm": 0.14818934058305516, "learning_rate": 2e-05, "loss": 5.374, "step": 7090 }, { "epoch": 0.475634705034041, "grad_norm": 0.1505527596122846, "learning_rate": 2e-05, "loss": 5.5115, "step": 7091 }, { "epoch": 0.4757017808632659, "grad_norm": 0.14557337942761822, "learning_rate": 2e-05, "loss": 5.4277, "step": 7092 }, { "epoch": 0.47576885669249086, "grad_norm": 0.14912953077347485, "learning_rate": 2e-05, "loss": 5.4288, "step": 7093 }, { "epoch": 0.4758359325217158, "grad_norm": 0.14725768142476706, "learning_rate": 2e-05, "loss": 5.4856, "step": 7094 }, { "epoch": 0.47590300835094074, "grad_norm": 0.14971096477074916, "learning_rate": 2e-05, "loss": 5.3406, "step": 7095 }, { "epoch": 0.4759700841801657, "grad_norm": 0.15454710778429775, "learning_rate": 2e-05, "loss": 5.4377, "step": 7096 }, { "epoch": 0.4760371600093906, "grad_norm": 0.15122767531304657, "learning_rate": 2e-05, "loss": 5.3907, "step": 7097 }, { "epoch": 0.47610423583861555, "grad_norm": 0.1495880296343396, "learning_rate": 2e-05, "loss": 5.3662, "step": 7098 }, { "epoch": 0.4761713116678405, "grad_norm": 0.14866414092183727, "learning_rate": 2e-05, "loss": 5.3875, "step": 7099 }, { "epoch": 0.47623838749706543, "grad_norm": 0.1550179621638455, "learning_rate": 2e-05, "loss": 5.563, "step": 7100 }, { "epoch": 0.47630546332629037, "grad_norm": 0.14794742047900147, "learning_rate": 2e-05, "loss": 5.4479, "step": 7101 }, { "epoch": 0.4763725391555153, "grad_norm": 0.15181509113987407, "learning_rate": 2e-05, "loss": 5.4316, "step": 7102 }, { "epoch": 0.47643961498474025, "grad_norm": 0.15324335247814216, "learning_rate": 2e-05, "loss": 5.4017, "step": 7103 }, { "epoch": 0.4765066908139652, "grad_norm": 0.1503073529074295, "learning_rate": 2e-05, "loss": 5.4335, "step": 7104 }, { "epoch": 0.4765737666431901, "grad_norm": 0.16218907575707472, "learning_rate": 2e-05, "loss": 5.353, "step": 7105 }, { "epoch": 0.47664084247241506, "grad_norm": 0.16973919978090304, "learning_rate": 2e-05, "loss": 5.4921, "step": 7106 }, { "epoch": 0.47670791830164, "grad_norm": 0.14840798072309921, "learning_rate": 2e-05, "loss": 5.5148, "step": 7107 }, { "epoch": 0.47677499413086494, "grad_norm": 0.15403197375771643, "learning_rate": 2e-05, "loss": 5.4896, "step": 7108 }, { "epoch": 0.4768420699600899, "grad_norm": 0.15853772750421377, "learning_rate": 2e-05, "loss": 5.3163, "step": 7109 }, { "epoch": 0.4769091457893148, "grad_norm": 0.1539396463714218, "learning_rate": 2e-05, "loss": 5.3515, "step": 7110 }, { "epoch": 0.47697622161853975, "grad_norm": 0.16022732707726917, "learning_rate": 2e-05, "loss": 5.3171, "step": 7111 }, { "epoch": 0.4770432974477647, "grad_norm": 0.16481221026623305, "learning_rate": 2e-05, "loss": 5.373, "step": 7112 }, { "epoch": 0.47711037327698963, "grad_norm": 0.1540770060749335, "learning_rate": 2e-05, "loss": 5.5448, "step": 7113 }, { "epoch": 0.47717744910621457, "grad_norm": 0.15249764515966305, "learning_rate": 2e-05, "loss": 5.4576, "step": 7114 }, { "epoch": 0.4772445249354395, "grad_norm": 0.1606215745211595, "learning_rate": 2e-05, "loss": 5.4905, "step": 7115 }, { "epoch": 0.47731160076466445, "grad_norm": 0.1580556603853395, "learning_rate": 2e-05, "loss": 5.5124, "step": 7116 }, { "epoch": 0.4773786765938894, "grad_norm": 0.15495528731623578, "learning_rate": 2e-05, "loss": 5.5091, "step": 7117 }, { "epoch": 0.4774457524231143, "grad_norm": 0.16303702654424315, "learning_rate": 2e-05, "loss": 5.3566, "step": 7118 }, { "epoch": 0.47751282825233926, "grad_norm": 0.14742613795220103, "learning_rate": 2e-05, "loss": 5.4279, "step": 7119 }, { "epoch": 0.4775799040815642, "grad_norm": 0.15846344948614238, "learning_rate": 2e-05, "loss": 5.3838, "step": 7120 }, { "epoch": 0.47764697991078914, "grad_norm": 0.1551969568212782, "learning_rate": 2e-05, "loss": 5.3616, "step": 7121 }, { "epoch": 0.4777140557400141, "grad_norm": 0.14348958252661287, "learning_rate": 2e-05, "loss": 5.3965, "step": 7122 }, { "epoch": 0.477781131569239, "grad_norm": 0.15796100738070057, "learning_rate": 2e-05, "loss": 5.3398, "step": 7123 }, { "epoch": 0.47784820739846395, "grad_norm": 0.1435400115671241, "learning_rate": 2e-05, "loss": 5.3768, "step": 7124 }, { "epoch": 0.4779152832276889, "grad_norm": 0.14541366683966472, "learning_rate": 2e-05, "loss": 5.4386, "step": 7125 }, { "epoch": 0.47798235905691383, "grad_norm": 0.14549469605691437, "learning_rate": 2e-05, "loss": 5.4135, "step": 7126 }, { "epoch": 0.47804943488613877, "grad_norm": 0.14391941233102157, "learning_rate": 2e-05, "loss": 5.3327, "step": 7127 }, { "epoch": 0.4781165107153637, "grad_norm": 0.15190921717238606, "learning_rate": 2e-05, "loss": 5.4388, "step": 7128 }, { "epoch": 0.47818358654458865, "grad_norm": 0.1473232117612222, "learning_rate": 2e-05, "loss": 5.4679, "step": 7129 }, { "epoch": 0.4782506623738136, "grad_norm": 0.1471886168172578, "learning_rate": 2e-05, "loss": 5.4574, "step": 7130 }, { "epoch": 0.4783177382030385, "grad_norm": 0.15140057146117514, "learning_rate": 2e-05, "loss": 5.4886, "step": 7131 }, { "epoch": 0.47838481403226346, "grad_norm": 0.15301364638501258, "learning_rate": 2e-05, "loss": 5.4097, "step": 7132 }, { "epoch": 0.4784518898614884, "grad_norm": 0.14662513096978805, "learning_rate": 2e-05, "loss": 5.4277, "step": 7133 }, { "epoch": 0.47851896569071334, "grad_norm": 0.14906702573487288, "learning_rate": 2e-05, "loss": 5.4397, "step": 7134 }, { "epoch": 0.4785860415199383, "grad_norm": 0.1522302810951062, "learning_rate": 2e-05, "loss": 5.4627, "step": 7135 }, { "epoch": 0.4786531173491632, "grad_norm": 0.15712710944592592, "learning_rate": 2e-05, "loss": 5.3835, "step": 7136 }, { "epoch": 0.47872019317838815, "grad_norm": 0.1463798587314211, "learning_rate": 2e-05, "loss": 5.5135, "step": 7137 }, { "epoch": 0.4787872690076131, "grad_norm": 0.14738389234853552, "learning_rate": 2e-05, "loss": 5.3001, "step": 7138 }, { "epoch": 0.47885434483683803, "grad_norm": 0.15056120649457233, "learning_rate": 2e-05, "loss": 5.5988, "step": 7139 }, { "epoch": 0.47892142066606297, "grad_norm": 0.14486173798406385, "learning_rate": 2e-05, "loss": 5.5992, "step": 7140 }, { "epoch": 0.4789884964952879, "grad_norm": 0.14951438366330638, "learning_rate": 2e-05, "loss": 5.3339, "step": 7141 }, { "epoch": 0.47905557232451285, "grad_norm": 0.14523592284045908, "learning_rate": 2e-05, "loss": 5.452, "step": 7142 }, { "epoch": 0.4791226481537378, "grad_norm": 0.14637461306745242, "learning_rate": 2e-05, "loss": 5.3278, "step": 7143 }, { "epoch": 0.4791897239829627, "grad_norm": 0.1471406674901263, "learning_rate": 2e-05, "loss": 5.239, "step": 7144 }, { "epoch": 0.47925679981218766, "grad_norm": 0.15042059536662566, "learning_rate": 2e-05, "loss": 5.2991, "step": 7145 }, { "epoch": 0.4793238756414126, "grad_norm": 0.15012075157493426, "learning_rate": 2e-05, "loss": 5.4285, "step": 7146 }, { "epoch": 0.47939095147063754, "grad_norm": 0.14980168960914758, "learning_rate": 2e-05, "loss": 5.4169, "step": 7147 }, { "epoch": 0.4794580272998625, "grad_norm": 0.1486573783646942, "learning_rate": 2e-05, "loss": 5.4759, "step": 7148 }, { "epoch": 0.4795251031290874, "grad_norm": 0.14805029127774072, "learning_rate": 2e-05, "loss": 5.4376, "step": 7149 }, { "epoch": 0.47959217895831235, "grad_norm": 0.15585679469674993, "learning_rate": 2e-05, "loss": 5.3585, "step": 7150 }, { "epoch": 0.4796592547875373, "grad_norm": 0.14435586982336682, "learning_rate": 2e-05, "loss": 5.3387, "step": 7151 }, { "epoch": 0.47972633061676223, "grad_norm": 0.14640287229526952, "learning_rate": 2e-05, "loss": 5.3128, "step": 7152 }, { "epoch": 0.47979340644598717, "grad_norm": 0.14796852265759888, "learning_rate": 2e-05, "loss": 5.5741, "step": 7153 }, { "epoch": 0.4798604822752121, "grad_norm": 0.14502091283514357, "learning_rate": 2e-05, "loss": 5.5194, "step": 7154 }, { "epoch": 0.47992755810443705, "grad_norm": 0.15494615208648788, "learning_rate": 2e-05, "loss": 5.4171, "step": 7155 }, { "epoch": 0.479994633933662, "grad_norm": 0.15149844425794795, "learning_rate": 2e-05, "loss": 5.3781, "step": 7156 }, { "epoch": 0.4800617097628869, "grad_norm": 0.1466702461556769, "learning_rate": 2e-05, "loss": 5.4374, "step": 7157 }, { "epoch": 0.48012878559211186, "grad_norm": 0.1588476330491587, "learning_rate": 2e-05, "loss": 5.4872, "step": 7158 }, { "epoch": 0.4801958614213368, "grad_norm": 0.1515707869445218, "learning_rate": 2e-05, "loss": 5.4077, "step": 7159 }, { "epoch": 0.48026293725056174, "grad_norm": 0.14864861657478365, "learning_rate": 2e-05, "loss": 5.4259, "step": 7160 }, { "epoch": 0.4803300130797867, "grad_norm": 0.15255478337652367, "learning_rate": 2e-05, "loss": 5.4176, "step": 7161 }, { "epoch": 0.4803970889090116, "grad_norm": 0.15897453247855622, "learning_rate": 2e-05, "loss": 5.5404, "step": 7162 }, { "epoch": 0.48046416473823655, "grad_norm": 0.15044540689810043, "learning_rate": 2e-05, "loss": 5.4386, "step": 7163 }, { "epoch": 0.4805312405674615, "grad_norm": 0.1556626351240373, "learning_rate": 2e-05, "loss": 5.4885, "step": 7164 }, { "epoch": 0.48059831639668643, "grad_norm": 0.14595894743633367, "learning_rate": 2e-05, "loss": 5.4048, "step": 7165 }, { "epoch": 0.48066539222591137, "grad_norm": 0.15134208399259938, "learning_rate": 2e-05, "loss": 5.4638, "step": 7166 }, { "epoch": 0.4807324680551363, "grad_norm": 0.15151241377607125, "learning_rate": 2e-05, "loss": 5.4685, "step": 7167 }, { "epoch": 0.48079954388436125, "grad_norm": 0.15802932347009083, "learning_rate": 2e-05, "loss": 5.4681, "step": 7168 }, { "epoch": 0.4808666197135862, "grad_norm": 0.15153621920174257, "learning_rate": 2e-05, "loss": 5.406, "step": 7169 }, { "epoch": 0.4809336955428111, "grad_norm": 0.14705603366068126, "learning_rate": 2e-05, "loss": 5.4962, "step": 7170 }, { "epoch": 0.48100077137203606, "grad_norm": 0.156340159350827, "learning_rate": 2e-05, "loss": 5.4464, "step": 7171 }, { "epoch": 0.481067847201261, "grad_norm": 0.15794460705235147, "learning_rate": 2e-05, "loss": 5.3045, "step": 7172 }, { "epoch": 0.48113492303048594, "grad_norm": 0.1438930600130996, "learning_rate": 2e-05, "loss": 5.5131, "step": 7173 }, { "epoch": 0.4812019988597109, "grad_norm": 0.14906007530156354, "learning_rate": 2e-05, "loss": 5.4346, "step": 7174 }, { "epoch": 0.4812690746889358, "grad_norm": 0.14930351332128874, "learning_rate": 2e-05, "loss": 5.4465, "step": 7175 }, { "epoch": 0.48133615051816075, "grad_norm": 0.15303461345419767, "learning_rate": 2e-05, "loss": 5.4803, "step": 7176 }, { "epoch": 0.4814032263473857, "grad_norm": 0.14967384017253055, "learning_rate": 2e-05, "loss": 5.4779, "step": 7177 }, { "epoch": 0.48147030217661063, "grad_norm": 0.1477154932622654, "learning_rate": 2e-05, "loss": 5.3413, "step": 7178 }, { "epoch": 0.48153737800583557, "grad_norm": 0.1462549150576034, "learning_rate": 2e-05, "loss": 5.4788, "step": 7179 }, { "epoch": 0.4816044538350605, "grad_norm": 0.15129461953756793, "learning_rate": 2e-05, "loss": 5.4202, "step": 7180 }, { "epoch": 0.48167152966428545, "grad_norm": 0.15036248402378666, "learning_rate": 2e-05, "loss": 5.448, "step": 7181 }, { "epoch": 0.48173860549351044, "grad_norm": 0.14607043923221408, "learning_rate": 2e-05, "loss": 5.4706, "step": 7182 }, { "epoch": 0.4818056813227354, "grad_norm": 0.14673759201158743, "learning_rate": 2e-05, "loss": 5.4352, "step": 7183 }, { "epoch": 0.4818727571519603, "grad_norm": 0.15376199553197617, "learning_rate": 2e-05, "loss": 5.3919, "step": 7184 }, { "epoch": 0.48193983298118526, "grad_norm": 0.14706334524811973, "learning_rate": 2e-05, "loss": 5.3281, "step": 7185 }, { "epoch": 0.4820069088104102, "grad_norm": 0.15865856994213615, "learning_rate": 2e-05, "loss": 5.4867, "step": 7186 }, { "epoch": 0.48207398463963513, "grad_norm": 0.15233387825524852, "learning_rate": 2e-05, "loss": 5.4004, "step": 7187 }, { "epoch": 0.48214106046886007, "grad_norm": 0.15114422030740565, "learning_rate": 2e-05, "loss": 5.5165, "step": 7188 }, { "epoch": 0.482208136298085, "grad_norm": 0.1595546624424555, "learning_rate": 2e-05, "loss": 5.2434, "step": 7189 }, { "epoch": 0.48227521212730995, "grad_norm": 0.15308210518761203, "learning_rate": 2e-05, "loss": 5.4224, "step": 7190 }, { "epoch": 0.4823422879565349, "grad_norm": 0.15759850558057317, "learning_rate": 2e-05, "loss": 5.3783, "step": 7191 }, { "epoch": 0.4824093637857598, "grad_norm": 0.15214376454907774, "learning_rate": 2e-05, "loss": 5.4918, "step": 7192 }, { "epoch": 0.48247643961498476, "grad_norm": 0.15621428669641066, "learning_rate": 2e-05, "loss": 5.4321, "step": 7193 }, { "epoch": 0.4825435154442097, "grad_norm": 0.14971181772536118, "learning_rate": 2e-05, "loss": 5.4541, "step": 7194 }, { "epoch": 0.48261059127343464, "grad_norm": 0.14985748591935463, "learning_rate": 2e-05, "loss": 5.4902, "step": 7195 }, { "epoch": 0.4826776671026596, "grad_norm": 0.1452995024340614, "learning_rate": 2e-05, "loss": 5.3554, "step": 7196 }, { "epoch": 0.4827447429318845, "grad_norm": 0.1512344731888584, "learning_rate": 2e-05, "loss": 5.538, "step": 7197 }, { "epoch": 0.48281181876110946, "grad_norm": 0.15236855763933382, "learning_rate": 2e-05, "loss": 5.59, "step": 7198 }, { "epoch": 0.4828788945903344, "grad_norm": 0.14721440172251093, "learning_rate": 2e-05, "loss": 5.4057, "step": 7199 }, { "epoch": 0.48294597041955933, "grad_norm": 0.15394570408204344, "learning_rate": 2e-05, "loss": 5.2487, "step": 7200 }, { "epoch": 0.4830130462487843, "grad_norm": 0.15833976955644558, "learning_rate": 2e-05, "loss": 5.4323, "step": 7201 }, { "epoch": 0.4830801220780092, "grad_norm": 0.1460480833587102, "learning_rate": 2e-05, "loss": 5.5769, "step": 7202 }, { "epoch": 0.48314719790723415, "grad_norm": 0.14901336553274924, "learning_rate": 2e-05, "loss": 5.4396, "step": 7203 }, { "epoch": 0.4832142737364591, "grad_norm": 0.14973673641754803, "learning_rate": 2e-05, "loss": 5.3468, "step": 7204 }, { "epoch": 0.483281349565684, "grad_norm": 0.14952930254309985, "learning_rate": 2e-05, "loss": 5.4307, "step": 7205 }, { "epoch": 0.48334842539490896, "grad_norm": 0.15457975011053585, "learning_rate": 2e-05, "loss": 5.4822, "step": 7206 }, { "epoch": 0.4834155012241339, "grad_norm": 0.15670827096402998, "learning_rate": 2e-05, "loss": 5.5103, "step": 7207 }, { "epoch": 0.48348257705335884, "grad_norm": 0.14431913658098558, "learning_rate": 2e-05, "loss": 5.5043, "step": 7208 }, { "epoch": 0.4835496528825838, "grad_norm": 0.14900481919289224, "learning_rate": 2e-05, "loss": 5.5302, "step": 7209 }, { "epoch": 0.4836167287118087, "grad_norm": 0.16257939168451774, "learning_rate": 2e-05, "loss": 5.375, "step": 7210 }, { "epoch": 0.48368380454103366, "grad_norm": 0.15706419470573438, "learning_rate": 2e-05, "loss": 5.463, "step": 7211 }, { "epoch": 0.4837508803702586, "grad_norm": 0.14698180286726303, "learning_rate": 2e-05, "loss": 5.4635, "step": 7212 }, { "epoch": 0.48381795619948353, "grad_norm": 0.16144182112517527, "learning_rate": 2e-05, "loss": 5.5842, "step": 7213 }, { "epoch": 0.4838850320287085, "grad_norm": 0.15160042566855805, "learning_rate": 2e-05, "loss": 5.2685, "step": 7214 }, { "epoch": 0.4839521078579334, "grad_norm": 0.1479223062215044, "learning_rate": 2e-05, "loss": 5.4028, "step": 7215 }, { "epoch": 0.48401918368715835, "grad_norm": 0.1723480593939872, "learning_rate": 2e-05, "loss": 5.4066, "step": 7216 }, { "epoch": 0.4840862595163833, "grad_norm": 0.14556003276166202, "learning_rate": 2e-05, "loss": 5.4499, "step": 7217 }, { "epoch": 0.4841533353456082, "grad_norm": 0.14727492038290915, "learning_rate": 2e-05, "loss": 5.4539, "step": 7218 }, { "epoch": 0.48422041117483317, "grad_norm": 0.15191141240931258, "learning_rate": 2e-05, "loss": 5.3853, "step": 7219 }, { "epoch": 0.4842874870040581, "grad_norm": 0.15748275258328945, "learning_rate": 2e-05, "loss": 5.3311, "step": 7220 }, { "epoch": 0.48435456283328304, "grad_norm": 0.15367027542254708, "learning_rate": 2e-05, "loss": 5.4493, "step": 7221 }, { "epoch": 0.484421638662508, "grad_norm": 0.15937339210677987, "learning_rate": 2e-05, "loss": 5.4809, "step": 7222 }, { "epoch": 0.4844887144917329, "grad_norm": 0.1505050570673283, "learning_rate": 2e-05, "loss": 5.4235, "step": 7223 }, { "epoch": 0.48455579032095786, "grad_norm": 0.1561622894509671, "learning_rate": 2e-05, "loss": 5.4289, "step": 7224 }, { "epoch": 0.4846228661501828, "grad_norm": 0.1520399357717214, "learning_rate": 2e-05, "loss": 5.4604, "step": 7225 }, { "epoch": 0.48468994197940773, "grad_norm": 0.16184145803396152, "learning_rate": 2e-05, "loss": 5.3956, "step": 7226 }, { "epoch": 0.4847570178086327, "grad_norm": 0.153995835836526, "learning_rate": 2e-05, "loss": 5.3668, "step": 7227 }, { "epoch": 0.4848240936378576, "grad_norm": 0.16102504275174043, "learning_rate": 2e-05, "loss": 5.4118, "step": 7228 }, { "epoch": 0.48489116946708255, "grad_norm": 0.1514930306662499, "learning_rate": 2e-05, "loss": 5.3949, "step": 7229 }, { "epoch": 0.4849582452963075, "grad_norm": 0.14792295346259657, "learning_rate": 2e-05, "loss": 5.3383, "step": 7230 }, { "epoch": 0.4850253211255324, "grad_norm": 0.14531363794565663, "learning_rate": 2e-05, "loss": 5.5121, "step": 7231 }, { "epoch": 0.48509239695475737, "grad_norm": 0.15375885560313798, "learning_rate": 2e-05, "loss": 5.6094, "step": 7232 }, { "epoch": 0.4851594727839823, "grad_norm": 0.14375431503557334, "learning_rate": 2e-05, "loss": 5.4084, "step": 7233 }, { "epoch": 0.48522654861320724, "grad_norm": 0.1507104667862044, "learning_rate": 2e-05, "loss": 5.383, "step": 7234 }, { "epoch": 0.4852936244424322, "grad_norm": 0.14909048413890083, "learning_rate": 2e-05, "loss": 5.4151, "step": 7235 }, { "epoch": 0.4853607002716571, "grad_norm": 0.15498586068943693, "learning_rate": 2e-05, "loss": 5.4859, "step": 7236 }, { "epoch": 0.48542777610088206, "grad_norm": 0.1487593368095114, "learning_rate": 2e-05, "loss": 5.5252, "step": 7237 }, { "epoch": 0.485494851930107, "grad_norm": 0.1441308767906382, "learning_rate": 2e-05, "loss": 5.3439, "step": 7238 }, { "epoch": 0.48556192775933193, "grad_norm": 0.14471638911085416, "learning_rate": 2e-05, "loss": 5.3225, "step": 7239 }, { "epoch": 0.4856290035885569, "grad_norm": 0.14794392887282126, "learning_rate": 2e-05, "loss": 5.4654, "step": 7240 }, { "epoch": 0.4856960794177818, "grad_norm": 0.15655771164621232, "learning_rate": 2e-05, "loss": 5.3604, "step": 7241 }, { "epoch": 0.48576315524700675, "grad_norm": 0.14686778225930835, "learning_rate": 2e-05, "loss": 5.5356, "step": 7242 }, { "epoch": 0.4858302310762317, "grad_norm": 0.14688443094997333, "learning_rate": 2e-05, "loss": 5.2394, "step": 7243 }, { "epoch": 0.4858973069054566, "grad_norm": 0.14975028423416792, "learning_rate": 2e-05, "loss": 5.5256, "step": 7244 }, { "epoch": 0.48596438273468157, "grad_norm": 0.15171252965856863, "learning_rate": 2e-05, "loss": 5.5066, "step": 7245 }, { "epoch": 0.4860314585639065, "grad_norm": 0.15116870494138113, "learning_rate": 2e-05, "loss": 5.4447, "step": 7246 }, { "epoch": 0.48609853439313144, "grad_norm": 0.1583084576506635, "learning_rate": 2e-05, "loss": 5.4208, "step": 7247 }, { "epoch": 0.4861656102223564, "grad_norm": 0.14687487533256569, "learning_rate": 2e-05, "loss": 5.5114, "step": 7248 }, { "epoch": 0.4862326860515813, "grad_norm": 0.14935406393678977, "learning_rate": 2e-05, "loss": 5.4201, "step": 7249 }, { "epoch": 0.48629976188080626, "grad_norm": 0.14954897755542043, "learning_rate": 2e-05, "loss": 5.496, "step": 7250 }, { "epoch": 0.4863668377100312, "grad_norm": 0.1593018404290953, "learning_rate": 2e-05, "loss": 5.3228, "step": 7251 }, { "epoch": 0.48643391353925614, "grad_norm": 0.15643533625858957, "learning_rate": 2e-05, "loss": 5.3544, "step": 7252 }, { "epoch": 0.4865009893684811, "grad_norm": 0.15317024975845922, "learning_rate": 2e-05, "loss": 5.4816, "step": 7253 }, { "epoch": 0.486568065197706, "grad_norm": 0.15804081139701784, "learning_rate": 2e-05, "loss": 5.2495, "step": 7254 }, { "epoch": 0.48663514102693095, "grad_norm": 0.1512658096795007, "learning_rate": 2e-05, "loss": 5.4525, "step": 7255 }, { "epoch": 0.4867022168561559, "grad_norm": 0.1604444991774679, "learning_rate": 2e-05, "loss": 5.5251, "step": 7256 }, { "epoch": 0.4867692926853808, "grad_norm": 0.15487534591121785, "learning_rate": 2e-05, "loss": 5.4638, "step": 7257 }, { "epoch": 0.48683636851460577, "grad_norm": 0.15744175053912654, "learning_rate": 2e-05, "loss": 5.3925, "step": 7258 }, { "epoch": 0.4869034443438307, "grad_norm": 0.15064438494237076, "learning_rate": 2e-05, "loss": 5.3812, "step": 7259 }, { "epoch": 0.48697052017305564, "grad_norm": 0.14759350471260566, "learning_rate": 2e-05, "loss": 5.3875, "step": 7260 }, { "epoch": 0.4870375960022806, "grad_norm": 0.15488651204879006, "learning_rate": 2e-05, "loss": 5.5139, "step": 7261 }, { "epoch": 0.4871046718315055, "grad_norm": 0.1510709798287007, "learning_rate": 2e-05, "loss": 5.4949, "step": 7262 }, { "epoch": 0.48717174766073046, "grad_norm": 0.15586635036988952, "learning_rate": 2e-05, "loss": 5.2981, "step": 7263 }, { "epoch": 0.4872388234899554, "grad_norm": 0.14894841523593785, "learning_rate": 2e-05, "loss": 5.3099, "step": 7264 }, { "epoch": 0.48730589931918034, "grad_norm": 0.16072686471492592, "learning_rate": 2e-05, "loss": 5.5359, "step": 7265 }, { "epoch": 0.4873729751484053, "grad_norm": 0.15898965332663906, "learning_rate": 2e-05, "loss": 5.4223, "step": 7266 }, { "epoch": 0.4874400509776302, "grad_norm": 0.1586309921049504, "learning_rate": 2e-05, "loss": 5.4204, "step": 7267 }, { "epoch": 0.48750712680685515, "grad_norm": 0.15640945612684462, "learning_rate": 2e-05, "loss": 5.5505, "step": 7268 }, { "epoch": 0.4875742026360801, "grad_norm": 0.15190178550787586, "learning_rate": 2e-05, "loss": 5.4078, "step": 7269 }, { "epoch": 0.48764127846530503, "grad_norm": 0.15615448861194242, "learning_rate": 2e-05, "loss": 5.4231, "step": 7270 }, { "epoch": 0.48770835429452997, "grad_norm": 0.1526838129750418, "learning_rate": 2e-05, "loss": 5.5557, "step": 7271 }, { "epoch": 0.4877754301237549, "grad_norm": 0.15652278016886093, "learning_rate": 2e-05, "loss": 5.4621, "step": 7272 }, { "epoch": 0.48784250595297984, "grad_norm": 0.14567339673387988, "learning_rate": 2e-05, "loss": 5.35, "step": 7273 }, { "epoch": 0.4879095817822048, "grad_norm": 0.15376818869708161, "learning_rate": 2e-05, "loss": 5.443, "step": 7274 }, { "epoch": 0.4879766576114297, "grad_norm": 0.15669586971431973, "learning_rate": 2e-05, "loss": 5.4154, "step": 7275 }, { "epoch": 0.48804373344065466, "grad_norm": 0.15883099207299614, "learning_rate": 2e-05, "loss": 5.4181, "step": 7276 }, { "epoch": 0.4881108092698796, "grad_norm": 0.15043008847153788, "learning_rate": 2e-05, "loss": 5.4429, "step": 7277 }, { "epoch": 0.48817788509910454, "grad_norm": 0.15483432694847063, "learning_rate": 2e-05, "loss": 5.4933, "step": 7278 }, { "epoch": 0.4882449609283295, "grad_norm": 0.1545682021886171, "learning_rate": 2e-05, "loss": 5.4759, "step": 7279 }, { "epoch": 0.4883120367575544, "grad_norm": 0.15432544307291698, "learning_rate": 2e-05, "loss": 5.442, "step": 7280 }, { "epoch": 0.48837911258677935, "grad_norm": 0.15102922866601076, "learning_rate": 2e-05, "loss": 5.4148, "step": 7281 }, { "epoch": 0.4884461884160043, "grad_norm": 0.1511750886042475, "learning_rate": 2e-05, "loss": 5.3881, "step": 7282 }, { "epoch": 0.48851326424522923, "grad_norm": 0.15092538841594275, "learning_rate": 2e-05, "loss": 5.497, "step": 7283 }, { "epoch": 0.48858034007445417, "grad_norm": 0.1514836220463496, "learning_rate": 2e-05, "loss": 5.4697, "step": 7284 }, { "epoch": 0.4886474159036791, "grad_norm": 0.14590567016284545, "learning_rate": 2e-05, "loss": 5.3227, "step": 7285 }, { "epoch": 0.48871449173290404, "grad_norm": 0.14814018405574228, "learning_rate": 2e-05, "loss": 5.263, "step": 7286 }, { "epoch": 0.488781567562129, "grad_norm": 0.16107482286755004, "learning_rate": 2e-05, "loss": 5.2532, "step": 7287 }, { "epoch": 0.4888486433913539, "grad_norm": 0.16204589906385908, "learning_rate": 2e-05, "loss": 5.3667, "step": 7288 }, { "epoch": 0.48891571922057886, "grad_norm": 0.14749550513383114, "learning_rate": 2e-05, "loss": 5.6149, "step": 7289 }, { "epoch": 0.4889827950498038, "grad_norm": 0.1512847998639348, "learning_rate": 2e-05, "loss": 5.5244, "step": 7290 }, { "epoch": 0.48904987087902874, "grad_norm": 0.1514335420552851, "learning_rate": 2e-05, "loss": 5.3617, "step": 7291 }, { "epoch": 0.4891169467082537, "grad_norm": 0.14717525457234365, "learning_rate": 2e-05, "loss": 5.4796, "step": 7292 }, { "epoch": 0.4891840225374786, "grad_norm": 0.16056604494878024, "learning_rate": 2e-05, "loss": 5.4254, "step": 7293 }, { "epoch": 0.48925109836670355, "grad_norm": 0.1540764392954355, "learning_rate": 2e-05, "loss": 5.4145, "step": 7294 }, { "epoch": 0.4893181741959285, "grad_norm": 0.14887749843084067, "learning_rate": 2e-05, "loss": 5.4381, "step": 7295 }, { "epoch": 0.48938525002515343, "grad_norm": 0.15324048633624998, "learning_rate": 2e-05, "loss": 5.4373, "step": 7296 }, { "epoch": 0.48945232585437837, "grad_norm": 0.15361168438499584, "learning_rate": 2e-05, "loss": 5.4034, "step": 7297 }, { "epoch": 0.4895194016836033, "grad_norm": 0.14736234021738698, "learning_rate": 2e-05, "loss": 5.3831, "step": 7298 }, { "epoch": 0.48958647751282824, "grad_norm": 0.15234083022592604, "learning_rate": 2e-05, "loss": 5.4698, "step": 7299 }, { "epoch": 0.4896535533420532, "grad_norm": 0.17178232386592673, "learning_rate": 2e-05, "loss": 5.4816, "step": 7300 }, { "epoch": 0.4897206291712781, "grad_norm": 0.14889977147743444, "learning_rate": 2e-05, "loss": 5.4684, "step": 7301 }, { "epoch": 0.48978770500050306, "grad_norm": 0.15351696885518062, "learning_rate": 2e-05, "loss": 5.4303, "step": 7302 }, { "epoch": 0.489854780829728, "grad_norm": 0.16351921511092238, "learning_rate": 2e-05, "loss": 5.5137, "step": 7303 }, { "epoch": 0.48992185665895294, "grad_norm": 0.1654184985618466, "learning_rate": 2e-05, "loss": 5.4756, "step": 7304 }, { "epoch": 0.4899889324881779, "grad_norm": 0.15750446006244753, "learning_rate": 2e-05, "loss": 5.4447, "step": 7305 }, { "epoch": 0.4900560083174028, "grad_norm": 0.14765041868567377, "learning_rate": 2e-05, "loss": 5.4576, "step": 7306 }, { "epoch": 0.49012308414662775, "grad_norm": 0.16307480625606408, "learning_rate": 2e-05, "loss": 5.4654, "step": 7307 }, { "epoch": 0.4901901599758527, "grad_norm": 0.16886740398968939, "learning_rate": 2e-05, "loss": 5.5234, "step": 7308 }, { "epoch": 0.49025723580507763, "grad_norm": 0.14715670891556135, "learning_rate": 2e-05, "loss": 5.4287, "step": 7309 }, { "epoch": 0.49032431163430257, "grad_norm": 0.16467723103239904, "learning_rate": 2e-05, "loss": 5.3599, "step": 7310 }, { "epoch": 0.4903913874635275, "grad_norm": 0.1641650675084048, "learning_rate": 2e-05, "loss": 5.5028, "step": 7311 }, { "epoch": 0.49045846329275244, "grad_norm": 0.160804365773499, "learning_rate": 2e-05, "loss": 5.2942, "step": 7312 }, { "epoch": 0.4905255391219774, "grad_norm": 0.15114443611370876, "learning_rate": 2e-05, "loss": 5.5131, "step": 7313 }, { "epoch": 0.4905926149512023, "grad_norm": 0.15351866253266877, "learning_rate": 2e-05, "loss": 5.3654, "step": 7314 }, { "epoch": 0.49065969078042726, "grad_norm": 0.1491332456310917, "learning_rate": 2e-05, "loss": 5.471, "step": 7315 }, { "epoch": 0.4907267666096522, "grad_norm": 0.15107609018904766, "learning_rate": 2e-05, "loss": 5.4782, "step": 7316 }, { "epoch": 0.49079384243887714, "grad_norm": 0.15522129549452196, "learning_rate": 2e-05, "loss": 5.4005, "step": 7317 }, { "epoch": 0.4908609182681021, "grad_norm": 0.15176919897160024, "learning_rate": 2e-05, "loss": 5.5291, "step": 7318 }, { "epoch": 0.490927994097327, "grad_norm": 0.15745152959862943, "learning_rate": 2e-05, "loss": 5.4727, "step": 7319 }, { "epoch": 0.49099506992655195, "grad_norm": 0.16024346162750774, "learning_rate": 2e-05, "loss": 5.4279, "step": 7320 }, { "epoch": 0.4910621457557769, "grad_norm": 0.1610090961026196, "learning_rate": 2e-05, "loss": 5.3595, "step": 7321 }, { "epoch": 0.49112922158500183, "grad_norm": 0.14941672910416284, "learning_rate": 2e-05, "loss": 5.3921, "step": 7322 }, { "epoch": 0.49119629741422677, "grad_norm": 0.16082172379639403, "learning_rate": 2e-05, "loss": 5.4394, "step": 7323 }, { "epoch": 0.4912633732434517, "grad_norm": 0.16619122989563992, "learning_rate": 2e-05, "loss": 5.4383, "step": 7324 }, { "epoch": 0.49133044907267664, "grad_norm": 0.15530321961486573, "learning_rate": 2e-05, "loss": 5.5008, "step": 7325 }, { "epoch": 0.4913975249019016, "grad_norm": 0.1633538832361557, "learning_rate": 2e-05, "loss": 5.3698, "step": 7326 }, { "epoch": 0.4914646007311265, "grad_norm": 0.15519371061274828, "learning_rate": 2e-05, "loss": 5.3471, "step": 7327 }, { "epoch": 0.49153167656035146, "grad_norm": 0.14880675116399666, "learning_rate": 2e-05, "loss": 5.219, "step": 7328 }, { "epoch": 0.4915987523895764, "grad_norm": 0.1542097470398536, "learning_rate": 2e-05, "loss": 5.4497, "step": 7329 }, { "epoch": 0.49166582821880134, "grad_norm": 0.16245376731204295, "learning_rate": 2e-05, "loss": 5.4297, "step": 7330 }, { "epoch": 0.4917329040480263, "grad_norm": 0.15428889239198523, "learning_rate": 2e-05, "loss": 5.4762, "step": 7331 }, { "epoch": 0.4917999798772512, "grad_norm": 0.16158572878443142, "learning_rate": 2e-05, "loss": 5.3672, "step": 7332 }, { "epoch": 0.49186705570647615, "grad_norm": 0.16444598528504298, "learning_rate": 2e-05, "loss": 5.3639, "step": 7333 }, { "epoch": 0.4919341315357011, "grad_norm": 0.15446604839917247, "learning_rate": 2e-05, "loss": 5.349, "step": 7334 }, { "epoch": 0.49200120736492603, "grad_norm": 0.15255504876955642, "learning_rate": 2e-05, "loss": 5.4209, "step": 7335 }, { "epoch": 0.49206828319415097, "grad_norm": 0.17099724420502918, "learning_rate": 2e-05, "loss": 5.6624, "step": 7336 }, { "epoch": 0.4921353590233759, "grad_norm": 0.15668427713119792, "learning_rate": 2e-05, "loss": 5.4815, "step": 7337 }, { "epoch": 0.49220243485260085, "grad_norm": 0.16060892597048285, "learning_rate": 2e-05, "loss": 5.4941, "step": 7338 }, { "epoch": 0.4922695106818258, "grad_norm": 0.15036976932564064, "learning_rate": 2e-05, "loss": 5.5813, "step": 7339 }, { "epoch": 0.4923365865110507, "grad_norm": 0.16663014088537828, "learning_rate": 2e-05, "loss": 5.5168, "step": 7340 }, { "epoch": 0.49240366234027566, "grad_norm": 0.15424923292352155, "learning_rate": 2e-05, "loss": 5.4477, "step": 7341 }, { "epoch": 0.4924707381695006, "grad_norm": 0.15118684751994713, "learning_rate": 2e-05, "loss": 5.422, "step": 7342 }, { "epoch": 0.49253781399872554, "grad_norm": 0.15504398032314665, "learning_rate": 2e-05, "loss": 5.3713, "step": 7343 }, { "epoch": 0.4926048898279505, "grad_norm": 0.14348048036938035, "learning_rate": 2e-05, "loss": 5.3093, "step": 7344 }, { "epoch": 0.4926719656571754, "grad_norm": 0.14715761135814384, "learning_rate": 2e-05, "loss": 5.5183, "step": 7345 }, { "epoch": 0.49273904148640035, "grad_norm": 0.14802688271704167, "learning_rate": 2e-05, "loss": 5.4543, "step": 7346 }, { "epoch": 0.4928061173156253, "grad_norm": 0.1637264293291327, "learning_rate": 2e-05, "loss": 5.3748, "step": 7347 }, { "epoch": 0.49287319314485023, "grad_norm": 0.14902247554652967, "learning_rate": 2e-05, "loss": 5.2813, "step": 7348 }, { "epoch": 0.49294026897407517, "grad_norm": 0.14992925160874318, "learning_rate": 2e-05, "loss": 5.2976, "step": 7349 }, { "epoch": 0.4930073448033001, "grad_norm": 0.15076817263914208, "learning_rate": 2e-05, "loss": 5.4521, "step": 7350 }, { "epoch": 0.49307442063252505, "grad_norm": 0.15181978518936617, "learning_rate": 2e-05, "loss": 5.3816, "step": 7351 }, { "epoch": 0.49314149646175, "grad_norm": 0.16157486298243506, "learning_rate": 2e-05, "loss": 5.557, "step": 7352 }, { "epoch": 0.4932085722909749, "grad_norm": 0.1470936428216905, "learning_rate": 2e-05, "loss": 5.4885, "step": 7353 }, { "epoch": 0.49327564812019986, "grad_norm": 0.14869459341195967, "learning_rate": 2e-05, "loss": 5.3043, "step": 7354 }, { "epoch": 0.4933427239494248, "grad_norm": 0.15416617890614712, "learning_rate": 2e-05, "loss": 5.296, "step": 7355 }, { "epoch": 0.49340979977864974, "grad_norm": 0.14813384663982665, "learning_rate": 2e-05, "loss": 5.365, "step": 7356 }, { "epoch": 0.4934768756078747, "grad_norm": 0.14485615679068592, "learning_rate": 2e-05, "loss": 5.462, "step": 7357 }, { "epoch": 0.4935439514370996, "grad_norm": 0.16271656729039963, "learning_rate": 2e-05, "loss": 5.4399, "step": 7358 }, { "epoch": 0.49361102726632455, "grad_norm": 0.1576225954400024, "learning_rate": 2e-05, "loss": 5.2975, "step": 7359 }, { "epoch": 0.4936781030955495, "grad_norm": 0.14936831284340293, "learning_rate": 2e-05, "loss": 5.372, "step": 7360 }, { "epoch": 0.49374517892477443, "grad_norm": 0.15079549749380716, "learning_rate": 2e-05, "loss": 5.5759, "step": 7361 }, { "epoch": 0.49381225475399937, "grad_norm": 0.15284468494725442, "learning_rate": 2e-05, "loss": 5.4067, "step": 7362 }, { "epoch": 0.4938793305832243, "grad_norm": 0.14906736556558497, "learning_rate": 2e-05, "loss": 5.478, "step": 7363 }, { "epoch": 0.4939464064124493, "grad_norm": 0.15180554364497692, "learning_rate": 2e-05, "loss": 5.5037, "step": 7364 }, { "epoch": 0.49401348224167424, "grad_norm": 0.15852383605445566, "learning_rate": 2e-05, "loss": 5.4913, "step": 7365 }, { "epoch": 0.4940805580708992, "grad_norm": 0.1469196116340861, "learning_rate": 2e-05, "loss": 5.5134, "step": 7366 }, { "epoch": 0.4941476339001241, "grad_norm": 0.15529352702521085, "learning_rate": 2e-05, "loss": 5.4973, "step": 7367 }, { "epoch": 0.49421470972934906, "grad_norm": 0.15466601535986912, "learning_rate": 2e-05, "loss": 5.4808, "step": 7368 }, { "epoch": 0.494281785558574, "grad_norm": 0.15574612564136242, "learning_rate": 2e-05, "loss": 5.4229, "step": 7369 }, { "epoch": 0.49434886138779893, "grad_norm": 0.15092105507185322, "learning_rate": 2e-05, "loss": 5.3872, "step": 7370 }, { "epoch": 0.49441593721702387, "grad_norm": 0.15201049110387202, "learning_rate": 2e-05, "loss": 5.4947, "step": 7371 }, { "epoch": 0.4944830130462488, "grad_norm": 0.1491958637249756, "learning_rate": 2e-05, "loss": 5.416, "step": 7372 }, { "epoch": 0.49455008887547375, "grad_norm": 0.15047858337264042, "learning_rate": 2e-05, "loss": 5.526, "step": 7373 }, { "epoch": 0.4946171647046987, "grad_norm": 0.16167055546202805, "learning_rate": 2e-05, "loss": 5.3773, "step": 7374 }, { "epoch": 0.4946842405339236, "grad_norm": 0.1487898324243125, "learning_rate": 2e-05, "loss": 5.4238, "step": 7375 }, { "epoch": 0.49475131636314856, "grad_norm": 0.14987136475420948, "learning_rate": 2e-05, "loss": 5.2138, "step": 7376 }, { "epoch": 0.4948183921923735, "grad_norm": 0.14438882208207665, "learning_rate": 2e-05, "loss": 5.4701, "step": 7377 }, { "epoch": 0.49488546802159844, "grad_norm": 0.15058507796650691, "learning_rate": 2e-05, "loss": 5.5816, "step": 7378 }, { "epoch": 0.4949525438508234, "grad_norm": 0.14828850986778236, "learning_rate": 2e-05, "loss": 5.394, "step": 7379 }, { "epoch": 0.4950196196800483, "grad_norm": 0.15037040791649076, "learning_rate": 2e-05, "loss": 5.5762, "step": 7380 }, { "epoch": 0.49508669550927326, "grad_norm": 0.14375111200201504, "learning_rate": 2e-05, "loss": 5.3719, "step": 7381 }, { "epoch": 0.4951537713384982, "grad_norm": 0.14747365407318966, "learning_rate": 2e-05, "loss": 5.6286, "step": 7382 }, { "epoch": 0.49522084716772313, "grad_norm": 0.1544799929180007, "learning_rate": 2e-05, "loss": 5.4175, "step": 7383 }, { "epoch": 0.49528792299694807, "grad_norm": 0.15145863221557915, "learning_rate": 2e-05, "loss": 5.2984, "step": 7384 }, { "epoch": 0.495354998826173, "grad_norm": 0.1504285231726418, "learning_rate": 2e-05, "loss": 5.6142, "step": 7385 }, { "epoch": 0.49542207465539795, "grad_norm": 0.1504542847226113, "learning_rate": 2e-05, "loss": 5.4413, "step": 7386 }, { "epoch": 0.4954891504846229, "grad_norm": 0.15654252390117626, "learning_rate": 2e-05, "loss": 5.5235, "step": 7387 }, { "epoch": 0.4955562263138478, "grad_norm": 0.14656171902116488, "learning_rate": 2e-05, "loss": 5.2867, "step": 7388 }, { "epoch": 0.49562330214307276, "grad_norm": 0.1408586372399371, "learning_rate": 2e-05, "loss": 5.4082, "step": 7389 }, { "epoch": 0.4956903779722977, "grad_norm": 0.15875647223270764, "learning_rate": 2e-05, "loss": 5.3902, "step": 7390 }, { "epoch": 0.49575745380152264, "grad_norm": 0.15090228138929665, "learning_rate": 2e-05, "loss": 5.459, "step": 7391 }, { "epoch": 0.4958245296307476, "grad_norm": 0.14381644786301362, "learning_rate": 2e-05, "loss": 5.4971, "step": 7392 }, { "epoch": 0.4958916054599725, "grad_norm": 0.14856094058700284, "learning_rate": 2e-05, "loss": 5.4548, "step": 7393 }, { "epoch": 0.49595868128919746, "grad_norm": 0.14531309716895177, "learning_rate": 2e-05, "loss": 5.3603, "step": 7394 }, { "epoch": 0.4960257571184224, "grad_norm": 0.15383101403535365, "learning_rate": 2e-05, "loss": 5.4297, "step": 7395 }, { "epoch": 0.49609283294764733, "grad_norm": 0.15476480678891133, "learning_rate": 2e-05, "loss": 5.3693, "step": 7396 }, { "epoch": 0.49615990877687227, "grad_norm": 0.1602932349819086, "learning_rate": 2e-05, "loss": 5.517, "step": 7397 }, { "epoch": 0.4962269846060972, "grad_norm": 0.1434816171997583, "learning_rate": 2e-05, "loss": 5.3836, "step": 7398 }, { "epoch": 0.49629406043532215, "grad_norm": 0.15329345308163647, "learning_rate": 2e-05, "loss": 5.3469, "step": 7399 }, { "epoch": 0.4963611362645471, "grad_norm": 0.1585917060168978, "learning_rate": 2e-05, "loss": 5.4975, "step": 7400 }, { "epoch": 0.496428212093772, "grad_norm": 0.16137478925880805, "learning_rate": 2e-05, "loss": 5.392, "step": 7401 }, { "epoch": 0.49649528792299696, "grad_norm": 0.15073666678885528, "learning_rate": 2e-05, "loss": 5.3102, "step": 7402 }, { "epoch": 0.4965623637522219, "grad_norm": 0.15669872807964913, "learning_rate": 2e-05, "loss": 5.4322, "step": 7403 }, { "epoch": 0.49662943958144684, "grad_norm": 0.14661242263164462, "learning_rate": 2e-05, "loss": 5.4173, "step": 7404 }, { "epoch": 0.4966965154106718, "grad_norm": 0.15441676435937884, "learning_rate": 2e-05, "loss": 5.4697, "step": 7405 }, { "epoch": 0.4967635912398967, "grad_norm": 0.16071996595779603, "learning_rate": 2e-05, "loss": 5.2499, "step": 7406 }, { "epoch": 0.49683066706912166, "grad_norm": 0.15743580470117136, "learning_rate": 2e-05, "loss": 5.3126, "step": 7407 }, { "epoch": 0.4968977428983466, "grad_norm": 0.15411161835432666, "learning_rate": 2e-05, "loss": 5.4146, "step": 7408 }, { "epoch": 0.49696481872757153, "grad_norm": 0.1689865718608176, "learning_rate": 2e-05, "loss": 5.4926, "step": 7409 }, { "epoch": 0.49703189455679647, "grad_norm": 0.16683067716775685, "learning_rate": 2e-05, "loss": 5.4864, "step": 7410 }, { "epoch": 0.4970989703860214, "grad_norm": 0.15409791750806667, "learning_rate": 2e-05, "loss": 5.2462, "step": 7411 }, { "epoch": 0.49716604621524635, "grad_norm": 0.17872495096452615, "learning_rate": 2e-05, "loss": 5.3524, "step": 7412 }, { "epoch": 0.4972331220444713, "grad_norm": 0.1638047627282164, "learning_rate": 2e-05, "loss": 5.4128, "step": 7413 }, { "epoch": 0.4973001978736962, "grad_norm": 0.14708150480467316, "learning_rate": 2e-05, "loss": 5.4763, "step": 7414 }, { "epoch": 0.49736727370292116, "grad_norm": 0.17311442235230826, "learning_rate": 2e-05, "loss": 5.4357, "step": 7415 }, { "epoch": 0.4974343495321461, "grad_norm": 0.1551489731075624, "learning_rate": 2e-05, "loss": 5.3735, "step": 7416 }, { "epoch": 0.49750142536137104, "grad_norm": 0.1532963874981527, "learning_rate": 2e-05, "loss": 5.2161, "step": 7417 }, { "epoch": 0.497568501190596, "grad_norm": 0.15550569966947356, "learning_rate": 2e-05, "loss": 5.3474, "step": 7418 }, { "epoch": 0.4976355770198209, "grad_norm": 0.151812488530657, "learning_rate": 2e-05, "loss": 5.3328, "step": 7419 }, { "epoch": 0.49770265284904586, "grad_norm": 0.14911730902983716, "learning_rate": 2e-05, "loss": 5.355, "step": 7420 }, { "epoch": 0.4977697286782708, "grad_norm": 0.15374808901290318, "learning_rate": 2e-05, "loss": 5.2593, "step": 7421 }, { "epoch": 0.49783680450749573, "grad_norm": 0.15459340247416756, "learning_rate": 2e-05, "loss": 5.4008, "step": 7422 }, { "epoch": 0.4979038803367207, "grad_norm": 0.14881181808140675, "learning_rate": 2e-05, "loss": 5.4629, "step": 7423 }, { "epoch": 0.4979709561659456, "grad_norm": 0.151215150313599, "learning_rate": 2e-05, "loss": 5.4635, "step": 7424 }, { "epoch": 0.49803803199517055, "grad_norm": 0.15541645019531192, "learning_rate": 2e-05, "loss": 5.278, "step": 7425 }, { "epoch": 0.4981051078243955, "grad_norm": 0.15876429010769288, "learning_rate": 2e-05, "loss": 5.4637, "step": 7426 }, { "epoch": 0.4981721836536204, "grad_norm": 0.15270170925691304, "learning_rate": 2e-05, "loss": 5.3333, "step": 7427 }, { "epoch": 0.49823925948284536, "grad_norm": 0.15207889511382583, "learning_rate": 2e-05, "loss": 5.3556, "step": 7428 }, { "epoch": 0.4983063353120703, "grad_norm": 0.15060122726226408, "learning_rate": 2e-05, "loss": 5.401, "step": 7429 }, { "epoch": 0.49837341114129524, "grad_norm": 0.16688460568266866, "learning_rate": 2e-05, "loss": 5.4604, "step": 7430 }, { "epoch": 0.4984404869705202, "grad_norm": 0.16014342537778195, "learning_rate": 2e-05, "loss": 5.3685, "step": 7431 }, { "epoch": 0.4985075627997451, "grad_norm": 0.14719544350142807, "learning_rate": 2e-05, "loss": 5.4636, "step": 7432 }, { "epoch": 0.49857463862897006, "grad_norm": 0.15738118857293631, "learning_rate": 2e-05, "loss": 5.4745, "step": 7433 }, { "epoch": 0.498641714458195, "grad_norm": 0.16112020196640653, "learning_rate": 2e-05, "loss": 5.5399, "step": 7434 }, { "epoch": 0.49870879028741993, "grad_norm": 0.15067087996804576, "learning_rate": 2e-05, "loss": 5.3715, "step": 7435 }, { "epoch": 0.4987758661166449, "grad_norm": 0.1488473866798747, "learning_rate": 2e-05, "loss": 5.3963, "step": 7436 }, { "epoch": 0.4988429419458698, "grad_norm": 0.1540006975750008, "learning_rate": 2e-05, "loss": 5.4914, "step": 7437 }, { "epoch": 0.49891001777509475, "grad_norm": 0.16073220341683198, "learning_rate": 2e-05, "loss": 5.5453, "step": 7438 }, { "epoch": 0.4989770936043197, "grad_norm": 0.14867142628417684, "learning_rate": 2e-05, "loss": 5.2785, "step": 7439 }, { "epoch": 0.4990441694335446, "grad_norm": 0.15569247685686888, "learning_rate": 2e-05, "loss": 5.2703, "step": 7440 }, { "epoch": 0.49911124526276957, "grad_norm": 0.14694011536293347, "learning_rate": 2e-05, "loss": 5.3905, "step": 7441 }, { "epoch": 0.4991783210919945, "grad_norm": 0.15052813489249597, "learning_rate": 2e-05, "loss": 5.4102, "step": 7442 }, { "epoch": 0.49924539692121944, "grad_norm": 0.1542810183907906, "learning_rate": 2e-05, "loss": 5.5069, "step": 7443 }, { "epoch": 0.4993124727504444, "grad_norm": 0.15042989576473836, "learning_rate": 2e-05, "loss": 5.2159, "step": 7444 }, { "epoch": 0.4993795485796693, "grad_norm": 0.14988137771919247, "learning_rate": 2e-05, "loss": 5.3857, "step": 7445 }, { "epoch": 0.49944662440889426, "grad_norm": 0.15275640032052137, "learning_rate": 2e-05, "loss": 5.5608, "step": 7446 }, { "epoch": 0.4995137002381192, "grad_norm": 0.15126945648402246, "learning_rate": 2e-05, "loss": 5.4024, "step": 7447 }, { "epoch": 0.49958077606734413, "grad_norm": 0.15883782591422005, "learning_rate": 2e-05, "loss": 5.3209, "step": 7448 }, { "epoch": 0.4996478518965691, "grad_norm": 0.15349342931470966, "learning_rate": 2e-05, "loss": 5.5526, "step": 7449 }, { "epoch": 0.499714927725794, "grad_norm": 0.14638013764431043, "learning_rate": 2e-05, "loss": 5.5267, "step": 7450 }, { "epoch": 0.49978200355501895, "grad_norm": 0.15136559821132822, "learning_rate": 2e-05, "loss": 5.3797, "step": 7451 }, { "epoch": 0.4998490793842439, "grad_norm": 0.14998787769067554, "learning_rate": 2e-05, "loss": 5.4178, "step": 7452 }, { "epoch": 0.4999161552134688, "grad_norm": 0.15840799862304758, "learning_rate": 2e-05, "loss": 5.4894, "step": 7453 }, { "epoch": 0.49998323104269377, "grad_norm": 0.14849769526703804, "learning_rate": 2e-05, "loss": 5.4163, "step": 7454 }, { "epoch": 0.5000503068719188, "grad_norm": 0.14468403785693854, "learning_rate": 2e-05, "loss": 5.4748, "step": 7455 }, { "epoch": 0.5001173827011437, "grad_norm": 0.1536513854431842, "learning_rate": 2e-05, "loss": 5.4774, "step": 7456 }, { "epoch": 0.5001844585303686, "grad_norm": 0.1551653800762011, "learning_rate": 2e-05, "loss": 5.5548, "step": 7457 }, { "epoch": 0.5002515343595936, "grad_norm": 0.1460361939763679, "learning_rate": 2e-05, "loss": 5.5239, "step": 7458 }, { "epoch": 0.5003186101888185, "grad_norm": 0.15328549245028047, "learning_rate": 2e-05, "loss": 5.3016, "step": 7459 }, { "epoch": 0.5003856860180435, "grad_norm": 0.15898958587691303, "learning_rate": 2e-05, "loss": 5.4227, "step": 7460 }, { "epoch": 0.5004527618472684, "grad_norm": 0.14853568944815865, "learning_rate": 2e-05, "loss": 5.4812, "step": 7461 }, { "epoch": 0.5005198376764933, "grad_norm": 0.14242239516661745, "learning_rate": 2e-05, "loss": 5.5353, "step": 7462 }, { "epoch": 0.5005869135057183, "grad_norm": 0.15860296455282025, "learning_rate": 2e-05, "loss": 5.3804, "step": 7463 }, { "epoch": 0.5006539893349432, "grad_norm": 0.14894528368643964, "learning_rate": 2e-05, "loss": 5.3532, "step": 7464 }, { "epoch": 0.5007210651641681, "grad_norm": 0.15036953853873208, "learning_rate": 2e-05, "loss": 5.4405, "step": 7465 }, { "epoch": 0.5007881409933931, "grad_norm": 0.1470899224497803, "learning_rate": 2e-05, "loss": 5.5348, "step": 7466 }, { "epoch": 0.500855216822618, "grad_norm": 0.14559703289358347, "learning_rate": 2e-05, "loss": 5.2816, "step": 7467 }, { "epoch": 0.500922292651843, "grad_norm": 0.1474493534721766, "learning_rate": 2e-05, "loss": 5.5469, "step": 7468 }, { "epoch": 0.5009893684810679, "grad_norm": 0.14403289299810737, "learning_rate": 2e-05, "loss": 5.4195, "step": 7469 }, { "epoch": 0.5010564443102928, "grad_norm": 0.14993877040447684, "learning_rate": 2e-05, "loss": 5.4448, "step": 7470 }, { "epoch": 0.5011235201395178, "grad_norm": 0.1581128719959514, "learning_rate": 2e-05, "loss": 5.5002, "step": 7471 }, { "epoch": 0.5011905959687427, "grad_norm": 0.1465733119617993, "learning_rate": 2e-05, "loss": 5.5134, "step": 7472 }, { "epoch": 0.5012576717979677, "grad_norm": 0.14569129751067164, "learning_rate": 2e-05, "loss": 5.4118, "step": 7473 }, { "epoch": 0.5013247476271926, "grad_norm": 0.15336130598634462, "learning_rate": 2e-05, "loss": 5.475, "step": 7474 }, { "epoch": 0.5013918234564175, "grad_norm": 0.15042078315036397, "learning_rate": 2e-05, "loss": 5.2311, "step": 7475 }, { "epoch": 0.5014588992856425, "grad_norm": 0.14840586359556102, "learning_rate": 2e-05, "loss": 5.3188, "step": 7476 }, { "epoch": 0.5015259751148674, "grad_norm": 0.1503979229280777, "learning_rate": 2e-05, "loss": 5.4255, "step": 7477 }, { "epoch": 0.5015930509440923, "grad_norm": 0.14575791892980236, "learning_rate": 2e-05, "loss": 5.43, "step": 7478 }, { "epoch": 0.5016601267733173, "grad_norm": 0.1535817836787605, "learning_rate": 2e-05, "loss": 5.4903, "step": 7479 }, { "epoch": 0.5017272026025422, "grad_norm": 0.15282269498577516, "learning_rate": 2e-05, "loss": 5.4876, "step": 7480 }, { "epoch": 0.5017942784317672, "grad_norm": 0.14605284985272918, "learning_rate": 2e-05, "loss": 5.4032, "step": 7481 }, { "epoch": 0.5018613542609921, "grad_norm": 0.1470085488483259, "learning_rate": 2e-05, "loss": 5.4573, "step": 7482 }, { "epoch": 0.501928430090217, "grad_norm": 0.15016859367679483, "learning_rate": 2e-05, "loss": 5.3463, "step": 7483 }, { "epoch": 0.501995505919442, "grad_norm": 0.15668470595044134, "learning_rate": 2e-05, "loss": 5.4228, "step": 7484 }, { "epoch": 0.5020625817486669, "grad_norm": 0.15199804067890302, "learning_rate": 2e-05, "loss": 5.5068, "step": 7485 }, { "epoch": 0.5021296575778919, "grad_norm": 0.15039841388477043, "learning_rate": 2e-05, "loss": 5.293, "step": 7486 }, { "epoch": 0.5021967334071168, "grad_norm": 0.14749382168170322, "learning_rate": 2e-05, "loss": 5.4478, "step": 7487 }, { "epoch": 0.5022638092363417, "grad_norm": 0.1582346818306294, "learning_rate": 2e-05, "loss": 5.3132, "step": 7488 }, { "epoch": 0.5023308850655667, "grad_norm": 0.1545445294288114, "learning_rate": 2e-05, "loss": 5.4659, "step": 7489 }, { "epoch": 0.5023979608947916, "grad_norm": 0.15415239218859283, "learning_rate": 2e-05, "loss": 5.4686, "step": 7490 }, { "epoch": 0.5024650367240165, "grad_norm": 0.15372309857508143, "learning_rate": 2e-05, "loss": 5.4709, "step": 7491 }, { "epoch": 0.5025321125532415, "grad_norm": 0.14849575550976413, "learning_rate": 2e-05, "loss": 5.4922, "step": 7492 }, { "epoch": 0.5025991883824664, "grad_norm": 0.150202069607838, "learning_rate": 2e-05, "loss": 5.3911, "step": 7493 }, { "epoch": 0.5026662642116914, "grad_norm": 0.15858805114438046, "learning_rate": 2e-05, "loss": 5.421, "step": 7494 }, { "epoch": 0.5027333400409163, "grad_norm": 0.15120216648640586, "learning_rate": 2e-05, "loss": 5.1626, "step": 7495 }, { "epoch": 0.5028004158701412, "grad_norm": 0.16220349942644346, "learning_rate": 2e-05, "loss": 5.4816, "step": 7496 }, { "epoch": 0.5028674916993662, "grad_norm": 0.15301888991750312, "learning_rate": 2e-05, "loss": 5.6546, "step": 7497 }, { "epoch": 0.5029345675285911, "grad_norm": 0.15581420242804653, "learning_rate": 2e-05, "loss": 5.4836, "step": 7498 }, { "epoch": 0.503001643357816, "grad_norm": 0.16409706201197455, "learning_rate": 2e-05, "loss": 5.4034, "step": 7499 }, { "epoch": 0.503068719187041, "grad_norm": 0.16316110661818853, "learning_rate": 2e-05, "loss": 5.5302, "step": 7500 }, { "epoch": 0.5031357950162659, "grad_norm": 0.1552723440105388, "learning_rate": 2e-05, "loss": 5.3801, "step": 7501 }, { "epoch": 0.5032028708454909, "grad_norm": 0.15904496100285218, "learning_rate": 2e-05, "loss": 5.4414, "step": 7502 }, { "epoch": 0.5032699466747158, "grad_norm": 0.16172437947438217, "learning_rate": 2e-05, "loss": 5.3544, "step": 7503 }, { "epoch": 0.5033370225039407, "grad_norm": 0.1555977406966109, "learning_rate": 2e-05, "loss": 5.565, "step": 7504 }, { "epoch": 0.5034040983331657, "grad_norm": 0.1512855758698087, "learning_rate": 2e-05, "loss": 5.6012, "step": 7505 }, { "epoch": 0.5034711741623906, "grad_norm": 0.16741341175430866, "learning_rate": 2e-05, "loss": 5.329, "step": 7506 }, { "epoch": 0.5035382499916156, "grad_norm": 0.15421272991232857, "learning_rate": 2e-05, "loss": 5.4362, "step": 7507 }, { "epoch": 0.5036053258208405, "grad_norm": 0.14945417798386607, "learning_rate": 2e-05, "loss": 5.3949, "step": 7508 }, { "epoch": 0.5036724016500654, "grad_norm": 0.15499159571218507, "learning_rate": 2e-05, "loss": 5.4673, "step": 7509 }, { "epoch": 0.5037394774792904, "grad_norm": 0.16135046871618616, "learning_rate": 2e-05, "loss": 5.491, "step": 7510 }, { "epoch": 0.5038065533085153, "grad_norm": 0.1442685783308196, "learning_rate": 2e-05, "loss": 5.5971, "step": 7511 }, { "epoch": 0.5038736291377403, "grad_norm": 0.1549940509401805, "learning_rate": 2e-05, "loss": 5.4764, "step": 7512 }, { "epoch": 0.5039407049669652, "grad_norm": 0.15443209251802545, "learning_rate": 2e-05, "loss": 5.494, "step": 7513 }, { "epoch": 0.5040077807961901, "grad_norm": 0.15769478747414042, "learning_rate": 2e-05, "loss": 5.3916, "step": 7514 }, { "epoch": 0.5040748566254151, "grad_norm": 0.14724824822733562, "learning_rate": 2e-05, "loss": 5.3798, "step": 7515 }, { "epoch": 0.50414193245464, "grad_norm": 0.16200403724468118, "learning_rate": 2e-05, "loss": 5.3543, "step": 7516 }, { "epoch": 0.504209008283865, "grad_norm": 0.15966422013384904, "learning_rate": 2e-05, "loss": 5.3369, "step": 7517 }, { "epoch": 0.5042760841130899, "grad_norm": 0.15860087424768224, "learning_rate": 2e-05, "loss": 5.3875, "step": 7518 }, { "epoch": 0.5043431599423148, "grad_norm": 0.1701809595720194, "learning_rate": 2e-05, "loss": 5.5964, "step": 7519 }, { "epoch": 0.5044102357715398, "grad_norm": 0.14543982872551395, "learning_rate": 2e-05, "loss": 5.3797, "step": 7520 }, { "epoch": 0.5044773116007647, "grad_norm": 0.15402973294431732, "learning_rate": 2e-05, "loss": 5.3252, "step": 7521 }, { "epoch": 0.5045443874299896, "grad_norm": 0.15622226430320704, "learning_rate": 2e-05, "loss": 5.4391, "step": 7522 }, { "epoch": 0.5046114632592146, "grad_norm": 0.15622885906127595, "learning_rate": 2e-05, "loss": 5.4647, "step": 7523 }, { "epoch": 0.5046785390884395, "grad_norm": 0.1488878580951184, "learning_rate": 2e-05, "loss": 5.4351, "step": 7524 }, { "epoch": 0.5047456149176645, "grad_norm": 0.14865825809103014, "learning_rate": 2e-05, "loss": 5.3247, "step": 7525 }, { "epoch": 0.5048126907468894, "grad_norm": 0.1524148310653133, "learning_rate": 2e-05, "loss": 5.3722, "step": 7526 }, { "epoch": 0.5048797665761143, "grad_norm": 0.15064548386176987, "learning_rate": 2e-05, "loss": 5.4812, "step": 7527 }, { "epoch": 0.5049468424053393, "grad_norm": 0.15336677885681033, "learning_rate": 2e-05, "loss": 5.4941, "step": 7528 }, { "epoch": 0.5050139182345642, "grad_norm": 0.15571494998708832, "learning_rate": 2e-05, "loss": 5.3418, "step": 7529 }, { "epoch": 0.5050809940637891, "grad_norm": 0.14579573042145175, "learning_rate": 2e-05, "loss": 5.392, "step": 7530 }, { "epoch": 0.5051480698930141, "grad_norm": 0.15597170315336029, "learning_rate": 2e-05, "loss": 5.3774, "step": 7531 }, { "epoch": 0.505215145722239, "grad_norm": 0.15051703713472628, "learning_rate": 2e-05, "loss": 5.4618, "step": 7532 }, { "epoch": 0.505282221551464, "grad_norm": 0.15581982642043402, "learning_rate": 2e-05, "loss": 5.4734, "step": 7533 }, { "epoch": 0.5053492973806889, "grad_norm": 0.15255052382456633, "learning_rate": 2e-05, "loss": 5.3646, "step": 7534 }, { "epoch": 0.5054163732099138, "grad_norm": 0.15561757272734764, "learning_rate": 2e-05, "loss": 5.4812, "step": 7535 }, { "epoch": 0.5054834490391388, "grad_norm": 0.14595237775738276, "learning_rate": 2e-05, "loss": 5.5249, "step": 7536 }, { "epoch": 0.5055505248683637, "grad_norm": 0.15246835065024367, "learning_rate": 2e-05, "loss": 5.4411, "step": 7537 }, { "epoch": 0.5056176006975887, "grad_norm": 0.15462230404000918, "learning_rate": 2e-05, "loss": 5.4805, "step": 7538 }, { "epoch": 0.5056846765268136, "grad_norm": 0.14925308527166486, "learning_rate": 2e-05, "loss": 5.5144, "step": 7539 }, { "epoch": 0.5057517523560385, "grad_norm": 0.1477493951387361, "learning_rate": 2e-05, "loss": 5.4629, "step": 7540 }, { "epoch": 0.5058188281852635, "grad_norm": 0.15520885657615124, "learning_rate": 2e-05, "loss": 5.4415, "step": 7541 }, { "epoch": 0.5058859040144884, "grad_norm": 0.14238976743383475, "learning_rate": 2e-05, "loss": 5.3297, "step": 7542 }, { "epoch": 0.5059529798437133, "grad_norm": 0.15294774764066438, "learning_rate": 2e-05, "loss": 5.4485, "step": 7543 }, { "epoch": 0.5060200556729383, "grad_norm": 0.14647035411921716, "learning_rate": 2e-05, "loss": 5.4496, "step": 7544 }, { "epoch": 0.5060871315021632, "grad_norm": 0.1609507220392526, "learning_rate": 2e-05, "loss": 5.3018, "step": 7545 }, { "epoch": 0.5061542073313882, "grad_norm": 0.1540920904053237, "learning_rate": 2e-05, "loss": 5.3038, "step": 7546 }, { "epoch": 0.5062212831606131, "grad_norm": 0.14572142642548347, "learning_rate": 2e-05, "loss": 5.3856, "step": 7547 }, { "epoch": 0.506288358989838, "grad_norm": 0.15462267685004072, "learning_rate": 2e-05, "loss": 5.4626, "step": 7548 }, { "epoch": 0.506355434819063, "grad_norm": 0.15287491033591236, "learning_rate": 2e-05, "loss": 5.3556, "step": 7549 }, { "epoch": 0.5064225106482879, "grad_norm": 0.14631982073646543, "learning_rate": 2e-05, "loss": 5.3563, "step": 7550 }, { "epoch": 0.5064895864775129, "grad_norm": 0.14938858592745075, "learning_rate": 2e-05, "loss": 5.3565, "step": 7551 }, { "epoch": 0.5065566623067378, "grad_norm": 0.14804948860152078, "learning_rate": 2e-05, "loss": 5.3713, "step": 7552 }, { "epoch": 0.5066237381359627, "grad_norm": 0.14608953982602568, "learning_rate": 2e-05, "loss": 5.4406, "step": 7553 }, { "epoch": 0.5066908139651877, "grad_norm": 0.14860303200834343, "learning_rate": 2e-05, "loss": 5.4153, "step": 7554 }, { "epoch": 0.5067578897944126, "grad_norm": 0.14746463124460668, "learning_rate": 2e-05, "loss": 5.5013, "step": 7555 }, { "epoch": 0.5068249656236375, "grad_norm": 0.14792704423918232, "learning_rate": 2e-05, "loss": 5.3648, "step": 7556 }, { "epoch": 0.5068920414528625, "grad_norm": 0.1571752402921104, "learning_rate": 2e-05, "loss": 5.4511, "step": 7557 }, { "epoch": 0.5069591172820874, "grad_norm": 0.1517817598756393, "learning_rate": 2e-05, "loss": 5.3742, "step": 7558 }, { "epoch": 0.5070261931113124, "grad_norm": 0.148922437795159, "learning_rate": 2e-05, "loss": 5.4108, "step": 7559 }, { "epoch": 0.5070932689405373, "grad_norm": 0.14973462643150195, "learning_rate": 2e-05, "loss": 5.4401, "step": 7560 }, { "epoch": 0.5071603447697622, "grad_norm": 0.16104454047550829, "learning_rate": 2e-05, "loss": 5.5939, "step": 7561 }, { "epoch": 0.5072274205989872, "grad_norm": 0.14717330435305756, "learning_rate": 2e-05, "loss": 5.3391, "step": 7562 }, { "epoch": 0.5072944964282121, "grad_norm": 0.14919223523818573, "learning_rate": 2e-05, "loss": 5.3493, "step": 7563 }, { "epoch": 0.507361572257437, "grad_norm": 0.15406037726872496, "learning_rate": 2e-05, "loss": 5.3623, "step": 7564 }, { "epoch": 0.507428648086662, "grad_norm": 0.15006822839063239, "learning_rate": 2e-05, "loss": 5.4125, "step": 7565 }, { "epoch": 0.5074957239158869, "grad_norm": 0.14956077846074434, "learning_rate": 2e-05, "loss": 5.5132, "step": 7566 }, { "epoch": 0.5075627997451119, "grad_norm": 0.15181966359698698, "learning_rate": 2e-05, "loss": 5.178, "step": 7567 }, { "epoch": 0.5076298755743368, "grad_norm": 0.14718386882615855, "learning_rate": 2e-05, "loss": 5.4282, "step": 7568 }, { "epoch": 0.5076969514035617, "grad_norm": 0.14563893946392228, "learning_rate": 2e-05, "loss": 5.2596, "step": 7569 }, { "epoch": 0.5077640272327867, "grad_norm": 0.15052146440477016, "learning_rate": 2e-05, "loss": 5.4976, "step": 7570 }, { "epoch": 0.5078311030620116, "grad_norm": 0.14938538793459796, "learning_rate": 2e-05, "loss": 5.3431, "step": 7571 }, { "epoch": 0.5078981788912366, "grad_norm": 0.147062637690144, "learning_rate": 2e-05, "loss": 5.386, "step": 7572 }, { "epoch": 0.5079652547204615, "grad_norm": 0.15014932738276432, "learning_rate": 2e-05, "loss": 5.3869, "step": 7573 }, { "epoch": 0.5080323305496864, "grad_norm": 0.1489978517965839, "learning_rate": 2e-05, "loss": 5.4018, "step": 7574 }, { "epoch": 0.5080994063789114, "grad_norm": 0.15063499453367937, "learning_rate": 2e-05, "loss": 5.4362, "step": 7575 }, { "epoch": 0.5081664822081363, "grad_norm": 0.150408440855287, "learning_rate": 2e-05, "loss": 5.465, "step": 7576 }, { "epoch": 0.5082335580373613, "grad_norm": 0.15103741755638933, "learning_rate": 2e-05, "loss": 5.527, "step": 7577 }, { "epoch": 0.5083006338665862, "grad_norm": 0.1509799651660148, "learning_rate": 2e-05, "loss": 5.2888, "step": 7578 }, { "epoch": 0.5083677096958111, "grad_norm": 0.14840615309117128, "learning_rate": 2e-05, "loss": 5.4704, "step": 7579 }, { "epoch": 0.5084347855250361, "grad_norm": 0.15018406027837014, "learning_rate": 2e-05, "loss": 5.3081, "step": 7580 }, { "epoch": 0.508501861354261, "grad_norm": 0.15923753262360887, "learning_rate": 2e-05, "loss": 5.4035, "step": 7581 }, { "epoch": 0.508568937183486, "grad_norm": 0.1456558606257999, "learning_rate": 2e-05, "loss": 5.4297, "step": 7582 }, { "epoch": 0.5086360130127109, "grad_norm": 0.14965769461585673, "learning_rate": 2e-05, "loss": 5.4812, "step": 7583 }, { "epoch": 0.5087030888419358, "grad_norm": 0.15422933604087044, "learning_rate": 2e-05, "loss": 5.4153, "step": 7584 }, { "epoch": 0.5087701646711608, "grad_norm": 0.14910146634428328, "learning_rate": 2e-05, "loss": 5.3739, "step": 7585 }, { "epoch": 0.5088372405003857, "grad_norm": 0.1443853942229689, "learning_rate": 2e-05, "loss": 5.4342, "step": 7586 }, { "epoch": 0.5089043163296106, "grad_norm": 0.15334256271399696, "learning_rate": 2e-05, "loss": 5.3126, "step": 7587 }, { "epoch": 0.5089713921588356, "grad_norm": 0.15072423909399402, "learning_rate": 2e-05, "loss": 5.4223, "step": 7588 }, { "epoch": 0.5090384679880605, "grad_norm": 0.14727678453373363, "learning_rate": 2e-05, "loss": 5.5347, "step": 7589 }, { "epoch": 0.5091055438172855, "grad_norm": 0.15863148233431099, "learning_rate": 2e-05, "loss": 5.357, "step": 7590 }, { "epoch": 0.5091726196465104, "grad_norm": 0.15373381307326667, "learning_rate": 2e-05, "loss": 5.5135, "step": 7591 }, { "epoch": 0.5092396954757353, "grad_norm": 0.13931670169627325, "learning_rate": 2e-05, "loss": 5.439, "step": 7592 }, { "epoch": 0.5093067713049603, "grad_norm": 0.15801862930125396, "learning_rate": 2e-05, "loss": 5.4241, "step": 7593 }, { "epoch": 0.5093738471341852, "grad_norm": 0.1573108988707798, "learning_rate": 2e-05, "loss": 5.4273, "step": 7594 }, { "epoch": 0.5094409229634101, "grad_norm": 0.1453394338526487, "learning_rate": 2e-05, "loss": 5.4832, "step": 7595 }, { "epoch": 0.5095079987926351, "grad_norm": 0.14730489405289945, "learning_rate": 2e-05, "loss": 5.4851, "step": 7596 }, { "epoch": 0.50957507462186, "grad_norm": 0.15397280382726442, "learning_rate": 2e-05, "loss": 5.4605, "step": 7597 }, { "epoch": 0.509642150451085, "grad_norm": 0.15546870645047167, "learning_rate": 2e-05, "loss": 5.4355, "step": 7598 }, { "epoch": 0.5097092262803099, "grad_norm": 0.1484884084723075, "learning_rate": 2e-05, "loss": 5.3735, "step": 7599 }, { "epoch": 0.5097763021095348, "grad_norm": 0.1514570436352918, "learning_rate": 2e-05, "loss": 5.5365, "step": 7600 }, { "epoch": 0.5098433779387598, "grad_norm": 0.153990255441471, "learning_rate": 2e-05, "loss": 5.5479, "step": 7601 }, { "epoch": 0.5099104537679847, "grad_norm": 0.15788832680558837, "learning_rate": 2e-05, "loss": 5.3309, "step": 7602 }, { "epoch": 0.5099775295972097, "grad_norm": 0.157094327313736, "learning_rate": 2e-05, "loss": 5.4606, "step": 7603 }, { "epoch": 0.5100446054264346, "grad_norm": 0.15307942020264406, "learning_rate": 2e-05, "loss": 5.3572, "step": 7604 }, { "epoch": 0.5101116812556595, "grad_norm": 0.15079700323832285, "learning_rate": 2e-05, "loss": 5.42, "step": 7605 }, { "epoch": 0.5101787570848845, "grad_norm": 0.15662299469034038, "learning_rate": 2e-05, "loss": 5.4682, "step": 7606 }, { "epoch": 0.5102458329141094, "grad_norm": 0.15609398035392588, "learning_rate": 2e-05, "loss": 5.3695, "step": 7607 }, { "epoch": 0.5103129087433343, "grad_norm": 0.1492633766170121, "learning_rate": 2e-05, "loss": 5.353, "step": 7608 }, { "epoch": 0.5103799845725593, "grad_norm": 0.14941435342987225, "learning_rate": 2e-05, "loss": 5.4519, "step": 7609 }, { "epoch": 0.5104470604017842, "grad_norm": 0.15620502254308494, "learning_rate": 2e-05, "loss": 5.4684, "step": 7610 }, { "epoch": 0.5105141362310092, "grad_norm": 0.14499461845251094, "learning_rate": 2e-05, "loss": 5.4602, "step": 7611 }, { "epoch": 0.5105812120602341, "grad_norm": 0.14766362521951215, "learning_rate": 2e-05, "loss": 5.4673, "step": 7612 }, { "epoch": 0.510648287889459, "grad_norm": 0.14472576882281735, "learning_rate": 2e-05, "loss": 5.5275, "step": 7613 }, { "epoch": 0.510715363718684, "grad_norm": 0.14912541925591422, "learning_rate": 2e-05, "loss": 5.3324, "step": 7614 }, { "epoch": 0.5107824395479089, "grad_norm": 0.15234222343508735, "learning_rate": 2e-05, "loss": 5.4509, "step": 7615 }, { "epoch": 0.5108495153771339, "grad_norm": 0.14868414212318373, "learning_rate": 2e-05, "loss": 5.5894, "step": 7616 }, { "epoch": 0.5109165912063588, "grad_norm": 0.15085819596684, "learning_rate": 2e-05, "loss": 5.3947, "step": 7617 }, { "epoch": 0.5109836670355837, "grad_norm": 0.14476936667735474, "learning_rate": 2e-05, "loss": 5.399, "step": 7618 }, { "epoch": 0.5110507428648087, "grad_norm": 0.14401685248645452, "learning_rate": 2e-05, "loss": 5.5535, "step": 7619 }, { "epoch": 0.5111178186940336, "grad_norm": 0.15172274667696406, "learning_rate": 2e-05, "loss": 5.4994, "step": 7620 }, { "epoch": 0.5111848945232585, "grad_norm": 0.1591335148530309, "learning_rate": 2e-05, "loss": 5.4699, "step": 7621 }, { "epoch": 0.5112519703524835, "grad_norm": 0.15656667069772207, "learning_rate": 2e-05, "loss": 5.396, "step": 7622 }, { "epoch": 0.5113190461817084, "grad_norm": 0.15326980473018242, "learning_rate": 2e-05, "loss": 5.4964, "step": 7623 }, { "epoch": 0.5113861220109334, "grad_norm": 0.14989742764568398, "learning_rate": 2e-05, "loss": 5.4836, "step": 7624 }, { "epoch": 0.5114531978401583, "grad_norm": 0.15302909877947926, "learning_rate": 2e-05, "loss": 5.4993, "step": 7625 }, { "epoch": 0.5115202736693832, "grad_norm": 0.14476993387232606, "learning_rate": 2e-05, "loss": 5.3947, "step": 7626 }, { "epoch": 0.5115873494986082, "grad_norm": 0.14607021395160572, "learning_rate": 2e-05, "loss": 5.4959, "step": 7627 }, { "epoch": 0.5116544253278331, "grad_norm": 0.15997244055005225, "learning_rate": 2e-05, "loss": 5.3844, "step": 7628 }, { "epoch": 0.5117215011570581, "grad_norm": 0.15881026810411028, "learning_rate": 2e-05, "loss": 5.5858, "step": 7629 }, { "epoch": 0.511788576986283, "grad_norm": 0.15695121400969286, "learning_rate": 2e-05, "loss": 5.4645, "step": 7630 }, { "epoch": 0.5118556528155079, "grad_norm": 0.15887182533019192, "learning_rate": 2e-05, "loss": 5.4359, "step": 7631 }, { "epoch": 0.5119227286447329, "grad_norm": 0.15475040428560927, "learning_rate": 2e-05, "loss": 5.4574, "step": 7632 }, { "epoch": 0.5119898044739578, "grad_norm": 0.15442883795291307, "learning_rate": 2e-05, "loss": 5.4706, "step": 7633 }, { "epoch": 0.5120568803031827, "grad_norm": 0.15162997438110584, "learning_rate": 2e-05, "loss": 5.4955, "step": 7634 }, { "epoch": 0.5121239561324077, "grad_norm": 0.15573136519131445, "learning_rate": 2e-05, "loss": 5.3714, "step": 7635 }, { "epoch": 0.5121910319616326, "grad_norm": 0.14705516620108924, "learning_rate": 2e-05, "loss": 5.5971, "step": 7636 }, { "epoch": 0.5122581077908576, "grad_norm": 0.1437975587992663, "learning_rate": 2e-05, "loss": 5.4374, "step": 7637 }, { "epoch": 0.5123251836200825, "grad_norm": 0.1507593343983091, "learning_rate": 2e-05, "loss": 5.439, "step": 7638 }, { "epoch": 0.5123922594493074, "grad_norm": 0.15213452640145486, "learning_rate": 2e-05, "loss": 5.4371, "step": 7639 }, { "epoch": 0.5124593352785324, "grad_norm": 0.15975794290191753, "learning_rate": 2e-05, "loss": 5.3974, "step": 7640 }, { "epoch": 0.5125264111077573, "grad_norm": 0.15000335106080795, "learning_rate": 2e-05, "loss": 5.438, "step": 7641 }, { "epoch": 0.5125934869369823, "grad_norm": 0.15284599122181805, "learning_rate": 2e-05, "loss": 5.4216, "step": 7642 }, { "epoch": 0.5126605627662072, "grad_norm": 0.16316873210352706, "learning_rate": 2e-05, "loss": 5.46, "step": 7643 }, { "epoch": 0.5127276385954321, "grad_norm": 0.15250168852958268, "learning_rate": 2e-05, "loss": 5.3392, "step": 7644 }, { "epoch": 0.5127947144246571, "grad_norm": 0.14562764627007063, "learning_rate": 2e-05, "loss": 5.3206, "step": 7645 }, { "epoch": 0.512861790253882, "grad_norm": 0.1479518019326918, "learning_rate": 2e-05, "loss": 5.5282, "step": 7646 }, { "epoch": 0.512928866083107, "grad_norm": 0.16029993146307445, "learning_rate": 2e-05, "loss": 5.4764, "step": 7647 }, { "epoch": 0.5129959419123319, "grad_norm": 0.15639745502381192, "learning_rate": 2e-05, "loss": 5.3985, "step": 7648 }, { "epoch": 0.5130630177415568, "grad_norm": 0.14423703904796029, "learning_rate": 2e-05, "loss": 5.4677, "step": 7649 }, { "epoch": 0.5131300935707818, "grad_norm": 0.14629728231502157, "learning_rate": 2e-05, "loss": 5.3565, "step": 7650 }, { "epoch": 0.5131971694000067, "grad_norm": 0.1658535509047625, "learning_rate": 2e-05, "loss": 5.4935, "step": 7651 }, { "epoch": 0.5132642452292316, "grad_norm": 0.1481675659466687, "learning_rate": 2e-05, "loss": 5.4921, "step": 7652 }, { "epoch": 0.5133313210584566, "grad_norm": 0.15892548494603626, "learning_rate": 2e-05, "loss": 5.3836, "step": 7653 }, { "epoch": 0.5133983968876815, "grad_norm": 0.16006703389268298, "learning_rate": 2e-05, "loss": 5.527, "step": 7654 }, { "epoch": 0.5134654727169065, "grad_norm": 0.16533057819683486, "learning_rate": 2e-05, "loss": 5.4979, "step": 7655 }, { "epoch": 0.5135325485461314, "grad_norm": 0.15021866358211147, "learning_rate": 2e-05, "loss": 5.4679, "step": 7656 }, { "epoch": 0.5135996243753563, "grad_norm": 0.1608442447319529, "learning_rate": 2e-05, "loss": 5.5141, "step": 7657 }, { "epoch": 0.5136667002045813, "grad_norm": 0.16017250648408266, "learning_rate": 2e-05, "loss": 5.3705, "step": 7658 }, { "epoch": 0.5137337760338062, "grad_norm": 0.1615898978727251, "learning_rate": 2e-05, "loss": 5.3462, "step": 7659 }, { "epoch": 0.5138008518630311, "grad_norm": 0.15245707512707762, "learning_rate": 2e-05, "loss": 5.2335, "step": 7660 }, { "epoch": 0.5138679276922561, "grad_norm": 0.150608780898398, "learning_rate": 2e-05, "loss": 5.3518, "step": 7661 }, { "epoch": 0.513935003521481, "grad_norm": 0.16658344348129966, "learning_rate": 2e-05, "loss": 5.4298, "step": 7662 }, { "epoch": 0.514002079350706, "grad_norm": 0.15414837865642642, "learning_rate": 2e-05, "loss": 5.3994, "step": 7663 }, { "epoch": 0.5140691551799309, "grad_norm": 0.14710594458071233, "learning_rate": 2e-05, "loss": 5.6021, "step": 7664 }, { "epoch": 0.5141362310091558, "grad_norm": 0.15883633776531575, "learning_rate": 2e-05, "loss": 5.4396, "step": 7665 }, { "epoch": 0.5142033068383808, "grad_norm": 0.15656448728474712, "learning_rate": 2e-05, "loss": 5.3889, "step": 7666 }, { "epoch": 0.5142703826676057, "grad_norm": 0.15708723653061363, "learning_rate": 2e-05, "loss": 5.4385, "step": 7667 }, { "epoch": 0.5143374584968307, "grad_norm": 0.1512681668550003, "learning_rate": 2e-05, "loss": 5.4363, "step": 7668 }, { "epoch": 0.5144045343260556, "grad_norm": 0.15001780911602486, "learning_rate": 2e-05, "loss": 5.4518, "step": 7669 }, { "epoch": 0.5144716101552805, "grad_norm": 0.1526575649139081, "learning_rate": 2e-05, "loss": 5.4889, "step": 7670 }, { "epoch": 0.5145386859845055, "grad_norm": 0.15287851987678633, "learning_rate": 2e-05, "loss": 5.442, "step": 7671 }, { "epoch": 0.5146057618137304, "grad_norm": 0.15589549987610213, "learning_rate": 2e-05, "loss": 5.3592, "step": 7672 }, { "epoch": 0.5146728376429553, "grad_norm": 0.14883967822712496, "learning_rate": 2e-05, "loss": 5.4647, "step": 7673 }, { "epoch": 0.5147399134721803, "grad_norm": 0.14671914412309597, "learning_rate": 2e-05, "loss": 5.3792, "step": 7674 }, { "epoch": 0.5148069893014052, "grad_norm": 0.14921673064995838, "learning_rate": 2e-05, "loss": 5.4495, "step": 7675 }, { "epoch": 0.5148740651306302, "grad_norm": 0.16230092916474514, "learning_rate": 2e-05, "loss": 5.5898, "step": 7676 }, { "epoch": 0.5149411409598551, "grad_norm": 0.15442752652597477, "learning_rate": 2e-05, "loss": 5.431, "step": 7677 }, { "epoch": 0.51500821678908, "grad_norm": 0.15378506819838372, "learning_rate": 2e-05, "loss": 5.3752, "step": 7678 }, { "epoch": 0.515075292618305, "grad_norm": 0.1558073644226669, "learning_rate": 2e-05, "loss": 5.4768, "step": 7679 }, { "epoch": 0.5151423684475299, "grad_norm": 0.1583105554294859, "learning_rate": 2e-05, "loss": 5.398, "step": 7680 }, { "epoch": 0.5152094442767549, "grad_norm": 0.14661098547558882, "learning_rate": 2e-05, "loss": 5.4678, "step": 7681 }, { "epoch": 0.5152765201059798, "grad_norm": 0.17733575805153223, "learning_rate": 2e-05, "loss": 5.4574, "step": 7682 }, { "epoch": 0.5153435959352047, "grad_norm": 0.1600050222764656, "learning_rate": 2e-05, "loss": 5.341, "step": 7683 }, { "epoch": 0.5154106717644297, "grad_norm": 0.1549602610877884, "learning_rate": 2e-05, "loss": 5.3858, "step": 7684 }, { "epoch": 0.5154777475936546, "grad_norm": 0.15267494414411276, "learning_rate": 2e-05, "loss": 5.4555, "step": 7685 }, { "epoch": 0.5155448234228796, "grad_norm": 0.16480339005872374, "learning_rate": 2e-05, "loss": 5.3944, "step": 7686 }, { "epoch": 0.5156118992521045, "grad_norm": 0.1536399577533238, "learning_rate": 2e-05, "loss": 5.3053, "step": 7687 }, { "epoch": 0.5156789750813294, "grad_norm": 0.15476920740158034, "learning_rate": 2e-05, "loss": 5.3604, "step": 7688 }, { "epoch": 0.5157460509105544, "grad_norm": 0.15908825798849247, "learning_rate": 2e-05, "loss": 5.3364, "step": 7689 }, { "epoch": 0.5158131267397793, "grad_norm": 0.14820788959885473, "learning_rate": 2e-05, "loss": 5.4232, "step": 7690 }, { "epoch": 0.5158802025690042, "grad_norm": 0.1469596227520547, "learning_rate": 2e-05, "loss": 5.3922, "step": 7691 }, { "epoch": 0.5159472783982292, "grad_norm": 0.1499449168157889, "learning_rate": 2e-05, "loss": 5.3436, "step": 7692 }, { "epoch": 0.5160143542274541, "grad_norm": 0.15300477427799156, "learning_rate": 2e-05, "loss": 5.5045, "step": 7693 }, { "epoch": 0.5160814300566791, "grad_norm": 0.1448400497149715, "learning_rate": 2e-05, "loss": 5.5308, "step": 7694 }, { "epoch": 0.516148505885904, "grad_norm": 0.14829417926863836, "learning_rate": 2e-05, "loss": 5.4247, "step": 7695 }, { "epoch": 0.5162155817151289, "grad_norm": 0.15171919349912233, "learning_rate": 2e-05, "loss": 5.4337, "step": 7696 }, { "epoch": 0.5162826575443539, "grad_norm": 0.14647916072400471, "learning_rate": 2e-05, "loss": 5.3832, "step": 7697 }, { "epoch": 0.5163497333735788, "grad_norm": 0.14662961808995686, "learning_rate": 2e-05, "loss": 5.5266, "step": 7698 }, { "epoch": 0.5164168092028038, "grad_norm": 0.14818305159158227, "learning_rate": 2e-05, "loss": 5.4611, "step": 7699 }, { "epoch": 0.5164838850320287, "grad_norm": 0.1472703916531397, "learning_rate": 2e-05, "loss": 5.4866, "step": 7700 }, { "epoch": 0.5165509608612536, "grad_norm": 0.1498648294507365, "learning_rate": 2e-05, "loss": 5.3916, "step": 7701 }, { "epoch": 0.5166180366904786, "grad_norm": 0.1547913262560126, "learning_rate": 2e-05, "loss": 5.3999, "step": 7702 }, { "epoch": 0.5166851125197035, "grad_norm": 0.14985115160033355, "learning_rate": 2e-05, "loss": 5.4727, "step": 7703 }, { "epoch": 0.5167521883489284, "grad_norm": 0.1491213591920621, "learning_rate": 2e-05, "loss": 5.5009, "step": 7704 }, { "epoch": 0.5168192641781534, "grad_norm": 0.1484014863360101, "learning_rate": 2e-05, "loss": 5.4409, "step": 7705 }, { "epoch": 0.5168863400073783, "grad_norm": 0.14767020121203944, "learning_rate": 2e-05, "loss": 5.3215, "step": 7706 }, { "epoch": 0.5169534158366033, "grad_norm": 0.14523954830326963, "learning_rate": 2e-05, "loss": 5.3752, "step": 7707 }, { "epoch": 0.5170204916658282, "grad_norm": 0.1480729836567034, "learning_rate": 2e-05, "loss": 5.3981, "step": 7708 }, { "epoch": 0.5170875674950531, "grad_norm": 0.14965903400382483, "learning_rate": 2e-05, "loss": 5.4133, "step": 7709 }, { "epoch": 0.5171546433242781, "grad_norm": 0.15254793415495618, "learning_rate": 2e-05, "loss": 5.5497, "step": 7710 }, { "epoch": 0.517221719153503, "grad_norm": 0.1458417599610557, "learning_rate": 2e-05, "loss": 5.4829, "step": 7711 }, { "epoch": 0.517288794982728, "grad_norm": 0.15053496708476682, "learning_rate": 2e-05, "loss": 5.3102, "step": 7712 }, { "epoch": 0.5173558708119529, "grad_norm": 0.14679675421741548, "learning_rate": 2e-05, "loss": 5.3301, "step": 7713 }, { "epoch": 0.5174229466411778, "grad_norm": 0.15303785797929498, "learning_rate": 2e-05, "loss": 5.4063, "step": 7714 }, { "epoch": 0.5174900224704028, "grad_norm": 0.15522401878999745, "learning_rate": 2e-05, "loss": 5.4346, "step": 7715 }, { "epoch": 0.5175570982996277, "grad_norm": 0.14778025186612223, "learning_rate": 2e-05, "loss": 5.4235, "step": 7716 }, { "epoch": 0.5176241741288526, "grad_norm": 0.15256367397035484, "learning_rate": 2e-05, "loss": 5.507, "step": 7717 }, { "epoch": 0.5176912499580776, "grad_norm": 0.15352111225142825, "learning_rate": 2e-05, "loss": 5.3384, "step": 7718 }, { "epoch": 0.5177583257873025, "grad_norm": 0.1459458002368136, "learning_rate": 2e-05, "loss": 5.3999, "step": 7719 }, { "epoch": 0.5178254016165275, "grad_norm": 0.15463651723491362, "learning_rate": 2e-05, "loss": 5.4925, "step": 7720 }, { "epoch": 0.5178924774457524, "grad_norm": 0.1519103050633396, "learning_rate": 2e-05, "loss": 5.4338, "step": 7721 }, { "epoch": 0.5179595532749773, "grad_norm": 0.1518184449778245, "learning_rate": 2e-05, "loss": 5.4893, "step": 7722 }, { "epoch": 0.5180266291042023, "grad_norm": 0.1512378277351227, "learning_rate": 2e-05, "loss": 5.7071, "step": 7723 }, { "epoch": 0.5180937049334272, "grad_norm": 0.15384512355507957, "learning_rate": 2e-05, "loss": 5.4179, "step": 7724 }, { "epoch": 0.5181607807626522, "grad_norm": 0.15415309905112679, "learning_rate": 2e-05, "loss": 5.4563, "step": 7725 }, { "epoch": 0.5182278565918771, "grad_norm": 0.1600368102916811, "learning_rate": 2e-05, "loss": 5.385, "step": 7726 }, { "epoch": 0.518294932421102, "grad_norm": 0.16303720932024757, "learning_rate": 2e-05, "loss": 5.3924, "step": 7727 }, { "epoch": 0.518362008250327, "grad_norm": 0.15420793331311367, "learning_rate": 2e-05, "loss": 5.27, "step": 7728 }, { "epoch": 0.5184290840795519, "grad_norm": 0.15500285062120248, "learning_rate": 2e-05, "loss": 5.355, "step": 7729 }, { "epoch": 0.5184961599087768, "grad_norm": 0.1635744722453185, "learning_rate": 2e-05, "loss": 5.4298, "step": 7730 }, { "epoch": 0.5185632357380018, "grad_norm": 0.1471330943568756, "learning_rate": 2e-05, "loss": 5.4967, "step": 7731 }, { "epoch": 0.5186303115672267, "grad_norm": 0.15240709052636142, "learning_rate": 2e-05, "loss": 5.4287, "step": 7732 }, { "epoch": 0.5186973873964517, "grad_norm": 0.15720918772652734, "learning_rate": 2e-05, "loss": 5.3122, "step": 7733 }, { "epoch": 0.5187644632256766, "grad_norm": 0.14925517069121788, "learning_rate": 2e-05, "loss": 5.5154, "step": 7734 }, { "epoch": 0.5188315390549015, "grad_norm": 0.1501882984802569, "learning_rate": 2e-05, "loss": 5.463, "step": 7735 }, { "epoch": 0.5188986148841265, "grad_norm": 0.15364358821597138, "learning_rate": 2e-05, "loss": 5.3612, "step": 7736 }, { "epoch": 0.5189656907133514, "grad_norm": 0.1557392986372127, "learning_rate": 2e-05, "loss": 5.3272, "step": 7737 }, { "epoch": 0.5190327665425764, "grad_norm": 0.15570753754171898, "learning_rate": 2e-05, "loss": 5.5156, "step": 7738 }, { "epoch": 0.5190998423718013, "grad_norm": 0.16017658011749175, "learning_rate": 2e-05, "loss": 5.6207, "step": 7739 }, { "epoch": 0.5191669182010262, "grad_norm": 0.1549384950662759, "learning_rate": 2e-05, "loss": 5.4334, "step": 7740 }, { "epoch": 0.5192339940302512, "grad_norm": 0.1501984805434172, "learning_rate": 2e-05, "loss": 5.3577, "step": 7741 }, { "epoch": 0.5193010698594761, "grad_norm": 0.1502579822025191, "learning_rate": 2e-05, "loss": 5.39, "step": 7742 }, { "epoch": 0.519368145688701, "grad_norm": 0.15477423334600743, "learning_rate": 2e-05, "loss": 5.4909, "step": 7743 }, { "epoch": 0.519435221517926, "grad_norm": 0.15785626158768254, "learning_rate": 2e-05, "loss": 5.4976, "step": 7744 }, { "epoch": 0.5195022973471509, "grad_norm": 0.1487412840523818, "learning_rate": 2e-05, "loss": 5.3023, "step": 7745 }, { "epoch": 0.5195693731763759, "grad_norm": 0.14653341245293147, "learning_rate": 2e-05, "loss": 5.5266, "step": 7746 }, { "epoch": 0.5196364490056008, "grad_norm": 0.147534331267081, "learning_rate": 2e-05, "loss": 5.4406, "step": 7747 }, { "epoch": 0.5197035248348257, "grad_norm": 0.14573432825398613, "learning_rate": 2e-05, "loss": 5.4381, "step": 7748 }, { "epoch": 0.5197706006640507, "grad_norm": 0.1538001515832809, "learning_rate": 2e-05, "loss": 5.4002, "step": 7749 }, { "epoch": 0.5198376764932756, "grad_norm": 0.14984352418119395, "learning_rate": 2e-05, "loss": 5.4749, "step": 7750 }, { "epoch": 0.5199047523225006, "grad_norm": 0.1522305471893058, "learning_rate": 2e-05, "loss": 5.4602, "step": 7751 }, { "epoch": 0.5199718281517255, "grad_norm": 0.15407570889706987, "learning_rate": 2e-05, "loss": 5.4964, "step": 7752 }, { "epoch": 0.5200389039809504, "grad_norm": 0.15100129091019632, "learning_rate": 2e-05, "loss": 5.3494, "step": 7753 }, { "epoch": 0.5201059798101754, "grad_norm": 0.15459092976610744, "learning_rate": 2e-05, "loss": 5.4935, "step": 7754 }, { "epoch": 0.5201730556394003, "grad_norm": 0.1500051508247163, "learning_rate": 2e-05, "loss": 5.5554, "step": 7755 }, { "epoch": 0.5202401314686252, "grad_norm": 0.15526610859586082, "learning_rate": 2e-05, "loss": 5.3313, "step": 7756 }, { "epoch": 0.5203072072978502, "grad_norm": 0.15499508480194826, "learning_rate": 2e-05, "loss": 5.5955, "step": 7757 }, { "epoch": 0.5203742831270751, "grad_norm": 0.15130242337698946, "learning_rate": 2e-05, "loss": 5.2889, "step": 7758 }, { "epoch": 0.5204413589563001, "grad_norm": 0.15830696765801408, "learning_rate": 2e-05, "loss": 5.2442, "step": 7759 }, { "epoch": 0.520508434785525, "grad_norm": 0.1572949744548908, "learning_rate": 2e-05, "loss": 5.338, "step": 7760 }, { "epoch": 0.5205755106147499, "grad_norm": 0.1459604654815613, "learning_rate": 2e-05, "loss": 5.329, "step": 7761 }, { "epoch": 0.5206425864439749, "grad_norm": 0.1572204425976535, "learning_rate": 2e-05, "loss": 5.5209, "step": 7762 }, { "epoch": 0.5207096622731998, "grad_norm": 0.15645183130435852, "learning_rate": 2e-05, "loss": 5.4577, "step": 7763 }, { "epoch": 0.5207767381024248, "grad_norm": 0.16117027429622713, "learning_rate": 2e-05, "loss": 5.3928, "step": 7764 }, { "epoch": 0.5208438139316497, "grad_norm": 0.16002656280473487, "learning_rate": 2e-05, "loss": 5.3289, "step": 7765 }, { "epoch": 0.5209108897608746, "grad_norm": 0.14938308454589772, "learning_rate": 2e-05, "loss": 5.3241, "step": 7766 }, { "epoch": 0.5209779655900996, "grad_norm": 0.16377694854659905, "learning_rate": 2e-05, "loss": 5.4073, "step": 7767 }, { "epoch": 0.5210450414193245, "grad_norm": 0.15809894600669008, "learning_rate": 2e-05, "loss": 5.423, "step": 7768 }, { "epoch": 0.5211121172485494, "grad_norm": 0.15365266085238227, "learning_rate": 2e-05, "loss": 5.3012, "step": 7769 }, { "epoch": 0.5211791930777744, "grad_norm": 0.16448464538097085, "learning_rate": 2e-05, "loss": 5.3253, "step": 7770 }, { "epoch": 0.5212462689069993, "grad_norm": 0.1409923326631212, "learning_rate": 2e-05, "loss": 5.452, "step": 7771 }, { "epoch": 0.5213133447362243, "grad_norm": 0.1579726933985827, "learning_rate": 2e-05, "loss": 5.4354, "step": 7772 }, { "epoch": 0.5213804205654492, "grad_norm": 0.16555858419571556, "learning_rate": 2e-05, "loss": 5.3472, "step": 7773 }, { "epoch": 0.5214474963946741, "grad_norm": 0.1459448136360117, "learning_rate": 2e-05, "loss": 5.4907, "step": 7774 }, { "epoch": 0.5215145722238991, "grad_norm": 0.15188370354923134, "learning_rate": 2e-05, "loss": 5.4607, "step": 7775 }, { "epoch": 0.521581648053124, "grad_norm": 0.15212481234973688, "learning_rate": 2e-05, "loss": 5.4669, "step": 7776 }, { "epoch": 0.521648723882349, "grad_norm": 0.15097703167170445, "learning_rate": 2e-05, "loss": 5.3932, "step": 7777 }, { "epoch": 0.5217157997115739, "grad_norm": 0.1543832973542512, "learning_rate": 2e-05, "loss": 5.4101, "step": 7778 }, { "epoch": 0.5217828755407988, "grad_norm": 0.148657251230234, "learning_rate": 2e-05, "loss": 5.4027, "step": 7779 }, { "epoch": 0.5218499513700238, "grad_norm": 0.1653826438530842, "learning_rate": 2e-05, "loss": 5.5198, "step": 7780 }, { "epoch": 0.5219170271992487, "grad_norm": 0.15310882108095947, "learning_rate": 2e-05, "loss": 5.4086, "step": 7781 }, { "epoch": 0.5219841030284736, "grad_norm": 0.1498807652890514, "learning_rate": 2e-05, "loss": 5.395, "step": 7782 }, { "epoch": 0.5220511788576986, "grad_norm": 0.16374002547562255, "learning_rate": 2e-05, "loss": 5.3071, "step": 7783 }, { "epoch": 0.5221182546869235, "grad_norm": 0.15395650423817206, "learning_rate": 2e-05, "loss": 5.6125, "step": 7784 }, { "epoch": 0.5221853305161485, "grad_norm": 0.15217238720462195, "learning_rate": 2e-05, "loss": 5.498, "step": 7785 }, { "epoch": 0.5222524063453734, "grad_norm": 0.1534732159214014, "learning_rate": 2e-05, "loss": 5.5392, "step": 7786 }, { "epoch": 0.5223194821745983, "grad_norm": 0.15098889329075169, "learning_rate": 2e-05, "loss": 5.3852, "step": 7787 }, { "epoch": 0.5223865580038233, "grad_norm": 0.15377524795660452, "learning_rate": 2e-05, "loss": 5.5268, "step": 7788 }, { "epoch": 0.5224536338330482, "grad_norm": 0.15364221847653528, "learning_rate": 2e-05, "loss": 5.5734, "step": 7789 }, { "epoch": 0.5225207096622732, "grad_norm": 0.1497946149788449, "learning_rate": 2e-05, "loss": 5.3909, "step": 7790 }, { "epoch": 0.5225877854914981, "grad_norm": 0.15364734674130867, "learning_rate": 2e-05, "loss": 5.4377, "step": 7791 }, { "epoch": 0.522654861320723, "grad_norm": 0.1531406917025649, "learning_rate": 2e-05, "loss": 5.5191, "step": 7792 }, { "epoch": 0.522721937149948, "grad_norm": 0.15186789005735263, "learning_rate": 2e-05, "loss": 5.3441, "step": 7793 }, { "epoch": 0.5227890129791729, "grad_norm": 0.15951888120385435, "learning_rate": 2e-05, "loss": 5.474, "step": 7794 }, { "epoch": 0.5228560888083978, "grad_norm": 0.16031197151125873, "learning_rate": 2e-05, "loss": 5.4297, "step": 7795 }, { "epoch": 0.5229231646376228, "grad_norm": 0.15132770087623668, "learning_rate": 2e-05, "loss": 5.282, "step": 7796 }, { "epoch": 0.5229902404668477, "grad_norm": 0.15789909655824738, "learning_rate": 2e-05, "loss": 5.3604, "step": 7797 }, { "epoch": 0.5230573162960727, "grad_norm": 0.15463590774857486, "learning_rate": 2e-05, "loss": 5.4004, "step": 7798 }, { "epoch": 0.5231243921252976, "grad_norm": 0.15486518902818117, "learning_rate": 2e-05, "loss": 5.3898, "step": 7799 }, { "epoch": 0.5231914679545225, "grad_norm": 0.15597035418653515, "learning_rate": 2e-05, "loss": 5.5954, "step": 7800 }, { "epoch": 0.5232585437837475, "grad_norm": 0.1603367239589493, "learning_rate": 2e-05, "loss": 5.4429, "step": 7801 }, { "epoch": 0.5233256196129724, "grad_norm": 0.1529500392340027, "learning_rate": 2e-05, "loss": 5.4102, "step": 7802 }, { "epoch": 0.5233926954421974, "grad_norm": 0.1558738934671131, "learning_rate": 2e-05, "loss": 5.5235, "step": 7803 }, { "epoch": 0.5234597712714223, "grad_norm": 0.15152993729348724, "learning_rate": 2e-05, "loss": 5.4196, "step": 7804 }, { "epoch": 0.5235268471006472, "grad_norm": 0.1564671248754073, "learning_rate": 2e-05, "loss": 5.3462, "step": 7805 }, { "epoch": 0.5235939229298722, "grad_norm": 0.15485769699665825, "learning_rate": 2e-05, "loss": 5.4917, "step": 7806 }, { "epoch": 0.5236609987590971, "grad_norm": 0.14599405527663023, "learning_rate": 2e-05, "loss": 5.3993, "step": 7807 }, { "epoch": 0.523728074588322, "grad_norm": 0.15647151540812226, "learning_rate": 2e-05, "loss": 5.3653, "step": 7808 }, { "epoch": 0.523795150417547, "grad_norm": 0.16256560821975638, "learning_rate": 2e-05, "loss": 5.3639, "step": 7809 }, { "epoch": 0.5238622262467719, "grad_norm": 0.1553898096004137, "learning_rate": 2e-05, "loss": 5.4629, "step": 7810 }, { "epoch": 0.5239293020759969, "grad_norm": 0.1543514317210756, "learning_rate": 2e-05, "loss": 5.6014, "step": 7811 }, { "epoch": 0.5239963779052218, "grad_norm": 0.15074802944389154, "learning_rate": 2e-05, "loss": 5.5521, "step": 7812 }, { "epoch": 0.5240634537344467, "grad_norm": 0.1603864159794326, "learning_rate": 2e-05, "loss": 5.6569, "step": 7813 }, { "epoch": 0.5241305295636717, "grad_norm": 0.1681061152296203, "learning_rate": 2e-05, "loss": 5.5419, "step": 7814 }, { "epoch": 0.5241976053928966, "grad_norm": 0.1522040028856795, "learning_rate": 2e-05, "loss": 5.2686, "step": 7815 }, { "epoch": 0.5242646812221216, "grad_norm": 0.16210137011498088, "learning_rate": 2e-05, "loss": 5.4298, "step": 7816 }, { "epoch": 0.5243317570513465, "grad_norm": 0.15739068549212606, "learning_rate": 2e-05, "loss": 5.2828, "step": 7817 }, { "epoch": 0.5243988328805715, "grad_norm": 0.1508840476567197, "learning_rate": 2e-05, "loss": 5.3579, "step": 7818 }, { "epoch": 0.5244659087097965, "grad_norm": 0.16056766936238379, "learning_rate": 2e-05, "loss": 5.5143, "step": 7819 }, { "epoch": 0.5245329845390214, "grad_norm": 0.16517815226555924, "learning_rate": 2e-05, "loss": 5.3724, "step": 7820 }, { "epoch": 0.5246000603682464, "grad_norm": 0.1445178206225716, "learning_rate": 2e-05, "loss": 5.4414, "step": 7821 }, { "epoch": 0.5246671361974713, "grad_norm": 0.16146435331071488, "learning_rate": 2e-05, "loss": 5.3663, "step": 7822 }, { "epoch": 0.5247342120266962, "grad_norm": 0.16237203551192098, "learning_rate": 2e-05, "loss": 5.3787, "step": 7823 }, { "epoch": 0.5248012878559212, "grad_norm": 0.15259733943546275, "learning_rate": 2e-05, "loss": 5.3969, "step": 7824 }, { "epoch": 0.5248683636851461, "grad_norm": 0.16035610205891926, "learning_rate": 2e-05, "loss": 5.4286, "step": 7825 }, { "epoch": 0.524935439514371, "grad_norm": 0.1588089922542072, "learning_rate": 2e-05, "loss": 5.5123, "step": 7826 }, { "epoch": 0.525002515343596, "grad_norm": 0.15189237402575362, "learning_rate": 2e-05, "loss": 5.477, "step": 7827 }, { "epoch": 0.5250695911728209, "grad_norm": 0.1504514240318182, "learning_rate": 2e-05, "loss": 5.4887, "step": 7828 }, { "epoch": 0.5251366670020459, "grad_norm": 0.16249341479688614, "learning_rate": 2e-05, "loss": 5.3911, "step": 7829 }, { "epoch": 0.5252037428312708, "grad_norm": 0.15485748258399187, "learning_rate": 2e-05, "loss": 5.3825, "step": 7830 }, { "epoch": 0.5252708186604957, "grad_norm": 0.14396389116924976, "learning_rate": 2e-05, "loss": 5.4722, "step": 7831 }, { "epoch": 0.5253378944897207, "grad_norm": 0.1513086466726961, "learning_rate": 2e-05, "loss": 5.4683, "step": 7832 }, { "epoch": 0.5254049703189456, "grad_norm": 0.16275975148802158, "learning_rate": 2e-05, "loss": 5.4511, "step": 7833 }, { "epoch": 0.5254720461481706, "grad_norm": 0.15906142427706754, "learning_rate": 2e-05, "loss": 5.4092, "step": 7834 }, { "epoch": 0.5255391219773955, "grad_norm": 0.1483855660360458, "learning_rate": 2e-05, "loss": 5.4536, "step": 7835 }, { "epoch": 0.5256061978066204, "grad_norm": 0.1576068884336193, "learning_rate": 2e-05, "loss": 5.2484, "step": 7836 }, { "epoch": 0.5256732736358454, "grad_norm": 0.1446482493243385, "learning_rate": 2e-05, "loss": 5.4381, "step": 7837 }, { "epoch": 0.5257403494650703, "grad_norm": 0.15831069105958231, "learning_rate": 2e-05, "loss": 5.3721, "step": 7838 }, { "epoch": 0.5258074252942952, "grad_norm": 0.15505023263710777, "learning_rate": 2e-05, "loss": 5.494, "step": 7839 }, { "epoch": 0.5258745011235202, "grad_norm": 0.15494663192980804, "learning_rate": 2e-05, "loss": 5.4239, "step": 7840 }, { "epoch": 0.5259415769527451, "grad_norm": 0.14975795730330585, "learning_rate": 2e-05, "loss": 5.4684, "step": 7841 }, { "epoch": 0.5260086527819701, "grad_norm": 0.14949036987480338, "learning_rate": 2e-05, "loss": 5.494, "step": 7842 }, { "epoch": 0.526075728611195, "grad_norm": 0.16413491952728157, "learning_rate": 2e-05, "loss": 5.3754, "step": 7843 }, { "epoch": 0.5261428044404199, "grad_norm": 0.1578570545198146, "learning_rate": 2e-05, "loss": 5.4968, "step": 7844 }, { "epoch": 0.5262098802696449, "grad_norm": 0.14967031446786255, "learning_rate": 2e-05, "loss": 5.4014, "step": 7845 }, { "epoch": 0.5262769560988698, "grad_norm": 0.15354209293757426, "learning_rate": 2e-05, "loss": 5.5115, "step": 7846 }, { "epoch": 0.5263440319280948, "grad_norm": 0.14481298752104105, "learning_rate": 2e-05, "loss": 5.3982, "step": 7847 }, { "epoch": 0.5264111077573197, "grad_norm": 0.15274358620122883, "learning_rate": 2e-05, "loss": 5.5467, "step": 7848 }, { "epoch": 0.5264781835865446, "grad_norm": 0.15948715434912336, "learning_rate": 2e-05, "loss": 5.536, "step": 7849 }, { "epoch": 0.5265452594157696, "grad_norm": 0.1522068348628448, "learning_rate": 2e-05, "loss": 5.46, "step": 7850 }, { "epoch": 0.5266123352449945, "grad_norm": 0.15791434620011074, "learning_rate": 2e-05, "loss": 5.5366, "step": 7851 }, { "epoch": 0.5266794110742195, "grad_norm": 0.15160395307427085, "learning_rate": 2e-05, "loss": 5.5165, "step": 7852 }, { "epoch": 0.5267464869034444, "grad_norm": 0.14822012691440492, "learning_rate": 2e-05, "loss": 5.4997, "step": 7853 }, { "epoch": 0.5268135627326693, "grad_norm": 0.15351617555129007, "learning_rate": 2e-05, "loss": 5.3879, "step": 7854 }, { "epoch": 0.5268806385618943, "grad_norm": 0.14680986794717935, "learning_rate": 2e-05, "loss": 5.4987, "step": 7855 }, { "epoch": 0.5269477143911192, "grad_norm": 0.1493705038981016, "learning_rate": 2e-05, "loss": 5.3971, "step": 7856 }, { "epoch": 0.5270147902203441, "grad_norm": 0.15961843699136083, "learning_rate": 2e-05, "loss": 5.342, "step": 7857 }, { "epoch": 0.5270818660495691, "grad_norm": 0.14693991215459498, "learning_rate": 2e-05, "loss": 5.3445, "step": 7858 }, { "epoch": 0.527148941878794, "grad_norm": 0.15188486447124286, "learning_rate": 2e-05, "loss": 5.341, "step": 7859 }, { "epoch": 0.527216017708019, "grad_norm": 0.15070780808172662, "learning_rate": 2e-05, "loss": 5.4796, "step": 7860 }, { "epoch": 0.5272830935372439, "grad_norm": 0.15119726761021757, "learning_rate": 2e-05, "loss": 5.4415, "step": 7861 }, { "epoch": 0.5273501693664688, "grad_norm": 0.15157601749676133, "learning_rate": 2e-05, "loss": 5.3832, "step": 7862 }, { "epoch": 0.5274172451956938, "grad_norm": 0.15086121524210236, "learning_rate": 2e-05, "loss": 5.4054, "step": 7863 }, { "epoch": 0.5274843210249187, "grad_norm": 0.15145609431685852, "learning_rate": 2e-05, "loss": 5.3714, "step": 7864 }, { "epoch": 0.5275513968541437, "grad_norm": 0.15160043171071863, "learning_rate": 2e-05, "loss": 5.4426, "step": 7865 }, { "epoch": 0.5276184726833686, "grad_norm": 0.1529645795378329, "learning_rate": 2e-05, "loss": 5.4927, "step": 7866 }, { "epoch": 0.5276855485125935, "grad_norm": 0.1576090606745968, "learning_rate": 2e-05, "loss": 5.4699, "step": 7867 }, { "epoch": 0.5277526243418185, "grad_norm": 0.15059675719104398, "learning_rate": 2e-05, "loss": 5.4771, "step": 7868 }, { "epoch": 0.5278197001710434, "grad_norm": 0.1511953023826405, "learning_rate": 2e-05, "loss": 5.4957, "step": 7869 }, { "epoch": 0.5278867760002683, "grad_norm": 0.14667731669488052, "learning_rate": 2e-05, "loss": 5.4542, "step": 7870 }, { "epoch": 0.5279538518294933, "grad_norm": 0.15309711871119253, "learning_rate": 2e-05, "loss": 5.4049, "step": 7871 }, { "epoch": 0.5280209276587182, "grad_norm": 0.15075148633909893, "learning_rate": 2e-05, "loss": 5.3789, "step": 7872 }, { "epoch": 0.5280880034879432, "grad_norm": 0.14575499640162426, "learning_rate": 2e-05, "loss": 5.2921, "step": 7873 }, { "epoch": 0.5281550793171681, "grad_norm": 0.1449291803627925, "learning_rate": 2e-05, "loss": 5.4261, "step": 7874 }, { "epoch": 0.528222155146393, "grad_norm": 0.1503161622343501, "learning_rate": 2e-05, "loss": 5.2234, "step": 7875 }, { "epoch": 0.528289230975618, "grad_norm": 0.18182115584865982, "learning_rate": 2e-05, "loss": 5.4705, "step": 7876 }, { "epoch": 0.5283563068048429, "grad_norm": 0.14779429329085458, "learning_rate": 2e-05, "loss": 5.5049, "step": 7877 }, { "epoch": 0.5284233826340679, "grad_norm": 0.15235973760347493, "learning_rate": 2e-05, "loss": 5.35, "step": 7878 }, { "epoch": 0.5284904584632928, "grad_norm": 0.15513366196432363, "learning_rate": 2e-05, "loss": 5.4311, "step": 7879 }, { "epoch": 0.5285575342925177, "grad_norm": 0.14097231600881868, "learning_rate": 2e-05, "loss": 5.3838, "step": 7880 }, { "epoch": 0.5286246101217427, "grad_norm": 0.14594959992687043, "learning_rate": 2e-05, "loss": 5.3846, "step": 7881 }, { "epoch": 0.5286916859509676, "grad_norm": 0.14643193819553307, "learning_rate": 2e-05, "loss": 5.3655, "step": 7882 }, { "epoch": 0.5287587617801925, "grad_norm": 0.15135195501404244, "learning_rate": 2e-05, "loss": 5.3771, "step": 7883 }, { "epoch": 0.5288258376094175, "grad_norm": 0.14861402876769497, "learning_rate": 2e-05, "loss": 5.3818, "step": 7884 }, { "epoch": 0.5288929134386424, "grad_norm": 0.14422475675919114, "learning_rate": 2e-05, "loss": 5.4842, "step": 7885 }, { "epoch": 0.5289599892678674, "grad_norm": 0.14215302437467428, "learning_rate": 2e-05, "loss": 5.3428, "step": 7886 }, { "epoch": 0.5290270650970923, "grad_norm": 0.15540526075359407, "learning_rate": 2e-05, "loss": 5.3806, "step": 7887 }, { "epoch": 0.5290941409263172, "grad_norm": 0.14790738700693545, "learning_rate": 2e-05, "loss": 5.4384, "step": 7888 }, { "epoch": 0.5291612167555422, "grad_norm": 0.14849144180636384, "learning_rate": 2e-05, "loss": 5.4674, "step": 7889 }, { "epoch": 0.5292282925847671, "grad_norm": 0.14968086101398353, "learning_rate": 2e-05, "loss": 5.4107, "step": 7890 }, { "epoch": 0.529295368413992, "grad_norm": 0.15276096303040979, "learning_rate": 2e-05, "loss": 5.5651, "step": 7891 }, { "epoch": 0.529362444243217, "grad_norm": 0.14441667432730676, "learning_rate": 2e-05, "loss": 5.3955, "step": 7892 }, { "epoch": 0.5294295200724419, "grad_norm": 0.14429171024104792, "learning_rate": 2e-05, "loss": 5.5665, "step": 7893 }, { "epoch": 0.5294965959016669, "grad_norm": 0.148871768656105, "learning_rate": 2e-05, "loss": 5.3817, "step": 7894 }, { "epoch": 0.5295636717308918, "grad_norm": 0.15680387886423344, "learning_rate": 2e-05, "loss": 5.5562, "step": 7895 }, { "epoch": 0.5296307475601167, "grad_norm": 0.14604122239184, "learning_rate": 2e-05, "loss": 5.4688, "step": 7896 }, { "epoch": 0.5296978233893417, "grad_norm": 0.15080588247782387, "learning_rate": 2e-05, "loss": 5.4653, "step": 7897 }, { "epoch": 0.5297648992185666, "grad_norm": 0.15086532791092067, "learning_rate": 2e-05, "loss": 5.5307, "step": 7898 }, { "epoch": 0.5298319750477916, "grad_norm": 0.1537087554713722, "learning_rate": 2e-05, "loss": 5.4841, "step": 7899 }, { "epoch": 0.5298990508770165, "grad_norm": 0.1459453669440902, "learning_rate": 2e-05, "loss": 5.3264, "step": 7900 }, { "epoch": 0.5299661267062414, "grad_norm": 0.15186682814365923, "learning_rate": 2e-05, "loss": 5.4044, "step": 7901 }, { "epoch": 0.5300332025354664, "grad_norm": 0.1569843275812385, "learning_rate": 2e-05, "loss": 5.4647, "step": 7902 }, { "epoch": 0.5301002783646913, "grad_norm": 0.14704962098228108, "learning_rate": 2e-05, "loss": 5.4357, "step": 7903 }, { "epoch": 0.5301673541939163, "grad_norm": 0.14897570927334883, "learning_rate": 2e-05, "loss": 5.4429, "step": 7904 }, { "epoch": 0.5302344300231412, "grad_norm": 0.14606767718283856, "learning_rate": 2e-05, "loss": 5.4161, "step": 7905 }, { "epoch": 0.5303015058523661, "grad_norm": 0.15336883031280332, "learning_rate": 2e-05, "loss": 5.3738, "step": 7906 }, { "epoch": 0.5303685816815911, "grad_norm": 0.14508427375589303, "learning_rate": 2e-05, "loss": 5.496, "step": 7907 }, { "epoch": 0.530435657510816, "grad_norm": 0.15645292003232347, "learning_rate": 2e-05, "loss": 5.4349, "step": 7908 }, { "epoch": 0.5305027333400409, "grad_norm": 0.14693338303129982, "learning_rate": 2e-05, "loss": 5.4305, "step": 7909 }, { "epoch": 0.5305698091692659, "grad_norm": 0.14494568217681658, "learning_rate": 2e-05, "loss": 5.3446, "step": 7910 }, { "epoch": 0.5306368849984908, "grad_norm": 0.14231065744974158, "learning_rate": 2e-05, "loss": 5.2931, "step": 7911 }, { "epoch": 0.5307039608277158, "grad_norm": 0.1457340402389788, "learning_rate": 2e-05, "loss": 5.462, "step": 7912 }, { "epoch": 0.5307710366569407, "grad_norm": 0.1496568332198432, "learning_rate": 2e-05, "loss": 5.3345, "step": 7913 }, { "epoch": 0.5308381124861656, "grad_norm": 0.14589791965052368, "learning_rate": 2e-05, "loss": 5.3382, "step": 7914 }, { "epoch": 0.5309051883153906, "grad_norm": 0.15249542826758267, "learning_rate": 2e-05, "loss": 5.3835, "step": 7915 }, { "epoch": 0.5309722641446155, "grad_norm": 0.14900659779162892, "learning_rate": 2e-05, "loss": 5.4844, "step": 7916 }, { "epoch": 0.5310393399738405, "grad_norm": 0.14548110990228677, "learning_rate": 2e-05, "loss": 5.2137, "step": 7917 }, { "epoch": 0.5311064158030654, "grad_norm": 0.14990601093818076, "learning_rate": 2e-05, "loss": 5.431, "step": 7918 }, { "epoch": 0.5311734916322903, "grad_norm": 0.15077531304582845, "learning_rate": 2e-05, "loss": 5.4468, "step": 7919 }, { "epoch": 0.5312405674615153, "grad_norm": 0.15174763827801516, "learning_rate": 2e-05, "loss": 5.5039, "step": 7920 }, { "epoch": 0.5313076432907402, "grad_norm": 0.14949591091211567, "learning_rate": 2e-05, "loss": 5.4551, "step": 7921 }, { "epoch": 0.5313747191199651, "grad_norm": 0.1497589192643047, "learning_rate": 2e-05, "loss": 5.4879, "step": 7922 }, { "epoch": 0.5314417949491901, "grad_norm": 0.14945421183043242, "learning_rate": 2e-05, "loss": 5.5259, "step": 7923 }, { "epoch": 0.531508870778415, "grad_norm": 0.14569434092195951, "learning_rate": 2e-05, "loss": 5.5597, "step": 7924 }, { "epoch": 0.53157594660764, "grad_norm": 0.14633826309716944, "learning_rate": 2e-05, "loss": 5.3578, "step": 7925 }, { "epoch": 0.5316430224368649, "grad_norm": 0.14333339774900059, "learning_rate": 2e-05, "loss": 5.5712, "step": 7926 }, { "epoch": 0.5317100982660898, "grad_norm": 0.14677909236818223, "learning_rate": 2e-05, "loss": 5.5141, "step": 7927 }, { "epoch": 0.5317771740953148, "grad_norm": 0.15542752014304007, "learning_rate": 2e-05, "loss": 5.4687, "step": 7928 }, { "epoch": 0.5318442499245397, "grad_norm": 0.15321877455537508, "learning_rate": 2e-05, "loss": 5.3525, "step": 7929 }, { "epoch": 0.5319113257537647, "grad_norm": 0.1477680450953325, "learning_rate": 2e-05, "loss": 5.4177, "step": 7930 }, { "epoch": 0.5319784015829896, "grad_norm": 0.15434788277623696, "learning_rate": 2e-05, "loss": 5.4034, "step": 7931 }, { "epoch": 0.5320454774122145, "grad_norm": 0.14692984470464854, "learning_rate": 2e-05, "loss": 5.4179, "step": 7932 }, { "epoch": 0.5321125532414395, "grad_norm": 0.1455853991491499, "learning_rate": 2e-05, "loss": 5.4851, "step": 7933 }, { "epoch": 0.5321796290706644, "grad_norm": 0.14982438689837918, "learning_rate": 2e-05, "loss": 5.3759, "step": 7934 }, { "epoch": 0.5322467048998893, "grad_norm": 0.15452908302703156, "learning_rate": 2e-05, "loss": 5.5032, "step": 7935 }, { "epoch": 0.5323137807291143, "grad_norm": 0.152943379022605, "learning_rate": 2e-05, "loss": 5.4191, "step": 7936 }, { "epoch": 0.5323808565583392, "grad_norm": 0.15695355288606078, "learning_rate": 2e-05, "loss": 5.3668, "step": 7937 }, { "epoch": 0.5324479323875642, "grad_norm": 0.14812575917556958, "learning_rate": 2e-05, "loss": 5.4514, "step": 7938 }, { "epoch": 0.5325150082167891, "grad_norm": 0.14460321811420837, "learning_rate": 2e-05, "loss": 5.3153, "step": 7939 }, { "epoch": 0.532582084046014, "grad_norm": 0.1449861846669442, "learning_rate": 2e-05, "loss": 5.4862, "step": 7940 }, { "epoch": 0.532649159875239, "grad_norm": 0.1548270145744908, "learning_rate": 2e-05, "loss": 5.4111, "step": 7941 }, { "epoch": 0.5327162357044639, "grad_norm": 0.15336700886252208, "learning_rate": 2e-05, "loss": 5.4583, "step": 7942 }, { "epoch": 0.5327833115336889, "grad_norm": 0.14994074587122827, "learning_rate": 2e-05, "loss": 5.5246, "step": 7943 }, { "epoch": 0.5328503873629138, "grad_norm": 0.15556537339921953, "learning_rate": 2e-05, "loss": 5.3805, "step": 7944 }, { "epoch": 0.5329174631921387, "grad_norm": 0.15206201415128698, "learning_rate": 2e-05, "loss": 5.4867, "step": 7945 }, { "epoch": 0.5329845390213637, "grad_norm": 0.15308613157511577, "learning_rate": 2e-05, "loss": 5.4693, "step": 7946 }, { "epoch": 0.5330516148505886, "grad_norm": 0.1571447933618705, "learning_rate": 2e-05, "loss": 5.4948, "step": 7947 }, { "epoch": 0.5331186906798135, "grad_norm": 0.15011956928288958, "learning_rate": 2e-05, "loss": 5.4854, "step": 7948 }, { "epoch": 0.5331857665090385, "grad_norm": 0.1511146839762173, "learning_rate": 2e-05, "loss": 5.3504, "step": 7949 }, { "epoch": 0.5332528423382634, "grad_norm": 0.1545162558795858, "learning_rate": 2e-05, "loss": 5.4493, "step": 7950 }, { "epoch": 0.5333199181674884, "grad_norm": 0.14829154322495153, "learning_rate": 2e-05, "loss": 5.3984, "step": 7951 }, { "epoch": 0.5333869939967133, "grad_norm": 0.15413931895175936, "learning_rate": 2e-05, "loss": 5.3753, "step": 7952 }, { "epoch": 0.5334540698259382, "grad_norm": 0.1568340631863365, "learning_rate": 2e-05, "loss": 5.5781, "step": 7953 }, { "epoch": 0.5335211456551632, "grad_norm": 0.14581521179803322, "learning_rate": 2e-05, "loss": 5.3851, "step": 7954 }, { "epoch": 0.5335882214843881, "grad_norm": 0.15187226082732014, "learning_rate": 2e-05, "loss": 5.2566, "step": 7955 }, { "epoch": 0.533655297313613, "grad_norm": 0.18073211922856136, "learning_rate": 2e-05, "loss": 5.4412, "step": 7956 }, { "epoch": 0.533722373142838, "grad_norm": 0.1528307214483082, "learning_rate": 2e-05, "loss": 5.3595, "step": 7957 }, { "epoch": 0.5337894489720629, "grad_norm": 0.1577671427810294, "learning_rate": 2e-05, "loss": 5.481, "step": 7958 }, { "epoch": 0.5338565248012879, "grad_norm": 0.15924944435553243, "learning_rate": 2e-05, "loss": 5.5028, "step": 7959 }, { "epoch": 0.5339236006305128, "grad_norm": 0.16123419308038286, "learning_rate": 2e-05, "loss": 5.4124, "step": 7960 }, { "epoch": 0.5339906764597377, "grad_norm": 0.16212535270988446, "learning_rate": 2e-05, "loss": 5.5024, "step": 7961 }, { "epoch": 0.5340577522889627, "grad_norm": 0.15736833132201752, "learning_rate": 2e-05, "loss": 5.5497, "step": 7962 }, { "epoch": 0.5341248281181876, "grad_norm": 0.1614228533723106, "learning_rate": 2e-05, "loss": 5.335, "step": 7963 }, { "epoch": 0.5341919039474126, "grad_norm": 0.14980678229321498, "learning_rate": 2e-05, "loss": 5.392, "step": 7964 }, { "epoch": 0.5342589797766375, "grad_norm": 0.15116800141862827, "learning_rate": 2e-05, "loss": 5.4161, "step": 7965 }, { "epoch": 0.5343260556058624, "grad_norm": 0.16126262730468194, "learning_rate": 2e-05, "loss": 5.3035, "step": 7966 }, { "epoch": 0.5343931314350874, "grad_norm": 0.15889674802627435, "learning_rate": 2e-05, "loss": 5.4406, "step": 7967 }, { "epoch": 0.5344602072643123, "grad_norm": 0.1565712383787536, "learning_rate": 2e-05, "loss": 5.552, "step": 7968 }, { "epoch": 0.5345272830935373, "grad_norm": 0.1642500182203417, "learning_rate": 2e-05, "loss": 5.3089, "step": 7969 }, { "epoch": 0.5345943589227622, "grad_norm": 0.15493214848103143, "learning_rate": 2e-05, "loss": 5.3969, "step": 7970 }, { "epoch": 0.5346614347519871, "grad_norm": 0.15692139701810226, "learning_rate": 2e-05, "loss": 5.4865, "step": 7971 }, { "epoch": 0.5347285105812121, "grad_norm": 0.15563480188553255, "learning_rate": 2e-05, "loss": 5.3802, "step": 7972 }, { "epoch": 0.534795586410437, "grad_norm": 0.157492833845838, "learning_rate": 2e-05, "loss": 5.4341, "step": 7973 }, { "epoch": 0.534862662239662, "grad_norm": 0.1525544679990963, "learning_rate": 2e-05, "loss": 5.3433, "step": 7974 }, { "epoch": 0.5349297380688869, "grad_norm": 0.15197576690970116, "learning_rate": 2e-05, "loss": 5.4264, "step": 7975 }, { "epoch": 0.5349968138981118, "grad_norm": 0.1517858425916738, "learning_rate": 2e-05, "loss": 5.3028, "step": 7976 }, { "epoch": 0.5350638897273368, "grad_norm": 0.15468470426097242, "learning_rate": 2e-05, "loss": 5.4085, "step": 7977 }, { "epoch": 0.5351309655565617, "grad_norm": 0.14998016167282283, "learning_rate": 2e-05, "loss": 5.4841, "step": 7978 }, { "epoch": 0.5351980413857866, "grad_norm": 0.148889982391403, "learning_rate": 2e-05, "loss": 5.3721, "step": 7979 }, { "epoch": 0.5352651172150116, "grad_norm": 0.15481407273321532, "learning_rate": 2e-05, "loss": 5.4826, "step": 7980 }, { "epoch": 0.5353321930442365, "grad_norm": 0.15926796764556994, "learning_rate": 2e-05, "loss": 5.3966, "step": 7981 }, { "epoch": 0.5353992688734615, "grad_norm": 0.16217781369087095, "learning_rate": 2e-05, "loss": 5.3795, "step": 7982 }, { "epoch": 0.5354663447026864, "grad_norm": 0.14532243565215963, "learning_rate": 2e-05, "loss": 5.4107, "step": 7983 }, { "epoch": 0.5355334205319113, "grad_norm": 0.1571901947721094, "learning_rate": 2e-05, "loss": 5.4117, "step": 7984 }, { "epoch": 0.5356004963611363, "grad_norm": 0.1603157605383176, "learning_rate": 2e-05, "loss": 5.3533, "step": 7985 }, { "epoch": 0.5356675721903612, "grad_norm": 0.15387055840831695, "learning_rate": 2e-05, "loss": 5.4241, "step": 7986 }, { "epoch": 0.5357346480195861, "grad_norm": 0.1554204669923519, "learning_rate": 2e-05, "loss": 5.4711, "step": 7987 }, { "epoch": 0.5358017238488111, "grad_norm": 0.172204387885398, "learning_rate": 2e-05, "loss": 5.5707, "step": 7988 }, { "epoch": 0.535868799678036, "grad_norm": 0.1571156938620936, "learning_rate": 2e-05, "loss": 5.3108, "step": 7989 }, { "epoch": 0.535935875507261, "grad_norm": 0.1544235628398669, "learning_rate": 2e-05, "loss": 5.43, "step": 7990 }, { "epoch": 0.5360029513364859, "grad_norm": 0.15066702917641026, "learning_rate": 2e-05, "loss": 5.3601, "step": 7991 }, { "epoch": 0.5360700271657108, "grad_norm": 0.15227133868412107, "learning_rate": 2e-05, "loss": 5.5127, "step": 7992 }, { "epoch": 0.5361371029949358, "grad_norm": 0.15116870951332156, "learning_rate": 2e-05, "loss": 5.4102, "step": 7993 }, { "epoch": 0.5362041788241607, "grad_norm": 0.15787341664399057, "learning_rate": 2e-05, "loss": 5.4085, "step": 7994 }, { "epoch": 0.5362712546533857, "grad_norm": 0.1604584382520857, "learning_rate": 2e-05, "loss": 5.4063, "step": 7995 }, { "epoch": 0.5363383304826106, "grad_norm": 0.15536905556543887, "learning_rate": 2e-05, "loss": 5.4345, "step": 7996 }, { "epoch": 0.5364054063118355, "grad_norm": 0.1650013245091036, "learning_rate": 2e-05, "loss": 5.3215, "step": 7997 }, { "epoch": 0.5364724821410605, "grad_norm": 0.14889799733081943, "learning_rate": 2e-05, "loss": 5.3881, "step": 7998 }, { "epoch": 0.5365395579702854, "grad_norm": 0.146719626306035, "learning_rate": 2e-05, "loss": 5.4327, "step": 7999 }, { "epoch": 0.5366066337995103, "grad_norm": 0.16311873578180833, "learning_rate": 2e-05, "loss": 5.4489, "step": 8000 }, { "epoch": 0.5366737096287353, "grad_norm": 0.1592923970825995, "learning_rate": 2e-05, "loss": 5.373, "step": 8001 }, { "epoch": 0.5367407854579602, "grad_norm": 0.15029018207457187, "learning_rate": 2e-05, "loss": 5.5518, "step": 8002 }, { "epoch": 0.5368078612871852, "grad_norm": 0.1576857221011176, "learning_rate": 2e-05, "loss": 5.4873, "step": 8003 }, { "epoch": 0.5368749371164101, "grad_norm": 0.16605944061720984, "learning_rate": 2e-05, "loss": 5.4605, "step": 8004 }, { "epoch": 0.536942012945635, "grad_norm": 0.15218321986193542, "learning_rate": 2e-05, "loss": 5.4303, "step": 8005 }, { "epoch": 0.53700908877486, "grad_norm": 0.14931808586868622, "learning_rate": 2e-05, "loss": 5.4343, "step": 8006 }, { "epoch": 0.5370761646040849, "grad_norm": 0.15591041795073587, "learning_rate": 2e-05, "loss": 5.2907, "step": 8007 }, { "epoch": 0.5371432404333099, "grad_norm": 0.15107953660282145, "learning_rate": 2e-05, "loss": 5.3844, "step": 8008 }, { "epoch": 0.5372103162625348, "grad_norm": 0.15611783947836552, "learning_rate": 2e-05, "loss": 5.441, "step": 8009 }, { "epoch": 0.5372773920917597, "grad_norm": 0.16509538674029575, "learning_rate": 2e-05, "loss": 5.4243, "step": 8010 }, { "epoch": 0.5373444679209847, "grad_norm": 0.15167097556229278, "learning_rate": 2e-05, "loss": 5.4667, "step": 8011 }, { "epoch": 0.5374115437502096, "grad_norm": 0.16175926646371033, "learning_rate": 2e-05, "loss": 5.4341, "step": 8012 }, { "epoch": 0.5374786195794345, "grad_norm": 0.15207447669545077, "learning_rate": 2e-05, "loss": 5.5854, "step": 8013 }, { "epoch": 0.5375456954086595, "grad_norm": 0.1538779074577012, "learning_rate": 2e-05, "loss": 5.3303, "step": 8014 }, { "epoch": 0.5376127712378844, "grad_norm": 0.1586552295952071, "learning_rate": 2e-05, "loss": 5.3269, "step": 8015 }, { "epoch": 0.5376798470671094, "grad_norm": 0.14977918655066744, "learning_rate": 2e-05, "loss": 5.351, "step": 8016 }, { "epoch": 0.5377469228963343, "grad_norm": 0.1629294176877139, "learning_rate": 2e-05, "loss": 5.4991, "step": 8017 }, { "epoch": 0.5378139987255592, "grad_norm": 0.15521655584071947, "learning_rate": 2e-05, "loss": 5.4783, "step": 8018 }, { "epoch": 0.5378810745547842, "grad_norm": 0.1547733858381867, "learning_rate": 2e-05, "loss": 5.5949, "step": 8019 }, { "epoch": 0.5379481503840091, "grad_norm": 0.15571969094846416, "learning_rate": 2e-05, "loss": 5.3261, "step": 8020 }, { "epoch": 0.538015226213234, "grad_norm": 0.1590526815823308, "learning_rate": 2e-05, "loss": 5.4134, "step": 8021 }, { "epoch": 0.538082302042459, "grad_norm": 0.1544878333533228, "learning_rate": 2e-05, "loss": 5.3658, "step": 8022 }, { "epoch": 0.5381493778716839, "grad_norm": 0.16313952939117046, "learning_rate": 2e-05, "loss": 5.5156, "step": 8023 }, { "epoch": 0.5382164537009089, "grad_norm": 0.15649626289625718, "learning_rate": 2e-05, "loss": 5.6011, "step": 8024 }, { "epoch": 0.5382835295301338, "grad_norm": 0.15687723421261496, "learning_rate": 2e-05, "loss": 5.404, "step": 8025 }, { "epoch": 0.5383506053593587, "grad_norm": 0.15871873539009274, "learning_rate": 2e-05, "loss": 5.3097, "step": 8026 }, { "epoch": 0.5384176811885837, "grad_norm": 0.1461845716512738, "learning_rate": 2e-05, "loss": 5.4672, "step": 8027 }, { "epoch": 0.5384847570178086, "grad_norm": 0.15853344768988964, "learning_rate": 2e-05, "loss": 5.3299, "step": 8028 }, { "epoch": 0.5385518328470336, "grad_norm": 0.15213628887606623, "learning_rate": 2e-05, "loss": 5.4738, "step": 8029 }, { "epoch": 0.5386189086762585, "grad_norm": 0.15221433134102988, "learning_rate": 2e-05, "loss": 5.4687, "step": 8030 }, { "epoch": 0.5386859845054834, "grad_norm": 0.1573328532853536, "learning_rate": 2e-05, "loss": 5.5523, "step": 8031 }, { "epoch": 0.5387530603347084, "grad_norm": 0.1520564792589964, "learning_rate": 2e-05, "loss": 5.4098, "step": 8032 }, { "epoch": 0.5388201361639333, "grad_norm": 0.16625355474504297, "learning_rate": 2e-05, "loss": 5.4332, "step": 8033 }, { "epoch": 0.5388872119931583, "grad_norm": 0.1614241539250218, "learning_rate": 2e-05, "loss": 5.498, "step": 8034 }, { "epoch": 0.5389542878223832, "grad_norm": 0.15492934220212962, "learning_rate": 2e-05, "loss": 5.4339, "step": 8035 }, { "epoch": 0.5390213636516081, "grad_norm": 0.1520958852244175, "learning_rate": 2e-05, "loss": 5.3841, "step": 8036 }, { "epoch": 0.5390884394808331, "grad_norm": 0.1533364660919665, "learning_rate": 2e-05, "loss": 5.3306, "step": 8037 }, { "epoch": 0.539155515310058, "grad_norm": 0.15815556345552076, "learning_rate": 2e-05, "loss": 5.2948, "step": 8038 }, { "epoch": 0.539222591139283, "grad_norm": 0.14851044912874253, "learning_rate": 2e-05, "loss": 5.4526, "step": 8039 }, { "epoch": 0.5392896669685079, "grad_norm": 0.1488950584503278, "learning_rate": 2e-05, "loss": 5.3752, "step": 8040 }, { "epoch": 0.5393567427977328, "grad_norm": 0.1518850086834417, "learning_rate": 2e-05, "loss": 5.4446, "step": 8041 }, { "epoch": 0.5394238186269578, "grad_norm": 0.14591696973590323, "learning_rate": 2e-05, "loss": 5.5253, "step": 8042 }, { "epoch": 0.5394908944561827, "grad_norm": 0.15193375463898878, "learning_rate": 2e-05, "loss": 5.2323, "step": 8043 }, { "epoch": 0.5395579702854076, "grad_norm": 0.14801333283368776, "learning_rate": 2e-05, "loss": 5.3993, "step": 8044 }, { "epoch": 0.5396250461146326, "grad_norm": 0.1484799852460922, "learning_rate": 2e-05, "loss": 5.3766, "step": 8045 }, { "epoch": 0.5396921219438575, "grad_norm": 0.16742072210835376, "learning_rate": 2e-05, "loss": 5.5727, "step": 8046 }, { "epoch": 0.5397591977730825, "grad_norm": 0.14829870714418228, "learning_rate": 2e-05, "loss": 5.4249, "step": 8047 }, { "epoch": 0.5398262736023074, "grad_norm": 0.1451079863596441, "learning_rate": 2e-05, "loss": 5.3851, "step": 8048 }, { "epoch": 0.5398933494315323, "grad_norm": 0.14888419339404596, "learning_rate": 2e-05, "loss": 5.3461, "step": 8049 }, { "epoch": 0.5399604252607573, "grad_norm": 0.15109512798190733, "learning_rate": 2e-05, "loss": 5.4603, "step": 8050 }, { "epoch": 0.5400275010899822, "grad_norm": 0.15355653676438805, "learning_rate": 2e-05, "loss": 5.3933, "step": 8051 }, { "epoch": 0.5400945769192071, "grad_norm": 0.14620387446120525, "learning_rate": 2e-05, "loss": 5.447, "step": 8052 }, { "epoch": 0.5401616527484321, "grad_norm": 0.14980421536752347, "learning_rate": 2e-05, "loss": 5.393, "step": 8053 }, { "epoch": 0.540228728577657, "grad_norm": 0.15350446492877712, "learning_rate": 2e-05, "loss": 5.4479, "step": 8054 }, { "epoch": 0.540295804406882, "grad_norm": 0.14878839586578896, "learning_rate": 2e-05, "loss": 5.5018, "step": 8055 }, { "epoch": 0.5403628802361069, "grad_norm": 0.14763412220713984, "learning_rate": 2e-05, "loss": 5.5953, "step": 8056 }, { "epoch": 0.5404299560653318, "grad_norm": 0.15291176875534915, "learning_rate": 2e-05, "loss": 5.4157, "step": 8057 }, { "epoch": 0.5404970318945568, "grad_norm": 0.15239373384783234, "learning_rate": 2e-05, "loss": 5.502, "step": 8058 }, { "epoch": 0.5405641077237817, "grad_norm": 0.15733221740177689, "learning_rate": 2e-05, "loss": 5.3445, "step": 8059 }, { "epoch": 0.5406311835530067, "grad_norm": 0.15254205561318263, "learning_rate": 2e-05, "loss": 5.3182, "step": 8060 }, { "epoch": 0.5406982593822316, "grad_norm": 0.1533211593519062, "learning_rate": 2e-05, "loss": 5.3429, "step": 8061 }, { "epoch": 0.5407653352114565, "grad_norm": 0.14820029849654903, "learning_rate": 2e-05, "loss": 5.2802, "step": 8062 }, { "epoch": 0.5408324110406815, "grad_norm": 0.15274645840545903, "learning_rate": 2e-05, "loss": 5.4427, "step": 8063 }, { "epoch": 0.5408994868699064, "grad_norm": 0.15240707965909359, "learning_rate": 2e-05, "loss": 5.4171, "step": 8064 }, { "epoch": 0.5409665626991313, "grad_norm": 0.14828500100309236, "learning_rate": 2e-05, "loss": 5.4344, "step": 8065 }, { "epoch": 0.5410336385283563, "grad_norm": 0.1583207034406194, "learning_rate": 2e-05, "loss": 5.3632, "step": 8066 }, { "epoch": 0.5411007143575812, "grad_norm": 0.15066016369146115, "learning_rate": 2e-05, "loss": 5.3073, "step": 8067 }, { "epoch": 0.5411677901868062, "grad_norm": 0.15216978106922296, "learning_rate": 2e-05, "loss": 5.3831, "step": 8068 }, { "epoch": 0.5412348660160311, "grad_norm": 0.14689546133809245, "learning_rate": 2e-05, "loss": 5.5097, "step": 8069 }, { "epoch": 0.541301941845256, "grad_norm": 0.1545220509742145, "learning_rate": 2e-05, "loss": 5.4384, "step": 8070 }, { "epoch": 0.541369017674481, "grad_norm": 0.15963050556558567, "learning_rate": 2e-05, "loss": 5.3772, "step": 8071 }, { "epoch": 0.5414360935037059, "grad_norm": 0.14905368035222133, "learning_rate": 2e-05, "loss": 5.2755, "step": 8072 }, { "epoch": 0.5415031693329309, "grad_norm": 0.15278808188202547, "learning_rate": 2e-05, "loss": 5.3032, "step": 8073 }, { "epoch": 0.5415702451621558, "grad_norm": 0.1600204067364068, "learning_rate": 2e-05, "loss": 5.4271, "step": 8074 }, { "epoch": 0.5416373209913807, "grad_norm": 0.15468289392471843, "learning_rate": 2e-05, "loss": 5.4028, "step": 8075 }, { "epoch": 0.5417043968206057, "grad_norm": 0.15024405988841008, "learning_rate": 2e-05, "loss": 5.4302, "step": 8076 }, { "epoch": 0.5417714726498306, "grad_norm": 0.15097516736842412, "learning_rate": 2e-05, "loss": 5.3771, "step": 8077 }, { "epoch": 0.5418385484790555, "grad_norm": 0.16924045640161492, "learning_rate": 2e-05, "loss": 5.4962, "step": 8078 }, { "epoch": 0.5419056243082805, "grad_norm": 0.16557193942858897, "learning_rate": 2e-05, "loss": 5.3839, "step": 8079 }, { "epoch": 0.5419727001375054, "grad_norm": 0.15298784424758777, "learning_rate": 2e-05, "loss": 5.3785, "step": 8080 }, { "epoch": 0.5420397759667304, "grad_norm": 0.15785318577445906, "learning_rate": 2e-05, "loss": 5.3705, "step": 8081 }, { "epoch": 0.5421068517959553, "grad_norm": 0.14998916198639103, "learning_rate": 2e-05, "loss": 5.477, "step": 8082 }, { "epoch": 0.5421739276251802, "grad_norm": 0.15568489485702502, "learning_rate": 2e-05, "loss": 5.326, "step": 8083 }, { "epoch": 0.5422410034544052, "grad_norm": 0.15661898645225955, "learning_rate": 2e-05, "loss": 5.4782, "step": 8084 }, { "epoch": 0.5423080792836301, "grad_norm": 0.15230057528988178, "learning_rate": 2e-05, "loss": 5.6172, "step": 8085 }, { "epoch": 0.5423751551128551, "grad_norm": 0.14997101659835607, "learning_rate": 2e-05, "loss": 5.313, "step": 8086 }, { "epoch": 0.54244223094208, "grad_norm": 0.16562703887980654, "learning_rate": 2e-05, "loss": 5.4864, "step": 8087 }, { "epoch": 0.5425093067713049, "grad_norm": 0.15059162106226817, "learning_rate": 2e-05, "loss": 5.4423, "step": 8088 }, { "epoch": 0.5425763826005299, "grad_norm": 0.15668259444938953, "learning_rate": 2e-05, "loss": 5.4288, "step": 8089 }, { "epoch": 0.5426434584297548, "grad_norm": 0.15118820991837176, "learning_rate": 2e-05, "loss": 5.5856, "step": 8090 }, { "epoch": 0.5427105342589797, "grad_norm": 0.14994576698168324, "learning_rate": 2e-05, "loss": 5.4157, "step": 8091 }, { "epoch": 0.5427776100882047, "grad_norm": 0.15495407728836108, "learning_rate": 2e-05, "loss": 5.4173, "step": 8092 }, { "epoch": 0.5428446859174296, "grad_norm": 0.1510685631626895, "learning_rate": 2e-05, "loss": 5.5098, "step": 8093 }, { "epoch": 0.5429117617466546, "grad_norm": 0.1657283497097139, "learning_rate": 2e-05, "loss": 5.4083, "step": 8094 }, { "epoch": 0.5429788375758795, "grad_norm": 0.16906851070298348, "learning_rate": 2e-05, "loss": 5.4614, "step": 8095 }, { "epoch": 0.5430459134051044, "grad_norm": 0.15378970545017168, "learning_rate": 2e-05, "loss": 5.3852, "step": 8096 }, { "epoch": 0.5431129892343294, "grad_norm": 0.15654581865145553, "learning_rate": 2e-05, "loss": 5.4156, "step": 8097 }, { "epoch": 0.5431800650635543, "grad_norm": 0.1492311410941927, "learning_rate": 2e-05, "loss": 5.3512, "step": 8098 }, { "epoch": 0.5432471408927793, "grad_norm": 0.14887528921579526, "learning_rate": 2e-05, "loss": 5.5123, "step": 8099 }, { "epoch": 0.5433142167220042, "grad_norm": 0.14920987736337352, "learning_rate": 2e-05, "loss": 5.271, "step": 8100 }, { "epoch": 0.5433812925512291, "grad_norm": 0.15643018155938498, "learning_rate": 2e-05, "loss": 5.2471, "step": 8101 }, { "epoch": 0.5434483683804541, "grad_norm": 0.15306642677179472, "learning_rate": 2e-05, "loss": 5.4609, "step": 8102 }, { "epoch": 0.543515444209679, "grad_norm": 0.1483767014915664, "learning_rate": 2e-05, "loss": 5.2796, "step": 8103 }, { "epoch": 0.543582520038904, "grad_norm": 0.1499940666347463, "learning_rate": 2e-05, "loss": 5.405, "step": 8104 }, { "epoch": 0.5436495958681289, "grad_norm": 0.15290890286557696, "learning_rate": 2e-05, "loss": 5.3103, "step": 8105 }, { "epoch": 0.5437166716973538, "grad_norm": 0.16063755354122247, "learning_rate": 2e-05, "loss": 5.384, "step": 8106 }, { "epoch": 0.5437837475265788, "grad_norm": 0.14938905549750517, "learning_rate": 2e-05, "loss": 5.4839, "step": 8107 }, { "epoch": 0.5438508233558037, "grad_norm": 0.15442063589595864, "learning_rate": 2e-05, "loss": 5.4766, "step": 8108 }, { "epoch": 0.5439178991850286, "grad_norm": 0.14560229957882181, "learning_rate": 2e-05, "loss": 5.5143, "step": 8109 }, { "epoch": 0.5439849750142536, "grad_norm": 0.1604898632889769, "learning_rate": 2e-05, "loss": 5.4399, "step": 8110 }, { "epoch": 0.5440520508434785, "grad_norm": 0.15566707066141264, "learning_rate": 2e-05, "loss": 5.4596, "step": 8111 }, { "epoch": 0.5441191266727035, "grad_norm": 0.1535656729824476, "learning_rate": 2e-05, "loss": 5.6032, "step": 8112 }, { "epoch": 0.5441862025019284, "grad_norm": 0.17266386734283076, "learning_rate": 2e-05, "loss": 5.4398, "step": 8113 }, { "epoch": 0.5442532783311533, "grad_norm": 0.146029868382708, "learning_rate": 2e-05, "loss": 5.327, "step": 8114 }, { "epoch": 0.5443203541603783, "grad_norm": 0.15206809889530903, "learning_rate": 2e-05, "loss": 5.3191, "step": 8115 }, { "epoch": 0.5443874299896032, "grad_norm": 0.15552164728474935, "learning_rate": 2e-05, "loss": 5.3918, "step": 8116 }, { "epoch": 0.5444545058188281, "grad_norm": 0.16470811074839356, "learning_rate": 2e-05, "loss": 5.3635, "step": 8117 }, { "epoch": 0.5445215816480531, "grad_norm": 0.14775884432873293, "learning_rate": 2e-05, "loss": 5.5761, "step": 8118 }, { "epoch": 0.544588657477278, "grad_norm": 0.1509437940762139, "learning_rate": 2e-05, "loss": 5.4374, "step": 8119 }, { "epoch": 0.544655733306503, "grad_norm": 0.15460998561761677, "learning_rate": 2e-05, "loss": 5.3768, "step": 8120 }, { "epoch": 0.5447228091357279, "grad_norm": 0.15710065782948024, "learning_rate": 2e-05, "loss": 5.5801, "step": 8121 }, { "epoch": 0.5447898849649528, "grad_norm": 0.15266494961957405, "learning_rate": 2e-05, "loss": 5.5092, "step": 8122 }, { "epoch": 0.5448569607941778, "grad_norm": 0.14844959275823674, "learning_rate": 2e-05, "loss": 5.3352, "step": 8123 }, { "epoch": 0.5449240366234027, "grad_norm": 0.15587385065065248, "learning_rate": 2e-05, "loss": 5.461, "step": 8124 }, { "epoch": 0.5449911124526277, "grad_norm": 0.1506947721820962, "learning_rate": 2e-05, "loss": 5.4432, "step": 8125 }, { "epoch": 0.5450581882818526, "grad_norm": 0.15031844817494744, "learning_rate": 2e-05, "loss": 5.414, "step": 8126 }, { "epoch": 0.5451252641110775, "grad_norm": 0.15349317070741456, "learning_rate": 2e-05, "loss": 5.4502, "step": 8127 }, { "epoch": 0.5451923399403025, "grad_norm": 0.15114717028831717, "learning_rate": 2e-05, "loss": 5.3763, "step": 8128 }, { "epoch": 0.5452594157695274, "grad_norm": 0.15179821769452584, "learning_rate": 2e-05, "loss": 5.5048, "step": 8129 }, { "epoch": 0.5453264915987523, "grad_norm": 0.1562933660114895, "learning_rate": 2e-05, "loss": 5.2784, "step": 8130 }, { "epoch": 0.5453935674279773, "grad_norm": 0.15813847465953904, "learning_rate": 2e-05, "loss": 5.6686, "step": 8131 }, { "epoch": 0.5454606432572022, "grad_norm": 0.15312471722271292, "learning_rate": 2e-05, "loss": 5.3608, "step": 8132 }, { "epoch": 0.5455277190864272, "grad_norm": 0.1661455270457996, "learning_rate": 2e-05, "loss": 5.3934, "step": 8133 }, { "epoch": 0.5455947949156521, "grad_norm": 0.14587072945003215, "learning_rate": 2e-05, "loss": 5.399, "step": 8134 }, { "epoch": 0.545661870744877, "grad_norm": 0.15418472759202576, "learning_rate": 2e-05, "loss": 5.419, "step": 8135 }, { "epoch": 0.545728946574102, "grad_norm": 0.16970131618348905, "learning_rate": 2e-05, "loss": 5.3539, "step": 8136 }, { "epoch": 0.5457960224033269, "grad_norm": 0.1540771483055607, "learning_rate": 2e-05, "loss": 5.3474, "step": 8137 }, { "epoch": 0.5458630982325519, "grad_norm": 0.15523817101300477, "learning_rate": 2e-05, "loss": 5.3407, "step": 8138 }, { "epoch": 0.5459301740617768, "grad_norm": 0.15390005360158374, "learning_rate": 2e-05, "loss": 5.386, "step": 8139 }, { "epoch": 0.5459972498910017, "grad_norm": 0.16012925256326177, "learning_rate": 2e-05, "loss": 5.392, "step": 8140 }, { "epoch": 0.5460643257202267, "grad_norm": 0.1456365592647378, "learning_rate": 2e-05, "loss": 5.3015, "step": 8141 }, { "epoch": 0.5461314015494516, "grad_norm": 0.1590706853943673, "learning_rate": 2e-05, "loss": 5.3675, "step": 8142 }, { "epoch": 0.5461984773786766, "grad_norm": 0.1464833346173416, "learning_rate": 2e-05, "loss": 5.3842, "step": 8143 }, { "epoch": 0.5462655532079015, "grad_norm": 0.14794352178348644, "learning_rate": 2e-05, "loss": 5.4027, "step": 8144 }, { "epoch": 0.5463326290371264, "grad_norm": 0.15527105483825107, "learning_rate": 2e-05, "loss": 5.3568, "step": 8145 }, { "epoch": 0.5463997048663514, "grad_norm": 0.16006541181685022, "learning_rate": 2e-05, "loss": 5.484, "step": 8146 }, { "epoch": 0.5464667806955763, "grad_norm": 0.14863858390997708, "learning_rate": 2e-05, "loss": 5.4718, "step": 8147 }, { "epoch": 0.5465338565248012, "grad_norm": 0.14464507032823687, "learning_rate": 2e-05, "loss": 5.5182, "step": 8148 }, { "epoch": 0.5466009323540262, "grad_norm": 0.1549506323625637, "learning_rate": 2e-05, "loss": 5.4311, "step": 8149 }, { "epoch": 0.5466680081832511, "grad_norm": 0.1500227293738333, "learning_rate": 2e-05, "loss": 5.4301, "step": 8150 }, { "epoch": 0.5467350840124761, "grad_norm": 0.1426058472266355, "learning_rate": 2e-05, "loss": 5.4838, "step": 8151 }, { "epoch": 0.546802159841701, "grad_norm": 0.15250598125754558, "learning_rate": 2e-05, "loss": 5.4623, "step": 8152 }, { "epoch": 0.5468692356709259, "grad_norm": 0.1517587565530394, "learning_rate": 2e-05, "loss": 5.4207, "step": 8153 }, { "epoch": 0.5469363115001509, "grad_norm": 0.14926574354981506, "learning_rate": 2e-05, "loss": 5.2828, "step": 8154 }, { "epoch": 0.5470033873293758, "grad_norm": 0.14488102400689595, "learning_rate": 2e-05, "loss": 5.4292, "step": 8155 }, { "epoch": 0.5470704631586008, "grad_norm": 0.1602347930540904, "learning_rate": 2e-05, "loss": 5.3992, "step": 8156 }, { "epoch": 0.5471375389878257, "grad_norm": 0.15557251567187577, "learning_rate": 2e-05, "loss": 5.3454, "step": 8157 }, { "epoch": 0.5472046148170506, "grad_norm": 0.1497712054390959, "learning_rate": 2e-05, "loss": 5.3676, "step": 8158 }, { "epoch": 0.5472716906462756, "grad_norm": 0.1595922797846491, "learning_rate": 2e-05, "loss": 5.3894, "step": 8159 }, { "epoch": 0.5473387664755005, "grad_norm": 0.1549989749586518, "learning_rate": 2e-05, "loss": 5.4743, "step": 8160 }, { "epoch": 0.5474058423047254, "grad_norm": 0.1486848074985591, "learning_rate": 2e-05, "loss": 5.3591, "step": 8161 }, { "epoch": 0.5474729181339504, "grad_norm": 0.16112149452106658, "learning_rate": 2e-05, "loss": 5.3831, "step": 8162 }, { "epoch": 0.5475399939631753, "grad_norm": 0.15327707386091471, "learning_rate": 2e-05, "loss": 5.4831, "step": 8163 }, { "epoch": 0.5476070697924003, "grad_norm": 0.15052748103176328, "learning_rate": 2e-05, "loss": 5.3488, "step": 8164 }, { "epoch": 0.5476741456216252, "grad_norm": 0.15703251706676932, "learning_rate": 2e-05, "loss": 5.4915, "step": 8165 }, { "epoch": 0.5477412214508501, "grad_norm": 0.1650491344354078, "learning_rate": 2e-05, "loss": 5.3235, "step": 8166 }, { "epoch": 0.5478082972800751, "grad_norm": 0.14484872291207357, "learning_rate": 2e-05, "loss": 5.4289, "step": 8167 }, { "epoch": 0.5478753731093, "grad_norm": 0.1481687325235352, "learning_rate": 2e-05, "loss": 5.3546, "step": 8168 }, { "epoch": 0.547942448938525, "grad_norm": 0.15610630947208215, "learning_rate": 2e-05, "loss": 5.4806, "step": 8169 }, { "epoch": 0.5480095247677499, "grad_norm": 0.14815872625750243, "learning_rate": 2e-05, "loss": 5.3481, "step": 8170 }, { "epoch": 0.5480766005969748, "grad_norm": 0.150955230791515, "learning_rate": 2e-05, "loss": 5.2898, "step": 8171 }, { "epoch": 0.5481436764261998, "grad_norm": 0.14895838790967147, "learning_rate": 2e-05, "loss": 5.4531, "step": 8172 }, { "epoch": 0.5482107522554247, "grad_norm": 0.16348223757242036, "learning_rate": 2e-05, "loss": 5.3862, "step": 8173 }, { "epoch": 0.5482778280846496, "grad_norm": 0.14337758223595606, "learning_rate": 2e-05, "loss": 5.3856, "step": 8174 }, { "epoch": 0.5483449039138746, "grad_norm": 0.1478521685484305, "learning_rate": 2e-05, "loss": 5.3814, "step": 8175 }, { "epoch": 0.5484119797430995, "grad_norm": 0.1549844324485233, "learning_rate": 2e-05, "loss": 5.3314, "step": 8176 }, { "epoch": 0.5484790555723245, "grad_norm": 0.14953250814702862, "learning_rate": 2e-05, "loss": 5.5114, "step": 8177 }, { "epoch": 0.5485461314015494, "grad_norm": 0.15483221294368704, "learning_rate": 2e-05, "loss": 5.3505, "step": 8178 }, { "epoch": 0.5486132072307743, "grad_norm": 0.1539648789606409, "learning_rate": 2e-05, "loss": 5.5057, "step": 8179 }, { "epoch": 0.5486802830599993, "grad_norm": 0.14910185771064308, "learning_rate": 2e-05, "loss": 5.4815, "step": 8180 }, { "epoch": 0.5487473588892242, "grad_norm": 0.14982734098255227, "learning_rate": 2e-05, "loss": 5.4392, "step": 8181 }, { "epoch": 0.5488144347184493, "grad_norm": 0.17184903898059561, "learning_rate": 2e-05, "loss": 5.4937, "step": 8182 }, { "epoch": 0.5488815105476742, "grad_norm": 0.1463617048644366, "learning_rate": 2e-05, "loss": 5.4791, "step": 8183 }, { "epoch": 0.5489485863768991, "grad_norm": 0.15369862481624572, "learning_rate": 2e-05, "loss": 5.5355, "step": 8184 }, { "epoch": 0.5490156622061241, "grad_norm": 0.16368334421590436, "learning_rate": 2e-05, "loss": 5.3461, "step": 8185 }, { "epoch": 0.549082738035349, "grad_norm": 0.14788503373488934, "learning_rate": 2e-05, "loss": 5.555, "step": 8186 }, { "epoch": 0.549149813864574, "grad_norm": 0.1525385302816832, "learning_rate": 2e-05, "loss": 5.4089, "step": 8187 }, { "epoch": 0.5492168896937989, "grad_norm": 0.15208865182691986, "learning_rate": 2e-05, "loss": 5.3385, "step": 8188 }, { "epoch": 0.5492839655230238, "grad_norm": 0.15180239317362337, "learning_rate": 2e-05, "loss": 5.4101, "step": 8189 }, { "epoch": 0.5493510413522488, "grad_norm": 0.1484478447517482, "learning_rate": 2e-05, "loss": 5.4341, "step": 8190 }, { "epoch": 0.5494181171814737, "grad_norm": 0.14680706166323843, "learning_rate": 2e-05, "loss": 5.5426, "step": 8191 }, { "epoch": 0.5494851930106986, "grad_norm": 0.1490990136476689, "learning_rate": 2e-05, "loss": 5.4125, "step": 8192 }, { "epoch": 0.5495522688399236, "grad_norm": 0.1579471987845354, "learning_rate": 2e-05, "loss": 5.5765, "step": 8193 }, { "epoch": 0.5496193446691485, "grad_norm": 0.1528914595796624, "learning_rate": 2e-05, "loss": 5.374, "step": 8194 }, { "epoch": 0.5496864204983735, "grad_norm": 0.14540377351459188, "learning_rate": 2e-05, "loss": 5.4293, "step": 8195 }, { "epoch": 0.5497534963275984, "grad_norm": 0.14183129013897722, "learning_rate": 2e-05, "loss": 5.3963, "step": 8196 }, { "epoch": 0.5498205721568233, "grad_norm": 0.15339978111377944, "learning_rate": 2e-05, "loss": 5.2794, "step": 8197 }, { "epoch": 0.5498876479860483, "grad_norm": 0.15254842969028898, "learning_rate": 2e-05, "loss": 5.4779, "step": 8198 }, { "epoch": 0.5499547238152732, "grad_norm": 0.1483506084635696, "learning_rate": 2e-05, "loss": 5.4594, "step": 8199 }, { "epoch": 0.5500217996444982, "grad_norm": 0.14946801898456127, "learning_rate": 2e-05, "loss": 5.4702, "step": 8200 }, { "epoch": 0.5500888754737231, "grad_norm": 0.14675115486100937, "learning_rate": 2e-05, "loss": 5.4044, "step": 8201 }, { "epoch": 0.550155951302948, "grad_norm": 0.1489944640255545, "learning_rate": 2e-05, "loss": 5.4236, "step": 8202 }, { "epoch": 0.550223027132173, "grad_norm": 0.15119799988277488, "learning_rate": 2e-05, "loss": 5.4785, "step": 8203 }, { "epoch": 0.5502901029613979, "grad_norm": 0.14945116237678122, "learning_rate": 2e-05, "loss": 5.5602, "step": 8204 }, { "epoch": 0.5503571787906228, "grad_norm": 0.1521983044813861, "learning_rate": 2e-05, "loss": 5.4416, "step": 8205 }, { "epoch": 0.5504242546198478, "grad_norm": 0.14728473282127102, "learning_rate": 2e-05, "loss": 5.4194, "step": 8206 }, { "epoch": 0.5504913304490727, "grad_norm": 0.14928465291333395, "learning_rate": 2e-05, "loss": 5.3189, "step": 8207 }, { "epoch": 0.5505584062782977, "grad_norm": 0.15158721241806966, "learning_rate": 2e-05, "loss": 5.3966, "step": 8208 }, { "epoch": 0.5506254821075226, "grad_norm": 0.15126568607216742, "learning_rate": 2e-05, "loss": 5.5323, "step": 8209 }, { "epoch": 0.5506925579367475, "grad_norm": 0.14691165683649607, "learning_rate": 2e-05, "loss": 5.4141, "step": 8210 }, { "epoch": 0.5507596337659725, "grad_norm": 0.15129359742048487, "learning_rate": 2e-05, "loss": 5.3357, "step": 8211 }, { "epoch": 0.5508267095951974, "grad_norm": 0.14617677393442768, "learning_rate": 2e-05, "loss": 5.4249, "step": 8212 }, { "epoch": 0.5508937854244224, "grad_norm": 0.14284685255561347, "learning_rate": 2e-05, "loss": 5.3304, "step": 8213 }, { "epoch": 0.5509608612536473, "grad_norm": 0.14941189274652064, "learning_rate": 2e-05, "loss": 5.4578, "step": 8214 }, { "epoch": 0.5510279370828722, "grad_norm": 0.1508830961339486, "learning_rate": 2e-05, "loss": 5.4534, "step": 8215 }, { "epoch": 0.5510950129120972, "grad_norm": 0.14579965931340863, "learning_rate": 2e-05, "loss": 5.349, "step": 8216 }, { "epoch": 0.5511620887413221, "grad_norm": 0.14300751379554089, "learning_rate": 2e-05, "loss": 5.3539, "step": 8217 }, { "epoch": 0.551229164570547, "grad_norm": 0.15067144250405778, "learning_rate": 2e-05, "loss": 5.2961, "step": 8218 }, { "epoch": 0.551296240399772, "grad_norm": 0.15524502208483504, "learning_rate": 2e-05, "loss": 5.4327, "step": 8219 }, { "epoch": 0.5513633162289969, "grad_norm": 0.15077011770767532, "learning_rate": 2e-05, "loss": 5.4114, "step": 8220 }, { "epoch": 0.5514303920582219, "grad_norm": 0.14478195315584663, "learning_rate": 2e-05, "loss": 5.4707, "step": 8221 }, { "epoch": 0.5514974678874468, "grad_norm": 0.1461726399062049, "learning_rate": 2e-05, "loss": 5.3847, "step": 8222 }, { "epoch": 0.5515645437166717, "grad_norm": 0.15221291978975018, "learning_rate": 2e-05, "loss": 5.4812, "step": 8223 }, { "epoch": 0.5516316195458967, "grad_norm": 0.15278929647322048, "learning_rate": 2e-05, "loss": 5.4735, "step": 8224 }, { "epoch": 0.5516986953751216, "grad_norm": 0.14997534605997337, "learning_rate": 2e-05, "loss": 5.4805, "step": 8225 }, { "epoch": 0.5517657712043466, "grad_norm": 0.1579703071644443, "learning_rate": 2e-05, "loss": 5.2951, "step": 8226 }, { "epoch": 0.5518328470335715, "grad_norm": 0.1495339522761811, "learning_rate": 2e-05, "loss": 5.2821, "step": 8227 }, { "epoch": 0.5518999228627964, "grad_norm": 0.1596901287283497, "learning_rate": 2e-05, "loss": 5.4235, "step": 8228 }, { "epoch": 0.5519669986920214, "grad_norm": 0.15699932299888067, "learning_rate": 2e-05, "loss": 5.37, "step": 8229 }, { "epoch": 0.5520340745212463, "grad_norm": 0.14965354798167987, "learning_rate": 2e-05, "loss": 5.4503, "step": 8230 }, { "epoch": 0.5521011503504712, "grad_norm": 0.15275309768006534, "learning_rate": 2e-05, "loss": 5.5034, "step": 8231 }, { "epoch": 0.5521682261796962, "grad_norm": 0.15575828094070776, "learning_rate": 2e-05, "loss": 5.3314, "step": 8232 }, { "epoch": 0.5522353020089211, "grad_norm": 0.14773924391673862, "learning_rate": 2e-05, "loss": 5.3273, "step": 8233 }, { "epoch": 0.5523023778381461, "grad_norm": 0.1513193969739345, "learning_rate": 2e-05, "loss": 5.3217, "step": 8234 }, { "epoch": 0.552369453667371, "grad_norm": 0.14943311126573827, "learning_rate": 2e-05, "loss": 5.341, "step": 8235 }, { "epoch": 0.5524365294965959, "grad_norm": 0.14541158227237122, "learning_rate": 2e-05, "loss": 5.4136, "step": 8236 }, { "epoch": 0.5525036053258209, "grad_norm": 0.1464255414523395, "learning_rate": 2e-05, "loss": 5.4113, "step": 8237 }, { "epoch": 0.5525706811550458, "grad_norm": 0.1453793890002281, "learning_rate": 2e-05, "loss": 5.4202, "step": 8238 }, { "epoch": 0.5526377569842708, "grad_norm": 0.1513486855254119, "learning_rate": 2e-05, "loss": 5.4883, "step": 8239 }, { "epoch": 0.5527048328134957, "grad_norm": 0.1454040955413562, "learning_rate": 2e-05, "loss": 5.5173, "step": 8240 }, { "epoch": 0.5527719086427206, "grad_norm": 0.147686139395543, "learning_rate": 2e-05, "loss": 5.4986, "step": 8241 }, { "epoch": 0.5528389844719456, "grad_norm": 0.1484648534839245, "learning_rate": 2e-05, "loss": 5.3067, "step": 8242 }, { "epoch": 0.5529060603011705, "grad_norm": 0.15236231785040016, "learning_rate": 2e-05, "loss": 5.4138, "step": 8243 }, { "epoch": 0.5529731361303954, "grad_norm": 0.1464202313646898, "learning_rate": 2e-05, "loss": 5.4219, "step": 8244 }, { "epoch": 0.5530402119596204, "grad_norm": 0.14753626401732203, "learning_rate": 2e-05, "loss": 5.4309, "step": 8245 }, { "epoch": 0.5531072877888453, "grad_norm": 0.15829020618244374, "learning_rate": 2e-05, "loss": 5.3262, "step": 8246 }, { "epoch": 0.5531743636180703, "grad_norm": 0.149583975330147, "learning_rate": 2e-05, "loss": 5.3222, "step": 8247 }, { "epoch": 0.5532414394472952, "grad_norm": 0.15430850607343888, "learning_rate": 2e-05, "loss": 5.4779, "step": 8248 }, { "epoch": 0.5533085152765201, "grad_norm": 0.15205473966919333, "learning_rate": 2e-05, "loss": 5.5225, "step": 8249 }, { "epoch": 0.5533755911057451, "grad_norm": 0.1483926032493202, "learning_rate": 2e-05, "loss": 5.4356, "step": 8250 }, { "epoch": 0.55344266693497, "grad_norm": 0.1559469419891767, "learning_rate": 2e-05, "loss": 5.4377, "step": 8251 }, { "epoch": 0.553509742764195, "grad_norm": 0.14933209162370703, "learning_rate": 2e-05, "loss": 5.3549, "step": 8252 }, { "epoch": 0.5535768185934199, "grad_norm": 0.15315375370619175, "learning_rate": 2e-05, "loss": 5.3308, "step": 8253 }, { "epoch": 0.5536438944226448, "grad_norm": 0.14989681946708774, "learning_rate": 2e-05, "loss": 5.3834, "step": 8254 }, { "epoch": 0.5537109702518698, "grad_norm": 0.15115901346165336, "learning_rate": 2e-05, "loss": 5.426, "step": 8255 }, { "epoch": 0.5537780460810947, "grad_norm": 0.1539068700282336, "learning_rate": 2e-05, "loss": 5.3763, "step": 8256 }, { "epoch": 0.5538451219103196, "grad_norm": 0.14956411001562736, "learning_rate": 2e-05, "loss": 5.5353, "step": 8257 }, { "epoch": 0.5539121977395446, "grad_norm": 0.15525538034032874, "learning_rate": 2e-05, "loss": 5.3347, "step": 8258 }, { "epoch": 0.5539792735687695, "grad_norm": 0.14941868725469717, "learning_rate": 2e-05, "loss": 5.537, "step": 8259 }, { "epoch": 0.5540463493979945, "grad_norm": 0.14818609298542654, "learning_rate": 2e-05, "loss": 5.4207, "step": 8260 }, { "epoch": 0.5541134252272194, "grad_norm": 0.14316490909584592, "learning_rate": 2e-05, "loss": 5.3665, "step": 8261 }, { "epoch": 0.5541805010564443, "grad_norm": 0.16113802157895382, "learning_rate": 2e-05, "loss": 5.5101, "step": 8262 }, { "epoch": 0.5542475768856693, "grad_norm": 0.15035223073019627, "learning_rate": 2e-05, "loss": 5.4965, "step": 8263 }, { "epoch": 0.5543146527148942, "grad_norm": 0.14626139442926084, "learning_rate": 2e-05, "loss": 5.4502, "step": 8264 }, { "epoch": 0.5543817285441192, "grad_norm": 0.1548105072371759, "learning_rate": 2e-05, "loss": 5.2848, "step": 8265 }, { "epoch": 0.5544488043733441, "grad_norm": 0.15593069901071968, "learning_rate": 2e-05, "loss": 5.4074, "step": 8266 }, { "epoch": 0.554515880202569, "grad_norm": 0.14611449997760229, "learning_rate": 2e-05, "loss": 5.4102, "step": 8267 }, { "epoch": 0.554582956031794, "grad_norm": 0.1526076497103781, "learning_rate": 2e-05, "loss": 5.3591, "step": 8268 }, { "epoch": 0.5546500318610189, "grad_norm": 0.15429474734773252, "learning_rate": 2e-05, "loss": 5.4377, "step": 8269 }, { "epoch": 0.5547171076902438, "grad_norm": 0.1584446431419016, "learning_rate": 2e-05, "loss": 5.5587, "step": 8270 }, { "epoch": 0.5547841835194688, "grad_norm": 0.14676391101395134, "learning_rate": 2e-05, "loss": 5.435, "step": 8271 }, { "epoch": 0.5548512593486937, "grad_norm": 0.16126542449570175, "learning_rate": 2e-05, "loss": 5.4226, "step": 8272 }, { "epoch": 0.5549183351779187, "grad_norm": 0.15686653578830856, "learning_rate": 2e-05, "loss": 5.3427, "step": 8273 }, { "epoch": 0.5549854110071436, "grad_norm": 0.14716210903599206, "learning_rate": 2e-05, "loss": 5.4408, "step": 8274 }, { "epoch": 0.5550524868363685, "grad_norm": 0.15180405802146757, "learning_rate": 2e-05, "loss": 5.5865, "step": 8275 }, { "epoch": 0.5551195626655935, "grad_norm": 0.15613321663256705, "learning_rate": 2e-05, "loss": 5.4316, "step": 8276 }, { "epoch": 0.5551866384948184, "grad_norm": 0.15745664692647957, "learning_rate": 2e-05, "loss": 5.4178, "step": 8277 }, { "epoch": 0.5552537143240434, "grad_norm": 0.16100177673687216, "learning_rate": 2e-05, "loss": 5.3655, "step": 8278 }, { "epoch": 0.5553207901532683, "grad_norm": 0.15187883197244395, "learning_rate": 2e-05, "loss": 5.6164, "step": 8279 }, { "epoch": 0.5553878659824932, "grad_norm": 0.15357272727349028, "learning_rate": 2e-05, "loss": 5.4188, "step": 8280 }, { "epoch": 0.5554549418117182, "grad_norm": 0.1576566593108162, "learning_rate": 2e-05, "loss": 5.3755, "step": 8281 }, { "epoch": 0.5555220176409431, "grad_norm": 0.1512132381204248, "learning_rate": 2e-05, "loss": 5.3036, "step": 8282 }, { "epoch": 0.555589093470168, "grad_norm": 0.15307899824236904, "learning_rate": 2e-05, "loss": 5.4666, "step": 8283 }, { "epoch": 0.555656169299393, "grad_norm": 0.14970703253937534, "learning_rate": 2e-05, "loss": 5.4582, "step": 8284 }, { "epoch": 0.5557232451286179, "grad_norm": 0.15828164938874664, "learning_rate": 2e-05, "loss": 5.3389, "step": 8285 }, { "epoch": 0.5557903209578429, "grad_norm": 0.15175198013400706, "learning_rate": 2e-05, "loss": 5.4564, "step": 8286 }, { "epoch": 0.5558573967870678, "grad_norm": 0.15658384048941204, "learning_rate": 2e-05, "loss": 5.3999, "step": 8287 }, { "epoch": 0.5559244726162927, "grad_norm": 0.15828614524175227, "learning_rate": 2e-05, "loss": 5.458, "step": 8288 }, { "epoch": 0.5559915484455177, "grad_norm": 0.1510477471455415, "learning_rate": 2e-05, "loss": 5.4798, "step": 8289 }, { "epoch": 0.5560586242747426, "grad_norm": 0.15588807290749543, "learning_rate": 2e-05, "loss": 5.3888, "step": 8290 }, { "epoch": 0.5561257001039676, "grad_norm": 0.14640978231112872, "learning_rate": 2e-05, "loss": 5.4714, "step": 8291 }, { "epoch": 0.5561927759331925, "grad_norm": 0.1519501752513945, "learning_rate": 2e-05, "loss": 5.3937, "step": 8292 }, { "epoch": 0.5562598517624174, "grad_norm": 0.1532297331404758, "learning_rate": 2e-05, "loss": 5.3869, "step": 8293 }, { "epoch": 0.5563269275916424, "grad_norm": 0.15261630576950933, "learning_rate": 2e-05, "loss": 5.4323, "step": 8294 }, { "epoch": 0.5563940034208673, "grad_norm": 0.14890349447593038, "learning_rate": 2e-05, "loss": 5.5614, "step": 8295 }, { "epoch": 0.5564610792500922, "grad_norm": 0.16363340978450694, "learning_rate": 2e-05, "loss": 5.3704, "step": 8296 }, { "epoch": 0.5565281550793172, "grad_norm": 0.14895165284470765, "learning_rate": 2e-05, "loss": 5.3843, "step": 8297 }, { "epoch": 0.5565952309085421, "grad_norm": 0.1519447507001162, "learning_rate": 2e-05, "loss": 5.3611, "step": 8298 }, { "epoch": 0.5566623067377671, "grad_norm": 0.15421082823425947, "learning_rate": 2e-05, "loss": 5.4555, "step": 8299 }, { "epoch": 0.556729382566992, "grad_norm": 0.15259306204149434, "learning_rate": 2e-05, "loss": 5.5, "step": 8300 }, { "epoch": 0.5567964583962169, "grad_norm": 0.15386453067300931, "learning_rate": 2e-05, "loss": 5.6181, "step": 8301 }, { "epoch": 0.5568635342254419, "grad_norm": 0.14750306075807867, "learning_rate": 2e-05, "loss": 5.3959, "step": 8302 }, { "epoch": 0.5569306100546668, "grad_norm": 0.15748544212857554, "learning_rate": 2e-05, "loss": 5.5321, "step": 8303 }, { "epoch": 0.5569976858838918, "grad_norm": 0.15376834004399553, "learning_rate": 2e-05, "loss": 5.4912, "step": 8304 }, { "epoch": 0.5570647617131167, "grad_norm": 0.15451406242634405, "learning_rate": 2e-05, "loss": 5.5553, "step": 8305 }, { "epoch": 0.5571318375423416, "grad_norm": 0.1569305574527607, "learning_rate": 2e-05, "loss": 5.3952, "step": 8306 }, { "epoch": 0.5571989133715666, "grad_norm": 0.16330563204600257, "learning_rate": 2e-05, "loss": 5.4003, "step": 8307 }, { "epoch": 0.5572659892007915, "grad_norm": 0.14796863515420938, "learning_rate": 2e-05, "loss": 5.4393, "step": 8308 }, { "epoch": 0.5573330650300165, "grad_norm": 0.14303294047330292, "learning_rate": 2e-05, "loss": 5.4505, "step": 8309 }, { "epoch": 0.5574001408592414, "grad_norm": 0.15296427914280272, "learning_rate": 2e-05, "loss": 5.4195, "step": 8310 }, { "epoch": 0.5574672166884663, "grad_norm": 0.15969487402125518, "learning_rate": 2e-05, "loss": 5.2911, "step": 8311 }, { "epoch": 0.5575342925176913, "grad_norm": 0.15138503008053225, "learning_rate": 2e-05, "loss": 5.4948, "step": 8312 }, { "epoch": 0.5576013683469162, "grad_norm": 0.15477826481456702, "learning_rate": 2e-05, "loss": 5.3934, "step": 8313 }, { "epoch": 0.5576684441761411, "grad_norm": 0.15153825051347877, "learning_rate": 2e-05, "loss": 5.5398, "step": 8314 }, { "epoch": 0.5577355200053661, "grad_norm": 0.15199609807307748, "learning_rate": 2e-05, "loss": 5.2844, "step": 8315 }, { "epoch": 0.557802595834591, "grad_norm": 0.1533456392429621, "learning_rate": 2e-05, "loss": 5.4406, "step": 8316 }, { "epoch": 0.557869671663816, "grad_norm": 0.1528296217589683, "learning_rate": 2e-05, "loss": 5.3578, "step": 8317 }, { "epoch": 0.5579367474930409, "grad_norm": 0.14757477522552476, "learning_rate": 2e-05, "loss": 5.348, "step": 8318 }, { "epoch": 0.5580038233222658, "grad_norm": 0.15504064087558125, "learning_rate": 2e-05, "loss": 5.4367, "step": 8319 }, { "epoch": 0.5580708991514908, "grad_norm": 0.15519028766770956, "learning_rate": 2e-05, "loss": 5.501, "step": 8320 }, { "epoch": 0.5581379749807157, "grad_norm": 0.15102130424441665, "learning_rate": 2e-05, "loss": 5.492, "step": 8321 }, { "epoch": 0.5582050508099407, "grad_norm": 0.16009591837059045, "learning_rate": 2e-05, "loss": 5.5231, "step": 8322 }, { "epoch": 0.5582721266391656, "grad_norm": 0.1480009096881547, "learning_rate": 2e-05, "loss": 5.327, "step": 8323 }, { "epoch": 0.5583392024683905, "grad_norm": 0.15753859418171093, "learning_rate": 2e-05, "loss": 5.4104, "step": 8324 }, { "epoch": 0.5584062782976155, "grad_norm": 0.1551262021942478, "learning_rate": 2e-05, "loss": 5.4152, "step": 8325 }, { "epoch": 0.5584733541268404, "grad_norm": 0.1510748817423966, "learning_rate": 2e-05, "loss": 5.4139, "step": 8326 }, { "epoch": 0.5585404299560653, "grad_norm": 0.15135840424785282, "learning_rate": 2e-05, "loss": 5.3907, "step": 8327 }, { "epoch": 0.5586075057852903, "grad_norm": 0.15306869976661122, "learning_rate": 2e-05, "loss": 5.5642, "step": 8328 }, { "epoch": 0.5586745816145152, "grad_norm": 0.1460569902637703, "learning_rate": 2e-05, "loss": 5.4209, "step": 8329 }, { "epoch": 0.5587416574437402, "grad_norm": 0.14997450105466273, "learning_rate": 2e-05, "loss": 5.4753, "step": 8330 }, { "epoch": 0.5588087332729651, "grad_norm": 0.15327190038422833, "learning_rate": 2e-05, "loss": 5.4622, "step": 8331 }, { "epoch": 0.55887580910219, "grad_norm": 0.15336808672343402, "learning_rate": 2e-05, "loss": 5.5805, "step": 8332 }, { "epoch": 0.558942884931415, "grad_norm": 0.14844237368002117, "learning_rate": 2e-05, "loss": 5.6053, "step": 8333 }, { "epoch": 0.5590099607606399, "grad_norm": 0.1540317997029307, "learning_rate": 2e-05, "loss": 5.4428, "step": 8334 }, { "epoch": 0.5590770365898649, "grad_norm": 0.1517265298965081, "learning_rate": 2e-05, "loss": 5.4335, "step": 8335 }, { "epoch": 0.5591441124190898, "grad_norm": 0.15147732903925684, "learning_rate": 2e-05, "loss": 5.4442, "step": 8336 }, { "epoch": 0.5592111882483147, "grad_norm": 0.16075828219236538, "learning_rate": 2e-05, "loss": 5.5868, "step": 8337 }, { "epoch": 0.5592782640775397, "grad_norm": 0.15112382061187563, "learning_rate": 2e-05, "loss": 5.4859, "step": 8338 }, { "epoch": 0.5593453399067646, "grad_norm": 0.14966050543324746, "learning_rate": 2e-05, "loss": 5.3903, "step": 8339 }, { "epoch": 0.5594124157359895, "grad_norm": 0.14442142871261798, "learning_rate": 2e-05, "loss": 5.431, "step": 8340 }, { "epoch": 0.5594794915652145, "grad_norm": 0.15155222164790577, "learning_rate": 2e-05, "loss": 5.3823, "step": 8341 }, { "epoch": 0.5595465673944394, "grad_norm": 0.14763425168887864, "learning_rate": 2e-05, "loss": 5.2399, "step": 8342 }, { "epoch": 0.5596136432236644, "grad_norm": 0.1566247706134838, "learning_rate": 2e-05, "loss": 5.2657, "step": 8343 }, { "epoch": 0.5596807190528893, "grad_norm": 0.15172232818093487, "learning_rate": 2e-05, "loss": 5.4817, "step": 8344 }, { "epoch": 0.5597477948821142, "grad_norm": 0.1540314560578181, "learning_rate": 2e-05, "loss": 5.3569, "step": 8345 }, { "epoch": 0.5598148707113392, "grad_norm": 0.1506744315853682, "learning_rate": 2e-05, "loss": 5.3014, "step": 8346 }, { "epoch": 0.5598819465405641, "grad_norm": 0.14760550155047691, "learning_rate": 2e-05, "loss": 5.4118, "step": 8347 }, { "epoch": 0.559949022369789, "grad_norm": 0.15015271195795335, "learning_rate": 2e-05, "loss": 5.5835, "step": 8348 }, { "epoch": 0.560016098199014, "grad_norm": 0.15972147955388066, "learning_rate": 2e-05, "loss": 5.4376, "step": 8349 }, { "epoch": 0.5600831740282389, "grad_norm": 0.15091135596017882, "learning_rate": 2e-05, "loss": 5.4072, "step": 8350 }, { "epoch": 0.5601502498574639, "grad_norm": 0.1527351784464482, "learning_rate": 2e-05, "loss": 5.399, "step": 8351 }, { "epoch": 0.5602173256866888, "grad_norm": 0.14967316863810318, "learning_rate": 2e-05, "loss": 5.5019, "step": 8352 }, { "epoch": 0.5602844015159137, "grad_norm": 0.1566380364050661, "learning_rate": 2e-05, "loss": 5.5141, "step": 8353 }, { "epoch": 0.5603514773451387, "grad_norm": 0.15100020669556558, "learning_rate": 2e-05, "loss": 5.4401, "step": 8354 }, { "epoch": 0.5604185531743636, "grad_norm": 0.1619616917331649, "learning_rate": 2e-05, "loss": 5.4539, "step": 8355 }, { "epoch": 0.5604856290035886, "grad_norm": 0.1503963485529452, "learning_rate": 2e-05, "loss": 5.4231, "step": 8356 }, { "epoch": 0.5605527048328135, "grad_norm": 0.15473229706002933, "learning_rate": 2e-05, "loss": 5.3786, "step": 8357 }, { "epoch": 0.5606197806620384, "grad_norm": 0.1518823569142259, "learning_rate": 2e-05, "loss": 5.3305, "step": 8358 }, { "epoch": 0.5606868564912634, "grad_norm": 0.15099532128879317, "learning_rate": 2e-05, "loss": 5.4291, "step": 8359 }, { "epoch": 0.5607539323204883, "grad_norm": 0.154266932631846, "learning_rate": 2e-05, "loss": 5.5347, "step": 8360 }, { "epoch": 0.5608210081497133, "grad_norm": 0.15533611304582307, "learning_rate": 2e-05, "loss": 5.3042, "step": 8361 }, { "epoch": 0.5608880839789382, "grad_norm": 0.15060355062219735, "learning_rate": 2e-05, "loss": 5.4909, "step": 8362 }, { "epoch": 0.5609551598081631, "grad_norm": 0.15795026446548519, "learning_rate": 2e-05, "loss": 5.2894, "step": 8363 }, { "epoch": 0.5610222356373881, "grad_norm": 0.14633987572405066, "learning_rate": 2e-05, "loss": 5.4609, "step": 8364 }, { "epoch": 0.561089311466613, "grad_norm": 0.15038676496904652, "learning_rate": 2e-05, "loss": 5.5203, "step": 8365 }, { "epoch": 0.5611563872958379, "grad_norm": 0.1595298496524941, "learning_rate": 2e-05, "loss": 5.3551, "step": 8366 }, { "epoch": 0.5612234631250629, "grad_norm": 0.14888604149280033, "learning_rate": 2e-05, "loss": 5.5212, "step": 8367 }, { "epoch": 0.5612905389542878, "grad_norm": 0.15128021504539538, "learning_rate": 2e-05, "loss": 5.3581, "step": 8368 }, { "epoch": 0.5613576147835128, "grad_norm": 0.14697353930359078, "learning_rate": 2e-05, "loss": 5.6004, "step": 8369 }, { "epoch": 0.5614246906127377, "grad_norm": 0.1527827272727508, "learning_rate": 2e-05, "loss": 5.3725, "step": 8370 }, { "epoch": 0.5614917664419626, "grad_norm": 0.15682145793322352, "learning_rate": 2e-05, "loss": 5.4536, "step": 8371 }, { "epoch": 0.5615588422711876, "grad_norm": 0.15865152406863064, "learning_rate": 2e-05, "loss": 5.3382, "step": 8372 }, { "epoch": 0.5616259181004125, "grad_norm": 0.15137824471286823, "learning_rate": 2e-05, "loss": 5.3982, "step": 8373 }, { "epoch": 0.5616929939296375, "grad_norm": 0.15776370373826257, "learning_rate": 2e-05, "loss": 5.5403, "step": 8374 }, { "epoch": 0.5617600697588624, "grad_norm": 0.15052358763622256, "learning_rate": 2e-05, "loss": 5.4329, "step": 8375 }, { "epoch": 0.5618271455880873, "grad_norm": 0.15090561321261087, "learning_rate": 2e-05, "loss": 5.5386, "step": 8376 }, { "epoch": 0.5618942214173123, "grad_norm": 0.15004043059487152, "learning_rate": 2e-05, "loss": 5.4433, "step": 8377 }, { "epoch": 0.5619612972465372, "grad_norm": 0.14438066679810113, "learning_rate": 2e-05, "loss": 5.4691, "step": 8378 }, { "epoch": 0.5620283730757621, "grad_norm": 0.1520898745400064, "learning_rate": 2e-05, "loss": 5.3517, "step": 8379 }, { "epoch": 0.5620954489049871, "grad_norm": 0.1477204860574268, "learning_rate": 2e-05, "loss": 5.2988, "step": 8380 }, { "epoch": 0.562162524734212, "grad_norm": 0.15270897204231845, "learning_rate": 2e-05, "loss": 5.5864, "step": 8381 }, { "epoch": 0.562229600563437, "grad_norm": 0.15564392818529, "learning_rate": 2e-05, "loss": 5.4094, "step": 8382 }, { "epoch": 0.5622966763926619, "grad_norm": 0.1572526515341801, "learning_rate": 2e-05, "loss": 5.4125, "step": 8383 }, { "epoch": 0.5623637522218868, "grad_norm": 0.17177555573677625, "learning_rate": 2e-05, "loss": 5.3495, "step": 8384 }, { "epoch": 0.5624308280511118, "grad_norm": 0.14694975489229783, "learning_rate": 2e-05, "loss": 5.4277, "step": 8385 }, { "epoch": 0.5624979038803367, "grad_norm": 0.16437681213598365, "learning_rate": 2e-05, "loss": 5.5052, "step": 8386 }, { "epoch": 0.5625649797095617, "grad_norm": 0.15901036436633817, "learning_rate": 2e-05, "loss": 5.3422, "step": 8387 }, { "epoch": 0.5626320555387866, "grad_norm": 0.15006739719843395, "learning_rate": 2e-05, "loss": 5.4071, "step": 8388 }, { "epoch": 0.5626991313680115, "grad_norm": 0.15803539800186256, "learning_rate": 2e-05, "loss": 5.329, "step": 8389 }, { "epoch": 0.5627662071972365, "grad_norm": 0.1598848944290701, "learning_rate": 2e-05, "loss": 5.3917, "step": 8390 }, { "epoch": 0.5628332830264614, "grad_norm": 0.1498341090428145, "learning_rate": 2e-05, "loss": 5.4916, "step": 8391 }, { "epoch": 0.5629003588556863, "grad_norm": 0.1583015242037235, "learning_rate": 2e-05, "loss": 5.4313, "step": 8392 }, { "epoch": 0.5629674346849113, "grad_norm": 0.16476038253448347, "learning_rate": 2e-05, "loss": 5.3549, "step": 8393 }, { "epoch": 0.5630345105141362, "grad_norm": 0.14955668999017047, "learning_rate": 2e-05, "loss": 5.4349, "step": 8394 }, { "epoch": 0.5631015863433612, "grad_norm": 0.15268080141388873, "learning_rate": 2e-05, "loss": 5.4415, "step": 8395 }, { "epoch": 0.5631686621725861, "grad_norm": 0.15903865049506544, "learning_rate": 2e-05, "loss": 5.4189, "step": 8396 }, { "epoch": 0.563235738001811, "grad_norm": 0.15459186892131302, "learning_rate": 2e-05, "loss": 5.4693, "step": 8397 }, { "epoch": 0.563302813831036, "grad_norm": 0.14541495621357595, "learning_rate": 2e-05, "loss": 5.3848, "step": 8398 }, { "epoch": 0.5633698896602609, "grad_norm": 0.1458868796149539, "learning_rate": 2e-05, "loss": 5.3527, "step": 8399 }, { "epoch": 0.5634369654894859, "grad_norm": 0.16608744018766378, "learning_rate": 2e-05, "loss": 5.3655, "step": 8400 }, { "epoch": 0.5635040413187108, "grad_norm": 0.15378546428106524, "learning_rate": 2e-05, "loss": 5.3385, "step": 8401 }, { "epoch": 0.5635711171479357, "grad_norm": 0.15220201418548943, "learning_rate": 2e-05, "loss": 5.4954, "step": 8402 }, { "epoch": 0.5636381929771607, "grad_norm": 0.1530914075935242, "learning_rate": 2e-05, "loss": 5.4459, "step": 8403 }, { "epoch": 0.5637052688063856, "grad_norm": 0.14944556643990206, "learning_rate": 2e-05, "loss": 5.4765, "step": 8404 }, { "epoch": 0.5637723446356105, "grad_norm": 0.1527858542958006, "learning_rate": 2e-05, "loss": 5.4436, "step": 8405 }, { "epoch": 0.5638394204648355, "grad_norm": 0.1489460886452453, "learning_rate": 2e-05, "loss": 5.4707, "step": 8406 }, { "epoch": 0.5639064962940604, "grad_norm": 0.1488990513356671, "learning_rate": 2e-05, "loss": 5.541, "step": 8407 }, { "epoch": 0.5639735721232854, "grad_norm": 0.1495540587991848, "learning_rate": 2e-05, "loss": 5.4878, "step": 8408 }, { "epoch": 0.5640406479525103, "grad_norm": 0.15604963713016998, "learning_rate": 2e-05, "loss": 5.3188, "step": 8409 }, { "epoch": 0.5641077237817352, "grad_norm": 0.14515552657441133, "learning_rate": 2e-05, "loss": 5.2816, "step": 8410 }, { "epoch": 0.5641747996109602, "grad_norm": 0.1456144617312568, "learning_rate": 2e-05, "loss": 5.5187, "step": 8411 }, { "epoch": 0.5642418754401851, "grad_norm": 0.14910032149783178, "learning_rate": 2e-05, "loss": 5.4456, "step": 8412 }, { "epoch": 0.56430895126941, "grad_norm": 0.1506880382358528, "learning_rate": 2e-05, "loss": 5.4183, "step": 8413 }, { "epoch": 0.564376027098635, "grad_norm": 0.15452549278894273, "learning_rate": 2e-05, "loss": 5.3992, "step": 8414 }, { "epoch": 0.5644431029278599, "grad_norm": 0.15850898832514562, "learning_rate": 2e-05, "loss": 5.4203, "step": 8415 }, { "epoch": 0.5645101787570849, "grad_norm": 0.15489653897171982, "learning_rate": 2e-05, "loss": 5.4526, "step": 8416 }, { "epoch": 0.5645772545863098, "grad_norm": 0.15608648445536705, "learning_rate": 2e-05, "loss": 5.4052, "step": 8417 }, { "epoch": 0.5646443304155347, "grad_norm": 0.14779946552260964, "learning_rate": 2e-05, "loss": 5.51, "step": 8418 }, { "epoch": 0.5647114062447597, "grad_norm": 0.1444922558697645, "learning_rate": 2e-05, "loss": 5.3888, "step": 8419 }, { "epoch": 0.5647784820739846, "grad_norm": 0.1522173170663822, "learning_rate": 2e-05, "loss": 5.4694, "step": 8420 }, { "epoch": 0.5648455579032096, "grad_norm": 0.14874645412890672, "learning_rate": 2e-05, "loss": 5.4278, "step": 8421 }, { "epoch": 0.5649126337324345, "grad_norm": 0.1482926367684781, "learning_rate": 2e-05, "loss": 5.3545, "step": 8422 }, { "epoch": 0.5649797095616594, "grad_norm": 0.15230848934103225, "learning_rate": 2e-05, "loss": 5.5115, "step": 8423 }, { "epoch": 0.5650467853908844, "grad_norm": 0.15292766247489034, "learning_rate": 2e-05, "loss": 5.5586, "step": 8424 }, { "epoch": 0.5651138612201093, "grad_norm": 0.1538813957250163, "learning_rate": 2e-05, "loss": 5.4656, "step": 8425 }, { "epoch": 0.5651809370493343, "grad_norm": 0.15784646542422423, "learning_rate": 2e-05, "loss": 5.43, "step": 8426 }, { "epoch": 0.5652480128785592, "grad_norm": 0.16696873485580177, "learning_rate": 2e-05, "loss": 5.4442, "step": 8427 }, { "epoch": 0.5653150887077841, "grad_norm": 0.15656208415397616, "learning_rate": 2e-05, "loss": 5.3664, "step": 8428 }, { "epoch": 0.5653821645370091, "grad_norm": 0.15635242290140475, "learning_rate": 2e-05, "loss": 5.5388, "step": 8429 }, { "epoch": 0.565449240366234, "grad_norm": 0.1586134275186656, "learning_rate": 2e-05, "loss": 5.416, "step": 8430 }, { "epoch": 0.565516316195459, "grad_norm": 0.16847096406947315, "learning_rate": 2e-05, "loss": 5.3797, "step": 8431 }, { "epoch": 0.5655833920246839, "grad_norm": 0.14789299605653566, "learning_rate": 2e-05, "loss": 5.5262, "step": 8432 }, { "epoch": 0.5656504678539088, "grad_norm": 0.15797099555905078, "learning_rate": 2e-05, "loss": 5.4268, "step": 8433 }, { "epoch": 0.5657175436831338, "grad_norm": 0.16602562971830442, "learning_rate": 2e-05, "loss": 5.5051, "step": 8434 }, { "epoch": 0.5657846195123587, "grad_norm": 0.1503582033213871, "learning_rate": 2e-05, "loss": 5.4733, "step": 8435 }, { "epoch": 0.5658516953415836, "grad_norm": 0.1726786449498398, "learning_rate": 2e-05, "loss": 5.4292, "step": 8436 }, { "epoch": 0.5659187711708086, "grad_norm": 0.1548586247893178, "learning_rate": 2e-05, "loss": 5.4334, "step": 8437 }, { "epoch": 0.5659858470000335, "grad_norm": 0.1458126575746454, "learning_rate": 2e-05, "loss": 5.2854, "step": 8438 }, { "epoch": 0.5660529228292585, "grad_norm": 0.1589711132605877, "learning_rate": 2e-05, "loss": 5.4097, "step": 8439 }, { "epoch": 0.5661199986584834, "grad_norm": 0.15772034709294838, "learning_rate": 2e-05, "loss": 5.3893, "step": 8440 }, { "epoch": 0.5661870744877083, "grad_norm": 0.1559549534280849, "learning_rate": 2e-05, "loss": 5.5151, "step": 8441 }, { "epoch": 0.5662541503169333, "grad_norm": 0.1575522491870645, "learning_rate": 2e-05, "loss": 5.4659, "step": 8442 }, { "epoch": 0.5663212261461582, "grad_norm": 0.1517250867815135, "learning_rate": 2e-05, "loss": 5.463, "step": 8443 }, { "epoch": 0.5663883019753831, "grad_norm": 0.14753555682757594, "learning_rate": 2e-05, "loss": 5.5071, "step": 8444 }, { "epoch": 0.5664553778046081, "grad_norm": 0.16741883419211065, "learning_rate": 2e-05, "loss": 5.385, "step": 8445 }, { "epoch": 0.566522453633833, "grad_norm": 0.15371629917850108, "learning_rate": 2e-05, "loss": 5.6194, "step": 8446 }, { "epoch": 0.566589529463058, "grad_norm": 0.15199427486085162, "learning_rate": 2e-05, "loss": 5.533, "step": 8447 }, { "epoch": 0.5666566052922829, "grad_norm": 0.1578834943405736, "learning_rate": 2e-05, "loss": 5.4897, "step": 8448 }, { "epoch": 0.5667236811215078, "grad_norm": 0.15027163355541792, "learning_rate": 2e-05, "loss": 5.4022, "step": 8449 }, { "epoch": 0.5667907569507328, "grad_norm": 0.14923000798474464, "learning_rate": 2e-05, "loss": 5.4695, "step": 8450 }, { "epoch": 0.5668578327799577, "grad_norm": 0.15290069146592325, "learning_rate": 2e-05, "loss": 5.4233, "step": 8451 }, { "epoch": 0.5669249086091827, "grad_norm": 0.14664679119161939, "learning_rate": 2e-05, "loss": 5.4052, "step": 8452 }, { "epoch": 0.5669919844384076, "grad_norm": 0.15501071779918568, "learning_rate": 2e-05, "loss": 5.4496, "step": 8453 }, { "epoch": 0.5670590602676325, "grad_norm": 0.15428853737421733, "learning_rate": 2e-05, "loss": 5.3263, "step": 8454 }, { "epoch": 0.5671261360968575, "grad_norm": 0.1572244546462615, "learning_rate": 2e-05, "loss": 5.3401, "step": 8455 }, { "epoch": 0.5671932119260824, "grad_norm": 0.16084650499984968, "learning_rate": 2e-05, "loss": 5.46, "step": 8456 }, { "epoch": 0.5672602877553073, "grad_norm": 0.1507006618676138, "learning_rate": 2e-05, "loss": 5.2004, "step": 8457 }, { "epoch": 0.5673273635845323, "grad_norm": 0.15300876112799863, "learning_rate": 2e-05, "loss": 5.2884, "step": 8458 }, { "epoch": 0.5673944394137572, "grad_norm": 0.15434474423423553, "learning_rate": 2e-05, "loss": 5.3252, "step": 8459 }, { "epoch": 0.5674615152429822, "grad_norm": 0.15908972640332844, "learning_rate": 2e-05, "loss": 5.3854, "step": 8460 }, { "epoch": 0.5675285910722071, "grad_norm": 0.1523515632045846, "learning_rate": 2e-05, "loss": 5.3934, "step": 8461 }, { "epoch": 0.567595666901432, "grad_norm": 0.16371953814948748, "learning_rate": 2e-05, "loss": 5.4375, "step": 8462 }, { "epoch": 0.567662742730657, "grad_norm": 0.14901318738443828, "learning_rate": 2e-05, "loss": 5.5109, "step": 8463 }, { "epoch": 0.5677298185598819, "grad_norm": 0.15503997977357425, "learning_rate": 2e-05, "loss": 5.3269, "step": 8464 }, { "epoch": 0.5677968943891069, "grad_norm": 0.14968441527340132, "learning_rate": 2e-05, "loss": 5.3473, "step": 8465 }, { "epoch": 0.5678639702183318, "grad_norm": 0.15511608741623054, "learning_rate": 2e-05, "loss": 5.2636, "step": 8466 }, { "epoch": 0.5679310460475567, "grad_norm": 0.1581949967285679, "learning_rate": 2e-05, "loss": 5.5048, "step": 8467 }, { "epoch": 0.5679981218767817, "grad_norm": 0.1515832914460592, "learning_rate": 2e-05, "loss": 5.501, "step": 8468 }, { "epoch": 0.5680651977060066, "grad_norm": 0.16486691888531466, "learning_rate": 2e-05, "loss": 5.542, "step": 8469 }, { "epoch": 0.5681322735352315, "grad_norm": 0.15213133963866501, "learning_rate": 2e-05, "loss": 5.2827, "step": 8470 }, { "epoch": 0.5681993493644565, "grad_norm": 0.1493452317876701, "learning_rate": 2e-05, "loss": 5.4395, "step": 8471 }, { "epoch": 0.5682664251936814, "grad_norm": 0.15784873141233988, "learning_rate": 2e-05, "loss": 5.5033, "step": 8472 }, { "epoch": 0.5683335010229064, "grad_norm": 0.15493931492366828, "learning_rate": 2e-05, "loss": 5.2869, "step": 8473 }, { "epoch": 0.5684005768521313, "grad_norm": 0.14723216986482565, "learning_rate": 2e-05, "loss": 5.4557, "step": 8474 }, { "epoch": 0.5684676526813562, "grad_norm": 0.1580244169345664, "learning_rate": 2e-05, "loss": 5.4025, "step": 8475 }, { "epoch": 0.5685347285105812, "grad_norm": 0.15891621061785166, "learning_rate": 2e-05, "loss": 5.2926, "step": 8476 }, { "epoch": 0.5686018043398061, "grad_norm": 0.14390966227127866, "learning_rate": 2e-05, "loss": 5.5695, "step": 8477 }, { "epoch": 0.568668880169031, "grad_norm": 0.15767102118778262, "learning_rate": 2e-05, "loss": 5.3172, "step": 8478 }, { "epoch": 0.568735955998256, "grad_norm": 0.1588579827262618, "learning_rate": 2e-05, "loss": 5.3973, "step": 8479 }, { "epoch": 0.5688030318274809, "grad_norm": 0.14857056061278134, "learning_rate": 2e-05, "loss": 5.4922, "step": 8480 }, { "epoch": 0.5688701076567059, "grad_norm": 0.15157267489578333, "learning_rate": 2e-05, "loss": 5.4232, "step": 8481 }, { "epoch": 0.5689371834859308, "grad_norm": 0.15297987045363698, "learning_rate": 2e-05, "loss": 5.3408, "step": 8482 }, { "epoch": 0.5690042593151557, "grad_norm": 0.15751013514600856, "learning_rate": 2e-05, "loss": 5.3757, "step": 8483 }, { "epoch": 0.5690713351443807, "grad_norm": 0.15419700114481355, "learning_rate": 2e-05, "loss": 5.3453, "step": 8484 }, { "epoch": 0.5691384109736056, "grad_norm": 0.15644408889275718, "learning_rate": 2e-05, "loss": 5.4378, "step": 8485 }, { "epoch": 0.5692054868028306, "grad_norm": 0.16054423409571883, "learning_rate": 2e-05, "loss": 5.4879, "step": 8486 }, { "epoch": 0.5692725626320555, "grad_norm": 0.15267500762674455, "learning_rate": 2e-05, "loss": 5.5964, "step": 8487 }, { "epoch": 0.5693396384612804, "grad_norm": 0.14748741782925634, "learning_rate": 2e-05, "loss": 5.156, "step": 8488 }, { "epoch": 0.5694067142905054, "grad_norm": 0.15116660431178192, "learning_rate": 2e-05, "loss": 5.5364, "step": 8489 }, { "epoch": 0.5694737901197303, "grad_norm": 0.15148032766952277, "learning_rate": 2e-05, "loss": 5.1452, "step": 8490 }, { "epoch": 0.5695408659489553, "grad_norm": 0.1514017563987818, "learning_rate": 2e-05, "loss": 5.4147, "step": 8491 }, { "epoch": 0.5696079417781802, "grad_norm": 0.1557151317300217, "learning_rate": 2e-05, "loss": 5.3564, "step": 8492 }, { "epoch": 0.5696750176074051, "grad_norm": 0.15952474183624973, "learning_rate": 2e-05, "loss": 5.4136, "step": 8493 }, { "epoch": 0.5697420934366301, "grad_norm": 0.15775733799663494, "learning_rate": 2e-05, "loss": 5.6499, "step": 8494 }, { "epoch": 0.569809169265855, "grad_norm": 0.1510979571135918, "learning_rate": 2e-05, "loss": 5.3946, "step": 8495 }, { "epoch": 0.56987624509508, "grad_norm": 0.15538522493662282, "learning_rate": 2e-05, "loss": 5.4862, "step": 8496 }, { "epoch": 0.5699433209243049, "grad_norm": 0.1603480323535246, "learning_rate": 2e-05, "loss": 5.4807, "step": 8497 }, { "epoch": 0.5700103967535298, "grad_norm": 0.15633978044561814, "learning_rate": 2e-05, "loss": 5.4549, "step": 8498 }, { "epoch": 0.5700774725827548, "grad_norm": 0.16337335933401934, "learning_rate": 2e-05, "loss": 5.4423, "step": 8499 }, { "epoch": 0.5701445484119797, "grad_norm": 0.15766861441914842, "learning_rate": 2e-05, "loss": 5.4161, "step": 8500 }, { "epoch": 0.5702116242412046, "grad_norm": 0.16530274158268696, "learning_rate": 2e-05, "loss": 5.3399, "step": 8501 }, { "epoch": 0.5702787000704296, "grad_norm": 0.1532731372985545, "learning_rate": 2e-05, "loss": 5.473, "step": 8502 }, { "epoch": 0.5703457758996545, "grad_norm": 0.15409150440702696, "learning_rate": 2e-05, "loss": 5.4442, "step": 8503 }, { "epoch": 0.5704128517288795, "grad_norm": 0.15630817655691787, "learning_rate": 2e-05, "loss": 5.5776, "step": 8504 }, { "epoch": 0.5704799275581044, "grad_norm": 0.14676220544163973, "learning_rate": 2e-05, "loss": 5.3869, "step": 8505 }, { "epoch": 0.5705470033873293, "grad_norm": 0.15285598136087647, "learning_rate": 2e-05, "loss": 5.4683, "step": 8506 }, { "epoch": 0.5706140792165543, "grad_norm": 0.15380555839496768, "learning_rate": 2e-05, "loss": 5.3762, "step": 8507 }, { "epoch": 0.5706811550457792, "grad_norm": 0.1557795096847412, "learning_rate": 2e-05, "loss": 5.4825, "step": 8508 }, { "epoch": 0.5707482308750041, "grad_norm": 0.1501139312854184, "learning_rate": 2e-05, "loss": 5.4632, "step": 8509 }, { "epoch": 0.5708153067042291, "grad_norm": 0.15295836990648143, "learning_rate": 2e-05, "loss": 5.497, "step": 8510 }, { "epoch": 0.570882382533454, "grad_norm": 0.15368620779342276, "learning_rate": 2e-05, "loss": 5.4627, "step": 8511 }, { "epoch": 0.570949458362679, "grad_norm": 0.15584504688813106, "learning_rate": 2e-05, "loss": 5.4399, "step": 8512 }, { "epoch": 0.5710165341919039, "grad_norm": 0.1555409479516046, "learning_rate": 2e-05, "loss": 5.3694, "step": 8513 }, { "epoch": 0.5710836100211288, "grad_norm": 0.15630855512733477, "learning_rate": 2e-05, "loss": 5.3322, "step": 8514 }, { "epoch": 0.5711506858503538, "grad_norm": 0.16269288951280308, "learning_rate": 2e-05, "loss": 5.5705, "step": 8515 }, { "epoch": 0.5712177616795787, "grad_norm": 0.15286646185953673, "learning_rate": 2e-05, "loss": 5.4747, "step": 8516 }, { "epoch": 0.5712848375088037, "grad_norm": 0.17165244986244296, "learning_rate": 2e-05, "loss": 5.5666, "step": 8517 }, { "epoch": 0.5713519133380286, "grad_norm": 0.16310191933496454, "learning_rate": 2e-05, "loss": 5.373, "step": 8518 }, { "epoch": 0.5714189891672535, "grad_norm": 0.15491588992705904, "learning_rate": 2e-05, "loss": 5.4947, "step": 8519 }, { "epoch": 0.5714860649964785, "grad_norm": 0.1488184652997587, "learning_rate": 2e-05, "loss": 5.4525, "step": 8520 }, { "epoch": 0.5715531408257034, "grad_norm": 0.16076806729375948, "learning_rate": 2e-05, "loss": 5.4759, "step": 8521 }, { "epoch": 0.5716202166549283, "grad_norm": 0.15784546323669266, "learning_rate": 2e-05, "loss": 5.3787, "step": 8522 }, { "epoch": 0.5716872924841533, "grad_norm": 0.1475270129091651, "learning_rate": 2e-05, "loss": 5.4598, "step": 8523 }, { "epoch": 0.5717543683133782, "grad_norm": 0.15743756391300776, "learning_rate": 2e-05, "loss": 5.483, "step": 8524 }, { "epoch": 0.5718214441426032, "grad_norm": 0.16045943231644696, "learning_rate": 2e-05, "loss": 5.3571, "step": 8525 }, { "epoch": 0.5718885199718281, "grad_norm": 0.1479814786630079, "learning_rate": 2e-05, "loss": 5.467, "step": 8526 }, { "epoch": 0.571955595801053, "grad_norm": 0.15463178127161237, "learning_rate": 2e-05, "loss": 5.4312, "step": 8527 }, { "epoch": 0.572022671630278, "grad_norm": 0.1592374962219593, "learning_rate": 2e-05, "loss": 5.4586, "step": 8528 }, { "epoch": 0.5720897474595029, "grad_norm": 0.15059445433567878, "learning_rate": 2e-05, "loss": 5.4677, "step": 8529 }, { "epoch": 0.5721568232887279, "grad_norm": 0.14823185112809012, "learning_rate": 2e-05, "loss": 5.3409, "step": 8530 }, { "epoch": 0.5722238991179528, "grad_norm": 0.16273872962740665, "learning_rate": 2e-05, "loss": 5.3582, "step": 8531 }, { "epoch": 0.5722909749471777, "grad_norm": 0.15932168156229648, "learning_rate": 2e-05, "loss": 5.3014, "step": 8532 }, { "epoch": 0.5723580507764027, "grad_norm": 0.1492249650865691, "learning_rate": 2e-05, "loss": 5.3487, "step": 8533 }, { "epoch": 0.5724251266056276, "grad_norm": 0.16128652619700662, "learning_rate": 2e-05, "loss": 5.4622, "step": 8534 }, { "epoch": 0.5724922024348525, "grad_norm": 0.1635237451219097, "learning_rate": 2e-05, "loss": 5.422, "step": 8535 }, { "epoch": 0.5725592782640775, "grad_norm": 0.15529691031742313, "learning_rate": 2e-05, "loss": 5.4105, "step": 8536 }, { "epoch": 0.5726263540933024, "grad_norm": 0.1639068347936482, "learning_rate": 2e-05, "loss": 5.4737, "step": 8537 }, { "epoch": 0.5726934299225274, "grad_norm": 0.1585655661841891, "learning_rate": 2e-05, "loss": 5.4307, "step": 8538 }, { "epoch": 0.5727605057517523, "grad_norm": 0.15051279854780178, "learning_rate": 2e-05, "loss": 5.4637, "step": 8539 }, { "epoch": 0.5728275815809772, "grad_norm": 0.15578586082237403, "learning_rate": 2e-05, "loss": 5.5098, "step": 8540 }, { "epoch": 0.5728946574102022, "grad_norm": 0.1542641127007967, "learning_rate": 2e-05, "loss": 5.4233, "step": 8541 }, { "epoch": 0.5729617332394271, "grad_norm": 0.15054599561949678, "learning_rate": 2e-05, "loss": 5.4075, "step": 8542 }, { "epoch": 0.5730288090686521, "grad_norm": 0.14666911641427152, "learning_rate": 2e-05, "loss": 5.3379, "step": 8543 }, { "epoch": 0.573095884897877, "grad_norm": 0.16717758313532485, "learning_rate": 2e-05, "loss": 5.4923, "step": 8544 }, { "epoch": 0.5731629607271019, "grad_norm": 0.16081305091621845, "learning_rate": 2e-05, "loss": 5.4666, "step": 8545 }, { "epoch": 0.573230036556327, "grad_norm": 0.16024898429530782, "learning_rate": 2e-05, "loss": 5.4472, "step": 8546 }, { "epoch": 0.5732971123855519, "grad_norm": 0.16987856079958674, "learning_rate": 2e-05, "loss": 5.3511, "step": 8547 }, { "epoch": 0.5733641882147769, "grad_norm": 0.1498617095733163, "learning_rate": 2e-05, "loss": 5.516, "step": 8548 }, { "epoch": 0.5734312640440018, "grad_norm": 0.1565912790156857, "learning_rate": 2e-05, "loss": 5.4425, "step": 8549 }, { "epoch": 0.5734983398732267, "grad_norm": 0.16361976266563688, "learning_rate": 2e-05, "loss": 5.3551, "step": 8550 }, { "epoch": 0.5735654157024517, "grad_norm": 0.15707510527519383, "learning_rate": 2e-05, "loss": 5.5437, "step": 8551 }, { "epoch": 0.5736324915316766, "grad_norm": 0.15251535615294545, "learning_rate": 2e-05, "loss": 5.4033, "step": 8552 }, { "epoch": 0.5736995673609016, "grad_norm": 0.16398521839653482, "learning_rate": 2e-05, "loss": 5.3324, "step": 8553 }, { "epoch": 0.5737666431901265, "grad_norm": 0.15799944073792763, "learning_rate": 2e-05, "loss": 5.3714, "step": 8554 }, { "epoch": 0.5738337190193514, "grad_norm": 0.15290206294637326, "learning_rate": 2e-05, "loss": 5.5313, "step": 8555 }, { "epoch": 0.5739007948485764, "grad_norm": 0.15040267859944667, "learning_rate": 2e-05, "loss": 5.4401, "step": 8556 }, { "epoch": 0.5739678706778013, "grad_norm": 0.14681179804478442, "learning_rate": 2e-05, "loss": 5.3452, "step": 8557 }, { "epoch": 0.5740349465070262, "grad_norm": 0.15055088134145153, "learning_rate": 2e-05, "loss": 5.3601, "step": 8558 }, { "epoch": 0.5741020223362512, "grad_norm": 0.15029719716438483, "learning_rate": 2e-05, "loss": 5.405, "step": 8559 }, { "epoch": 0.5741690981654761, "grad_norm": 0.16052055120401004, "learning_rate": 2e-05, "loss": 5.2887, "step": 8560 }, { "epoch": 0.5742361739947011, "grad_norm": 0.1508775473341432, "learning_rate": 2e-05, "loss": 5.5038, "step": 8561 }, { "epoch": 0.574303249823926, "grad_norm": 0.16323111230011217, "learning_rate": 2e-05, "loss": 5.3105, "step": 8562 }, { "epoch": 0.5743703256531509, "grad_norm": 0.15489066263149817, "learning_rate": 2e-05, "loss": 5.4643, "step": 8563 }, { "epoch": 0.5744374014823759, "grad_norm": 0.15041064069152693, "learning_rate": 2e-05, "loss": 5.44, "step": 8564 }, { "epoch": 0.5745044773116008, "grad_norm": 0.15730774688290047, "learning_rate": 2e-05, "loss": 5.2418, "step": 8565 }, { "epoch": 0.5745715531408258, "grad_norm": 0.1628971024497423, "learning_rate": 2e-05, "loss": 5.5023, "step": 8566 }, { "epoch": 0.5746386289700507, "grad_norm": 0.15149898117403487, "learning_rate": 2e-05, "loss": 5.4767, "step": 8567 }, { "epoch": 0.5747057047992756, "grad_norm": 0.14334772115318917, "learning_rate": 2e-05, "loss": 5.3136, "step": 8568 }, { "epoch": 0.5747727806285006, "grad_norm": 0.15686673628737427, "learning_rate": 2e-05, "loss": 5.3902, "step": 8569 }, { "epoch": 0.5748398564577255, "grad_norm": 0.15839883574390537, "learning_rate": 2e-05, "loss": 5.4125, "step": 8570 }, { "epoch": 0.5749069322869504, "grad_norm": 0.16050051176774535, "learning_rate": 2e-05, "loss": 5.2628, "step": 8571 }, { "epoch": 0.5749740081161754, "grad_norm": 0.15280334030375128, "learning_rate": 2e-05, "loss": 5.4925, "step": 8572 }, { "epoch": 0.5750410839454003, "grad_norm": 0.1520709732564147, "learning_rate": 2e-05, "loss": 5.4978, "step": 8573 }, { "epoch": 0.5751081597746253, "grad_norm": 0.14802965789023728, "learning_rate": 2e-05, "loss": 5.4911, "step": 8574 }, { "epoch": 0.5751752356038502, "grad_norm": 0.14425334381879507, "learning_rate": 2e-05, "loss": 5.3619, "step": 8575 }, { "epoch": 0.5752423114330751, "grad_norm": 0.15255286720317432, "learning_rate": 2e-05, "loss": 5.3971, "step": 8576 }, { "epoch": 0.5753093872623001, "grad_norm": 0.15321988213453622, "learning_rate": 2e-05, "loss": 5.453, "step": 8577 }, { "epoch": 0.575376463091525, "grad_norm": 0.15833801092736124, "learning_rate": 2e-05, "loss": 5.4506, "step": 8578 }, { "epoch": 0.57544353892075, "grad_norm": 0.15036446336810788, "learning_rate": 2e-05, "loss": 5.5276, "step": 8579 }, { "epoch": 0.5755106147499749, "grad_norm": 0.15233910350584418, "learning_rate": 2e-05, "loss": 5.5283, "step": 8580 }, { "epoch": 0.5755776905791998, "grad_norm": 0.14175506173138822, "learning_rate": 2e-05, "loss": 5.4201, "step": 8581 }, { "epoch": 0.5756447664084248, "grad_norm": 0.15527185601578114, "learning_rate": 2e-05, "loss": 5.356, "step": 8582 }, { "epoch": 0.5757118422376497, "grad_norm": 0.14898327096475977, "learning_rate": 2e-05, "loss": 5.5281, "step": 8583 }, { "epoch": 0.5757789180668746, "grad_norm": 0.14517031645244763, "learning_rate": 2e-05, "loss": 5.3346, "step": 8584 }, { "epoch": 0.5758459938960996, "grad_norm": 0.1415778180833047, "learning_rate": 2e-05, "loss": 5.3058, "step": 8585 }, { "epoch": 0.5759130697253245, "grad_norm": 0.1531117015643341, "learning_rate": 2e-05, "loss": 5.3807, "step": 8586 }, { "epoch": 0.5759801455545495, "grad_norm": 0.1461505099057545, "learning_rate": 2e-05, "loss": 5.4977, "step": 8587 }, { "epoch": 0.5760472213837744, "grad_norm": 0.1537902322602564, "learning_rate": 2e-05, "loss": 5.4225, "step": 8588 }, { "epoch": 0.5761142972129993, "grad_norm": 0.15028844356808027, "learning_rate": 2e-05, "loss": 5.4883, "step": 8589 }, { "epoch": 0.5761813730422243, "grad_norm": 0.1444747808819393, "learning_rate": 2e-05, "loss": 5.596, "step": 8590 }, { "epoch": 0.5762484488714492, "grad_norm": 0.14569189852379952, "learning_rate": 2e-05, "loss": 5.3621, "step": 8591 }, { "epoch": 0.5763155247006742, "grad_norm": 0.1524322260566053, "learning_rate": 2e-05, "loss": 5.5452, "step": 8592 }, { "epoch": 0.5763826005298991, "grad_norm": 0.14779099342711913, "learning_rate": 2e-05, "loss": 5.4562, "step": 8593 }, { "epoch": 0.576449676359124, "grad_norm": 0.15461821949930718, "learning_rate": 2e-05, "loss": 5.3905, "step": 8594 }, { "epoch": 0.576516752188349, "grad_norm": 0.142557323648621, "learning_rate": 2e-05, "loss": 5.4335, "step": 8595 }, { "epoch": 0.5765838280175739, "grad_norm": 0.15027204505906497, "learning_rate": 2e-05, "loss": 5.4049, "step": 8596 }, { "epoch": 0.5766509038467988, "grad_norm": 0.13927610972580665, "learning_rate": 2e-05, "loss": 5.1896, "step": 8597 }, { "epoch": 0.5767179796760238, "grad_norm": 0.15902615280669757, "learning_rate": 2e-05, "loss": 5.3393, "step": 8598 }, { "epoch": 0.5767850555052487, "grad_norm": 0.15482513946240933, "learning_rate": 2e-05, "loss": 5.3666, "step": 8599 }, { "epoch": 0.5768521313344737, "grad_norm": 0.14653756416063993, "learning_rate": 2e-05, "loss": 5.4787, "step": 8600 }, { "epoch": 0.5769192071636986, "grad_norm": 0.1433803862190507, "learning_rate": 2e-05, "loss": 5.4357, "step": 8601 }, { "epoch": 0.5769862829929235, "grad_norm": 0.14490224691380985, "learning_rate": 2e-05, "loss": 5.4277, "step": 8602 }, { "epoch": 0.5770533588221485, "grad_norm": 0.15282621982609265, "learning_rate": 2e-05, "loss": 5.5355, "step": 8603 }, { "epoch": 0.5771204346513734, "grad_norm": 0.14984675740941827, "learning_rate": 2e-05, "loss": 5.5763, "step": 8604 }, { "epoch": 0.5771875104805984, "grad_norm": 0.14949590911483107, "learning_rate": 2e-05, "loss": 5.4656, "step": 8605 }, { "epoch": 0.5772545863098233, "grad_norm": 0.1484272757111534, "learning_rate": 2e-05, "loss": 5.4043, "step": 8606 }, { "epoch": 0.5773216621390482, "grad_norm": 0.14758357442180775, "learning_rate": 2e-05, "loss": 5.4978, "step": 8607 }, { "epoch": 0.5773887379682732, "grad_norm": 0.15083880612986408, "learning_rate": 2e-05, "loss": 5.3378, "step": 8608 }, { "epoch": 0.5774558137974981, "grad_norm": 0.14962234772763613, "learning_rate": 2e-05, "loss": 5.5276, "step": 8609 }, { "epoch": 0.577522889626723, "grad_norm": 0.14619116676332924, "learning_rate": 2e-05, "loss": 5.5217, "step": 8610 }, { "epoch": 0.577589965455948, "grad_norm": 0.14711263195515265, "learning_rate": 2e-05, "loss": 5.2833, "step": 8611 }, { "epoch": 0.5776570412851729, "grad_norm": 0.16135554917101225, "learning_rate": 2e-05, "loss": 5.5664, "step": 8612 }, { "epoch": 0.5777241171143979, "grad_norm": 0.15221389999030016, "learning_rate": 2e-05, "loss": 5.4288, "step": 8613 }, { "epoch": 0.5777911929436228, "grad_norm": 0.15023349568311253, "learning_rate": 2e-05, "loss": 5.5613, "step": 8614 }, { "epoch": 0.5778582687728477, "grad_norm": 0.14969010724494222, "learning_rate": 2e-05, "loss": 5.4634, "step": 8615 }, { "epoch": 0.5779253446020727, "grad_norm": 0.15772319366570378, "learning_rate": 2e-05, "loss": 5.5317, "step": 8616 }, { "epoch": 0.5779924204312976, "grad_norm": 0.148003407220582, "learning_rate": 2e-05, "loss": 5.447, "step": 8617 }, { "epoch": 0.5780594962605226, "grad_norm": 0.16058615096951714, "learning_rate": 2e-05, "loss": 5.4497, "step": 8618 }, { "epoch": 0.5781265720897475, "grad_norm": 0.15766728898582819, "learning_rate": 2e-05, "loss": 5.3589, "step": 8619 }, { "epoch": 0.5781936479189724, "grad_norm": 0.15602206491506523, "learning_rate": 2e-05, "loss": 5.323, "step": 8620 }, { "epoch": 0.5782607237481974, "grad_norm": 0.15691304385972746, "learning_rate": 2e-05, "loss": 5.4112, "step": 8621 }, { "epoch": 0.5783277995774223, "grad_norm": 0.1478039285197623, "learning_rate": 2e-05, "loss": 5.2851, "step": 8622 }, { "epoch": 0.5783948754066472, "grad_norm": 0.154952299368246, "learning_rate": 2e-05, "loss": 5.3887, "step": 8623 }, { "epoch": 0.5784619512358722, "grad_norm": 0.15429174089777317, "learning_rate": 2e-05, "loss": 5.4761, "step": 8624 }, { "epoch": 0.5785290270650971, "grad_norm": 0.1509883726878346, "learning_rate": 2e-05, "loss": 5.3181, "step": 8625 }, { "epoch": 0.5785961028943221, "grad_norm": 0.15099209334923214, "learning_rate": 2e-05, "loss": 5.5404, "step": 8626 }, { "epoch": 0.578663178723547, "grad_norm": 0.14956864424670965, "learning_rate": 2e-05, "loss": 5.5031, "step": 8627 }, { "epoch": 0.5787302545527719, "grad_norm": 0.155790562427099, "learning_rate": 2e-05, "loss": 5.5585, "step": 8628 }, { "epoch": 0.5787973303819969, "grad_norm": 0.15434029704785007, "learning_rate": 2e-05, "loss": 5.3874, "step": 8629 }, { "epoch": 0.5788644062112218, "grad_norm": 0.15200817713377052, "learning_rate": 2e-05, "loss": 5.4689, "step": 8630 }, { "epoch": 0.5789314820404468, "grad_norm": 0.15357500808042576, "learning_rate": 2e-05, "loss": 5.4218, "step": 8631 }, { "epoch": 0.5789985578696717, "grad_norm": 0.14919901220009632, "learning_rate": 2e-05, "loss": 5.4885, "step": 8632 }, { "epoch": 0.5790656336988966, "grad_norm": 0.14974535375990275, "learning_rate": 2e-05, "loss": 5.3111, "step": 8633 }, { "epoch": 0.5791327095281216, "grad_norm": 0.15833436303461257, "learning_rate": 2e-05, "loss": 5.507, "step": 8634 }, { "epoch": 0.5791997853573465, "grad_norm": 0.14707138972761813, "learning_rate": 2e-05, "loss": 5.4786, "step": 8635 }, { "epoch": 0.5792668611865714, "grad_norm": 0.15452260438981197, "learning_rate": 2e-05, "loss": 5.2219, "step": 8636 }, { "epoch": 0.5793339370157964, "grad_norm": 0.14935433747806964, "learning_rate": 2e-05, "loss": 5.5127, "step": 8637 }, { "epoch": 0.5794010128450213, "grad_norm": 0.15464981549661022, "learning_rate": 2e-05, "loss": 5.4997, "step": 8638 }, { "epoch": 0.5794680886742463, "grad_norm": 0.157334632264988, "learning_rate": 2e-05, "loss": 5.4585, "step": 8639 }, { "epoch": 0.5795351645034712, "grad_norm": 0.1625576708103907, "learning_rate": 2e-05, "loss": 5.3488, "step": 8640 }, { "epoch": 0.5796022403326961, "grad_norm": 0.1517623277498487, "learning_rate": 2e-05, "loss": 5.5131, "step": 8641 }, { "epoch": 0.5796693161619211, "grad_norm": 0.1466993544355269, "learning_rate": 2e-05, "loss": 5.3377, "step": 8642 }, { "epoch": 0.579736391991146, "grad_norm": 0.14956118539136706, "learning_rate": 2e-05, "loss": 5.4379, "step": 8643 }, { "epoch": 0.579803467820371, "grad_norm": 0.1539279684452843, "learning_rate": 2e-05, "loss": 5.4806, "step": 8644 }, { "epoch": 0.5798705436495959, "grad_norm": 0.1495728035299672, "learning_rate": 2e-05, "loss": 5.5413, "step": 8645 }, { "epoch": 0.5799376194788208, "grad_norm": 0.15107788579180761, "learning_rate": 2e-05, "loss": 5.5205, "step": 8646 }, { "epoch": 0.5800046953080458, "grad_norm": 0.1500061225590325, "learning_rate": 2e-05, "loss": 5.3582, "step": 8647 }, { "epoch": 0.5800717711372707, "grad_norm": 0.15095223200443705, "learning_rate": 2e-05, "loss": 5.464, "step": 8648 }, { "epoch": 0.5801388469664956, "grad_norm": 0.14775013286545224, "learning_rate": 2e-05, "loss": 5.3971, "step": 8649 }, { "epoch": 0.5802059227957206, "grad_norm": 0.15089382311223734, "learning_rate": 2e-05, "loss": 5.3442, "step": 8650 }, { "epoch": 0.5802729986249455, "grad_norm": 0.15078686325396487, "learning_rate": 2e-05, "loss": 5.4344, "step": 8651 }, { "epoch": 0.5803400744541705, "grad_norm": 0.15881997566108474, "learning_rate": 2e-05, "loss": 5.4802, "step": 8652 }, { "epoch": 0.5804071502833954, "grad_norm": 0.1580852621934443, "learning_rate": 2e-05, "loss": 5.4006, "step": 8653 }, { "epoch": 0.5804742261126203, "grad_norm": 0.15142093912325894, "learning_rate": 2e-05, "loss": 5.5089, "step": 8654 }, { "epoch": 0.5805413019418453, "grad_norm": 0.15605577785628091, "learning_rate": 2e-05, "loss": 5.5202, "step": 8655 }, { "epoch": 0.5806083777710702, "grad_norm": 0.15867931007048944, "learning_rate": 2e-05, "loss": 5.473, "step": 8656 }, { "epoch": 0.5806754536002952, "grad_norm": 0.14622167957139617, "learning_rate": 2e-05, "loss": 5.3977, "step": 8657 }, { "epoch": 0.5807425294295201, "grad_norm": 0.1495081301178369, "learning_rate": 2e-05, "loss": 5.5081, "step": 8658 }, { "epoch": 0.580809605258745, "grad_norm": 0.1449010424639129, "learning_rate": 2e-05, "loss": 5.4393, "step": 8659 }, { "epoch": 0.58087668108797, "grad_norm": 0.14978753373818687, "learning_rate": 2e-05, "loss": 5.2951, "step": 8660 }, { "epoch": 0.5809437569171949, "grad_norm": 0.15037946526583185, "learning_rate": 2e-05, "loss": 5.4581, "step": 8661 }, { "epoch": 0.5810108327464198, "grad_norm": 0.15034252101459084, "learning_rate": 2e-05, "loss": 5.3461, "step": 8662 }, { "epoch": 0.5810779085756448, "grad_norm": 0.14746755649560025, "learning_rate": 2e-05, "loss": 5.3971, "step": 8663 }, { "epoch": 0.5811449844048697, "grad_norm": 0.1515810716102731, "learning_rate": 2e-05, "loss": 5.4526, "step": 8664 }, { "epoch": 0.5812120602340947, "grad_norm": 0.15246472060819383, "learning_rate": 2e-05, "loss": 5.5084, "step": 8665 }, { "epoch": 0.5812791360633196, "grad_norm": 0.14619643246612837, "learning_rate": 2e-05, "loss": 5.48, "step": 8666 }, { "epoch": 0.5813462118925445, "grad_norm": 0.14463563724501308, "learning_rate": 2e-05, "loss": 5.4345, "step": 8667 }, { "epoch": 0.5814132877217695, "grad_norm": 0.14967025330107098, "learning_rate": 2e-05, "loss": 5.3727, "step": 8668 }, { "epoch": 0.5814803635509944, "grad_norm": 0.15323025678471655, "learning_rate": 2e-05, "loss": 5.3254, "step": 8669 }, { "epoch": 0.5815474393802194, "grad_norm": 0.15488387855973437, "learning_rate": 2e-05, "loss": 5.4894, "step": 8670 }, { "epoch": 0.5816145152094443, "grad_norm": 0.15079064272231602, "learning_rate": 2e-05, "loss": 5.5202, "step": 8671 }, { "epoch": 0.5816815910386692, "grad_norm": 0.1500582287344305, "learning_rate": 2e-05, "loss": 5.4252, "step": 8672 }, { "epoch": 0.5817486668678942, "grad_norm": 0.15904504669681427, "learning_rate": 2e-05, "loss": 5.4369, "step": 8673 }, { "epoch": 0.5818157426971191, "grad_norm": 0.1555620133684657, "learning_rate": 2e-05, "loss": 5.3578, "step": 8674 }, { "epoch": 0.581882818526344, "grad_norm": 0.14649108300471184, "learning_rate": 2e-05, "loss": 5.512, "step": 8675 }, { "epoch": 0.581949894355569, "grad_norm": 0.14941318925975142, "learning_rate": 2e-05, "loss": 5.4196, "step": 8676 }, { "epoch": 0.5820169701847939, "grad_norm": 0.1566208517127794, "learning_rate": 2e-05, "loss": 5.4039, "step": 8677 }, { "epoch": 0.5820840460140189, "grad_norm": 0.15297291471239258, "learning_rate": 2e-05, "loss": 5.4503, "step": 8678 }, { "epoch": 0.5821511218432438, "grad_norm": 0.1482053595166656, "learning_rate": 2e-05, "loss": 5.3572, "step": 8679 }, { "epoch": 0.5822181976724687, "grad_norm": 0.16247910154933398, "learning_rate": 2e-05, "loss": 5.3421, "step": 8680 }, { "epoch": 0.5822852735016937, "grad_norm": 0.16278085119046273, "learning_rate": 2e-05, "loss": 5.4595, "step": 8681 }, { "epoch": 0.5823523493309186, "grad_norm": 0.14970950342309064, "learning_rate": 2e-05, "loss": 5.5275, "step": 8682 }, { "epoch": 0.5824194251601436, "grad_norm": 0.15248440455876264, "learning_rate": 2e-05, "loss": 5.3943, "step": 8683 }, { "epoch": 0.5824865009893685, "grad_norm": 0.14877721359589688, "learning_rate": 2e-05, "loss": 5.4713, "step": 8684 }, { "epoch": 0.5825535768185934, "grad_norm": 0.14670771232136393, "learning_rate": 2e-05, "loss": 5.3215, "step": 8685 }, { "epoch": 0.5826206526478184, "grad_norm": 0.15845894577513872, "learning_rate": 2e-05, "loss": 5.2497, "step": 8686 }, { "epoch": 0.5826877284770433, "grad_norm": 0.1493083590699222, "learning_rate": 2e-05, "loss": 5.6382, "step": 8687 }, { "epoch": 0.5827548043062682, "grad_norm": 0.15163362557220086, "learning_rate": 2e-05, "loss": 5.3369, "step": 8688 }, { "epoch": 0.5828218801354932, "grad_norm": 0.14690026845085968, "learning_rate": 2e-05, "loss": 5.3159, "step": 8689 }, { "epoch": 0.5828889559647181, "grad_norm": 0.15041990751849008, "learning_rate": 2e-05, "loss": 5.3997, "step": 8690 }, { "epoch": 0.5829560317939431, "grad_norm": 0.14637649412162704, "learning_rate": 2e-05, "loss": 5.3079, "step": 8691 }, { "epoch": 0.583023107623168, "grad_norm": 0.15368592469053938, "learning_rate": 2e-05, "loss": 5.4518, "step": 8692 }, { "epoch": 0.5830901834523929, "grad_norm": 0.15008431725827764, "learning_rate": 2e-05, "loss": 5.4894, "step": 8693 }, { "epoch": 0.5831572592816179, "grad_norm": 0.14811427073337313, "learning_rate": 2e-05, "loss": 5.3398, "step": 8694 }, { "epoch": 0.5832243351108428, "grad_norm": 0.15298513137330197, "learning_rate": 2e-05, "loss": 5.4544, "step": 8695 }, { "epoch": 0.5832914109400678, "grad_norm": 0.15913387367719692, "learning_rate": 2e-05, "loss": 5.3372, "step": 8696 }, { "epoch": 0.5833584867692927, "grad_norm": 0.14905406864271634, "learning_rate": 2e-05, "loss": 5.3861, "step": 8697 }, { "epoch": 0.5834255625985176, "grad_norm": 0.14702890394814533, "learning_rate": 2e-05, "loss": 5.4921, "step": 8698 }, { "epoch": 0.5834926384277426, "grad_norm": 0.14747104948148848, "learning_rate": 2e-05, "loss": 5.4621, "step": 8699 }, { "epoch": 0.5835597142569675, "grad_norm": 0.14838175103241177, "learning_rate": 2e-05, "loss": 5.3451, "step": 8700 }, { "epoch": 0.5836267900861924, "grad_norm": 0.16273353203939042, "learning_rate": 2e-05, "loss": 5.3402, "step": 8701 }, { "epoch": 0.5836938659154174, "grad_norm": 0.1703747235686989, "learning_rate": 2e-05, "loss": 5.4204, "step": 8702 }, { "epoch": 0.5837609417446423, "grad_norm": 0.14762153161104624, "learning_rate": 2e-05, "loss": 5.4542, "step": 8703 }, { "epoch": 0.5838280175738673, "grad_norm": 0.15304392382718993, "learning_rate": 2e-05, "loss": 5.4481, "step": 8704 }, { "epoch": 0.5838950934030922, "grad_norm": 0.1649276109848706, "learning_rate": 2e-05, "loss": 5.3596, "step": 8705 }, { "epoch": 0.5839621692323171, "grad_norm": 0.1492339953187639, "learning_rate": 2e-05, "loss": 5.2345, "step": 8706 }, { "epoch": 0.5840292450615421, "grad_norm": 0.14827794142748124, "learning_rate": 2e-05, "loss": 5.4714, "step": 8707 }, { "epoch": 0.584096320890767, "grad_norm": 0.15557255666461126, "learning_rate": 2e-05, "loss": 5.3502, "step": 8708 }, { "epoch": 0.584163396719992, "grad_norm": 0.15878419621344114, "learning_rate": 2e-05, "loss": 5.4628, "step": 8709 }, { "epoch": 0.5842304725492169, "grad_norm": 0.14632259881723292, "learning_rate": 2e-05, "loss": 5.419, "step": 8710 }, { "epoch": 0.5842975483784418, "grad_norm": 0.15554312893471306, "learning_rate": 2e-05, "loss": 5.5588, "step": 8711 }, { "epoch": 0.5843646242076668, "grad_norm": 0.15573423450809643, "learning_rate": 2e-05, "loss": 5.4488, "step": 8712 }, { "epoch": 0.5844317000368917, "grad_norm": 0.14899995973325983, "learning_rate": 2e-05, "loss": 5.6146, "step": 8713 }, { "epoch": 0.5844987758661166, "grad_norm": 0.14768121962819405, "learning_rate": 2e-05, "loss": 5.3994, "step": 8714 }, { "epoch": 0.5845658516953416, "grad_norm": 0.1478431721826911, "learning_rate": 2e-05, "loss": 5.3581, "step": 8715 }, { "epoch": 0.5846329275245665, "grad_norm": 0.15806888094975308, "learning_rate": 2e-05, "loss": 5.3943, "step": 8716 }, { "epoch": 0.5847000033537915, "grad_norm": 0.1543160938434998, "learning_rate": 2e-05, "loss": 5.2983, "step": 8717 }, { "epoch": 0.5847670791830164, "grad_norm": 0.15163897528374848, "learning_rate": 2e-05, "loss": 5.4034, "step": 8718 }, { "epoch": 0.5848341550122413, "grad_norm": 0.1509395297787899, "learning_rate": 2e-05, "loss": 5.4692, "step": 8719 }, { "epoch": 0.5849012308414663, "grad_norm": 0.14345263874149802, "learning_rate": 2e-05, "loss": 5.5621, "step": 8720 }, { "epoch": 0.5849683066706912, "grad_norm": 0.15016928341448763, "learning_rate": 2e-05, "loss": 5.3968, "step": 8721 }, { "epoch": 0.5850353824999162, "grad_norm": 0.1498635382124269, "learning_rate": 2e-05, "loss": 5.4653, "step": 8722 }, { "epoch": 0.5851024583291411, "grad_norm": 0.1466433415935404, "learning_rate": 2e-05, "loss": 5.5192, "step": 8723 }, { "epoch": 0.585169534158366, "grad_norm": 0.1533085633863625, "learning_rate": 2e-05, "loss": 5.4353, "step": 8724 }, { "epoch": 0.585236609987591, "grad_norm": 0.1476152400685941, "learning_rate": 2e-05, "loss": 5.3866, "step": 8725 }, { "epoch": 0.5853036858168159, "grad_norm": 0.14277235124397147, "learning_rate": 2e-05, "loss": 5.4596, "step": 8726 }, { "epoch": 0.5853707616460408, "grad_norm": 0.1464658126254063, "learning_rate": 2e-05, "loss": 5.4605, "step": 8727 }, { "epoch": 0.5854378374752658, "grad_norm": 0.15652674781349785, "learning_rate": 2e-05, "loss": 5.4795, "step": 8728 }, { "epoch": 0.5855049133044907, "grad_norm": 0.15785700928576116, "learning_rate": 2e-05, "loss": 5.4652, "step": 8729 }, { "epoch": 0.5855719891337157, "grad_norm": 0.15586874209524912, "learning_rate": 2e-05, "loss": 5.4796, "step": 8730 }, { "epoch": 0.5856390649629406, "grad_norm": 0.14574553079645114, "learning_rate": 2e-05, "loss": 5.3797, "step": 8731 }, { "epoch": 0.5857061407921655, "grad_norm": 0.15099975368312132, "learning_rate": 2e-05, "loss": 5.59, "step": 8732 }, { "epoch": 0.5857732166213905, "grad_norm": 0.14902805613316478, "learning_rate": 2e-05, "loss": 5.3218, "step": 8733 }, { "epoch": 0.5858402924506154, "grad_norm": 0.1661730264304653, "learning_rate": 2e-05, "loss": 5.4354, "step": 8734 }, { "epoch": 0.5859073682798404, "grad_norm": 0.1508564499564276, "learning_rate": 2e-05, "loss": 5.4786, "step": 8735 }, { "epoch": 0.5859744441090653, "grad_norm": 0.14311010699980223, "learning_rate": 2e-05, "loss": 5.3874, "step": 8736 }, { "epoch": 0.5860415199382902, "grad_norm": 0.14904420789835232, "learning_rate": 2e-05, "loss": 5.5228, "step": 8737 }, { "epoch": 0.5861085957675152, "grad_norm": 0.1556357334506694, "learning_rate": 2e-05, "loss": 5.3244, "step": 8738 }, { "epoch": 0.5861756715967401, "grad_norm": 0.15140347221123093, "learning_rate": 2e-05, "loss": 5.4213, "step": 8739 }, { "epoch": 0.586242747425965, "grad_norm": 0.15320729691699353, "learning_rate": 2e-05, "loss": 5.3884, "step": 8740 }, { "epoch": 0.58630982325519, "grad_norm": 0.14548429643239827, "learning_rate": 2e-05, "loss": 5.3524, "step": 8741 }, { "epoch": 0.5863768990844149, "grad_norm": 0.15058650940862708, "learning_rate": 2e-05, "loss": 5.4926, "step": 8742 }, { "epoch": 0.5864439749136399, "grad_norm": 0.15358892042143654, "learning_rate": 2e-05, "loss": 5.4409, "step": 8743 }, { "epoch": 0.5865110507428648, "grad_norm": 0.1602322171621314, "learning_rate": 2e-05, "loss": 5.3973, "step": 8744 }, { "epoch": 0.5865781265720897, "grad_norm": 0.1525516449533458, "learning_rate": 2e-05, "loss": 5.3323, "step": 8745 }, { "epoch": 0.5866452024013147, "grad_norm": 0.1420611975441344, "learning_rate": 2e-05, "loss": 5.3502, "step": 8746 }, { "epoch": 0.5867122782305396, "grad_norm": 0.15317911303668252, "learning_rate": 2e-05, "loss": 5.4126, "step": 8747 }, { "epoch": 0.5867793540597646, "grad_norm": 0.1463971184920436, "learning_rate": 2e-05, "loss": 5.4125, "step": 8748 }, { "epoch": 0.5868464298889895, "grad_norm": 0.15568549715294291, "learning_rate": 2e-05, "loss": 5.543, "step": 8749 }, { "epoch": 0.5869135057182144, "grad_norm": 0.16221370647917868, "learning_rate": 2e-05, "loss": 5.333, "step": 8750 }, { "epoch": 0.5869805815474394, "grad_norm": 0.14504002772650904, "learning_rate": 2e-05, "loss": 5.5276, "step": 8751 }, { "epoch": 0.5870476573766643, "grad_norm": 0.1505336552532555, "learning_rate": 2e-05, "loss": 5.3534, "step": 8752 }, { "epoch": 0.5871147332058892, "grad_norm": 0.15831358197969014, "learning_rate": 2e-05, "loss": 5.4686, "step": 8753 }, { "epoch": 0.5871818090351142, "grad_norm": 0.1508450940589923, "learning_rate": 2e-05, "loss": 5.4349, "step": 8754 }, { "epoch": 0.5872488848643391, "grad_norm": 0.14435226319272487, "learning_rate": 2e-05, "loss": 5.4297, "step": 8755 }, { "epoch": 0.5873159606935641, "grad_norm": 0.14693773458207238, "learning_rate": 2e-05, "loss": 5.3976, "step": 8756 }, { "epoch": 0.587383036522789, "grad_norm": 0.14880970374627545, "learning_rate": 2e-05, "loss": 5.316, "step": 8757 }, { "epoch": 0.5874501123520139, "grad_norm": 0.15068365364221886, "learning_rate": 2e-05, "loss": 5.5078, "step": 8758 }, { "epoch": 0.5875171881812389, "grad_norm": 0.1473978343011179, "learning_rate": 2e-05, "loss": 5.5645, "step": 8759 }, { "epoch": 0.5875842640104638, "grad_norm": 0.1491389426823022, "learning_rate": 2e-05, "loss": 5.4446, "step": 8760 }, { "epoch": 0.5876513398396888, "grad_norm": 0.15211512279357692, "learning_rate": 2e-05, "loss": 5.4481, "step": 8761 }, { "epoch": 0.5877184156689137, "grad_norm": 0.1528041917664228, "learning_rate": 2e-05, "loss": 5.2886, "step": 8762 }, { "epoch": 0.5877854914981386, "grad_norm": 0.16073926672025496, "learning_rate": 2e-05, "loss": 5.4855, "step": 8763 }, { "epoch": 0.5878525673273636, "grad_norm": 0.14880235881383952, "learning_rate": 2e-05, "loss": 5.3312, "step": 8764 }, { "epoch": 0.5879196431565885, "grad_norm": 0.16835233979054567, "learning_rate": 2e-05, "loss": 5.4955, "step": 8765 }, { "epoch": 0.5879867189858135, "grad_norm": 0.16639674098825077, "learning_rate": 2e-05, "loss": 5.4484, "step": 8766 }, { "epoch": 0.5880537948150384, "grad_norm": 0.1538497668588462, "learning_rate": 2e-05, "loss": 5.575, "step": 8767 }, { "epoch": 0.5881208706442633, "grad_norm": 0.15890129546206222, "learning_rate": 2e-05, "loss": 5.4221, "step": 8768 }, { "epoch": 0.5881879464734883, "grad_norm": 0.17211123174197812, "learning_rate": 2e-05, "loss": 5.4138, "step": 8769 }, { "epoch": 0.5882550223027132, "grad_norm": 0.15443322983470872, "learning_rate": 2e-05, "loss": 5.3607, "step": 8770 }, { "epoch": 0.5883220981319381, "grad_norm": 0.15251224409526148, "learning_rate": 2e-05, "loss": 5.4108, "step": 8771 }, { "epoch": 0.5883891739611631, "grad_norm": 0.15057479568942625, "learning_rate": 2e-05, "loss": 5.3998, "step": 8772 }, { "epoch": 0.588456249790388, "grad_norm": 0.15578061987911376, "learning_rate": 2e-05, "loss": 5.3484, "step": 8773 }, { "epoch": 0.588523325619613, "grad_norm": 0.1574270684163253, "learning_rate": 2e-05, "loss": 5.4587, "step": 8774 }, { "epoch": 0.5885904014488379, "grad_norm": 0.16912723265986712, "learning_rate": 2e-05, "loss": 5.4822, "step": 8775 }, { "epoch": 0.5886574772780628, "grad_norm": 0.15426908464973407, "learning_rate": 2e-05, "loss": 5.393, "step": 8776 }, { "epoch": 0.5887245531072878, "grad_norm": 0.1564141620867365, "learning_rate": 2e-05, "loss": 5.4802, "step": 8777 }, { "epoch": 0.5887916289365127, "grad_norm": 0.151244797757271, "learning_rate": 2e-05, "loss": 5.3199, "step": 8778 }, { "epoch": 0.5888587047657377, "grad_norm": 0.15100083114036214, "learning_rate": 2e-05, "loss": 5.4421, "step": 8779 }, { "epoch": 0.5889257805949626, "grad_norm": 0.15086341577375117, "learning_rate": 2e-05, "loss": 5.4385, "step": 8780 }, { "epoch": 0.5889928564241875, "grad_norm": 0.15337600486921457, "learning_rate": 2e-05, "loss": 5.529, "step": 8781 }, { "epoch": 0.5890599322534125, "grad_norm": 0.15275766290777798, "learning_rate": 2e-05, "loss": 5.5029, "step": 8782 }, { "epoch": 0.5891270080826374, "grad_norm": 0.15633750940380195, "learning_rate": 2e-05, "loss": 5.3764, "step": 8783 }, { "epoch": 0.5891940839118623, "grad_norm": 0.15456946383232334, "learning_rate": 2e-05, "loss": 5.3549, "step": 8784 }, { "epoch": 0.5892611597410873, "grad_norm": 0.1505622510952308, "learning_rate": 2e-05, "loss": 5.4961, "step": 8785 }, { "epoch": 0.5893282355703122, "grad_norm": 0.14800569858307644, "learning_rate": 2e-05, "loss": 5.5091, "step": 8786 }, { "epoch": 0.5893953113995372, "grad_norm": 0.15303397532274587, "learning_rate": 2e-05, "loss": 5.3848, "step": 8787 }, { "epoch": 0.5894623872287621, "grad_norm": 0.14953130495786032, "learning_rate": 2e-05, "loss": 5.3636, "step": 8788 }, { "epoch": 0.589529463057987, "grad_norm": 0.150699678709202, "learning_rate": 2e-05, "loss": 5.5591, "step": 8789 }, { "epoch": 0.589596538887212, "grad_norm": 0.15293690226004542, "learning_rate": 2e-05, "loss": 5.2997, "step": 8790 }, { "epoch": 0.5896636147164369, "grad_norm": 0.15376145945413505, "learning_rate": 2e-05, "loss": 5.4271, "step": 8791 }, { "epoch": 0.5897306905456619, "grad_norm": 0.15453766894650206, "learning_rate": 2e-05, "loss": 5.4126, "step": 8792 }, { "epoch": 0.5897977663748868, "grad_norm": 0.15467264753043383, "learning_rate": 2e-05, "loss": 5.3035, "step": 8793 }, { "epoch": 0.5898648422041117, "grad_norm": 0.1601982612780676, "learning_rate": 2e-05, "loss": 5.364, "step": 8794 }, { "epoch": 0.5899319180333367, "grad_norm": 0.14701155877034677, "learning_rate": 2e-05, "loss": 5.4126, "step": 8795 }, { "epoch": 0.5899989938625616, "grad_norm": 0.1582543274108118, "learning_rate": 2e-05, "loss": 5.4991, "step": 8796 }, { "epoch": 0.5900660696917865, "grad_norm": 0.15092202583774714, "learning_rate": 2e-05, "loss": 5.4175, "step": 8797 }, { "epoch": 0.5901331455210115, "grad_norm": 0.15008316999657986, "learning_rate": 2e-05, "loss": 5.371, "step": 8798 }, { "epoch": 0.5902002213502364, "grad_norm": 0.1488604708816072, "learning_rate": 2e-05, "loss": 5.3322, "step": 8799 }, { "epoch": 0.5902672971794614, "grad_norm": 0.1455049610103134, "learning_rate": 2e-05, "loss": 5.4288, "step": 8800 }, { "epoch": 0.5903343730086863, "grad_norm": 0.15095212032579314, "learning_rate": 2e-05, "loss": 5.3333, "step": 8801 }, { "epoch": 0.5904014488379112, "grad_norm": 0.15006345416432376, "learning_rate": 2e-05, "loss": 5.2272, "step": 8802 }, { "epoch": 0.5904685246671362, "grad_norm": 0.15471554698154746, "learning_rate": 2e-05, "loss": 5.4045, "step": 8803 }, { "epoch": 0.5905356004963611, "grad_norm": 0.15882903950296492, "learning_rate": 2e-05, "loss": 5.5098, "step": 8804 }, { "epoch": 0.590602676325586, "grad_norm": 0.1502529446843442, "learning_rate": 2e-05, "loss": 5.466, "step": 8805 }, { "epoch": 0.590669752154811, "grad_norm": 0.15422559697711233, "learning_rate": 2e-05, "loss": 5.3502, "step": 8806 }, { "epoch": 0.5907368279840359, "grad_norm": 0.1466931563386025, "learning_rate": 2e-05, "loss": 5.3959, "step": 8807 }, { "epoch": 0.5908039038132609, "grad_norm": 0.1463584303765083, "learning_rate": 2e-05, "loss": 5.5239, "step": 8808 }, { "epoch": 0.5908709796424858, "grad_norm": 0.1474392999956731, "learning_rate": 2e-05, "loss": 5.4275, "step": 8809 }, { "epoch": 0.5909380554717107, "grad_norm": 0.14681279975545108, "learning_rate": 2e-05, "loss": 5.4834, "step": 8810 }, { "epoch": 0.5910051313009357, "grad_norm": 0.14878097057856787, "learning_rate": 2e-05, "loss": 5.4846, "step": 8811 }, { "epoch": 0.5910722071301606, "grad_norm": 0.14755433805499688, "learning_rate": 2e-05, "loss": 5.3902, "step": 8812 }, { "epoch": 0.5911392829593856, "grad_norm": 0.15002076284300978, "learning_rate": 2e-05, "loss": 5.4606, "step": 8813 }, { "epoch": 0.5912063587886105, "grad_norm": 0.16097257801349588, "learning_rate": 2e-05, "loss": 5.5569, "step": 8814 }, { "epoch": 0.5912734346178354, "grad_norm": 0.15143243367999465, "learning_rate": 2e-05, "loss": 5.4039, "step": 8815 }, { "epoch": 0.5913405104470604, "grad_norm": 0.14902178212856623, "learning_rate": 2e-05, "loss": 5.4693, "step": 8816 }, { "epoch": 0.5914075862762853, "grad_norm": 0.15303101804086944, "learning_rate": 2e-05, "loss": 5.441, "step": 8817 }, { "epoch": 0.5914746621055103, "grad_norm": 0.14822877066437226, "learning_rate": 2e-05, "loss": 5.371, "step": 8818 }, { "epoch": 0.5915417379347352, "grad_norm": 0.14620393842620572, "learning_rate": 2e-05, "loss": 5.5829, "step": 8819 }, { "epoch": 0.5916088137639601, "grad_norm": 0.15042747283183533, "learning_rate": 2e-05, "loss": 5.5114, "step": 8820 }, { "epoch": 0.5916758895931851, "grad_norm": 0.14848912753475052, "learning_rate": 2e-05, "loss": 5.3085, "step": 8821 }, { "epoch": 0.59174296542241, "grad_norm": 0.1558023135492045, "learning_rate": 2e-05, "loss": 5.344, "step": 8822 }, { "epoch": 0.5918100412516349, "grad_norm": 0.15142136410637244, "learning_rate": 2e-05, "loss": 5.5198, "step": 8823 }, { "epoch": 0.5918771170808599, "grad_norm": 0.14378647422472746, "learning_rate": 2e-05, "loss": 5.4238, "step": 8824 }, { "epoch": 0.5919441929100848, "grad_norm": 0.14477620586075624, "learning_rate": 2e-05, "loss": 5.3464, "step": 8825 }, { "epoch": 0.5920112687393098, "grad_norm": 0.15062018799494054, "learning_rate": 2e-05, "loss": 5.3695, "step": 8826 }, { "epoch": 0.5920783445685347, "grad_norm": 0.1472723782177999, "learning_rate": 2e-05, "loss": 5.2854, "step": 8827 }, { "epoch": 0.5921454203977596, "grad_norm": 0.15590281269151698, "learning_rate": 2e-05, "loss": 5.3876, "step": 8828 }, { "epoch": 0.5922124962269846, "grad_norm": 0.16134416191514084, "learning_rate": 2e-05, "loss": 5.4697, "step": 8829 }, { "epoch": 0.5922795720562095, "grad_norm": 0.1497795154290064, "learning_rate": 2e-05, "loss": 5.3853, "step": 8830 }, { "epoch": 0.5923466478854345, "grad_norm": 0.14519322025989478, "learning_rate": 2e-05, "loss": 5.5394, "step": 8831 }, { "epoch": 0.5924137237146594, "grad_norm": 0.14891145226364666, "learning_rate": 2e-05, "loss": 5.4205, "step": 8832 }, { "epoch": 0.5924807995438843, "grad_norm": 0.1449104003968067, "learning_rate": 2e-05, "loss": 5.524, "step": 8833 }, { "epoch": 0.5925478753731093, "grad_norm": 0.15972137450439053, "learning_rate": 2e-05, "loss": 5.2747, "step": 8834 }, { "epoch": 0.5926149512023342, "grad_norm": 0.14647847425655983, "learning_rate": 2e-05, "loss": 5.3486, "step": 8835 }, { "epoch": 0.5926820270315591, "grad_norm": 0.14692885704065697, "learning_rate": 2e-05, "loss": 5.3099, "step": 8836 }, { "epoch": 0.5927491028607841, "grad_norm": 0.1466938688729281, "learning_rate": 2e-05, "loss": 5.3028, "step": 8837 }, { "epoch": 0.592816178690009, "grad_norm": 0.15140888071202277, "learning_rate": 2e-05, "loss": 5.4157, "step": 8838 }, { "epoch": 0.592883254519234, "grad_norm": 0.14976122617388915, "learning_rate": 2e-05, "loss": 5.4218, "step": 8839 }, { "epoch": 0.5929503303484589, "grad_norm": 0.1463176207441066, "learning_rate": 2e-05, "loss": 5.4584, "step": 8840 }, { "epoch": 0.5930174061776838, "grad_norm": 0.15123461824090273, "learning_rate": 2e-05, "loss": 5.3717, "step": 8841 }, { "epoch": 0.5930844820069088, "grad_norm": 0.15420596881265727, "learning_rate": 2e-05, "loss": 5.3603, "step": 8842 }, { "epoch": 0.5931515578361337, "grad_norm": 0.15191458839036573, "learning_rate": 2e-05, "loss": 5.5254, "step": 8843 }, { "epoch": 0.5932186336653587, "grad_norm": 0.1584990979277572, "learning_rate": 2e-05, "loss": 5.4646, "step": 8844 }, { "epoch": 0.5932857094945836, "grad_norm": 0.15741512259944881, "learning_rate": 2e-05, "loss": 5.3565, "step": 8845 }, { "epoch": 0.5933527853238085, "grad_norm": 0.15129449148215446, "learning_rate": 2e-05, "loss": 5.3926, "step": 8846 }, { "epoch": 0.5934198611530335, "grad_norm": 0.1536966984038622, "learning_rate": 2e-05, "loss": 5.3251, "step": 8847 }, { "epoch": 0.5934869369822584, "grad_norm": 0.1512710435396921, "learning_rate": 2e-05, "loss": 5.468, "step": 8848 }, { "epoch": 0.5935540128114833, "grad_norm": 0.14920483829222977, "learning_rate": 2e-05, "loss": 5.4675, "step": 8849 }, { "epoch": 0.5936210886407083, "grad_norm": 0.14694906259879723, "learning_rate": 2e-05, "loss": 5.406, "step": 8850 }, { "epoch": 0.5936881644699332, "grad_norm": 0.15723538333804743, "learning_rate": 2e-05, "loss": 5.5063, "step": 8851 }, { "epoch": 0.5937552402991582, "grad_norm": 0.15494196955677642, "learning_rate": 2e-05, "loss": 5.271, "step": 8852 }, { "epoch": 0.5938223161283831, "grad_norm": 0.1422266801981949, "learning_rate": 2e-05, "loss": 5.4324, "step": 8853 }, { "epoch": 0.593889391957608, "grad_norm": 0.15197762811219703, "learning_rate": 2e-05, "loss": 5.4033, "step": 8854 }, { "epoch": 0.593956467786833, "grad_norm": 0.15494950475552513, "learning_rate": 2e-05, "loss": 5.4748, "step": 8855 }, { "epoch": 0.5940235436160579, "grad_norm": 0.14821978094517224, "learning_rate": 2e-05, "loss": 5.4709, "step": 8856 }, { "epoch": 0.5940906194452829, "grad_norm": 0.15077508705203266, "learning_rate": 2e-05, "loss": 5.3598, "step": 8857 }, { "epoch": 0.5941576952745078, "grad_norm": 0.1565816179765634, "learning_rate": 2e-05, "loss": 5.5734, "step": 8858 }, { "epoch": 0.5942247711037327, "grad_norm": 0.15365586134073278, "learning_rate": 2e-05, "loss": 5.4582, "step": 8859 }, { "epoch": 0.5942918469329577, "grad_norm": 0.15048139359099189, "learning_rate": 2e-05, "loss": 5.3969, "step": 8860 }, { "epoch": 0.5943589227621826, "grad_norm": 0.150811568999591, "learning_rate": 2e-05, "loss": 5.3276, "step": 8861 }, { "epoch": 0.5944259985914075, "grad_norm": 0.15092886287780755, "learning_rate": 2e-05, "loss": 5.4561, "step": 8862 }, { "epoch": 0.5944930744206325, "grad_norm": 0.14549703481940948, "learning_rate": 2e-05, "loss": 5.4942, "step": 8863 }, { "epoch": 0.5945601502498574, "grad_norm": 0.14336584877253794, "learning_rate": 2e-05, "loss": 5.378, "step": 8864 }, { "epoch": 0.5946272260790824, "grad_norm": 0.15517953947836868, "learning_rate": 2e-05, "loss": 5.4235, "step": 8865 }, { "epoch": 0.5946943019083073, "grad_norm": 0.1428738626246211, "learning_rate": 2e-05, "loss": 5.3731, "step": 8866 }, { "epoch": 0.5947613777375322, "grad_norm": 0.146434521644361, "learning_rate": 2e-05, "loss": 5.2707, "step": 8867 }, { "epoch": 0.5948284535667572, "grad_norm": 0.14955164488681366, "learning_rate": 2e-05, "loss": 5.3268, "step": 8868 }, { "epoch": 0.5948955293959821, "grad_norm": 0.15687633087552846, "learning_rate": 2e-05, "loss": 5.3419, "step": 8869 }, { "epoch": 0.594962605225207, "grad_norm": 0.1479935358929816, "learning_rate": 2e-05, "loss": 5.4921, "step": 8870 }, { "epoch": 0.595029681054432, "grad_norm": 0.1487618535371918, "learning_rate": 2e-05, "loss": 5.3852, "step": 8871 }, { "epoch": 0.5950967568836569, "grad_norm": 0.15928673058803572, "learning_rate": 2e-05, "loss": 5.4098, "step": 8872 }, { "epoch": 0.5951638327128819, "grad_norm": 0.14457748767101677, "learning_rate": 2e-05, "loss": 5.25, "step": 8873 }, { "epoch": 0.5952309085421068, "grad_norm": 0.15600442750653748, "learning_rate": 2e-05, "loss": 5.3895, "step": 8874 }, { "epoch": 0.5952979843713317, "grad_norm": 0.16991893907091926, "learning_rate": 2e-05, "loss": 5.4992, "step": 8875 }, { "epoch": 0.5953650602005567, "grad_norm": 0.14544147552320166, "learning_rate": 2e-05, "loss": 5.261, "step": 8876 }, { "epoch": 0.5954321360297816, "grad_norm": 0.15875974068444304, "learning_rate": 2e-05, "loss": 5.2855, "step": 8877 }, { "epoch": 0.5954992118590066, "grad_norm": 0.15245151896879838, "learning_rate": 2e-05, "loss": 5.2538, "step": 8878 }, { "epoch": 0.5955662876882315, "grad_norm": 0.15972850062409083, "learning_rate": 2e-05, "loss": 5.3397, "step": 8879 }, { "epoch": 0.5956333635174564, "grad_norm": 0.14231089919871664, "learning_rate": 2e-05, "loss": 5.5062, "step": 8880 }, { "epoch": 0.5957004393466814, "grad_norm": 0.15443227688556793, "learning_rate": 2e-05, "loss": 5.5852, "step": 8881 }, { "epoch": 0.5957675151759063, "grad_norm": 0.15525189850159074, "learning_rate": 2e-05, "loss": 5.4149, "step": 8882 }, { "epoch": 0.5958345910051313, "grad_norm": 0.15696008000451464, "learning_rate": 2e-05, "loss": 5.5641, "step": 8883 }, { "epoch": 0.5959016668343562, "grad_norm": 0.16725399544822428, "learning_rate": 2e-05, "loss": 5.373, "step": 8884 }, { "epoch": 0.5959687426635811, "grad_norm": 0.1525040201455231, "learning_rate": 2e-05, "loss": 5.4119, "step": 8885 }, { "epoch": 0.5960358184928061, "grad_norm": 0.1529089919821779, "learning_rate": 2e-05, "loss": 5.4538, "step": 8886 }, { "epoch": 0.596102894322031, "grad_norm": 0.15093118441903522, "learning_rate": 2e-05, "loss": 5.4142, "step": 8887 }, { "epoch": 0.596169970151256, "grad_norm": 0.14637428960971616, "learning_rate": 2e-05, "loss": 5.5788, "step": 8888 }, { "epoch": 0.5962370459804809, "grad_norm": 0.14642373207553308, "learning_rate": 2e-05, "loss": 5.4267, "step": 8889 }, { "epoch": 0.5963041218097058, "grad_norm": 0.1486679721175502, "learning_rate": 2e-05, "loss": 5.4528, "step": 8890 }, { "epoch": 0.5963711976389308, "grad_norm": 0.15194954236013772, "learning_rate": 2e-05, "loss": 5.4519, "step": 8891 }, { "epoch": 0.5964382734681557, "grad_norm": 0.14245107867738388, "learning_rate": 2e-05, "loss": 5.4298, "step": 8892 }, { "epoch": 0.5965053492973806, "grad_norm": 0.15475412675159164, "learning_rate": 2e-05, "loss": 5.55, "step": 8893 }, { "epoch": 0.5965724251266056, "grad_norm": 0.14823897285588536, "learning_rate": 2e-05, "loss": 5.3835, "step": 8894 }, { "epoch": 0.5966395009558305, "grad_norm": 0.15126370052147312, "learning_rate": 2e-05, "loss": 5.2158, "step": 8895 }, { "epoch": 0.5967065767850555, "grad_norm": 0.1451987252409673, "learning_rate": 2e-05, "loss": 5.3464, "step": 8896 }, { "epoch": 0.5967736526142804, "grad_norm": 0.15031167868281378, "learning_rate": 2e-05, "loss": 5.3617, "step": 8897 }, { "epoch": 0.5968407284435053, "grad_norm": 0.1499711073306696, "learning_rate": 2e-05, "loss": 5.3728, "step": 8898 }, { "epoch": 0.5969078042727303, "grad_norm": 0.14863845802135792, "learning_rate": 2e-05, "loss": 5.4947, "step": 8899 }, { "epoch": 0.5969748801019552, "grad_norm": 0.1451730208039769, "learning_rate": 2e-05, "loss": 5.3627, "step": 8900 }, { "epoch": 0.5970419559311801, "grad_norm": 0.14232818708456565, "learning_rate": 2e-05, "loss": 5.2539, "step": 8901 }, { "epoch": 0.5971090317604051, "grad_norm": 0.15301233643819712, "learning_rate": 2e-05, "loss": 5.3959, "step": 8902 }, { "epoch": 0.59717610758963, "grad_norm": 0.15493625714660492, "learning_rate": 2e-05, "loss": 5.5294, "step": 8903 }, { "epoch": 0.597243183418855, "grad_norm": 0.1492409459278405, "learning_rate": 2e-05, "loss": 5.3244, "step": 8904 }, { "epoch": 0.5973102592480799, "grad_norm": 0.15179562103103253, "learning_rate": 2e-05, "loss": 5.4665, "step": 8905 }, { "epoch": 0.5973773350773048, "grad_norm": 0.1559493357643404, "learning_rate": 2e-05, "loss": 5.4124, "step": 8906 }, { "epoch": 0.5974444109065298, "grad_norm": 0.1515639706775748, "learning_rate": 2e-05, "loss": 5.4017, "step": 8907 }, { "epoch": 0.5975114867357547, "grad_norm": 0.1582043120088637, "learning_rate": 2e-05, "loss": 5.3281, "step": 8908 }, { "epoch": 0.5975785625649798, "grad_norm": 0.16273746537546904, "learning_rate": 2e-05, "loss": 5.2426, "step": 8909 }, { "epoch": 0.5976456383942047, "grad_norm": 0.14320682599348275, "learning_rate": 2e-05, "loss": 5.441, "step": 8910 }, { "epoch": 0.5977127142234296, "grad_norm": 0.1551317255514292, "learning_rate": 2e-05, "loss": 5.493, "step": 8911 }, { "epoch": 0.5977797900526546, "grad_norm": 0.15097490445583012, "learning_rate": 2e-05, "loss": 5.4219, "step": 8912 }, { "epoch": 0.5978468658818795, "grad_norm": 0.15075016233572527, "learning_rate": 2e-05, "loss": 5.4953, "step": 8913 }, { "epoch": 0.5979139417111045, "grad_norm": 0.15144316849451236, "learning_rate": 2e-05, "loss": 5.396, "step": 8914 }, { "epoch": 0.5979810175403294, "grad_norm": 0.16954722654102236, "learning_rate": 2e-05, "loss": 5.3834, "step": 8915 }, { "epoch": 0.5980480933695543, "grad_norm": 0.15327777413444815, "learning_rate": 2e-05, "loss": 5.3998, "step": 8916 }, { "epoch": 0.5981151691987793, "grad_norm": 0.15399343943509464, "learning_rate": 2e-05, "loss": 5.308, "step": 8917 }, { "epoch": 0.5981822450280042, "grad_norm": 0.16477241042965346, "learning_rate": 2e-05, "loss": 5.3287, "step": 8918 }, { "epoch": 0.5982493208572291, "grad_norm": 0.14939703489727796, "learning_rate": 2e-05, "loss": 5.3944, "step": 8919 }, { "epoch": 0.5983163966864541, "grad_norm": 0.15550363792641597, "learning_rate": 2e-05, "loss": 5.4456, "step": 8920 }, { "epoch": 0.598383472515679, "grad_norm": 0.149789436956052, "learning_rate": 2e-05, "loss": 5.4601, "step": 8921 }, { "epoch": 0.598450548344904, "grad_norm": 0.154481091032482, "learning_rate": 2e-05, "loss": 5.3816, "step": 8922 }, { "epoch": 0.5985176241741289, "grad_norm": 0.1471984083604363, "learning_rate": 2e-05, "loss": 5.4198, "step": 8923 }, { "epoch": 0.5985847000033538, "grad_norm": 0.1499960494633731, "learning_rate": 2e-05, "loss": 5.327, "step": 8924 }, { "epoch": 0.5986517758325788, "grad_norm": 0.1514858658530323, "learning_rate": 2e-05, "loss": 5.4259, "step": 8925 }, { "epoch": 0.5987188516618037, "grad_norm": 0.15397640652636038, "learning_rate": 2e-05, "loss": 5.2469, "step": 8926 }, { "epoch": 0.5987859274910287, "grad_norm": 0.14888293374851314, "learning_rate": 2e-05, "loss": 5.4897, "step": 8927 }, { "epoch": 0.5988530033202536, "grad_norm": 0.15499072903526667, "learning_rate": 2e-05, "loss": 5.5377, "step": 8928 }, { "epoch": 0.5989200791494785, "grad_norm": 0.14915319380950565, "learning_rate": 2e-05, "loss": 5.3622, "step": 8929 }, { "epoch": 0.5989871549787035, "grad_norm": 0.1488767623706366, "learning_rate": 2e-05, "loss": 5.2749, "step": 8930 }, { "epoch": 0.5990542308079284, "grad_norm": 0.1543742664705043, "learning_rate": 2e-05, "loss": 5.4106, "step": 8931 }, { "epoch": 0.5991213066371534, "grad_norm": 0.1581348833177915, "learning_rate": 2e-05, "loss": 5.4469, "step": 8932 }, { "epoch": 0.5991883824663783, "grad_norm": 0.15434236512304, "learning_rate": 2e-05, "loss": 5.315, "step": 8933 }, { "epoch": 0.5992554582956032, "grad_norm": 0.15668518433819012, "learning_rate": 2e-05, "loss": 5.4687, "step": 8934 }, { "epoch": 0.5993225341248282, "grad_norm": 0.15123681198636502, "learning_rate": 2e-05, "loss": 5.627, "step": 8935 }, { "epoch": 0.5993896099540531, "grad_norm": 0.15573952265536892, "learning_rate": 2e-05, "loss": 5.5373, "step": 8936 }, { "epoch": 0.599456685783278, "grad_norm": 0.1434198130682002, "learning_rate": 2e-05, "loss": 5.4687, "step": 8937 }, { "epoch": 0.599523761612503, "grad_norm": 0.1505180301689619, "learning_rate": 2e-05, "loss": 5.2637, "step": 8938 }, { "epoch": 0.5995908374417279, "grad_norm": 0.15136053683321693, "learning_rate": 2e-05, "loss": 5.4481, "step": 8939 }, { "epoch": 0.5996579132709529, "grad_norm": 0.14919847398430724, "learning_rate": 2e-05, "loss": 5.3648, "step": 8940 }, { "epoch": 0.5997249891001778, "grad_norm": 0.15543809135227818, "learning_rate": 2e-05, "loss": 5.4377, "step": 8941 }, { "epoch": 0.5997920649294027, "grad_norm": 0.14770963959610758, "learning_rate": 2e-05, "loss": 5.3781, "step": 8942 }, { "epoch": 0.5998591407586277, "grad_norm": 0.1511334872050148, "learning_rate": 2e-05, "loss": 5.3772, "step": 8943 }, { "epoch": 0.5999262165878526, "grad_norm": 0.15218741822313644, "learning_rate": 2e-05, "loss": 5.3406, "step": 8944 }, { "epoch": 0.5999932924170776, "grad_norm": 0.1542486836345395, "learning_rate": 2e-05, "loss": 5.3126, "step": 8945 }, { "epoch": 0.6000603682463025, "grad_norm": 0.15451660383886048, "learning_rate": 2e-05, "loss": 5.4012, "step": 8946 }, { "epoch": 0.6001274440755274, "grad_norm": 0.16138440751668123, "learning_rate": 2e-05, "loss": 5.2503, "step": 8947 }, { "epoch": 0.6001945199047524, "grad_norm": 0.1492753627709322, "learning_rate": 2e-05, "loss": 5.363, "step": 8948 }, { "epoch": 0.6002615957339773, "grad_norm": 0.15169511522524723, "learning_rate": 2e-05, "loss": 5.4841, "step": 8949 }, { "epoch": 0.6003286715632022, "grad_norm": 0.15038400126487497, "learning_rate": 2e-05, "loss": 5.4553, "step": 8950 }, { "epoch": 0.6003957473924272, "grad_norm": 0.1500153997312142, "learning_rate": 2e-05, "loss": 5.442, "step": 8951 }, { "epoch": 0.6004628232216521, "grad_norm": 0.15408022739030752, "learning_rate": 2e-05, "loss": 5.3068, "step": 8952 }, { "epoch": 0.6005298990508771, "grad_norm": 0.15076596353841898, "learning_rate": 2e-05, "loss": 5.3745, "step": 8953 }, { "epoch": 0.600596974880102, "grad_norm": 0.14442188605308076, "learning_rate": 2e-05, "loss": 5.4601, "step": 8954 }, { "epoch": 0.6006640507093269, "grad_norm": 0.1543946648116208, "learning_rate": 2e-05, "loss": 5.6082, "step": 8955 }, { "epoch": 0.6007311265385519, "grad_norm": 0.1556852252036636, "learning_rate": 2e-05, "loss": 5.4085, "step": 8956 }, { "epoch": 0.6007982023677768, "grad_norm": 0.15877478210477042, "learning_rate": 2e-05, "loss": 5.4809, "step": 8957 }, { "epoch": 0.6008652781970018, "grad_norm": 0.15112758104741317, "learning_rate": 2e-05, "loss": 5.3632, "step": 8958 }, { "epoch": 0.6009323540262267, "grad_norm": 0.15153221356382965, "learning_rate": 2e-05, "loss": 5.4139, "step": 8959 }, { "epoch": 0.6009994298554516, "grad_norm": 0.15484897207698378, "learning_rate": 2e-05, "loss": 5.5857, "step": 8960 }, { "epoch": 0.6010665056846766, "grad_norm": 0.1579073279975335, "learning_rate": 2e-05, "loss": 5.5867, "step": 8961 }, { "epoch": 0.6011335815139015, "grad_norm": 0.15465219090088217, "learning_rate": 2e-05, "loss": 5.4069, "step": 8962 }, { "epoch": 0.6012006573431264, "grad_norm": 0.1531967369602585, "learning_rate": 2e-05, "loss": 5.3471, "step": 8963 }, { "epoch": 0.6012677331723514, "grad_norm": 0.15759248277407034, "learning_rate": 2e-05, "loss": 5.4964, "step": 8964 }, { "epoch": 0.6013348090015763, "grad_norm": 0.14787314299165863, "learning_rate": 2e-05, "loss": 5.4404, "step": 8965 }, { "epoch": 0.6014018848308013, "grad_norm": 0.15122246578523943, "learning_rate": 2e-05, "loss": 5.3929, "step": 8966 }, { "epoch": 0.6014689606600262, "grad_norm": 0.1661789610460339, "learning_rate": 2e-05, "loss": 5.3971, "step": 8967 }, { "epoch": 0.6015360364892511, "grad_norm": 0.14737555893194235, "learning_rate": 2e-05, "loss": 5.4786, "step": 8968 }, { "epoch": 0.6016031123184761, "grad_norm": 0.15464646754633127, "learning_rate": 2e-05, "loss": 5.4399, "step": 8969 }, { "epoch": 0.601670188147701, "grad_norm": 0.15411467148849903, "learning_rate": 2e-05, "loss": 5.5472, "step": 8970 }, { "epoch": 0.601737263976926, "grad_norm": 0.15377254433098472, "learning_rate": 2e-05, "loss": 5.3934, "step": 8971 }, { "epoch": 0.6018043398061509, "grad_norm": 0.1548168442946976, "learning_rate": 2e-05, "loss": 5.3979, "step": 8972 }, { "epoch": 0.6018714156353758, "grad_norm": 0.14305001840246215, "learning_rate": 2e-05, "loss": 5.5346, "step": 8973 }, { "epoch": 0.6019384914646008, "grad_norm": 0.14413154924886673, "learning_rate": 2e-05, "loss": 5.3403, "step": 8974 }, { "epoch": 0.6020055672938257, "grad_norm": 0.14838643282752045, "learning_rate": 2e-05, "loss": 5.2971, "step": 8975 }, { "epoch": 0.6020726431230506, "grad_norm": 0.14871205345921573, "learning_rate": 2e-05, "loss": 5.366, "step": 8976 }, { "epoch": 0.6021397189522756, "grad_norm": 0.15082078703633256, "learning_rate": 2e-05, "loss": 5.4716, "step": 8977 }, { "epoch": 0.6022067947815005, "grad_norm": 0.14928597363732266, "learning_rate": 2e-05, "loss": 5.4194, "step": 8978 }, { "epoch": 0.6022738706107255, "grad_norm": 0.15023228404067324, "learning_rate": 2e-05, "loss": 5.4024, "step": 8979 }, { "epoch": 0.6023409464399504, "grad_norm": 0.15025713005870747, "learning_rate": 2e-05, "loss": 5.2615, "step": 8980 }, { "epoch": 0.6024080222691753, "grad_norm": 0.15403690207945142, "learning_rate": 2e-05, "loss": 5.3488, "step": 8981 }, { "epoch": 0.6024750980984003, "grad_norm": 0.148328267223966, "learning_rate": 2e-05, "loss": 5.3357, "step": 8982 }, { "epoch": 0.6025421739276252, "grad_norm": 0.1553964314173716, "learning_rate": 2e-05, "loss": 5.428, "step": 8983 }, { "epoch": 0.6026092497568502, "grad_norm": 0.14356550963384257, "learning_rate": 2e-05, "loss": 5.4449, "step": 8984 }, { "epoch": 0.6026763255860751, "grad_norm": 0.142019170935836, "learning_rate": 2e-05, "loss": 5.4277, "step": 8985 }, { "epoch": 0.6027434014153, "grad_norm": 0.14850268751193504, "learning_rate": 2e-05, "loss": 5.5613, "step": 8986 }, { "epoch": 0.602810477244525, "grad_norm": 0.1507454832402468, "learning_rate": 2e-05, "loss": 5.4407, "step": 8987 }, { "epoch": 0.6028775530737499, "grad_norm": 0.15543196132438, "learning_rate": 2e-05, "loss": 5.4988, "step": 8988 }, { "epoch": 0.6029446289029748, "grad_norm": 0.14764897724193848, "learning_rate": 2e-05, "loss": 5.3523, "step": 8989 }, { "epoch": 0.6030117047321998, "grad_norm": 0.14991705591401405, "learning_rate": 2e-05, "loss": 5.3284, "step": 8990 }, { "epoch": 0.6030787805614247, "grad_norm": 0.1553581354160704, "learning_rate": 2e-05, "loss": 5.2727, "step": 8991 }, { "epoch": 0.6031458563906497, "grad_norm": 0.15500576069988062, "learning_rate": 2e-05, "loss": 5.4232, "step": 8992 }, { "epoch": 0.6032129322198746, "grad_norm": 0.1463429364609225, "learning_rate": 2e-05, "loss": 5.4765, "step": 8993 }, { "epoch": 0.6032800080490995, "grad_norm": 0.15837228189100128, "learning_rate": 2e-05, "loss": 5.3945, "step": 8994 }, { "epoch": 0.6033470838783245, "grad_norm": 0.14725921696478694, "learning_rate": 2e-05, "loss": 5.4978, "step": 8995 }, { "epoch": 0.6034141597075494, "grad_norm": 0.1525603640052087, "learning_rate": 2e-05, "loss": 5.3951, "step": 8996 }, { "epoch": 0.6034812355367744, "grad_norm": 0.15198840903220093, "learning_rate": 2e-05, "loss": 5.3688, "step": 8997 }, { "epoch": 0.6035483113659993, "grad_norm": 0.15108819788733924, "learning_rate": 2e-05, "loss": 5.3679, "step": 8998 }, { "epoch": 0.6036153871952242, "grad_norm": 0.16321541558801106, "learning_rate": 2e-05, "loss": 5.4714, "step": 8999 }, { "epoch": 0.6036824630244492, "grad_norm": 0.15236251150740707, "learning_rate": 2e-05, "loss": 5.5963, "step": 9000 }, { "epoch": 0.6037495388536741, "grad_norm": 0.15983853687725857, "learning_rate": 2e-05, "loss": 5.4828, "step": 9001 }, { "epoch": 0.603816614682899, "grad_norm": 0.15819974061098582, "learning_rate": 2e-05, "loss": 5.4186, "step": 9002 }, { "epoch": 0.603883690512124, "grad_norm": 0.15216511872137053, "learning_rate": 2e-05, "loss": 5.3866, "step": 9003 }, { "epoch": 0.6039507663413489, "grad_norm": 0.15276083808059057, "learning_rate": 2e-05, "loss": 5.4266, "step": 9004 }, { "epoch": 0.6040178421705739, "grad_norm": 0.15965280581317243, "learning_rate": 2e-05, "loss": 5.329, "step": 9005 }, { "epoch": 0.6040849179997988, "grad_norm": 0.15310023115550733, "learning_rate": 2e-05, "loss": 5.5107, "step": 9006 }, { "epoch": 0.6041519938290237, "grad_norm": 0.14387517632609015, "learning_rate": 2e-05, "loss": 5.6101, "step": 9007 }, { "epoch": 0.6042190696582487, "grad_norm": 0.15517298947954805, "learning_rate": 2e-05, "loss": 5.4462, "step": 9008 }, { "epoch": 0.6042861454874736, "grad_norm": 0.17232547131228793, "learning_rate": 2e-05, "loss": 5.4662, "step": 9009 }, { "epoch": 0.6043532213166986, "grad_norm": 0.14630755391061998, "learning_rate": 2e-05, "loss": 5.365, "step": 9010 }, { "epoch": 0.6044202971459235, "grad_norm": 0.15062601186984317, "learning_rate": 2e-05, "loss": 5.3533, "step": 9011 }, { "epoch": 0.6044873729751484, "grad_norm": 0.15772659558633342, "learning_rate": 2e-05, "loss": 5.3823, "step": 9012 }, { "epoch": 0.6045544488043734, "grad_norm": 0.1500009887502321, "learning_rate": 2e-05, "loss": 5.4222, "step": 9013 }, { "epoch": 0.6046215246335983, "grad_norm": 0.14655572844289408, "learning_rate": 2e-05, "loss": 5.392, "step": 9014 }, { "epoch": 0.6046886004628232, "grad_norm": 0.15090520771682703, "learning_rate": 2e-05, "loss": 5.2534, "step": 9015 }, { "epoch": 0.6047556762920482, "grad_norm": 0.15573891238215898, "learning_rate": 2e-05, "loss": 5.4138, "step": 9016 }, { "epoch": 0.6048227521212731, "grad_norm": 0.15175656419306613, "learning_rate": 2e-05, "loss": 5.4869, "step": 9017 }, { "epoch": 0.6048898279504981, "grad_norm": 0.150313502108073, "learning_rate": 2e-05, "loss": 5.3718, "step": 9018 }, { "epoch": 0.604956903779723, "grad_norm": 0.15603766247727635, "learning_rate": 2e-05, "loss": 5.2364, "step": 9019 }, { "epoch": 0.6050239796089479, "grad_norm": 0.14757537344924523, "learning_rate": 2e-05, "loss": 5.5894, "step": 9020 }, { "epoch": 0.6050910554381729, "grad_norm": 0.17045406922620399, "learning_rate": 2e-05, "loss": 5.6016, "step": 9021 }, { "epoch": 0.6051581312673978, "grad_norm": 0.15951765435310425, "learning_rate": 2e-05, "loss": 5.4626, "step": 9022 }, { "epoch": 0.6052252070966228, "grad_norm": 0.15615472549591994, "learning_rate": 2e-05, "loss": 5.3404, "step": 9023 }, { "epoch": 0.6052922829258477, "grad_norm": 0.16352008072224333, "learning_rate": 2e-05, "loss": 5.5314, "step": 9024 }, { "epoch": 0.6053593587550726, "grad_norm": 0.15262101690059107, "learning_rate": 2e-05, "loss": 5.4742, "step": 9025 }, { "epoch": 0.6054264345842976, "grad_norm": 0.14995412868326066, "learning_rate": 2e-05, "loss": 5.4165, "step": 9026 }, { "epoch": 0.6054935104135225, "grad_norm": 0.16396824953415823, "learning_rate": 2e-05, "loss": 5.4779, "step": 9027 }, { "epoch": 0.6055605862427474, "grad_norm": 0.15367947844926366, "learning_rate": 2e-05, "loss": 5.4574, "step": 9028 }, { "epoch": 0.6056276620719724, "grad_norm": 0.14993829770969036, "learning_rate": 2e-05, "loss": 5.4521, "step": 9029 }, { "epoch": 0.6056947379011973, "grad_norm": 0.1486552251973269, "learning_rate": 2e-05, "loss": 5.3296, "step": 9030 }, { "epoch": 0.6057618137304223, "grad_norm": 0.15252689634945368, "learning_rate": 2e-05, "loss": 5.3456, "step": 9031 }, { "epoch": 0.6058288895596472, "grad_norm": 0.160997564263249, "learning_rate": 2e-05, "loss": 5.2987, "step": 9032 }, { "epoch": 0.6058959653888721, "grad_norm": 0.14736269658009427, "learning_rate": 2e-05, "loss": 5.327, "step": 9033 }, { "epoch": 0.6059630412180971, "grad_norm": 0.15722534869701743, "learning_rate": 2e-05, "loss": 5.4551, "step": 9034 }, { "epoch": 0.606030117047322, "grad_norm": 0.1437140830910672, "learning_rate": 2e-05, "loss": 5.3408, "step": 9035 }, { "epoch": 0.606097192876547, "grad_norm": 0.14674147253267747, "learning_rate": 2e-05, "loss": 5.5675, "step": 9036 }, { "epoch": 0.6061642687057719, "grad_norm": 0.16135903868618987, "learning_rate": 2e-05, "loss": 5.5075, "step": 9037 }, { "epoch": 0.6062313445349968, "grad_norm": 0.15170535853221465, "learning_rate": 2e-05, "loss": 5.2254, "step": 9038 }, { "epoch": 0.6062984203642218, "grad_norm": 0.15061563452745205, "learning_rate": 2e-05, "loss": 5.5477, "step": 9039 }, { "epoch": 0.6063654961934467, "grad_norm": 0.14941987758071648, "learning_rate": 2e-05, "loss": 5.3743, "step": 9040 }, { "epoch": 0.6064325720226716, "grad_norm": 0.14905957219784888, "learning_rate": 2e-05, "loss": 5.3947, "step": 9041 }, { "epoch": 0.6064996478518966, "grad_norm": 0.15236135677264073, "learning_rate": 2e-05, "loss": 5.5152, "step": 9042 }, { "epoch": 0.6065667236811215, "grad_norm": 0.15204669861875156, "learning_rate": 2e-05, "loss": 5.5051, "step": 9043 }, { "epoch": 0.6066337995103465, "grad_norm": 0.1501744019718792, "learning_rate": 2e-05, "loss": 5.2498, "step": 9044 }, { "epoch": 0.6067008753395714, "grad_norm": 0.15835659108354702, "learning_rate": 2e-05, "loss": 5.3579, "step": 9045 }, { "epoch": 0.6067679511687963, "grad_norm": 0.15329364468904505, "learning_rate": 2e-05, "loss": 5.5705, "step": 9046 }, { "epoch": 0.6068350269980213, "grad_norm": 0.1480475226210665, "learning_rate": 2e-05, "loss": 5.5419, "step": 9047 }, { "epoch": 0.6069021028272462, "grad_norm": 0.1610067679666325, "learning_rate": 2e-05, "loss": 5.3716, "step": 9048 }, { "epoch": 0.6069691786564712, "grad_norm": 0.14792985191093305, "learning_rate": 2e-05, "loss": 5.4031, "step": 9049 }, { "epoch": 0.6070362544856961, "grad_norm": 0.15084485940956005, "learning_rate": 2e-05, "loss": 5.3718, "step": 9050 }, { "epoch": 0.607103330314921, "grad_norm": 0.154226644095758, "learning_rate": 2e-05, "loss": 5.4864, "step": 9051 }, { "epoch": 0.607170406144146, "grad_norm": 0.16141146658108096, "learning_rate": 2e-05, "loss": 5.524, "step": 9052 }, { "epoch": 0.6072374819733709, "grad_norm": 0.14835895925580098, "learning_rate": 2e-05, "loss": 5.4596, "step": 9053 }, { "epoch": 0.6073045578025958, "grad_norm": 0.14989467166588258, "learning_rate": 2e-05, "loss": 5.2762, "step": 9054 }, { "epoch": 0.6073716336318208, "grad_norm": 0.16458166988151135, "learning_rate": 2e-05, "loss": 5.4244, "step": 9055 }, { "epoch": 0.6074387094610457, "grad_norm": 0.15441941999247885, "learning_rate": 2e-05, "loss": 5.3784, "step": 9056 }, { "epoch": 0.6075057852902707, "grad_norm": 0.1624701689271173, "learning_rate": 2e-05, "loss": 5.421, "step": 9057 }, { "epoch": 0.6075728611194956, "grad_norm": 0.1643811406943147, "learning_rate": 2e-05, "loss": 5.3702, "step": 9058 }, { "epoch": 0.6076399369487205, "grad_norm": 0.15388195277696853, "learning_rate": 2e-05, "loss": 5.5132, "step": 9059 }, { "epoch": 0.6077070127779455, "grad_norm": 0.15918925104587828, "learning_rate": 2e-05, "loss": 5.4643, "step": 9060 }, { "epoch": 0.6077740886071704, "grad_norm": 0.15355643987004347, "learning_rate": 2e-05, "loss": 5.4012, "step": 9061 }, { "epoch": 0.6078411644363954, "grad_norm": 0.1560296435700642, "learning_rate": 2e-05, "loss": 5.4704, "step": 9062 }, { "epoch": 0.6079082402656203, "grad_norm": 0.15423556707250996, "learning_rate": 2e-05, "loss": 5.3943, "step": 9063 }, { "epoch": 0.6079753160948452, "grad_norm": 0.16701324471122955, "learning_rate": 2e-05, "loss": 5.3997, "step": 9064 }, { "epoch": 0.6080423919240702, "grad_norm": 0.1587009407201048, "learning_rate": 2e-05, "loss": 5.3873, "step": 9065 }, { "epoch": 0.6081094677532951, "grad_norm": 0.14788047646421326, "learning_rate": 2e-05, "loss": 5.3695, "step": 9066 }, { "epoch": 0.60817654358252, "grad_norm": 0.15149529238947582, "learning_rate": 2e-05, "loss": 5.3517, "step": 9067 }, { "epoch": 0.608243619411745, "grad_norm": 0.1539677940905713, "learning_rate": 2e-05, "loss": 5.4893, "step": 9068 }, { "epoch": 0.6083106952409699, "grad_norm": 0.156240157114063, "learning_rate": 2e-05, "loss": 5.3971, "step": 9069 }, { "epoch": 0.6083777710701949, "grad_norm": 0.15466474697001698, "learning_rate": 2e-05, "loss": 5.3091, "step": 9070 }, { "epoch": 0.6084448468994198, "grad_norm": 0.14992377458102238, "learning_rate": 2e-05, "loss": 5.3718, "step": 9071 }, { "epoch": 0.6085119227286447, "grad_norm": 0.16306069738613704, "learning_rate": 2e-05, "loss": 5.5052, "step": 9072 }, { "epoch": 0.6085789985578697, "grad_norm": 0.161290662916394, "learning_rate": 2e-05, "loss": 5.5034, "step": 9073 }, { "epoch": 0.6086460743870946, "grad_norm": 0.1542979000357331, "learning_rate": 2e-05, "loss": 5.3364, "step": 9074 }, { "epoch": 0.6087131502163196, "grad_norm": 0.1560862790102712, "learning_rate": 2e-05, "loss": 5.4304, "step": 9075 }, { "epoch": 0.6087802260455445, "grad_norm": 0.15260175428044737, "learning_rate": 2e-05, "loss": 5.3699, "step": 9076 }, { "epoch": 0.6088473018747694, "grad_norm": 0.15692990653286534, "learning_rate": 2e-05, "loss": 5.3974, "step": 9077 }, { "epoch": 0.6089143777039944, "grad_norm": 0.15118076823407234, "learning_rate": 2e-05, "loss": 5.4479, "step": 9078 }, { "epoch": 0.6089814535332193, "grad_norm": 0.1545019673284196, "learning_rate": 2e-05, "loss": 5.4213, "step": 9079 }, { "epoch": 0.6090485293624442, "grad_norm": 0.15645428787318477, "learning_rate": 2e-05, "loss": 5.5018, "step": 9080 }, { "epoch": 0.6091156051916692, "grad_norm": 0.15160053131839996, "learning_rate": 2e-05, "loss": 5.4273, "step": 9081 }, { "epoch": 0.6091826810208941, "grad_norm": 0.14995496418303117, "learning_rate": 2e-05, "loss": 5.4542, "step": 9082 }, { "epoch": 0.6092497568501191, "grad_norm": 0.15509185340000212, "learning_rate": 2e-05, "loss": 5.4644, "step": 9083 }, { "epoch": 0.609316832679344, "grad_norm": 0.15752633776929348, "learning_rate": 2e-05, "loss": 5.4208, "step": 9084 }, { "epoch": 0.6093839085085689, "grad_norm": 0.15180853440087552, "learning_rate": 2e-05, "loss": 5.4667, "step": 9085 }, { "epoch": 0.6094509843377939, "grad_norm": 0.14387976076719428, "learning_rate": 2e-05, "loss": 5.4063, "step": 9086 }, { "epoch": 0.6095180601670188, "grad_norm": 0.15054827385069564, "learning_rate": 2e-05, "loss": 5.4252, "step": 9087 }, { "epoch": 0.6095851359962438, "grad_norm": 0.15611207883751796, "learning_rate": 2e-05, "loss": 5.4336, "step": 9088 }, { "epoch": 0.6096522118254687, "grad_norm": 0.1492779842193079, "learning_rate": 2e-05, "loss": 5.4079, "step": 9089 }, { "epoch": 0.6097192876546936, "grad_norm": 0.15388225655769122, "learning_rate": 2e-05, "loss": 5.4174, "step": 9090 }, { "epoch": 0.6097863634839186, "grad_norm": 0.15421826092032268, "learning_rate": 2e-05, "loss": 5.4495, "step": 9091 }, { "epoch": 0.6098534393131435, "grad_norm": 0.1507844303454805, "learning_rate": 2e-05, "loss": 5.4441, "step": 9092 }, { "epoch": 0.6099205151423684, "grad_norm": 0.145165364685143, "learning_rate": 2e-05, "loss": 5.5461, "step": 9093 }, { "epoch": 0.6099875909715934, "grad_norm": 0.14635622020401207, "learning_rate": 2e-05, "loss": 5.4008, "step": 9094 }, { "epoch": 0.6100546668008183, "grad_norm": 0.14776912734415257, "learning_rate": 2e-05, "loss": 5.2758, "step": 9095 }, { "epoch": 0.6101217426300433, "grad_norm": 0.14971931721422518, "learning_rate": 2e-05, "loss": 5.6557, "step": 9096 }, { "epoch": 0.6101888184592682, "grad_norm": 0.15018281944937972, "learning_rate": 2e-05, "loss": 5.4215, "step": 9097 }, { "epoch": 0.6102558942884931, "grad_norm": 0.14733589829931923, "learning_rate": 2e-05, "loss": 5.4903, "step": 9098 }, { "epoch": 0.6103229701177181, "grad_norm": 0.1464679490964221, "learning_rate": 2e-05, "loss": 5.3556, "step": 9099 }, { "epoch": 0.610390045946943, "grad_norm": 0.14702674792363746, "learning_rate": 2e-05, "loss": 5.3436, "step": 9100 }, { "epoch": 0.610457121776168, "grad_norm": 0.14915972550948736, "learning_rate": 2e-05, "loss": 5.3824, "step": 9101 }, { "epoch": 0.6105241976053929, "grad_norm": 0.15245428394217023, "learning_rate": 2e-05, "loss": 5.4076, "step": 9102 }, { "epoch": 0.6105912734346178, "grad_norm": 0.15590748206559785, "learning_rate": 2e-05, "loss": 5.3859, "step": 9103 }, { "epoch": 0.6106583492638428, "grad_norm": 0.14536956924912117, "learning_rate": 2e-05, "loss": 5.3493, "step": 9104 }, { "epoch": 0.6107254250930677, "grad_norm": 0.1522827465972116, "learning_rate": 2e-05, "loss": 5.4313, "step": 9105 }, { "epoch": 0.6107925009222926, "grad_norm": 0.14678616950949264, "learning_rate": 2e-05, "loss": 5.5454, "step": 9106 }, { "epoch": 0.6108595767515176, "grad_norm": 0.15178208334602963, "learning_rate": 2e-05, "loss": 5.3848, "step": 9107 }, { "epoch": 0.6109266525807425, "grad_norm": 0.14824846503691663, "learning_rate": 2e-05, "loss": 5.2603, "step": 9108 }, { "epoch": 0.6109937284099675, "grad_norm": 0.14810020022294135, "learning_rate": 2e-05, "loss": 5.4601, "step": 9109 }, { "epoch": 0.6110608042391924, "grad_norm": 0.15698330254477863, "learning_rate": 2e-05, "loss": 5.3704, "step": 9110 }, { "epoch": 0.6111278800684173, "grad_norm": 0.1523687535254334, "learning_rate": 2e-05, "loss": 5.3757, "step": 9111 }, { "epoch": 0.6111949558976423, "grad_norm": 0.15110261553240234, "learning_rate": 2e-05, "loss": 5.2791, "step": 9112 }, { "epoch": 0.6112620317268672, "grad_norm": 0.1560759476046622, "learning_rate": 2e-05, "loss": 5.546, "step": 9113 }, { "epoch": 0.6113291075560922, "grad_norm": 0.1551675260142818, "learning_rate": 2e-05, "loss": 5.3756, "step": 9114 }, { "epoch": 0.6113961833853171, "grad_norm": 0.15069460926644276, "learning_rate": 2e-05, "loss": 5.3119, "step": 9115 }, { "epoch": 0.611463259214542, "grad_norm": 0.15663351341485507, "learning_rate": 2e-05, "loss": 5.5164, "step": 9116 }, { "epoch": 0.611530335043767, "grad_norm": 0.14700169215636272, "learning_rate": 2e-05, "loss": 5.3879, "step": 9117 }, { "epoch": 0.6115974108729919, "grad_norm": 0.15203075598926397, "learning_rate": 2e-05, "loss": 5.4179, "step": 9118 }, { "epoch": 0.6116644867022168, "grad_norm": 0.16417960357222863, "learning_rate": 2e-05, "loss": 5.5013, "step": 9119 }, { "epoch": 0.6117315625314418, "grad_norm": 0.153388498542335, "learning_rate": 2e-05, "loss": 5.4573, "step": 9120 }, { "epoch": 0.6117986383606667, "grad_norm": 0.16109174442139465, "learning_rate": 2e-05, "loss": 5.5615, "step": 9121 }, { "epoch": 0.6118657141898917, "grad_norm": 0.1485843401001438, "learning_rate": 2e-05, "loss": 5.5611, "step": 9122 }, { "epoch": 0.6119327900191166, "grad_norm": 0.1642143904041403, "learning_rate": 2e-05, "loss": 5.5858, "step": 9123 }, { "epoch": 0.6119998658483415, "grad_norm": 0.15393671490996583, "learning_rate": 2e-05, "loss": 5.3904, "step": 9124 }, { "epoch": 0.6120669416775665, "grad_norm": 0.15244460538309557, "learning_rate": 2e-05, "loss": 5.4117, "step": 9125 }, { "epoch": 0.6121340175067914, "grad_norm": 0.15289091439834016, "learning_rate": 2e-05, "loss": 5.4565, "step": 9126 }, { "epoch": 0.6122010933360164, "grad_norm": 0.16032301248126507, "learning_rate": 2e-05, "loss": 5.2736, "step": 9127 }, { "epoch": 0.6122681691652413, "grad_norm": 0.14579745583600154, "learning_rate": 2e-05, "loss": 5.4408, "step": 9128 }, { "epoch": 0.6123352449944662, "grad_norm": 0.15629664135501284, "learning_rate": 2e-05, "loss": 5.4219, "step": 9129 }, { "epoch": 0.6124023208236912, "grad_norm": 0.15367126941106227, "learning_rate": 2e-05, "loss": 5.4514, "step": 9130 }, { "epoch": 0.6124693966529161, "grad_norm": 0.14814662669089393, "learning_rate": 2e-05, "loss": 5.4915, "step": 9131 }, { "epoch": 0.612536472482141, "grad_norm": 0.1570534498223316, "learning_rate": 2e-05, "loss": 5.3172, "step": 9132 }, { "epoch": 0.612603548311366, "grad_norm": 0.1557785183646263, "learning_rate": 2e-05, "loss": 5.3776, "step": 9133 }, { "epoch": 0.6126706241405909, "grad_norm": 0.15262552917103855, "learning_rate": 2e-05, "loss": 5.3925, "step": 9134 }, { "epoch": 0.6127376999698159, "grad_norm": 0.1552633121071096, "learning_rate": 2e-05, "loss": 5.509, "step": 9135 }, { "epoch": 0.6128047757990408, "grad_norm": 0.15988477932644307, "learning_rate": 2e-05, "loss": 5.3074, "step": 9136 }, { "epoch": 0.6128718516282657, "grad_norm": 0.14680909000207615, "learning_rate": 2e-05, "loss": 5.3249, "step": 9137 }, { "epoch": 0.6129389274574907, "grad_norm": 0.1541405253784886, "learning_rate": 2e-05, "loss": 5.3345, "step": 9138 }, { "epoch": 0.6130060032867156, "grad_norm": 0.15823490659934453, "learning_rate": 2e-05, "loss": 5.4797, "step": 9139 }, { "epoch": 0.6130730791159406, "grad_norm": 0.1494131713669312, "learning_rate": 2e-05, "loss": 5.4218, "step": 9140 }, { "epoch": 0.6131401549451655, "grad_norm": 0.15406933289099953, "learning_rate": 2e-05, "loss": 5.5951, "step": 9141 }, { "epoch": 0.6132072307743904, "grad_norm": 0.15443994536494, "learning_rate": 2e-05, "loss": 5.3764, "step": 9142 }, { "epoch": 0.6132743066036154, "grad_norm": 0.15571181708184184, "learning_rate": 2e-05, "loss": 5.3561, "step": 9143 }, { "epoch": 0.6133413824328403, "grad_norm": 0.14768372128980053, "learning_rate": 2e-05, "loss": 5.3452, "step": 9144 }, { "epoch": 0.6134084582620652, "grad_norm": 0.15748398950384546, "learning_rate": 2e-05, "loss": 5.3302, "step": 9145 }, { "epoch": 0.6134755340912902, "grad_norm": 0.16027516483837476, "learning_rate": 2e-05, "loss": 5.4361, "step": 9146 }, { "epoch": 0.6135426099205151, "grad_norm": 0.15179697674075243, "learning_rate": 2e-05, "loss": 5.486, "step": 9147 }, { "epoch": 0.6136096857497401, "grad_norm": 0.1500418413761482, "learning_rate": 2e-05, "loss": 5.3066, "step": 9148 }, { "epoch": 0.613676761578965, "grad_norm": 0.15111073926904983, "learning_rate": 2e-05, "loss": 5.296, "step": 9149 }, { "epoch": 0.6137438374081899, "grad_norm": 0.15382534483414995, "learning_rate": 2e-05, "loss": 5.3167, "step": 9150 }, { "epoch": 0.6138109132374149, "grad_norm": 0.15374774724261073, "learning_rate": 2e-05, "loss": 5.3998, "step": 9151 }, { "epoch": 0.6138779890666398, "grad_norm": 0.15318503736190864, "learning_rate": 2e-05, "loss": 5.496, "step": 9152 }, { "epoch": 0.6139450648958648, "grad_norm": 0.1493165652209338, "learning_rate": 2e-05, "loss": 5.4034, "step": 9153 }, { "epoch": 0.6140121407250897, "grad_norm": 0.15424032851476385, "learning_rate": 2e-05, "loss": 5.347, "step": 9154 }, { "epoch": 0.6140792165543146, "grad_norm": 0.15221929390080222, "learning_rate": 2e-05, "loss": 5.4533, "step": 9155 }, { "epoch": 0.6141462923835396, "grad_norm": 0.15539052833037667, "learning_rate": 2e-05, "loss": 5.4223, "step": 9156 }, { "epoch": 0.6142133682127645, "grad_norm": 0.157220684531554, "learning_rate": 2e-05, "loss": 5.4942, "step": 9157 }, { "epoch": 0.6142804440419894, "grad_norm": 0.15402803173374535, "learning_rate": 2e-05, "loss": 5.4328, "step": 9158 }, { "epoch": 0.6143475198712144, "grad_norm": 0.16419979677466964, "learning_rate": 2e-05, "loss": 5.3061, "step": 9159 }, { "epoch": 0.6144145957004393, "grad_norm": 0.14800888828731965, "learning_rate": 2e-05, "loss": 5.4912, "step": 9160 }, { "epoch": 0.6144816715296643, "grad_norm": 0.1597973529119849, "learning_rate": 2e-05, "loss": 5.4298, "step": 9161 }, { "epoch": 0.6145487473588892, "grad_norm": 0.17011893324697344, "learning_rate": 2e-05, "loss": 5.4764, "step": 9162 }, { "epoch": 0.6146158231881141, "grad_norm": 0.1521485767284959, "learning_rate": 2e-05, "loss": 5.366, "step": 9163 }, { "epoch": 0.6146828990173391, "grad_norm": 0.16489657413204853, "learning_rate": 2e-05, "loss": 5.4302, "step": 9164 }, { "epoch": 0.614749974846564, "grad_norm": 0.1540293993278931, "learning_rate": 2e-05, "loss": 5.4895, "step": 9165 }, { "epoch": 0.614817050675789, "grad_norm": 0.15210382781931017, "learning_rate": 2e-05, "loss": 5.4041, "step": 9166 }, { "epoch": 0.6148841265050139, "grad_norm": 0.15293146763932716, "learning_rate": 2e-05, "loss": 5.3049, "step": 9167 }, { "epoch": 0.6149512023342388, "grad_norm": 0.15593983988422613, "learning_rate": 2e-05, "loss": 5.441, "step": 9168 }, { "epoch": 0.6150182781634638, "grad_norm": 0.1532360120458184, "learning_rate": 2e-05, "loss": 5.3625, "step": 9169 }, { "epoch": 0.6150853539926887, "grad_norm": 0.16935776361391852, "learning_rate": 2e-05, "loss": 5.3193, "step": 9170 }, { "epoch": 0.6151524298219136, "grad_norm": 0.1581791835917573, "learning_rate": 2e-05, "loss": 5.5189, "step": 9171 }, { "epoch": 0.6152195056511386, "grad_norm": 0.14738615703022015, "learning_rate": 2e-05, "loss": 5.5655, "step": 9172 }, { "epoch": 0.6152865814803635, "grad_norm": 0.15568800198597363, "learning_rate": 2e-05, "loss": 5.3983, "step": 9173 }, { "epoch": 0.6153536573095885, "grad_norm": 0.16307742081524065, "learning_rate": 2e-05, "loss": 5.2742, "step": 9174 }, { "epoch": 0.6154207331388134, "grad_norm": 0.15525873674863813, "learning_rate": 2e-05, "loss": 5.4988, "step": 9175 }, { "epoch": 0.6154878089680383, "grad_norm": 0.15378017188946866, "learning_rate": 2e-05, "loss": 5.3985, "step": 9176 }, { "epoch": 0.6155548847972633, "grad_norm": 0.1525424685866855, "learning_rate": 2e-05, "loss": 5.4116, "step": 9177 }, { "epoch": 0.6156219606264882, "grad_norm": 0.15353082993622258, "learning_rate": 2e-05, "loss": 5.4252, "step": 9178 }, { "epoch": 0.6156890364557132, "grad_norm": 0.1636444629934033, "learning_rate": 2e-05, "loss": 5.5225, "step": 9179 }, { "epoch": 0.6157561122849381, "grad_norm": 0.16259004282009284, "learning_rate": 2e-05, "loss": 5.5672, "step": 9180 }, { "epoch": 0.615823188114163, "grad_norm": 0.15356234227749602, "learning_rate": 2e-05, "loss": 5.4028, "step": 9181 }, { "epoch": 0.615890263943388, "grad_norm": 0.16315072343889223, "learning_rate": 2e-05, "loss": 5.4428, "step": 9182 }, { "epoch": 0.6159573397726129, "grad_norm": 0.1531209964329347, "learning_rate": 2e-05, "loss": 5.4918, "step": 9183 }, { "epoch": 0.6160244156018378, "grad_norm": 0.16462808043905916, "learning_rate": 2e-05, "loss": 5.293, "step": 9184 }, { "epoch": 0.6160914914310628, "grad_norm": 0.16668685250676135, "learning_rate": 2e-05, "loss": 5.4314, "step": 9185 }, { "epoch": 0.6161585672602877, "grad_norm": 0.14928969428787342, "learning_rate": 2e-05, "loss": 5.3635, "step": 9186 }, { "epoch": 0.6162256430895127, "grad_norm": 0.15361459018128665, "learning_rate": 2e-05, "loss": 5.4541, "step": 9187 }, { "epoch": 0.6162927189187376, "grad_norm": 0.17997472912383478, "learning_rate": 2e-05, "loss": 5.4929, "step": 9188 }, { "epoch": 0.6163597947479625, "grad_norm": 0.1629036070384782, "learning_rate": 2e-05, "loss": 5.4829, "step": 9189 }, { "epoch": 0.6164268705771875, "grad_norm": 0.15471409708792733, "learning_rate": 2e-05, "loss": 5.3964, "step": 9190 }, { "epoch": 0.6164939464064124, "grad_norm": 0.15609054834066374, "learning_rate": 2e-05, "loss": 5.2624, "step": 9191 }, { "epoch": 0.6165610222356374, "grad_norm": 0.1697376811645988, "learning_rate": 2e-05, "loss": 5.3928, "step": 9192 }, { "epoch": 0.6166280980648623, "grad_norm": 0.17537273470810671, "learning_rate": 2e-05, "loss": 5.3882, "step": 9193 }, { "epoch": 0.6166951738940872, "grad_norm": 0.15064326072853487, "learning_rate": 2e-05, "loss": 5.4437, "step": 9194 }, { "epoch": 0.6167622497233122, "grad_norm": 0.14948902466964062, "learning_rate": 2e-05, "loss": 5.3491, "step": 9195 }, { "epoch": 0.6168293255525371, "grad_norm": 0.15226022696021418, "learning_rate": 2e-05, "loss": 5.3219, "step": 9196 }, { "epoch": 0.616896401381762, "grad_norm": 0.1512232658810304, "learning_rate": 2e-05, "loss": 5.2696, "step": 9197 }, { "epoch": 0.616963477210987, "grad_norm": 0.1585885283358905, "learning_rate": 2e-05, "loss": 5.3069, "step": 9198 }, { "epoch": 0.6170305530402119, "grad_norm": 0.15886283176081692, "learning_rate": 2e-05, "loss": 5.515, "step": 9199 }, { "epoch": 0.6170976288694369, "grad_norm": 0.14672717450671494, "learning_rate": 2e-05, "loss": 5.4465, "step": 9200 }, { "epoch": 0.6171647046986618, "grad_norm": 0.15278016071435357, "learning_rate": 2e-05, "loss": 5.4353, "step": 9201 }, { "epoch": 0.6172317805278867, "grad_norm": 0.15298482521249743, "learning_rate": 2e-05, "loss": 5.4276, "step": 9202 }, { "epoch": 0.6172988563571117, "grad_norm": 0.16141157631793196, "learning_rate": 2e-05, "loss": 5.2524, "step": 9203 }, { "epoch": 0.6173659321863366, "grad_norm": 0.15451190266329898, "learning_rate": 2e-05, "loss": 5.3512, "step": 9204 }, { "epoch": 0.6174330080155616, "grad_norm": 0.14920141198483572, "learning_rate": 2e-05, "loss": 5.3964, "step": 9205 }, { "epoch": 0.6175000838447865, "grad_norm": 0.1527512013819126, "learning_rate": 2e-05, "loss": 5.3618, "step": 9206 }, { "epoch": 0.6175671596740114, "grad_norm": 0.1498084075649274, "learning_rate": 2e-05, "loss": 5.3973, "step": 9207 }, { "epoch": 0.6176342355032364, "grad_norm": 0.15392815816301336, "learning_rate": 2e-05, "loss": 5.5096, "step": 9208 }, { "epoch": 0.6177013113324613, "grad_norm": 0.14676743550634994, "learning_rate": 2e-05, "loss": 5.3687, "step": 9209 }, { "epoch": 0.6177683871616862, "grad_norm": 0.14948323884342046, "learning_rate": 2e-05, "loss": 5.2948, "step": 9210 }, { "epoch": 0.6178354629909112, "grad_norm": 0.15157900123979853, "learning_rate": 2e-05, "loss": 5.4342, "step": 9211 }, { "epoch": 0.6179025388201361, "grad_norm": 0.16800049085795848, "learning_rate": 2e-05, "loss": 5.3318, "step": 9212 }, { "epoch": 0.6179696146493611, "grad_norm": 0.15366618215390754, "learning_rate": 2e-05, "loss": 5.3933, "step": 9213 }, { "epoch": 0.618036690478586, "grad_norm": 0.15572275478577646, "learning_rate": 2e-05, "loss": 5.3284, "step": 9214 }, { "epoch": 0.6181037663078109, "grad_norm": 0.15142441703883947, "learning_rate": 2e-05, "loss": 5.4482, "step": 9215 }, { "epoch": 0.6181708421370359, "grad_norm": 0.1529834639326079, "learning_rate": 2e-05, "loss": 5.3493, "step": 9216 }, { "epoch": 0.6182379179662608, "grad_norm": 0.15201360547245965, "learning_rate": 2e-05, "loss": 5.4335, "step": 9217 }, { "epoch": 0.6183049937954858, "grad_norm": 0.15457462402115332, "learning_rate": 2e-05, "loss": 5.4081, "step": 9218 }, { "epoch": 0.6183720696247107, "grad_norm": 0.1518124341080101, "learning_rate": 2e-05, "loss": 5.3917, "step": 9219 }, { "epoch": 0.6184391454539356, "grad_norm": 0.15535182796987596, "learning_rate": 2e-05, "loss": 5.3658, "step": 9220 }, { "epoch": 0.6185062212831606, "grad_norm": 0.16444999053383863, "learning_rate": 2e-05, "loss": 5.3951, "step": 9221 }, { "epoch": 0.6185732971123855, "grad_norm": 0.1526042716308769, "learning_rate": 2e-05, "loss": 5.4196, "step": 9222 }, { "epoch": 0.6186403729416105, "grad_norm": 0.16340618231599566, "learning_rate": 2e-05, "loss": 5.4576, "step": 9223 }, { "epoch": 0.6187074487708354, "grad_norm": 0.16153597707123132, "learning_rate": 2e-05, "loss": 5.4772, "step": 9224 }, { "epoch": 0.6187745246000603, "grad_norm": 0.15153120135365902, "learning_rate": 2e-05, "loss": 5.4392, "step": 9225 }, { "epoch": 0.6188416004292853, "grad_norm": 0.15098788384477516, "learning_rate": 2e-05, "loss": 5.2693, "step": 9226 }, { "epoch": 0.6189086762585102, "grad_norm": 0.1540565714494348, "learning_rate": 2e-05, "loss": 5.3871, "step": 9227 }, { "epoch": 0.6189757520877351, "grad_norm": 0.15104596744240856, "learning_rate": 2e-05, "loss": 5.393, "step": 9228 }, { "epoch": 0.6190428279169601, "grad_norm": 0.14921236768779056, "learning_rate": 2e-05, "loss": 5.5381, "step": 9229 }, { "epoch": 0.619109903746185, "grad_norm": 0.15771590560397958, "learning_rate": 2e-05, "loss": 5.481, "step": 9230 }, { "epoch": 0.61917697957541, "grad_norm": 0.1578443168078229, "learning_rate": 2e-05, "loss": 5.3935, "step": 9231 }, { "epoch": 0.6192440554046349, "grad_norm": 0.14986919684751107, "learning_rate": 2e-05, "loss": 5.4854, "step": 9232 }, { "epoch": 0.6193111312338598, "grad_norm": 0.15676876361848163, "learning_rate": 2e-05, "loss": 5.456, "step": 9233 }, { "epoch": 0.6193782070630848, "grad_norm": 0.15455955742815858, "learning_rate": 2e-05, "loss": 5.5174, "step": 9234 }, { "epoch": 0.6194452828923097, "grad_norm": 0.15043327985351507, "learning_rate": 2e-05, "loss": 5.3538, "step": 9235 }, { "epoch": 0.6195123587215347, "grad_norm": 0.15639055410765967, "learning_rate": 2e-05, "loss": 5.4732, "step": 9236 }, { "epoch": 0.6195794345507596, "grad_norm": 0.15436831995127073, "learning_rate": 2e-05, "loss": 5.5127, "step": 9237 }, { "epoch": 0.6196465103799845, "grad_norm": 0.14929570031421652, "learning_rate": 2e-05, "loss": 5.3753, "step": 9238 }, { "epoch": 0.6197135862092095, "grad_norm": 0.15364852076901048, "learning_rate": 2e-05, "loss": 5.4078, "step": 9239 }, { "epoch": 0.6197806620384344, "grad_norm": 0.15715848040501815, "learning_rate": 2e-05, "loss": 5.4248, "step": 9240 }, { "epoch": 0.6198477378676593, "grad_norm": 0.14749368864097412, "learning_rate": 2e-05, "loss": 5.4272, "step": 9241 }, { "epoch": 0.6199148136968843, "grad_norm": 0.14762313422403509, "learning_rate": 2e-05, "loss": 5.437, "step": 9242 }, { "epoch": 0.6199818895261092, "grad_norm": 0.1587359808442164, "learning_rate": 2e-05, "loss": 5.3629, "step": 9243 }, { "epoch": 0.6200489653553342, "grad_norm": 0.16115104102351155, "learning_rate": 2e-05, "loss": 5.3916, "step": 9244 }, { "epoch": 0.6201160411845591, "grad_norm": 0.15933100754668286, "learning_rate": 2e-05, "loss": 5.3029, "step": 9245 }, { "epoch": 0.620183117013784, "grad_norm": 0.14782824217767637, "learning_rate": 2e-05, "loss": 5.4063, "step": 9246 }, { "epoch": 0.620250192843009, "grad_norm": 0.16213781171198488, "learning_rate": 2e-05, "loss": 5.4933, "step": 9247 }, { "epoch": 0.6203172686722339, "grad_norm": 0.15485605934888458, "learning_rate": 2e-05, "loss": 5.3357, "step": 9248 }, { "epoch": 0.6203843445014589, "grad_norm": 0.15376228692849342, "learning_rate": 2e-05, "loss": 5.3623, "step": 9249 }, { "epoch": 0.6204514203306838, "grad_norm": 0.15119549331915058, "learning_rate": 2e-05, "loss": 5.5072, "step": 9250 }, { "epoch": 0.6205184961599087, "grad_norm": 0.15901887348639573, "learning_rate": 2e-05, "loss": 5.5507, "step": 9251 }, { "epoch": 0.6205855719891337, "grad_norm": 0.14744370770448922, "learning_rate": 2e-05, "loss": 5.3758, "step": 9252 }, { "epoch": 0.6206526478183586, "grad_norm": 0.14612306184471374, "learning_rate": 2e-05, "loss": 5.4871, "step": 9253 }, { "epoch": 0.6207197236475835, "grad_norm": 0.1482184048563821, "learning_rate": 2e-05, "loss": 5.4165, "step": 9254 }, { "epoch": 0.6207867994768085, "grad_norm": 0.1521540898506381, "learning_rate": 2e-05, "loss": 5.3466, "step": 9255 }, { "epoch": 0.6208538753060334, "grad_norm": 0.1548622721559832, "learning_rate": 2e-05, "loss": 5.4972, "step": 9256 }, { "epoch": 0.6209209511352584, "grad_norm": 0.17481689682593565, "learning_rate": 2e-05, "loss": 5.3233, "step": 9257 }, { "epoch": 0.6209880269644833, "grad_norm": 0.15291788497956313, "learning_rate": 2e-05, "loss": 5.3914, "step": 9258 }, { "epoch": 0.6210551027937082, "grad_norm": 0.15673546123262427, "learning_rate": 2e-05, "loss": 5.4705, "step": 9259 }, { "epoch": 0.6211221786229332, "grad_norm": 0.16818870275985726, "learning_rate": 2e-05, "loss": 5.4645, "step": 9260 }, { "epoch": 0.6211892544521581, "grad_norm": 0.15751993701303263, "learning_rate": 2e-05, "loss": 5.3668, "step": 9261 }, { "epoch": 0.621256330281383, "grad_norm": 0.15375324174477098, "learning_rate": 2e-05, "loss": 5.453, "step": 9262 }, { "epoch": 0.621323406110608, "grad_norm": 0.1622929007578182, "learning_rate": 2e-05, "loss": 5.557, "step": 9263 }, { "epoch": 0.6213904819398329, "grad_norm": 0.15502368185657472, "learning_rate": 2e-05, "loss": 5.4975, "step": 9264 }, { "epoch": 0.6214575577690579, "grad_norm": 0.15721823452445124, "learning_rate": 2e-05, "loss": 5.3247, "step": 9265 }, { "epoch": 0.6215246335982828, "grad_norm": 0.15609998897000488, "learning_rate": 2e-05, "loss": 5.4149, "step": 9266 }, { "epoch": 0.6215917094275077, "grad_norm": 0.15346174518286448, "learning_rate": 2e-05, "loss": 5.4371, "step": 9267 }, { "epoch": 0.6216587852567327, "grad_norm": 0.1504513946090627, "learning_rate": 2e-05, "loss": 5.4975, "step": 9268 }, { "epoch": 0.6217258610859576, "grad_norm": 0.14606802071425437, "learning_rate": 2e-05, "loss": 5.4081, "step": 9269 }, { "epoch": 0.6217929369151826, "grad_norm": 0.15402696060470902, "learning_rate": 2e-05, "loss": 5.4853, "step": 9270 }, { "epoch": 0.6218600127444075, "grad_norm": 0.16424940032249008, "learning_rate": 2e-05, "loss": 5.3883, "step": 9271 }, { "epoch": 0.6219270885736324, "grad_norm": 0.15124918400644813, "learning_rate": 2e-05, "loss": 5.3673, "step": 9272 }, { "epoch": 0.6219941644028575, "grad_norm": 0.15337658003638624, "learning_rate": 2e-05, "loss": 5.5385, "step": 9273 }, { "epoch": 0.6220612402320824, "grad_norm": 0.1590592487034579, "learning_rate": 2e-05, "loss": 5.5259, "step": 9274 }, { "epoch": 0.6221283160613074, "grad_norm": 0.14937191892098028, "learning_rate": 2e-05, "loss": 5.554, "step": 9275 }, { "epoch": 0.6221953918905323, "grad_norm": 0.1542872346044075, "learning_rate": 2e-05, "loss": 5.3639, "step": 9276 }, { "epoch": 0.6222624677197572, "grad_norm": 0.15748514181131734, "learning_rate": 2e-05, "loss": 5.2733, "step": 9277 }, { "epoch": 0.6223295435489822, "grad_norm": 0.1510330384999685, "learning_rate": 2e-05, "loss": 5.4119, "step": 9278 }, { "epoch": 0.6223966193782071, "grad_norm": 0.14804926921760905, "learning_rate": 2e-05, "loss": 5.4605, "step": 9279 }, { "epoch": 0.622463695207432, "grad_norm": 0.16910167875388965, "learning_rate": 2e-05, "loss": 5.4127, "step": 9280 }, { "epoch": 0.622530771036657, "grad_norm": 0.1474651370154078, "learning_rate": 2e-05, "loss": 5.496, "step": 9281 }, { "epoch": 0.6225978468658819, "grad_norm": 0.15138118460717043, "learning_rate": 2e-05, "loss": 5.3828, "step": 9282 }, { "epoch": 0.6226649226951069, "grad_norm": 0.1571288776006305, "learning_rate": 2e-05, "loss": 5.4088, "step": 9283 }, { "epoch": 0.6227319985243318, "grad_norm": 0.15855090347045925, "learning_rate": 2e-05, "loss": 5.4448, "step": 9284 }, { "epoch": 0.6227990743535567, "grad_norm": 0.1547046213668634, "learning_rate": 2e-05, "loss": 5.3135, "step": 9285 }, { "epoch": 0.6228661501827817, "grad_norm": 0.1517780795349633, "learning_rate": 2e-05, "loss": 5.4747, "step": 9286 }, { "epoch": 0.6229332260120066, "grad_norm": 0.14368262950654156, "learning_rate": 2e-05, "loss": 5.3117, "step": 9287 }, { "epoch": 0.6230003018412316, "grad_norm": 0.15305427608995553, "learning_rate": 2e-05, "loss": 5.5155, "step": 9288 }, { "epoch": 0.6230673776704565, "grad_norm": 0.1584626184778909, "learning_rate": 2e-05, "loss": 5.5512, "step": 9289 }, { "epoch": 0.6231344534996814, "grad_norm": 0.1508127855361275, "learning_rate": 2e-05, "loss": 5.4806, "step": 9290 }, { "epoch": 0.6232015293289064, "grad_norm": 0.1517974134630336, "learning_rate": 2e-05, "loss": 5.3977, "step": 9291 }, { "epoch": 0.6232686051581313, "grad_norm": 0.14998233476124237, "learning_rate": 2e-05, "loss": 5.4461, "step": 9292 }, { "epoch": 0.6233356809873563, "grad_norm": 0.15497538004340997, "learning_rate": 2e-05, "loss": 5.5588, "step": 9293 }, { "epoch": 0.6234027568165812, "grad_norm": 0.1483266854991414, "learning_rate": 2e-05, "loss": 5.3464, "step": 9294 }, { "epoch": 0.6234698326458061, "grad_norm": 0.14975399212582835, "learning_rate": 2e-05, "loss": 5.2911, "step": 9295 }, { "epoch": 0.6235369084750311, "grad_norm": 0.1564589109424068, "learning_rate": 2e-05, "loss": 5.5397, "step": 9296 }, { "epoch": 0.623603984304256, "grad_norm": 0.15472077168768686, "learning_rate": 2e-05, "loss": 5.5971, "step": 9297 }, { "epoch": 0.623671060133481, "grad_norm": 0.1448330043482016, "learning_rate": 2e-05, "loss": 5.3045, "step": 9298 }, { "epoch": 0.6237381359627059, "grad_norm": 0.15211393130395262, "learning_rate": 2e-05, "loss": 5.3359, "step": 9299 }, { "epoch": 0.6238052117919308, "grad_norm": 0.1567597430859764, "learning_rate": 2e-05, "loss": 5.499, "step": 9300 }, { "epoch": 0.6238722876211558, "grad_norm": 0.14699951264427674, "learning_rate": 2e-05, "loss": 5.3815, "step": 9301 }, { "epoch": 0.6239393634503807, "grad_norm": 0.15102526007471634, "learning_rate": 2e-05, "loss": 5.3734, "step": 9302 }, { "epoch": 0.6240064392796056, "grad_norm": 0.15770749910870713, "learning_rate": 2e-05, "loss": 5.3351, "step": 9303 }, { "epoch": 0.6240735151088306, "grad_norm": 0.15844481235934602, "learning_rate": 2e-05, "loss": 5.3105, "step": 9304 }, { "epoch": 0.6241405909380555, "grad_norm": 0.1565566434540911, "learning_rate": 2e-05, "loss": 5.3022, "step": 9305 }, { "epoch": 0.6242076667672805, "grad_norm": 0.16265464155376633, "learning_rate": 2e-05, "loss": 5.4638, "step": 9306 }, { "epoch": 0.6242747425965054, "grad_norm": 0.15587220510525404, "learning_rate": 2e-05, "loss": 5.469, "step": 9307 }, { "epoch": 0.6243418184257303, "grad_norm": 0.15207698322516885, "learning_rate": 2e-05, "loss": 5.3421, "step": 9308 }, { "epoch": 0.6244088942549553, "grad_norm": 0.1511669358200155, "learning_rate": 2e-05, "loss": 5.3577, "step": 9309 }, { "epoch": 0.6244759700841802, "grad_norm": 0.14887184330129727, "learning_rate": 2e-05, "loss": 5.4598, "step": 9310 }, { "epoch": 0.6245430459134051, "grad_norm": 0.15150995394115577, "learning_rate": 2e-05, "loss": 5.5094, "step": 9311 }, { "epoch": 0.6246101217426301, "grad_norm": 0.14907178229915738, "learning_rate": 2e-05, "loss": 5.3742, "step": 9312 }, { "epoch": 0.624677197571855, "grad_norm": 0.15568755004652848, "learning_rate": 2e-05, "loss": 5.5166, "step": 9313 }, { "epoch": 0.62474427340108, "grad_norm": 0.1465566681168972, "learning_rate": 2e-05, "loss": 5.3034, "step": 9314 }, { "epoch": 0.6248113492303049, "grad_norm": 0.14822205795101306, "learning_rate": 2e-05, "loss": 5.426, "step": 9315 }, { "epoch": 0.6248784250595298, "grad_norm": 0.14981490514794482, "learning_rate": 2e-05, "loss": 5.5296, "step": 9316 }, { "epoch": 0.6249455008887548, "grad_norm": 0.15323454872852135, "learning_rate": 2e-05, "loss": 5.3426, "step": 9317 }, { "epoch": 0.6250125767179797, "grad_norm": 0.1529639777033927, "learning_rate": 2e-05, "loss": 5.5532, "step": 9318 }, { "epoch": 0.6250796525472047, "grad_norm": 0.15488385321554574, "learning_rate": 2e-05, "loss": 5.3381, "step": 9319 }, { "epoch": 0.6251467283764296, "grad_norm": 0.16030145817703134, "learning_rate": 2e-05, "loss": 5.4267, "step": 9320 }, { "epoch": 0.6252138042056545, "grad_norm": 0.1503653380274123, "learning_rate": 2e-05, "loss": 5.4494, "step": 9321 }, { "epoch": 0.6252808800348795, "grad_norm": 0.14779574602333148, "learning_rate": 2e-05, "loss": 5.483, "step": 9322 }, { "epoch": 0.6253479558641044, "grad_norm": 0.15192951045621747, "learning_rate": 2e-05, "loss": 5.4185, "step": 9323 }, { "epoch": 0.6254150316933293, "grad_norm": 0.15099471116304397, "learning_rate": 2e-05, "loss": 5.6855, "step": 9324 }, { "epoch": 0.6254821075225543, "grad_norm": 0.1563171986281022, "learning_rate": 2e-05, "loss": 5.3855, "step": 9325 }, { "epoch": 0.6255491833517792, "grad_norm": 0.15281134212851236, "learning_rate": 2e-05, "loss": 5.4118, "step": 9326 }, { "epoch": 0.6256162591810042, "grad_norm": 0.15003369900742974, "learning_rate": 2e-05, "loss": 5.3597, "step": 9327 }, { "epoch": 0.6256833350102291, "grad_norm": 0.1555422667583891, "learning_rate": 2e-05, "loss": 5.4668, "step": 9328 }, { "epoch": 0.625750410839454, "grad_norm": 0.1501022977513605, "learning_rate": 2e-05, "loss": 5.4525, "step": 9329 }, { "epoch": 0.625817486668679, "grad_norm": 0.1484074411045815, "learning_rate": 2e-05, "loss": 5.3742, "step": 9330 }, { "epoch": 0.6258845624979039, "grad_norm": 0.16300562517744996, "learning_rate": 2e-05, "loss": 5.4537, "step": 9331 }, { "epoch": 0.6259516383271289, "grad_norm": 0.1514167359245985, "learning_rate": 2e-05, "loss": 5.3992, "step": 9332 }, { "epoch": 0.6260187141563538, "grad_norm": 0.14752022984956242, "learning_rate": 2e-05, "loss": 5.5039, "step": 9333 }, { "epoch": 0.6260857899855787, "grad_norm": 0.16020043285345364, "learning_rate": 2e-05, "loss": 5.5195, "step": 9334 }, { "epoch": 0.6261528658148037, "grad_norm": 0.1519596144815404, "learning_rate": 2e-05, "loss": 5.3951, "step": 9335 }, { "epoch": 0.6262199416440286, "grad_norm": 0.1516130458819609, "learning_rate": 2e-05, "loss": 5.5023, "step": 9336 }, { "epoch": 0.6262870174732535, "grad_norm": 0.14887899339512722, "learning_rate": 2e-05, "loss": 5.3689, "step": 9337 }, { "epoch": 0.6263540933024785, "grad_norm": 0.15406302780116657, "learning_rate": 2e-05, "loss": 5.4537, "step": 9338 }, { "epoch": 0.6264211691317034, "grad_norm": 0.15250107040792965, "learning_rate": 2e-05, "loss": 5.4585, "step": 9339 }, { "epoch": 0.6264882449609284, "grad_norm": 0.14720056301419343, "learning_rate": 2e-05, "loss": 5.3737, "step": 9340 }, { "epoch": 0.6265553207901533, "grad_norm": 0.15675190358722718, "learning_rate": 2e-05, "loss": 5.4895, "step": 9341 }, { "epoch": 0.6266223966193782, "grad_norm": 0.1614702933134975, "learning_rate": 2e-05, "loss": 5.2947, "step": 9342 }, { "epoch": 0.6266894724486032, "grad_norm": 0.15340158128245057, "learning_rate": 2e-05, "loss": 5.3819, "step": 9343 }, { "epoch": 0.6267565482778281, "grad_norm": 0.17926965245092993, "learning_rate": 2e-05, "loss": 5.4628, "step": 9344 }, { "epoch": 0.6268236241070531, "grad_norm": 0.15145428590246568, "learning_rate": 2e-05, "loss": 5.3562, "step": 9345 }, { "epoch": 0.626890699936278, "grad_norm": 0.1621878731263437, "learning_rate": 2e-05, "loss": 5.504, "step": 9346 }, { "epoch": 0.6269577757655029, "grad_norm": 0.15190778320919965, "learning_rate": 2e-05, "loss": 5.3607, "step": 9347 }, { "epoch": 0.6270248515947279, "grad_norm": 0.14957491826710362, "learning_rate": 2e-05, "loss": 5.4087, "step": 9348 }, { "epoch": 0.6270919274239528, "grad_norm": 0.15539299028143083, "learning_rate": 2e-05, "loss": 5.3471, "step": 9349 }, { "epoch": 0.6271590032531777, "grad_norm": 0.15994494937005252, "learning_rate": 2e-05, "loss": 5.4628, "step": 9350 }, { "epoch": 0.6272260790824027, "grad_norm": 0.15472053207213424, "learning_rate": 2e-05, "loss": 5.2943, "step": 9351 }, { "epoch": 0.6272931549116276, "grad_norm": 0.15798005667392523, "learning_rate": 2e-05, "loss": 5.4784, "step": 9352 }, { "epoch": 0.6273602307408526, "grad_norm": 0.15387718139426837, "learning_rate": 2e-05, "loss": 5.3771, "step": 9353 }, { "epoch": 0.6274273065700775, "grad_norm": 0.15518824759680933, "learning_rate": 2e-05, "loss": 5.4795, "step": 9354 }, { "epoch": 0.6274943823993024, "grad_norm": 0.15289974895365846, "learning_rate": 2e-05, "loss": 5.444, "step": 9355 }, { "epoch": 0.6275614582285274, "grad_norm": 0.1472427886484463, "learning_rate": 2e-05, "loss": 5.4142, "step": 9356 }, { "epoch": 0.6276285340577523, "grad_norm": 0.16893801163157196, "learning_rate": 2e-05, "loss": 5.4421, "step": 9357 }, { "epoch": 0.6276956098869773, "grad_norm": 0.15474496836144508, "learning_rate": 2e-05, "loss": 5.3529, "step": 9358 }, { "epoch": 0.6277626857162022, "grad_norm": 0.15415978384005488, "learning_rate": 2e-05, "loss": 5.4808, "step": 9359 }, { "epoch": 0.6278297615454271, "grad_norm": 0.1592805460009479, "learning_rate": 2e-05, "loss": 5.396, "step": 9360 }, { "epoch": 0.6278968373746521, "grad_norm": 0.16600374041049634, "learning_rate": 2e-05, "loss": 5.4051, "step": 9361 }, { "epoch": 0.627963913203877, "grad_norm": 0.15012955019120713, "learning_rate": 2e-05, "loss": 5.3874, "step": 9362 }, { "epoch": 0.628030989033102, "grad_norm": 0.15497144827769838, "learning_rate": 2e-05, "loss": 5.4345, "step": 9363 }, { "epoch": 0.6280980648623269, "grad_norm": 0.1540268023660924, "learning_rate": 2e-05, "loss": 5.4256, "step": 9364 }, { "epoch": 0.6281651406915518, "grad_norm": 0.15150818998291563, "learning_rate": 2e-05, "loss": 5.4337, "step": 9365 }, { "epoch": 0.6282322165207768, "grad_norm": 0.15265461037526573, "learning_rate": 2e-05, "loss": 5.3603, "step": 9366 }, { "epoch": 0.6282992923500017, "grad_norm": 0.15127138597168155, "learning_rate": 2e-05, "loss": 5.4463, "step": 9367 }, { "epoch": 0.6283663681792266, "grad_norm": 0.1567021146851781, "learning_rate": 2e-05, "loss": 5.3723, "step": 9368 }, { "epoch": 0.6284334440084516, "grad_norm": 0.15288547138819844, "learning_rate": 2e-05, "loss": 5.4472, "step": 9369 }, { "epoch": 0.6285005198376765, "grad_norm": 0.1467130241723948, "learning_rate": 2e-05, "loss": 5.4234, "step": 9370 }, { "epoch": 0.6285675956669015, "grad_norm": 0.1576888183639012, "learning_rate": 2e-05, "loss": 5.4393, "step": 9371 }, { "epoch": 0.6286346714961264, "grad_norm": 0.15214648389781002, "learning_rate": 2e-05, "loss": 5.426, "step": 9372 }, { "epoch": 0.6287017473253513, "grad_norm": 0.15645788855709467, "learning_rate": 2e-05, "loss": 5.5096, "step": 9373 }, { "epoch": 0.6287688231545763, "grad_norm": 0.15269193901492767, "learning_rate": 2e-05, "loss": 5.2892, "step": 9374 }, { "epoch": 0.6288358989838012, "grad_norm": 0.15158076893113573, "learning_rate": 2e-05, "loss": 5.3594, "step": 9375 }, { "epoch": 0.6289029748130261, "grad_norm": 0.15525635080639794, "learning_rate": 2e-05, "loss": 5.5074, "step": 9376 }, { "epoch": 0.6289700506422511, "grad_norm": 0.1486004539384565, "learning_rate": 2e-05, "loss": 5.4829, "step": 9377 }, { "epoch": 0.629037126471476, "grad_norm": 0.15083412180350117, "learning_rate": 2e-05, "loss": 5.332, "step": 9378 }, { "epoch": 0.629104202300701, "grad_norm": 0.14951350870373342, "learning_rate": 2e-05, "loss": 5.3728, "step": 9379 }, { "epoch": 0.6291712781299259, "grad_norm": 0.15370659940179374, "learning_rate": 2e-05, "loss": 5.5014, "step": 9380 }, { "epoch": 0.6292383539591508, "grad_norm": 0.16019248966651564, "learning_rate": 2e-05, "loss": 5.3386, "step": 9381 }, { "epoch": 0.6293054297883758, "grad_norm": 0.15847224863165632, "learning_rate": 2e-05, "loss": 5.3947, "step": 9382 }, { "epoch": 0.6293725056176007, "grad_norm": 0.15346349043062782, "learning_rate": 2e-05, "loss": 5.3852, "step": 9383 }, { "epoch": 0.6294395814468257, "grad_norm": 0.1537529103349642, "learning_rate": 2e-05, "loss": 5.331, "step": 9384 }, { "epoch": 0.6295066572760506, "grad_norm": 0.1574282192185749, "learning_rate": 2e-05, "loss": 5.3539, "step": 9385 }, { "epoch": 0.6295737331052755, "grad_norm": 0.15070565851796658, "learning_rate": 2e-05, "loss": 5.3835, "step": 9386 }, { "epoch": 0.6296408089345005, "grad_norm": 0.15006207444228298, "learning_rate": 2e-05, "loss": 5.4859, "step": 9387 }, { "epoch": 0.6297078847637254, "grad_norm": 0.15243322909477694, "learning_rate": 2e-05, "loss": 5.4736, "step": 9388 }, { "epoch": 0.6297749605929504, "grad_norm": 0.15442853966619624, "learning_rate": 2e-05, "loss": 5.3382, "step": 9389 }, { "epoch": 0.6298420364221753, "grad_norm": 0.16252676881111652, "learning_rate": 2e-05, "loss": 5.4422, "step": 9390 }, { "epoch": 0.6299091122514002, "grad_norm": 0.16616750900366756, "learning_rate": 2e-05, "loss": 5.439, "step": 9391 }, { "epoch": 0.6299761880806252, "grad_norm": 0.1527117511568586, "learning_rate": 2e-05, "loss": 5.4695, "step": 9392 }, { "epoch": 0.6300432639098501, "grad_norm": 0.15662737693165685, "learning_rate": 2e-05, "loss": 5.3748, "step": 9393 }, { "epoch": 0.630110339739075, "grad_norm": 0.1506878953351132, "learning_rate": 2e-05, "loss": 5.4502, "step": 9394 }, { "epoch": 0.6301774155683, "grad_norm": 0.15252199467056696, "learning_rate": 2e-05, "loss": 5.4299, "step": 9395 }, { "epoch": 0.6302444913975249, "grad_norm": 0.15720438283149735, "learning_rate": 2e-05, "loss": 5.5455, "step": 9396 }, { "epoch": 0.6303115672267499, "grad_norm": 0.1471050117022988, "learning_rate": 2e-05, "loss": 5.4685, "step": 9397 }, { "epoch": 0.6303786430559748, "grad_norm": 0.1555049628220281, "learning_rate": 2e-05, "loss": 5.6692, "step": 9398 }, { "epoch": 0.6304457188851997, "grad_norm": 0.15744116662361604, "learning_rate": 2e-05, "loss": 5.466, "step": 9399 }, { "epoch": 0.6305127947144247, "grad_norm": 0.1486556196895531, "learning_rate": 2e-05, "loss": 5.4112, "step": 9400 }, { "epoch": 0.6305798705436496, "grad_norm": 0.1568213523364497, "learning_rate": 2e-05, "loss": 5.4821, "step": 9401 }, { "epoch": 0.6306469463728746, "grad_norm": 0.15460415691534965, "learning_rate": 2e-05, "loss": 5.3509, "step": 9402 }, { "epoch": 0.6307140222020995, "grad_norm": 0.15183490097101865, "learning_rate": 2e-05, "loss": 5.5939, "step": 9403 }, { "epoch": 0.6307810980313244, "grad_norm": 0.1530798019791715, "learning_rate": 2e-05, "loss": 5.5448, "step": 9404 }, { "epoch": 0.6308481738605494, "grad_norm": 0.15789831841461469, "learning_rate": 2e-05, "loss": 5.5343, "step": 9405 }, { "epoch": 0.6309152496897743, "grad_norm": 0.15718429053894867, "learning_rate": 2e-05, "loss": 5.453, "step": 9406 }, { "epoch": 0.6309823255189992, "grad_norm": 0.1504412383842006, "learning_rate": 2e-05, "loss": 5.3821, "step": 9407 }, { "epoch": 0.6310494013482242, "grad_norm": 0.1592730106985513, "learning_rate": 2e-05, "loss": 5.4972, "step": 9408 }, { "epoch": 0.6311164771774491, "grad_norm": 0.1535000130902462, "learning_rate": 2e-05, "loss": 5.4063, "step": 9409 }, { "epoch": 0.6311835530066741, "grad_norm": 0.15234881051960175, "learning_rate": 2e-05, "loss": 5.4223, "step": 9410 }, { "epoch": 0.631250628835899, "grad_norm": 0.15367580365531822, "learning_rate": 2e-05, "loss": 5.4954, "step": 9411 }, { "epoch": 0.6313177046651239, "grad_norm": 0.14942708984989578, "learning_rate": 2e-05, "loss": 5.3204, "step": 9412 }, { "epoch": 0.6313847804943489, "grad_norm": 0.15237663057447956, "learning_rate": 2e-05, "loss": 5.3596, "step": 9413 }, { "epoch": 0.6314518563235738, "grad_norm": 0.15039014801624928, "learning_rate": 2e-05, "loss": 5.3852, "step": 9414 }, { "epoch": 0.6315189321527988, "grad_norm": 0.1418609579568539, "learning_rate": 2e-05, "loss": 5.371, "step": 9415 }, { "epoch": 0.6315860079820237, "grad_norm": 0.16102950730656843, "learning_rate": 2e-05, "loss": 5.3, "step": 9416 }, { "epoch": 0.6316530838112486, "grad_norm": 0.15411501987330056, "learning_rate": 2e-05, "loss": 5.427, "step": 9417 }, { "epoch": 0.6317201596404736, "grad_norm": 0.14351749212136589, "learning_rate": 2e-05, "loss": 5.4073, "step": 9418 }, { "epoch": 0.6317872354696985, "grad_norm": 0.14243729819003836, "learning_rate": 2e-05, "loss": 5.554, "step": 9419 }, { "epoch": 0.6318543112989234, "grad_norm": 0.15725739367078473, "learning_rate": 2e-05, "loss": 5.4893, "step": 9420 }, { "epoch": 0.6319213871281484, "grad_norm": 0.15029349160834918, "learning_rate": 2e-05, "loss": 5.4891, "step": 9421 }, { "epoch": 0.6319884629573733, "grad_norm": 0.15233995251626753, "learning_rate": 2e-05, "loss": 5.2992, "step": 9422 }, { "epoch": 0.6320555387865983, "grad_norm": 0.15312730645058026, "learning_rate": 2e-05, "loss": 5.3665, "step": 9423 }, { "epoch": 0.6321226146158232, "grad_norm": 0.14902246635430225, "learning_rate": 2e-05, "loss": 5.3636, "step": 9424 }, { "epoch": 0.6321896904450481, "grad_norm": 0.14694468050861317, "learning_rate": 2e-05, "loss": 5.36, "step": 9425 }, { "epoch": 0.6322567662742731, "grad_norm": 0.15030944347252506, "learning_rate": 2e-05, "loss": 5.5301, "step": 9426 }, { "epoch": 0.632323842103498, "grad_norm": 0.15706323630397048, "learning_rate": 2e-05, "loss": 5.3766, "step": 9427 }, { "epoch": 0.632390917932723, "grad_norm": 0.14588139423171734, "learning_rate": 2e-05, "loss": 5.4507, "step": 9428 }, { "epoch": 0.6324579937619479, "grad_norm": 0.1482794940889182, "learning_rate": 2e-05, "loss": 5.4116, "step": 9429 }, { "epoch": 0.6325250695911728, "grad_norm": 0.14840695331562095, "learning_rate": 2e-05, "loss": 5.4227, "step": 9430 }, { "epoch": 0.6325921454203978, "grad_norm": 0.15174555783509674, "learning_rate": 2e-05, "loss": 5.5166, "step": 9431 }, { "epoch": 0.6326592212496227, "grad_norm": 0.15126502350126086, "learning_rate": 2e-05, "loss": 5.559, "step": 9432 }, { "epoch": 0.6327262970788476, "grad_norm": 0.14968732015439148, "learning_rate": 2e-05, "loss": 5.4261, "step": 9433 }, { "epoch": 0.6327933729080726, "grad_norm": 0.15205844697116036, "learning_rate": 2e-05, "loss": 5.5033, "step": 9434 }, { "epoch": 0.6328604487372975, "grad_norm": 0.15358327080389733, "learning_rate": 2e-05, "loss": 5.3541, "step": 9435 }, { "epoch": 0.6329275245665225, "grad_norm": 0.15705775961784946, "learning_rate": 2e-05, "loss": 5.5333, "step": 9436 }, { "epoch": 0.6329946003957474, "grad_norm": 0.15528950162057042, "learning_rate": 2e-05, "loss": 5.5916, "step": 9437 }, { "epoch": 0.6330616762249723, "grad_norm": 0.15708728869075786, "learning_rate": 2e-05, "loss": 5.4459, "step": 9438 }, { "epoch": 0.6331287520541973, "grad_norm": 0.14953097192377257, "learning_rate": 2e-05, "loss": 5.4639, "step": 9439 }, { "epoch": 0.6331958278834222, "grad_norm": 0.15699397437271584, "learning_rate": 2e-05, "loss": 5.3605, "step": 9440 }, { "epoch": 0.6332629037126472, "grad_norm": 0.15133602323775253, "learning_rate": 2e-05, "loss": 5.4004, "step": 9441 }, { "epoch": 0.6333299795418721, "grad_norm": 0.15769084881285195, "learning_rate": 2e-05, "loss": 5.3081, "step": 9442 }, { "epoch": 0.633397055371097, "grad_norm": 0.15734582283651571, "learning_rate": 2e-05, "loss": 5.3784, "step": 9443 }, { "epoch": 0.633464131200322, "grad_norm": 0.15602718562233542, "learning_rate": 2e-05, "loss": 5.3265, "step": 9444 }, { "epoch": 0.6335312070295469, "grad_norm": 0.15401118300491937, "learning_rate": 2e-05, "loss": 5.4874, "step": 9445 }, { "epoch": 0.6335982828587718, "grad_norm": 0.16075835087560125, "learning_rate": 2e-05, "loss": 5.5627, "step": 9446 }, { "epoch": 0.6336653586879968, "grad_norm": 0.15196628829201494, "learning_rate": 2e-05, "loss": 5.3497, "step": 9447 }, { "epoch": 0.6337324345172217, "grad_norm": 0.15940365948993876, "learning_rate": 2e-05, "loss": 5.3766, "step": 9448 }, { "epoch": 0.6337995103464467, "grad_norm": 0.1584771802454168, "learning_rate": 2e-05, "loss": 5.3804, "step": 9449 }, { "epoch": 0.6338665861756716, "grad_norm": 0.15979137578851854, "learning_rate": 2e-05, "loss": 5.4129, "step": 9450 }, { "epoch": 0.6339336620048965, "grad_norm": 0.16005521198963915, "learning_rate": 2e-05, "loss": 5.3559, "step": 9451 }, { "epoch": 0.6340007378341215, "grad_norm": 0.16101149541184528, "learning_rate": 2e-05, "loss": 5.4141, "step": 9452 }, { "epoch": 0.6340678136633464, "grad_norm": 0.15983234360810777, "learning_rate": 2e-05, "loss": 5.3385, "step": 9453 }, { "epoch": 0.6341348894925714, "grad_norm": 0.18166958649328357, "learning_rate": 2e-05, "loss": 5.2923, "step": 9454 }, { "epoch": 0.6342019653217963, "grad_norm": 0.14818868662644574, "learning_rate": 2e-05, "loss": 5.3421, "step": 9455 }, { "epoch": 0.6342690411510212, "grad_norm": 0.1695367884009991, "learning_rate": 2e-05, "loss": 5.5122, "step": 9456 }, { "epoch": 0.6343361169802462, "grad_norm": 0.16584590147701403, "learning_rate": 2e-05, "loss": 5.4734, "step": 9457 }, { "epoch": 0.6344031928094711, "grad_norm": 0.16215749559117007, "learning_rate": 2e-05, "loss": 5.5513, "step": 9458 }, { "epoch": 0.634470268638696, "grad_norm": 0.15551186833625985, "learning_rate": 2e-05, "loss": 5.4174, "step": 9459 }, { "epoch": 0.634537344467921, "grad_norm": 0.17175033773750564, "learning_rate": 2e-05, "loss": 5.365, "step": 9460 }, { "epoch": 0.6346044202971459, "grad_norm": 0.1569553621318592, "learning_rate": 2e-05, "loss": 5.4916, "step": 9461 }, { "epoch": 0.6346714961263709, "grad_norm": 0.15395521707328289, "learning_rate": 2e-05, "loss": 5.4681, "step": 9462 }, { "epoch": 0.6347385719555958, "grad_norm": 0.1524574915229782, "learning_rate": 2e-05, "loss": 5.48, "step": 9463 }, { "epoch": 0.6348056477848207, "grad_norm": 0.15936305915168625, "learning_rate": 2e-05, "loss": 5.4239, "step": 9464 }, { "epoch": 0.6348727236140457, "grad_norm": 0.15501195137523027, "learning_rate": 2e-05, "loss": 5.6107, "step": 9465 }, { "epoch": 0.6349397994432706, "grad_norm": 0.15434577165326535, "learning_rate": 2e-05, "loss": 5.3584, "step": 9466 }, { "epoch": 0.6350068752724956, "grad_norm": 0.15862561652937449, "learning_rate": 2e-05, "loss": 5.4494, "step": 9467 }, { "epoch": 0.6350739511017205, "grad_norm": 0.15190057425290135, "learning_rate": 2e-05, "loss": 5.5224, "step": 9468 }, { "epoch": 0.6351410269309454, "grad_norm": 0.15353054523840573, "learning_rate": 2e-05, "loss": 5.4595, "step": 9469 }, { "epoch": 0.6352081027601704, "grad_norm": 0.15367267412943597, "learning_rate": 2e-05, "loss": 5.3996, "step": 9470 }, { "epoch": 0.6352751785893953, "grad_norm": 0.16337542946342778, "learning_rate": 2e-05, "loss": 5.372, "step": 9471 }, { "epoch": 0.6353422544186202, "grad_norm": 0.15562022561747302, "learning_rate": 2e-05, "loss": 5.4376, "step": 9472 }, { "epoch": 0.6354093302478452, "grad_norm": 0.15786533453473975, "learning_rate": 2e-05, "loss": 5.2558, "step": 9473 }, { "epoch": 0.6354764060770701, "grad_norm": 0.1517233356145364, "learning_rate": 2e-05, "loss": 5.4219, "step": 9474 }, { "epoch": 0.6355434819062951, "grad_norm": 0.15267037078905313, "learning_rate": 2e-05, "loss": 5.3273, "step": 9475 }, { "epoch": 0.63561055773552, "grad_norm": 0.1645452771582934, "learning_rate": 2e-05, "loss": 5.4951, "step": 9476 }, { "epoch": 0.6356776335647449, "grad_norm": 0.15058177062032424, "learning_rate": 2e-05, "loss": 5.387, "step": 9477 }, { "epoch": 0.6357447093939699, "grad_norm": 0.15680853032346234, "learning_rate": 2e-05, "loss": 5.4198, "step": 9478 }, { "epoch": 0.6358117852231948, "grad_norm": 0.1544590852365142, "learning_rate": 2e-05, "loss": 5.3767, "step": 9479 }, { "epoch": 0.6358788610524198, "grad_norm": 0.15594211265113536, "learning_rate": 2e-05, "loss": 5.3324, "step": 9480 }, { "epoch": 0.6359459368816447, "grad_norm": 0.15499325831077612, "learning_rate": 2e-05, "loss": 5.2871, "step": 9481 }, { "epoch": 0.6360130127108696, "grad_norm": 0.15216954344933586, "learning_rate": 2e-05, "loss": 5.4468, "step": 9482 }, { "epoch": 0.6360800885400946, "grad_norm": 0.15656687948172257, "learning_rate": 2e-05, "loss": 5.4105, "step": 9483 }, { "epoch": 0.6361471643693195, "grad_norm": 0.15611668124033298, "learning_rate": 2e-05, "loss": 5.4402, "step": 9484 }, { "epoch": 0.6362142401985444, "grad_norm": 0.14803703945776656, "learning_rate": 2e-05, "loss": 5.3601, "step": 9485 }, { "epoch": 0.6362813160277694, "grad_norm": 0.1523071481143449, "learning_rate": 2e-05, "loss": 5.3994, "step": 9486 }, { "epoch": 0.6363483918569943, "grad_norm": 0.16388495064251501, "learning_rate": 2e-05, "loss": 5.3718, "step": 9487 }, { "epoch": 0.6364154676862193, "grad_norm": 0.16095607556286004, "learning_rate": 2e-05, "loss": 5.4756, "step": 9488 }, { "epoch": 0.6364825435154442, "grad_norm": 0.16216210804809375, "learning_rate": 2e-05, "loss": 5.3361, "step": 9489 }, { "epoch": 0.6365496193446691, "grad_norm": 0.16532660689493436, "learning_rate": 2e-05, "loss": 5.5006, "step": 9490 }, { "epoch": 0.6366166951738941, "grad_norm": 0.1566142964465771, "learning_rate": 2e-05, "loss": 5.4268, "step": 9491 }, { "epoch": 0.636683771003119, "grad_norm": 0.15885670825423792, "learning_rate": 2e-05, "loss": 5.372, "step": 9492 }, { "epoch": 0.636750846832344, "grad_norm": 0.16549125155465008, "learning_rate": 2e-05, "loss": 5.3934, "step": 9493 }, { "epoch": 0.6368179226615689, "grad_norm": 0.15679751839270195, "learning_rate": 2e-05, "loss": 5.4015, "step": 9494 }, { "epoch": 0.6368849984907938, "grad_norm": 0.15565579707347904, "learning_rate": 2e-05, "loss": 5.3959, "step": 9495 }, { "epoch": 0.6369520743200188, "grad_norm": 0.16780204984453437, "learning_rate": 2e-05, "loss": 5.4178, "step": 9496 }, { "epoch": 0.6370191501492437, "grad_norm": 0.15514808305443784, "learning_rate": 2e-05, "loss": 5.41, "step": 9497 }, { "epoch": 0.6370862259784686, "grad_norm": 0.15799876107557367, "learning_rate": 2e-05, "loss": 5.3247, "step": 9498 }, { "epoch": 0.6371533018076936, "grad_norm": 0.15842338652934157, "learning_rate": 2e-05, "loss": 5.5048, "step": 9499 }, { "epoch": 0.6372203776369185, "grad_norm": 0.15856049367908429, "learning_rate": 2e-05, "loss": 5.5041, "step": 9500 }, { "epoch": 0.6372874534661435, "grad_norm": 0.15292321489799202, "learning_rate": 2e-05, "loss": 5.4398, "step": 9501 }, { "epoch": 0.6373545292953684, "grad_norm": 0.17549276998480648, "learning_rate": 2e-05, "loss": 5.3782, "step": 9502 }, { "epoch": 0.6374216051245933, "grad_norm": 0.1646050816010846, "learning_rate": 2e-05, "loss": 5.5341, "step": 9503 }, { "epoch": 0.6374886809538183, "grad_norm": 0.15412238088672614, "learning_rate": 2e-05, "loss": 5.4122, "step": 9504 }, { "epoch": 0.6375557567830432, "grad_norm": 0.1602054230736469, "learning_rate": 2e-05, "loss": 5.396, "step": 9505 }, { "epoch": 0.6376228326122682, "grad_norm": 0.15049906607923885, "learning_rate": 2e-05, "loss": 5.34, "step": 9506 }, { "epoch": 0.6376899084414931, "grad_norm": 0.1440830202382656, "learning_rate": 2e-05, "loss": 5.4949, "step": 9507 }, { "epoch": 0.637756984270718, "grad_norm": 0.16012268387977605, "learning_rate": 2e-05, "loss": 5.4742, "step": 9508 }, { "epoch": 0.637824060099943, "grad_norm": 0.1486894587072813, "learning_rate": 2e-05, "loss": 5.4094, "step": 9509 }, { "epoch": 0.6378911359291679, "grad_norm": 0.1551215678363046, "learning_rate": 2e-05, "loss": 5.3938, "step": 9510 }, { "epoch": 0.6379582117583928, "grad_norm": 0.16077793682598102, "learning_rate": 2e-05, "loss": 5.4199, "step": 9511 }, { "epoch": 0.6380252875876178, "grad_norm": 0.153037503164381, "learning_rate": 2e-05, "loss": 5.4435, "step": 9512 }, { "epoch": 0.6380923634168427, "grad_norm": 0.15139395757869561, "learning_rate": 2e-05, "loss": 5.4141, "step": 9513 }, { "epoch": 0.6381594392460677, "grad_norm": 0.1579098244245811, "learning_rate": 2e-05, "loss": 5.3735, "step": 9514 }, { "epoch": 0.6382265150752926, "grad_norm": 0.15689015879175175, "learning_rate": 2e-05, "loss": 5.4662, "step": 9515 }, { "epoch": 0.6382935909045175, "grad_norm": 0.14967695880729837, "learning_rate": 2e-05, "loss": 5.5311, "step": 9516 }, { "epoch": 0.6383606667337425, "grad_norm": 0.1530207355040865, "learning_rate": 2e-05, "loss": 5.4214, "step": 9517 }, { "epoch": 0.6384277425629674, "grad_norm": 0.15480594194552336, "learning_rate": 2e-05, "loss": 5.4134, "step": 9518 }, { "epoch": 0.6384948183921924, "grad_norm": 0.1530039141984057, "learning_rate": 2e-05, "loss": 5.4399, "step": 9519 }, { "epoch": 0.6385618942214173, "grad_norm": 0.1438741193033246, "learning_rate": 2e-05, "loss": 5.3394, "step": 9520 }, { "epoch": 0.6386289700506422, "grad_norm": 0.14766797689218333, "learning_rate": 2e-05, "loss": 5.3965, "step": 9521 }, { "epoch": 0.6386960458798672, "grad_norm": 0.15047304335181527, "learning_rate": 2e-05, "loss": 5.3687, "step": 9522 }, { "epoch": 0.6387631217090921, "grad_norm": 0.14809226237755085, "learning_rate": 2e-05, "loss": 5.432, "step": 9523 }, { "epoch": 0.638830197538317, "grad_norm": 0.1493599491928748, "learning_rate": 2e-05, "loss": 5.5055, "step": 9524 }, { "epoch": 0.638897273367542, "grad_norm": 0.14506340590924596, "learning_rate": 2e-05, "loss": 5.4033, "step": 9525 }, { "epoch": 0.6389643491967669, "grad_norm": 0.1495272273568717, "learning_rate": 2e-05, "loss": 5.409, "step": 9526 }, { "epoch": 0.6390314250259919, "grad_norm": 0.15176155856799417, "learning_rate": 2e-05, "loss": 5.2607, "step": 9527 }, { "epoch": 0.6390985008552168, "grad_norm": 0.1566061231279341, "learning_rate": 2e-05, "loss": 5.3873, "step": 9528 }, { "epoch": 0.6391655766844417, "grad_norm": 0.14905132127029663, "learning_rate": 2e-05, "loss": 5.4128, "step": 9529 }, { "epoch": 0.6392326525136667, "grad_norm": 0.15250591747960102, "learning_rate": 2e-05, "loss": 5.3926, "step": 9530 }, { "epoch": 0.6392997283428916, "grad_norm": 0.1584260215854498, "learning_rate": 2e-05, "loss": 5.5026, "step": 9531 }, { "epoch": 0.6393668041721166, "grad_norm": 0.15039580862967292, "learning_rate": 2e-05, "loss": 5.4624, "step": 9532 }, { "epoch": 0.6394338800013415, "grad_norm": 0.14373221965362223, "learning_rate": 2e-05, "loss": 5.2855, "step": 9533 }, { "epoch": 0.6395009558305664, "grad_norm": 0.1560624855566293, "learning_rate": 2e-05, "loss": 5.3199, "step": 9534 }, { "epoch": 0.6395680316597914, "grad_norm": 0.15318114467177987, "learning_rate": 2e-05, "loss": 5.3661, "step": 9535 }, { "epoch": 0.6396351074890163, "grad_norm": 0.14731199097696887, "learning_rate": 2e-05, "loss": 5.5541, "step": 9536 }, { "epoch": 0.6397021833182412, "grad_norm": 0.15836986589374136, "learning_rate": 2e-05, "loss": 5.3941, "step": 9537 }, { "epoch": 0.6397692591474662, "grad_norm": 0.15254663923278802, "learning_rate": 2e-05, "loss": 5.3661, "step": 9538 }, { "epoch": 0.6398363349766911, "grad_norm": 0.150000251669178, "learning_rate": 2e-05, "loss": 5.4193, "step": 9539 }, { "epoch": 0.6399034108059161, "grad_norm": 0.14905334281666166, "learning_rate": 2e-05, "loss": 5.4141, "step": 9540 }, { "epoch": 0.639970486635141, "grad_norm": 0.15004866555456353, "learning_rate": 2e-05, "loss": 5.4549, "step": 9541 }, { "epoch": 0.6400375624643659, "grad_norm": 0.15624355406442722, "learning_rate": 2e-05, "loss": 5.3977, "step": 9542 }, { "epoch": 0.6401046382935909, "grad_norm": 0.1539572352266018, "learning_rate": 2e-05, "loss": 5.3286, "step": 9543 }, { "epoch": 0.6401717141228158, "grad_norm": 0.15034559469437175, "learning_rate": 2e-05, "loss": 5.35, "step": 9544 }, { "epoch": 0.6402387899520408, "grad_norm": 0.15296272309790715, "learning_rate": 2e-05, "loss": 5.4223, "step": 9545 }, { "epoch": 0.6403058657812657, "grad_norm": 0.15793724901545556, "learning_rate": 2e-05, "loss": 5.4591, "step": 9546 }, { "epoch": 0.6403729416104906, "grad_norm": 0.1534399353405954, "learning_rate": 2e-05, "loss": 5.4501, "step": 9547 }, { "epoch": 0.6404400174397156, "grad_norm": 0.16256117721775698, "learning_rate": 2e-05, "loss": 5.4045, "step": 9548 }, { "epoch": 0.6405070932689405, "grad_norm": 0.15841655407378616, "learning_rate": 2e-05, "loss": 5.3571, "step": 9549 }, { "epoch": 0.6405741690981654, "grad_norm": 0.15158164568722682, "learning_rate": 2e-05, "loss": 5.3399, "step": 9550 }, { "epoch": 0.6406412449273904, "grad_norm": 0.14944731691075602, "learning_rate": 2e-05, "loss": 5.2318, "step": 9551 }, { "epoch": 0.6407083207566153, "grad_norm": 0.14974698008869625, "learning_rate": 2e-05, "loss": 5.2541, "step": 9552 }, { "epoch": 0.6407753965858403, "grad_norm": 0.16173997198561788, "learning_rate": 2e-05, "loss": 5.4241, "step": 9553 }, { "epoch": 0.6408424724150652, "grad_norm": 0.15834911716411407, "learning_rate": 2e-05, "loss": 5.4092, "step": 9554 }, { "epoch": 0.6409095482442901, "grad_norm": 0.1486880772581723, "learning_rate": 2e-05, "loss": 5.4221, "step": 9555 }, { "epoch": 0.6409766240735151, "grad_norm": 0.14635236463697232, "learning_rate": 2e-05, "loss": 5.253, "step": 9556 }, { "epoch": 0.64104369990274, "grad_norm": 0.16517672803869055, "learning_rate": 2e-05, "loss": 5.4043, "step": 9557 }, { "epoch": 0.641110775731965, "grad_norm": 0.15659244679530115, "learning_rate": 2e-05, "loss": 5.4211, "step": 9558 }, { "epoch": 0.6411778515611899, "grad_norm": 0.15190532009580324, "learning_rate": 2e-05, "loss": 5.3821, "step": 9559 }, { "epoch": 0.6412449273904148, "grad_norm": 0.15027250902769634, "learning_rate": 2e-05, "loss": 5.4438, "step": 9560 }, { "epoch": 0.6413120032196398, "grad_norm": 0.1608383529271978, "learning_rate": 2e-05, "loss": 5.4229, "step": 9561 }, { "epoch": 0.6413790790488647, "grad_norm": 0.15561615468813103, "learning_rate": 2e-05, "loss": 5.3995, "step": 9562 }, { "epoch": 0.6414461548780896, "grad_norm": 0.1459332140437825, "learning_rate": 2e-05, "loss": 5.4423, "step": 9563 }, { "epoch": 0.6415132307073146, "grad_norm": 0.16492041435749713, "learning_rate": 2e-05, "loss": 5.5011, "step": 9564 }, { "epoch": 0.6415803065365395, "grad_norm": 0.1644600226412454, "learning_rate": 2e-05, "loss": 5.3469, "step": 9565 }, { "epoch": 0.6416473823657645, "grad_norm": 0.1483640072839371, "learning_rate": 2e-05, "loss": 5.4591, "step": 9566 }, { "epoch": 0.6417144581949894, "grad_norm": 0.1500838888756209, "learning_rate": 2e-05, "loss": 5.4474, "step": 9567 }, { "epoch": 0.6417815340242143, "grad_norm": 0.14898070614796155, "learning_rate": 2e-05, "loss": 5.398, "step": 9568 }, { "epoch": 0.6418486098534393, "grad_norm": 0.15492230138064775, "learning_rate": 2e-05, "loss": 5.4485, "step": 9569 }, { "epoch": 0.6419156856826642, "grad_norm": 0.1516304257826873, "learning_rate": 2e-05, "loss": 5.4324, "step": 9570 }, { "epoch": 0.6419827615118892, "grad_norm": 0.15851503462586322, "learning_rate": 2e-05, "loss": 5.3148, "step": 9571 }, { "epoch": 0.6420498373411141, "grad_norm": 0.14819808163335543, "learning_rate": 2e-05, "loss": 5.496, "step": 9572 }, { "epoch": 0.642116913170339, "grad_norm": 0.15652302649547695, "learning_rate": 2e-05, "loss": 5.3795, "step": 9573 }, { "epoch": 0.642183988999564, "grad_norm": 0.15050768352783667, "learning_rate": 2e-05, "loss": 5.3452, "step": 9574 }, { "epoch": 0.6422510648287889, "grad_norm": 0.14859297718326564, "learning_rate": 2e-05, "loss": 5.4501, "step": 9575 }, { "epoch": 0.6423181406580138, "grad_norm": 0.15575916060066813, "learning_rate": 2e-05, "loss": 5.4579, "step": 9576 }, { "epoch": 0.6423852164872388, "grad_norm": 0.15776725321324744, "learning_rate": 2e-05, "loss": 5.4338, "step": 9577 }, { "epoch": 0.6424522923164637, "grad_norm": 0.14662149555613055, "learning_rate": 2e-05, "loss": 5.4537, "step": 9578 }, { "epoch": 0.6425193681456887, "grad_norm": 0.15930252210172244, "learning_rate": 2e-05, "loss": 5.5338, "step": 9579 }, { "epoch": 0.6425864439749136, "grad_norm": 0.1619046507808526, "learning_rate": 2e-05, "loss": 5.4065, "step": 9580 }, { "epoch": 0.6426535198041385, "grad_norm": 0.1472921510344544, "learning_rate": 2e-05, "loss": 5.5831, "step": 9581 }, { "epoch": 0.6427205956333635, "grad_norm": 0.14904885700584777, "learning_rate": 2e-05, "loss": 5.5278, "step": 9582 }, { "epoch": 0.6427876714625884, "grad_norm": 0.1597061010071644, "learning_rate": 2e-05, "loss": 5.5262, "step": 9583 }, { "epoch": 0.6428547472918134, "grad_norm": 0.16602821829194145, "learning_rate": 2e-05, "loss": 5.5237, "step": 9584 }, { "epoch": 0.6429218231210383, "grad_norm": 0.15668672272663148, "learning_rate": 2e-05, "loss": 5.5582, "step": 9585 }, { "epoch": 0.6429888989502632, "grad_norm": 0.1529725659445679, "learning_rate": 2e-05, "loss": 5.4347, "step": 9586 }, { "epoch": 0.6430559747794882, "grad_norm": 0.1537540291076745, "learning_rate": 2e-05, "loss": 5.6293, "step": 9587 }, { "epoch": 0.6431230506087131, "grad_norm": 0.15368245768464844, "learning_rate": 2e-05, "loss": 5.5426, "step": 9588 }, { "epoch": 0.643190126437938, "grad_norm": 0.15819871933031493, "learning_rate": 2e-05, "loss": 5.4072, "step": 9589 }, { "epoch": 0.643257202267163, "grad_norm": 0.16250194688436745, "learning_rate": 2e-05, "loss": 5.6052, "step": 9590 }, { "epoch": 0.6433242780963879, "grad_norm": 0.15155280490478998, "learning_rate": 2e-05, "loss": 5.3599, "step": 9591 }, { "epoch": 0.6433913539256129, "grad_norm": 0.14503235620929636, "learning_rate": 2e-05, "loss": 5.4645, "step": 9592 }, { "epoch": 0.6434584297548378, "grad_norm": 0.14895107121186837, "learning_rate": 2e-05, "loss": 5.4637, "step": 9593 }, { "epoch": 0.6435255055840627, "grad_norm": 0.14524401637305384, "learning_rate": 2e-05, "loss": 5.5015, "step": 9594 }, { "epoch": 0.6435925814132877, "grad_norm": 0.14749287350553403, "learning_rate": 2e-05, "loss": 5.328, "step": 9595 }, { "epoch": 0.6436596572425126, "grad_norm": 0.14784108059777107, "learning_rate": 2e-05, "loss": 5.3873, "step": 9596 }, { "epoch": 0.6437267330717376, "grad_norm": 0.15548942435785398, "learning_rate": 2e-05, "loss": 5.4945, "step": 9597 }, { "epoch": 0.6437938089009625, "grad_norm": 0.14987273044441574, "learning_rate": 2e-05, "loss": 5.402, "step": 9598 }, { "epoch": 0.6438608847301874, "grad_norm": 0.1439594517853578, "learning_rate": 2e-05, "loss": 5.5188, "step": 9599 }, { "epoch": 0.6439279605594124, "grad_norm": 0.15361733539856087, "learning_rate": 2e-05, "loss": 5.3251, "step": 9600 }, { "epoch": 0.6439950363886373, "grad_norm": 0.15643793221075902, "learning_rate": 2e-05, "loss": 5.5434, "step": 9601 }, { "epoch": 0.6440621122178622, "grad_norm": 0.1599749182758217, "learning_rate": 2e-05, "loss": 5.5225, "step": 9602 }, { "epoch": 0.6441291880470872, "grad_norm": 0.14748607964426283, "learning_rate": 2e-05, "loss": 5.3477, "step": 9603 }, { "epoch": 0.6441962638763121, "grad_norm": 0.15054424171417108, "learning_rate": 2e-05, "loss": 5.3704, "step": 9604 }, { "epoch": 0.6442633397055371, "grad_norm": 0.15332051110462994, "learning_rate": 2e-05, "loss": 5.3122, "step": 9605 }, { "epoch": 0.644330415534762, "grad_norm": 0.15263031265372884, "learning_rate": 2e-05, "loss": 5.3981, "step": 9606 }, { "epoch": 0.6443974913639869, "grad_norm": 0.15209941640546784, "learning_rate": 2e-05, "loss": 5.3452, "step": 9607 }, { "epoch": 0.6444645671932119, "grad_norm": 0.14865894251510642, "learning_rate": 2e-05, "loss": 5.2803, "step": 9608 }, { "epoch": 0.6445316430224368, "grad_norm": 0.15093259848735682, "learning_rate": 2e-05, "loss": 5.3327, "step": 9609 }, { "epoch": 0.6445987188516618, "grad_norm": 0.15162854245403987, "learning_rate": 2e-05, "loss": 5.4916, "step": 9610 }, { "epoch": 0.6446657946808867, "grad_norm": 0.15767933702372677, "learning_rate": 2e-05, "loss": 5.3672, "step": 9611 }, { "epoch": 0.6447328705101116, "grad_norm": 0.14834180438120798, "learning_rate": 2e-05, "loss": 5.3431, "step": 9612 }, { "epoch": 0.6447999463393366, "grad_norm": 0.15272372418579383, "learning_rate": 2e-05, "loss": 5.3523, "step": 9613 }, { "epoch": 0.6448670221685615, "grad_norm": 0.14360414554288878, "learning_rate": 2e-05, "loss": 5.4112, "step": 9614 }, { "epoch": 0.6449340979977864, "grad_norm": 0.1508209731383697, "learning_rate": 2e-05, "loss": 5.3585, "step": 9615 }, { "epoch": 0.6450011738270114, "grad_norm": 0.15998607968695755, "learning_rate": 2e-05, "loss": 5.4022, "step": 9616 }, { "epoch": 0.6450682496562363, "grad_norm": 0.14920330566234075, "learning_rate": 2e-05, "loss": 5.4747, "step": 9617 }, { "epoch": 0.6451353254854613, "grad_norm": 0.15472019841954224, "learning_rate": 2e-05, "loss": 5.3639, "step": 9618 }, { "epoch": 0.6452024013146862, "grad_norm": 0.15699400810064046, "learning_rate": 2e-05, "loss": 5.4612, "step": 9619 }, { "epoch": 0.6452694771439111, "grad_norm": 0.15464956821750228, "learning_rate": 2e-05, "loss": 5.3826, "step": 9620 }, { "epoch": 0.6453365529731361, "grad_norm": 0.1490510984055217, "learning_rate": 2e-05, "loss": 5.2621, "step": 9621 }, { "epoch": 0.645403628802361, "grad_norm": 0.15522416890347834, "learning_rate": 2e-05, "loss": 5.2824, "step": 9622 }, { "epoch": 0.645470704631586, "grad_norm": 0.1496237029900653, "learning_rate": 2e-05, "loss": 5.351, "step": 9623 }, { "epoch": 0.6455377804608109, "grad_norm": 0.1503525572270139, "learning_rate": 2e-05, "loss": 5.3106, "step": 9624 }, { "epoch": 0.6456048562900358, "grad_norm": 0.15804641285287227, "learning_rate": 2e-05, "loss": 5.3167, "step": 9625 }, { "epoch": 0.6456719321192608, "grad_norm": 0.15017998534092108, "learning_rate": 2e-05, "loss": 5.4032, "step": 9626 }, { "epoch": 0.6457390079484857, "grad_norm": 0.14650505146820358, "learning_rate": 2e-05, "loss": 5.4673, "step": 9627 }, { "epoch": 0.6458060837777106, "grad_norm": 0.16487629361197814, "learning_rate": 2e-05, "loss": 5.4184, "step": 9628 }, { "epoch": 0.6458731596069356, "grad_norm": 0.153476945233987, "learning_rate": 2e-05, "loss": 5.4452, "step": 9629 }, { "epoch": 0.6459402354361605, "grad_norm": 0.1509269149129582, "learning_rate": 2e-05, "loss": 5.3854, "step": 9630 }, { "epoch": 0.6460073112653855, "grad_norm": 0.17117592528764228, "learning_rate": 2e-05, "loss": 5.3972, "step": 9631 }, { "epoch": 0.6460743870946104, "grad_norm": 0.15434687163730193, "learning_rate": 2e-05, "loss": 5.2649, "step": 9632 }, { "epoch": 0.6461414629238353, "grad_norm": 0.16314195587782038, "learning_rate": 2e-05, "loss": 5.4383, "step": 9633 }, { "epoch": 0.6462085387530603, "grad_norm": 0.16359275633009607, "learning_rate": 2e-05, "loss": 5.3854, "step": 9634 }, { "epoch": 0.6462756145822852, "grad_norm": 0.15049527329063864, "learning_rate": 2e-05, "loss": 5.3036, "step": 9635 }, { "epoch": 0.6463426904115103, "grad_norm": 0.15914597557193916, "learning_rate": 2e-05, "loss": 5.4497, "step": 9636 }, { "epoch": 0.6464097662407352, "grad_norm": 0.1697964179466436, "learning_rate": 2e-05, "loss": 5.3579, "step": 9637 }, { "epoch": 0.6464768420699601, "grad_norm": 0.1503832042783177, "learning_rate": 2e-05, "loss": 5.5467, "step": 9638 }, { "epoch": 0.6465439178991851, "grad_norm": 0.14641791135113585, "learning_rate": 2e-05, "loss": 5.5749, "step": 9639 }, { "epoch": 0.64661099372841, "grad_norm": 0.17906276387635536, "learning_rate": 2e-05, "loss": 5.3968, "step": 9640 }, { "epoch": 0.646678069557635, "grad_norm": 0.15723063140968163, "learning_rate": 2e-05, "loss": 5.3632, "step": 9641 }, { "epoch": 0.6467451453868599, "grad_norm": 0.1483949907105092, "learning_rate": 2e-05, "loss": 5.392, "step": 9642 }, { "epoch": 0.6468122212160848, "grad_norm": 0.1550574374350714, "learning_rate": 2e-05, "loss": 5.3785, "step": 9643 }, { "epoch": 0.6468792970453098, "grad_norm": 0.15702300703700758, "learning_rate": 2e-05, "loss": 5.3813, "step": 9644 }, { "epoch": 0.6469463728745347, "grad_norm": 0.14884135161043074, "learning_rate": 2e-05, "loss": 5.4426, "step": 9645 }, { "epoch": 0.6470134487037597, "grad_norm": 0.15135356994217963, "learning_rate": 2e-05, "loss": 5.3643, "step": 9646 }, { "epoch": 0.6470805245329846, "grad_norm": 0.1612984187819237, "learning_rate": 2e-05, "loss": 5.4672, "step": 9647 }, { "epoch": 0.6471476003622095, "grad_norm": 0.15457457045945183, "learning_rate": 2e-05, "loss": 5.4196, "step": 9648 }, { "epoch": 0.6472146761914345, "grad_norm": 0.14629722448328464, "learning_rate": 2e-05, "loss": 5.3428, "step": 9649 }, { "epoch": 0.6472817520206594, "grad_norm": 0.14902633897594186, "learning_rate": 2e-05, "loss": 5.2753, "step": 9650 }, { "epoch": 0.6473488278498843, "grad_norm": 0.15681053314119572, "learning_rate": 2e-05, "loss": 5.4201, "step": 9651 }, { "epoch": 0.6474159036791093, "grad_norm": 0.15720639401103692, "learning_rate": 2e-05, "loss": 5.4707, "step": 9652 }, { "epoch": 0.6474829795083342, "grad_norm": 0.14759248500496833, "learning_rate": 2e-05, "loss": 5.2691, "step": 9653 }, { "epoch": 0.6475500553375592, "grad_norm": 0.17007938770935263, "learning_rate": 2e-05, "loss": 5.406, "step": 9654 }, { "epoch": 0.6476171311667841, "grad_norm": 0.1606364706128057, "learning_rate": 2e-05, "loss": 5.531, "step": 9655 }, { "epoch": 0.647684206996009, "grad_norm": 0.1549971328062089, "learning_rate": 2e-05, "loss": 5.3729, "step": 9656 }, { "epoch": 0.647751282825234, "grad_norm": 0.16455897125253183, "learning_rate": 2e-05, "loss": 5.4554, "step": 9657 }, { "epoch": 0.6478183586544589, "grad_norm": 0.15711973876693794, "learning_rate": 2e-05, "loss": 5.3649, "step": 9658 }, { "epoch": 0.6478854344836839, "grad_norm": 0.1538204425278777, "learning_rate": 2e-05, "loss": 5.4946, "step": 9659 }, { "epoch": 0.6479525103129088, "grad_norm": 0.152455089406907, "learning_rate": 2e-05, "loss": 5.3582, "step": 9660 }, { "epoch": 0.6480195861421337, "grad_norm": 0.154847811106169, "learning_rate": 2e-05, "loss": 5.3905, "step": 9661 }, { "epoch": 0.6480866619713587, "grad_norm": 0.16275527001741494, "learning_rate": 2e-05, "loss": 5.3243, "step": 9662 }, { "epoch": 0.6481537378005836, "grad_norm": 0.15099188448131698, "learning_rate": 2e-05, "loss": 5.4117, "step": 9663 }, { "epoch": 0.6482208136298085, "grad_norm": 0.15216426817689976, "learning_rate": 2e-05, "loss": 5.322, "step": 9664 }, { "epoch": 0.6482878894590335, "grad_norm": 0.1493831038399376, "learning_rate": 2e-05, "loss": 5.4245, "step": 9665 }, { "epoch": 0.6483549652882584, "grad_norm": 0.1463620006663565, "learning_rate": 2e-05, "loss": 5.446, "step": 9666 }, { "epoch": 0.6484220411174834, "grad_norm": 0.14950009332585928, "learning_rate": 2e-05, "loss": 5.4908, "step": 9667 }, { "epoch": 0.6484891169467083, "grad_norm": 0.15889861240055037, "learning_rate": 2e-05, "loss": 5.3659, "step": 9668 }, { "epoch": 0.6485561927759332, "grad_norm": 0.14877195560874495, "learning_rate": 2e-05, "loss": 5.4918, "step": 9669 }, { "epoch": 0.6486232686051582, "grad_norm": 0.14538447206602842, "learning_rate": 2e-05, "loss": 5.5437, "step": 9670 }, { "epoch": 0.6486903444343831, "grad_norm": 0.15554620918001608, "learning_rate": 2e-05, "loss": 5.3196, "step": 9671 }, { "epoch": 0.648757420263608, "grad_norm": 0.1466811795265504, "learning_rate": 2e-05, "loss": 5.3822, "step": 9672 }, { "epoch": 0.648824496092833, "grad_norm": 0.15388283618245058, "learning_rate": 2e-05, "loss": 5.5141, "step": 9673 }, { "epoch": 0.6488915719220579, "grad_norm": 0.1504590181638396, "learning_rate": 2e-05, "loss": 5.4691, "step": 9674 }, { "epoch": 0.6489586477512829, "grad_norm": 0.15436366922026779, "learning_rate": 2e-05, "loss": 5.5069, "step": 9675 }, { "epoch": 0.6490257235805078, "grad_norm": 0.15118546393689233, "learning_rate": 2e-05, "loss": 5.3641, "step": 9676 }, { "epoch": 0.6490927994097327, "grad_norm": 0.14818090312019802, "learning_rate": 2e-05, "loss": 5.3921, "step": 9677 }, { "epoch": 0.6491598752389577, "grad_norm": 0.1498858592520816, "learning_rate": 2e-05, "loss": 5.5255, "step": 9678 }, { "epoch": 0.6492269510681826, "grad_norm": 0.15056689992987476, "learning_rate": 2e-05, "loss": 5.4798, "step": 9679 }, { "epoch": 0.6492940268974076, "grad_norm": 0.14675061640112003, "learning_rate": 2e-05, "loss": 5.4805, "step": 9680 }, { "epoch": 0.6493611027266325, "grad_norm": 0.15584844145344556, "learning_rate": 2e-05, "loss": 5.3618, "step": 9681 }, { "epoch": 0.6494281785558574, "grad_norm": 0.14720545171611701, "learning_rate": 2e-05, "loss": 5.5196, "step": 9682 }, { "epoch": 0.6494952543850824, "grad_norm": 0.15527082907330772, "learning_rate": 2e-05, "loss": 5.476, "step": 9683 }, { "epoch": 0.6495623302143073, "grad_norm": 0.14858509586423685, "learning_rate": 2e-05, "loss": 5.4638, "step": 9684 }, { "epoch": 0.6496294060435323, "grad_norm": 0.15636126088326038, "learning_rate": 2e-05, "loss": 5.4385, "step": 9685 }, { "epoch": 0.6496964818727572, "grad_norm": 0.15341723451294387, "learning_rate": 2e-05, "loss": 5.4711, "step": 9686 }, { "epoch": 0.6497635577019821, "grad_norm": 0.15217583351572564, "learning_rate": 2e-05, "loss": 5.6358, "step": 9687 }, { "epoch": 0.6498306335312071, "grad_norm": 0.15850348571017153, "learning_rate": 2e-05, "loss": 5.4725, "step": 9688 }, { "epoch": 0.649897709360432, "grad_norm": 0.14900684363873393, "learning_rate": 2e-05, "loss": 5.2956, "step": 9689 }, { "epoch": 0.649964785189657, "grad_norm": 0.15388501847347733, "learning_rate": 2e-05, "loss": 5.5143, "step": 9690 }, { "epoch": 0.6500318610188819, "grad_norm": 0.14785419298304237, "learning_rate": 2e-05, "loss": 5.3518, "step": 9691 }, { "epoch": 0.6500989368481068, "grad_norm": 0.16032422588146475, "learning_rate": 2e-05, "loss": 5.4099, "step": 9692 }, { "epoch": 0.6501660126773318, "grad_norm": 0.1489017434975433, "learning_rate": 2e-05, "loss": 5.4881, "step": 9693 }, { "epoch": 0.6502330885065567, "grad_norm": 0.15033123086003328, "learning_rate": 2e-05, "loss": 5.4075, "step": 9694 }, { "epoch": 0.6503001643357816, "grad_norm": 0.1490755673605477, "learning_rate": 2e-05, "loss": 5.4569, "step": 9695 }, { "epoch": 0.6503672401650066, "grad_norm": 0.1455192906923948, "learning_rate": 2e-05, "loss": 5.4437, "step": 9696 }, { "epoch": 0.6504343159942315, "grad_norm": 0.15851015820476502, "learning_rate": 2e-05, "loss": 5.3692, "step": 9697 }, { "epoch": 0.6505013918234565, "grad_norm": 0.15605767118599992, "learning_rate": 2e-05, "loss": 5.4862, "step": 9698 }, { "epoch": 0.6505684676526814, "grad_norm": 0.150036782090852, "learning_rate": 2e-05, "loss": 5.4284, "step": 9699 }, { "epoch": 0.6506355434819063, "grad_norm": 0.15306919954950293, "learning_rate": 2e-05, "loss": 5.3268, "step": 9700 }, { "epoch": 0.6507026193111313, "grad_norm": 0.16049231085586452, "learning_rate": 2e-05, "loss": 5.5044, "step": 9701 }, { "epoch": 0.6507696951403562, "grad_norm": 0.15667542761614614, "learning_rate": 2e-05, "loss": 5.3606, "step": 9702 }, { "epoch": 0.6508367709695811, "grad_norm": 0.14680535660701238, "learning_rate": 2e-05, "loss": 5.3638, "step": 9703 }, { "epoch": 0.6509038467988061, "grad_norm": 0.14592356570410042, "learning_rate": 2e-05, "loss": 5.5064, "step": 9704 }, { "epoch": 0.650970922628031, "grad_norm": 0.15754036697729962, "learning_rate": 2e-05, "loss": 5.4911, "step": 9705 }, { "epoch": 0.651037998457256, "grad_norm": 0.150721787919293, "learning_rate": 2e-05, "loss": 5.332, "step": 9706 }, { "epoch": 0.6511050742864809, "grad_norm": 0.14645099263297584, "learning_rate": 2e-05, "loss": 5.4232, "step": 9707 }, { "epoch": 0.6511721501157058, "grad_norm": 0.16442496910151358, "learning_rate": 2e-05, "loss": 5.583, "step": 9708 }, { "epoch": 0.6512392259449308, "grad_norm": 0.15030930710170437, "learning_rate": 2e-05, "loss": 5.2164, "step": 9709 }, { "epoch": 0.6513063017741557, "grad_norm": 0.14835391404011564, "learning_rate": 2e-05, "loss": 5.3287, "step": 9710 }, { "epoch": 0.6513733776033807, "grad_norm": 0.15387801010542362, "learning_rate": 2e-05, "loss": 5.474, "step": 9711 }, { "epoch": 0.6514404534326056, "grad_norm": 0.14448224246602356, "learning_rate": 2e-05, "loss": 5.4702, "step": 9712 }, { "epoch": 0.6515075292618305, "grad_norm": 0.14812716260112102, "learning_rate": 2e-05, "loss": 5.3738, "step": 9713 }, { "epoch": 0.6515746050910555, "grad_norm": 0.1501509166975408, "learning_rate": 2e-05, "loss": 5.4491, "step": 9714 }, { "epoch": 0.6516416809202804, "grad_norm": 0.15508437064436953, "learning_rate": 2e-05, "loss": 5.4579, "step": 9715 }, { "epoch": 0.6517087567495053, "grad_norm": 0.15586539713051176, "learning_rate": 2e-05, "loss": 5.4085, "step": 9716 }, { "epoch": 0.6517758325787303, "grad_norm": 0.15499243680302235, "learning_rate": 2e-05, "loss": 5.4505, "step": 9717 }, { "epoch": 0.6518429084079552, "grad_norm": 0.16033869783097102, "learning_rate": 2e-05, "loss": 5.3909, "step": 9718 }, { "epoch": 0.6519099842371802, "grad_norm": 0.14826897708759246, "learning_rate": 2e-05, "loss": 5.4069, "step": 9719 }, { "epoch": 0.6519770600664051, "grad_norm": 0.15827799391634742, "learning_rate": 2e-05, "loss": 5.5011, "step": 9720 }, { "epoch": 0.65204413589563, "grad_norm": 0.15746106067755197, "learning_rate": 2e-05, "loss": 5.4692, "step": 9721 }, { "epoch": 0.652111211724855, "grad_norm": 0.15146908103666518, "learning_rate": 2e-05, "loss": 5.5231, "step": 9722 }, { "epoch": 0.6521782875540799, "grad_norm": 0.1472013456762202, "learning_rate": 2e-05, "loss": 5.3172, "step": 9723 }, { "epoch": 0.6522453633833049, "grad_norm": 0.14920610153620467, "learning_rate": 2e-05, "loss": 5.4055, "step": 9724 }, { "epoch": 0.6523124392125298, "grad_norm": 0.15103080591969462, "learning_rate": 2e-05, "loss": 5.4457, "step": 9725 }, { "epoch": 0.6523795150417547, "grad_norm": 0.1481868326931773, "learning_rate": 2e-05, "loss": 5.3747, "step": 9726 }, { "epoch": 0.6524465908709797, "grad_norm": 0.15071498672963218, "learning_rate": 2e-05, "loss": 5.3479, "step": 9727 }, { "epoch": 0.6525136667002046, "grad_norm": 0.1474889049591453, "learning_rate": 2e-05, "loss": 5.4895, "step": 9728 }, { "epoch": 0.6525807425294295, "grad_norm": 0.14706361219606814, "learning_rate": 2e-05, "loss": 5.3908, "step": 9729 }, { "epoch": 0.6526478183586545, "grad_norm": 0.15193411454709, "learning_rate": 2e-05, "loss": 5.4708, "step": 9730 }, { "epoch": 0.6527148941878794, "grad_norm": 0.14962464733518022, "learning_rate": 2e-05, "loss": 5.4915, "step": 9731 }, { "epoch": 0.6527819700171044, "grad_norm": 0.1519044596161463, "learning_rate": 2e-05, "loss": 5.51, "step": 9732 }, { "epoch": 0.6528490458463293, "grad_norm": 0.1503191436689999, "learning_rate": 2e-05, "loss": 5.5556, "step": 9733 }, { "epoch": 0.6529161216755542, "grad_norm": 0.14734650392312643, "learning_rate": 2e-05, "loss": 5.4152, "step": 9734 }, { "epoch": 0.6529831975047792, "grad_norm": 0.15492935870134697, "learning_rate": 2e-05, "loss": 5.436, "step": 9735 }, { "epoch": 0.6530502733340041, "grad_norm": 0.15723288938345406, "learning_rate": 2e-05, "loss": 5.5335, "step": 9736 }, { "epoch": 0.653117349163229, "grad_norm": 0.1499202146698686, "learning_rate": 2e-05, "loss": 5.3931, "step": 9737 }, { "epoch": 0.653184424992454, "grad_norm": 0.14646712810709975, "learning_rate": 2e-05, "loss": 5.3749, "step": 9738 }, { "epoch": 0.6532515008216789, "grad_norm": 0.15371108878213988, "learning_rate": 2e-05, "loss": 5.2358, "step": 9739 }, { "epoch": 0.6533185766509039, "grad_norm": 0.1499715086094196, "learning_rate": 2e-05, "loss": 5.3972, "step": 9740 }, { "epoch": 0.6533856524801288, "grad_norm": 0.14396547088514178, "learning_rate": 2e-05, "loss": 5.3311, "step": 9741 }, { "epoch": 0.6534527283093537, "grad_norm": 0.1507742439550711, "learning_rate": 2e-05, "loss": 5.4997, "step": 9742 }, { "epoch": 0.6535198041385787, "grad_norm": 0.1514004632994984, "learning_rate": 2e-05, "loss": 5.4991, "step": 9743 }, { "epoch": 0.6535868799678036, "grad_norm": 0.15060164591874142, "learning_rate": 2e-05, "loss": 5.347, "step": 9744 }, { "epoch": 0.6536539557970286, "grad_norm": 0.15741586485560138, "learning_rate": 2e-05, "loss": 5.3845, "step": 9745 }, { "epoch": 0.6537210316262535, "grad_norm": 0.15437227537508724, "learning_rate": 2e-05, "loss": 5.4934, "step": 9746 }, { "epoch": 0.6537881074554784, "grad_norm": 0.1500921368292464, "learning_rate": 2e-05, "loss": 5.4643, "step": 9747 }, { "epoch": 0.6538551832847034, "grad_norm": 0.14649586974287884, "learning_rate": 2e-05, "loss": 5.4684, "step": 9748 }, { "epoch": 0.6539222591139283, "grad_norm": 0.159540979961666, "learning_rate": 2e-05, "loss": 5.5118, "step": 9749 }, { "epoch": 0.6539893349431533, "grad_norm": 0.15298702290974547, "learning_rate": 2e-05, "loss": 5.4075, "step": 9750 }, { "epoch": 0.6540564107723782, "grad_norm": 0.15071914022637994, "learning_rate": 2e-05, "loss": 5.4387, "step": 9751 }, { "epoch": 0.6541234866016031, "grad_norm": 0.154075876512942, "learning_rate": 2e-05, "loss": 5.4352, "step": 9752 }, { "epoch": 0.6541905624308281, "grad_norm": 0.1595109307758106, "learning_rate": 2e-05, "loss": 5.3417, "step": 9753 }, { "epoch": 0.654257638260053, "grad_norm": 0.14371380609704684, "learning_rate": 2e-05, "loss": 5.4191, "step": 9754 }, { "epoch": 0.654324714089278, "grad_norm": 0.1548107546362262, "learning_rate": 2e-05, "loss": 5.4776, "step": 9755 }, { "epoch": 0.6543917899185029, "grad_norm": 0.15275272293759626, "learning_rate": 2e-05, "loss": 5.4465, "step": 9756 }, { "epoch": 0.6544588657477278, "grad_norm": 0.15048179067965053, "learning_rate": 2e-05, "loss": 5.4403, "step": 9757 }, { "epoch": 0.6545259415769528, "grad_norm": 0.15544665053420284, "learning_rate": 2e-05, "loss": 5.3241, "step": 9758 }, { "epoch": 0.6545930174061777, "grad_norm": 0.15418361831437774, "learning_rate": 2e-05, "loss": 5.5554, "step": 9759 }, { "epoch": 0.6546600932354026, "grad_norm": 0.1563502494861759, "learning_rate": 2e-05, "loss": 5.3445, "step": 9760 }, { "epoch": 0.6547271690646276, "grad_norm": 0.1498925512154019, "learning_rate": 2e-05, "loss": 5.4033, "step": 9761 }, { "epoch": 0.6547942448938525, "grad_norm": 0.15435867747186563, "learning_rate": 2e-05, "loss": 5.4559, "step": 9762 }, { "epoch": 0.6548613207230775, "grad_norm": 0.14810807463013037, "learning_rate": 2e-05, "loss": 5.4719, "step": 9763 }, { "epoch": 0.6549283965523024, "grad_norm": 0.1615476208184419, "learning_rate": 2e-05, "loss": 5.4436, "step": 9764 }, { "epoch": 0.6549954723815273, "grad_norm": 0.15392980339223797, "learning_rate": 2e-05, "loss": 5.4179, "step": 9765 }, { "epoch": 0.6550625482107523, "grad_norm": 0.15413187517278365, "learning_rate": 2e-05, "loss": 5.3757, "step": 9766 }, { "epoch": 0.6551296240399772, "grad_norm": 0.1562688733262742, "learning_rate": 2e-05, "loss": 5.394, "step": 9767 }, { "epoch": 0.6551966998692021, "grad_norm": 0.15296545104180503, "learning_rate": 2e-05, "loss": 5.3995, "step": 9768 }, { "epoch": 0.6552637756984271, "grad_norm": 0.15496639551159605, "learning_rate": 2e-05, "loss": 5.5259, "step": 9769 }, { "epoch": 0.655330851527652, "grad_norm": 0.15764311720966634, "learning_rate": 2e-05, "loss": 5.3077, "step": 9770 }, { "epoch": 0.655397927356877, "grad_norm": 0.15246722322270803, "learning_rate": 2e-05, "loss": 5.2912, "step": 9771 }, { "epoch": 0.6554650031861019, "grad_norm": 0.1569902194975582, "learning_rate": 2e-05, "loss": 5.5355, "step": 9772 }, { "epoch": 0.6555320790153268, "grad_norm": 0.15921108955847868, "learning_rate": 2e-05, "loss": 5.4516, "step": 9773 }, { "epoch": 0.6555991548445518, "grad_norm": 0.15541377941918472, "learning_rate": 2e-05, "loss": 5.4043, "step": 9774 }, { "epoch": 0.6556662306737767, "grad_norm": 0.15491730226992895, "learning_rate": 2e-05, "loss": 5.4954, "step": 9775 }, { "epoch": 0.6557333065030017, "grad_norm": 0.16319095932142988, "learning_rate": 2e-05, "loss": 5.3827, "step": 9776 }, { "epoch": 0.6558003823322266, "grad_norm": 0.1565608109547019, "learning_rate": 2e-05, "loss": 5.3732, "step": 9777 }, { "epoch": 0.6558674581614515, "grad_norm": 0.16949436820630132, "learning_rate": 2e-05, "loss": 5.3068, "step": 9778 }, { "epoch": 0.6559345339906765, "grad_norm": 0.1555073115364532, "learning_rate": 2e-05, "loss": 5.5027, "step": 9779 }, { "epoch": 0.6560016098199014, "grad_norm": 0.14934408395956927, "learning_rate": 2e-05, "loss": 5.4861, "step": 9780 }, { "epoch": 0.6560686856491263, "grad_norm": 0.14742049285007605, "learning_rate": 2e-05, "loss": 5.2656, "step": 9781 }, { "epoch": 0.6561357614783513, "grad_norm": 0.1519211461153154, "learning_rate": 2e-05, "loss": 5.3991, "step": 9782 }, { "epoch": 0.6562028373075762, "grad_norm": 0.15343436761034385, "learning_rate": 2e-05, "loss": 5.4188, "step": 9783 }, { "epoch": 0.6562699131368012, "grad_norm": 0.14692460151912007, "learning_rate": 2e-05, "loss": 5.4953, "step": 9784 }, { "epoch": 0.6563369889660261, "grad_norm": 0.15237148397257205, "learning_rate": 2e-05, "loss": 5.2604, "step": 9785 }, { "epoch": 0.656404064795251, "grad_norm": 0.1519391882914495, "learning_rate": 2e-05, "loss": 5.3691, "step": 9786 }, { "epoch": 0.656471140624476, "grad_norm": 0.14787898602531654, "learning_rate": 2e-05, "loss": 5.3635, "step": 9787 }, { "epoch": 0.6565382164537009, "grad_norm": 0.15425924193875706, "learning_rate": 2e-05, "loss": 5.4941, "step": 9788 }, { "epoch": 0.6566052922829259, "grad_norm": 0.1587655269196565, "learning_rate": 2e-05, "loss": 5.4425, "step": 9789 }, { "epoch": 0.6566723681121508, "grad_norm": 0.15387335242446631, "learning_rate": 2e-05, "loss": 5.4487, "step": 9790 }, { "epoch": 0.6567394439413757, "grad_norm": 0.15612251596125895, "learning_rate": 2e-05, "loss": 5.4104, "step": 9791 }, { "epoch": 0.6568065197706007, "grad_norm": 0.1622354168740302, "learning_rate": 2e-05, "loss": 5.4466, "step": 9792 }, { "epoch": 0.6568735955998256, "grad_norm": 0.15727627168120237, "learning_rate": 2e-05, "loss": 5.4056, "step": 9793 }, { "epoch": 0.6569406714290505, "grad_norm": 0.1584620256843739, "learning_rate": 2e-05, "loss": 5.3286, "step": 9794 }, { "epoch": 0.6570077472582755, "grad_norm": 0.1505885989545561, "learning_rate": 2e-05, "loss": 5.4442, "step": 9795 }, { "epoch": 0.6570748230875004, "grad_norm": 0.15204429170423917, "learning_rate": 2e-05, "loss": 5.348, "step": 9796 }, { "epoch": 0.6571418989167254, "grad_norm": 0.16065073934853688, "learning_rate": 2e-05, "loss": 5.43, "step": 9797 }, { "epoch": 0.6572089747459503, "grad_norm": 0.15557768627247187, "learning_rate": 2e-05, "loss": 5.373, "step": 9798 }, { "epoch": 0.6572760505751752, "grad_norm": 0.1474928019301523, "learning_rate": 2e-05, "loss": 5.4937, "step": 9799 }, { "epoch": 0.6573431264044002, "grad_norm": 0.1530401715609346, "learning_rate": 2e-05, "loss": 5.3176, "step": 9800 }, { "epoch": 0.6574102022336251, "grad_norm": 0.15368437891943496, "learning_rate": 2e-05, "loss": 5.4306, "step": 9801 }, { "epoch": 0.6574772780628501, "grad_norm": 0.15905047542823494, "learning_rate": 2e-05, "loss": 5.4137, "step": 9802 }, { "epoch": 0.657544353892075, "grad_norm": 0.1468036774153878, "learning_rate": 2e-05, "loss": 5.4187, "step": 9803 }, { "epoch": 0.6576114297212999, "grad_norm": 0.15158080273208072, "learning_rate": 2e-05, "loss": 5.2619, "step": 9804 }, { "epoch": 0.6576785055505249, "grad_norm": 0.1533306412684322, "learning_rate": 2e-05, "loss": 5.3967, "step": 9805 }, { "epoch": 0.6577455813797498, "grad_norm": 0.14764223827495807, "learning_rate": 2e-05, "loss": 5.4724, "step": 9806 }, { "epoch": 0.6578126572089747, "grad_norm": 0.15535224019718089, "learning_rate": 2e-05, "loss": 5.4867, "step": 9807 }, { "epoch": 0.6578797330381997, "grad_norm": 0.15394775410161804, "learning_rate": 2e-05, "loss": 5.4987, "step": 9808 }, { "epoch": 0.6579468088674246, "grad_norm": 0.14843091371813202, "learning_rate": 2e-05, "loss": 5.4895, "step": 9809 }, { "epoch": 0.6580138846966496, "grad_norm": 0.15262448911071694, "learning_rate": 2e-05, "loss": 5.4716, "step": 9810 }, { "epoch": 0.6580809605258745, "grad_norm": 0.1454589214530626, "learning_rate": 2e-05, "loss": 5.3451, "step": 9811 }, { "epoch": 0.6581480363550994, "grad_norm": 0.1447030825401799, "learning_rate": 2e-05, "loss": 5.3114, "step": 9812 }, { "epoch": 0.6582151121843244, "grad_norm": 0.15336216056136306, "learning_rate": 2e-05, "loss": 5.503, "step": 9813 }, { "epoch": 0.6582821880135493, "grad_norm": 0.16095039326216143, "learning_rate": 2e-05, "loss": 5.3576, "step": 9814 }, { "epoch": 0.6583492638427743, "grad_norm": 0.15099872139001738, "learning_rate": 2e-05, "loss": 5.3701, "step": 9815 }, { "epoch": 0.6584163396719992, "grad_norm": 0.14798140547894278, "learning_rate": 2e-05, "loss": 5.36, "step": 9816 }, { "epoch": 0.6584834155012241, "grad_norm": 0.15316151411498008, "learning_rate": 2e-05, "loss": 5.3361, "step": 9817 }, { "epoch": 0.6585504913304491, "grad_norm": 0.14843405765425724, "learning_rate": 2e-05, "loss": 5.228, "step": 9818 }, { "epoch": 0.658617567159674, "grad_norm": 0.1566834544649518, "learning_rate": 2e-05, "loss": 5.5083, "step": 9819 }, { "epoch": 0.658684642988899, "grad_norm": 0.1533381523250546, "learning_rate": 2e-05, "loss": 5.3839, "step": 9820 }, { "epoch": 0.6587517188181239, "grad_norm": 0.16101767186345364, "learning_rate": 2e-05, "loss": 5.417, "step": 9821 }, { "epoch": 0.6588187946473488, "grad_norm": 0.1513442133428081, "learning_rate": 2e-05, "loss": 5.6083, "step": 9822 }, { "epoch": 0.6588858704765738, "grad_norm": 0.1465606653892973, "learning_rate": 2e-05, "loss": 5.3769, "step": 9823 }, { "epoch": 0.6589529463057987, "grad_norm": 0.1523367260167032, "learning_rate": 2e-05, "loss": 5.4594, "step": 9824 }, { "epoch": 0.6590200221350236, "grad_norm": 0.16342229837553954, "learning_rate": 2e-05, "loss": 5.3971, "step": 9825 }, { "epoch": 0.6590870979642486, "grad_norm": 0.14601310614923427, "learning_rate": 2e-05, "loss": 5.3886, "step": 9826 }, { "epoch": 0.6591541737934735, "grad_norm": 0.1477118313340132, "learning_rate": 2e-05, "loss": 5.3564, "step": 9827 }, { "epoch": 0.6592212496226985, "grad_norm": 0.1643049003513351, "learning_rate": 2e-05, "loss": 5.4272, "step": 9828 }, { "epoch": 0.6592883254519234, "grad_norm": 0.15779434032722361, "learning_rate": 2e-05, "loss": 5.3405, "step": 9829 }, { "epoch": 0.6593554012811483, "grad_norm": 0.15503361925521592, "learning_rate": 2e-05, "loss": 5.3335, "step": 9830 }, { "epoch": 0.6594224771103733, "grad_norm": 0.18614289670721915, "learning_rate": 2e-05, "loss": 5.4074, "step": 9831 }, { "epoch": 0.6594895529395982, "grad_norm": 0.15264048785770074, "learning_rate": 2e-05, "loss": 5.4631, "step": 9832 }, { "epoch": 0.6595566287688231, "grad_norm": 0.15127216222838277, "learning_rate": 2e-05, "loss": 5.297, "step": 9833 }, { "epoch": 0.6596237045980481, "grad_norm": 0.16301623055547051, "learning_rate": 2e-05, "loss": 5.3732, "step": 9834 }, { "epoch": 0.659690780427273, "grad_norm": 0.1572879260840644, "learning_rate": 2e-05, "loss": 5.3801, "step": 9835 }, { "epoch": 0.659757856256498, "grad_norm": 0.14589633273398284, "learning_rate": 2e-05, "loss": 5.3105, "step": 9836 }, { "epoch": 0.6598249320857229, "grad_norm": 0.1516130606056624, "learning_rate": 2e-05, "loss": 5.3121, "step": 9837 }, { "epoch": 0.6598920079149478, "grad_norm": 0.17385985279439833, "learning_rate": 2e-05, "loss": 5.3967, "step": 9838 }, { "epoch": 0.6599590837441728, "grad_norm": 0.15380223473937704, "learning_rate": 2e-05, "loss": 5.3275, "step": 9839 }, { "epoch": 0.6600261595733977, "grad_norm": 0.14911571266792814, "learning_rate": 2e-05, "loss": 5.4139, "step": 9840 }, { "epoch": 0.6600932354026227, "grad_norm": 0.15092318991705292, "learning_rate": 2e-05, "loss": 5.3353, "step": 9841 }, { "epoch": 0.6601603112318476, "grad_norm": 0.16014341073568292, "learning_rate": 2e-05, "loss": 5.3556, "step": 9842 }, { "epoch": 0.6602273870610725, "grad_norm": 0.15356742200288648, "learning_rate": 2e-05, "loss": 5.5064, "step": 9843 }, { "epoch": 0.6602944628902975, "grad_norm": 0.14734004543635065, "learning_rate": 2e-05, "loss": 5.4398, "step": 9844 }, { "epoch": 0.6603615387195224, "grad_norm": 0.15611781911789288, "learning_rate": 2e-05, "loss": 5.4811, "step": 9845 }, { "epoch": 0.6604286145487474, "grad_norm": 0.16048694007051464, "learning_rate": 2e-05, "loss": 5.4075, "step": 9846 }, { "epoch": 0.6604956903779723, "grad_norm": 0.15175523800922774, "learning_rate": 2e-05, "loss": 5.3391, "step": 9847 }, { "epoch": 0.6605627662071972, "grad_norm": 0.14698453372214348, "learning_rate": 2e-05, "loss": 5.4527, "step": 9848 }, { "epoch": 0.6606298420364222, "grad_norm": 0.16057614934436715, "learning_rate": 2e-05, "loss": 5.4368, "step": 9849 }, { "epoch": 0.6606969178656471, "grad_norm": 0.1563890243848878, "learning_rate": 2e-05, "loss": 5.3454, "step": 9850 }, { "epoch": 0.660763993694872, "grad_norm": 0.15093231204648833, "learning_rate": 2e-05, "loss": 5.3735, "step": 9851 }, { "epoch": 0.660831069524097, "grad_norm": 0.15749073826072976, "learning_rate": 2e-05, "loss": 5.4921, "step": 9852 }, { "epoch": 0.6608981453533219, "grad_norm": 0.15572626256204597, "learning_rate": 2e-05, "loss": 5.3504, "step": 9853 }, { "epoch": 0.6609652211825469, "grad_norm": 0.15271494140700653, "learning_rate": 2e-05, "loss": 5.5018, "step": 9854 }, { "epoch": 0.6610322970117718, "grad_norm": 0.155349696296429, "learning_rate": 2e-05, "loss": 5.3971, "step": 9855 }, { "epoch": 0.6610993728409967, "grad_norm": 0.1542571306768787, "learning_rate": 2e-05, "loss": 5.4896, "step": 9856 }, { "epoch": 0.6611664486702217, "grad_norm": 0.15392697228871702, "learning_rate": 2e-05, "loss": 5.4657, "step": 9857 }, { "epoch": 0.6612335244994466, "grad_norm": 0.15250722466333888, "learning_rate": 2e-05, "loss": 5.4928, "step": 9858 }, { "epoch": 0.6613006003286716, "grad_norm": 0.1610520956450239, "learning_rate": 2e-05, "loss": 5.4555, "step": 9859 }, { "epoch": 0.6613676761578965, "grad_norm": 0.15182587435525102, "learning_rate": 2e-05, "loss": 5.5651, "step": 9860 }, { "epoch": 0.6614347519871214, "grad_norm": 0.14635665630882796, "learning_rate": 2e-05, "loss": 5.2996, "step": 9861 }, { "epoch": 0.6615018278163464, "grad_norm": 0.14589756388168565, "learning_rate": 2e-05, "loss": 5.4172, "step": 9862 }, { "epoch": 0.6615689036455713, "grad_norm": 0.15672717224072674, "learning_rate": 2e-05, "loss": 5.3076, "step": 9863 }, { "epoch": 0.6616359794747962, "grad_norm": 0.15388871451956396, "learning_rate": 2e-05, "loss": 5.3605, "step": 9864 }, { "epoch": 0.6617030553040212, "grad_norm": 0.16097679933253342, "learning_rate": 2e-05, "loss": 5.4105, "step": 9865 }, { "epoch": 0.6617701311332461, "grad_norm": 0.16273487752971674, "learning_rate": 2e-05, "loss": 5.4924, "step": 9866 }, { "epoch": 0.6618372069624711, "grad_norm": 0.15253324654799935, "learning_rate": 2e-05, "loss": 5.3764, "step": 9867 }, { "epoch": 0.661904282791696, "grad_norm": 0.15371648720707462, "learning_rate": 2e-05, "loss": 5.3878, "step": 9868 }, { "epoch": 0.6619713586209209, "grad_norm": 0.15683857564391762, "learning_rate": 2e-05, "loss": 5.4085, "step": 9869 }, { "epoch": 0.6620384344501459, "grad_norm": 0.14818698381802295, "learning_rate": 2e-05, "loss": 5.4218, "step": 9870 }, { "epoch": 0.6621055102793708, "grad_norm": 0.15101679860980657, "learning_rate": 2e-05, "loss": 5.6031, "step": 9871 }, { "epoch": 0.6621725861085958, "grad_norm": 0.1584975764758613, "learning_rate": 2e-05, "loss": 5.3797, "step": 9872 }, { "epoch": 0.6622396619378207, "grad_norm": 0.1576532911867124, "learning_rate": 2e-05, "loss": 5.5133, "step": 9873 }, { "epoch": 0.6623067377670456, "grad_norm": 0.15331996867502032, "learning_rate": 2e-05, "loss": 5.3742, "step": 9874 }, { "epoch": 0.6623738135962706, "grad_norm": 0.15498299025147153, "learning_rate": 2e-05, "loss": 5.4532, "step": 9875 }, { "epoch": 0.6624408894254955, "grad_norm": 0.1528273612075145, "learning_rate": 2e-05, "loss": 5.3728, "step": 9876 }, { "epoch": 0.6625079652547204, "grad_norm": 0.15229558109774904, "learning_rate": 2e-05, "loss": 5.4532, "step": 9877 }, { "epoch": 0.6625750410839454, "grad_norm": 0.1545357144556755, "learning_rate": 2e-05, "loss": 5.4097, "step": 9878 }, { "epoch": 0.6626421169131703, "grad_norm": 0.1501862177510736, "learning_rate": 2e-05, "loss": 5.388, "step": 9879 }, { "epoch": 0.6627091927423953, "grad_norm": 0.15281586071981357, "learning_rate": 2e-05, "loss": 5.5405, "step": 9880 }, { "epoch": 0.6627762685716202, "grad_norm": 0.15417097413590003, "learning_rate": 2e-05, "loss": 5.4083, "step": 9881 }, { "epoch": 0.6628433444008451, "grad_norm": 0.1541566899042459, "learning_rate": 2e-05, "loss": 5.3574, "step": 9882 }, { "epoch": 0.6629104202300701, "grad_norm": 0.15025444610911184, "learning_rate": 2e-05, "loss": 5.5429, "step": 9883 }, { "epoch": 0.662977496059295, "grad_norm": 0.16093013560778366, "learning_rate": 2e-05, "loss": 5.4978, "step": 9884 }, { "epoch": 0.66304457188852, "grad_norm": 0.15206985541029774, "learning_rate": 2e-05, "loss": 5.4484, "step": 9885 }, { "epoch": 0.6631116477177449, "grad_norm": 0.1610471747088262, "learning_rate": 2e-05, "loss": 5.4778, "step": 9886 }, { "epoch": 0.6631787235469698, "grad_norm": 0.1527154776704596, "learning_rate": 2e-05, "loss": 5.479, "step": 9887 }, { "epoch": 0.6632457993761948, "grad_norm": 0.15070458139156798, "learning_rate": 2e-05, "loss": 5.3806, "step": 9888 }, { "epoch": 0.6633128752054197, "grad_norm": 0.15647974227721417, "learning_rate": 2e-05, "loss": 5.407, "step": 9889 }, { "epoch": 0.6633799510346446, "grad_norm": 0.15141137632383506, "learning_rate": 2e-05, "loss": 5.4799, "step": 9890 }, { "epoch": 0.6634470268638696, "grad_norm": 0.16239127230446004, "learning_rate": 2e-05, "loss": 5.4672, "step": 9891 }, { "epoch": 0.6635141026930945, "grad_norm": 0.14692724707687202, "learning_rate": 2e-05, "loss": 5.4624, "step": 9892 }, { "epoch": 0.6635811785223195, "grad_norm": 0.150474924918795, "learning_rate": 2e-05, "loss": 5.5457, "step": 9893 }, { "epoch": 0.6636482543515444, "grad_norm": 0.15406601096746556, "learning_rate": 2e-05, "loss": 5.3547, "step": 9894 }, { "epoch": 0.6637153301807693, "grad_norm": 0.14955263831455853, "learning_rate": 2e-05, "loss": 5.4264, "step": 9895 }, { "epoch": 0.6637824060099943, "grad_norm": 0.15968992689082484, "learning_rate": 2e-05, "loss": 5.412, "step": 9896 }, { "epoch": 0.6638494818392192, "grad_norm": 0.15444232386469126, "learning_rate": 2e-05, "loss": 5.4224, "step": 9897 }, { "epoch": 0.6639165576684442, "grad_norm": 0.14622616660921148, "learning_rate": 2e-05, "loss": 5.4496, "step": 9898 }, { "epoch": 0.6639836334976691, "grad_norm": 0.1470881430441305, "learning_rate": 2e-05, "loss": 5.579, "step": 9899 }, { "epoch": 0.664050709326894, "grad_norm": 0.15843972941787934, "learning_rate": 2e-05, "loss": 5.4303, "step": 9900 }, { "epoch": 0.664117785156119, "grad_norm": 0.1526624399528818, "learning_rate": 2e-05, "loss": 5.3936, "step": 9901 }, { "epoch": 0.6641848609853439, "grad_norm": 0.1589124417942036, "learning_rate": 2e-05, "loss": 5.3324, "step": 9902 }, { "epoch": 0.6642519368145688, "grad_norm": 0.1575864283308364, "learning_rate": 2e-05, "loss": 5.5622, "step": 9903 }, { "epoch": 0.6643190126437938, "grad_norm": 0.16254524274777046, "learning_rate": 2e-05, "loss": 5.468, "step": 9904 }, { "epoch": 0.6643860884730187, "grad_norm": 0.15577523793666237, "learning_rate": 2e-05, "loss": 5.4665, "step": 9905 }, { "epoch": 0.6644531643022437, "grad_norm": 0.15862163309634714, "learning_rate": 2e-05, "loss": 5.6137, "step": 9906 }, { "epoch": 0.6645202401314686, "grad_norm": 0.16278496648284885, "learning_rate": 2e-05, "loss": 5.4982, "step": 9907 }, { "epoch": 0.6645873159606935, "grad_norm": 0.15193519080671128, "learning_rate": 2e-05, "loss": 5.3189, "step": 9908 }, { "epoch": 0.6646543917899185, "grad_norm": 0.16461078722477643, "learning_rate": 2e-05, "loss": 5.5028, "step": 9909 }, { "epoch": 0.6647214676191434, "grad_norm": 0.16210005332478203, "learning_rate": 2e-05, "loss": 5.3672, "step": 9910 }, { "epoch": 0.6647885434483684, "grad_norm": 0.14614757273420356, "learning_rate": 2e-05, "loss": 5.432, "step": 9911 }, { "epoch": 0.6648556192775933, "grad_norm": 0.16450511683652738, "learning_rate": 2e-05, "loss": 5.3688, "step": 9912 }, { "epoch": 0.6649226951068182, "grad_norm": 0.15715128515520854, "learning_rate": 2e-05, "loss": 5.3983, "step": 9913 }, { "epoch": 0.6649897709360432, "grad_norm": 0.15529814782687984, "learning_rate": 2e-05, "loss": 5.4654, "step": 9914 }, { "epoch": 0.6650568467652681, "grad_norm": 0.1565738437594841, "learning_rate": 2e-05, "loss": 5.3571, "step": 9915 }, { "epoch": 0.665123922594493, "grad_norm": 0.15875237606167872, "learning_rate": 2e-05, "loss": 5.4879, "step": 9916 }, { "epoch": 0.665190998423718, "grad_norm": 0.14861413964220985, "learning_rate": 2e-05, "loss": 5.4492, "step": 9917 }, { "epoch": 0.6652580742529429, "grad_norm": 0.1497800882094297, "learning_rate": 2e-05, "loss": 5.6035, "step": 9918 }, { "epoch": 0.6653251500821679, "grad_norm": 0.15951319392515279, "learning_rate": 2e-05, "loss": 5.4922, "step": 9919 }, { "epoch": 0.6653922259113928, "grad_norm": 0.15573715800291496, "learning_rate": 2e-05, "loss": 5.5399, "step": 9920 }, { "epoch": 0.6654593017406177, "grad_norm": 0.1489900388193695, "learning_rate": 2e-05, "loss": 5.417, "step": 9921 }, { "epoch": 0.6655263775698427, "grad_norm": 0.15571509160309432, "learning_rate": 2e-05, "loss": 5.4329, "step": 9922 }, { "epoch": 0.6655934533990676, "grad_norm": 0.15123270495262256, "learning_rate": 2e-05, "loss": 5.6707, "step": 9923 }, { "epoch": 0.6656605292282926, "grad_norm": 0.1473301388484114, "learning_rate": 2e-05, "loss": 5.3682, "step": 9924 }, { "epoch": 0.6657276050575175, "grad_norm": 0.15433794000478224, "learning_rate": 2e-05, "loss": 5.4033, "step": 9925 }, { "epoch": 0.6657946808867424, "grad_norm": 0.15073388144085226, "learning_rate": 2e-05, "loss": 5.4613, "step": 9926 }, { "epoch": 0.6658617567159674, "grad_norm": 0.15133290462201973, "learning_rate": 2e-05, "loss": 5.4732, "step": 9927 }, { "epoch": 0.6659288325451923, "grad_norm": 0.15240706764072337, "learning_rate": 2e-05, "loss": 5.4425, "step": 9928 }, { "epoch": 0.6659959083744172, "grad_norm": 0.15169672394459774, "learning_rate": 2e-05, "loss": 5.4423, "step": 9929 }, { "epoch": 0.6660629842036422, "grad_norm": 0.1538894541078572, "learning_rate": 2e-05, "loss": 5.5374, "step": 9930 }, { "epoch": 0.6661300600328671, "grad_norm": 0.15951777153130123, "learning_rate": 2e-05, "loss": 5.4681, "step": 9931 }, { "epoch": 0.6661971358620921, "grad_norm": 0.14720024892693503, "learning_rate": 2e-05, "loss": 5.2664, "step": 9932 }, { "epoch": 0.666264211691317, "grad_norm": 0.15553592377236336, "learning_rate": 2e-05, "loss": 5.3918, "step": 9933 }, { "epoch": 0.6663312875205419, "grad_norm": 0.1499927330002631, "learning_rate": 2e-05, "loss": 5.3682, "step": 9934 }, { "epoch": 0.6663983633497669, "grad_norm": 0.1617817256565762, "learning_rate": 2e-05, "loss": 5.6564, "step": 9935 }, { "epoch": 0.6664654391789918, "grad_norm": 0.1527125226787073, "learning_rate": 2e-05, "loss": 5.4608, "step": 9936 }, { "epoch": 0.6665325150082168, "grad_norm": 0.14882205672565302, "learning_rate": 2e-05, "loss": 5.4668, "step": 9937 }, { "epoch": 0.6665995908374417, "grad_norm": 0.15502790986746764, "learning_rate": 2e-05, "loss": 5.495, "step": 9938 }, { "epoch": 0.6666666666666666, "grad_norm": 0.15684880466398246, "learning_rate": 2e-05, "loss": 5.4302, "step": 9939 }, { "epoch": 0.6667337424958916, "grad_norm": 0.16390565198762222, "learning_rate": 2e-05, "loss": 5.282, "step": 9940 }, { "epoch": 0.6668008183251165, "grad_norm": 0.14824204334654237, "learning_rate": 2e-05, "loss": 5.3143, "step": 9941 }, { "epoch": 0.6668678941543414, "grad_norm": 0.16525798356668472, "learning_rate": 2e-05, "loss": 5.4624, "step": 9942 }, { "epoch": 0.6669349699835664, "grad_norm": 0.14744544545898752, "learning_rate": 2e-05, "loss": 5.4221, "step": 9943 }, { "epoch": 0.6670020458127913, "grad_norm": 0.15309622542448523, "learning_rate": 2e-05, "loss": 5.4132, "step": 9944 }, { "epoch": 0.6670691216420163, "grad_norm": 0.1635222947711619, "learning_rate": 2e-05, "loss": 5.4118, "step": 9945 }, { "epoch": 0.6671361974712412, "grad_norm": 0.15482386482463392, "learning_rate": 2e-05, "loss": 5.3977, "step": 9946 }, { "epoch": 0.6672032733004661, "grad_norm": 0.1543449351531162, "learning_rate": 2e-05, "loss": 5.3858, "step": 9947 }, { "epoch": 0.6672703491296911, "grad_norm": 0.1620916310523679, "learning_rate": 2e-05, "loss": 5.4441, "step": 9948 }, { "epoch": 0.667337424958916, "grad_norm": 0.15547543676521997, "learning_rate": 2e-05, "loss": 5.4328, "step": 9949 }, { "epoch": 0.667404500788141, "grad_norm": 0.1482941582388271, "learning_rate": 2e-05, "loss": 5.5053, "step": 9950 }, { "epoch": 0.6674715766173659, "grad_norm": 0.15136432716871334, "learning_rate": 2e-05, "loss": 5.4064, "step": 9951 }, { "epoch": 0.6675386524465908, "grad_norm": 0.1549246968050513, "learning_rate": 2e-05, "loss": 5.4454, "step": 9952 }, { "epoch": 0.6676057282758158, "grad_norm": 0.15227538556117684, "learning_rate": 2e-05, "loss": 5.3511, "step": 9953 }, { "epoch": 0.6676728041050407, "grad_norm": 0.15509379135906604, "learning_rate": 2e-05, "loss": 5.4369, "step": 9954 }, { "epoch": 0.6677398799342656, "grad_norm": 0.1566494081923976, "learning_rate": 2e-05, "loss": 5.4864, "step": 9955 }, { "epoch": 0.6678069557634906, "grad_norm": 0.16003163192605468, "learning_rate": 2e-05, "loss": 5.304, "step": 9956 }, { "epoch": 0.6678740315927155, "grad_norm": 0.15450070262385776, "learning_rate": 2e-05, "loss": 5.523, "step": 9957 }, { "epoch": 0.6679411074219405, "grad_norm": 0.15602559645805045, "learning_rate": 2e-05, "loss": 5.5701, "step": 9958 }, { "epoch": 0.6680081832511654, "grad_norm": 0.15696806410664377, "learning_rate": 2e-05, "loss": 5.5698, "step": 9959 }, { "epoch": 0.6680752590803903, "grad_norm": 0.1641461831020307, "learning_rate": 2e-05, "loss": 5.5103, "step": 9960 }, { "epoch": 0.6681423349096153, "grad_norm": 0.15707582656050184, "learning_rate": 2e-05, "loss": 5.3952, "step": 9961 }, { "epoch": 0.6682094107388402, "grad_norm": 0.15254124388546, "learning_rate": 2e-05, "loss": 5.4323, "step": 9962 }, { "epoch": 0.6682764865680652, "grad_norm": 0.16300698655026616, "learning_rate": 2e-05, "loss": 5.4246, "step": 9963 }, { "epoch": 0.6683435623972901, "grad_norm": 0.1538032645616055, "learning_rate": 2e-05, "loss": 5.5084, "step": 9964 }, { "epoch": 0.668410638226515, "grad_norm": 0.15121377959457702, "learning_rate": 2e-05, "loss": 5.4942, "step": 9965 }, { "epoch": 0.66847771405574, "grad_norm": 0.15701088267031943, "learning_rate": 2e-05, "loss": 5.5637, "step": 9966 }, { "epoch": 0.6685447898849649, "grad_norm": 0.1566332126977651, "learning_rate": 2e-05, "loss": 5.3458, "step": 9967 }, { "epoch": 0.6686118657141898, "grad_norm": 0.16097437088348204, "learning_rate": 2e-05, "loss": 5.5506, "step": 9968 }, { "epoch": 0.6686789415434148, "grad_norm": 0.14941595096094146, "learning_rate": 2e-05, "loss": 5.383, "step": 9969 }, { "epoch": 0.6687460173726397, "grad_norm": 0.17137129633555515, "learning_rate": 2e-05, "loss": 5.3438, "step": 9970 }, { "epoch": 0.6688130932018647, "grad_norm": 0.15972501963282873, "learning_rate": 2e-05, "loss": 5.4639, "step": 9971 }, { "epoch": 0.6688801690310896, "grad_norm": 0.1470685078787576, "learning_rate": 2e-05, "loss": 5.4223, "step": 9972 }, { "epoch": 0.6689472448603145, "grad_norm": 0.14738151260212207, "learning_rate": 2e-05, "loss": 5.4187, "step": 9973 }, { "epoch": 0.6690143206895395, "grad_norm": 0.15224231856721315, "learning_rate": 2e-05, "loss": 5.5428, "step": 9974 }, { "epoch": 0.6690813965187644, "grad_norm": 0.15236931931780764, "learning_rate": 2e-05, "loss": 5.4708, "step": 9975 }, { "epoch": 0.6691484723479894, "grad_norm": 0.1440581462480556, "learning_rate": 2e-05, "loss": 5.2196, "step": 9976 }, { "epoch": 0.6692155481772143, "grad_norm": 0.15197166232604215, "learning_rate": 2e-05, "loss": 5.5157, "step": 9977 }, { "epoch": 0.6692826240064392, "grad_norm": 0.15139655887583334, "learning_rate": 2e-05, "loss": 5.3791, "step": 9978 }, { "epoch": 0.6693496998356642, "grad_norm": 0.14588590396242446, "learning_rate": 2e-05, "loss": 5.4614, "step": 9979 }, { "epoch": 0.6694167756648891, "grad_norm": 0.15603723878436804, "learning_rate": 2e-05, "loss": 5.4584, "step": 9980 }, { "epoch": 0.669483851494114, "grad_norm": 0.15509686999130207, "learning_rate": 2e-05, "loss": 5.4721, "step": 9981 }, { "epoch": 0.669550927323339, "grad_norm": 0.15615746290872776, "learning_rate": 2e-05, "loss": 5.4512, "step": 9982 }, { "epoch": 0.6696180031525639, "grad_norm": 0.1548041011453804, "learning_rate": 2e-05, "loss": 5.3855, "step": 9983 }, { "epoch": 0.6696850789817889, "grad_norm": 0.1586709516583898, "learning_rate": 2e-05, "loss": 5.302, "step": 9984 }, { "epoch": 0.6697521548110138, "grad_norm": 0.1544436378983193, "learning_rate": 2e-05, "loss": 5.3263, "step": 9985 }, { "epoch": 0.6698192306402387, "grad_norm": 0.1570757299380076, "learning_rate": 2e-05, "loss": 5.3323, "step": 9986 }, { "epoch": 0.6698863064694637, "grad_norm": 0.15746434877463752, "learning_rate": 2e-05, "loss": 5.3288, "step": 9987 }, { "epoch": 0.6699533822986886, "grad_norm": 0.15779685488525402, "learning_rate": 2e-05, "loss": 5.4011, "step": 9988 }, { "epoch": 0.6700204581279136, "grad_norm": 0.16645831436404973, "learning_rate": 2e-05, "loss": 5.4708, "step": 9989 }, { "epoch": 0.6700875339571385, "grad_norm": 0.15911966559437826, "learning_rate": 2e-05, "loss": 5.4077, "step": 9990 }, { "epoch": 0.6701546097863634, "grad_norm": 0.1553468468796053, "learning_rate": 2e-05, "loss": 5.4514, "step": 9991 }, { "epoch": 0.6702216856155884, "grad_norm": 0.1544583189122214, "learning_rate": 2e-05, "loss": 5.4281, "step": 9992 }, { "epoch": 0.6702887614448133, "grad_norm": 0.15193312231861617, "learning_rate": 2e-05, "loss": 5.3875, "step": 9993 }, { "epoch": 0.6703558372740382, "grad_norm": 0.1563376417366738, "learning_rate": 2e-05, "loss": 5.4311, "step": 9994 }, { "epoch": 0.6704229131032632, "grad_norm": 0.14631970063599006, "learning_rate": 2e-05, "loss": 5.3, "step": 9995 }, { "epoch": 0.6704899889324881, "grad_norm": 0.14797232255692694, "learning_rate": 2e-05, "loss": 5.3771, "step": 9996 }, { "epoch": 0.6705570647617131, "grad_norm": 0.15259857235114627, "learning_rate": 2e-05, "loss": 5.4619, "step": 9997 }, { "epoch": 0.670624140590938, "grad_norm": 0.1498548613638616, "learning_rate": 2e-05, "loss": 5.4547, "step": 9998 }, { "epoch": 0.6706912164201629, "grad_norm": 0.15113487916886195, "learning_rate": 2e-05, "loss": 5.4338, "step": 9999 }, { "epoch": 0.670758292249388, "grad_norm": 0.14881007214570116, "learning_rate": 2e-05, "loss": 5.3792, "step": 10000 } ], "logging_steps": 1.0, "max_steps": 745400, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4575457078280192.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }